In [None]:
import requests
from io import StringIO
import pandas as pd
import numpy as np
import sqlite3
sqlite3.register_adapter(np.int64, int)
import re
import math
from datetime import date
import matplotlib.pyplot as plt

# Show DataFrames in full: up to 2000 rows, 500 columns and 1000 characters of width.
pd.options.display.max_rows = 2000
pd.options.display.max_columns = 500
pd.options.display.width = 1000

# Render floats with two fixed decimals instead of scientific notation.
pd.options.display.float_format = lambda value: '%.2f' % value

# isolation_level=None puts the sqlite3 connection in autocommit mode.
db = sqlite3.connect('./stock.db', isolation_level=None)

In [None]:
# Quarter labels that drive the custom sort order of the EPS export: a label's position in
# this list becomes its sort key. Note the list is ordered by quarter number (q1..q4),
# not chronologically.
QUARTERS = ['2023q1','2022q2','2022q3','2022q4']

In [None]:
# Load the tables used by the sections below.
df_stock_id_name = pd.read_sql_query("select * from stockIdName", db)

# Daily rows, sorted by their `date` column.
daily = pd.read_sql_query("select * from daily", db).sort_values(by=['date'])

# Financial statements: missing values and the '--' placeholder are both turned into 0.
df_financial_statement = (
    pd.read_sql_query("select * from financialStatement", db)
    .fillna(0)
    .replace('--', 0)
)

In [None]:
# Every id listed in stockIdName; the export loops below iterate over this list.
ids = df_stock_id_name['id'].tolist()

## 月營收

In [None]:
# All monthly revenue rows in date order. getRowRevenue reads the result through the
# global name `df`, so that name must stay as is.
sql = "select * from monthlyRevenue order by date"
df = pd.read_sql_query(sql, db)

In [None]:
# TODO (original note): rewrite this so the table is updated by reading an existing file.

# Column layout shared by the monthly revenue and monthly close tables:
# 'id', 'name', then one 'YYYYMM' label per month from first_month through last_month
# (both inclusive). The labels are generated instead of hand-typed, so extending the
# range only means changing last_month.
first_month = (2018, 1)
last_month = (2023, 6)
columns = ['id', 'name'] + [
    f'{year}{month:02d}'
    for year in range(first_month[0], last_month[0] + 1)
    for month in range(1, 13)
    if first_month <= (year, month) <= last_month
]
df_revenue = pd.DataFrame(columns=columns)
df_revenue

In [None]:
def getRowRevenue(id):
    """Build the one-row, wide-format revenue record for a single stock.

    Reads the global monthly revenue frame `df` and the global `columns` layout.

    Args:
        id: value matched against the `id` column of `df`.

    Returns:
        A one-row DataFrame (row label 'revenue') whose columns are exactly
        `columns`: the id, the name, and one revenue figure per month.

    Raises:
        KeyError: when the stock has no '201801' entry or lacks any month
            listed in `columns` (the caller treats this as "skip this stock").
    """
    # Work on an explicit copy: assigning into a chained-indexing slice triggers
    # SettingWithCopyWarning and is not guaranteed to write where intended.
    monthly = df.loc[df['id'] == id, ['date', 'name', 'revenue']].copy()
    # Scale revenue down by 100000 and keep two decimals
    # (presumably a unit conversion -- confirm the source unit).
    monthly['revenue'] = (monthly['revenue'] / 100000).round(2)

    # Transpose so each date becomes a column; the first row ('date') supplies the labels.
    wide = monthly.T
    wide.columns = wide.iloc[0]
    wide = wide.drop(['date'])

    wide['id'] = id
    # The name is taken from the 201801 entry, so that month must exist.
    wide['name'] = wide['201801']['name']
    # Only the 'revenue' row remains after dropping the 'name' row.
    wide = wide.drop(['name'])
    return wide[columns]
# Collect one revenue row per stock, then concatenate once. DataFrame.append was removed
# in pandas 2.0; combined with the previous bare `except`, every stock was silently
# skipped on current pandas and the export came out empty.
revenue_rows = []
revenue_skipped_ids = []  # ids whose row could not be built (e.g. incomplete month history)
for stock_id in ids:
    try:
        revenue_rows.append(getRowRevenue(stock_id))
    except Exception:
        # Best effort, as before: skip this stock but keep a record of it.
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        revenue_skipped_ids.append(stock_id)

# Leave empty frames out of the concat so they do not influence the result dtypes.
revenue_frames = [frame for frame in [df_revenue, *revenue_rows] if not frame.empty]
if revenue_frames:
    df_revenue = pd.concat(revenue_frames)


In [None]:
# Write the wide revenue table without the index column, then preview the first five rows.
df_revenue.to_csv('revenue06.csv', index=False)
df_revenue.head(5)

## 每月收盤價

In [None]:
# Empty accumulator for the monthly close table; it uses whatever layout the global
# `columns` holds when this cell runs.
df_close = pd.DataFrame(columns=columns)
df_close

In [None]:
# Month key per daily row: the first six characters of the date's string form.
daily['yyyymm'] = daily['date'].map(lambda raw_date: str(raw_date)[:6])

In [None]:
def getRowMonthClose(id):
    """Build the one-row, wide-format monthly average close record for a stock.

    Reads the global `daily` frame (which must already have its `yyyymm`
    column) and the global `columns` layout.

    Args:
        id: value matched against the `id` column of `daily`.

    Returns:
        A one-row DataFrame (row label 'close') whose columns are exactly
        `columns`. Each month holds the mean close rounded to 0 decimals;
        months with no data for this stock hold an empty string.

    Raises:
        IndexError: when `daily` has no rows for `id`.
    """
    quotes = daily.loc[daily['id'] == id, ['yyyymm', 'name', 'close']]
    name = quotes['name'].iloc[0]

    # Average only `close`. Taking the mean of the whole group also hits the string
    # `name` column, which raises TypeError on pandas >= 2.0.
    monthly_mean = quotes.groupby('yyyymm')['close'].mean().round(0)

    # A single row labelled 'close' with one column per month.
    row = monthly_mean.to_frame().T
    row['id'] = id
    row['name'] = name

    # Order the columns as `columns`; months this stock lacks are filled with ''.
    return row.reindex(columns=columns, fill_value='')

# Collect one monthly-close row per stock, then concatenate once. DataFrame.append was
# removed in pandas 2.0; combined with the previous bare `except`, every stock was
# silently skipped on current pandas and the export came out empty.
close_rows = []
close_skipped_ids = []  # ids whose row could not be built (e.g. no daily data)
for stock_id in ids:
    try:
        close_rows.append(getRowMonthClose(stock_id))
    except Exception:
        # Best effort, as before: skip this stock but keep a record of it.
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        close_skipped_ids.append(stock_id)

# Leave empty frames out of the concat so they do not influence the result dtypes.
close_frames = [frame for frame in [df_close, *close_rows] if not frame.empty]
if close_frames:
    df_close = pd.concat(close_frames)

In [None]:
# Write the monthly close table without the index column, then preview the first five rows.
df_close.to_csv('monthlyClose06.csv', index=False)
df_close.head(5)

## EPS 

In [None]:
# Column layout of the EPS export. It has its own name so the global `columns` list,
# which getRowRevenue and getRowMonthClose read, is not overwritten; otherwise re-running
# those cells after this one silently produces empty tables.
eps_columns = ['id', 'name', 'Q1', 'Q2', 'Q3', 'Q4']
df_eps = pd.DataFrame(columns=eps_columns)

# Custom sort order based on QUARTERS: sort_mapping is indexed by quarter label and its
# 'index' column holds that label's position in QUARTERS.
df_mapping_quarters = pd.DataFrame({
    'date': QUARTERS,
})
sort_mapping = df_mapping_quarters.reset_index().set_index('date')

# eps csv
def nearFourQuarterEPS(id):
    """Return the EPS export row for one stock, or NaN when it cannot be built.

    Takes the last four rows of the global `df_financial_statement` for `id`
    (in the frame's existing row order -- presumably oldest to newest, confirm
    against the table) and orders their `qeps` values by the position of each
    row's `date` in the global `sort_mapping`. Dates missing from the mapping
    get a NaN sort key and therefore sort last.

    Args:
        id: value matched against the `id` column of `df_financial_statement`.

    Returns:
        list: `[id, name, qeps, ...]` with one `qeps` per selected row (at most
        four, fewer when the stock has fewer statements), or
        `numpy.nan` if any step fails (for example no rows for `id`).
    """
    try:
        is_stock = df_financial_statement['id'] == id
        # Explicit copy: adding a column to a chained-indexing slice triggers
        # SettingWithCopyWarning.
        recent = df_financial_statement[is_stock].tail(4)[['date', 'id', 'name', 'qeps']].copy()
        recent['order'] = recent['date'].map(sort_mapping['index'])
        name = recent['name'].iloc[0]
        recent = recent.sort_values('order')
        return [id, name] + recent['qeps'].tolist()
    except Exception:
        # Failure sentinel kept for callers; unlike a bare `except`, this lets
        # KeyboardInterrupt/SystemExit through.
        return np.nan
    

# Add one EPS row per stock, keeping only complete rows.
for stock_id in ids:
    row = nearFourQuarterEPS(stock_id)
    # nearFourQuarterEPS returns NaN on failure and a shorter list when fewer than four
    # quarters exist. Assigning a scalar NaN through .loc would create an all-NaN row in
    # the export, so incomplete results are skipped explicitly instead of relying on a
    # bare `except`.
    if not isinstance(row, list) or len(row) != len(df_eps.columns):
        continue
    df_eps.loc[len(df_eps.index)] = row

df_eps.to_csv('EPS.csv', index=False)

In [None]:
# Rank the 2023q1 statements by EPS, highest first, keeping only rows with positive EPS.
dfq1 = df_financial_statement.loc[df_financial_statement['date'] == '2023q1']
sortedEps = (
    dfq1.loc[dfq1['eps'] > 0]  # original note: 1243/1777
    .sort_values(by=['eps'], ascending=False)
    .loc[:, ['id', 'name', 'eps']]
    .reset_index(drop=True)
)
# No index=False here, so this export includes the index column.
sortedEps.to_csv('2023q1_eps.csv')
sortedEps

In [None]:
# Close the SQLite connection; cells that query `db` cannot be re-run after this
# without reconnecting.
db.close()