In [17]:
import requests
from io import StringIO
import pandas as pd
import numpy as np
import sqlite3
sqlite3.register_adapter(np.int64, int)
import re
import math
from datetime import date
import matplotlib.pyplot as plt

# 展開所有dataframe columns
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# 不縮寫成科學記號
pd.set_option('display.float_format',lambda x : '%.2f' % x)

db = sqlite3.connect('./stock.db' , isolation_level=None)

In [18]:
df_stock_id_name = pd.read_sql_query("select * from stockIdName",db)

# 宣告一個空的df
df = df_stock_id_name[['id', 'name', 'industry', 'capital']]
df.head()

Unnamed: 0,id,name,industry,capital
0,1101,台泥,水泥工業,69.37
1,1102,亞泥,水泥工業,35.46
2,1103,嘉泥,水泥工業,7.75
3,1104,環泥,水泥工業,6.54
4,1108,幸福,水泥工業,4.05


In [19]:
def financial_statement(year, season, type='綜合損益彙總表',TYPEK='sii'):
    if year >= 1000:
        year -= 1911

    if type == '綜合損益彙總表':
        url = 'https://mops.twse.com.tw/mops/web/ajax_t163sb04'
    elif type == '資產負債彙總表':
        url = 'https://mops.twse.com.tw/mops/web/ajax_t163sb05'
    elif type == '現金流量表':
        url = 'https://mops.twse.com.tw/mops/web/ajax_t163sb20'
    else:
        print('type does not match')

    r = requests.post(url, {
        'encodeURIComponent':1,
        'step':1,
        'firstin':1,
        'off':1,
        'TYPEK':TYPEK,  # sii上市, otc上櫃
        'year':str(year),
        'season':str(season),
    })

    r.encoding = 'utf8'
    dfs = pd.read_html(r.text, header=None)
    return pd.concat(dfs[1:], axis=0, sort=False)


In [11]:
# 檢查目前有幾家公司更新最新財報
f = financial_statement(2023, 2, '綜合損益彙總表', 'sii') # 981
# f = financial_statement(2023, 2, '綜合損益彙總表', 'otc')   # 801

len(f)
# 2023 2 896/781


896

# 綜合損益彙總表

In [20]:
PROFIT_AND_LOSS = '綜合損益彙總表'

def writeToFinancialStatementDB(df, year, quarter, id, name, revenue, grossProfit, operatingIncome, incomeBeforeTax, income, eps):
    date = f"{year}q{quarter}"
    sql = 'insert into financialStatement (date, id, name, revenue, grossProfit, operatingIncome, incomeBeforeTax, income, qincome, eps, qeps) values (?,?,?,?,?,?,?,?,?,?,?)'
    
    if quarter == 1:
        db.execute(sql, (date, id, name, revenue, grossProfit, operatingIncome, incomeBeforeTax, income, income, eps, eps))
    else:
        try:
            dateDf = df[(df['id']==id) & (df['date']==f'{year}q{quarter-1}')]
            prev_eps = dateDf['eps'].iloc[0]
            prev_income = dateDf['income'].iloc[0]
            qeps = eps - prev_eps
            qincome = income - prev_income
            db.execute(sql, (date, id, name, revenue, grossProfit, operatingIncome, incomeBeforeTax, income, qincome, eps, qeps))
        except:
            db.execute(sql, (date, id, name, revenue, grossProfit, operatingIncome, incomeBeforeTax, income, np.nan, eps, np.nan))
        
def downloadProfitLoss(years, quarters, market='sii'):
    for year in years:
        for quarter in quarters:
            print(f"download {year}q{quarter}")
            historyFinancialStatement = pd.read_sql_query(f"select * from financialStatement order by date",db)
            try:
                df = financial_statement(year, quarter, PROFIT_AND_LOSS, market)
                for index, row in df.iterrows():
                    writeToFinancialStatementDB(historyFinancialStatement,year, quarter, row['公司代號'], row['公司名稱'], row['營業收入'], row['營業毛利（毛損）'], row['營業利益（損失）'], row['稅前淨利（淨損）'], row['本期淨利（淨損）'], row['基本每股盈餘（元）'])
            except:
                print('下載或寫入發生錯誤')


In [None]:
# f[f['公司代號']==23300]['營業收入'].iloc[0]

In [24]:
# 下載綜合損益彙總表 
years = [2018,2019,2020,2021]
quarters = [1,2,3,4]
# sii
downloadProfitLoss(years, quarters)
# otc
downloadProfitLoss(years, quarters, 'otc')

download 2018q1
download 2018q2
download 2018q3
download 2018q4
download 2019q1
download 2019q2
download 2019q3
download 2019q4
download 2020q1
download 2020q2
download 2020q3
download 2020q4
download 2021q1
download 2021q2
download 2021q3
download 2021q4
download 2018q1
download 2018q2
download 2018q3
download 2018q4
download 2019q1
download 2019q2
download 2019q3
download 2019q4
download 2020q1
download 2020q2
download 2020q3
download 2020q4
download 2021q1
download 2021q2
download 2021q3
download 2021q4


In [25]:
# search in db
pd.read_sql_query(f"select * from financialStatement where id=4977 order by date",db)

Unnamed: 0,date,id,name,revenue,grossProfit,operatingIncome,incomeBeforeTax,income,qincome,eps,qeps
0,2018q1,4977,眾達-KY,413644,107646,35546,100214,97185,97185,1.66,1.66
1,2018q2,4977,眾達-KY,936644,239927,107326,135438,140623,43438,2.41,0.75
2,2018q3,4977,眾達-KY,1423652,391371,194011,216987,218500,77877,3.78,1.37
3,2018q4,4977,眾達-KY,1930806,514234,247619,256055,257641,39141,4.5,0.72
4,2019q1,4977,眾達-KY,540477,135057,42544,48017,46225,46225,0.84,0.84
5,2019q2,4977,眾達-KY,1180511,297936,133271,121902,129388,83163,2.22,1.38
6,2019q3,4977,眾達-KY,1797682,466337,251788,184743,185667,56279,3.23,1.01
7,2019q4,4977,眾達-KY,2534015,607031,324359,256806,248304,62637,4.22,0.99
8,2020q1,4977,眾達-KY,655272,140503,75609,34470,35446,35446,0.57,0.57
9,2020q2,4977,眾達-KY,1474126,316510,217192,194534,180205,144759,2.75,2.18


In [26]:
pd.read_sql_query(f"select * from financialStatement where id=5425 order by date",db)

Unnamed: 0,date,id,name,revenue,grossProfit,operatingIncome,incomeBeforeTax,income,qincome,eps,qeps
0,2018q1,5425,台半,2244561,802259,374251,442891,312085,312085.0,0.98,0.98
1,2018q2,5425,台半,4702033,1677161,795980,901382,630089,,1.83,
2,2018q3,5425,台半,7180164,2599824,1257151,1309332,911201,281112.0,2.61,0.78
3,2018q4,5425,台半,9610470,3528054,1708886,1775463,1249500,338299.0,3.53,0.92
4,2019q1,5425,台半,2475615,816964,300849,310574,248955,248955.0,0.63,0.63
5,2019q2,5425,台半,5148139,1673342,631163,637948,504318,255363.0,1.17,0.54
6,2019q3,5425,台半,7803804,2498976,937689,935310,737504,233186.0,1.68,0.51
7,2019q4,5425,台半,10504202,3361687,1298331,1298990,1011179,273675.0,2.29,0.61
8,2020q1,5425,台半,2381563,686040,203566,243392,182442,182442.0,0.42,0.42
9,2020q2,5425,台半,4764067,1393779,466488,511845,375687,193245.0,0.87,0.45


In [None]:
# delete duplicates
# pd.read_sql_query(f"delete from financialStatement where rowid not in \
#                   (SELECT min(rowid) from financialStatement GROUP BY id, date)" \
#                   , db)

# t = pd.read_sql_query(f"select * from financialStatement where id=2454 order by date",db)
# t1 = t.drop_duplicates()
# print(len(t), "->", len(t1))

In [None]:
# delete row
# sql = 'delete from financialStatement where date="2022q4"'
# pd.read_sql_query(sql, db)

In [16]:
db.close()

# Example 2018Q1 2330
營業收入 248078671\
營業成本 123103977\
營業毛利（毛損）124974694 = 營業收入 - 營業成本 = 248078671 - 123103977\
營業利益（損失）96826946 = 營業毛利（毛損）- 營業費用 - 已實現銷貨（損）益 - 其他收益及費損淨額 = 124974694 - 26728394 - 117155 - 1302199\
稅前淨利（淨損）99943621 = 營業利益（損失）+ 營業外收入及支出 = 96826946 + 3116675\
本期淨利（淨損）89787574 = 稅前淨利（淨損） - 所得稅費用（利益） = 99943621 - 10156047\
淨利（淨損）歸屬於母公司業主 89784622 = 本期淨利（淨損）-  淨利（淨損）歸屬於非控制權益 = 89787574 - 2952


# 資產負債彙總表

In [27]:
BALANCE_SHEET = '資產負債彙總表'
# sii 權益總額, otc 權益總計 
def downloadBalanceSheet(years, quarters, market='sii'):
    for year in years:
        for quarter in quarters:
            date = f"{year}q{quarter}"
            print(f"download {date} {market}")
            try:
                df = financial_statement(year, quarter, BALANCE_SHEET, market)
                for index, row in df.iterrows():
                    sql = 'insert into balanceSheet (date,id,name,asset,currentAsset,nonCurrentAsset,liabilities,currentLiabilities,nonCurrentLiabilities,additionalPaid,retainedEarning,treasury,shareholderEquity,capital,pb) values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)'
                    db.execute(sql, (date, row['公司代號'], row['公司名稱'], row['資產總計'], row['流動資產'], row['非流動資產'], row['負債總計'], row['流動負債'], row['非流動負債'], row['資本公積'], row['保留盈餘'], row['庫藏股票'], row['權益總計'], row['股本'], row['每股參考淨值']))
            except Exception as e:
                print('下載或寫入發生錯誤', e)

In [38]:
years = [2023]  
quarters = [2]  

downloadBalanceSheet(years, quarters, 'sii') 
downloadBalanceSheet(years, quarters, 'otc') 

download 2023q2 sii
download 2023q2 otc


In [37]:
# check db
sql = f'select * from balanceSheet order by date'
a = pd.read_sql_query(sql, db)
a[a['id']==2330]
# a

Unnamed: 0,date,id,name,asset,currentAsset,nonCurrentAsset,liabilities,currentLiabilities,nonCurrentLiabilities,additionalPaid,retainedEarning,treasury,shareholderEquity,capital,pb
285,2021q1,2330,台積電,2919131461.0,1139786823.0,1779344638.0,978134499.0,661648612.0,316485887,64753607,1663271807.0,--,,259303805,74.77
2030,2021q2,2330,台積電,3092815189.0,1241104023.0,1851711166.0,1098844616.0,647748947.0,451095669,64753836,1726322203.0,--,,259303805,76.81
3787,2021q3,2330,台積電,3332311884.0,1370639169.0,1961672715.0,1253991574.0,655621450.0,598370124,64746864,1811273605.0,--,,259303805,80.06
5543,2021q4,2330,台積電,3725503455.0,1607072907.0,2118430548.0,1554770250.0,739503358.0,815266892,64761602,1906829661.0,--,,259303805,83.62
7297,2022q1,2330,台積電,3992676667.0,1722237632.0,2270439035.0,1671207913.0,822867707.0,848340206,65215135,2038261537.0,-871566,,259317675,89.26
9033,2022q2,2330,台積電,4345941335.0,1905866396.0,2440074939.0,1835479210.0,845240981.0,990238229,69333650,2203125601.0,0,,259303805,96.27
10663,2022q3,2330,台積電,4643301766.0,2014232358.0,2629069408.0,1890985607.0,807431287.0,1083554320,69316391,2412773597.0,0,2752316159.0,259303805,105.59
12418,2022q4,2330,台積電,4964778878.0,2052896744.0,2911882134.0,2004290011.0,944226817.0,1060063194,69330328,2637524688.0,0,2960488867.0,259303805,113.6
14201,2023q1,2330,台積電,5045844348.0,1995727521.0,3050116827.0,1952946750.0,873089921.0,1079856829,69894441,2773288070.0,0,3092897598.0,259320710,118.7


In [36]:
# delete row
sql = 'delete from balanceSheet where date="2023q2"'
# pd.read_sql_query(sql, db)

# 現金流量表

In [39]:
CASHFLOW = '現金流量表'
def downloadCashFlow(years, quarters, market='sii'):
    for year in years:
        for quarter in quarters:
            date = f"{year}q{quarter}"
            print(f"download {date}", year, quarter, CASHFLOW, market)
            df = financial_statement(year, quarter, CASHFLOW, market)
            sql = 'insert into cashflow (date,id,name,operating,investing,financing,exchange,change,beginning,end, cashflow, qcashflow) values (?,?,?,?,?,?,?,?,?,?,?,?)'
            
            if quarter == 1:
                for index, row in df.iterrows():
                    try:
                        cashflow = int(row["營業活動之淨現金流入（流出）"]) + int(row["投資活動之淨現金流入（流出）"])
                        db.execute(sql, (date, row["公司代號"], row["公司名稱"], row["營業活動之淨現金流入（流出）"], row["投資活動之淨現金流入（流出）"], row["籌資活動之淨現金流入（流出）"], row["匯率變動對現金及約當現金之影響"], row["本期現金及約當現金增加（減少）數"], row["期初現金及約當現金餘額"], row["期末現金及約當現金餘額"], cashflow, cashflow))    
                    except:
                        db.execute(sql, (date, row["公司代號"], row["公司名稱"], row["營業活動之淨現金流入（流出）"], row["投資活動之淨現金流入（流出）"], row["籌資活動之淨現金流入（流出）"], row["匯率變動對現金及約當現金之影響"], row["本期現金及約當現金增加（減少）數"], row["期初現金及約當現金餘額"], row["期末現金及約當現金餘額"], np.nan, np.nan))    
            else:
                df_cashflow = pd.read_sql_query(f'select * from cashflow order by date', db)
                
                for index, row in df.iterrows():
                    try:
                        cashflow = int(row["營業活動之淨現金流入（流出）"]) + int(row["投資活動之淨現金流入（流出）"])
                        prevCashflow = df_cashflow[(df_cashflow['id']==row["公司代號"]) & (df_cashflow['date']==f"{year}q{quarter-1}")]['cashflow'].iloc[0]
                        qcashflow = cashflow - prevCashflow if prevCashflow > 0 else cashflow + prevCashflow
                        db.execute(sql, (date, row["公司代號"], row["公司名稱"], row["營業活動之淨現金流入（流出）"], row["投資活動之淨現金流入（流出）"], row["籌資活動之淨現金流入（流出）"], row["匯率變動對現金及約當現金之影響"], row["本期現金及約當現金增加（減少）數"], row["期初現金及約當現金餘額"], row["期末現金及約當現金餘額"], cashflow, qcashflow))
                    except:
                        db.execute(sql, (date, row["公司代號"], row["公司名稱"], row["營業活動之淨現金流入（流出）"], row["投資活動之淨現金流入（流出）"], row["籌資活動之淨現金流入（流出）"], row["匯率變動對現金及約當現金之影響"], row["本期現金及約當現金增加（減少）數"], row["期初現金及約當現金餘額"], row["期末現金及約當現金餘額"], np.nan, np.nan))


In [44]:
years = [2023]
quarters = [1,2]
downloadCashFlow(years, quarters) 
downloadCashFlow(years, quarters, 'otc') 

download 2023q1 2023 1 現金流量表 sii
download 2023q2 2023 2 現金流量表 sii
download 2023q1 2023 1 現金流量表 otc
download 2023q2 2023 2 現金流量表 otc


In [43]:
sql = f'select * from cashflow order by date'
a = pd.read_sql_query(sql, db)
# a[a['id']==2330]
a[a['id']==5425]

Unnamed: 0,date,id,name,operating,investing,cashflow,qcashflow,financing,exchange,change,beginning,end
1300,2018q1,5425,台半,-985800,-48430,-1034230.0,-1034230.0,939786,16825,-77619,2570350,2492731
2974,2018q2,5425,台半,-23114,-489635,-512749.0,-1546979.0,840652,-48475,279428,2570350,2849778
4643,2018q3,5425,台半,237788,-882857,-645069.0,-1157818.0,760193,-180760,-65636,2570350,2504714
6357,2018q4,5425,台半,1472747,-1784004,-311257.0,-956326.0,515106,-78628,125221,2570350,2695571
8053,2019q1,5425,台半,479376,-1353722,-874346.0,-874346.0,775689,37979,-60678,2695571,2634893
9761,2019q2,5425,台半,1442135,-1652884,-210749.0,-1085095.0,-67208,13107,-264850,2695571,2430721
11463,2019q3,5425,台半,1822640,-1876989,-54349.0,-265098.0,-285032,-62781,-402162,2695571,2293409


In [None]:
# delete row
# sql = 'delete from cashflow where date="2022q1"'
# pd.read_sql_query(sql, db)

In [None]:
financial_statement(2022, 2, CASHFLOW, 'otc')

# 股本

In [None]:
# Asset資產, Liabilities負債
BALANCE_SHEET = '資產負債彙總表'
df_AL_sii = financial_statement(2023,1, BALANCE_SHEET)
df_AL_otc = financial_statement(2023,1, BALANCE_SHEET, 'otc')

In [None]:
print('聯發科股本：', df_AL_sii[df_AL_sii['公司代號'] == 2454]['股本'].iloc[0])
print('中美晶股本：', df_AL_otc[df_AL_otc['公司代號'] == 5483]['股本'].iloc[0])

In [None]:
# 在已存在table新增 股本 欄位
# db.execute("ALTER TABLE stockIdName ADD COLUMN capital float")

In [None]:
def writeStockCapital(df):
    for index, row in df_stock_id_name.iterrows():
        stock_id = row['id']
        try:
            stock_capital = df[df['公司代號'] == stock_id]['股本'].iloc[0]
            format_stock_capital = round(stock_capital/1000000,2)
            sql = f"UPDATE stockIdName SET capital = {format_stock_capital} WHERE id = {stock_id}"
            db.execute(sql)
        except:
            continue

writeStockCapital(df_AL_sii)
writeStockCapital(df_AL_otc)

In [None]:
# read stockIdName table in db
c = pd.read_sql_query("select * from stockIdName",db)
c.head()

In [None]:
c[c['id']==5483]

In [45]:
db.close()