In [1]:
import requests
from io import StringIO
import pandas as pd
import numpy as np
import sqlite3
sqlite3.register_adapter(np.int64, int)
import re
import math
from datetime import date
import matplotlib.pyplot as plt

# Raise the display limits so wide/long DataFrames are shown without truncation
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Show floats with two fixed decimals instead of scientific notation
pd.set_option('display.float_format',lambda x : '%.2f' % x)

# isolation_level=None puts the sqlite3 connection in autocommit mode:
# each statement is committed as soon as it is executed.
db = sqlite3.connect('../stock.db' , isolation_level=None)

In [2]:
# Load the complete stockIdName table
df_stock_id_name = pd.read_sql_query("select * from stockIdName",db)

# Column subset used for display (this is NOT an empty frame: it keeps every row)
df = df_stock_id_name[['id', 'name', 'industry', 'capital']]
df.head()

Unnamed: 0,id,name,industry,capital
0,1101,台泥,水泥工業,69.37
1,1102,亞泥,水泥工業,35.46
2,1103,嘉泥,水泥工業,7.75
3,1104,環泥,水泥工業,6.54
4,1108,幸福,水泥工業,4.05


In [3]:
def financial_statement(year, season, type='綜合損益彙總表',TYPEK='sii'):
    """Download one quarterly summary statement from MOPS as a DataFrame.

    Parameters
    ----------
    year : int
        Fiscal year. Values >= 1000 are taken as Gregorian years and reduced
        by 1911 before being sent (i.e. converted to the ROC calendar year).
    season : int
        Quarter, sent as the ``season`` form field.
    type : str
        Statement to fetch: '綜合損益彙總表', '資產負債彙總表' or '現金流量表'.
    TYPEK : str
        Market: 'sii' (listed) or 'otc' (over-the-counter).

    Returns
    -------
    pandas.DataFrame
        Every HTML table of the response except the first one, concatenated
        row-wise (columns are the union of the individual tables' columns).

    Raises
    ------
    ValueError
        If ``type`` is not a supported statement name, or the response does
        not contain any data table.
    requests.HTTPError
        If the server answers with an error status.
    """
    endpoints = {
        '綜合損益彙總表': 'https://mops.twse.com.tw/mops/web/ajax_t163sb04',
        '資產負債彙總表': 'https://mops.twse.com.tw/mops/web/ajax_t163sb05',
        '現金流量表': 'https://mops.twse.com.tw/mops/web/ajax_t163sb20',
    }
    # Fail fast: previously an unknown type only printed a message and then
    # crashed on the undefined ``url`` variable.
    if type not in endpoints:
        raise ValueError(
            f"type does not match: {type!r} (expected one of {list(endpoints)})"
        )
    url = endpoints[type]

    if year >= 1000:
        year -= 1911

    r = requests.post(
        url,
        data={
            'encodeURIComponent': 1,
            'step': 1,
            'firstin': 1,
            'off': 1,
            'TYPEK': TYPEK,  # sii = listed, otc = over-the-counter
            'year': str(year),
            'season': str(season),
        },
        timeout=30,  # never hang the notebook on an unresponsive server
    )
    r.raise_for_status()

    r.encoding = 'utf8'
    # Passing raw HTML text to read_html is deprecated; wrap it in a file-like object.
    dfs = pd.read_html(StringIO(r.text), header=None)
    # The first parsed table is skipped (as before); the rest carry the data.
    tables = dfs[1:]
    if not tables:
        raise ValueError(f"no data tables returned for {type} {year}q{season} ({TYPEK})")
    return pd.concat(tables, axis=0, sort=False)


In [4]:
# Check how many companies have published the latest financial report so far
# financial_statement(2022, 3, '綜合損益彙總表', 'otc')   # 801
f = financial_statement(2023, 1, '綜合損益彙總表', 'sii') # 976  (NOTE(review): the recorded output of this cell is 967 -- confirm which is current)
len(f)

967

# 綜合損益彙總表

In [5]:
PROFIT_AND_LOSS = '綜合損益彙總表'

def writeToFinancialStatementDB(df, year, quarter, id, name, revenue, grossProfit, operatingIncome, incomeBeforeTax, income, eps):
    """Insert one company's income-statement row into the financialStatement table.

    Besides the reported ``eps`` the row stores ``qeps``. For quarter 1 it is
    ``eps`` itself; otherwise it is ``eps`` minus the ``eps`` already stored
    for the same company in the previous quarter of the same year (this
    treats the reported EPS as cumulative year-to-date -- TODO confirm against
    the data source). When that previous-quarter row is missing, or the
    subtraction is not possible, ``qeps`` is stored as NULL.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows already present in financialStatement; the ``date``, ``id`` and
        ``eps`` columns are used to find the previous quarter.
    year, quarter : int
        Period being written; stored as the text ``f"{year}q{quarter}"``.
    id, name
        Company code and company name.
    revenue, grossProfit, operatingIncome, incomeBeforeTax, income, eps
        Values stored in the columns of the same name.

    Errors raised by the INSERT itself propagate to the caller.
    """
    date = f"{year}q{quarter}"
    sql = 'insert into financialStatement (date, id, name, revenue, grossProfit, operatingIncome, incomeBeforeTax, income, eps, qeps) values (?,?,?,?,?,?,?,?,?,?)'

    if quarter == 1:
        qeps = eps
    else:
        qeps = None
        try:
            # Look up the previous quarter explicitly. Taking "the last row
            # for this id" is only right when rows happen to have been
            # inserted in chronological order without re-runs or backfills.
            previous_date = f"{year}q{quarter - 1}"
            previous = df[(df['id'] == id) & (df['date'] == previous_date)]
            if not previous.empty:
                qeps = eps - previous['eps'].iloc[-1]
        except (KeyError, TypeError, ValueError):
            # History lacks the needed columns or eps is not numeric.
            qeps = None

    db.execute(sql, (date, id, name, revenue, grossProfit, operatingIncome, incomeBeforeTax, income, eps, qeps))
        
def downloadProfitLoss(years, quarters, market='sii'):
    """Download income-statement summaries and store them in financialStatement.

    For every (year, quarter) combination the current content of the
    financialStatement table is re-read (it is the history used to derive
    the per-quarter EPS), the statement is downloaded for ``market`` and one
    row per company is written through ``writeToFinancialStatementDB``.

    All rows of one quarter are written inside a single transaction, so a
    failure part-way through leaves no partial data behind. Download and
    write errors are reported with ``print`` and the loop continues with the
    next period.

    Parameters
    ----------
    years, quarters : iterable of int
        Periods to download (every year is combined with every quarter).
    market : str
        'sii' (default) or 'otc'; forwarded to ``financial_statement``.
    """
    for year in years:
        for quarter in quarters:
            print(f"download {year}q{quarter}")
            # Re-read per quarter: the previous iteration may just have added
            # the quarter that the next one needs as history.
            historyFinancialStatement = pd.read_sql_query("select * from financialStatement", db)
            try:
                df = financial_statement(year, quarter, PROFIT_AND_LOSS, market)
                db.execute('BEGIN')
                try:
                    for _, row in df.iterrows():
                        writeToFinancialStatementDB(historyFinancialStatement,year, quarter, row['公司代號'], row['公司名稱'], row['營業收入'], row['營業毛利（毛損）'], row['營業利益（損失）'], row['稅前淨利（淨損）'], row['本期淨利（淨損）'], row['基本每股盈餘（元）'])
                except BaseException:
                    # Undo the rows already written for this quarter.
                    db.execute('ROLLBACK')
                    raise
                db.execute('COMMIT')
            except Exception as e:
                # Show the cause instead of silently swallowing it.
                print('下載或寫入發生錯誤', e)


In [6]:
# Download the consolidated income statement summary
years = [2023]
quarters = [1]
# sii (listed market, the function's default)
downloadProfitLoss(years, quarters)
# otc (over-the-counter market)
downloadProfitLoss(years, quarters, 'otc')

download 2023q1
download 2023q1


In [7]:
# Read the whole financialStatement table back from the database
t = pd.read_sql_query(f"select * from financialStatement",db)

In [8]:
# Number of financialStatement rows stored for period 2023q1
len(t[(t['date']=='2023q1')])

1777

# Example 2018Q1 2330
營業收入 248078671\
營業成本 123103977\
營業毛利（毛損）124974694 = 營業收入 - 營業成本 = 248078671 - 123103977\
營業利益（損失）96826946 = 營業毛利（毛損）- 營業費用 - 已實現銷貨（損）益 - 其他收益及費損淨額 = 124974694 - 26728394 - 117155 - 1302199\
稅前淨利（淨損）99943621 = 營業利益（損失）+ 營業外收入及支出 = 96826946 + 3116675\
本期淨利（淨損）89787574 = 稅前淨利（淨損） - 所得稅費用（利益） = 99943621 - 10156047\
淨利（淨損）歸屬於母公司業主 89784622 = 本期淨利（淨損）-  淨利（淨損）歸屬於非控制權益 = 89787574 - 2952


## !! 建立／刪除 financialStatement 資料表（以下 cell 預設註解掉，執行前請確認）

In [None]:
## 建立 financialStatement
#sql = 'create table if not exists financialStatement \
#           (date TEXT,\
#            id INT ,\
#           name TEXT ,\
#            revenue INTEGER,\
#            grossProfit INTEGER,\
#            operatingIncome INTEGER,\
#            incomeBeforeTax INTEGER,\
#            income INTEGER, \
#            eps FLOAT,\
#            qeps FLOAT);'

# db.execute(sql)

#sql = f'select * from financialStatement'
#pd.read_sql_query(sql, db)

In [None]:
# 刪除 financialStatement
# db.execute('drop table financialStatement')

# 資產負債彙總表

In [9]:
BALANCE_SHEET = '資產負債彙總表'


def _first_available(row, columns):
    """Return the first non-missing value of ``row`` among ``columns``.

    Raises KeyError when none of the columns exists in the row; returns None
    when the columns exist but all of them are missing (NaN) for this row.
    """
    present = [column for column in columns if column in row.index]
    if not present:
        raise KeyError(columns[0])
    for column in present:
        value = row[column]
        if pd.notna(value):
            return value
    return None


# The total-equity column is not named consistently: per the original note,
# sii tables use 權益總額 while otc tables use 權益總計.
def downloadBalanceSheet(years, quarters, market='sii'):
    """Download balance-sheet summaries and store them in the balanceSheet table.

    For every (year, quarter) combination the statement is downloaded for
    ``market`` and one row per company is inserted. ``shareholderEquity`` is
    taken from 權益總計 and falls back to 權益總額 when the former is missing
    for a row; reading only 權益總計 left the column empty for such rows.

    All rows of one quarter are inserted inside a single transaction, so a
    failure leaves no partial data behind. Download and write errors are
    reported with ``print`` and the loop continues with the next period.

    Parameters
    ----------
    years, quarters : iterable of int
        Periods to download (every year is combined with every quarter).
    market : str
        'sii' (default) or 'otc'; forwarded to ``financial_statement``.
    """
    sql = 'insert into balanceSheet (date,id,name,asset,currentAsset,nonCurrentAsset,liabilities,currentLiabilities,nonCurrentLiabilities,additionalPaid,retainedEarning,treasury,shareholderEquity,capital,pb) values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)'
    equity_columns = ('權益總計', '權益總額')
    for year in years:
        for quarter in quarters:
            date = f"{year}q{quarter}"
            print(f"download {date} {market}")
            try:
                df = financial_statement(year, quarter, BALANCE_SHEET, market)
                # Build every parameter tuple first so that a missing column
                # is detected before anything is written.
                params = [
                    (date, row['公司代號'], row['公司名稱'], row['資產總計'], row['流動資產'], row['非流動資產'], row['負債總計'], row['流動負債'], row['非流動負債'], row['資本公積'], row['保留盈餘'], row['庫藏股票'], _first_available(row, equity_columns), row['股本'], row['每股參考淨值'])
                    for _, row in df.iterrows()
                ]
                db.execute('BEGIN')
                try:
                    db.executemany(sql, params)
                except BaseException:
                    db.execute('ROLLBACK')
                    raise
                db.execute('COMMIT')
            except Exception as e:
                print('下載或寫入發生錯誤', e)

In [None]:
# financial_statement(2022, 2, BALANCE_SHEET, 'otc')

In [10]:
years = [2023]  # years to download
quarters = [1]  # quarters to download
downloadBalanceSheet(years, quarters, 'sii') # listed market
downloadBalanceSheet(years, quarters, 'otc') # over-the-counter market

download 2023q1 sii
download 2023q1 otc


In [19]:
# Check the database: read balanceSheet ordered by date and show the rows of company 2330
sql = f'select * from balanceSheet order by date'
a = pd.read_sql_query(sql, db)
a[a['id']==2330]
# a

Unnamed: 0,date,id,name,asset,currentAsset,nonCurrentAsset,liabilities,currentLiabilities,nonCurrentLiabilities,additionalPaid,retainedEarning,treasury,shareholderEquity,capital,pb
285,2021q1,2330,台積電,2919131461.0,1139786823.0,1779344638.0,978134499.0,661648612.0,316485887,64753607,1663271807.0,--,,259303805,74.77
2030,2021q2,2330,台積電,3092815189.0,1241104023.0,1851711166.0,1098844616.0,647748947.0,451095669,64753836,1726322203.0,--,,259303805,76.81
3787,2021q3,2330,台積電,3332311884.0,1370639169.0,1961672715.0,1253991574.0,655621450.0,598370124,64746864,1811273605.0,--,,259303805,80.06
5543,2021q4,2330,台積電,3725503455.0,1607072907.0,2118430548.0,1554770250.0,739503358.0,815266892,64761602,1906829661.0,--,,259303805,83.62
7297,2022q1,2330,台積電,3992676667.0,1722237632.0,2270439035.0,1671207913.0,822867707.0,848340206,65215135,2038261537.0,-871566,,259317675,89.26
9033,2022q2,2330,台積電,4345941335.0,1905866396.0,2440074939.0,1835479210.0,845240981.0,990238229,69333650,2203125601.0,0,,259303805,96.27
10663,2022q3,2330,台積電,4643301766.0,2014232358.0,2629069408.0,1890985607.0,807431287.0,1083554320,69316391,2412773597.0,0,2752316159.0,259303805,105.59
12418,2022q4,2330,台積電,4964778878.0,2052896744.0,2911882134.0,2004290011.0,944226817.0,1060063194,69330328,2637524688.0,0,2960488867.0,259303805,113.6


# 現金流量表

In [11]:
CASHFLOW = '現金流量表'
def downloadCashFlow(years, quarters, market='sii'):
    """Download cash-flow statements and store them in the cashflow table.

    For every (year, quarter) combination the statement is downloaded for
    ``market`` and one row per company is inserted. All rows of one quarter
    are inserted inside a single transaction, so a failure leaves no partial
    data behind. Download and write errors are reported with ``print``
    (including the cause) and the loop continues with the next period.

    Parameters
    ----------
    years, quarters : iterable of int
        Periods to download (every year is combined with every quarter).
    market : str
        'sii' (default) or 'otc'; forwarded to ``financial_statement``.
    """
    sql = 'insert into cashflow (date,id,name,operating,investing,financing,exchange,change,beginning,end) values (?,?,?,?,?,?,?,?,?,?)'
    for year in years:
        for quarter in quarters:
            date = f"{year}q{quarter}"
            print(f"download {date}")
            try:
                df = financial_statement(year, quarter, CASHFLOW, market)
                # Build every parameter tuple first so that a missing column
                # is detected before anything is written.
                params = [
                    (date, row["公司代號"], row["公司名稱"], row["營業活動之淨現金流入（流出）"], row["投資活動之淨現金流入（流出）"], row["籌資活動之淨現金流入（流出）"], row["匯率變動對現金及約當現金之影響"], row["本期現金及約當現金增加（減少）數"], row["期初現金及約當現金餘額"], row["期末現金及約當現金餘額"])
                    for _, row in df.iterrows()
                ]
                db.execute('BEGIN')
                try:
                    db.executemany(sql, params)
                except BaseException:
                    db.execute('ROLLBACK')
                    raise
                db.execute('COMMIT')
            except Exception as e:
                # Show the cause instead of silently swallowing it.
                print('下載或寫入發生錯誤', e)

In [12]:
years = [2023]  # years to download
quarters = [1]
downloadCashFlow(years, quarters) # listed market (default 'sii')
downloadCashFlow(years, quarters, 'otc') # over-the-counter market

download 2023q1
download 2023q1


In [None]:
# Check the database: read the cashflow table and show the rows of company 2330
sql = f'select * from cashflow'
a = pd.read_sql_query(sql, db)
a[a['id']==2330]

# 股本
股本存放在 stockIdName table 的 capital 欄位

In [None]:
# "AL" = Assets / Liabilities: fetch the 2022 Q2 balance-sheet summary for both markets
# (re-assigns the same value as the earlier BALANCE_SHEET definition)
BALANCE_SHEET = '資產負債彙總表'
df_AL_sii = financial_statement(2022,2, BALANCE_SHEET)
df_AL_otc = financial_statement(2022,2, BALANCE_SHEET, 'otc')

In [None]:
# Spot-check the 股本 (capital) value of one company from each frame: 2454 (sii) and 5483 (otc)
print('聯發科股本：', df_AL_sii[df_AL_sii['公司代號'] == 2454]['股本'].iloc[0])
print('中美晶股本：', df_AL_otc[df_AL_otc['公司代號'] == 5483]['股本'].iloc[0])

In [None]:
# Add the 股本 (capital) column to the existing stockIdName table.
# A second ALTER TABLE would fail with "duplicate column name", so look at the
# current schema first; this keeps the cell safe to re-run.
# PRAGMA table_info yields one row per column; index 1 is the column name.
existing_columns = [info[1] for info in db.execute("PRAGMA table_info(stockIdName)")]
if 'capital' not in existing_columns:
    db.execute("ALTER TABLE stockIdName ADD COLUMN capital float")

In [None]:
def writeStockCapital(df):
    """Copy each company's 股本 from a balance-sheet frame into stockIdName.capital.

    Iterates over the ids of the notebook-level ``df_stock_id_name`` frame.
    For every id found in ``df`` (column 公司代號) the first matching 股本
    value is divided by 1,000,000, rounded to two decimals and written with
    an UPDATE through the notebook-level ``db`` connection. Ids that are not
    in ``df``, or whose 股本 is missing or not numeric, are skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Balance-sheet summary containing the 公司代號 and 股本 columns.

    Raises
    ------
    KeyError
        If ``df`` lacks one of the two required columns (previously this was
        swallowed and the function silently updated nothing).
    """
    codes = df['公司代號']
    capitals = df['股本']
    # Parameterized statement instead of formatting values into the SQL text.
    sql = "UPDATE stockIdName SET capital = ? WHERE id = ?"

    for stock_id in df_stock_id_name['id']:
        matches = capitals[codes == stock_id]
        if matches.empty:
            continue  # company not present in this frame
        try:
            format_stock_capital = round(float(matches.iloc[0]) / 1000000, 2)
        except (TypeError, ValueError):
            continue  # non-numeric placeholder
        if pd.isna(format_stock_capital):
            continue  # do not overwrite an existing value with NULL
        db.execute(sql, (format_stock_capital, stock_id))

# Write the capital values from both frames (df_AL_sii, df_AL_otc) into stockIdName
writeStockCapital(df_AL_sii)
writeStockCapital(df_AL_otc)

In [None]:
# Read the stockIdName table back from the database and preview it
c = pd.read_sql_query("select * from stockIdName",db)
c.head()

In [None]:
# Show the stockIdName row of company 5483
c[c['id']==5483]

In [13]:
# Close the SQLite connection; any cell using `db` after this needs a new connection
db.close()