In [1]:
import requests
from io import StringIO
import pandas as pd
import numpy as np
import sqlite3
sqlite3.register_adapter(np.int64, int)
import re
import math
from datetime import date
import matplotlib.pyplot as plt

# 展開所有dataframe columns
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# 不縮寫成科學記號
pd.set_option('display.float_format',lambda x : '%.2f' % x)

db = sqlite3.connect('./stock.db' , isolation_level=None)

In [2]:
df_stock_id_name = pd.read_sql_query("select * from stockIdName",db)

# 宣告一個空的df
df = df_stock_id_name[['id', 'name', 'industry', 'capital']]
df.head()

Unnamed: 0,id,name,industry,capital
0,1101,台泥,水泥工業,73.56
1,1102,亞泥,水泥工業,35.46
2,1103,嘉泥,水泥工業,7.75
3,1104,環泥,水泥工業,6.54
4,1108,幸福,水泥工業,4.05


In [3]:
def financial_statement(year, season, type='綜合損益彙總表',TYPEK='sii'):
    if year >= 1000:
        year -= 1911

    if type == '綜合損益彙總表':
        url = 'https://mops.twse.com.tw/mops/web/ajax_t163sb04'
    elif type == '資產負債彙總表':
        url = 'https://mops.twse.com.tw/mops/web/ajax_t163sb05'
    elif type == '現金流量表':
        url = 'https://mops.twse.com.tw/mops/web/ajax_t163sb20'
    else:
        print('type does not match')

    r = requests.post(url, {
        'encodeURIComponent':1,
        'step':1,
        'firstin':1,
        'off':1,
        'TYPEK':TYPEK,  # sii上市, otc上櫃
        'year':str(year),
        'season':str(season),
    })

    r.encoding = 'utf8'
    dfs = pd.read_html(r.text, header=None)
    return pd.concat(dfs[1:], axis=0, sort=False)


In [None]:
# 檢查目前有幾家公司更新最新財報
# financial_statement(2022, 3, '綜合損益彙總表', 'otc')   # 801
f = financial_statement(2023, 1, '綜合損益彙總表', 'sii') # 981
len(f)

# 綜合損益彙總表

In [None]:
PROFIT_AND_LOSS = '綜合損益彙總表'

def writeToFinancialStatementDB(df, year, quarter, id, name, revenue, grossProfit, operatingIncome, incomeBeforeTax, income, eps):
    date = f"{year}q{quarter}"
    sql = 'insert into financialStatement (date, id, name, revenue, grossProfit, operatingIncome, incomeBeforeTax, income, eps, qeps) values (?,?,?,?,?,?,?,?,?,?)'
    
    if quarter == 1:
        db.execute(sql, (date, id, name, revenue, grossProfit, operatingIncome, incomeBeforeTax, income, eps, eps))
    else:
        try:
            prev_eps = df[df['id']==id].tail(1)['eps'].iloc[0]
            qeps = eps - prev_eps
            db.execute(sql, (date, id, name, revenue, grossProfit, operatingIncome, incomeBeforeTax, income, eps, qeps))
        except:
            db.execute(sql, (date, id, name, revenue, grossProfit, operatingIncome, incomeBeforeTax, income, eps, np.nan))
        
def downloadProfitLoss(years, quarters, market='sii'):
    for year in years:
        for quarter in quarters:
            print(f"download {year}q{quarter}")
            historyFinancialStatement = pd.read_sql_query(f"select * from financialStatement",db)
            try:
                df = financial_statement(year, quarter, PROFIT_AND_LOSS, market)
                for index, row in df.iterrows():
                    writeToFinancialStatementDB(historyFinancialStatement,year, quarter, row['公司代號'], row['公司名稱'], row['營業收入'], row['營業毛利（毛損）'], row['營業利益（損失）'], row['稅前淨利（淨損）'], row['本期淨利（淨損）'], row['基本每股盈餘（元）'])
            except:
                print('下載或寫入發生錯誤')


In [None]:
# 下載綜合損益彙總表 
years = [2013,2014,2015,2016]
quarters = [4]
# sii
downloadProfitLoss(years, quarters)
# otc
downloadProfitLoss(years, quarters, 'otc')

In [9]:
# search in db
pd.read_sql_query(f"select * from financialStatement where id=2330 order by date",db)

Unnamed: 0,date,id,name,revenue,grossProfit,operatingIncome,incomeBeforeTax,income,eps,qeps
0,2013q4,2330,台積電,597024197,280966377,209429363,215487122,188018937,7.26,-5.97
1,2014q4,2330,台積電,762806465,377705819,295890293,302097546,263780869,10.18,2.92
2,2015q4,2330,台積電,843497368,410379767,320047775,350428911,306556167,11.82,1.64
3,2016q4,2330,台積電,947938344,474861171,377957778,385959380,334338236,12.89,1.07
4,2017q4,2330,台積電,977447241,494830955,385559223,396133030,343146848,13.23,-0.31
5,2018q4,2330,台積電,1031473557,497986041,383623524,397510263,351184406,13.54,0.22
6,2019q1,2330,台積電,218704469,90352125,64266023,68181652,61387310,2.37,2.37
7,2019q2,2330,台積電,459702944,194025355,140570076,148727092,128163161,4.94,2.57
8,2019q3,2330,台積電,752748383,333457516,248457368,261063363,229265615,8.84,3.9
9,2019q4,2330,台積電,1069985448,492698501,372701090,389845336,345343809,13.32,4.48


In [None]:
pd.read_sql_query(f"select * from financialStatement where id=5425 order by date",db)

In [None]:
# delete duplicates
# pd.read_sql_query(f"delete from financialStatement where rowid not in \
#                   (SELECT min(rowid) from financialStatement GROUP BY id, date)" \
#                   , db)

In [None]:
t = pd.read_sql_query(f"select * from financialStatement where id=2454 order by date",db)
t1 = t.drop_duplicates()
print(len(t), "->", len(t1))

# Example 2018Q1 2330
營業收入 248078671\
營業成本 123103977\
營業毛利（毛損）124974694 = 營業收入 - 營業成本 = 248078671 - 123103977\
營業利益（損失）96826946 = 營業毛利（毛損）- 營業費用 - 已實現銷貨（損）益 - 其他收益及費損淨額 = 124974694 - 26728394 - 117155 - 1302199\
稅前淨利（淨損）99943621 = 營業利益（損失）+ 營業外收入及支出 = 96826946 + 3116675\
本期淨利（淨損）89787574 = 稅前淨利（淨損） - 所得稅費用（利益） = 99943621 - 10156047\
淨利（淨損）歸屬於母公司業主 89784622 = 本期淨利（淨損）-  淨利（淨損）歸屬於非控制權益 = 89787574 - 2952


# 資產負債彙總表

In [6]:
BALANCE_SHEET = '資產負債彙總表'
# sii 權益總額, otc 權益總計 
def downloadBalanceSheet(years, quarters, market='sii'):
    for year in years:
        for quarter in quarters:
            date = f"{year}q{quarter}"
            print(f"download {date} {market}")
            try:
                df = financial_statement(year, quarter, BALANCE_SHEET, market)
                for index, row in df.iterrows():
                    sql = 'insert into balanceSheet (date,id,name,asset,currentAsset,nonCurrentAsset,liabilities,currentLiabilities,nonCurrentLiabilities,additionalPaid,retainedEarning,treasury,shareholderEquity,capital,pb) values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)'
                    db.execute(sql, (date, row['公司代號'], row['公司名稱'], row['資產總計'], row['流動資產'], row['非流動資產'], row['負債總計'], row['流動負債'], row['非流動負債'], row['資本公積'], row['保留盈餘'], row['庫藏股票'], row['權益總計'], row['股本'], row['每股參考淨值']))
            except Exception as e:
                print('下載或寫入發生錯誤', e)

In [None]:
years = [2023]  
quarters = [1]  
downloadBalanceSheet(years, quarters, 'sii') 
downloadBalanceSheet(years, quarters, 'otc') 

In [4]:
# check db
sql = f'select * from balanceSheet order by date'
a = pd.read_sql_query(sql, db)
a[a['id']==2330]
# a

Unnamed: 0,date,id,name,asset,currentAsset,nonCurrentAsset,liabilities,currentLiabilities,nonCurrentLiabilities,additionalPaid,retainedEarning,treasury,shareholderEquity,capital,pb
285,2022q1,2330,台積電,3992676667.0,1722237632.0,2270439035.0,1671207913.0,822867707.0,848340206,65215135,2038261537.0,-871566,2321468754.0,259317675,89.26
2058,2022q2,2330,台積電,4345941335.0,1905866396.0,2440074939.0,1835479210.0,845240981.0,990238229,69333650,2203125601.0,0,2510462125.0,259303805,96.27
3847,2022q3,2330,台積電,4643301766.0,2014232358.0,2629069408.0,1890985607.0,807431287.0,1083554320,69316391,2412773597.0,0,2752316159.0,259303805,105.59
5636,2022q4,2330,台積電,4964778878.0,2052896744.0,2911882134.0,2004290011.0,944226817.0,1060063194,69330328,2637524688.0,0,2960488867.0,259303805,113.6
7428,2023q1,2330,台積電,5045844348.0,1995727521.0,3050116827.0,1952946750.0,873089921.0,1079856829,69894441,2773288070.0,0,3092897598.0,259320710,118.7


In [10]:
206949036/3092897598.00

0.06691105328990592

# 現金流量表

In [4]:
CASHFLOW = '現金流量表'
def downloadCashFlow(years, quarters, market='sii'):
    for year in years:
        for quarter in quarters:
            date = f"{year}q{quarter}"
            print(f"download {date}", year, quarter, CASHFLOW, market)
            df = financial_statement(year, quarter, CASHFLOW, market)
            sql = 'insert into cashflow (date,id,name,operating,investing,financing,exchange,change,beginning,end, cashflow, qcashflow) values (?,?,?,?,?,?,?,?,?,?,?,?)'
            
            if quarter == 1:
                for index, row in df.iterrows():
                    try:
                        cashflow = int(row["營業活動之淨現金流入（流出）"]) + int(row["投資活動之淨現金流入（流出）"])
                        db.execute(sql, (date, row["公司代號"], row["公司名稱"], row["營業活動之淨現金流入（流出）"], row["投資活動之淨現金流入（流出）"], row["籌資活動之淨現金流入（流出）"], row["匯率變動對現金及約當現金之影響"], row["本期現金及約當現金增加（減少）數"], row["期初現金及約當現金餘額"], row["期末現金及約當現金餘額"], cashflow, cashflow))    
                    except:
                        db.execute(sql, (date, row["公司代號"], row["公司名稱"], row["營業活動之淨現金流入（流出）"], row["投資活動之淨現金流入（流出）"], row["籌資活動之淨現金流入（流出）"], row["匯率變動對現金及約當現金之影響"], row["本期現金及約當現金增加（減少）數"], row["期初現金及約當現金餘額"], row["期末現金及約當現金餘額"], np.nan, np.nan))    
            else:
                df_cashflow = pd.read_sql_query(f'select * from cashflow order by date', db)
                
                for index, row in df.iterrows():
                    try:
                        cashflow = int(row["營業活動之淨現金流入（流出）"]) + int(row["投資活動之淨現金流入（流出）"])
                        prevCashflow = df_cashflow[(df_cashflow['id']==row["公司代號"]) & (df_cashflow['date']==f"{year}q{quarter-1}")]['cashflow'].iloc[0]
                        qcashflow = cashflow - prevCashflow if prevCashflow > 0 else cashflow + prevCashflow
                        db.execute(sql, (date, row["公司代號"], row["公司名稱"], row["營業活動之淨現金流入（流出）"], row["投資活動之淨現金流入（流出）"], row["籌資活動之淨現金流入（流出）"], row["匯率變動對現金及約當現金之影響"], row["本期現金及約當現金增加（減少）數"], row["期初現金及約當現金餘額"], row["期末現金及約當現金餘額"], cashflow, qcashflow))
                    except:
                        db.execute(sql, (date, row["公司代號"], row["公司名稱"], row["營業活動之淨現金流入（流出）"], row["投資活動之淨現金流入（流出）"], row["籌資活動之淨現金流入（流出）"], row["匯率變動對現金及約當現金之影響"], row["本期現金及約當現金增加（減少）數"], row["期初現金及約當現金餘額"], row["期末現金及約當現金餘額"], np.nan, np.nan))


In [20]:
years = [2023]  
quarters = [1]
downloadCashFlow(years, quarters) 
downloadCashFlow(years, quarters, 'otc') 

download 2023q1 2023 1 現金流量表 sii
download 2023q1 2023 1 現金流量表 otc


In [22]:
sql = f'select * from cashflow order by date'
a = pd.read_sql_query(sql, db)
# a[a['id']==2330]
a[a['id']==5425]

Unnamed: 0,date,id,name,operating,investing,cashflow,qcashflow,financing,exchange,change,beginning,end
404,2018q1,5425,台半,-985800,-48430,-1034230.0,-1034230.0,939786,16825,-77619,2570350,2492731
2041,2018q2,5425,台半,-23114,-489635,-512749.0,-1546979.0,840652,-48475,279428,2570350,2849778
3731,2018q3,5425,台半,237788,-882857,-645069.0,-1157818.0,760193,-180760,-65636,2570350,2504714
5402,2018q4,5425,台半,1472747,-1784004,-311257.0,-956326.0,515106,-78628,125221,2570350,2695571
8052,2019q1,5425,台半,479376,-1353722,-874346.0,-874346.0,775689,37979,-60678,2695571,2634893
9759,2019q2,5425,台半,1442135,-1652884,-210749.0,-1085095.0,-67208,13107,-264850,2695571,2430721
11461,2019q3,5425,台半,1822640,-1876989,-54349.0,-265098.0,-285032,-62781,-402162,2695571,2293409
13196,2019q4,5425,台半,2083980,-2065173,18807.0,-35542.0,-379185,-104937,-465315,2695571,2230256
14918,2020q1,5425,台半,385910,-155192,230718.0,230718.0,-329974,-8903,-108159,2230256,2122097
16656,2020q2,5425,台半,905680,-255497,650183.0,419465.0,-431198,-69357,149628,2230256,2379884


In [None]:
130479.00-113522.00

In [None]:
# delete row
# sql = 'delete from cashflow where date="2022q1"'
# pd.read_sql_query(sql, db)

In [None]:
financial_statement(2022, 2, CASHFLOW, 'otc')

# 股本

In [None]:
# Asset資產, Liabilities負債
BALANCE_SHEET = '資產負債彙總表'
df_AL_sii = financial_statement(2023,1, BALANCE_SHEET)
df_AL_otc = financial_statement(2023,1, BALANCE_SHEET, 'otc')

In [None]:
print('聯發科股本：', df_AL_sii[df_AL_sii['公司代號'] == 2454]['股本'].iloc[0])
print('中美晶股本：', df_AL_otc[df_AL_otc['公司代號'] == 5483]['股本'].iloc[0])

In [None]:
# 在已存在table新增 股本 欄位
# db.execute("ALTER TABLE stockIdName ADD COLUMN capital float")

In [None]:
def writeStockCapital(df):
    for index, row in df_stock_id_name.iterrows():
        stock_id = row['id']
        try:
            stock_capital = df[df['公司代號'] == stock_id]['股本'].iloc[0]
            format_stock_capital = round(stock_capital/1000000,2)
            sql = f"UPDATE stockIdName SET capital = {format_stock_capital} WHERE id = {stock_id}"
            db.execute(sql)
        except:
            continue

writeStockCapital(df_AL_sii)
writeStockCapital(df_AL_otc)

In [None]:
# read stockIdName table in db
c = pd.read_sql_query("select * from stockIdName",db)
c.head()

In [None]:
c[c['id']==5483]

In [11]:
db.close()