In [1]:
import requests
from io import StringIO
import pandas as pd
import numpy as np
import sqlite3
# Teach sqlite3 to bind numpy int64 values by converting them to plain Python ints.
sqlite3.register_adapter(np.int64, int)
import re

# isolation_level=None puts the connection in autocommit mode: the sqlite3
# module does not open transactions implicitly, so each statement is
# committed as soon as it runs.
db = sqlite3.connect('./stock.db' , isolation_level=None)

In [2]:
def fetchData(year, month, type="sii"):
    """Download one month of company revenue from MOPS as a DataFrame.

    Parameters
    ----------
    year : int
        Year of the report. Values above 2000 are treated as Gregorian and
        converted to the ROC calendar (minus 1911); smaller values are used
        as-is.
    month : int
        Month number; it is placed in the URL without zero padding.
    type : str, default "sii"
        Market segment used in the URL path. The notebook calls this with
        "sii" (listed) and "otc" (over-the-counter).

    Returns
    -------
    pandas.DataFrame
        One row per company, with the Chinese column headers published on
        the page. Rows whose '當月營收' is not numeric and rows whose
        '公司代號' is '合計' (subtotals) are removed. The index is whatever
        ``pd.concat`` produced and is therefore not unique.

    Raises
    ------
    requests.RequestException
        On connection errors, timeouts, or a 4xx/5xx response.
    ValueError
        If the page contains no HTML table, or none with 6 to 11 columns.
    IndexError
        If the flat-header layout has no row starting with '公司代號'.
    """
    print(f"Download {year}{month:02} {type}")
    year = year - 1911 if year > 2000 else year
    url = f"https://mops.twse.com.tw/nas/t21/{type}/t21sc03_{year}_{month}_0.html"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(url, headers=headers, timeout=30)
    # Fail here on 4xx/5xx instead of handing an error page to the HTML parser.
    r.raise_for_status()
    r.encoding = 'big5'
    dfs = pd.read_html(StringIO(r.text))

    # Keep only the tables whose column count falls in the 6-11 range.
    tables = [table for table in dfs if 5 < table.shape[1] <= 11]
    if not tables:
        raise ValueError(f"No revenue table found at {url}")
    df = pd.concat(tables)

    if isinstance(df.columns, pd.MultiIndex):
        # Two-level header: the second level holds the field names.
        df.columns = df.columns.get_level_values(1)
    else:
        # Flat layout: the header is an ordinary data row. Copy the slice so
        # the column assignment below does not write into a view.
        df = df[list(range(0, 10))].copy()
        # Index labels repeat after concat, so locate the header row by
        # position rather than by label before handing it to iloc.
        header_positions = (df[0] == '公司代號').to_numpy().nonzero()[0]
        df.columns = df.iloc[header_positions[0]]

    df['當月營收'] = pd.to_numeric(df['當月營收'], errors='coerce')
    df = df[df['當月營收'].notna()]
    df = df[df['公司代號'] != '合計']
    return df

In [4]:
# Spot check: fetch one month of "sii" data and show how many rows came back.
a = fetchData(year=2023, month=7, type='sii')
len(a)
# Row counts observed per month:
# Feb 900
# Mar 902
# Apr 902
# May 904
# Jun 903
# Jul 904

Download 202307 sii


904

In [22]:
# Download the latest monthly revenue and store it in the database.
year = 2013
start_month = 1
n = 12
name = "monthlyRevenue"

# Source columns in the same order as the DB columns that follow `date`
# in the insert statement below.
source_columns = ['公司代號', '公司名稱', '當月營收', '上月營收', '上月比較增減(%)',
                  '去年當月營收', '去年同月增減(%)', '去年累計營收', '前期比較增減(%)',
                  '當月累計營收', '備註']
# Built once: the statements do not change between rows.
sql_insert = (f'insert into {name} (date,id,name,revenue,lastMonthRevenue,MoM,lastYearRevenue,YoY,lastSum,sumYoY,sum,note) '
              'values (?,?,?,?,?,?,?,?,?,?,?,?)')
sql_existing = f'select id from {name} where date = ?'

for t in ['sii', 'otc']:  # listed: sii / over-the-counter: otc
    for m in range(start_month, start_month + n):
        date_key = f"{year}{m:02}"
        try:
            df = fetchData(year, m, t)
            # Re-running this cell used to insert every row a second time.
            # Skip ids already stored for this month so the cell is repeatable.
            known_ids = {str(found[0]) for found in db.execute(sql_existing, (date_key,))}
            rows = [
                (date_key, *values)
                for values in df[source_columns].itertuples(index=False, name=None)
                if str(values[0]) not in known_ids
            ]
            # One explicit transaction per month: either the whole month is
            # written or none of it, and it is far faster than a commit per row.
            db.execute('begin')
            try:
                db.executemany(sql_insert, rows)
                db.commit()
            except Exception:
                db.rollback()
                raise
        except Exception as exc:
            # Report which month failed and why, then continue with the next one.
            print(f"下載或寫入錯誤 {date_key} {t}: {exc!r}")

Download 201301 sii
Download 201302 sii
Download 201303 sii
Download 201304 sii
Download 201305 sii
Download 201306 sii
Download 201307 sii
Download 201308 sii
Download 201309 sii
Download 201310 sii
Download 201311 sii
Download 201312 sii
Download 201301 otc
Download 201302 otc
Download 201303 otc
Download 201304 otc
Download 201305 otc
Download 201306 otc
Download 201307 otc
Download 201308 otc
Download 201309 otc
Download 201310 otc
Download 201311 otc
Download 201312 otc


#  DB Columns

|date|id|name|revenue|lastMonthRevenue|MoM|lastYearRevenue|YoY|lastSum|sumYoY|sum|note|
|--|--|--|--|--|--|--|--|--|--|--|--|
|年月份|公司代號|公司名稱|當月營收|上月營收|上月比較增減|去年當月營收|去年同月增減|去年累計營收|前期比較增減|當月累計營收|備註|

In [10]:
# Query the rows stored in the database
sql = "select * from monthlyRevenue order by date"
search = pd.read_sql_query(sql, db)

# Keep only the rows of company 5483.
a = search.loc[search['id'] == 5483]
a

Unnamed: 0,date,id,name,revenue,lastMonthRevenue,MoM,lastYearRevenue,YoY,lastSum,sumYoY,sum,note
19896,202201,5483,中美晶,5945365,5981219,-0.59,4909783,21.09,4909783,21.09,5945365,-
21580,202201,5483,中美晶,5945365,5981219,-0.59,4909783,21.09,4909783,21.09,5945365,-
23264,202202,5483,中美晶,6136661,5945365,3.21,4958538,23.75,9868321,22.43,12082026,-
24948,202202,5483,中美晶,6136661,5945365,3.21,4958538,23.75,9868321,22.43,12082026,-
26632,202203,5483,中美晶,6687393,6136661,8.97,6183757,8.14,16052078,16.92,18769419,-
28316,202203,5483,中美晶,6687393,6136661,8.97,6183757,8.14,16052078,16.92,18769419,-
30000,202204,5483,中美晶,6101054,6687393,-8.76,5561509,9.7,21613587,15.06,24870474,-
31684,202204,5483,中美晶,6101054,6687393,-8.76,5561509,9.7,21613587,15.06,24870474,-
33368,202205,5483,中美晶,6914330,6101054,13.33,5394253,28.17,27007839,17.68,31784804,-
35052,202205,5483,中美晶,6914330,6101054,13.33,5394253,28.17,27007839,17.68,31784804,-


# API columns 

|公司代號|公司名稱|備註|上月比較增減(%)|上月營收|去年同月增減(%)|去年當月營收|當月營收|前期比較增減(%)|去年累計營收|當月累計營收|
|--|--|--|--|--|--|--|--|--|--|--|

In [24]:
# Check that the API still responds as expected
fetchData(year=2019, month=1, type='otc')

Download 201901


Unnamed: 0,公司代號,公司名稱,備註,上月比較增減(%),上月營收,去年同月增減(%),去年當月營收,當月營收,前期比較增減(%),去年累計營收,當月累計營收
0,1264,德麥,-,17.15,374937,6.88,410967,439259,6.88,410967,439259
1,1796,金穎生技,-,2.29,33619,13.31,30350,34391,13.31,30350,34391
2,4205,中華食,-,25.68,126440,16.69,136172,158912,16.69,136172,158912
3,4207,環泰,-,37.46,269770,10.01,337079,370850,10.01,337079,370850
4,4712,南璋,因客戶需求降低，導致營收大幅衰退,-0.92,2045,-87.23,15874,2026,-87.23,15874,2026
...,...,...,...,...,...,...,...,...,...,...,...
3,5287,數字,-,0.95,120192,2.60,118263,121344,2.60,118263,121344
4,8044,網家,-,6.24,3228591,26.28,2716246,3430126,26.28,2716246,3430126
5,8472,夠麻吉,-,-2.44,40402,-9.50,43554,39416,-9.50,43554,39416
6,8477,創業家,-,22.82,411604,11.31,454156,505557,11.31,454156,505557


# Close the database connection

In [23]:
# Close the SQLite connection; `db` cannot be used by later cells after this.
db.close()