In [None]:
import requests
from io import StringIO
import pandas as pd
import numpy as np
import sqlite3
sqlite3.register_adapter(np.int64, int)
import re

# Open the local SQLite database file used by the rest of the notebook.
# isolation_level=None puts the connection in autocommit mode, so no
# transaction is opened implicitly around the statements executed below.
db = sqlite3.connect('./stock.db' , isolation_level=None)

In [None]:
def fetchData(year, month, type="sii"):
    """Download one month of revenue tables and return them as one DataFrame.

    Parameters
    ----------
    year : int
        Year of the report. Values above 2000 have 1911 subtracted before
        being placed in the URL (presumably converting a Gregorian year to
        the calendar year the site uses — confirm against the site).
    month : int
        Month number; used unpadded in the URL.
    type : str, default "sii"
        Path segment of the URL; the notebook calls this with "sii" and "otc".

    Returns
    -------
    pandas.DataFrame
        The concatenated tables, restricted to rows whose '當月營收' value is
        numeric and whose '公司代號' is not the '合計' summary row.

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status.
    ValueError
        If the page holds no table with 6 to 11 columns.
    IndexError
        If the tables have flat columns and no '公司代號' header row is found.
    """
    print(f"Download {year}{month:02} {type}")
    year = year - 1911 if year > 2000 else year
    url = f"https://mops.twse.com.tw/nas/t21/{type}/t21sc03_{year}_{month}_0.html"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    # A timeout keeps the notebook from hanging forever on a stalled connection,
    # and raise_for_status surfaces HTTP errors instead of parsing an error page.
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    r.encoding = 'big5'
    dfs = pd.read_html(StringIO(r.text))

    # Only tables with 6..11 columns are kept; other widths are discarded.
    tables = [table for table in dfs if 5 < table.shape[1] <= 11]
    if not tables:
        raise ValueError(f"No revenue table found at {url}")
    df = pd.concat(tables)

    if isinstance(df.columns, pd.MultiIndex):
        # Two-level header: the second level carries the column names.
        df.columns = df.columns.get_level_values(1)
    else:
        # Flat header: the column names live in a data row. Copy the slice so
        # the assignments below do not write into a view of the original frame.
        df = df[list(range(0, 10))].copy()
        # Select the header row positionally from the filtered frame; index
        # labels repeat after concat, so they cannot be used as positions.
        header_rows = df[df[0] == '公司代號']
        df.columns = header_rows.iloc[0]

    df['當月營收'] = pd.to_numeric(df['當月營收'], errors='coerce')
    # Non-numeric revenue (e.g. repeated header rows) became NaN above; drop it
    # together with the '合計' summary rows.
    keep = df['當月營收'].notna() & (df['公司代號'] != '合計')
    return df[keep].copy()

In [None]:
# Fetch the 2024-08 'sii' data and show how many rows came back.
a = fetchData(2024, 8, 'sii')
len(a) 
# Counts noted from earlier runs (presumably len(a) for 'sii' — TODO confirm):
#   2023: Feb 900, Mar 902, Apr 902, May 904, Jun 903, Jul 905, Aug 905, Sep 908
#   2024: Jan 922, Feb 924, Mar 927, Apr 936, May 936, Jun 938, Jul 938

In [None]:
# Download the latest monthly revenue and insert it into the database.
year = 2024
start_month = 7
n = 1  # number of consecutive months to download, starting at start_month
name = "monthlyRevenue"

# DataFrame columns, in the same order as the table columns that follow `date`
# in the INSERT statement.
source_columns = [
    '公司代號', '公司名稱', '當月營收', '上月營收', '上月比較增減(%)',
    '去年當月營收', '去年同月增減(%)', '去年累計營收', '前期比較增減(%)',
    '當月累計營收', '備註',
]

# The statement is the same for every row, so build it once. Only the table
# name is interpolated; all values go through `?` placeholders.
sql_insert = (
    f'insert into {name} '
    '(date,id,name,revenue,lastMonthRevenue,MoM,lastYearRevenue,YoY,lastSum,sumYoY,sum,note) '
    'values (?,?,?,?,?,?,?,?,?,?,?,?)'
)

for t in ['sii', 'otc']:  # listed (sii) / OTC (otc)
    for offset in range(n):
        # Roll over into the following year instead of requesting month 13+.
        extra_years, month_index = divmod(start_month - 1 + offset, 12)
        y, m = year + extra_years, month_index + 1
        period = f"{y}{m:02}"
        df = fetchData(y, m, t)
        for _idx, row in df.iterrows():
            try:
                db.execute(sql_insert, (period, *(row[col] for col in source_columns)))
            except Exception as exc:
                # Stay best-effort (continue with the next row) but report which
                # row failed and why; Ctrl-C is no longer swallowed.
                print(f"下載或寫入錯誤: {period} {row.get('公司代號')}: {exc!r}")

#  DB Columns

|date|id|name|revenue|lastMonthRevenue|MoM|lastYearRevenue|YoY|lastSum|sumYoY|sum|note|
|--|--|--|--|--|--|--|--|--|--|--|--|
|年月份|公司代號|公司名稱|當月營收|上月營收|上月比較增減|去年當月營收|去年同月增減|去年累計營收|前期比較增減|當月累計營收|備註|

In [None]:
# Query the database: load the whole monthlyRevenue table ordered by date.
sql = "select * from monthlyRevenue order by date"
search = pd.read_sql_query(sql, db)

# Keep only the rows whose id equals 2454, then show the row count and the last rows.
a = search.loc[search['id'] == 2454]
print(len(a))
a.tail()

In [None]:
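# Remove duplicate rows, keeping the lowest rowid for each (date, id) pair.
# NOTE: a DELETE returns no result set, so run it with db.execute(sql) rather than pd.read_sql_query.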
# sql="""
#     delete from monthlyRevenue where rowid not in (select min(rowid) from monthlyRevenue group by date, id)
# """
# pd.read_sql_query(sql, db)

# API columns 

|公司代號|公司名稱|備註|上月比較增減(%)|上月營收|去年同月增減(%)|去年當月營收|當月營收|前期比較增減(%)|去年累計營收|當月累計營收|
|--|--|--|--|--|--|--|--|--|--|--|

In [None]:
# Check that the API is working properly
fetchData(2023, 7, 'sii')


# Close the database connection

In [None]:
# Close the SQLite connection held in `db`; later queries need a new connection.
db.close()