In [1]:
import requests
from io import StringIO
import pandas as pd
import numpy as np
import sqlite3
# Register `int` as the adapter for numpy int64 values so they can be bound
# as SQL parameters.
sqlite3.register_adapter(np.int64, int)
import re

# isolation_level=None puts the connection in autocommit mode: each statement
# is committed immediately, so no explicit commit() is issued in this notebook.
# The path is relative to the notebook's working directory.
db = sqlite3.connect('../stock.db' , isolation_level=None)

In [2]:
def fetchData(year, month, type="sii"):
    """Download one month of company revenue from the MOPS site as a DataFrame.

    Parameters
    ----------
    year : int
        Year of the report. Values above 2000 are treated as Gregorian years
        and converted to ROC years (minus 1911); other values are used as-is.
    month : int
        Month of the report (used unpadded in the URL).
    type : str, default "sii"
        Market segment used in the URL path, e.g. "sii" or "otc".

    Returns
    -------
    pandas.DataFrame
        The revenue tables found on the page, concatenated, with column names
        taken from the page header. Rows whose '當月營收' is not numeric and
        rows whose '公司代號' is '合計' are removed. The index is not reset,
        so labels may repeat across the source tables.

    Raises
    ------
    requests.RequestException
        On network failure, timeout, or a non-success HTTP status.
    ValueError
        If the page contains no table with 6 to 11 columns.
    IndexError
        If the tables have flat integer headers and no row starts with
        '公司代號'.
    """
    print(f"Download {year}{month:02} {type}")
    year = year - 1911 if year > 2000 else year
    url = f"https://mops.twse.com.tw/nas/t21/{type}/t21sc03_{year}_{month}_0.html"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    r = requests.get(url, headers=headers, timeout=30)
    # Fail fast on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()
    r.encoding = 'big5'
    dfs = pd.read_html(StringIO(r.text))

    # Keep only tables whose width matches a revenue table (6 to 11 columns).
    tables = [table for table in dfs if 5 < table.shape[1] <= 11]
    if not tables:
        raise ValueError(f"No revenue table found at {url}")
    df = pd.concat(tables)

    if isinstance(df.columns, pd.MultiIndex):
        # Two-level header: the second level carries the field names.
        df.columns = df.columns.get_level_values(1)
    else:
        # Flat integer header: the field names sit in a data row.
        df = df[list(range(0, 10))].copy()
        # The concatenated index repeats labels, so locate the header row by
        # position (not by label) before handing it to .iloc.
        header_pos = (df[0] == '公司代號').to_numpy().nonzero()[0][0]
        df.columns = df.iloc[header_pos]

    # Non-numeric revenue cells (repeated headers, banners) become NaN and are dropped.
    df['當月營收'] = pd.to_numeric(df['當月營收'], errors='coerce')
    df = df[df['當月營收'].notna()]
    df = df[df['公司代號'] != '合計']
    return df

In [3]:
# Smoke test: fetch the June 2023 'sii' revenue table and count its rows.
a = fetchData(2023, 6, 'sii')
len(a) 
# Row counts noted per month (presumably from earlier 2023 runs; confirm):
# Feb 900
# Mar 902
# Apr 902
# May 904
# Jun 903

Download 202306 sii


903

In [4]:
# Download the latest monthly revenue and insert it into the database.
year = 2023
start_month = 6
n = 1  # number of consecutive months to download, starting at start_month
name = "monthlyRevenue"

# Built once, outside the loops. Only the table name is interpolated; every
# value is bound through a `?` placeholder.
sql_insert = (
    f'insert into {name} '
    '(date,id,name,revenue,lastMonthRevenue,MoM,lastYearRevenue,YoY,lastSum,sumYoY,sum,note) '
    'values (?,?,?,?,?,?,?,?,?,?,?,?)'
)
# DataFrame columns, in the order of the DB columns that follow `date`.
source_columns = [
    '公司代號', '公司名稱', '當月營收', '上月營收', '上月比較增減(%)',
    '去年當月營收', '去年同月增減(%)', '去年累計營收', '前期比較增減(%)',
    '當月累計營收', '備註',
]

for t in ['sii', 'otc']:  # listed (sii) / over-the-counter (otc)
    for m in range(start_month, start_month + n):
        df = fetchData(year, m, t)
        period = f"{year}{m:02}"
        for index, row in df.iterrows():
            try:
                db.execute(sql_insert, (period, *(row[col] for col in source_columns)))
            except Exception as exc:
                # Best effort: report the failing row and carry on with the rest.
                # Catching Exception (not a bare except) leaves KeyboardInterrupt
                # and SystemExit free to stop the loop.
                print(f"下載或寫入錯誤: {period} {t} {row.get('公司代號')}: {exc!r}")

Download 202306 sii
Download 202306 otc


# DB columns (`monthlyRevenue` table)

|date|id|name|revenue|lastMonthRevenue|MoM|lastYearRevenue|YoY|lastSum|sumYoY|sum|note|
|--|--|--|--|--|--|--|--|--|--|--|--|
|年月份|公司代號|公司名稱|當月營收|上月營收|上月比較增減|去年當月營收|去年同月增減|去年累計營收|前期比較增減|當月累計營收|備註|

In [5]:
# Query the database: load the whole revenue table, then keep the rows
# whose company id is 2454.
sql = "select * from monthlyRevenue"
search = pd.read_sql_query(sql, db)

a = search.loc[search['id'].eq(2454)]
print(len(a))
a

66


Unnamed: 0,date,id,name,revenue,lastMonthRevenue,MoM,lastYearRevenue,YoY,lastSum,sumYoY,sum,note
296,202001,2454,聯發科,19817777,22089329,-10.28,16242046,22.01,16242046,22.01,19817777,海外子公司之營收係以當月平均匯率換算之
1180,202002,2454,聯發科,18220837,19817777,-8.05,14161182,28.66,30403228,25.11,38038614,海外子公司之營收係以當月平均匯率換算之
2064,202003,2454,聯發科,22824361,18220837,25.26,22318664,2.26,52721892,15.44,60862975,海外子公司之營收係以當月平均匯率換算之
2948,202004,2454,聯發科,20545910,22824361,-9.98,21553235,-4.67,74275128,9.60,81408885,海外子公司之營收係以當月平均匯率換算之
3832,202005,2454,聯發科,21777663,20545910,5.99,19120819,13.89,93395946,10.48,103186548,海外子公司之營收係以當月平均匯率換算之
...,...,...,...,...,...,...,...,...,...,...,...,...
100240,202302,2454,聯發科,30309642,22383397,35.41,40028948,-24.28,83531290,-36.91,52693039,海外子公司之營收係以當月平均匯率換算之。
101921,202303,2454,聯發科,42958473,30309642,41.73,59179559,-27.40,142710849,-32.97,95651512,海外子公司之營收係以當月平均匯率換算之。
103602,202304,2454,聯發科,28349722,42958473,-34.00,52624634,-46.12,195335483,-36.51,124001234,海外子公司之營收係以當月平均匯率換算之。
105284,202305,2454,聯發科,31566805,28349722,11.34,52076120,-39.38,247411603,-37.12,155568039,海外子公司之營收係以當月平均匯率換算之。


In [29]:
# Remove duplicate rows, keeping the lowest rowid for each (date, id) pair (disabled; uncomment to run)
# sql="""
#     delete from monthlyRevenue where rowid not in (select min(rowid) from monthlyRevenue group by date, id)
# """
# pd.read_sql_query(sql, db)

# API columns (as returned by `fetchData`)

|公司代號|公司名稱|備註|上月比較增減(%)|上月營收|去年同月增減(%)|去年當月營收|當月營收|前期比較增減(%)|去年累計營收|當月累計營收|
|--|--|--|--|--|--|--|--|--|--|--|

In [24]:
# Check that the API is responding normally
fetchData(2019, 1, 'otc')


Download 201901


Unnamed: 0,公司代號,公司名稱,備註,上月比較增減(%),上月營收,去年同月增減(%),去年當月營收,當月營收,前期比較增減(%),去年累計營收,當月累計營收
0,1264,德麥,-,17.15,374937,6.88,410967,439259,6.88,410967,439259
1,1796,金穎生技,-,2.29,33619,13.31,30350,34391,13.31,30350,34391
2,4205,中華食,-,25.68,126440,16.69,136172,158912,16.69,136172,158912
3,4207,環泰,-,37.46,269770,10.01,337079,370850,10.01,337079,370850
4,4712,南璋,因客戶需求降低，導致營收大幅衰退,-0.92,2045,-87.23,15874,2026,-87.23,15874,2026
...,...,...,...,...,...,...,...,...,...,...,...
3,5287,數字,-,0.95,120192,2.60,118263,121344,2.60,118263,121344
4,8044,網家,-,6.24,3228591,26.28,2716246,3430126,26.28,2716246,3430126
5,8472,夠麻吉,-,-2.44,40402,-9.50,43554,39416,-9.50,43554,39416
6,8477,創業家,-,22.82,411604,11.31,454156,505557,11.31,454156,505557


# Close the database connection

In [6]:
# Close the SQLite connection; cells that use `db` cannot run after this.
db.close()