In [None]:
import requests
from io import StringIO
import pandas as pd
import numpy as np
import sqlite3
# Register `int` as the adapter for numpy int64 so such values are converted
# to plain Python ints when they are bound as SQL parameters.
sqlite3.register_adapter(np.int64, int)
import re

# Connect to the SQLite database file one directory above the notebook.
# isolation_level=None puts the connection in autocommit mode: the sqlite3
# module does not open implicit transactions around statements.
db = sqlite3.connect('../stock.db' , isolation_level=None)

# Show floats with two fixed decimals instead of scientific notation.
pd.set_option('display.float_format', lambda value: '%.2f' % value)

# Raise the display limits so wide/long DataFrames are shown without truncation.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

In [None]:
def getYield(year, TYPEK='sii', table_start=3, timeout=30):
    """Download one year's dividend-distribution tables from MOPS as a DataFrame.

    Parameters
    ----------
    year : int
        Year to query. Values >= 1000 are treated as Western calendar years
        and converted to the ROC calendar by subtracting 1911; smaller values
        are sent unchanged.
    TYPEK : str, default 'sii'
        Market segment sent to the endpoint: 'sii' (listed) or 'otc' (OTC).
    table_start : int, default 3
        Index of the first parsed HTML table to keep; every table from this
        index on is concatenated. The page layout is not stable across years
        (the original note on this code records 3 for 2021 and 2 for 2022),
        so pass a different value when the default picks up the wrong tables.
    timeout : float, default 30
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        The selected tables concatenated row-wise. When the parsed header is a
        MultiIndex, only its second level is kept as the column labels.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the page has no tables at or after ``table_start``.
    """
    if year >= 1000:
        year -= 1911

    url = 'https://mops.twse.com.tw/server-java/t05st09sub'
    payload = {
        'step': 1,
        'TYPEK': TYPEK,  # 'sii' = listed companies, 'otc' = OTC companies
        'YEAR': str(year),
        'qryType': 1,
    }

    r = requests.post(url, payload, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    r.raise_for_status()

    # The response text is decoded as Big5 before parsing.
    r.encoding = 'big5'
    dfs = pd.read_html(StringIO(r.text), header=None)

    data_tables = dfs[table_start:]
    if not data_tables:
        raise ValueError(
            f'expected data tables from index {table_start}, '
            f'but the page only contains {len(dfs)} table(s)'
        )

    df = pd.concat(data_tables, axis=0, sort=False)

    # Flatten a two-level header by keeping only the second level.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(1)

    return df


In [None]:
# Western calendar year to fetch; getYield converts it to the ROC year itself.
YEAR = 2023

# Listed (sii) and OTC (otc) companies are served as separate queries.
df_sii = getYield(YEAR, TYPEK='sii')
df_otc = getYield(YEAR, TYPEK='otc')

In [None]:
print(len(df_sii))
print(len(df_otc))
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and likewise renumbers the index with ignore_index=True.
df = pd.concat([df_sii, df_otc], ignore_index=True)
print(len(df))
# Sanity check: the combined frame must hold every row of both sources.
assert len(df) == len(df_sii) + len(df_otc)
df

In [None]:
def getId(r):
    """Split a row's combined '公司代號 名稱' field into ``[id, name]``.

    All spaces are removed from the field, then it is split at the FIRST
    hyphen only, so names that themselves contain a hyphen (for example a
    trailing "-KY") are kept whole.

    Parameters
    ----------
    r : mapping or pandas.Series
        A row that is expected to carry the key '公司代號 名稱'.

    Returns
    -------
    list
        ``[id, name]`` as strings, or ``[np.nan, np.nan]`` when the field is
        missing, is not a string, or contains no hyphen. The result always has
        exactly two elements so callers can unpack it safely.
    """
    try:
        compact = r['公司代號 名稱'].replace(" ", "")
    except (KeyError, AttributeError, TypeError):
        # Missing column, NaN/non-string cell, or an unusable row object.
        return [np.nan, np.nan]

    company_id, separator, company_name = compact.partition("-")
    if not separator:
        # No hyphen at all: the cell does not follow the "id - name" layout.
        return [np.nan, np.nan]

    return [company_id, company_name]

# Parse every row with getId, then transpose the per-row pairs into two columns.
id_column, name_column = zip(*df.apply(getId, axis=1))
df['id'] = id_column
df['name'] = name_column

In [None]:
# Cash paid per share, by source: earnings, legal reserve, capital surplus.
cash_from_earnings = df['盈餘分配\t之現金股利\t(元/股)']
cash_from_legal_reserve = df['法定盈餘公積發放之現金(元/股)']
cash_from_capital_surplus = df['資本公積發放之現金(元/股)']
df['cash'] = cash_from_earnings + cash_from_legal_reserve + cash_from_capital_surplus

# Stock distributed per share, by the same three sources.
stock_from_earnings = df['盈餘轉\t增資配股\t(元/股)']
stock_from_legal_reserve = df['法定盈餘\t公積轉增資\t配股(元/股)']
stock_from_capital_surplus = df['資本公積\t轉增資配股\t(元/股)']
df['stock'] = stock_from_earnings + stock_from_legal_reserve + stock_from_capital_surplus

# Combined payout per share.
df['total'] = df['cash'] + df['stock']

In [None]:
# Spot-check one company by its stock code.
df.loc[df['id'] == '5483']

In [None]:
# The 100 rows with the highest total (cash + stock) payout per share.
summary_columns = ['id', 'name', 'cash', 'stock', 'total']
df.loc[:, summary_columns].sort_values('total', ascending=False).head(100)

# Write to DB

In [None]:
# Make sure the target table exists so this cell also works against a fresh
# database file; the statement is a no-op when the table is already there.
db.execute(
    'create table if not exists yield '
    '(date DATE, id TEXT, name TEXT, cash REAL, stock REAL, total REAL)'
)

sql_insert = 'insert into yield (date, id, name, cash, stock, total) values (?,?,?,?,?,?)'

# One parameter tuple per DataFrame row, each prefixed with the queried year.
yield_rows = [
    (YEAR, *record)
    for record in df[['id', 'name', 'cash', 'stock', 'total']].itertuples(index=False, name=None)
]

# The connection runs in autocommit mode, so group the inserts into a single
# explicit transaction: either every row is stored or none is.
# NOTE(review): re-running this cell appends the same rows again; the table
# has no uniqueness constraint on (date, id) - confirm whether that is intended.
db.execute('BEGIN')
try:
    db.executemany(sql_insert, yield_rows)
except Exception:
    db.execute('ROLLBACK')
    raise
else:
    db.execute('COMMIT')


# Create Table

In [None]:
# Schema of the `yield` table: one row per company and year.
# cash / stock / total are per-share amounts and can be fractional, so they
# are declared REAL rather than INTEGER.
# "if not exists" makes this cell safe to re-run; it does not alter a table
# that already exists. On a fresh database it has to run before any insert.
sql = '''
    create table if not exists yield (
        date  DATE,
        id    TEXT,
        name  TEXT,
        cash  REAL,
        stock REAL,
        total REAL
    );
'''

db.execute(sql)

In [None]:
# Read the whole `yield` table back to verify what is stored in the database.
sql = 'select * from yield'
df_yield = pd.read_sql_query(sql, con=db)
df_yield

In [None]:
# Close the SQLite connection; `db` cannot be used for queries after this.
db.close()