In [None]:
import requests
from io import StringIO
import pandas as pd
import numpy as np
import re
pd.options.mode.chained_assignment = None 

In [2]:
import sqlite3
# Teach sqlite3 how to bind numpy int64 values: convert them to plain Python ints.
sqlite3.register_adapter(np.int64, int)
# Open (or create) the database file relative to the current working directory.
# isolation_level=None selects autocommit mode, so every statement is committed immediately.
db = sqlite3.connect('./stock.db', isolation_level=None)

# 取得上市櫃公司代號名稱

In [None]:
SII_URL = "https://isin.twse.com.tw/isin/C_public.jsp?strMode=2"  # list of exchange-listed companies
OTC_URL = "https://isin.twse.com.tw/isin/C_public.jsp?strMode=4" # list of OTC-listed companies

def fetchStockIdName(type):
    """Download one ISIN listing page and return its four-digit securities.

    Parameters
    ----------
    type : str
        URL of the listing page to fetch (``SII_URL`` or ``OTC_URL``).
        The name shadows the builtin ``type``; it is kept unchanged so that
        existing positional and keyword callers keep working.

    Returns
    -------
    pandas.DataFrame
        The first HTML table on the page, with its first row promoted to
        column labels, the combined code/name column split into separate
        code and name columns, the remark, CFI and ISIN columns dropped, and
        only the rows whose code passes ``regex_filter`` retained.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server does not answer within the timeout.
    """
    # A timeout keeps Run All from hanging forever on a stalled connection,
    # and raise_for_status avoids parsing an error page as if it were data.
    r = requests.get(type, timeout=30)
    r.raise_for_status()

    # Passing literal HTML to read_html is deprecated in recent pandas;
    # wrapping it in StringIO works on old and new versions alike.
    df = pd.read_html(StringIO(r.text))[0]

    # Row 0 holds the real header; row 1 is skipped along with it.
    df.columns = df.iloc[0]
    # Copy so the column assignment below writes to an independent frame
    # rather than to a slice of the parsed table.
    df = df.iloc[2:].copy()

    # Code and name are separated by an ideographic space. n=1 guarantees at
    # most two resulting columns even if a name itself contains that character.
    df[['證券代號','證券名稱']] = df['有價證券代號及名稱'].str.split('\u3000', n=1, expand=True)
    df = df.drop(columns=['備註', '有價證券代號及名稱','CFICode','國際證券辨識號碼(ISIN Code)'])
    df = df[df['證券代號'].apply(regex_filter)]
    return df

def regex_filter(val):
    """Tell whether ``val`` is a four-digit security code.

    Parameters
    ----------
    val : object
        Candidate code, normally a string taken from the split code column.

    Returns
    -------
    bool
        True when ``val`` is a string matching ``^\\d{4}$``; False otherwise.
        Non-string values (``None``, ``NaN``, numbers) return False instead
        of raising ``TypeError`` inside ``re.search``.
    """
    # NaN is truthy, so a plain truthiness check would let it reach re.search.
    if not isinstance(val, str):
        return False
    return re.search(r'^\d{4}$', val) is not None

# Download both listings: exchange-listed first, then OTC.
df_sii = fetchStockIdName(type=SII_URL)
df_otc = fetchStockIdName(type=OTC_URL)

In [None]:
# Report how many securities each listing contributed, and the combined total.
n_sii, n_otc = len(df_sii), len(df_otc)
print('上市家數:', n_sii)
print('上櫃家數:', n_otc)
print('共:', n_sii + n_otc)
# Previously observed counts (date: listed / OTC / total)
# 2022-04-19: 975 / 795 / 1770
# 2022-09-22: 981 / 799 / 1780
# 2022-11-26: 928 / 804 / 1786  (928 does not sum to 1786; 982 was presumably meant)
# 2023-08-05: 992 / 812 / 1804

In [None]:
# Stack the listed and OTC frames into one table.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# is its replacement and, like append, keeps each frame's original index.
df_all = pd.concat([df_sii, df_otc])
df_all.head()

In [None]:
# Schema for the security master table: numeric code, name, listing date,
# market, industry and capital. No primary key or unique constraint is
# declared, so inserting the same code twice creates duplicate rows.
sql = 'create table if not exists stockIdName \
           (id INT ,\
            name TEXT ,\
            listingDate TEXT ,\
            market TEXT,\
            industry TEXT,\
            capital INT);'
# "if not exists" makes this cell safe to re-run against an existing database.
db.execute(sql)

In [None]:
# insert into db
# Insert every fetched security into stockIdName, one row at a time.
# The statement is loop-invariant, so it is built once up front.
sql_insert = 'insert into stockIdName (id, name, listingDate, market, industry, capital) values (?,?,?,?,?,?)'
skipped = []
for index, row in df_all.iterrows():
    try:
        # capital is not available from the listing page; 0 is stored as a placeholder.
        db.execute(sql_insert, (int(row['證券代號']), row['證券名稱'], row['上市日'], row['市場別'],row['產業別'], 0))
    except (KeyError, TypeError, ValueError, sqlite3.Error) as err:
        # Stay best-effort (one bad row must not abort the load), but remember
        # what was skipped instead of silently discarding the error.
        skipped.append((index, err))

if skipped:
    print(f'skipped {len(skipped)} of {len(df_all)} rows; first error: {skipped[0][1]!r}')

In [None]:
# check in db
# Read the table back to confirm what is stored.
stock_id_name_query = "select * from stockIdName"
df_stock_id_name = pd.read_sql_query(sql=stock_id_name_query, con=db)
df_stock_id_name

In [None]:
# db.execute('drop table stockIdName')

# Create daily stock table

In [None]:
# Schema for the daily quote table: one row per security per trading date,
# holding volume, transaction count, turnover, OHLC prices, change direction
# and amount, best bid/ask price and volume, and P/E ratio.
# [transaction] is bracket-quoted because TRANSACTION is an SQL keyword.
# The "Volumn" spelling is part of the column names as created; queries
# against this table must use the same spelling.
# NOTE(review): DATE has NUMERIC affinity in SQLite, so a digits-only date
# string would be stored as an integer. Confirm the format the loader writes.
sql = 'create table if not exists daily \
           (date DATE, \
            id INT ,\
            name TEXT ,\
            tradeVolumn INTEGER ,\
            [transaction] INTEGER ,\
            tradeValue INTEGER ,\
            open FLOAT ,\
            high FLOAT ,\
            low FLOAT ,\
            close FLOAT ,\
            dir TEXT ,\
            change FLOAT ,\
            bidPrice FLOAT ,\
            bidVolumn INTEGER ,\
            askPrice FLOAT ,\
            askVolumn INTEGER ,\
            pe FLOAT);'

# "if not exists" makes this cell safe to re-run against an existing database.
db.execute(sql)

In [None]:
# check in db
# Read the whole daily table back to inspect its contents.
sql = 'select * from daily'
pd.read_sql_query(sql=sql, con=db)

In [None]:
# db.execute('drop table daily')

# Search DB tables 
查詢目前資料庫中的所有資料表

In [3]:
# List the tables by reading the name column of the schema catalogue.
# Slicing the stored CREATE statement on spaces breaks as soon as a table
# name is followed directly by "(" or a newline instead of a space.
tables = db.execute("select name from sqlite_master where type = 'table'").fetchall()
print(f"資料庫共: {len(tables)} 張表")
print('table name: ')
for row in tables:
    print('  ', row[0])

資料庫共: 8 張表
table name: 
   daily
   monthlyRevenue
   cashflow
   balanceSheet
   yield
   financialStatement
   stockIdName
   features


# 月營收 Monthly revenue

DB columns

    date 年月份 TEXT,
    id 公司代號 INT,    
    name 公司名稱 TEXT,    
    revenue 當月營收 INTEGER,
    lastMonthRevenue 上月營收 INTEGER, 
    MoM 上月比較增減 FLOAT,
    lastYearRevenue 去年當月營收 INTEGER,
    YoY 去年同月增減 FLOAT, 
    lastSum 去年累計營收 INTEGER, 
    sumYoY 前期比較增減 FLOAT,
    sum 當月累計營收 INTEGER, 
    note 備註 TEXT

In [None]:
# Table name is kept in a variable and interpolated into the DDL below.
# It is a hard-coded constant, not user input, so the f-string is safe here.
name = "monthlyRevenue"
# Schema for monthly revenue: current, previous-month and same-month-last-year
# revenue with their MoM / YoY change, cumulative revenue for this year and
# last year with its change, plus a free-text note.
# The column meanings come from the "DB columns" list in the markdown above this cell.
sql = f'create table if not exists {name} \
           (date TEXT, \
            id INT,     \
            name TEXT,    \
            revenue INTEGER, \
            lastMonthRevenue INTEGER, \
            MoM FLOAT, \
            lastYearRevenue INTEGER, \
            YoY FLOAT, \
            lastSum INTEGER, \
            sumYoY FLOAT,\
            sum INTEGER, \
            note TEXT );'

# "if not exists" makes this cell safe to re-run against an existing database.
db.execute(sql)

In [None]:
# db.execute('drop table monthlyRevenue')

# 財報

## 綜合損益彙表

|id|name|revenue|grossProfit|operatingIncome|incomeBeforeTax|income|eps|qeps|
|--|--|--|--|--|--|--|--|--|
|公司代號|公司名稱|營業收入|營業毛利（毛損）|營業利益（損失）|稅前淨利（淨損）|本期淨利（淨損）|基本每股盈餘（元）|單季eps|

In [None]:
# Schema for the income statement summary: revenue, gross profit, operating
# income, pre-tax income, net income and EPS per company and period.
# qeps is the single-quarter EPS per the markdown table above this cell;
# qincome is presumably the single-quarter counterpart of income (TODO confirm).
sql = 'create table if not exists financialStatement \
           (date TEXT,\
            id INT ,\
            name TEXT ,\
            revenue INTEGER,\
            grossProfit INTEGER,\
            operatingIncome INTEGER,\
            incomeBeforeTax INTEGER,\
            income INTEGER, \
            qincome INTEGER, \
            eps FLOAT,\
            qeps FLOAT);'

# "if not exists" makes this cell safe to re-run against an existing database.
db.execute(sql)

In [None]:
# Read the whole financialStatement table back to inspect its contents.
sql = 'select * from financialStatement'
pd.read_sql_query(sql=sql, con=db)

In [None]:
# Destructive reset helper: uncomment and run manually to delete the table.
# It is kept commented out, like the other drop cells in this notebook, so
# that Restart & Run All does not drop the table that was just created.
# db.execute('drop table financialStatement')

## 資產負債表

|資產總額|流動資產|非流動資產|負債總額|流動負債|非流動負債|資本公積|保留盈餘|庫藏股票|權益總額|股本|每股參考淨值|
|--|--|--|--|--|--|--|--|--|--|--|--|
|asset|currentAsset|nonCurrentAsset|liabilities|currentLiabilities|nonCurrentLiabilities|additionalPaid|retainedEarning|treasury|shareholderEquity|capital|pb|

In [None]:
# Schema for the balance sheet: total / current / non-current assets and
# liabilities, additional paid-in capital, retained earnings, treasury stock,
# total equity, share capital and reference net value per share (pb).
# The column meanings come from the markdown table above this cell.
sql = 'create table if not exists balanceSheet \
           (date TEXT,\
            id INT ,\
            name TEXT ,\
            asset INTEGER,\
            currentAsset INTEGER,\
            nonCurrentAsset INTEGER,\
            liabilities INTEGER,\
            currentLiabilities INTEGER, \
            nonCurrentLiabilities INTEGER, \
            additionalPaid INTEGER, \
            retainedEarning INTEGER, \
            treasury INTEGER, \
            shareholderEquity INTEGER, \
            capital INTEGER, \
            pb FLOAT);'

# "if not exists" makes this cell safe to re-run against an existing database.
db.execute(sql)

In [None]:
# Read the whole balanceSheet table back to inspect its contents.
sql = 'select * from balanceSheet'
pd.read_sql_query(sql=sql, con=db)

In [None]:
# db.execute("drop table balanceSheet")

## 現金流量表

|日期|公司代號|公司名稱|營業活動之淨現金流入（流出）|投資活動之淨現金流入（流出）|籌資活動之淨現金流入（流出）|匯率變動對現金及約當現金之影響|本期現金及約當現金增加（減少）數|期初現金及約當現金餘額|期末現金及約當現金餘額|
|--|--|--|--|--|--|--|--|--|--|
|date|id|name|operating|investing|financing|exchange|change|beginning|end|

In [None]:
# Schema for the cash-flow statement: operating, investing and financing
# cash flows, exchange-rate effect, net change, and opening / closing cash.
# cashflow and qcashflow are extra columns not described in the markdown
# table above; presumably a derived cash-flow figure and its single-quarter
# value (TODO confirm against the loader).
# qcashflow was declared as "INTERGER"; SQLite still gave it INTEGER affinity
# because the word contains "INT", but the typo is corrected here so the
# declared type reads correctly to schema-introspection tools. Databases
# created earlier keep the old declaration because of "if not exists".
sql = 'create table if not exists cashflow \
           (date TEXT,\
            id INT ,\
            name TEXT ,\
            operating INTEGER,\
            investing INTEGER,\
            cashflow INTEGER, \
            qcashflow INTEGER, \
            financing INTEGER,\
            exchange INTEGER,\
            change INTEGER, \
            beginning INTEGER, \
            end INTEGER);'

# "if not exists" makes this cell safe to re-run against an existing database.
db.execute(sql)

In [None]:
# Read the whole cashflow table back to inspect its contents.
sql = 'select * from cashflow'
pd.read_sql_query(sql=sql, con=db)

In [None]:
# db.execute("drop table cashflow")

# Close DB connection

In [4]:
# Release the SQLite connection; any later cell that uses `db` must reconnect first.
db.close()