In [13]:
import requests
import pandas as pd
import numpy as np
from datetime import date
import plotly.express as px
def conv_to_list(obj):
    if not isinstance(obj, list) :
        results = [obj]
    else:
        results = obj
    return results

def df_conv_col_type(df, cols, to, ignore=False):
    cols = conv_to_list(cols)
    for i in range(len(cols)):
        if ignore :
            try:
                df[cols[i]] = df[cols[i]].astype(to)
            except:
                print('df_conv_col_type - ' + cols[i] + '轉換錯誤')
                continue
        else:
            df[cols[i]] = df[cols[i]].astype(to)
    return df

# 下載證交所資料 ------
link = 'https://www.twse.com.tw/exchangeReport/STOCK_DAY_ALL?response=open_data'
data = pd.read_csv(link)
# ['證券代號', '證券名稱', '成交股數', '成交金額', '開盤價',
#  '最高價', '最低價', '收盤價', '漲跌價差', '成交筆數']
data.columns = ['id', 'NAME', 'TRADE_VOLUME', 'TRADE_VALUE', 
                'OPEN', 'HIGH' ,'LOW', 'CLOSE', 'PRICE_CHANGE', 'TRANSACTION']   

data = data.rename(columns={'id': 'STOCK_SYMBOL'})
# 除了證券代號外，其他欄位都是str，且部份資料中有''
data = data.replace('', np.nan, regex=True)
# 將data type轉為float
data = df_conv_col_type(df=data, 
                        cols=['TRADE_VOLUME', 'TRADE_VALUE', 'OPEN', 'HIGH' ,'LOW',
                              'CLOSE', 'PRICE_CHANGE', 'TRANSACTION'],
                        to='float')
data

Unnamed: 0,STOCK_SYMBOL,NAME,TRADE_VOLUME,TRADE_VALUE,OPEN,HIGH,LOW,CLOSE,PRICE_CHANGE,TRANSACTION
0,0050,元大台灣50,4251764.0,497565809.0,117.15,117.50,116.75,117.20,-0.35,10633.0
1,0051,元大中型100,45603.0,2604555.0,57.10,57.55,56.85,57.55,0.15,152.0
2,0052,富邦科技,124891.0,12471378.0,99.70,100.50,99.55,100.10,-0.25,293.0
3,0053,元大電子,14017.0,814937.0,58.20,58.50,57.95,58.50,-0.20,1001.0
4,0055,元大MSCI金融,204113.0,4587623.0,22.67,22.67,22.43,22.52,-0.15,238.0
...,...,...,...,...,...,...,...,...,...,...
1186,9944,新麗,439676.0,9267714.0,21.10,21.25,20.90,20.95,-0.25,443.0
1187,9945,潤泰新,4720106.0,164577740.0,35.20,35.20,34.70,34.85,-0.35,3468.0
1188,9946,三發地產,118613.0,1654269.0,13.95,14.05,13.80,14.00,0.05,74.0
1189,9955,佳龍,940614.0,24761204.0,26.15,26.65,26.10,26.45,-0.20,592.0


In [14]:
link_info = 'https://mopsfin.twse.com.tw/opendata/t187ap03_L.csv'
df_info = pd.read_csv(link_info)
df_info_k = df_info[['實收資本額','公司代號']].copy()

# Calculate outstanding shares
df_info_k['tvolume'] = round((df_info_k['實收資本額'] /  10) /1000)
df_info_k = df_info_k.rename(columns={'公司代號': 'STOCK_SYMBOL'})

# Convert 'STOCK_SYMBOL' to string type in both dataframes
data['STOCK_SYMBOL'] = data['STOCK_SYMBOL'].astype(str)
df_info_k['STOCK_SYMBOL'] = df_info_k['STOCK_SYMBOL'].astype(str)

#  merge
data = data.merge(df_info_k[['STOCK_SYMBOL', 'tvolume']], how='left', on='STOCK_SYMBOL')
data = data.dropna(subset=['tvolume'])
data.head()

Unnamed: 0,STOCK_SYMBOL,NAME,TRADE_VOLUME,TRADE_VALUE,OPEN,HIGH,LOW,CLOSE,PRICE_CHANGE,TRANSACTION,tvolume
180,1101,台泥,26092568.0,985403140.0,37.85,37.95,37.6,37.8,-0.4,17905.0,7356182.0
182,1102,亞泥,2748562.0,120736174.0,43.9,44.1,43.8,43.95,0.1,1844.0,3545928.0
183,1103,嘉泥,376776.0,7442037.0,19.8,19.85,19.7,19.8,0.0,163.0,774781.0
184,1104,環泥,1949600.0,57981673.0,29.7,29.95,29.3,29.85,0.15,986.0,653609.0
185,1108,幸福,4397665.0,68015903.0,15.8,15.95,15.15,15.5,-0.55,1816.0,404738.0


In [15]:
# 下載證交所基本資料 ------
res = requests.get("http://isin.twse.com.tw/isin/C_public.jsp?strMode=2")
df_info = pd.read_html(res.text)[0]

# 設定column名稱
df_info.columns = df_info.iloc[0]
# 刪除第一行
df_info = df_info.iloc[2:]
# 先移除row，再移除column，超過三個NaN則移除
df_info = df_info.dropna(thresh=3, axis=0).dropna(thresh=3, axis=1)
df_info[['STOCK_SYMBOL', 'NAME']] = df_info['有價證券代號及名稱'].str.split('　', expand=True)

df_info = df_info.dropna(subset=['產業別'])
# 将 ID 列中非数字的值设置为 NaN
df_info['STOCK_SYMBOL'] = pd.to_numeric(df_info['STOCK_SYMBOL'], errors='coerce')

# 删除包含 NaN 的行
df_info = df_info.dropna(subset=['STOCK_SYMBOL'])

# 将 ID 列恢复为整数类型
df_info['STOCK_SYMBOL'] = df_info['STOCK_SYMBOL'].astype(int)
df_info.drop(columns=['國際證券辨識號碼(ISIN Code)', 'NAME', '上市日', 'CFICode', '備註', '有價證券代號及名稱'], inplace=True)
df_info

Unnamed: 0,市場別,產業別,STOCK_SYMBOL
2,上市,水泥工業,1101
3,上市,水泥工業,1102
4,上市,水泥工業,1103
5,上市,水泥工業,1104
6,上市,水泥工業,1108
...,...,...,...
975,上市,其他業,9955
976,上市,鋼鐵工業,9958
24838,上市臺灣創新板,半導體業,6854
24839,上市臺灣創新板,其他業,6869


In [16]:
# Convert the 'STOCK_SYMBOL' column in both dataframes to string
data['STOCK_SYMBOL'] = data['STOCK_SYMBOL'].astype(str)
df_info['STOCK_SYMBOL'] = df_info['STOCK_SYMBOL'].astype(str)

# Perform the merge
df = data.merge(df_info[['STOCK_SYMBOL', '產業別', '市場別']], how='left', on='STOCK_SYMBOL')
df = df.dropna(subset=['產業別'])
df = df.rename(columns={'產業別': 'category', '市場別': 'market'})
df['market_value'] = df['tvolume'] * df['CLOSE']
df['log_market_value'] = np.log(df['market_value'])
df['PRICE_CHANGE%%']=df['PRICE_CHANGE'].astype(str).apply(lambda s: '+' + s if '-' not in s else s) + '%'
df

Unnamed: 0,STOCK_SYMBOL,NAME,TRADE_VOLUME,TRADE_VALUE,OPEN,HIGH,LOW,CLOSE,PRICE_CHANGE,TRANSACTION,tvolume,category,market,market_value,log_market_value,PRICE_CHANGE%%
0,1101,台泥,26092568.0,985403140.0,37.85,37.95,37.60,37.80,-0.40,17905.0,7356182.0,水泥工業,上市,2.780637e+08,19.443361,-0.4%
1,1102,亞泥,2748562.0,120736174.0,43.90,44.10,43.80,43.95,0.10,1844.0,3545928.0,水泥工業,上市,1.558435e+08,18.864363,+0.1%
2,1103,嘉泥,376776.0,7442037.0,19.80,19.85,19.70,19.80,0.00,163.0,774781.0,水泥工業,上市,1.534066e+07,16.546018,+0.0%
3,1104,環泥,1949600.0,57981673.0,29.70,29.95,29.30,29.85,0.15,986.0,653609.0,水泥工業,上市,1.951023e+07,16.786449,+0.15%
4,1108,幸福,4397665.0,68015903.0,15.80,15.95,15.15,15.50,-0.55,1816.0,404738.0,水泥工業,上市,6.273439e+06,15.651835,-0.55%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
979,9944,新麗,439676.0,9267714.0,21.10,21.25,20.90,20.95,-0.25,443.0,109107.0,其他業,上市,2.285792e+06,14.642223,-0.25%
980,9945,潤泰新,4720106.0,164577740.0,35.20,35.20,34.70,34.85,-0.35,3468.0,3160250.0,其他業,上市,1.101347e+08,18.517215,-0.35%
981,9946,三發地產,118613.0,1654269.0,13.95,14.05,13.80,14.00,0.05,74.0,326554.0,建材營造業,上市,4.571756e+06,15.335408,+0.05%
982,9955,佳龍,940614.0,24761204.0,26.15,26.65,26.10,26.45,-0.20,592.0,103208.0,其他業,上市,2.729852e+06,14.819758,-0.2%


In [19]:
def tw_stock_treemap(today=None, area_ind='market_value', item='PRICE_CHANGE', clip=None):
    if item in ['PRICE_CHANGE']:
        color_continuous_midpoint = 0
    else:
        color_continuous_midpoint = np.average(df[item], weights=df[area_ind])
    df['custom_item_label'] = round(df[item], 2).astype(str)
    fig = px.treemap(df,
                 path=['market', 'category', 'NAME'],
                 values=area_ind,
                 color=item,
                 color_continuous_scale=[(0, 'darkgreen'), (0.5, 'orange'), (1, 'darkred')],
                 color_continuous_midpoint=color_continuous_midpoint,
                 range_color=[-7.5, 7.5],
                 custom_data=['PRICE_CHANGE%%', 'CLOSE'],  
                 title=f'TW-Stock Market TreeMap({today})'
                       f'---Area:"Market Value(Thousand NTD)"---Color:Price Change%',
                 width=1600,
                 height=800)
   
    fig.update_traces(root_color="lightgrey",
                      textposition='middle center',
                      textfont_size=24,
                      texttemplate="%{label}<br>%{customdata[0]}<br>%{customdata[1]}",
                      )
    return fig

today = date.today()
area_ind="market_value" 
item="PRICE_CHANGE" 

tw_stock_treemap(today, area_ind, item)
