In [2]:
import datetime as dt
from io import StringIO

import numpy as np
import pandas as pd
import requests
from fake_useragent import UserAgent


def _parse_price_csv(raw_text: str, date: dt.datetime):
    """Convert the raw MI_INDEX CSV text into a cleaned DataFrame.

    Args:
        raw_text: Response body exactly as received from the server.
        date: Value written (via ``pd.to_datetime``) into the ``date`` column.

    Returns:
        The cleaned table, or ``None`` when no line of ``raw_text`` splits
        into more than 10 pieces on ``",``.
    """
    # Some cells arrive in the form ="0050"; dropping every '=' leaves plain quoted CSV.
    content = raw_text.replace('=', '')

    # Discard every line that has 10 or fewer columns.
    rows = [line for line in content.split('\n') if len(line.split('",')) > 10]

    # Nothing usable was downloaded: report that with None.
    if not rows:
        return None

    # dtype=str disables type inference, so an all-numeric code column keeps
    # its leading zeros ("0050" must not turn into the integer 50).
    df = pd.read_csv(StringIO('\n'.join(rows)), dtype=str)
    # Make sure every column holds text so the .str accessor below is usable.
    df = df.astype(str)
    # Strip thousands separators such as "15,718,027".
    df = df.apply(lambda s: s.str.replace(',', '', regex=False))

    df = df.rename(columns={
        '證券代號': 'stock_id',
    })
    df = df.drop([
        '漲跌價差',
        '本益比'
    ], axis=1)

    # Convert every column except stock_id and the security name to numbers;
    # errors='coerce' replaces anything that is not a number with NaN.
    text_columns = ('stock_id', '證券名稱')
    df = df.apply(
        lambda s: s if s.name in text_columns else pd.to_numeric(s, errors='coerce')
    )

    # Remove columns that ended up entirely NaN.
    df = df.dropna(axis=1, how='all')

    # assign() returns a new frame instead of writing into the selected one.
    return df.assign(date=pd.to_datetime(date))


def crawl_price(date: dt.datetime, timeout: float = 30.0):
    """Download the MI_INDEX table from www.twse.com.tw for one day and clean it.

    Args:
        date: Day to request; sent to the server formatted as ``YYYYMMDD`` and
            stored in the ``date`` column of the result.
        timeout: Seconds ``requests`` may wait on the server before raising
            instead of blocking forever.

    Returns:
        A DataFrame in which ``stock_id`` and ``證券名稱`` are text, every other
        column is numeric (NaN where the cell was not a number), all-NaN
        columns are removed and ``date`` holds the requested day. ``None`` is
        returned when the response contains no table rows.
    """
    ua = UserAgent()

    # NOTE(review): the HTTP status is not checked; an error page presumably
    # contains no table rows and therefore also yields None - confirm whether
    # callers need to tell that apart from a day without data.
    r = requests.post(
        'https://www.twse.com.tw/exchangeReport/MI_INDEX',
        params={
            'response': 'csv',
            'date': date.strftime('%Y%m%d'),
            'type': 'ALLBUT0999',  # all (excluding warrants, callable bull/bear contracts, extendable bull/bear contracts)
            '_': int(dt.datetime.now().timestamp() * 1000),  # current time in ms; presumably a cache buster - confirm
        },
        headers={
            'user-agent': ua.random
        },
        timeout=timeout,
    )

    return _parse_price_csv(r.text, date)

# Fetch the table for 2023-07-17 and show it as the cell output.
trade_day = dt.datetime(2023, 7, 17)
df = crawl_price(trade_day)
df

Unnamed: 0,stock_id,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,date
0,0050,元大台灣50,15718027,14370,2074554911,132.00,132.20,131.75,132.00,132.00,284,132.05,29,2023-07-17
1,0051,元大中型100,254466,339,18281782,71.45,72.30,71.45,71.95,71.90,1,71.95,2,2023-07-17
2,0052,富邦科技,400724,608,48581008,121.20,121.55,120.80,121.30,121.30,17,121.40,3,2023-07-17
3,0053,元大電子,20493,33,1436385,69.90,70.35,69.90,70.20,69.90,1,70.20,1,2023-07-17
4,0055,元大MSCI金融,548231,576,12973390,23.45,23.81,23.45,23.80,23.79,6,23.80,9,2023-07-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,9944,新麗,376553,205,8063705,21.20,21.70,21.15,21.50,21.50,6,21.55,4,2023-07-17
1196,9945,潤泰新,3929345,3067,138994400,35.55,35.60,35.15,35.45,35.40,126,35.45,98,2023-07-17
1197,9946,三發地產,73263,85,1006054,13.80,13.85,13.65,13.70,13.65,18,13.75,9,2023-07-17
1198,9955,佳龍,261864,185,6103462,23.30,23.55,23.20,23.30,23.30,16,23.35,4,2023-07-17


In [3]:
# Translate the remaining Chinese column headers into English snake_case names.
column_translation = {
    # identification
    '證券名稱': 'name',
    # prices
    '開盤價': 'open',
    '最高價': 'high',
    '最低價': 'low',
    '收盤價': 'close',
    # activity
    '成交股數': 'volume',
    '成交金額': 'traded_value',
    '成交筆數': 'transaction_count',
    # last displayed bid / ask
    '最後揭示買價': 'last_bid_price',
    '最後揭示買量': 'last_bid_volume',
    '最後揭示賣價': 'last_ask_price',
    '最後揭示賣量': 'last_ask_volume',
}
df = df.rename(columns=column_translation)
df

Unnamed: 0,stock_id,name,volume,transaction_count,traded_value,open,high,low,close,last_bid_price,last_bid_volume,last_ask_price,last_ask_volume,date
0,0050,元大台灣50,15718027,14370,2074554911,132.00,132.20,131.75,132.00,132.00,284,132.05,29,2023-07-17
1,0051,元大中型100,254466,339,18281782,71.45,72.30,71.45,71.95,71.90,1,71.95,2,2023-07-17
2,0052,富邦科技,400724,608,48581008,121.20,121.55,120.80,121.30,121.30,17,121.40,3,2023-07-17
3,0053,元大電子,20493,33,1436385,69.90,70.35,69.90,70.20,69.90,1,70.20,1,2023-07-17
4,0055,元大MSCI金融,548231,576,12973390,23.45,23.81,23.45,23.80,23.79,6,23.80,9,2023-07-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,9944,新麗,376553,205,8063705,21.20,21.70,21.15,21.50,21.50,6,21.55,4,2023-07-17
1196,9945,潤泰新,3929345,3067,138994400,35.55,35.60,35.15,35.45,35.40,126,35.45,98,2023-07-17
1197,9946,三發地產,73263,85,1006054,13.80,13.85,13.65,13.70,13.65,18,13.75,9,2023-07-17
1198,9955,佳龍,261864,185,6103462,23.30,23.55,23.20,23.30,23.30,16,23.35,4,2023-07-17


In [25]:
# Sanity check: pandas reports NumPy's NaN as a missing value.
# `np` is provided by `import numpy as np` in the notebook's import cell.
pd.isna(np.nan)

True