# 目標：
## 一、 爬取臺灣證券交易所單一個股的各日成交資訊
## 二、 繪製圖表
## 三、 將各股和大盤指數比較

### 1. 爬取2023年10月大立光(3008)的成交資訊 

In [1]:
# 引入所有會用到的套件
import requests as r
import json
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime, date
import matplotlib.pyplot as plt

In [2]:
# 取得網址，若輸出內容為200表示連線成功
url = "https://www.twse.com.tw/rwd/zh/afterTrading/STOCK_DAY?date=20231031&stockNo=3008&response=json&_=1698754560614"
r.get(url)  

<Response [200]>

In [3]:
# 用變數名為 res 把資料存起來，並使用 json 格式打開
res = r.get(url) 
res.json()

{'stat': 'OK',
 'date': '20231031',
 'title': '112年10月 3008 大立光           各日成交資訊',
 'fields': ['日期', '成交股數', '成交金額', '開盤價', '最高價', '最低價', '收盤價', '漲跌價差', '成交筆數'],
 'data': [['112/10/02',
   '773,401',
   '1,662,760,765',
   '2,145.00',
   '2,170.00',
   '2,120.00',
   '2,145.00',
   '+10.00',
   '1,965'],
  ['112/10/03',
   '607,853',
   '1,306,159,735',
   '2,130.00',
   '2,165.00',
   '2,130.00',
   '2,140.00',
   '-5.00',
   '1,770'],
  ['112/10/04',
   '709,121',
   '1,477,363,160',
   '2,125.00',
   '2,125.00',
   '2,065.00',
   '2,080.00',
   '-60.00',
   '2,849'],
  ['112/10/05',
   '577,641',
   '1,223,764,195',
   '2,100.00',
   '2,135.00',
   '2,095.00',
   '2,120.00',
   '+40.00',
   '1,311'],
  ['112/10/06',
   '1,017,999',
   '2,171,027,320',
   '2,190.00',
   '2,190.00',
   '2,090.00',
   '2,090.00',
   '-30.00',
   '2,819'],
  ['112/10/11',
   '863,698',
   '1,849,496,625',
   '2,155.00',
   '2,165.00',
   '2,110.00',
   '2,145.00',
   '+55.00',
   '2,150'],
  ['112/10/12

In [12]:
# 根據 json 物件結構中的 'data' 鍵進行了索引
stock3008_10_json = res.json()
stock3008_10_json['data']

[['112/10/02',
  '773,401',
  '1,662,760,765',
  '2,145.00',
  '2,170.00',
  '2,120.00',
  '2,145.00',
  '+10.00',
  '1,965'],
 ['112/10/03',
  '607,853',
  '1,306,159,735',
  '2,130.00',
  '2,165.00',
  '2,130.00',
  '2,140.00',
  '-5.00',
  '1,770'],
 ['112/10/04',
  '709,121',
  '1,477,363,160',
  '2,125.00',
  '2,125.00',
  '2,065.00',
  '2,080.00',
  '-60.00',
  '2,849'],
 ['112/10/05',
  '577,641',
  '1,223,764,195',
  '2,100.00',
  '2,135.00',
  '2,095.00',
  '2,120.00',
  '+40.00',
  '1,311'],
 ['112/10/06',
  '1,017,999',
  '2,171,027,320',
  '2,190.00',
  '2,190.00',
  '2,090.00',
  '2,090.00',
  '-30.00',
  '2,819'],
 ['112/10/11',
  '863,698',
  '1,849,496,625',
  '2,155.00',
  '2,165.00',
  '2,110.00',
  '2,145.00',
  '+55.00',
  '2,150'],
 ['112/10/12',
  '771,591',
  '1,648,539,445',
  '2,135.00',
  '2,160.00',
  '2,105.00',
  '2,150.00',
  '+5.00',
  '1,980'],
 ['112/10/13',
  '2,450,243',
  '5,299,781,610',
  '2,090.00',
  '2,210.00',
  '2,055.00',
  '2,175.00',
  '+25

In [203]:
# 將爬取到的資料存入 json 檔
output_file = "3008.json"

# 使用 with 語句打開檔案並寫入 json 數據
with open(output_file, "w") as json_file:
    json.dump(stock3008_10_json, json_file)

In [16]:
# 因為 json 檔不易閱讀，所以改成 dataframe 的形格式
pd.DataFrame.from_dict(stock3008_10_json['data'])

stock3008_10_df = pd.DataFrame.from_dict(stock3008_10_json['data'])
stock3008_10_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,112/10/02,773401,1662760765,2145.0,2170.0,2120.0,2145.0,10.0,1965
1,112/10/03,607853,1306159735,2130.0,2165.0,2130.0,2140.0,-5.0,1770
2,112/10/04,709121,1477363160,2125.0,2125.0,2065.0,2080.0,-60.0,2849
3,112/10/05,577641,1223764195,2100.0,2135.0,2095.0,2120.0,40.0,1311
4,112/10/06,1017999,2171027320,2190.0,2190.0,2090.0,2090.0,-30.0,2819


In [18]:
# 因為上述結果無欄位名稱，因此從 fields 抓取欄位名稱當表頭
stock_json['fields']
stock3008_10_df.columns = stock3008_10_json['fields']
stock3008_10_df.head()

Unnamed: 0,日期,成交股數,成交金額,開盤價,最高價,最低價,收盤價,漲跌價差,成交筆數
0,112/10/02,773401,1662760765,2145.0,2170.0,2120.0,2145.0,10.0,1965
1,112/10/03,607853,1306159735,2130.0,2165.0,2130.0,2140.0,-5.0,1770
2,112/10/04,709121,1477363160,2125.0,2125.0,2065.0,2080.0,-60.0,2849
3,112/10/05,577641,1223764195,2100.0,2135.0,2095.0,2120.0,40.0,1311
4,112/10/06,1017999,2171027320,2190.0,2190.0,2090.0,2090.0,-30.0,2819


### 2. 下載2023年至今，大立光(3008)的成交資訊

In [22]:
# 下載從 2023-01-01 到 2023-10-31 的成交資訊  MS 表 month start(1號開始)
month_list = pd.date_range('2023-01-01','2023-10-01', freq='MS').strftime("%Y%m%d").tolist()  
for month in month_list:
    print(month)

20230101
20230201
20230301
20230401
20230501
20230601
20230701
20230801
20230901
20231001


In [24]:
# 建立空的 dataframe
df_3008 = pd.DataFrame()
df_3008

In [31]:
# 使用 month_list 替換網址中 month 的位置，即可抓取到不同日期的資料 
for month in month_list:
    url = "https://www.twse.com.tw/exchangeReport/STOCK_DAY?response=json&date="+ month + "&stockNo=2330"
    res = r.get(url)
    stock3008_json = res.json()
    stock3008_df = pd.DataFrame.from_dict(stock3008_json['data'])
    
    # append 將在未來的Pandas版本中將被刪除，所以使用pandas.concat函式來代替。
    # df_3008 = df_3008.append(stock3008_df, ignore_index=True)   
    df_3008 = pd.concat([df_3008, stock3008_df], ignore_index=True)

In [32]:
# 指定欄位名稱
df_3008.columns = ['日期', '成交股數', '成交金額', '開盤價', '最高價', '最低價', '收盤價', '漲跌價差', '成交筆數']
df_3008

Unnamed: 0,日期,成交股數,成交金額,開盤價,最高價,最低價,收盤價,漲跌價差,成交筆數
0,112/01/03,15311364,6871973708,446.00,453.50,443.00,453.00,+4.50,22581
1,112/01/04,20626874,9310050329,449.50,455.00,448.50,449.50,-3.50,18233
2,112/01/05,23972099,10972616269,459.00,459.50,455.00,458.50,+9.00,20752
3,112/01/06,21313593,9745142549,455.00,459.50,455.00,458.50,0.00,16635
4,112/01/09,49186355,23352375299,468.00,481.00,467.50,481.00,+22.50,57305
...,...,...,...,...,...,...,...,...,...
1171,112/10/25,17137199,9371422161,544.00,551.00,544.00,544.00,0.00,12233
1172,112/10/26,31682955,16851704175,530.00,535.00,530.00,531.00,-13.00,45750
1173,112/10/27,17050787,9094404470,534.00,536.00,532.00,533.00,+2.00,14773
1174,112/10/30,23299422,12374705789,531.00,534.00,528.00,532.00,-1.00,24536


In [46]:
# 將以上程式碼寫成函式，方便以後查詢
def get_stock_data(start_year, start_month, end_year, end_month, stock_code):
    # 日期的「日」部分被固定設為 1 號
    start_date = str(date(start_year, start_month, 1))
    end_date = str(date(end_year, end_month, 1))
    month_list = pd.date_range(start_date, end_date, freq='MS').strftime("%Y%m%d").tolist()
    
    df_3008 = pd.DataFrame()
    for month in month_list:
        url = "https://www.twse.com.tw/exchangeReport/STOCK_DAY?response=json&date="+ month + "&stockNo=" + str(stock_code)
        res = r.get(url)
        stock3008_json = res.json()
        stock3008_df = pd.DataFrame.from_dict(stock3008_json['data'])
        df_3008 = pd.concat([df_3008, stock3008_df], ignore_index=True)
    
    df_3008.columns = ['日期', '成交股數', '成交金額', '開盤價', '最高價', '最低價', '收盤價', '漲跌價差', '成交筆數']
    return df_3008

In [59]:
# 執行自己寫的函式
stock_3008 = get_stock_data(start_year = 2013, start_month = 1, end_year = 2023, end_month = 10, stock_code = 3008)
stock_3008

Unnamed: 0,日期,成交股數,成交金額,開盤價,最高價,最低價,收盤價,漲跌價差,成交筆數
0,102/01/02,2896765,2351994480,790.00,832.00,785.00,832.00,+54.00,2409
1,102/01/03,2278777,1898138248,836.00,844.00,824.00,824.00,-8.00,2048
2,102/01/04,1908685,1542840015,824.00,825.00,791.00,802.00,-22.00,1707
3,102/01/07,3777648,2874848056,802.00,802.00,746.00,746.00,-56.00,3127
4,102/01/08,3475195,2544618640,740.00,744.00,723.00,738.00,-8.00,3074
...,...,...,...,...,...,...,...,...,...
2643,112/10/25,294925,593731365,2010.00,2020.00,2005.00,2010.00,0.00,1537
2644,112/10/26,559943,1118462235,1990.00,2025.00,1980.00,2005.00,-5.00,2460
2645,112/10/27,373473,752994400,2005.00,2025.00,1995.00,2020.00,+15.00,1282
2646,112/10/30,594756,1213464225,2040.00,2075.00,2015.00,2025.00,+5.00,1698


In [60]:
# 將資料存成 csv 檔
stock_3008.to_csv("3008.csv", encoding="utf-8")

### 一次下載多支上市台股個股歷史資料

In [79]:
# 引入其他套件
from lxml import etree
import plotly.graph_objects as go

In [80]:
# 抓取台股上市個股股票代碼
url_id = "https://isin.twse.com.tw/isin/C_public.jsp?strMode=2" 
res = r.get(url_id)

In [81]:
# Beautiful Soup使用 "lxml" 的解析器，解析HTML文檔
soup = BeautifulSoup(res.text, "lxml") 
tr = soup.findAll('tr')
tds = []

# 過濾掉無用資訊
for raw in tr:
     data = [td.get_text() for td in raw.findAll("td")]
     if len(data) == 7:
         tds.append(data)

In [None]:
tr

In [None]:
tds

In [85]:
# 儲存表格
import pandas as pd 
pd.DataFrame(tds[1:],columns=tds[0])

Unnamed: 0,有價證券代號及名稱,國際證券辨識號碼(ISIN Code),上市日,市場別,產業別,CFICode,備註
0,1101　台泥,TW0001101004,1962/02/09,上市,水泥工業,ESVUFR,
1,1102　亞泥,TW0001102002,1962/06/08,上市,水泥工業,ESVUFR,
2,1103　嘉泥,TW0001103000,1969/11/14,上市,水泥工業,ESVUFR,
3,1104　環泥,TW0001104008,1971/02/01,上市,水泥工業,ESVUFR,
4,1108　幸福,TW0001108009,1990/06/06,上市,水泥工業,ESVUFR,
...,...,...,...,...,...,...,...
31836,01003T　兆豐新光R1,TW00001003T4,2005/12/26,上市,,CBCIXU,
31837,01004T　土銀富邦R2,TW00001004T2,2006/04/13,上市,,CBCIXU,
31838,01007T　兆豐國泰R2,TW00001007T5,2006/10/13,上市,,CBCIXU,
31839,01009T　王道圓滿R1,TW00001009T1,2018/06/21,上市,,CBCIXU,


In [221]:
def get_tw_stock_data(start_year, start_month, end_year, end_month, stock_code):
    start_date = str(date(start_year, start_month, 1))
    end_date = str(date(end_year, end_month, 1))
    month_list = pd.date_range(start_date, end_date, freq='MS').strftime("%Y%m%d").tolist()
    
    df_3s = pd.DataFrame()
    for month in month_list:
        url_3s = "https://www.twse.com.tw/exchangeReport/STOCK_DAY?response=json&date="+ month + "&stockNo=" + str(stock_code)
        res = r.get(url_3s)
        stock3s_json = res.json()
        stock3s_df = pd.DataFrame.from_dict(stock3s_json['data'])
        df_3s = df_3s.append(stock3s_df, ignore_index = True)
        
    # 資料轉型
    for col in [0, 1, 2, 3, 4, 5, 6, 8]:
        for row in range(df_3s.shape[0]):
            # 把"日期"從字串(string)換成時間(datetime)，並將民國年換成西元年
            if col == 0:
                day = df_3s.iloc[row,0].split('/')
                df_3s.iloc[row, 0] = datetime(int(day[0]) + 1911, int(day[1]), int(day[2]))  
            # 把"開盤價", "最高價", "最低價", "收盤價"帶有逗號的字串(string)換成浮點數(float) 
            elif col != 0:
                df_3s.iloc[row, col] = float(df_3s.iloc[row,col].replace(',', ''))
    
    df_3s.columns = ['日期', '成交股數', '成交金額', '開盤價', '最高價', '最低價', '收盤價', '漲跌價差', '成交筆數']
    return df_3s

In [222]:
# 聯華電子(2303)、友達光電(2409)、燦坤(2430)
stock_code_list = ['2303', '2409', '2430']
df_3s = pd.DataFrame()

In [223]:
# 爬取上面三支股票十年來的成交資訊
for stock_code in stock_code_list:
    stock3s_df = get_tw_stock_data(start_year = 2018, 
                                 start_month = 10, 
                                 end_year = 2023, 
                                 end_month = 10, 
                                 stock_code = stock_code)
    stock3s_df.insert(0, '股票代碼', stock_code)
    df_3s = pd.concat([df_3s, stock3s_df], ignore_index=True)
# df3s


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [228]:
df_3s['日期'] = df_3s['日期'].dt.strftime('%Y-%m-%d')

# 將爬取到的資料存入 json 檔
output_file = "2303 2409 2430.json"

# 使用 with 語句打開檔案並寫入 json 數據
with open(output_file, "w") as json_file:
    json.dump(df_3s.to_dict(orient='records'), json_file)

In [120]:
df_3s.to_csv("2303 2409 2430.csv")

In [105]:
# 將聯華電子(2303)和總表分開
df_2303 = df_3s[df_3s['股票代碼'] == '2303']
df_2303

Unnamed: 0,股票代碼,日期,成交股數,成交金額,開盤價,最高價,最低價,收盤價,漲跌價差,成交筆數
0,2303,2018-10-01 00:00:00,12175097.0,193999689.0,16.0,16.0,15.85,15.9,-0.25,3456.0
1,2303,2018-10-02 00:00:00,15619834.0,244671183.0,15.85,15.95,15.6,15.6,-0.30,4554.0
2,2303,2018-10-03 00:00:00,17982464.0,279911223.0,15.6,15.7,15.45,15.5,-0.10,3458.0
3,2303,2018-10-04 00:00:00,48764973.0,738879008.0,15.4,15.45,15.05,15.1,-0.40,9745.0
4,2303,2018-10-05 00:00:00,57497812.0,852831993.0,15.0,15.1,14.6,14.7,-0.40,7641.0
...,...,...,...,...,...,...,...,...,...,...
1233,2303,2023-10-25 00:00:00,54853164.0,2676729160.0,48.45,49.2,48.0,49.05,+0.75,22983.0
1234,2303,2023-10-26 00:00:00,100500655.0,4719808619.0,46.95,47.35,46.7,46.8,-2.25,44577.0
1235,2303,2023-10-27 00:00:00,42770047.0,1997541438.0,46.95,46.95,46.35,46.6,-0.20,17218.0
1236,2303,2023-10-30 00:00:00,34601022.0,1606129177.0,46.7,46.8,46.15,46.4,-0.20,15633.0


In [106]:
# 將友達光電(2409)和總表分開
df_2409 = df_3s[df_3s['股票代碼'] == '2409']
df_2409

Unnamed: 0,股票代碼,日期,成交股數,成交金額,開盤價,最高價,最低價,收盤價,漲跌價差,成交筆數
1238,2409,2018-10-01 00:00:00,29416419.0,378166138.0,12.9,13.0,12.75,12.8,-0.10,4192.0
1239,2409,2018-10-02 00:00:00,17636698.0,224744481.0,12.8,12.85,12.7,12.75,-0.05,4491.0
1240,2409,2018-10-03 00:00:00,12335940.0,157488670.0,12.75,12.85,12.7,12.8,+0.05,2369.0
1241,2409,2018-10-04 00:00:00,28297192.0,356914277.0,12.75,12.75,12.55,12.55,-0.25,6701.0
1242,2409,2018-10-05 00:00:00,36640229.0,455757171.0,12.5,12.55,12.3,12.4,-0.15,6445.0
...,...,...,...,...,...,...,...,...,...,...
2464,2409,2023-10-25 00:00:00,59404725.0,931124179.0,15.4,15.85,15.35,15.75,+0.55,12455.0
2465,2409,2023-10-26 00:00:00,26538689.0,410965078.0,15.4,15.6,15.3,15.5,-0.25,6701.0
2466,2409,2023-10-27 00:00:00,25671286.0,396239495.0,15.5,15.6,15.35,15.35,-0.15,5719.0
2467,2409,2023-10-30 00:00:00,24526665.0,382004530.0,15.4,15.7,15.35,15.65,+0.30,6858.0


In [107]:
# 將燦坤(2430)和總表分開
df_2454 = df_3s[df_3s['股票代碼'] == '2430']
df_2454

Unnamed: 0,股票代碼,日期,成交股數,成交金額,開盤價,最高價,最低價,收盤價,漲跌價差,成交筆數
2469,2430,2018-10-01 00:00:00,34772.0,770736.0,22.05,22.35,22.05,22.3,-0.10,33.0
2470,2430,2018-10-02 00:00:00,68100.0,1528455.0,22.3,22.55,22.05,22.35,+0.05,44.0
2471,2430,2018-10-03 00:00:00,17203.0,382866.0,22.2,22.3,22.15,22.3,-0.05,14.0
2472,2430,2018-10-04 00:00:00,42061.0,926354.0,22.2,22.2,22.0,22.0,-0.30,37.0
2473,2430,2018-10-05 00:00:00,62200.0,1346719.0,22.0,22.0,21.5,21.65,-0.35,46.0
...,...,...,...,...,...,...,...,...,...,...
3695,2430,2023-10-25 00:00:00,18469.0,715467.0,38.7,38.75,38.7,38.75,+0.05,52.0
3696,2430,2023-10-26 00:00:00,30939.0,1194258.0,38.65,38.65,38.5,38.6,-0.15,66.0
3697,2430,2023-10-27 00:00:00,20779.0,803273.0,38.6,38.75,38.55,38.75,+0.15,44.0
3698,2430,2023-10-30 00:00:00,43251.0,1661038.0,38.7,38.7,38.2,38.65,-0.10,55.0


In [108]:
# 製作圖表
fig = go.Figure(data=[go.Scatter(x = df_2303['日期'], y = df_2303['收盤價'], name = '聯華電子', line = dict(width=3)),
                      go.Scatter(x = df_2409['日期'], y = df_2409['收盤價'], name = '友達光電', line = dict(width=3)),
                      go.Scatter(x = df_2454['日期'], y = df_2454['收盤價'], name = '燦坤', line = dict(width=3)) ])


# Set x-axis title
fig.update_xaxes(title_text="日期")

# Set y-axis title
fig.update_yaxes(title_text="股價")

fig.update_layout(
    title_text="2013年1月-2023年10月 台股個股 歷史收盤價",
    width = 1000,
    height = 500
)

### 爬取大盤指數並作圖

In [218]:
def get_stock_data(start_year, start_month, end_year, end_month):
    start_date = str(date(start_year, start_month, 1))
    end_date = str(date(end_year, end_month, 1))
    month_list = pd.date_range(start_date, end_date, freq='MS').strftime("%Y%m%d").tolist()
    
    df_si = pd.DataFrame()
    for month in month_list:
        url_si = "https://www.twse.com.tw/indicesReport/MI_5MINS_HIST?response=json&date=" + month
        res = r.get(url_si)
        stocksi_json = res.json()
        stocksi_df = pd.DataFrame.from_dict(stocksi_json['data'])
        df_si = pd.concat([df_si, stocksi_df], ignore_index=True)
    
    # 把"日期"從字串(string)換成時間(datetime)，並將民國年換成西元年
    if col == 0:
        day = df_3s.iloc[row,0].split('/')
        df_3s.iloc[row, 0] = datetime(int(day[0]) + 1911, int(day[1]), int(day[2]))  
        
    df_si.columns = ['Date', 'Open', 'High', 'Low', 'Close']
    return df_si

In [219]:
df_si = get_stock_data(start_year = 2018, start_month = 1, end_year = 2023, end_month = 10)

# df_si

In [220]:
# 將爬取到的資料存入 json 檔
output_file = "stock_si.json"

# 使用 with 語句打開檔案並寫入 json 數據
with open(output_file, "w") as json_file:
    json.dump(df_si.to_dict(), json_file)

In [162]:
stock_si.to_csv("stock_si.csv")

In [163]:
for col in range(1, 5):
    for row in range(stock_si.shape[0]):
        stock_si.iloc[row, col] = float(stock_si.iloc[row,col].replace(',', ''))
        
stock_si.head()

Unnamed: 0,Date,Open,High,Low,Close
0,107/01/02,10664.82,10710.73,10650.77,10710.73
1,107/01/03,10759.61,10813.16,10759.61,10801.57
2,107/01/04,10834.58,10853.46,10794.25,10848.63
3,107/01/05,10856.92,10879.8,10815.68,10879.8
4,107/01/08,10899.09,10918.47,10869.12,10915.75


In [164]:
# 繪製 K 線圖
fig = go.Figure(data=[go.Candlestick(x=stock_si['Date'],
                open=stock_si['Open'],
                high=stock_si['High'],
                low=stock_si['Low'],
                close=stock_si['Close'],
                increasing_line_color= 'red', 
                decreasing_line_color= 'green')])


fig.show()

### 比較大盤和科技股代表

In [165]:
from plotly.subplots import make_subplots

df1 = df_3s
df2 = stock_si

# 創立第二個 y 軸
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x = df1['日期'], y= df_2303['收盤價'], name = "聯華電子收盤價", line = dict(color="blue", dash='dashdot')),
    secondary_y = False,
)

fig.add_trace(
    go.Scatter(x = df1['日期'], y= df_2409['收盤價'], name = "友達光電收盤價", line = dict(color="purple", dash='dashdot')),
    secondary_y = False,
)


fig.add_trace(
    go.Scatter(x = df1['日期'], y= df_2454['收盤價'], name = "燦坤收盤價", line = dict(color="orange", dash='dashdot')),
    secondary_y = False,
)


fig.add_trace(
    go.Scatter(x = df1['日期'], y = stock_si['Close'], name = "大盤收盤價", line = dict(color="black")),
    secondary_y = True,
)

# Add figure title
fig.update_layout(
    title_text="大盤及個股關係圖"
)

# Set x-axis title
fig.update_xaxes(title_text="日期")

# Set y-axes titles
fig.update_yaxes(title_text="<b>價格</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>大盤指數</b>", secondary_y=True)

fig.show()