# In [None]:
import datetime
import requests
import pandas as pd
from bs4 import BeautifulSoup
import time

def generate_date_list(start_date, end_date):
    """Return every day from ``start_date`` through ``end_date``, inclusive.

    The result is a list whose elements are ``start_date`` shifted by 0, 1,
    2, ... whole days. When ``end_date`` lies before ``start_date`` the list
    is empty.
    """
    whole_days = (end_date - start_date).days
    days = []
    for offset in range(whole_days + 1):
        days.append(start_date + datetime.timedelta(days=offset))
    return days

def fetch_and_process_data(url, date, market_type, timeout=30):
    """Download one day's quote page and return it as a normalized DataFrame.

    Args:
        url: Address of the HTML quote page to request.
        date: Day the page belongs to; only ``date.strftime`` is used, to
            fill the ``date`` column.
        market_type: ``'twse'`` selects the TWSE page layout; any other
            value is parsed with the TPEX layout.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole run.

    Returns:
        A DataFrame with the columns ``date``, ``market``, ``code``,
        ``name``, ``open``, ``high``, ``low``, ``close``, ``volume`` and
        ``value`` (all cell values are strings), or ``None`` when the page
        contains the "no matching data" notice.

    Raises:
        requests.exceptions.RequestException: On a timeout, a connection
            failure, or an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    if '很抱歉，沒有符合條件的資料!' in response.text:
        return None  # None signals that there is no data for this day

    # Fail with a clear HTTP error instead of an obscure parsing error below.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    if market_type == 'twse':
        # NOTE(review): the table is picked by position (index 8 on the page);
        # presumably the per-security quote table — verify if the layout changes.
        table = soup.find_all('table')[8]
        # Column titles are read from the third row of the table header.
        header_row = table.find('thead').find_all('tr')[2]
        column_names = [td.get_text().strip() for td in header_row.find_all('td')]
        # Drop thousands separators and the '--' placeholder from every cell.
        data_rows = [
            [td.get_text().strip().replace(',', '').replace('--', '') for td in row.find_all('td')]
            for row in table.find('tbody').find_all('tr')
        ]
        column_map = {
            '證券代號': 'code',
            '證券名稱': 'name',
            '開盤價': 'open',
            '最高價': 'high',
            '最低價': 'low',
            '收盤價': 'close',
            '成交股數': 'volume',
            '成交金額': 'value',
        }
    else:
        column_names = [td.text.strip() for td in soup.find('thead').find_all('td')]
        # Drop thousands separators so the numeric columns have the same
        # format as the TWSE rows that end up in the same CSV file.
        data_rows = [
            [td.text.strip().replace(',', '') for td in row.find_all('td')]
            for row in soup.find('tbody').find_all('tr')
        ]
        column_map = {
            '代號': 'code',
            '名稱': 'name',
            '開盤': 'open',
            '最高': 'high',
            '最低': 'low',
            '收盤': 'close',
            '成交股數': 'volume',
            '成交金額(元)': 'value',
        }

    df = pd.DataFrame(data_rows, columns=column_names)
    # Keep only the columns of interest, in this order, under common names.
    df = df[list(column_map)].rename(columns=column_map)

    df.insert(0, 'date', date.strftime('%Y-%m-%d'))
    df.insert(1, 'market', 'TWSE' if market_type == 'twse' else 'TPEX')
    # Prefix the code with an apostrophe — presumably so spreadsheet software
    # treats it as text and keeps leading zeros; confirm with the CSV consumer.
    df['code'] = "'" + df['code'].astype(str)
    return df

def save_to_csv(df, start_date, end_date):
    """Write ``df`` to a CSV file named after the two dates and report it.

    The file is created in the current working directory as
    ``stock_price_data_<end>_<start>.csv`` (dates formatted as YYYYMMDD, end
    date first), without the index column. The file name is printed once the
    file has been written.
    """
    end_tag = end_date.strftime("%Y%m%d")
    start_tag = start_date.strftime("%Y%m%d")
    file_name = 'stock_price_data_' + end_tag + '_' + start_tag + '.csv'
    df.to_csv(file_name, index=False)
    print('Saved file: ' + file_name)

# First and last day to download (both inclusive).
start_date = datetime.datetime.strptime('2022/1/1', '%Y/%m/%d')
end_date = datetime.datetime.strptime('2022/12/31', '%Y/%m/%d')

# Save frequency: write a CSV file every this many calendar days.
save_frequency = 30

# Every calendar day in the range.
date_list = generate_date_list(start_date, end_date)
total_days = len(date_list)

# Frames downloaded since the last save. They are concatenated once per save
# instead of after every request, which avoids recopying all rows each time.
df_list = []
df_temp = pd.DataFrame()

# Download the data day by day.
for day_number, date in enumerate(date_list, start=1):
    twse_url = f'https://www.twse.com.tw/exchangeReport/MI_INDEX?response=html&date={date.strftime("%Y%m%d")}&type=ALLBUT0999'
    print(twse_url)
    df_twse = fetch_and_process_data(twse_url, date, 'twse')
    if df_twse is not None:
        df_list.append(df_twse)

    # The TPEX URL takes the year as (Gregorian year - 1911).
    tpex_url = f'https://www.tpex.org.tw/web/stock/aftertrading/otc_quotes_no1430/stk_wn1430_result.php?l=zh-tw&o=htm&d={(date.year-1911)}/{date.strftime("%m/%d")}&se=EW&s=0,asc,0'
    print(tpex_url)
    df_tpex = fetch_and_process_data(tpex_url, date, 'otc')
    if df_tpex is not None:
        df_list.append(df_tpex)

    time.sleep(1)  # pause 1 second after fetching each day's data

    # Save on every `save_frequency`-th day and on the final day.
    if day_number == total_days or day_number % save_frequency == 0:
        if df_list:
            df_temp = pd.concat(df_list, ignore_index=True)
        # NOTE(review): the file name always carries the first day of the whole
        # range, although the file holds only the rows gathered since the
        # previous save — confirm whether that naming is intended.
        save_to_csv(df_temp, date_list[0], date)
        df_list = []
        df_temp = pd.DataFrame()

print('All data saved.')
