### 下載台灣加權指數和其前兩種成分股的時間序列歷史股價數據
* https://www.taifex.com.tw/cht/9/futuresQADetail
* 日期：2015-02-24 至 2025-01-22

In [1]:
import yfinance as yf
import numpy as np
import pandas as pd
import ta.trend
import ta.momentum
from sklearn.preprocessing import MinMaxScaler
import time

In [2]:
tickers = {
    'taiex':'^TWII',
    'tsmc':'2330.TW',
    'mediatek':'2454.TW',
    'foxconn':'2317.TW'
}

In [5]:
def calculate(df: pd.DataFrame):
    df['dailyreturn'] = df['close'].pct_change()
    df['macd'] = ta.trend.MACD(df['close'], window_slow=12, window_fast=6).macd()
    df['rsi'] = ta.momentum.RSIIndicator(df['close'], window=6).rsi()
    df['k'] = ta.momentum.StochasticOscillator(df['high'], df['low'], df['close'], window=6, smooth_window=2).stoch()
    df['d'] = ta.momentum.StochasticOscillator(df['high'], df['low'], df['close'], window=6, smooth_window=2).stoch_signal()
    df['r'] = ta.momentum.WilliamsRIndicator(df['high'], df['low'], df['close'], lbp=6).williams_r()
    
    return df

In [7]:
start_date = '2015-02-24'
# start_date = '2010-02-22'
end_date = '2025-01-22'

for key, value in tickers.items():
    try:
        data = yf.download(value, start=start_date, end=end_date, auto_adjust=True, multi_level_index=False)
        print(f'✅ {key} done')
        data.columns = data.columns.str.lower()
        data = calculate(data)
        data.to_csv(f'./{key}_10y_raw.csv')
    except Exception as e:
        print(f'⚠️ {key} error: {e}')
    time.sleep(5)

[*********************100%***********************]  1 of 1 completed

1 Failed download:
['^TWII']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


✅ taiex done


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['2330.TW']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


✅ tsmc done


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['2454.TW']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


✅ mediatek done


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['2317.TW']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


✅ foxconn done


In [9]:
start_date = '2015-02-24'
end_date = '2025-01-22'

for key, value in tickers.items():
    df = pd.read_csv(f'./15y/{key}_stock_data_15y.csv')
    df['Date'] = pd.to_datetime(df['Date'])
    df_10y = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]
    try:
        df_10y.to_csv(f'./{key}_10y_raw.csv', index=False)
        print(f'✅ {key} done')
    except Exception as e:
        print(f'⚠️ {key} error: {e}')
    

✅ taiex done
✅ tsmc done
✅ mediatek done
✅ foxconn done


In [11]:
def preprocess(ticker_name):
    df = pd.read_csv(f'./10y/{ticker_name}_10y_raw.csv')
    
    # 新增前 1-5 天的價格數據
    df['before1day'] = df['close'].shift(1)
    df['before2day'] = df['close'].shift(2)
    df['before3day'] = df['close'].shift(3)
    df['before4day'] = df['close'].shift(4)
    df['before5day'] = df['close'].shift(5)

    # 轉換 Date 數據格式為 datetime
    df['Date'] = pd.to_datetime(df['Date'])

    # 清除缺失值
    df = df.dropna()

    # 進行 Min-Max 標準化
    scaler = MinMaxScaler(feature_range=(0, 1))
    df['close'] = scaler.fit_transform(df[['close']])
    df['before1day'] = scaler.fit_transform(df[['before1day']])
    df['before2day'] = scaler.fit_transform(df[['before2day']])
    df['before3day'] = scaler.fit_transform(df[['before3day']])
    df['before4day'] = scaler.fit_transform(df[['before4day']])
    df['before5day'] = scaler.fit_transform(df[['before5day']])
    df['macd'] = scaler.fit_transform(df[['macd']])
    df['r'] = scaler.fit_transform(df[['r']])
    df['rsi'] = scaler.fit_transform(df[['rsi']])
    df['k'] = scaler.fit_transform(df[['k']])
    df['d'] = scaler.fit_transform(df[['d']])

        
    df = df.drop(['high', 'low', 'volume'], axis=1)
    df = df.reset_index(drop=True)

    # 保存數據到CSV文件
    df.to_csv(f'./{ticker_name}_10y_data.csv', index=False)

In [12]:
for key, value in tickers.items():
    preprocess(key)

In [13]:
for key, value in tickers.items():
    df = pd.read_csv(f'./{key}_10y_data.csv')
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.sort_values('Date').reset_index(drop=True)

    split_index = int(len(df) * 0.7)
    train_df = df.iloc[:split_index]
    test_df = df.iloc[split_index:]

    train_df.to_csv(f'../{key}_train_data.csv', index=False)
    test_df.to_csv(f'../{key}_test_data.csv', index=False)
    print(f'✅ {key} done')

✅ taiex done
✅ tsmc done
✅ mediatek done
✅ foxconn done


In [3]:
def preprocess2(ticker_name):
    df = pd.read_csv(f'./10y/{ticker_name}_10y_raw.csv')

    # 清除缺失值
    df = df.dropna()

    # 進行 Min-Max 標準化
    scaler = MinMaxScaler(feature_range=(0, 1))
    df['close'] = scaler.fit_transform(df[['close']])
    df['macd'] = scaler.fit_transform(df[['macd']])
    df['r'] = scaler.fit_transform(df[['r']])
    df['rsi'] = scaler.fit_transform(df[['rsi']])
    df['k'] = scaler.fit_transform(df[['k']])
    df['d'] = scaler.fit_transform(df[['d']])

        
    df = df.drop(['high', 'low', 'volume'], axis=1)
    df = df.reset_index(drop=True)
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.sort_values('Date').reset_index(drop=True)

    # 保存數據到CSV文件
    split_index = int(len(df) * 0.7)
    train_df = df.iloc[:split_index]
    test_df = df.iloc[split_index:]

    train_df.to_csv(f'../OtherModel/{key}_train_data.csv', index=False)
    test_df.to_csv(f'../OtherModel/{key}_test_data.csv', index=False)
    print(f'✅ {key} done')


In [4]:
for key, value in tickers.items():
    preprocess2(key)

✅ taiex done
✅ tsmc done
✅ mediatek done
✅ foxconn done
