In [1]:
%pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.18.0-cp312-cp312-win_amd64.whl.metadata (3.3 kB)
Collecting tensorflow-intel==2.18.0 (from tensorflow)
  Downloading tensorflow_intel-2.18.0-cp312-cp312-win_amd64.whl.metadata (4.9 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading flatbuffers-24.12.23-py2.py3-none-any.whl.metadata (876 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensor

In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import glob

In [9]:
path = './data/'

In [10]:
df_price = pd.read_csv(path + '甘藍初秋_台北一日交易行情.csv')
df_price.head()

Unnamed: 0,日期,市場,產品,上價,中價,下價,平均價,增減%,交易量(公斤),增減%.1
0,085/01/01,109 台北一,LA1 甘藍 初秋,12.0,9.0,6.0,8.8,-,75779,-
1,085/01/02,109 台北一,LA1 甘藍 初秋,13.0,10.0,7.0,11.2,27,58780,-22
2,085/01/03,109 台北一,LA1 甘藍 初秋,13.0,10.0,7.0,10.1,-10,103999,77
3,085/01/05,109 台北一,LA1 甘藍 初秋,11.0,9.0,7.0,8.1,-,108342,-
4,085/01/06,109 台北一,LA1 甘藍 初秋,10.0,8.0,6.0,7.9,-2,113928,5


In [11]:
df_typhoon = pd.read_csv(path + 'typhoon_data.csv')
df_typhoon.head()

Unnamed: 0,年份,颱風編號,颱風名稱,颱風生命期間,颱風生命期間中心最低氣壓(hPa),颱風生命期間中心最大風速(m/s),颱風生命期間最大7級風暴風半徑(km),颱風生命期間最大10級風暴風半徑(km),警報發布報數
0,2024,202426,帕布(PABUK),2024-12-23 06:00~2024-12-25 00:00,1000,18,80,---,---
1,2024,202425,天兔(USAGI),2024-11-11 18:00~2024-11-16 03:00,945,45,150,50,19
2,2024,202424,萬宜(MAN-YI),2024-11-09 06:00~2024-11-20 00:00,900,60,180,90,---
3,2024,202423,桔梗(TORAJI),2024-11-09 06:00~2024-11-14 06:00,965,38,150,50,---
4,2024,202422,銀杏(YINXING),2024-11-03 18:00~2024-11-12 06:00,930,51,180,90,---


In [12]:
path = './data/過去九年降雨資料/'
df_rain = pd.read_csv(path + 'file.csv')
df_rain.head()

Unnamed: 0,name,description
0,dy_Report_2016.xml,2016年署屬地面測站每日雨量資料
1,dy_Report_2017.xml,2017年署屬地面測站每日雨量資料
2,dy_Report_2018.xml,2018年署屬地面測站每日雨量資料
3,dy_Report_2019.xml,2019年署屬地面測站每日雨量資料
4,dy_Report_2020.xml,2020年署屬地面測站每日雨量資料


In [2]:
# Step 1: 整合所有 CSV 文件
file_paths = glob.glob("data/train.csv")  # 修改為您的實際文件路徑模式
all_data = pd.DataFrame()

for file in file_paths:
    data = pd.read_csv(file)
    all_data = pd.concat([all_data, data], ignore_index=True)

print(f"數據集大小: {all_data.shape}")

# Step 2: 數據預處理
# 假設數據集包含 '日期' 和 '目標值' 列，您需要根據實際數據進行修改
all_data['日期'] = pd.to_datetime(all_data['日期'])  # 確保日期列格式正確

# 按日期排序
all_data = all_data.sort_values(by='日期')

# 填補缺失值
all_data = all_data.fillna(method='ffill').fillna(method='bfill')

# 特徵工程（這部分需要根據您的數據具體修改）
all_data['year'] = all_data['日期'].dt.year
all_data['month'] = all_data['日期'].dt.month
all_data['day'] = all_data['日期'].dt.day

# 歸一化目標值
scaler = MinMaxScaler(feature_range=(0, 1))
all_data['目標值'] = scaler.fit_transform(all_data[['目標值']])

# Step 3: 數據分割
# 篩選數據集
train_data = all_data[(all_data['year'] >= 2014) & (all_data['year'] <= 2020)]
val_data = all_data[(all_data['year'] == 2021) | (all_data['year'] == 2022)]
test_data = all_data[all_data['year'] == 2023]

# 創建時間序列數據
def create_sequences(data, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i + sequence_length, :-1])
        y.append(data[i + sequence_length, -1])
    return np.array(X), np.array(y)

sequence_length = 30  # 使用過去 30 天的數據進行預測

# 訓練數據
train_values = train_data[['目標值']].values
X_train, y_train = create_sequences(train_values, sequence_length)

# 驗證數據
val_values = val_data[['目標值']].values
X_val, y_val = create_sequences(val_values, sequence_length)

# 測試數據
test_values = test_data[['目標值']].values
X_test, y_test = create_sequences(test_values, sequence_length)

# Step 4: 構建 LSTM 模型
model = Sequential([
    LSTM(50, activation='relu', return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(50, activation='relu'),
    Dropout(0.2),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')

# Step 5: 模型訓練
history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_val, y_val),
    verbose=1
)

# Step 6: 模型評估
y_test_pred = model.predict(X_test)

# 將數據反轉回原始尺度
y_test = scaler.inverse_transform(y_test.reshape(-1, 1))
y_test_pred = scaler.inverse_transform(y_test_pred)

mse = mean_squared_error(y_test, y_test_pred)
r2 = r2_score(y_test, y_test_pred)

print(f"Test MSE: {mse:.2f}")
print(f"Test R2: {r2:.2f}")

# Step 7: 保存模型
import joblib
joblib.dump(model, 'trained_lstm_model.h5')

數據集大小: (0, 0)


KeyError: '日期'

In [None]:
# Step 8: 可視化結果
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 6))
plt.plot(y_test, label='True Values')
plt.plot(y_test_pred, label='Predictions')
plt.legend()
plt.title('LSTM Predictions vs True Values')
plt.show()