In [6]:
import xml.etree.ElementTree as ET
# Yearly report files to read, in chronological order.
files = [
    'dy_Report_2015.xml',
    'dy_Report_2016.xml',
    'dy_Report_2017.xml',
    'dy_Report_2018.xml',
    'dy_Report_2019.xml',
    'dy_Report_2020.xml',
    'dy_Report_2021.xml',
    'dy_Report_2022.xml',
    'dy_Report_2023.xml',
]

# Namespace prefix mapping used by every element lookup below.
ns = {'ns': 'urn:cwa:gov:tw:cwacommon:0.1'}

# Collected records: one {'month': 'YYYY-MM-01', 'total': float} per monthly entry.
data_list = []

for report_path in files:
    # Parse the XML report and grab its root element.
    report_root = ET.parse(report_path).getroot()

    for location in report_root.findall('.//ns:location', ns):
        station_name = location.find('ns:station', ns).find('ns:StationName', ns).text
        # Only the '臺南' station is of interest; skip every other location.
        if station_name != '臺南':
            continue

        for entry in location.findall('.//ns:monthly', ns):
            # Append a day component so the value later parses as a full date.
            month_start = entry.find('ns:YearMonth', ns).text + '-01'
            raw_total = entry.find('ns:Total', ns).text
            # A total of 'T' is stored as zero
            # (presumably a trace amount -- confirm against the data spec).
            amount = 0.0 if raw_total == 'T' else float(raw_total)
            data_list.append({'month': month_start, 'total': amount})

# Print the extracted records.
print(data_list)



[{'month': '2015-01-01', 'total': 13.5}, {'month': '2015-02-01', 'total': 22.0}, {'month': '2015-03-01', 'total': 5.1}, {'month': '2015-04-01', 'total': 25.8}, {'month': '2015-05-01', 'total': 300.0}, {'month': '2015-06-01', 'total': 19.0}, {'month': '2015-07-01', 'total': 225.0}, {'month': '2015-08-01', 'total': 629.6}, {'month': '2015-09-01', 'total': 192.0}, {'month': '2015-10-01', 'total': 31.0}, {'month': '2015-11-01', 'total': 4.5}, {'month': '2015-12-01', 'total': 13.5}, {'month': '2016-01-01', 'total': 170.9}, {'month': '2016-02-01', 'total': 38.0}, {'month': '2016-03-01', 'total': 96.5}, {'month': '2016-04-01', 'total': 146.0}, {'month': '2016-05-01', 'total': 87.3}, {'month': '2016-06-01', 'total': 376.5}, {'month': '2016-07-01', 'total': 374.0}, {'month': '2016-08-01', 'total': 91.5}, {'month': '2016-09-01', 'total': 1220.3}, {'month': '2016-10-01', 'total': 69.0}, {'month': '2016-11-01', 'total': 48.9}, {'month': '2016-12-01', 'total': 2.0}, {'month': '2017-01-01', 'total':

In [7]:
import pandas as pd

# Turn the extracted records into a DataFrame, add a parsed datetime column
# named 'date', use it as the index, and order the rows chronologically.
df = (
    pd.DataFrame(data_list)
    .assign(date=lambda frame: pd.to_datetime(frame['month']))
    .set_index('date')
    .sort_index()
)

# Preview the first rows.
print(df.head())



                 month  total
date                         
2015-01-01  2015-01-01   13.5
2015-02-01  2015-02-01   22.0
2015-03-01  2015-03-01    5.1
2015-04-01  2015-04-01   25.8
2015-05-01  2015-05-01  300.0


In [9]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Scale the rainfall totals into the [0, 1] range.
# The scaler is kept so that predictions can be mapped back to original units.
rainfall_matrix = df[['total']].values
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit(rainfall_matrix).transform(rainfall_matrix)

# 創建時間序列數據集
def create_dataset(data, time_step=1):
    """Build sliding-window samples for one-step-ahead forecasting.

    Each sample consists of ``time_step`` consecutive values taken from the
    first column of ``data``; its target is the value immediately following
    that window.

    Args:
        data: 2-D array-like of shape (n_observations, n_columns). Only
            column 0 is used.
        time_step: Number of consecutive observations in each input window.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        ``(n_observations - time_step, time_step)`` and ``y`` has shape
        ``(n_observations - time_step,)``. When ``data`` has too few rows to
        form a single window, empty arrays of shape ``(0, time_step)`` and
        ``(0,)`` are returned.
    """
    data = np.asarray(data)
    # Every start index i with i + time_step <= len(data) - 1 has a target,
    # so there are exactly len(data) - time_step samples. (The previous
    # bound subtracted one more and never used the final observation.)
    n_samples = len(data) - time_step
    if n_samples <= 0:
        # Keep X two-dimensional so callers can still read X.shape[1].
        return (np.empty((0, time_step), dtype=data.dtype),
                np.empty((0,), dtype=data.dtype))
    X = np.array([data[i:i + time_step, 0] for i in range(n_samples)])
    y = np.array([data[i + time_step, 0] for i in range(n_samples)])
    return X, y

# Window length: the previous 30 observations are used to predict the next
# one. The series holds one value per month, so this is a 30-month window.
time_step = 30
X, y = create_dataset(scaled_data, time_step)

# Add a trailing feature axis: [samples, time steps, features].
X = X.reshape(X.shape[0], X.shape[1], 1)


In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Build the LSTM model: two stacked 50-unit LSTM layers followed by a
# single-unit dense output. The first LSTM returns the full sequence so that
# the second LSTM receives one vector per time step.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)),
    LSTM(units=50),
    Dense(1),
])

model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model.
model.fit(X, y, epochs=20, batch_size=32)


  super().__init__(**kwargs)


Epoch 1/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 47ms/step - loss: 0.0494
Epoch 2/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - loss: 0.0384
Epoch 3/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - loss: 0.0342
Epoch 4/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - loss: 0.0379
Epoch 5/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - loss: 0.0413
Epoch 6/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0333
Epoch 7/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - loss: 0.0263
Epoch 8/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - loss: 0.0344
Epoch 9/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.0272
Epoch 10/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - loss: 0.0369
Epoch 11/20
[1m3/

<keras.src.callbacks.history.History at 0x1ae96565150>

In [11]:
# Use the most recent `time_step` monthly totals as the model's input window.
# The column is selected by name and the window length comes from `time_step`,
# so this keeps working if the column order or the window size changes.
recent_data = df['total'].iloc[-time_step:].to_numpy()
# Apply the same scaling that was fitted on the training data.
recent_data = scaler.transform(recent_data.reshape(-1, 1))
# Shape expected by the model: [samples, time steps, features].
recent_data = recent_data.reshape((1, time_step, 1))

# Predict the next step of the series (the next month, since the data is
# monthly) and map the result back to the original rainfall units.
predicted_rainfall = model.predict(recent_data)
predicted_rainfall = scaler.inverse_transform(predicted_rainfall)
print(f'Predicted Rainfall: {predicted_rainfall[0][0]}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 781ms/step
Predicted Rainfall: 109.09745025634766
