**合併數據**

In [1]:
import pandas as pd
import glob
import os

# Directory that holds the training CSV files to merge.
folder_path = './Training data/'

# glob returns paths in arbitrary, OS-dependent order; sorting keeps the merged
# row order (and therefore every seeded split further down) reproducible.
files = sorted(glob.glob(os.path.join(folder_path, '*.csv')))
if not files:
    # Fail with an actionable message instead of pd.concat's generic
    # "No objects to concatenate" error.
    raise FileNotFoundError(f"No CSV files found in {folder_path!r}")

# Stack every file into one frame with a fresh 0..n-1 index.
data = pd.concat([pd.read_csv(f) for f in files], ignore_index=True)

In [2]:
# Show the merged frame; the notebook renders a truncated head/tail preview.
data

Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW)
0,10,2024-03-01 17:14:06.000,0.0,1017.48,15.59,94.30,652.92,0.12
1,10,2024-03-01 17:14:47.000,0.0,1017.48,15.66,94.04,682.50,0.12
2,10,2024-03-01 17:15:47.000,0.0,1017.47,15.74,94.10,750.00,0.14
3,10,2024-03-01 17:16:47.000,0.0,1017.46,15.78,94.09,738.33,0.14
4,10,2024-03-01 17:17:47.000,0.0,1017.49,15.80,94.08,660.83,0.12
...,...,...,...,...,...,...,...,...
1290889,9,2024-07-23 15:50:57.000,0.0,994.54,30.69,72.91,2288.33,1.10
1290890,9,2024-07-23 15:51:57.000,0.0,994.40,30.27,73.16,3236.67,1.92
1290891,9,2024-07-23 15:52:57.000,0.0,994.39,29.90,72.51,4526.67,3.57
1290892,9,2024-07-23 15:53:57.000,0.0,994.40,29.38,73.23,4231.67,3.13


**異常值處理**

**WindSpeed(m/s)**

**Sunlight(Lux)**

In [3]:
from sklearn.model_selection import train_test_split

# Single-predictor setup: illuminance is the only input, generated power the target.
y = data.loc[:, 'Power(mW)']
X = data.loc[:, ['Sunlight(Lux)']]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [4]:
from sklearn.linear_model import LinearRegression

# Build and train the linear regression in one step; fit() returns the
# estimator itself, so the result can be bound directly to `model`.
model = LinearRegression().fit(X_train, y_train)

# Report the fitted parameters: the slope is the change in power per unit of
# illuminance, the intercept is the predicted power at zero illuminance.
print(f"模型係數: {model.coef_[0]}")
print(f"模型截距: {model.intercept_}")


模型係數: 0.015636212388786923
模型截距: -85.31809827114148


In [5]:
from sklearn.metrics import mean_squared_error, r2_score

# Predict power for the held-out rows.
y_pred = model.predict(X_test)

# Error magnitude (mean squared error) and explained variance (R^2) on the test split.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"MSE: {mse}", f"R2: {r2}", sep="\n")

MSE: 28640.7418769258
R2: 0.8779128035444631


In [6]:
# Overwrite the recorded power of rows with the anomalous illuminance reading
# using the regression model's estimate.
# NOTE(review): 117758.2 looks like the light sensor's saturation ceiling — confirm.
saturated_mask = data['Sunlight(Lux)'] == 117758.2

# Compute the mask once and index with .loc on both sides (no chained indexing).
# The guard matters because scikit-learn's predict() raises on a zero-row input,
# which would crash this cell on data that contains no such readings.
if saturated_mask.any():
    data.loc[saturated_mask, 'Power(mW)'] = model.predict(
        data.loc[saturated_mask, ['Sunlight(Lux)']]
    )

In [7]:
# Show the frame again after the Power(mW) overwrite in the previous cell.
data

Unnamed: 0,LocationCode,DateTime,WindSpeed(m/s),Pressure(hpa),Temperature(°C),Humidity(%),Sunlight(Lux),Power(mW)
0,10,2024-03-01 17:14:06.000,0.0,1017.48,15.59,94.30,652.92,0.12
1,10,2024-03-01 17:14:47.000,0.0,1017.48,15.66,94.04,682.50,0.12
2,10,2024-03-01 17:15:47.000,0.0,1017.47,15.74,94.10,750.00,0.14
3,10,2024-03-01 17:16:47.000,0.0,1017.46,15.78,94.09,738.33,0.14
4,10,2024-03-01 17:17:47.000,0.0,1017.49,15.80,94.08,660.83,0.12
...,...,...,...,...,...,...,...,...
1290889,9,2024-07-23 15:50:57.000,0.0,994.54,30.69,72.91,2288.33,1.10
1290890,9,2024-07-23 15:51:57.000,0.0,994.40,30.27,73.16,3236.67,1.92
1290891,9,2024-07-23 15:52:57.000,0.0,994.39,29.90,72.51,4526.67,3.57
1290892,9,2024-07-23 15:53:57.000,0.0,994.40,29.38,73.23,4231.67,3.13


# Model Training

**Use an LSTM and the pre-processed data to train our model**

In [8]:
# Input columns for the sequence model, in the order they are fed to it.
# NOTE(review): the columns are passed on in their raw units; no scaling is
# applied in this cell.
features = [
    'WindSpeed(m/s)',
    'Pressure(hpa)',
    'Temperature(°C)',
    'Humidity(%)',
    'Sunlight(Lux)',
]

# Target is the generated power; inputs are the columns listed above.
y = data.loc[:, 'Power(mW)']
X = data.loc[:, features]

In [9]:
import numpy as np

def create_sequences(X, y, time_steps):
    """Turn row-aligned inputs and targets into sliding windows.

    Window ``i`` holds rows ``i .. i + time_steps - 1`` of ``X`` and is paired
    with the target at row ``i + time_steps`` (the row right after the window).

    Parameters
    ----------
    X : pandas.DataFrame, pandas.Series or array-like
        Inputs, one row per time step. Rows are used positionally.
    y : pandas.Series or array-like
        Targets aligned positionally with ``X``.
    time_steps : int
        Number of consecutive rows in each window (must be >= 0).

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X_seq`` with shape ``(n_windows, time_steps, *X.shape[1:])`` and
        ``y_seq`` with shape ``(n_windows,)``, where
        ``n_windows = max(len(X) - time_steps, 0)``. When no window fits, the
        arrays are empty but keep those dimensions.

    Raises
    ------
    ValueError
        If ``time_steps`` is negative.
    IndexError
        If ``y`` is too short to supply a target for every window.
    """
    if time_steps < 0:
        raise ValueError("time_steps must be non-negative")

    # Convert once up front: slicing a pandas object with .iloc inside a
    # Python loop is very slow on large frames.
    feature_rows = np.asarray(X)
    targets = np.asarray(y)

    n_windows = max(len(feature_rows) - time_steps, 0)
    if n_windows and len(targets) < time_steps + n_windows:
        raise IndexError("y has fewer rows than required by X and time_steps")

    # Row i of window_idx is [i, i+1, ..., i+time_steps-1]; indexing with it
    # gathers all windows in one vectorised step and returns a fresh array.
    window_idx = np.arange(n_windows)[:, None] + np.arange(time_steps)[None, :]
    X_seq = feature_rows[window_idx]

    # Copy so the result never aliases the caller's target data.
    y_seq = targets[time_steps:time_steps + n_windows].copy()
    return X_seq, y_seq

# Number of consecutive rows that make up one model input window.
time_steps = 10
X_seq, y_seq = create_sequences(X, y, time_steps=time_steps)

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the windows for evaluation, with a fixed seed.
# NOTE(review): consecutive windows overlap and train_test_split shuffles by
# default, so near-identical windows can land on both sides of the split —
# confirm this is acceptable for the reported test metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X_seq,
    y_seq,
    random_state=42,
    test_size=0.2,
)

In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Imported in this cell because it is the only place that needs it.
from tensorflow.keras.layers import Input

# LSTM regressor: one 50-unit LSTM layer followed by a single linear output.
# The input shape is declared with an explicit Input layer rather than the
# `input_shape=` argument on the first layer, which Keras warns against for
# Sequential models.
model = Sequential([
    Input(shape=(time_steps, len(features))),  # (window length, number of feature columns)
    LSTM(50, activation='relu'),
    Dense(1),  # predicts a single value per window
])

# Optimise mean squared error; also track mean absolute error during training.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

  super().__init__(**kwargs)


In [12]:
# Train for up to 50 epochs in mini-batches of 32, holding back 20% of the
# training windows for per-epoch validation.
model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=50,
)

Epoch 1/50
[1m25818/25818[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 3ms/step - loss: 1188783.1250 - mae: 320.2808 - val_loss: 34819.8281 - val_mae: 78.5247
Epoch 2/50
[1m11773/25818[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m33s[0m 2ms/step - loss: 34996.2109 - mae: 77.1766

KeyboardInterrupt: 

In [None]:
# Run `model` on the held-out windows and keep its outputs for the export step.
predictions = model.predict(X_test)

In [None]:
import pandas as pd

# Build the export table: one row per prediction, rounded to two decimals.
# NOTE(review): each Id is the fixed prefix '202402180900' plus a running
# counter zero-padded to at least two digits, so Ids get longer once there are
# more than 99 predictions — confirm against the required submission format.
row_ids = [f'202402180900{i:02d}' for i in range(1, len(predictions) + 1)]
rounded_power = predictions.reshape(-1).round(2)

results = pd.DataFrame({'Id': row_ids, 'Power(mW)': rounded_power})

# Write without the DataFrame index so the file holds only the two columns.
results.to_csv('upload.csv', index=False)
