In [25]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import predict
import optuna

In [26]:
dataset=pd.read_csv('SolarPrediction_aligned_Sun.csv',engine='python',nrows=576*15)
dataset=dataset.drop("Data",axis=1)
dataset=dataset.drop("Time",axis=1)
# dataset=dataset.drop("TimeSunRise",axis=1)
# dataset=dataset.drop("TimeSunSet",axis=1)
target=dataset["Radiation"]
dataset.head(5)

Unnamed: 0,UNIXTime,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed,TimeSunRise,TimeSunSet
0,8,2.58,51,30.43,103,77.27,11.25,22020,67080
1,310,2.83,51,30.43,103,153.44,9.0,22020,67080
2,1206,2.16,51,30.43,103,142.04,7.87,22020,67080
3,1505,2.21,51,30.43,103,144.12,18.0,22020,67080
4,1809,2.25,51,30.43,103,67.42,11.25,22020,67080


In [27]:
dataset=dataset.values
dataset=dataset.astype('float32')
scalar1=MinMaxScaler(feature_range=(0,1))
scalar2=MinMaxScaler(feature_range=(0,1))
scalar_dim=dataset[:,1]
dataset=scalar1.fit_transform(dataset)

scalar_dim=scalar_dim.reshape(len(dataset),1)
print(scalar_dim)
scalar_dim=scalar2.fit_transform(scalar_dim)
print(scalar_dim)

train_size=int(len(dataset)*0.67)
test_size=len(dataset)- train_size
train,test=dataset[0:train_size,:],dataset[train_size:len(dataset),:]

[[  2.58]
 [  2.83]
 [  2.16]
 ...
 [344.9 ]
 [365.66]
 [355.57]]
[[0.00089369]
 [0.00104993]
 [0.00063121]
 ...
 [0.21482898]
 [0.2278031 ]
 [0.22149728]]


In [28]:
def create_dataset(dataset,look_back):
    dataX,dataY=[],[]
    for i in range(len(dataset)-look_back-1):
        #a=dataset第0行i~i+look_back個
        a=dataset[i:(i+look_back),0:9]
        dataX.append(a)
        dataY.append(dataset[i+look_back,1])
    return np.array(dataX),np.array(dataY)

In [29]:
look_back=5
trainX,trainY=create_dataset(train,look_back)
testX,testY=create_dataset(test,look_back)


In [30]:
trainX=trainX.reshape(5782,45)
testX=testX.reshape(2846,45)
trainY=trainY.reshape(len(trainY),1)
testY=testY.reshape(len(testY),1)

In [31]:
def objective(trial):
    # 定義 MLPRegressor 的超參數範圍
    hidden_layer_sizes = (
        trial.suggest_int("n_layers", 1, 5),  # 隱藏層層數
        trial.suggest_int("n_units", 32, 256),  # 每個隱藏層的單位數
    )

    # 創建 MLPRegressor 模型
    mlp = MLPRegressor(
        hidden_layer_sizes=hidden_layer_sizes,
        max_iter=trial.suggest_int("max_iter", 100, 500),
        alpha=trial.suggest_float("alpha", 1e-5, 1e-1, log=True),
        random_state=42,
    )

    # 訓練模型
    mlp.fit(trainX, trainY)

    # 進行預測
    y_pred = mlp.predict(testX)
    

    # 計算均方誤差
    mse = mean_squared_error(testY, y_pred)

    return mse

In [32]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# 取得最佳超參數
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# 使用最佳超參數重新建立模型
best_mlp = MLPRegressor(
    hidden_layer_sizes=(best_params["n_layers"], best_params["n_units"]),
    max_iter=best_params["max_iter"],
    alpha=best_params["alpha"],
    random_state=42,
)

# 訓練最佳模型
best_mlp.fit(trainX, trainY)

# 最佳模型的預測
y_pred_best = best_mlp.predict(testX)

# 計算最佳模型的均方誤差
mse_best = mean_squared_error(testY, y_pred_best)
print("Best Model Mean Squared Error:", mse_best)

[I 2024-01-29 18:52:09,019] A new study created in memory with name: no-name-243aaa23-42c3-46a1-bed4-53f8a2551cb6
  y = column_or_1d(y, warn=True)
[I 2024-01-29 18:52:09,607] Trial 0 finished with value: 0.004921585321426392 and parameters: {'n_layers': 5, 'n_units': 67, 'max_iter': 353, 'alpha': 0.001803352399772248}. Best is trial 0 with value: 0.004921585321426392.
  y = column_or_1d(y, warn=True)
[I 2024-01-29 18:52:10,277] Trial 1 finished with value: 0.0038803908973932266 and parameters: {'n_layers': 5, 'n_units': 161, 'max_iter': 293, 'alpha': 0.020007980627134575}. Best is trial 1 with value: 0.0038803908973932266.
  y = column_or_1d(y, warn=True)
[I 2024-01-29 18:52:10,890] Trial 2 finished with value: 0.004533940460532904 and parameters: {'n_layers': 2, 'n_units': 122, 'max_iter': 410, 'alpha': 4.0302565308352194e-05}. Best is trial 1 with value: 0.0038803908973932266.
  y = column_or_1d(y, warn=True)
[I 2024-01-29 18:52:11,515] Trial 3 finished with value: 0.0038670522626489

Best Hyperparameters: {'n_layers': 4, 'n_units': 210, 'max_iter': 328, 'alpha': 0.0201293522716579}
Best Model Mean Squared Error: 0.0032630225
