# Import Libraries


In [4]:
pip install optuna

Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.2-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m24.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.13.2-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.0/233.0 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: M

In [5]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt # Visualization
import matplotlib.dates as mdates # Formatting dates
import seaborn as sns # Visualization
from sklearn.preprocessing import MinMaxScaler
import torch # Library for implementing Deep Neural Network
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import optuna


# Load and Preprocess Dataset

In [6]:
df = pd.read_csv('Biotech_preprocessed_dataset_with_outliers.csv')
df

Unnamed: 0,Time,TOD,Hour,Day,Total (W),Phase_A (W),Phase_B (W),Phase_C (W),Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday,Holiday
0,2024-01-04 15:00:00,15:00:00,15,Thursday,821.0,207.0,467.0,147.0,False,False,False,False,True,False,False,False
1,2024-01-04 16:00:00,16:00:00,16,Thursday,741.0,298.0,329.0,114.0,False,False,False,False,True,False,False,False
2,2024-01-04 17:00:00,17:00:00,17,Thursday,776.0,363.0,278.0,135.0,False,False,False,False,True,False,False,False
3,2024-01-04 18:00:00,18:00:00,18,Thursday,1260.0,659.0,419.0,182.0,False,False,False,False,True,False,False,False
4,2024-01-04 19:00:00,19:00:00,19,Thursday,973.0,381.0,410.0,182.0,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4454,2024-07-08 05:00:00,05:00:00,5,Monday,1040.0,635.0,259.0,146.0,False,True,False,False,False,False,False,False
4455,2024-07-08 06:00:00,06:00:00,6,Monday,1414.0,1023.0,251.0,140.0,False,True,False,False,False,False,False,False
4456,2024-07-08 07:00:00,07:00:00,7,Monday,571.0,225.0,243.0,103.0,False,True,False,False,False,False,False,False
4457,2024-07-08 08:00:00,08:00:00,8,Monday,784.0,411.0,280.0,93.0,False,True,False,False,False,False,False,False


In [7]:
df['Time'] = pd.to_datetime(df['Time'])
df['TOD'] = df['Time'].dt.hour
df['office_day'] = ~df['Holiday']

# mapping dictionary
day_mapping = {'Sunday': 1, 'Monday': 2, 'Tuesday': 3, 'Wednesday': 4, 'Thursday': 5, 'Friday': 6, 'Saturday': 7}

# Map the day names to numerical values
df['Day'] = df['Day'].map(day_mapping)

df

Unnamed: 0,Time,TOD,Hour,Day,Total (W),Phase_A (W),Phase_B (W),Phase_C (W),Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday,Holiday,office_day
0,2024-01-04 15:00:00,15,15,5,821.0,207.0,467.0,147.0,False,False,False,False,True,False,False,False,True
1,2024-01-04 16:00:00,16,16,5,741.0,298.0,329.0,114.0,False,False,False,False,True,False,False,False,True
2,2024-01-04 17:00:00,17,17,5,776.0,363.0,278.0,135.0,False,False,False,False,True,False,False,False,True
3,2024-01-04 18:00:00,18,18,5,1260.0,659.0,419.0,182.0,False,False,False,False,True,False,False,False,True
4,2024-01-04 19:00:00,19,19,5,973.0,381.0,410.0,182.0,False,False,False,False,True,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4454,2024-07-08 05:00:00,5,5,2,1040.0,635.0,259.0,146.0,False,True,False,False,False,False,False,False,True
4455,2024-07-08 06:00:00,6,6,2,1414.0,1023.0,251.0,140.0,False,True,False,False,False,False,False,False,True
4456,2024-07-08 07:00:00,7,7,2,571.0,225.0,243.0,103.0,False,True,False,False,False,False,False,False,True
4457,2024-07-08 08:00:00,8,8,2,784.0,411.0,280.0,93.0,False,True,False,False,False,False,False,False,True


In [8]:
df.index = df['Time']
df.head()

Unnamed: 0_level_0,Time,TOD,Hour,Day,Total (W),Phase_A (W),Phase_B (W),Phase_C (W),Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday,Holiday,office_day
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2024-01-04 15:00:00,2024-01-04 15:00:00,15,15,5,821.0,207.0,467.0,147.0,False,False,False,False,True,False,False,False,True
2024-01-04 16:00:00,2024-01-04 16:00:00,16,16,5,741.0,298.0,329.0,114.0,False,False,False,False,True,False,False,False,True
2024-01-04 17:00:00,2024-01-04 17:00:00,17,17,5,776.0,363.0,278.0,135.0,False,False,False,False,True,False,False,False,True
2024-01-04 18:00:00,2024-01-04 18:00:00,18,18,5,1260.0,659.0,419.0,182.0,False,False,False,False,True,False,False,False,True
2024-01-04 19:00:00,2024-01-04 19:00:00,19,19,5,973.0,381.0,410.0,182.0,False,False,False,False,True,False,False,False,True


In [9]:
# Keep the rows belonging to Sping season only (March, april, and May)
start_date = pd.to_datetime('2024-03-01')
end_date = pd.to_datetime('2024-06-01')

df_spring = df[(df['Time'] >= start_date) & (df['Time'] < end_date)]
df_spring

Unnamed: 0_level_0,Time,TOD,Hour,Day,Total (W),Phase_A (W),Phase_B (W),Phase_C (W),Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday,Holiday,office_day
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2024-03-01 00:00:00,2024-03-01 00:00:00,0,0,6,792.0,410.0,198.0,184.0,True,False,False,False,False,False,False,False,True
2024-03-01 01:00:00,2024-03-01 01:00:00,1,1,6,733.0,222.0,326.0,185.0,True,False,False,False,False,False,False,False,True
2024-03-01 02:00:00,2024-03-01 02:00:00,2,2,6,706.0,222.0,300.0,184.0,True,False,False,False,False,False,False,False,True
2024-03-01 03:00:00,2024-03-01 03:00:00,3,3,6,684.0,302.0,197.0,185.0,True,False,False,False,False,False,False,False,True
2024-03-01 04:00:00,2024-03-01 04:00:00,4,4,6,784.0,400.0,199.0,185.0,True,False,False,False,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-31 19:00:00,2024-05-31 19:00:00,19,19,6,1070.0,702.0,194.0,174.0,True,False,False,False,False,False,False,False,True
2024-05-31 20:00:00,2024-05-31 20:00:00,20,20,6,689.0,212.0,295.0,182.0,True,False,False,False,False,False,False,False,True
2024-05-31 21:00:00,2024-05-31 21:00:00,21,21,6,718.0,213.0,322.0,183.0,True,False,False,False,False,False,False,False,True
2024-05-31 22:00:00,2024-05-31 22:00:00,22,22,6,606.0,222.0,200.0,184.0,True,False,False,False,False,False,False,False,True


In [10]:
# df_holiday = df[df['Holiday'] == True]
df_holiday = df_spring[df_spring['Holiday'] == True]
df_holiday

Unnamed: 0_level_0,Time,TOD,Hour,Day,Total (W),Phase_A (W),Phase_B (W),Phase_C (W),Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday,Holiday,office_day
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2024-03-02 00:00:00,2024-03-02 00:00:00,0,0,7,755.0,187.0,384.0,184.0,False,False,True,False,False,False,False,True,False
2024-03-02 01:00:00,2024-03-02 01:00:00,1,1,7,620.0,232.0,203.0,185.0,False,False,True,False,False,False,False,True,False
2024-03-02 02:00:00,2024-03-02 02:00:00,2,2,7,800.0,307.0,309.0,184.0,False,False,True,False,False,False,False,True,False
2024-03-02 03:00:00,2024-03-02 03:00:00,3,3,7,801.0,418.0,199.0,184.0,False,False,True,False,False,False,False,True,False
2024-03-02 04:00:00,2024-03-02 04:00:00,4,4,7,710.0,330.0,196.0,184.0,False,False,True,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-28 19:00:00,2024-05-28 19:00:00,19,19,3,1368.0,902.0,284.0,182.0,False,False,False,False,False,True,False,True,False
2024-05-28 20:00:00,2024-05-28 20:00:00,20,20,3,1533.0,1051.0,299.0,183.0,False,False,False,False,False,True,False,True,False
2024-05-28 21:00:00,2024-05-28 21:00:00,21,21,3,899.0,395.0,321.0,183.0,False,False,False,False,False,True,False,True,False
2024-05-28 22:00:00,2024-05-28 22:00:00,22,22,3,678.0,295.0,200.0,183.0,False,False,False,False,False,True,False,True,False


In [11]:
# df_office_day = df[df['Holiday'] == False]
df_office_day = df_spring[df_spring['Holiday'] == False]
df_office_day

Unnamed: 0_level_0,Time,TOD,Hour,Day,Total (W),Phase_A (W),Phase_B (W),Phase_C (W),Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday,Holiday,office_day
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2024-03-01 00:00:00,2024-03-01 00:00:00,0,0,6,792.0,410.0,198.0,184.0,True,False,False,False,False,False,False,False,True
2024-03-01 01:00:00,2024-03-01 01:00:00,1,1,6,733.0,222.0,326.0,185.0,True,False,False,False,False,False,False,False,True
2024-03-01 02:00:00,2024-03-01 02:00:00,2,2,6,706.0,222.0,300.0,184.0,True,False,False,False,False,False,False,False,True
2024-03-01 03:00:00,2024-03-01 03:00:00,3,3,6,684.0,302.0,197.0,185.0,True,False,False,False,False,False,False,False,True
2024-03-01 04:00:00,2024-03-01 04:00:00,4,4,6,784.0,400.0,199.0,185.0,True,False,False,False,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-31 19:00:00,2024-05-31 19:00:00,19,19,6,1070.0,702.0,194.0,174.0,True,False,False,False,False,False,False,False,True
2024-05-31 20:00:00,2024-05-31 20:00:00,20,20,6,689.0,212.0,295.0,182.0,True,False,False,False,False,False,False,False,True
2024-05-31 21:00:00,2024-05-31 21:00:00,21,21,6,718.0,213.0,322.0,183.0,True,False,False,False,False,False,False,False,True
2024-05-31 22:00:00,2024-05-31 22:00:00,22,22,6,606.0,222.0,200.0,184.0,True,False,False,False,False,False,False,False,True


**Drop unnecessary Columns**\
Since we are going to forecast the total load, loads of phase A, B, and C, are not required. Similarly, since we have already extracted the hour from time, the 'Time' column is not required anymore. Holidays and office days have already been separated. So, these information are also not required

In [12]:
df_office_day.drop(['Time', 'Phase_A (W)', 'Phase_B (W)', 'Phase_C (W)'], axis=1, inplace=True)
df_office_day.drop(['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'], axis=1, inplace=True)
df_office_day.drop(['Holiday', 'office_day', 'TOD'], axis=1, inplace=True)
df_office_day

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_office_day.drop(['Time', 'Phase_A (W)', 'Phase_B (W)', 'Phase_C (W)'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_office_day.drop(['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_office_day.drop(['Holiday', 'office_day', 'TOD'], axis=1, inplace=True)


Unnamed: 0_level_0,Hour,Day,Total (W)
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-03-01 00:00:00,0,6,792.0
2024-03-01 01:00:00,1,6,733.0
2024-03-01 02:00:00,2,6,706.0
2024-03-01 03:00:00,3,6,684.0
2024-03-01 04:00:00,4,6,784.0
...,...,...,...
2024-05-31 19:00:00,19,6,1070.0
2024-05-31 20:00:00,20,6,689.0
2024-05-31 21:00:00,21,6,718.0
2024-05-31 22:00:00,22,6,606.0


**Scale the Numerical Attributes**

In [13]:
columns_to_scale = ['Hour', 'Day', 'Total (W)']
scaler = MinMaxScaler(feature_range=(-1, 1))
df_office_day[columns_to_scale] = scaler.fit_transform(df_office_day[columns_to_scale])

df_office_day

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_office_day[columns_to_scale] = scaler.fit_transform(df_office_day[columns_to_scale])


Unnamed: 0_level_0,Hour,Day,Total (W)
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-03-01 00:00:00,-1.000000,1.0,-0.869397
2024-03-01 01:00:00,-0.913043,1.0,-0.887700
2024-03-01 02:00:00,-0.826087,1.0,-0.896076
2024-03-01 03:00:00,-0.739130,1.0,-0.902901
2024-03-01 04:00:00,-0.652174,1.0,-0.871878
...,...,...,...
2024-05-31 19:00:00,0.652174,1.0,-0.783155
2024-05-31 20:00:00,0.739130,1.0,-0.901349
2024-05-31 21:00:00,0.826087,1.0,-0.892353
2024-05-31 22:00:00,0.913043,1.0,-0.927098


In [14]:
df_office_day.shape

(1728, 3)

# **Separate train and test data**

In [15]:
training_data_len = int(len(df_office_day)*0.6)
validation_data_len = int(len(df_office_day)*0.2)
print(training_data_len)
print(validation_data_len)

train_data = df_office_day[:training_data_len]
validation_data = df_office_day[training_data_len:training_data_len+validation_data_len]
test_data = df_office_day[training_data_len+validation_data_len:]


print(train_data.shape, validation_data.shape, test_data.shape)

1036
345
(1036, 3) (345, 3) (347, 3)


In [16]:
dataset_train = train_data.values
dataset_train = np.reshape(dataset_train, (-1, 3))
dataset_train.shape

(1036, 3)

In [17]:
dataset_validation = validation_data.values
dataset_validation = np.reshape(dataset_validation, (-1, 3))
dataset_validation.shape

(345, 3)

In [18]:
dataset_test = test_data.values
dataset_test = np.reshape(dataset_test, (-1, 3))
dataset_test.shape

(347, 3)

**Create Sequences**

In [19]:
scaled_train, scaled_validation, scaled_test = dataset_train, dataset_validation, dataset_test

In [20]:
sequence_length = 24
X_train, y_train = [], []
for i in range(len(scaled_train)-sequence_length):
  X_train.append(scaled_train[i:i+sequence_length])
  y_train.append(scaled_train[i+1:i+sequence_length+1])


X_train, y_train = np.array(X_train), np.array(y_train)
y_train = y_train[:, :, -1].reshape(-1, 24, 1)

X_train.shape, y_train.shape

((1012, 24, 3), (1012, 24, 1))

In [21]:
X_validation, y_validation = [], []
for i in range(len(scaled_validation)-sequence_length):
  X_validation.append(scaled_validation[i:i+sequence_length])
  y_validation.append(scaled_validation[i+1:i+sequence_length+1])
  # y_train.append(scaled_train[i+sequence_length])

X_validation, y_validation = np.array(X_validation), np.array(y_validation)
y_validation = y_validation[:, :, -1].reshape(-1, 24, 1)

X_validation.shape, y_validation.shape

((321, 24, 3), (321, 24, 1))

In [22]:
X_test, y_test = [], []
for i in range(len(scaled_test)-sequence_length):
  X_test.append(scaled_test[i:i+sequence_length])
  y_test.append(scaled_test[i+1:i+sequence_length+1])
  # y_test.append(scaled_test[i+sequence_length])

X_test, y_test = np.array(X_test), np.array(y_test)
y_test = y_test[:, :, -1].reshape(-1, 24, 1)

X_test.shape, y_test.shape

((323, 24, 3), (323, 24, 1))

**Convert To Tensors**

In [23]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
print(X_train.shape, y_train.shape)

X_validation = torch.tensor(X_validation, dtype=torch.float32)
y_validation = torch.tensor(y_validation, dtype=torch.float32)
print(X_validation.shape, y_validation.shape)

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)
print(X_test.shape, y_test.shape)


torch.Size([1012, 24, 3]) torch.Size([1012, 24, 1])
torch.Size([321, 24, 3]) torch.Size([321, 24, 1])
torch.Size([323, 24, 3]) torch.Size([323, 24, 1])


# Create Model

In [46]:
class GRU(nn.Module):
  def __init__(self, input_size, hidden_size, num_layers, drop_out=0.0):
    super(GRU, self).__init__()
    self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=drop_out)
    self.fc = nn.Linear(hidden_size, 1)

  def forward(self, x):
    out, _ = self.gru(x)
    out = self.fc(out)
    return out

In [47]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


**Torch DataLoader**

In [48]:
batch_size = 32
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = batch_size, shuffle=True)

validation_dataset = torch.utils.data.TensorDataset(X_validation, y_validation)
validation_loader = torch.utils.data.DataLoader(validation_dataset, batch_size = batch_size, shuffle=False)

test_dataset = torch.utils.data.TensorDataset(X_test, y_test)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size = batch_size, shuffle=False)

In [49]:
for x, y in train_loader:
	print(f'X shape: {x.shape}, y shape: {y.shape}')
	break

X shape: torch.Size([32, 24, 3]), y shape: torch.Size([32, 24, 1])


# Automated hyperparameter Tuning

In [50]:
def objective(trial):
    input_size = 3

    # Hyperparameters to tune
    hidden_size = trial.suggest_int('hidden_size', 32, 128)
    num_layers = trial.suggest_int('num_layers', 2, 3)
    # learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
    learning_rate = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    drop_out = trial.suggest_float("dropout_l{}".format(i), 0.0, 0.8)

    # Create the model
    model = GRU(input_size, hidden_size, num_layers, drop_out)
    model = model.to(device)
    loss_fn = torch.nn.MSELoss(reduction="mean")

    # optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    # optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "AdamW", "RAdam"])
    # optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    # optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=learning_rate)

    batch_size = 32 #trial.suggest_categorical('batch_size', [16, 32, 64])
    train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = batch_size, shuffle=True)

    validation_dataset = torch.utils.data.TensorDataset(X_validation, y_validation)
    validation_loader = torch.utils.data.DataLoader(validation_dataset, batch_size = batch_size, shuffle=False)

    test_dataset = torch.utils.data.TensorDataset(X_test, y_test)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size = batch_size, shuffle=False)

    num_epochs = trial.suggest_int('num_epochs', 10, 60)
    train_loss_hist =[]
    validation_loss_hist =[]
    train_mape_hist = []
    validation_mape_hist = []
    # Training loop
    for epoch in range(num_epochs):
      total_loss_train = 0.0
      total_mape_train = 0.0

      # Training
      model.train()
      for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        predictions = model(batch_X)
        # predictions = predictions[:, -1, :]
        loss = loss_fn(predictions, batch_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss_train += loss.item()
        total_mape_train += torch.mean(torch.abs((predictions[:, -1, :] - batch_y[:, -1, :])/(batch_y[:, -1, :]+1e-8)))*100.0

      # Calculate average training loss and accuracy
      average_loss_train = total_loss_train / len(train_loader)
      train_loss_hist.append(average_loss_train)
      average_mape_train = total_mape_train / len(train_loader)
      train_mape_hist.append(average_mape_train)

      # Validation on test data
      model.eval()
      with torch.no_grad():
        total_loss_validation = 0.0
        total_mape_validation = 0.0

        for batch_X_validation, batch_y_validation in validation_loader:
          batch_X_validation, batch_y_validation = batch_X_validation.to(device), batch_y_validation.to(device)
          predictions_validation = model(batch_X_validation).to(device)
          # predictions_validation = predictions_validation[:, -1, :]
          validation_loss = loss_fn(predictions_validation, batch_y_validation)

          total_mape_validation += torch.mean(torch.abs((predictions_validation[:, -1, :] - batch_y_validation[:, -1, :])/(batch_y_validation[:, -1, :]+1e-8)))*100.0
          total_loss_validation += validation_loss.item()

        # Calculate average test loss and accuracy
        average_loss_validation = total_loss_validation / len(validation_loader)
        average_loss_validation = average_loss_validation
        validation_loss_hist.append(average_loss_validation)
        average_mape_validation = total_mape_validation / len(validation_loader)
        validation_mape_hist.append(average_mape_validation)
      # if (epoch+1)%10==0:
      #   print(f'Epoch [{epoch+1}/{num_epochs}] - Training Loss: {average_loss_train:.4f}, Validation Loss: {average_loss_validation:.4f}')

      trial.report(average_loss_validation, epoch)

      if trial.should_prune():
        raise optuna.exceptions.TrialPruned()

    return average_loss_validation


In [51]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=100, timeout=600)

pruned_trials = study.get_trials(deepcopy=False, states=[optuna.trial.TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[optuna.trial.TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[I 2024-07-31 08:40:47,172] A new study created in memory with name: no-name-630e8503-0e46-4163-aaf5-8758ff0fc24f
[I 2024-07-31 08:41:27,550] Trial 0 finished with value: 0.07114451691846956 and parameters: {'hidden_size': 90, 'num_layers': 2, 'lr': 0.0024898279559708867, 'dropout_l322': 0.6812160459479215, 'num_epochs': 40}. Best is trial 0 with value: 0.07114451691846956.
[I 2024-07-31 08:41:37,240] Trial 1 finished with value: 0.06709484332664446 and parameters: {'hidden_size': 84, 'num_layers': 2, 'lr': 0.0008122080315626944, 'dropout_l322': 0.1993754815879849, 'num_epochs': 10}. Best is trial 1 with value: 0.06709484332664446.
[I 2024-07-31 08:42:57,423] Trial 2 finished with value: 0.06570498408241705 and parameters: {'hidden_size': 114, 'num_layers': 3, 'lr': 0.00024390340349146706, 'dropout_l322': 0.7394788877258377, 'num_epochs': 45}. Best is trial 2 with value: 0.06570498408241705.
[I 2024-07-31 08:43:49,881] Trial 3 finished with value: 0.06777500716800039 and parameters: {'

Study statistics: 
  Number of finished trials:  46
  Number of pruned trials:  34
  Number of complete trials:  12
Best trial:
  Value:  0.06497741389003667
  Params: 
    hidden_size: 102
    num_layers: 3
    lr: 0.003299689595600927
    dropout_l322: 0.18136547778973497
    num_epochs: 18


In [52]:
optuna.visualization.plot_optimization_history(study)

In [53]:
optuna.visualization.plot_param_importances(study)

In [54]:
optuna.visualization.plot_slice(study)

In [55]:
optuna.visualization.plot_contour(study)

In [56]:
optuna.visualization.plot_parallel_coordinate(study)