# `imbrium` use case
## Hyperparameter optimization - Bidirectional Long Short-Term Memory Neural Network

### Multivariate pure forecasting

##### Example Steps:

- basic data preparation
- scale target and feature numpy arrays
- create imbrium bidirectional long short-term memory (BiLSTM) neural network
- use optuna to perform hyperparameter optimization on shifting window variables

In [1]:
import imbrium
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import optuna

# Show which imbrium release this notebook was executed against.
loaded_version = imbrium.__version__
print(f"imbrium version: {loaded_version} loaded")

imbrium version: 3.0.0 loaded


In [2]:
# Read the raw export; its fields are separated by semicolons.
example_data = pd.read_csv('example_dataset/AirQualityUCI.csv', sep=';')
# Preview the first rows as the cell output.
example_data.head()

Unnamed: 0,Date,Time,CO(GT),PT08.S1(CO),NMHC(GT),C6H6(GT),PT08.S2(NMHC),NOx(GT),PT08.S3(NOx),NO2(GT),PT08.S4(NO2),PT08.S5(O3),T,RH,AH,Unnamed: 15,Unnamed: 16
0,10/03/2004,18.00.00,26,1360.0,150.0,119,1046.0,166.0,1056.0,113.0,1692.0,1268.0,136,489,7578,,
1,10/03/2004,19.00.00,2,1292.0,112.0,94,955.0,103.0,1174.0,92.0,1559.0,972.0,133,477,7255,,
2,10/03/2004,20.00.00,22,1402.0,88.0,90,939.0,131.0,1140.0,114.0,1555.0,1074.0,119,540,7502,,
3,10/03/2004,21.00.00,22,1376.0,80.0,92,948.0,172.0,1092.0,122.0,1584.0,1203.0,110,600,7867,,
4,10/03/2004,22.00.00,16,1272.0,51.0,65,836.0,131.0,1205.0,116.0,1490.0,1110.0,112,596,7888,,


In [3]:
# Keep only the five sensor columns used in this example
# (one becomes the target, the other four the features).
example_data = example_data.loc[
    :,
    ['CO(GT)', 'PT08.S1(CO)', 'NMHC(GT)', 'C6H6(GT)', 'PT08.S2(NMHC)'],
]

In [4]:
# Per-column number of missing entries.
missing_mask = example_data.isna()
missing_mask.sum()

CO(GT)           114
PT08.S1(CO)      114
NMHC(GT)         114
C6H6(GT)         114
PT08.S2(NMHC)    114
dtype: int64

In [5]:
# Discard every row that has at least one missing value.
example_data = example_data.dropna(axis=0, how="any")

In [6]:
# Per-column number of non-missing entries remaining after the drop.
example_data.count()

CO(GT)           9357
PT08.S1(CO)      9357
NMHC(GT)         9357
C6H6(GT)         9357
PT08.S2(NMHC)    9357
dtype: int64

In [7]:
# Swap "," for "." in the string cells (presumably decimal commas in the raw
# file - confirm against the CSV), then cast every column to float.
decimal_fixed = example_data.replace(",", ".", regex=True)
example_data = decimal_fixed.astype("float")

In [8]:
# Target series as a column vector of shape (n_rows, 1).
target_series = example_data['PT08.S1(CO)']
target = target_series.to_numpy(copy=True).reshape(-1, 1)

In [9]:
# The four remaining sensor columns serve as model inputs.
features = example_data.loc[
    :,
    ['CO(GT)', 'NMHC(GT)', 'C6H6(GT)', 'PT08.S2(NMHC)'],
]

In [10]:
# Independent min-max scalers: one fitted on the inputs, one on the target.
feature_scaler = MinMaxScaler()
scaled_features = feature_scaler.fit(features).transform(features)

target_scaler = MinMaxScaler()
scaled_target = target_scaler.fit(target).transform(target)

In [11]:
# Settings that are identical for every layer entry.
_shared_layer_settings = {"activation": "relu", "regularization": 0.002}

# (neurons, dropout) for layer0..layer4; None means the entry has no
# "dropout" key at all.
_layer_specs = [
    (200, 0.2),
    (100, 0.2),
    (50, 0.2),
    # NOTE(review): 0.002 differs from the 0.2 used by the other layers -
    # confirm this dropout value is intentional.
    (50, 0.002),
    (25, None),
]

# Layer configuration handed to the imbrium model builder: one
# "layer<i>" -> {"config": {...}} entry per spec above.
custom_layer_config = {
    f"layer{position}": {
        "config": {
            "neurons": neurons,
            **_shared_layer_settings,
            **({} if dropout is None else {"dropout": dropout}),
        }
    }
    for position, (neurons, dropout) in enumerate(_layer_specs)
}

In [12]:
from imbrium import PureMulti

In [14]:
# Hand the scaled target and feature arrays to imbrium's PureMulti predictor.
predictor_instance = PureMulti(
    target=scaled_target,
    features=scaled_features,
)


def objective(trial):
    """Optuna objective: fit the BiLSTM for one sampled window and score it.

    Samples ``steps_past`` (5-30) and ``steps_future`` (5-10) from ``trial``,
    fits a model on the module-level ``predictor_instance`` with the
    module-level ``custom_layer_config``, evaluates it, and returns the first
    entry of ``predictor_instance.show_evaluation()``.

    Args:
        trial: object exposing ``suggest_int(name, low, high)`` (an Optuna
            trial when driven by ``study.optimize``).

    Returns:
        The first element of the evaluation result (presumably the evaluation
        loss - TODO confirm against the imbrium docs).
    """
    # Shifting-window sizes under optimisation; sampled in this fixed order.
    window = {
        "steps_past": trial.suggest_int('steps_past', 5, 30),
        "steps_future": trial.suggest_int('steps_future', 5, 10),
    }

    # Network layout: block counts plus the shared per-layer settings.
    architecture = {
        "bilstm_block_one": 3,
        "lstm_block_one": 2,
        "layer_config": custom_layer_config,
    }

    # Loss/metric choice and the fitting / early-stopping settings.
    training = {
        "loss": 'mean_squared_error',
        "metrics": 'mean_squared_error',
        "epochs": 10,
        "show_progress": 1,
        "validation_split": 0.20,
        "board": False,
        "monitor": 'val_loss',
        "patience": 3,
        "min_delta": 0,
        "verbose": 1,
    }

    predictor_instance.create_fit_bilstm(**window, **architecture, **training)

    # Evaluation must run before its results can be read back.
    predictor_instance.evaluate_model()
    evaluation = predictor_instance.show_evaluation()

    return evaluation[0]

# Lower objective values are better, so the study minimises.
# The sampler is seeded so the sequence of suggested steps_past / steps_future
# values is repeatable after a kernel restart; the model training inside the
# objective may still vary between runs.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
# Five trials keep the example short; each trial fits a full model.
study.optimize(objective, n_trials=5)

[I 2024-05-12 20:31:46,416] A new study created in memory with name: no-name-94051a33-ed25-456a-9fa3-aa633c59231c


Epoch 1/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 122ms/step - loss: 1.3698 - mean_squared_error: 0.1766 - val_loss: 0.1085 - val_mean_squared_error: 0.0325
Epoch 2/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 110ms/step - loss: 0.0805 - mean_squared_error: 0.0212 - val_loss: 0.0718 - val_mean_squared_error: 0.0409
Epoch 3/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 111ms/step - loss: 0.0503 - mean_squared_error: 0.0232 - val_loss: 0.0569 - val_mean_squared_error: 0.0389
Epoch 4/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 112ms/step - loss: 7.2312 - mean_squared_error: 7.1808 - val_loss: 0.2335 - val_mean_squared_error: 0.0395
Epoch 5/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 122ms/step - loss: 0.2070 - mean_squared_error: 0.0195 - val_loss: 0.2113 - val_mean_squared_error: 0.0375
Epoch 6/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[

[I 2024-05-12 20:34:20,337] Trial 0 finished with value: 0.18559874594211578 and parameters: {'steps_past': 21, 'steps_future': 10}. Best is trial 0 with value: 0.18559874594211578.


Epoch 1/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 152ms/step - loss: 1.3966 - mean_squared_error: 0.1347 - val_loss: 0.1725 - val_mean_squared_error: 0.0321
Epoch 2/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 136ms/step - loss: 0.1340 - mean_squared_error: 0.0208 - val_loss: 0.0963 - val_mean_squared_error: 0.0334
Epoch 3/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 133ms/step - loss: 0.0740 - mean_squared_error: 0.0198 - val_loss: 0.0741 - val_mean_squared_error: 0.0383
Epoch 4/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 112ms/step - loss: 2407.8601 - mean_squared_error: 2407.8171 - val_loss: 0.2293 - val_mean_squared_error: 0.0466
Epoch 5/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 107ms/step - loss: 0.2100 - mean_squared_error: 0.0185 - val_loss: 0.2041 - val_mean_squared_error: 0.0336
Epoch 6/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

[I 2024-05-12 20:37:08,851] Trial 1 finished with value: 0.1740313321352005 and parameters: {'steps_past': 20, 'steps_future': 7}. Best is trial 1 with value: 0.1740313321352005.


Epoch 1/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 203ms/step - loss: 1.5412 - mean_squared_error: 0.1736 - val_loss: 0.3200 - val_mean_squared_error: 0.0290
Epoch 2/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 172ms/step - loss: 0.2708 - mean_squared_error: 0.0200 - val_loss: 0.2014 - val_mean_squared_error: 0.0335
Epoch 3/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 175ms/step - loss: 0.1724 - mean_squared_error: 0.0203 - val_loss: 0.1534 - val_mean_squared_error: 0.0399
Epoch 4/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 159ms/step - loss: 0.1266 - mean_squared_error: 0.0222 - val_loss: 0.1162 - val_mean_squared_error: 0.0354
Epoch 5/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 165ms/step - loss: 0.0968 - mean_squared_error: 0.0216 - val_loss: 0.0906 - val_mean_squared_error: 0.0307
Epoch 6/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[

[I 2024-05-12 20:42:02,031] Trial 2 finished with value: 3.6487197875976562 and parameters: {'steps_past': 29, 'steps_future': 7}. Best is trial 1 with value: 0.1740313321352005.


Epoch 1/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 129ms/step - loss: 1.3337 - mean_squared_error: 0.1584 - val_loss: 0.1136 - val_mean_squared_error: 0.0493
Epoch 2/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 104ms/step - loss: 0.0759 - mean_squared_error: 0.0258 - val_loss: 0.0655 - val_mean_squared_error: 0.0377
Epoch 3/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 101ms/step - loss: 0.0477 - mean_squared_error: 0.0232 - val_loss: 0.0516 - val_mean_squared_error: 0.0345
Epoch 4/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 107ms/step - loss: 0.0373 - mean_squared_error: 0.0213 - val_loss: 0.0504 - val_mean_squared_error: 0.0378
Epoch 5/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 108ms/step - loss: 0.0332 - mean_squared_error: 0.0219 - val_loss: 0.0458 - val_mean_squared_error: 0.0372
Epoch 6/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[

[I 2024-05-12 20:45:25,592] Trial 3 finished with value: 0.049423303455114365 and parameters: {'steps_past': 19, 'steps_future': 9}. Best is trial 3 with value: 0.049423303455114365.


Epoch 1/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 150ms/step - loss: 1.4428 - mean_squared_error: 0.1278 - val_loss: 0.2236 - val_mean_squared_error: 0.0366
Epoch 2/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 128ms/step - loss: 0.1747 - mean_squared_error: 0.0216 - val_loss: 0.1250 - val_mean_squared_error: 0.0373
Epoch 3/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 128ms/step - loss: 0.0987 - mean_squared_error: 0.0221 - val_loss: 0.0862 - val_mean_squared_error: 0.0357
Epoch 4/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 141ms/step - loss: 54177.4258 - mean_squared_error: 54177.3711 - val_loss: 0.3624 - val_mean_squared_error: 0.0551
Epoch 5/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 127ms/step - loss: 0.3478 - mean_squared_error: 0.0440 - val_loss: 0.3585 - val_mean_squared_error: 0.0553
Epoch 6/10
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

[I 2024-05-12 20:48:15,426] Trial 4 finished with value: 0.3265719711780548 and parameters: {'steps_past': 25, 'steps_future': 6}. Best is trial 3 with value: 0.049423303455114365.
