## Loading Dependencies

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.datasets
from sklearn.model_selection import train_test_split

## Setting up the Data Frames

Loading the training dataset into a data frame

In [None]:
# Read each line of raw.csv as text (column 0) and split it on whitespace
# into one column per field; the resulting values are still strings.
dataset = pd.read_csv('raw.csv', header=None)
df = dataset[0].str.split(expand=True)
# Generic 1-based column names: Column1, Column2, ...
df.columns = ["Column" + str(position) for position in range(1, df.shape[1] + 1)]

Loading the test dataset into a data frame

In [None]:
# Read each line of test.csv as text (column 0) and split it on whitespace
# into one column per field; the resulting values are still strings.
testset = pd.read_csv('test.csv', header=None)
ts = testset[0].str.split(expand=True)
# Generic 1-based column names: Column1, Column2, ...
ts.columns = ["Column" + str(position) for position in range(1, ts.shape[1] + 1)]

Formatting data frame

In [None]:
# Give the first two columns descriptive names in both frames; one mapping
# per frame renames both columns in a single pass.
df = df.rename(columns={'Column1': 'IMF', 'Column2': 'PPPF'})
ts = ts.rename(columns={'Column1': 'IMF', 'Column2': 'PPPF'})

In [None]:
# Preview the first five rows of the training frame, then of the test frame.
for frame in (df, ts):
    print(frame.head())

           IMF         PPPF      Column3      Column4      Column5  \
0  1.326300000  1.862085698  3.562614673  1.112359136  4.208064938   
1  1.341490000  1.786557677  2.354765579  1.047928545  2.268973609   
2  1.330340000  1.826187439  3.037997085  4.719516591  1.502279680   
3  1.356530000  1.739224557  4.412803792  2.220679787  3.132737509   
4  1.301900000  1.955828713  2.790669497  0.798176830  1.231750364   

       Column6      Column7      Column8      Column9     Column10  ...  \
0  1.412678686  2.271030805  3.950613583  3.627031601  4.600331850  ...   
1  4.637776935  3.793193857  2.859701836  4.361057749  3.592485464  ...   
2  3.644736216  3.575727216  1.337685825  2.380346788  4.503771400  ...   
3  1.327124001  4.759890260  2.882082371  3.663430380  4.531524442  ...   
4  1.977843691  2.393930286  4.738125577  1.364743305  3.391665203  ...   

      Column32     Column33     Column34     Column35     Column36  \
0  3.965987238  4.030998902  0.826347177  2.088521298  4.0

## Assigning Features to X and Targets to Y

In [None]:
# Targets are the IMF and PPPF columns; features are every remaining column.
Y_train = df[['IMF', 'PPPF']]
X_train = df.drop(columns=['IMF', 'PPPF'])

Y_test = ts[['IMF', 'PPPF']]
X_test = ts.drop(columns=['IMF', 'PPPF'])

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): the CSV files in this notebook are read through relative paths
# ('raw.csv', 'test.csv'), so it is unclear from here whether this mount is
# still needed -- confirm before relying on it.
from google.colab import drive
drive.mount('/content/drive')

Converting the data to numeric (it was being read as object)

In [None]:
# Convert every column of all four frames from strings to numbers.
# errors='coerce' turns any value that cannot be parsed into NaN instead of
# raising an exception.
X_train, Y_train, X_test, Y_test = (
    frame.apply(pd.to_numeric, errors='coerce')
    for frame in (X_train, Y_train, X_test, Y_test)
)

Scaling the data using StandardScaler from sklearn

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Fit the scaler on the training features only, then apply that same fitted
# transform to both splits so the test set is scaled with training statistics.
scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Preparing Y for Multi-Output Learning

In [None]:
# Keras matches dict keys to output-layer names, so each target becomes its
# own (n_samples, 1) column array stored under the name of its output head.
Y_train = {
    target: Y_train[target].to_numpy().reshape(-1, 1)
    for target in ('IMF', 'PPPF')
}

Y_test = {
    target: Y_test[target].to_numpy().reshape(-1, 1)
    for target in ('IMF', 'PPPF')
}

## Hyperparameter Tuning using Optuna

Hyperparameter tuning is essential because it helps improve the performance and accuracy of models by selecting the optimal settings for various parameters.

Optuna is an open-source framework for hyperparameter optimization, designed to automate the process of finding the best hyperparameters for machine learning models.

In [None]:
!pip install optuna



In [None]:
import tensorflow as tf
from tensorflow import keras
import optuna


def mymodel(trial, n_features=39):
    """Build and compile the two-output regression network for one trial.

    Args:
        trial: Object exposing ``suggest_int`` / ``suggest_float`` (an Optuna
            trial). It supplies the width of both shared layers, the
            LeakyReLU negative slope and the Adam learning rate.
        n_features: Number of input features per sample. Defaults to 39, the
            width previously hard-coded here, so existing calls are unchanged.

    Returns:
        A compiled ``keras.Model`` with one input and two linear output heads
        named 'IMF' and 'PPPF'.
    """
    # Hyperparameters proposed (or replayed) by the trial.
    n_neurons_1 = trial.suggest_int('num_neurons_1', 32, 128)
    n_neurons_2 = trial.suggest_int('num_neurons_2', 128, 256)
    alpha_leakyrelu = trial.suggest_float('alpha_leakyrelu', 1e-4, 1e-1, log=True)
    learningrate = trial.suggest_float('learningrate', 1e-5, 1e-2, log=True)

    # Shared trunk: two dense layers feeding both output heads.
    input_layer = keras.Input(shape=(n_features,))
    shared_layer_1 = keras.layers.Dense(n_neurons_1, activation='relu')(input_layer)
    # Chose this activation upon researching more on the dataset and the
    # linked introductory paper.
    shared_layer_2 = keras.layers.Dense(
        n_neurons_2,
        activation=keras.layers.LeakyReLU(negative_slope=alpha_leakyrelu)
    )(shared_layer_1)

    # One single-unit linear head per regression target. The layer names must
    # match the keys used in the loss/metrics dicts below.
    IMF = keras.layers.Dense(1, activation='linear', name='IMF')(shared_layer_2)
    PPPF = keras.layers.Dense(1, activation='linear', name='PPPF')(shared_layer_2)

    model = keras.Model(inputs=input_layer, outputs=[IMF, PPPF])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learningrate),
        loss={'IMF': 'mse', 'PPPF': 'mse'},  # mean squared error as loss
        metrics={'IMF': ['mae'], 'PPPF': ['mae']}  # mean absolute error as metric
    )

    return model

#defining function for Optuna
def hyperparam(trial):
    """Optuna objective: train a trial's model briefly and score it.

    Builds the model for ``trial`` via ``mymodel``, fits it on the
    module-level ``X_train`` / ``Y_train`` for 10 epochs with the last 20% of
    the samples held out for validation, and returns the validation loss of
    the final epoch.
    """
    net = mymodel(trial)

    fit_log = net.fit(
        X_train,               # input features
        Y_train,               # targets, keyed by output name
        validation_split=0.2,  # 20% validation split
        epochs=10,
        batch_size=32,
        verbose=0,
    )

    # Score the trial by the validation loss of the last epoch.
    return fit_log.history['val_loss'][-1]


# Fix the random seeds so the search is repeatable from a fresh kernel.
# (GPU kernels may still introduce some nondeterminism.)
SEED = 42
keras.utils.set_random_seed(SEED)  # seeds Python's random, NumPy and the backend

study = optuna.create_study(
    direction='minimize',  # minimize validation loss
    sampler=optuna.samplers.TPESampler(seed=SEED),  # default sampler, now seeded
)
study.optimize(hyperparam, n_trials=25)

print("Best Hyperparameters:", study.best_params)

# Build a fresh, untrained model from the winning trial: best_trial replays
# its stored parameter values through the same suggest_* calls in mymodel.
best_model = mymodel(study.best_trial)


[I 2025-01-27 13:16:54,587] A new study created in memory with name: no-name-c0b2629d-ab4b-4e4d-8956-14804bcc5e49
[I 2025-01-27 13:17:17,085] Trial 0 finished with value: 0.01987912505865097 and parameters: {'num_neurons_1': 66, 'num_neurons_2': 137, 'alpha_leakyrelu': 0.002717612414215528, 'learningrate': 0.00011847944267288263}. Best is trial 0 with value: 0.01987912505865097.
[I 2025-01-27 13:17:37,544] Trial 1 finished with value: 0.020603425800800323 and parameters: {'num_neurons_1': 114, 'num_neurons_2': 171, 'alpha_leakyrelu': 0.00029395278223290035, 'learningrate': 8.52695981142433e-05}. Best is trial 0 with value: 0.01987912505865097.
[I 2025-01-27 13:17:53,898] Trial 2 finished with value: 0.09100493788719177 and parameters: {'num_neurons_1': 33, 'num_neurons_2': 178, 'alpha_leakyrelu': 0.0007546203864571636, 'learningrate': 1.8719345088285616e-05}. Best is trial 0 with value: 0.01987912505865097.
[I 2025-01-27 13:18:18,206] Trial 3 finished with value: 0.004709638189524412 a

Best Hyperparameters: {'num_neurons_1': 69, 'num_neurons_2': 225, 'alpha_leakyrelu': 0.005240804515293498, 'learningrate': 0.009919849853637546}


## Training Best Model

In [None]:
# Train the tuned model for the full 50 epochs. The returned History is kept
# so the per-epoch losses/metrics can be inspected or plotted afterwards, and
# so its repr is not echoed as the cell output.
best_history = best_model.fit(
    X_train,
    Y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    verbose=1
)
#on tuning results were:
#Best Hyperparameters: {'num_neurons_1': 69, 'num_neurons_2': 225, 'alpha_leakyrelu': 0.005240804515293498, 'learningrate': 0.009919849853637546}

Epoch 1/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - IMF_loss: 0.0636 - IMF_mae: 0.1522 - PPPF_loss: 0.1074 - PPPF_mae: 0.2053 - loss: 0.1710 - val_IMF_loss: 0.0030 - val_IMF_mae: 0.0458 - val_PPPF_loss: 0.0087 - val_PPPF_mae: 0.0744 - val_loss: 0.0117
Epoch 2/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - IMF_loss: 0.0016 - IMF_mae: 0.0322 - PPPF_loss: 0.0067 - PPPF_mae: 0.0647 - loss: 0.0083 - val_IMF_loss: 4.8035e-04 - val_IMF_mae: 0.0170 - val_PPPF_loss: 0.0058 - val_PPPF_mae: 0.0601 - val_loss: 0.0063
Epoch 3/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - IMF_loss: 8.6664e-04 - IMF_mae: 0.0231 - PPPF_loss: 0.0058 - PPPF_mae: 0.0602 - loss: 0.0067 - val_IMF_loss: 3.6332e-04 - val_IMF_mae: 0.0152 - val_PPPF_loss: 0.0053 - val_PPPF_mae: 0.0572 - val_loss: 0.0057
Epoch 4/50
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - IMF_loss: 5.9578e-04 - IMF_mae: 0.01

<keras.src.callbacks.history.History at 0x7854915fff90>

## Testing

Predicting IMF and PPPF

In [None]:
predictions = best_model.predict(X_test)

# The model is built with outputs=[IMF, PPPF], so predict() is expected to
# return one array per head in that order. Fail with a clear message if that
# is not the case, instead of hitting a NameError in the prints below.
if len(predictions) != 2:
    raise ValueError(
        f"Expected 2 prediction arrays (IMF, PPPF), got {len(predictions)}"
    )
IMF_predictions, PPPF_predictions = predictions

print("IMF Predictions (first 5):", IMF_predictions[:5])
print("PPPF Predictions (first 5):", PPPF_predictions[:5])

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
IMF Predictions (first 5): [[1.3192359]
 [1.3246114]
 [1.3152871]
 [1.3392308]
 [1.3052394]]
PPPF Predictions (first 5): [[1.9092442]
 [1.8791753]
 [1.9755075]
 [1.8564227]
 [1.9905673]]


Calculating Test Loss

In [None]:
# best_model was already compiled with these losses and metrics when mymodel
# built it, so it is evaluated directly; recompiling would only replace the
# trained optimizer with a fresh one.
# return_dict=True yields the metrics keyed by name, so nothing depends on
# the positional order of the returned values.
results = best_model.evaluate(X_test, Y_test, return_dict=True)

test_loss = results['loss']       # sum of the two per-output MSE losses
imf_mse = results['IMF_loss']
pppf_mse = results['PPPF_loss']
imf_mae = results['IMF_mae']
pppf_mae = results['PPPF_mae']


print(f"Test Loss: {test_loss}")
print(f"Test IMF MAE: {imf_mae}")
print(f"Test IMF MSE: {imf_mse}")
print(f"Test PPPF MAE: {pppf_mae}")
print(f"Test PPPF MSE: {pppf_mse}")

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - IMF_loss: 5.2701e-04 - IMF_mae: 0.0056 - PPPF_loss: 0.0019 - PPPF_mae: 0.0323 - loss: 0.0024  
Test Loss: 0.002276485785841942
Test IMF MAE: 0.005266352090984583
Test IMF MSE: 0.000511332880705595
Test PPPF MAE: 0.031232086941599846
Test PPPF MSE: 0.001768571324646473
