In [1]:
import sys
import os
if (colab := 'google.colab' in sys.modules):
    from google.colab import drive
    drive.mount('/content/drive')
    sys.path.insert(0,'/content/drive/Shareddrives/Project_ML_23/machine-learning-project')
    sys.path.insert(0,'/content/drive/Shareddrives/Project_ML_23/machine-learning-project/src/utils')
    !pip install optuna
    !pip install scikit-learn
    !pip install scikeras
    TRAIN_DATA = os.path.join('/content/drive/Shareddrives/Project_ML_23/machine-learning-project', 'datasets', 'cup2023','ML-CUP23-TR.csv')
    IMAGES_FOLDER = os.path.join('/content/drive/Shareddrives/Project_ML_23/machine-learning-project', 'images', 'cup2023', 'neural_network')
    MODEL_FOLDER = os.path.join('/content/drive/Shareddrives/Project_ML_23/machine-learning-project', 'trained_models', 'cup2023')
else :   
    TRAIN_DATA = os.path.join('..', '..', 'datasets', 'cup2023', 'ML-CUP23-TR.csv')
    IMAGES_FOLDER = os.path.join('..', '..', 'images', 'cup2023', 'neural_network')
    MODEL_FOLDER = os.path.join('..', '..', 'trained_models', 'cup2023')

In [2]:
sys.path.append('../utils')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set_theme(style='darkgrid')
from keras import backend as K


from utils import save_plot, mean_euclidean_error, root_mean_squared_error, multidim_r2
from NN import NeuralNetwork
from joblib import dump

In [3]:
# Column 0 of the CSV holds the row index, so only columns 1..13 are read.
columns_to_read = list(range(1, 14))

# The file has no header row; text following '#' is ignored as a comment.
# Every retained column is cast to float64.
df_train = pd.read_csv(
    TRAIN_DATA,
    delimiter=',',
    header=None,
    comment='#',
    usecols=columns_to_read,
).astype('float64')
df_train.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,-0.91728,-0.712727,-0.989904,0.992819,0.993649,0.995543,0.711074,0.407645,-0.688548,0.61689,7.897453,-35.936382,21.077147
1,-0.858784,0.998755,-0.998396,0.999909,0.316503,-0.951897,-0.163139,0.980982,0.661759,-0.800155,-9.330632,19.901571,6.069154
2,-0.990441,0.958726,-0.998675,0.997216,0.987166,0.356483,-0.279689,0.599163,-0.68463,0.922901,14.8494,3.37409,19.667479
3,0.937117,0.984474,-0.61242,0.999812,0.728623,-0.539962,-0.165939,0.999352,-0.921444,-0.974766,-46.591854,13.734777,17.9536
4,-0.906628,-0.884567,-0.932487,0.941037,0.978134,0.998179,0.749606,-0.590599,-0.508268,0.691798,8.2175,-45.885254,14.894251


In [4]:
# Ten input features followed by three target coordinates.
features = [f'feature_{idx}' for idx in range(1, 11)]
targets = ['target_' + axis for axis in ('x', 'y', 'z')]

# Replace the positional column labels with descriptive names.
new_column_names = [*features, *targets]
df_train.columns = new_column_names

df_train.head()

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,target_x,target_y,target_z
0,-0.91728,-0.712727,-0.989904,0.992819,0.993649,0.995543,0.711074,0.407645,-0.688548,0.61689,7.897453,-35.936382,21.077147
1,-0.858784,0.998755,-0.998396,0.999909,0.316503,-0.951897,-0.163139,0.980982,0.661759,-0.800155,-9.330632,19.901571,6.069154
2,-0.990441,0.958726,-0.998675,0.997216,0.987166,0.356483,-0.279689,0.599163,-0.68463,0.922901,14.8494,3.37409,19.667479
3,0.937117,0.984474,-0.61242,0.999812,0.728623,-0.539962,-0.165939,0.999352,-0.921444,-0.974766,-46.591854,13.734777,17.9536
4,-0.906628,-0.884567,-0.932487,0.941037,0.978134,0.998179,0.749606,-0.590599,-0.508268,0.691798,8.2175,-45.885254,14.894251


In [5]:
from sklearn.model_selection import train_test_split

X = df_train[features].to_numpy()
y = df_train[targets].to_numpy()

# Hold out 20% of the data as the final test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Carve the validation set out of the training portion only. Re-splitting the
# full (X, y) with the same seed would reproduce the split above, making the
# validation set identical to the test set and leaking it into model selection.
X_train_val, X_valid, y_train_val, y_valid = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# Create model

In [6]:
from sklearn.metrics import make_scorer

# MEE is an error, so it is registered as lower-is-better; scikit-learn then
# reports the negated value as the score.
mee_scorer = make_scorer(
    mean_euclidean_error,
    greater_is_better=False,
)


In [7]:
from sklearn.model_selection import cross_val_score
import optuna
from scikeras.wrappers import KerasRegressor
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import Pipeline
from keras.callbacks import EarlyStopping


def objective_function(trial):
    """Optuna objective: cross-validated mean Euclidean error of a sampled network.

    Args:
        trial: Optuna trial used to sample every hyperparameter.

    Returns:
        The MEE averaged over a 5-fold cross-validation on (X_train, y_train);
        lower is better.
    """
    n_hidden_layers = trial.suggest_int('num_layers', 1, 5)

    # Funnel-shaped architecture: each layer may have at most as many units as
    # the layer before it, so the upper bound shrinks as layers are sampled.
    layer_sizes = []
    unit_cap = 512
    for layer_idx in range(n_hidden_layers):
        unit_cap = trial.suggest_int(f'num_units_l{layer_idx}', 32, unit_cap, step=32)
        layer_sizes.append(unit_cap)

    # Regularisation and optimiser settings.
    input_dropout = trial.suggest_float('dropout_input_rate', 0.0, 0.5)
    hidden_dropouts = []
    for layer_idx in range(n_hidden_layers):
        hidden_dropouts.append(trial.suggest_float(f'dropout_rate_l{layer_idx}', 0.0, 0.5))
    lr = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    sgd_momentum = trial.suggest_float('momentum', 0.0, 1.0)
    l2_decay = trial.suggest_float('weight_decay', 1e-6, 1e-2, log=True)
    nesterov = trial.suggest_categorical('use_nesterov', [True, False])

    # Training-loop settings.
    n_epochs = trial.suggest_int('epochs', 5, 100)
    minibatch_size = trial.suggest_int('batch_size', 16, 128)
    early_stop_patience = trial.suggest_int('patience', 5, 20)

    def build_model():
        """Build the network described by the hyperparameters sampled above."""
        network = NeuralNetwork(
            input_dimension=10,
            output_dimension=3,
            architecture=layer_sizes,
            activation='relu',
            dropout_input_rate=input_dropout,
            dropout_hidden_rate=hidden_dropouts,
            learning_rate=lr,
            momentum=sgd_momentum,
            weight_decay=l2_decay,
            use_nesterov=nesterov
        )
        return network.build_model()

    # Early stopping watches the training loss.
    early_stopping = EarlyStopping(monitor='loss', patience=early_stop_patience)
    regressor = KerasRegressor(
        build_model,
        epochs=n_epochs,
        batch_size=minibatch_size,
        verbose=0,
        callbacks=[early_stopping],
    )
    pipeline = Pipeline([
        ('scaler', RobustScaler()),
        ('model', regressor),
    ])

    # Hold-out alternative to cross-validation (use instead of the lines below):
    #   pipeline.fit(X_train_val, y_train_val)
    #   return mean_euclidean_error(y_valid, pipeline.predict(X_valid))

    # The scorer returns the negated MEE, so flip the sign back before
    # handing the value to a study that minimises.
    fold_scores = cross_val_score(pipeline, X_train, y_train, cv=5, scoring=mee_scorer)
    return -fold_scores.mean()

# Minimise the cross-validated MEE over 100 trials; n_jobs=-1 lets Optuna run
# as many trials in parallel as there are CPUs.
study = optuna.create_study(direction='minimize')
study.optimize(
    objective_function,
    n_trials=100,
    n_jobs=-1,
    show_progress_bar=True,
)

  from .autonotebook import tqdm as notebook_tqdm
[I 2024-01-14 13:14:15,032] A new study created in memory with name: no-name-2c541a1d-930b-462b-ba36-93b81af3da35
  0%|          | 0/100 [00:00<?, ?it/s]2024-01-14 13:14:15.146731: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2024-01-14 13:14:15.146781: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2024-01-14 13:14:15.146812: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2024-01-14 13:14:15.147289: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-01-14 13:14:15.147540: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus i



In [None]:
# Summary of the search: winning trial, its hyperparameters, and its MEE.
for best_item in (study.best_trial, study.best_params, study.best_value):
    print(best_item)

In [None]:
best_params = study.best_params

# Optuna stores only the names that were actually sampled ('num_layers',
# 'num_units_l{i}', 'dropout_rate_l{i}', ...). There is no 'architecture' or
# 'dropout_hidden_rate' entry, so the per-layer lists are rebuilt here.
best_num_layers = best_params['num_layers']
best_architecture = [best_params[f'num_units_l{i}'] for i in range(best_num_layers)]
best_dropout_hidden_rate = [best_params[f'dropout_rate_l{i}'] for i in range(best_num_layers)]


def build_model():
    """Build the network using the best hyperparameters found by the study."""
    model = NeuralNetwork(
        input_dimension=10,
        output_dimension=3,
        architecture=best_architecture,
        activation='relu',
        dropout_input_rate=best_params['dropout_input_rate'],
        dropout_hidden_rate=best_dropout_hidden_rate,
        learning_rate=best_params['learning_rate'],
        momentum=best_params['momentum'],
        weight_decay=best_params['weight_decay'],
        use_nesterov=best_params['use_nesterov']
    ).build_model()
    return model


# Mirror the training setup that was scored during the search, including the
# early-stopping rule, so the tuned 'patience' value is actually applied.
nn = KerasRegressor(
    build_model,
    epochs=best_params['epochs'],
    batch_size=best_params['batch_size'],
    verbose=0,
    callbacks=[EarlyStopping(monitor='loss', patience=best_params['patience'])]
)

In [None]:
# Same preprocessing as in the search: robust scaling followed by the network.
final_steps = [
    ('scaler', RobustScaler()),
    ('neuralnetwork', nn),
]
final_model = Pipeline(final_steps)

# Fit on the training split.
final_model.fit(X_train, y_train)

# Model evaluation on the test set

In [None]:
# Predict on the held-out test split and compute the three reported metrics.
y_pred = final_model.predict(X_test)

mee = mean_euclidean_error(y_test, y_pred)
r2 = multidim_r2(y_test, y_pred)
rmse = root_mean_squared_error(y_test, y_pred)

for metric_name, metric_value in (('MEE', mee), ('R2', r2), ('RMSE', rmse)):
    print(f'{metric_name}:', metric_value)

# Learning curve

In [None]:
from sklearn.model_selection import learning_curve

# Refit the final pipeline on growing fractions of the training split and score
# each fit with 5-fold cross-validation (scores are negated MEE values).
train_sizes, train_scores, validation_scores = learning_curve(
    final_model,
    X_train,
    y_train,
    train_sizes=[0.1, 0.33, 0.55, 0.78, 1.],
    cv=5,
    scoring=mee_scorer,
    n_jobs=-1,
    verbose=2,
)

In [None]:
# Aggregate over axis 1, giving one mean and one standard deviation per
# training-set size.
train_scores_mean = train_scores.mean(axis=1)
train_scores_std = train_scores.std(axis=1)
validation_scores_mean = validation_scores.mean(axis=1)
validation_scores_std = validation_scores.std(axis=1)

In [None]:
plt.figure()
plt.title('Learning curve', fontweight='bold', fontsize=16)
plt.xlabel('Training examples', fontweight='bold')
plt.ylabel('Score', fontweight='bold')
plt.grid(True)

color1 = sns.dark_palette((20, 60, 50), input='husl')[-1]
color2 = sns.dark_palette('seagreen')[-1]

# One entry per curve: (mean scores, standard deviations, colour, legend label).
learning_curves = (
    (train_scores_mean, train_scores_std, color1, 'Training score'),
    (validation_scores_mean, validation_scores_std, color2, 'Cross-validation score'),
)

# Shaded bands show mean ± one standard deviation at each training-set size,
# i.e. how much the score varies across folds.
for curve_mean, curve_std, curve_color, curve_label in learning_curves:
    plt.fill_between(
        train_sizes,
        curve_mean - curve_std,
        curve_mean + curve_std,
        alpha=0.2,
        color=curve_color,
    )

# Mean score lines drawn over the bands.
for curve_mean, curve_std, curve_color, curve_label in learning_curves:
    sns.lineplot(x=train_sizes, y=curve_mean, marker='s', color=curve_color, label=curve_label)

plt.legend(loc='lower right')

save_plot(plt, IMAGES_FOLDER, 'learning_curve_optuna')
plt.show()

# Save model

In [None]:
# Create the destination folder first: dump() does not create missing
# directories, and failing here would waste the whole training run.
os.makedirs(MODEL_FOLDER, exist_ok=True)

# Persist the fitted pipeline (scaler + network) with zlib compression level 3.
model_path = os.path.join(MODEL_FOLDER, 'NN_optuna_model.joblib')
dump(final_model, model_path, compress=3)