## Import

In [1]:
from network.neural_network import NeuralNetwork
from network.neural_network_utility import evaluate
from math_functions.function_enums import LossFunction, ActivationFunction, Metrics
from utilities.dataset_reader import read_cup, read_cup_ext_test
from model_selection.validation import kfold_cv_ensemble
from model_selection.grid import grid_search, get_top_n_results, get_all_results
from utilities.utils import count_configs, get_list_models, plot_over_epochs, save_array_with_comments
from network.ensemble import Ensemble
import numpy as np
import matplotlib.pyplot as plt
import json

## Path for development set split

In [None]:
# Directory holding the persisted development-set split
# (model-selection part and internal-assessment part).
_SPLIT_DIR = 'datasets/cup/grid_search'

# Model-selection (training/validation) inputs and targets.
MODEL_SEL_DATA_PATH = f'{_SPLIT_DIR}/CUP_model_selection_data.csv'
MODEL_SEL_TARGETS_PATH = f'{_SPLIT_DIR}/CUP_model_selection_targets.csv'

# Model-assessment (internal test) inputs and targets.
MODEL_ASSESS_DATA_PATH = f'{_SPLIT_DIR}/CUP_model_assessment_data.csv'
MODEL_ASSESS_TARGETS_PATH = f'{_SPLIT_DIR}/CUP_model_assessment_targets.csv'

## Save training data and internal test data

In [None]:
# One-off cell, intentionally left commented out: it splits the CUP training file
# with `holdout` and writes the four CSV files that the next cell loads, so the
# same split is reused across runs.
# NOTE(review): `holdout` is not among the imports at the top of this notebook;
# import it before re-enabling this cell.
# NOTE(review): presumably 0.8 is the fraction kept for model selection — confirm
# against the `holdout` signature.
# data, targets = read_cup('datasets/cup/CUP_TR.csv')
# train_data, test_data, train_targets, test_targets = holdout(data, targets, 0.8, shuffle_set=True)

# np.savetxt(MODEL_SEL_DATA_PATH, train_data, delimiter=',')
# np.savetxt(MODEL_SEL_TARGETS_PATH, train_targets, delimiter=',')
# np.savetxt(MODEL_ASSESS_DATA_PATH, test_data, delimiter=',')
# np.savetxt(MODEL_ASSESS_TARGETS_PATH, test_targets, delimiter=',')

## Load training data and internal test data

In [None]:
# Read the persisted split back from disk (comma-separated numeric files),
# in the order: selection inputs, selection targets, assessment inputs, assessment targets.
train_data, train_targets, test_data, test_targets = [
    np.loadtxt(csv_path, delimiter=',')
    for csv_path in (
        MODEL_SEL_DATA_PATH,
        MODEL_SEL_TARGETS_PATH,
        MODEL_ASSESS_DATA_PATH,
        MODEL_ASSESS_TARGETS_PATH,
    )
]

## Hyperparameter tuning

In [None]:
# Settings shared by every configuration explored in the first (coarse) grid.
fixed_params = {
    'n_output_units': 3,
    'training_loss_type_value': LossFunction.MSE.value,
    'validation_loss_type_value': LossFunction.MSE.value,
    'evaluation_metric_type_value': Metrics.MEE.value,
    'activation_hidden_type_value': ActivationFunction.SIGMOID.value,
    'activation_output_type_value': ActivationFunction.IDENTITY.value,
    'classification': False,
    'early_stopping': True,
    'fast_stopping': False,
    'patience': 20,
    'tolerance': 0.1,
    'epochs': 500,
    'linear_decay': True,
    'verbose': False,
}

# Candidate values for each hyperparameter that the grid varies.
grid_params = {
    'batch_size': [1, 64, 128],
    'hidden_layer_sizes': [[64, 32], [128, 64], [128, 128], [64, 64, 32]],
    'learning_rate': [0.0005, 0.005, 0.05, 0.1],
    'mom_alpha': [0, 0.6, 0.7, 0.9],
    'reg_lambda': [0, 0.0001, 0.00001],
    'nesterov': [True, False],
    'tao': [200, 500],
}

# 3-fold grid search; results go to the "ml_cup_first_grid_full" results file
# that the following cells read back.
grid_metrics = [Metrics.MSE.value, Metrics.MEE.value]
results = grid_search(
    k_folds=3,
    data=train_data,
    target=train_targets,
    metrics=grid_metrics,
    fixed_param=fixed_params,
    grid_param=grid_params,
    file_name_results="ml_cup_first_grid_full",
    verbose=False,
    plot=True,
    log_scale=True,
)

#### Examining results

In [None]:
# Best 100 / 50 / 20 configurations of the first grid, ranked by mean validation MEE (lower first).
first_grid_file = 'ml_cup_first_grid_full.json'
top_100_merged, top_50_merged, top_20_merged = [
    get_top_n_results(first_grid_file, top_n, 'validation_mee_mean', ascending=True)
    for top_n in (100, 50, 20)
]

In [None]:
# How many configurations the first grid explored in total.
results = get_all_results('ml_cup_first_grid_full.json')
print(f'total number of explored configs: {len(results)}')

# Map each top-100 configuration (first element of the entry) to its mean validation MEE.
validation_mee_means = {}
for entry in top_100_merged:
    validation_mee_means[entry[0]] = entry[1]['validation_mee_mean']

for model_config in validation_mee_means:
    print(f"{model_config}: {validation_mee_means[model_config]}")

In [None]:
# Run count_configs on each of the three top-N cuts, each under its own header.
for header, top_results in (
    ('TOP-100----------------', top_100_merged),
    ('\nTOP-50----------------', top_50_merged),
    ('\nTOP-20----------------', top_20_merged),
):
    print(header)
    count_configs(top_results)

### Second grid on batch size 64

batch_size 64 grid

In [None]:
# Settings held constant in the refined grid for mini-batches of 64
# (architecture, batch size and Nesterov momentum are now fixed).
fixed_params = {
    'n_output_units': 3,
    'training_loss_type_value': LossFunction.MSE.value,
    'validation_loss_type_value': LossFunction.MSE.value,
    'evaluation_metric_type_value': Metrics.MEE.value,
    'activation_hidden_type_value': ActivationFunction.SIGMOID.value,
    'activation_output_type_value': ActivationFunction.IDENTITY.value,
    'classification': False,
    'early_stopping': True,
    'fast_stopping': False,
    'patience': 20,
    'tolerance': 0.1,
    'epochs': 2000,
    'linear_decay': True,
    'batch_size': 64,
    'hidden_layer_sizes': [128, 128],
    'nesterov': True,
    'verbose': False,
}

# Hyperparameters still being searched, with their candidate values.
grid_params = {
    'learning_rate': [0.02, 0.04, 0.05, 0.08, 0.1, 0.2],
    'mom_alpha': [0, 0.6, 0.7, 0.9],
    'reg_lambda': [0, 0.00001],
    'tao': [500, 1000],
}

# 5-fold grid search; results go to the "ml_cup_second_grid_batch_64" results file.
grid_metrics = [Metrics.MSE.value, Metrics.MEE.value]
results = grid_search(
    k_folds=5,
    data=train_data,
    target=train_targets,
    metrics=grid_metrics,
    fixed_param=fixed_params,
    grid_param=grid_params,
    file_name_results="ml_cup_second_grid_batch_64",
    verbose=False,
    plot=True,
    log_scale=True,
)

### Second grid on batch_size 1

batch_size 1 grid

In [None]:
# Settings held constant in the refined grid for batch_size 1
# (architecture and batch size fixed, Nesterov momentum disabled).
fixed_params = {
    'n_output_units': 3,
    'training_loss_type_value': LossFunction.MSE.value,
    'validation_loss_type_value': LossFunction.MSE.value,
    'evaluation_metric_type_value': Metrics.MEE.value,
    'activation_hidden_type_value': ActivationFunction.SIGMOID.value,
    'activation_output_type_value': ActivationFunction.IDENTITY.value,
    'classification': False,
    'early_stopping': True,
    'fast_stopping': False,
    'patience': 20,
    'tolerance': 0.1,
    'epochs': 2000,
    'linear_decay': True,
    'batch_size': 1,
    'hidden_layer_sizes': [128, 128],
    'nesterov': False,
    'verbose': False,
}

# Hyperparameters still being searched, with their candidate values.
grid_params = {
    'learning_rate': [0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008],
    'mom_alpha': [0.5, 0.6, 0.7],
    'reg_lambda': [0, 0.0001, 0.00001, 0.00005],
    'tao': [500, 1000],
}

# 5-fold grid search; results go to the "ml_cup_second_grid_batch_1" results file.
grid_metrics = [Metrics.MSE.value, Metrics.MEE.value]
results = grid_search(
    k_folds=5,
    data=train_data,
    target=train_targets,
    metrics=grid_metrics,
    fixed_param=fixed_params,
    grid_param=grid_params,
    file_name_results="ml_cup_second_grid_batch_1",
    verbose=False,
    plot=True,
    log_scale=True,
)

### Results examination

In [None]:
# Best 100 / 50 / 20 configurations of the batch-64 grid, ranked by mean validation MEE (lower first).
batch_64_grid_file = 'ml_cup_second_grid_batch_64.json'
top_100_second_batch_64, top_50_second_batch_64, top_20_second_batch_64 = [
    get_top_n_results(batch_64_grid_file, top_n, 'validation_mee_mean', ascending=True)
    for top_n in (100, 50, 20)
]

In [None]:
# How many configurations the batch-64 grid explored in total.
results = get_all_results('ml_cup_second_grid_batch_64.json')
print(f'total number of explored configs: {len(results)}')

# Map each top-100 configuration (first element of the entry) to its mean validation MEE.
validation_mee_means = {}
for entry in top_100_second_batch_64:
    validation_mee_means[entry[0]] = entry[1]['validation_mee_mean']

for model_config in validation_mee_means:
    print(f"{model_config}: {validation_mee_means[model_config]}")

In [None]:
# Best 100 / 50 / 20 configurations of the batch-1 grid, ranked by mean validation MEE (lower first).
batch_1_grid_file = 'ml_cup_second_grid_batch_1.json'
top_100_second_batch_1, top_50_second_batch_1, top_20_second_batch_1 = [
    get_top_n_results(batch_1_grid_file, top_n, 'validation_mee_mean', ascending=True)
    for top_n in (100, 50, 20)
]

In [None]:
# How many configurations the batch-1 grid explored in total.
results = get_all_results('ml_cup_second_grid_batch_1.json')
print(f'total number of explored configs: {len(results)}')

# Map each top-100 configuration (first element of the entry) to its mean validation MEE.
validation_mee_means = {}
for entry in top_100_second_batch_1:
    validation_mee_means[entry[0]] = entry[1]['validation_mee_mean']

for model_config in validation_mee_means:
    print(f"{model_config}: {validation_mee_means[model_config]}")

### Try the top model from the batch_size 1 grid

We plot the learning curves against the internal test set. The internal test set is not used for any decision here; it only appears in the plot.

In [None]:
# Full configuration of the single network retrained in this cell.
# NOTE(review): learning_rate, mom_alpha, reg_lambda and tao all lie inside the
# batch_size=1 second grid, which fixed nesterov=False, while nesterov=True is
# used here — confirm this is the intended configuration.
top_model_params = {
    'n_output_units': 3,
    'training_loss_type_value': LossFunction.MSE.value,
    'validation_loss_type_value': LossFunction.MSE.value,
    'evaluation_metric_type_value': Metrics.MEE.value,
    'activation_hidden_type_value': ActivationFunction.SIGMOID.value,
    'activation_output_type_value': ActivationFunction.IDENTITY.value,
    'classification': False,
    'early_stopping': False,
    'fast_stopping': False,
    'patience': 20,
    'tolerance': 0.1,
    'epochs': 2000,
    'linear_decay': True,
    'batch_size': 1,
    'hidden_layer_sizes': [128, 128],
    'nesterov': True,
    'learning_rate': 0.006,
    'mom_alpha': 0.6,
    'reg_lambda': 0.00005,
    'tao': 1000,
    'verbose': False,
}
net = NeuralNetwork(**top_model_params)

# Early stopping is off; the internal test set is passed in the validation slot
# so that its curves are recorded for the plots in the next cell.
# Presumably training halts once the training loss reaches tr_loss_stopping_point
# — verify against NeuralNetwork.train_net.
net.train_net(
    train_data=train_data,
    train_target=train_targets,
    val_data=test_data,
    val_target=test_targets,
    tr_loss_stopping_point=0.07113932071043247,
)
net.predict_and_evaluate(test_data, test_targets, Metrics.MEE.value)

In [None]:
# Legend and axis-scale options common to both figures.
shared_plot_options = dict(y_legend='training set', y_prime_legend='test set', yscale='log')

# MSE loss per epoch: training set vs. internal test set.
plot_over_epochs(
    y_values=net.training_losses,
    title='loss over epochs',
    y_label='mse',
    y_prime_values=net.validation_losses,
    **shared_plot_options,
)

# MEE score per epoch: training set vs. internal test set.
plot_over_epochs(
    y_values=net.training_evaluations,
    title='score over epochs',
    y_label='mee',
    y_prime_values=net.validation_evaluations,
    **shared_plot_options,
)

## Ensemble

After picking the 10 best-performing models, we decided to build an ensemble in order to improve performance.

Note: we won't train the models using early stopping at this stage. We will use more data to train the models, and stop each one at the mean training loss of its best epoch (w.r.t. the internal validation loss) obtained during the k-fold CV of the grid search.

In [6]:
# Ten best configurations of each refined grid, ranked by mean validation MEE.
ranking_key = 'validation_mee_mean'
top_10_second_batch_1 = get_top_n_results('ml_cup_second_grid_batch_1.json', 10, ranking_key, ascending=True)
top_10_second_batch_64 = get_top_n_results('ml_cup_second_grid_batch_64.json', 10, ranking_key, ascending=True)

# Pool the two lists and order the pooled candidates by mean validation MEE (lowest first).
merged_data = top_10_second_batch_1 + top_10_second_batch_64
sorted_data = sorted(merged_data, key=lambda candidate: candidate[1]['validation_mee_mean'])

# Overall ten best candidates across both grids.
top_10_models_results = sorted_data[:10]

# Mean training loss recorded for each selected candidate; later passed as the
# training stopping point when the models are retrained without early stopping.
tr_stopping_points = []
for candidate in top_10_models_results:
    tr_stopping_points.append(candidate[1]['tr_losses_mean'])

# Hyperparameter configurations of the selected candidates.
list_models = get_list_models(top_10_models_results)

First, we run a k-fold cross-validation on the ensemble (and on each constituent model).

In [None]:
# Settings shared by every ensemble member. Early stopping is disabled: each
# member is trained with its own stopping point from tr_stopping_points.
fixed_params = dict(
    n_output_units=3,
    training_loss_type_value=LossFunction.MSE.value,
    validation_loss_type_value=LossFunction.MSE.value,
    evaluation_metric_type_value=Metrics.MEE.value,
    activation_hidden_type_value=ActivationFunction.SIGMOID.value,
    activation_output_type_value=ActivationFunction.IDENTITY.value,
    epochs=2000,
    nesterov=False,
    classification=False,
    early_stopping=False,
    fast_stopping=False,
    linear_decay=True,
    patience=20,
    tolerance=0.1,
    hidden_layer_sizes=[128, 128],
    verbose=False
)

# build ensemble
models = []
for model in list_models:
    # Merge into ONE dict so that a per-model value overrides the shared default.
    # Passing the two dicts as separate ** arguments raises TypeError as soon as
    # a model configuration repeats a key that is also in fixed_params.
    params = {**fixed_params, **model}
    models.append(NeuralNetwork(**params))
ensemble = Ensemble(models)

# cross-validation on ensemble (10 folds), for each member and for the ensemble as a whole
kfold_model_result, kfold_ensemble_result = kfold_cv_ensemble(
    10,
    train_data,
    train_targets,
    [Metrics.MSE.value, Metrics.MEE.value],
    ensemble,
    tr_stopping_points=tr_stopping_points,
    verbose=False,
)

# Persist per-model and ensemble results in a single JSON document.
with open('json_results/ensemble_top_10_tao_2000.json', 'w', encoding='utf-8') as f:
    json.dump({**kfold_model_result, **kfold_ensemble_result}, f, indent=4)

## Model Selection and Model Assessment

The results of the k-fold cross-validation have shown that the best-performing model is indeed the ensemble of the top 10 models. We therefore retrain the ensemble on the entire internal training set.

In [None]:
# Settings shared by every ensemble member. Early stopping is disabled: each
# member is trained with its own stopping point from tr_stopping_points.
fixed_params = dict(
    n_output_units=3,
    training_loss_type_value=LossFunction.MSE.value,
    validation_loss_type_value=LossFunction.MSE.value,
    evaluation_metric_type_value=Metrics.MEE.value,
    activation_hidden_type_value=ActivationFunction.SIGMOID.value,
    activation_output_type_value=ActivationFunction.IDENTITY.value,
    epochs=2000,
    nesterov=False,
    classification=False,
    early_stopping=False,
    fast_stopping=False,
    linear_decay=True,
    patience=20,
    tolerance=0.1,
    hidden_layer_sizes=[128, 128],
    verbose=False
)

# build ensemble
models = []
for model in list_models:
    # Merge into ONE dict so that a per-model value overrides the shared default.
    # Passing the two dicts as separate ** arguments raises TypeError as soon as
    # a model configuration repeats a key that is also in fixed_params.
    params = {**fixed_params, **model}
    models.append(NeuralNetwork(**params))
ensemble = Ensemble(models)

# train ensemble on the entire model-selection split; the internal test set is
# passed in the validation slot (its curves are plotted in the following cells)
ensemble.train(train_data, train_targets, test_data, test_targets, tr_stopping_points=tr_stopping_points)

# evaluate every member, then the ensemble, on the internal test set (MEE)
results = {}
for index, model in enumerate(ensemble.models):
    y_pred = model.predict(test_data)
    results[f"model_{index+1}"] = evaluate(y_pred, test_targets, Metrics.MEE.value)

y_pred = ensemble.predict(test_data)
results["ensemble"] = evaluate(y_pred, test_targets, Metrics.MEE.value)

# save results
with open('json_results/ensemble_top_10_test_results_tao_2000.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=4)


#### Plotting curves for the top-10 models

In [None]:
# Loss history (MSE) of every ensemble member on the training and internal test sets.
all_training_losses = [model.training_losses for model in ensemble.models]
all_validation_losses = [model.validation_losses for model in ensemble.models]
max_length_tr_losses = max(len(loss_array) for loss_array in all_training_losses)
max_length_val_losses = max(len(loss_array) for loss_array in all_validation_losses)

# Histories can have different lengths. Extend the shorter ones with their last
# value so the mean curve can be computed. This does not depend on the plotting
# loop below, so it is done once here rather than once per model.
padded_data_tr_losses = [np.pad(loss_array, (0, max_length_tr_losses - len(loss_array)), 'edge') for loss_array in all_training_losses]
padded_data_val_losses = [np.pad(loss_array, (0, max_length_val_losses - len(loss_array)), 'edge') for loss_array in all_validation_losses]

plt.figure(figsize=(4, 4))
for i, model in enumerate(ensemble.models):
    # Only the first pair of curves gets a legend entry, to avoid ten duplicates.
    tr_legend = {'label': 'Training set'} if i == 0 else {}
    val_legend = {'label': 'Internal test set'} if i == 0 else {}
    plt.plot(np.arange(len(model.training_losses)), model.training_losses, color='blue', alpha=0.2, **tr_legend)
    plt.plot(np.arange(len(model.validation_losses)), model.validation_losses, color='red', alpha=0.2, **val_legend)

# Mean curves across the ensemble members.
plt.plot(np.mean(padded_data_tr_losses, axis=0), label="Mean training set", linestyle='--', color='darkblue')
plt.plot(np.mean(padded_data_val_losses, axis=0), label="Mean internal test set", linestyle='--', color='darkred')
plt.yscale('log')
plt.title('Loss over epochs')
plt.xlabel('epochs')
plt.ylabel('mse')
plt.legend()
plt.show()


In [None]:
# Score history (MEE) of every ensemble member on the training and internal test sets.
all_training_evals = [model.training_evaluations for model in ensemble.models]
all_validation_evals = [model.validation_evaluations for model in ensemble.models]
max_length_tr_evals = max(len(eval_array) for eval_array in all_training_evals)
max_length_val_evals = max(len(eval_array) for eval_array in all_validation_evals)

# Histories can have different lengths. Extend the shorter ones with their last
# value so the mean curve can be computed. This does not depend on the plotting
# loop below, so it is done once here rather than once per model.
padded_data_tr_evals = [np.pad(eval_array, (0, max_length_tr_evals - len(eval_array)), 'edge') for eval_array in all_training_evals]
padded_data_val_evals = [np.pad(eval_array, (0, max_length_val_evals - len(eval_array)), 'edge') for eval_array in all_validation_evals]

plt.figure(figsize=(4, 4))
for i, model in enumerate(ensemble.models):
    # Only the first pair of curves gets a legend entry, to avoid ten duplicates.
    tr_legend = {'label': 'Training set'} if i == 0 else {}
    val_legend = {'label': 'Internal test set'} if i == 0 else {}
    # The x axis is sized from the evaluation history being plotted (not from the
    # loss history), so x and y always have the same length.
    plt.plot(np.arange(len(model.training_evaluations)), model.training_evaluations, color='blue', alpha=0.2, **tr_legend)
    plt.plot(np.arange(len(model.validation_evaluations)), model.validation_evaluations, color='red', alpha=0.2, **val_legend)

# Mean curves across the ensemble members.
plt.plot(np.mean(padded_data_tr_evals, axis=0), label="Mean training set", linestyle='--', color='darkblue')
plt.plot(np.mean(padded_data_val_evals, axis=0), label="Mean internal test set", linestyle='--', color='darkred')
plt.yscale('log')
plt.title('Score over epochs')
plt.xlabel('epochs')
plt.ylabel('mee')
plt.legend()
plt.show()


In [None]:
# Zoom on the first epochs of the loss curves (MSE).
max_epochs_shown = 500
all_training_losses = [model.training_losses[:max_epochs_shown] for model in ensemble.models]
all_validation_losses = [model.validation_losses[:max_epochs_shown] for model in ensemble.models]


def _mean_curve(curves):
    """Average curves of possibly different lengths, extending short ones with their last value."""
    longest = max(len(curve) for curve in curves)
    return np.mean([np.pad(curve, (0, longest - len(curve)), 'edge') for curve in curves], axis=0)


plt.figure(figsize=(4, 4))
for i, (tr_curve, val_curve) in enumerate(zip(all_training_losses, all_validation_losses)):
    # Only the first pair of curves gets a legend entry, to avoid ten duplicates.
    tr_legend = {'label': 'Training set'} if i == 0 else {}
    val_legend = {'label': 'Internal test set'} if i == 0 else {}
    # A model may have stopped before max_epochs_shown epochs, so the x axis is
    # sized from the curve itself instead of a fixed range.
    plt.plot(np.arange(len(tr_curve)), tr_curve, color='blue', alpha=0.2, **tr_legend)
    plt.plot(np.arange(len(val_curve)), val_curve, color='red', alpha=0.2, **val_legend)

# Mean curves across the ensemble members; identical to a plain mean when every
# history has at least max_epochs_shown epochs.
plt.plot(_mean_curve(all_training_losses), label="Mean training set", linestyle='--', color='darkblue')
plt.plot(_mean_curve(all_validation_losses), label="Mean internal test set", linestyle='--', color='darkred')
plt.yscale('log')
plt.title('Loss over epochs')
plt.xlabel('epochs')
plt.ylabel('mse')
plt.legend()
plt.show()


In [None]:
# Zoom on the first epochs of the score curves (MEE).
max_epochs_shown = 500
all_training_evaluations = [model.training_evaluations[:max_epochs_shown] for model in ensemble.models]
all_validation_evaluations = [model.validation_evaluations[:max_epochs_shown] for model in ensemble.models]


def _mean_curve(curves):
    """Average curves of possibly different lengths, extending short ones with their last value."""
    longest = max(len(curve) for curve in curves)
    return np.mean([np.pad(curve, (0, longest - len(curve)), 'edge') for curve in curves], axis=0)


plt.figure(figsize=(4, 4))
for i, (tr_curve, val_curve) in enumerate(zip(all_training_evaluations, all_validation_evaluations)):
    # Only the first pair of curves gets a legend entry, to avoid ten duplicates.
    tr_legend = {'label': 'Training set'} if i == 0 else {}
    val_legend = {'label': 'Internal test set'} if i == 0 else {}
    # A model may have stopped before max_epochs_shown epochs, so the x axis is
    # sized from the curve itself instead of a fixed range.
    plt.plot(np.arange(len(tr_curve)), tr_curve, color='blue', alpha=0.2, **tr_legend)
    plt.plot(np.arange(len(val_curve)), val_curve, color='red', alpha=0.2, **val_legend)

# Mean curves across the ensemble members; identical to a plain mean when every
# history has at least max_epochs_shown epochs.
plt.plot(_mean_curve(all_training_evaluations), label="Mean training set", linestyle='--', color='darkblue')
plt.plot(_mean_curve(all_validation_evaluations), label="Mean internal test set", linestyle='--', color='darkred')
plt.yscale('log')
plt.title('Score over epochs')
plt.xlabel('epochs')
plt.ylabel('mee')
plt.legend()
plt.show()


#### Model Assessment on the ensemble

Now we proceed with the model assessment: we train the ensemble from scratch several times, evaluate each run on the internal test set, and then average the results (in order to obtain a less biased estimate).

In [None]:
# Settings shared by every ensemble member. Early stopping is disabled: each
# member is trained with its own stopping point from tr_stopping_points.
fixed_params = dict(
    n_output_units=3,
    training_loss_type_value=LossFunction.MSE.value,
    validation_loss_type_value=LossFunction.MSE.value,
    evaluation_metric_type_value=Metrics.MEE.value,
    activation_hidden_type_value=ActivationFunction.SIGMOID.value,
    activation_output_type_value=ActivationFunction.IDENTITY.value,
    epochs=2000,
    nesterov=False,
    classification=False,
    early_stopping=False,
    fast_stopping=False,
    linear_decay=True,
    patience=20,
    tolerance=0.1,
    hidden_layer_sizes=[128, 128],
    verbose=False
)

# Ensemble MEE on the internal test set, one entry per independent run.
results = []

for i in range(10):
    # build a fresh ensemble for this run
    # (presumably each run differs through random weight initialisation — TODO confirm)
    models = []
    for model in list_models:
        # Merge into ONE dict so that a per-model value overrides the shared default.
        # Passing the two dicts as separate ** arguments raises TypeError as soon as
        # a model configuration repeats a key that is also in fixed_params.
        params = {**fixed_params, **model}
        models.append(NeuralNetwork(**params))
    ensemble = Ensemble(models)

    # train ensemble on the entire model-selection split
    ensemble.train(train_data, train_targets, test_data, test_targets, tr_stopping_points=tr_stopping_points)

    # score this run on the internal test set
    y_pred = ensemble.predict(test_data)
    results.append(evaluate(y_pred, test_targets, Metrics.MEE.value))

# Summary of the repeated runs: raw scores, mean and standard deviation.
model_assessment = {}
model_assessment['test_results'] = results
model_assessment['test_mean'] = np.mean(results)
model_assessment['test_std'] = np.std(results)

with open('json_results/ensemble_top_10_model_assessment_tao_2000.json', 'w', encoding='utf-8') as f:
    json.dump(model_assessment, f, indent=4)

## Retraining the chosen model on the entire training set

Now that we assessed the model performance, we can retrain the model using the entire training set.

Note: we use the mean training loss obtained at the best epoch (w.r.t. the validation loss) as the stopping criterion for the training. In this way we ensure that the same level of fitting is achieved.

In [2]:
# Full CUP training file (inputs and targets) and the external test file (inputs only).
cup_training_csv = 'datasets/cup/CUP_TR.csv'
cup_external_test_csv = 'datasets/cup/CUP_TS.csv'

data, targets = read_cup(cup_training_csv)
external_test_data = read_cup_ext_test(cup_external_test_csv)

In [7]:
# Settings shared by every ensemble member. Early stopping is disabled: each
# member is trained with its own stopping point from tr_stopping_points.
fixed_params = dict(
    n_output_units=3,
    training_loss_type_value=LossFunction.MSE.value,
    validation_loss_type_value=LossFunction.MSE.value,
    evaluation_metric_type_value=Metrics.MEE.value,
    activation_hidden_type_value=ActivationFunction.SIGMOID.value,
    activation_output_type_value=ActivationFunction.IDENTITY.value,
    epochs=2000,
    nesterov=False,
    classification=False,
    early_stopping=False,
    fast_stopping=False,
    linear_decay=True,
    patience=20,
    tolerance=0.1,
    hidden_layer_sizes=[128, 128],
    verbose=False
)

# build ensemble
models = []
for model in list_models:
    # Merge into ONE dict so that a per-model value overrides the shared default.
    # Passing the two dicts as separate ** arguments raises TypeError as soon as
    # a model configuration repeats a key that is also in fixed_params.
    params = {**fixed_params, **model}
    models.append(NeuralNetwork(**params))
ensemble = Ensemble(models)

# train ensemble on the entire CUP training set; no validation data is passed
ensemble.train(data, targets, None, None, tr_stopping_points=tr_stopping_points)

In [12]:
# predict on the external test set
predict = ensemble.predict(external_test_data)

csv_comments = ["Nicoletta Alice, Piccolo Chiara, Pitzalis Nicola",
                    "quiquoqua",
                    "ML-CUP23",
                    "14/01/2024"]

# Save the example array to a file
example_file_path = 'csv_results/quiquoqua_ML-CUP23-TS.csv'
save_array_with_comments(predict, example_file_path, csv_comments)