In [1]:
import os 
import time
import pickle
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from itertools import product

import load_cifar10

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix,accuracy_score


In [2]:
# Load the CIFAR-10 train and test features/labels through the project-local
# load_cifar10 helper.
# NOTE(review): as_array=True presumably makes the helper return NumPy arrays -
# confirm in load_cifar10.
X_train_raw, y_train_raw, X_test_raw, y_test_raw = load_cifar10.load_cifar10_data(as_array=True)

Data loaded succesfully!


In [3]:
# Standardise the features: the scaling statistics are learned from the training
# data and then re-used unchanged on the test data.
# NOTE(review): the scaler is fitted on the whole training set, i.e. before the
# validation split is taken from X_train_scaled, so the validation rows also
# contribute to the fitted statistics.
scaler = StandardScaler().fit(X_train_raw)
X_train_scaled = scaler.transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

In [None]:
# Hold out 20% of the scaled training data as the validation set (fixed seed)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_scaled, y_train_raw, test_size=0.2, random_state=13
)


# Define Search Space

In [5]:
# Hyper-parameter grid: network depth, layer width and initial learning rate
n_hidden_layers = [1, 2, 4]
n_neurons_x_layer = [50, 200, 1000]
learning_rate = [10**-exponent for exponent in (3, 4, 5)]
activation = 'relu'
solver = 'adam'

# Cartesian product of the three lists: one (layers, neurons, lr) tuple per candidate
architectures = [*product(n_hidden_layers, n_neurons_x_layer, learning_rate)]
print('Total of architectures:', len(architectures))



Total of architectures: 27


## One-Epoch Training

In [6]:
def one_epoch_train_mlp(architectures, X_train, y_train, X_val, y_val, X_test, y_test,
                        activation='relu', solver='adam', random_state=None):
    """
    Train each MLP configuration for a single epoch and collect its scores.

    Every configuration is fitted with one call to ``MLPClassifier.partial_fit``
    and then scored on the training, validation and test data.

    Parameters:
    ----------
    architectures : list of tuples
        Each tuple contains (hidden_layers, n_neurons, learning_rate)
    X_train : array-like
        Training features
    y_train : array-like
        Training labels
    X_val : array-like
        Validation features
    y_val : array-like
        Validation labels
    X_test : array-like
        Test features
    y_test : array-like
        Test labels
    activation : str, default='relu'
        Activation function passed to ``MLPClassifier``
    solver : str, default='adam'
        Solver passed to ``MLPClassifier``; it has to be one that supports
        ``partial_fit``
    random_state : int, RandomState instance or None, default=None
        Seed passed to every ``MLPClassifier``. The default ``None`` keeps the
        previous, unseeded behaviour.

    Returns:
    -------
    pandas.DataFrame
        One row per configuration with the columns 'hidden_layers', 'n_neurons',
        'learning_rate', 'train_time' (seconds spent in ``partial_fit``),
        'train_score', 'val_score', 'test_score' and 'confusion_matrices'
        (confusion matrix of the validation predictions).
    """

    # Initialize the dict for the results
    configuration_results = {
        'hidden_layers': [],
        'n_neurons': [],
        'learning_rate': [],
        'train_time': [],
        'train_score': [],
        'val_score': [],
        'test_score': [],
        'confusion_matrices': [],
    }

    # Loop-invariant values, computed once instead of on every iteration
    classes = np.unique(y_train)
    n_architectures = len(architectures)

    for i, (h, n, lr) in enumerate(architectures):
        print(i + 1, '/', n_architectures)
        print('Hidden Layers: {}, # Neurons: {}, Learning rate: {}'.format(h, n, lr))

        # h hidden layers with n neurons each, as a plain tuple of ints
        hidden_layer_sizes = (int(n),) * int(h)

        mlp = MLPClassifier(
            hidden_layer_sizes=hidden_layer_sizes,
            activation=activation,
            solver=solver,
            learning_rate_init=lr,
            random_state=random_state
        )

        # Time only the training step; perf_counter is meant for measuring durations
        start_time = time.perf_counter()

        # Train for one epoch
        mlp.partial_fit(X_train, y_train, classes=classes)

        total_train_time = time.perf_counter() - start_time

        # Calculate accuracies
        train_accuracy = mlp.score(X_train, y_train)

        # Predict the validation set once and derive both the accuracy and the
        # confusion matrix from the same predictions
        y_val_pred = mlp.predict(X_val)
        val_accuracy = accuracy_score(y_val, y_val_pred)
        val_cm = confusion_matrix(y_val, y_val_pred)

        # Calculate test accuracy
        test_accuracy = mlp.score(X_test, y_test)

        # Print results
        print(f"Training Time: {total_train_time:.4f} seconds")
        print(f"Validation Accuracy: {val_accuracy:.4f}")

        # Store the results of this configuration
        configuration_results['hidden_layers'].append(h)
        configuration_results['n_neurons'].append(n)
        configuration_results['learning_rate'].append(lr)
        configuration_results['train_time'].append(total_train_time)
        configuration_results['train_score'].append(train_accuracy)
        configuration_results['val_score'].append(val_accuracy)
        configuration_results['test_score'].append(test_accuracy)
        configuration_results['confusion_matrices'].append(val_cm)

    # Save the results to a dataframe
    architectures_df = pd.DataFrame(configuration_results)

    return architectures_df

In [7]:
# Train every candidate architecture for one epoch on the fixed train/validation split
architectures_results = one_epoch_train_mlp(architectures, X_train, y_train, X_val, y_val, X_test_scaled, y_test_raw)

# one_epoch_train_mlp already returns a DataFrame, so it is used as-is instead of
# being wrapped in pd.DataFrame again; the results are written to disk for later reuse
architectures_df = architectures_results
architectures_df.to_excel('One_Epoch_Results.xlsx', index=False)

1 / 27
Hidden Layers: 1, # Neurons: 50, Learning rate: 0.001
Training Time: 1.1786 seconds
Validation Accuracy: 0.4194
2 / 27
Hidden Layers: 1, # Neurons: 50, Learning rate: 0.0001
Training Time: 1.0628 seconds
Validation Accuracy: 0.3727
3 / 27
Hidden Layers: 1, # Neurons: 50, Learning rate: 1e-05
Training Time: 1.0953 seconds
Validation Accuracy: 0.2410
4 / 27
Hidden Layers: 1, # Neurons: 200, Learning rate: 0.001
Training Time: 3.0978 seconds
Validation Accuracy: 0.4211
5 / 27
Hidden Layers: 1, # Neurons: 200, Learning rate: 0.0001
Training Time: 3.0732 seconds
Validation Accuracy: 0.4202
6 / 27
Hidden Layers: 1, # Neurons: 200, Learning rate: 1e-05
Training Time: 3.0388 seconds
Validation Accuracy: 0.2872
7 / 27
Hidden Layers: 1, # Neurons: 1000, Learning rate: 0.001
Training Time: 16.6214 seconds
Validation Accuracy: 0.4263
8 / 27
Hidden Layers: 1, # Neurons: 1000, Learning rate: 0.0001
Training Time: 14.9299 seconds
Validation Accuracy: 0.4504
9 / 27
Hidden Layers: 1, # Neurons: 

In [8]:
# Load architectures_results
# architectures_df = pd.read_excel('One_Epoch_Results.xlsx')

In [9]:
# Name of the results column used to rank the architectures (the validation
# accuracy); the ranking treats higher values as better
metric = 'val_score'

In [10]:
def process_architecture_results(df, metric):
    """
    Creates unique IDs for neural network architectures and ranks them by a specified metric.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing neural network architecture configurations.
        Must have columns 'hidden_layers', 'n_neurons', and 'learning_rate',
        plus the column named by ``metric``.

    metric : str
        The column name of the metric to use for ranking.
        Higher values are assumed to be better.

    Returns:
    --------
    pandas.DataFrame
        A copy of the input DataFrame, sorted by ``metric`` in descending order
        with a fresh 0..n-1 index, and with two new columns:
        - 'ID': A string combining the architecture parameters
          ('<hidden_layers>_<n_neurons>_<learning_rate>')
        - 'Ranking': The rank of each architecture based on the metric
          (1 = best). Rows with equal metric values keep their input order.

    Raises:
    -------
    KeyError
        If any of the required columns is missing from ``df``.
    """
    # Fail early with a message that names every missing column
    required_columns = ['hidden_layers', 'n_neurons', 'learning_rate', metric]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise KeyError(f"Missing required column(s): {missing_columns}")

    # Create a copy to avoid modifying the original DataFrame
    result_df = df.copy()

    # Create an ID for each architecture
    result_df['ID'] = (result_df['hidden_layers'].astype(str) + '_' +
                       result_df['n_neurons'].astype(str) + '_' +
                       result_df['learning_rate'].astype(str))

    # Sort by metric score; a stable sort keeps the ranking of tied scores
    # deterministic (ties stay in their original order)
    result_df = result_df.sort_values(by=metric, ascending=False,
                                      kind='mergesort').reset_index(drop=True)

    # Add ranking (1-based position after sorting)
    result_df['Ranking'] = result_df.index + 1

    return result_df

In [11]:
# Add the 'ID' and 'Ranking' columns to the one-epoch results, ordered by `metric`.
# The name architectures_df is rebound to the processed copy.
architectures_df = process_architecture_results(architectures_df, metric)

## Early stopping after 50 epochs without improvement

In [12]:
# Load the previously saved early-stopping results and give them the same
# 'ID' / 'Ranking' columns as the one-epoch results
early_stop_results = process_architecture_results(
    pd.read_excel("50 Epochs wo Increase_Results.xlsx"), metric
)

## Compare the rankings of both approaches

In [13]:
# Join the one-epoch (OE) and early-stopping (ES) tables on the architecture ID;
# columns present in both tables receive the _OE / _ES suffix
merged_results = pd.merge(architectures_df, early_stop_results,
                          on='ID', suffixes=('_OE', '_ES'))

# Differences (OE minus ES) in validation score and in ranking position.
# Each one is computed once; the earlier duplicate val_score_diff assignment is gone.
merged_results['val_score_diff'] = merged_results['val_score_OE'] - merged_results['val_score_ES']
merged_results['position_diff'] = merged_results['Ranking_OE'] - merged_results['Ranking_ES']

display(merged_results[['ID', 'Ranking_OE', 'Ranking_ES']].head())

Unnamed: 0,ID,Ranking_OE,Ranking_ES
0,4_1000_0.0001,1,4
1,2_1000_0.0001,2,1
2,2_1000_0.001,3,8
3,1_1000_0.0001,4,5
4,4_1000_0.001,5,13


In [14]:
# Compare the top-ranked models of the early-stopping (ES) and one-epoch (OE) results
def get_top_models_overlap(df1, df2, id_column='ID', score_column='val_score', top_n=5):
    """
    Count the models that are among the ``top_n`` best of both dataframes.

    Parameters:
    ----------
    df1 : pandas.DataFrame
        First dataframe containing OE results
    df2 : pandas.DataFrame
        Second dataframe containing ES results
    id_column : str, default='ID'
        Column name that uniquely identifies the models
    score_column : str, default='val_score'
        Column name that contains the scores to rank by (largest is best)
    top_n : int, default=5
        Number of top models to consider

    Returns:
    -------
    dict
        Dictionary containing:
        - count: Number of models that appear in top N of both dataframes
        - common_ids: Set of IDs that appear in top N of both dataframes
        - df1_in_df2: Rows of df1 whose ID is in the top N of df2
        - df2_in_df1: Rows of df2 whose ID is in the top N of df1
    """
    def _best_ids(frame):
        # IDs of the top_n rows with the largest score in `frame`
        return set(frame.nlargest(top_n, score_column)[id_column])

    best_in_df1 = _best_ids(df1)
    best_in_df2 = _best_ids(df2)

    # Models that made it into the top N of both tables
    shared_ids = best_in_df1 & best_in_df2

    return {
        'count': len(shared_ids),
        'common_ids': shared_ids,
        'df1_in_df2': df1.loc[df1[id_column].isin(best_in_df2)],
        'df2_in_df1': df2.loc[df2[id_column].isin(best_in_df1)],
    }

In [15]:
# Number of architectures found in the top 5 (the default top_n) of both the
# one-epoch and the early-stopping ranking
overlap = get_top_models_overlap(architectures_df, early_stop_results)
print(overlap['count'])

3


# Multiple experiments testing

In [16]:
# Hyper-parameter grid for the repeated experiments: depth, width and initial learning rate
n_hidden_layers = [1, 2, 4]
n_neurons_x_layer = [50, 200, 1000]
learning_rate = [10**-exponent for exponent in (3, 4, 5)]
activation = 'relu'
solver = 'adam'

# Cartesian product of the three lists: one (layers, neurons, lr) tuple per candidate
architectures = [*product(n_hidden_layers, n_neurons_x_layer, learning_rate)]
print('Total of architectures:', len(architectures))

# Results column used for ranking
metric = 'val_score'

Total of architectures: 27


In [17]:
# Repeat the one-epoch search on several random train/validation splits and keep,
# for every repetition, the ranked results and the top-N overlap with early stopping
overlap_results = []   # overlap count of each experiment
dataframes_list = []   # ranked results of each experiment

number_of_experiments = 50

for exp in range(number_of_experiments):
    print(f"Experiment {exp + 1} / {number_of_experiments}")
    # Split the data: the experiment index is the seed, so every experiment gets a
    # different split that is nevertheless the same on every re-run.
    # (The networks themselves are still created without a seed here.)
    X_train, X_val, y_train, y_val = train_test_split(X_train_scaled, y_train_raw,
                                                      test_size=0.2,
                                                      random_state=exp)

    architectures_results = one_epoch_train_mlp(architectures, X_train, y_train, X_val, y_val, X_test_scaled, y_test_raw)

    # Process and rank the results
    architectures_df = process_architecture_results(architectures_results, metric)
    dataframes_list.append(architectures_df)

    # Calculate overlap with the early-stopping ranking
    overlap = get_top_models_overlap(architectures_df, early_stop_results)
    overlap_results.append(overlap['count'])

# Concatenate all DataFrames at once (not inside the loop) and persist them
dataframes = pd.concat(dataframes_list, axis=0, ignore_index=True)
dataframes.to_excel('50 Experiments One Epoch.xlsx', index=False)

Experiment 1 / 50
1 / 27
Hidden Layers: 1, # Neurons: 50, Learning rate: 0.001
Training Time: 1.1836 seconds
Validation Accuracy: 0.4198
2 / 27
Hidden Layers: 1, # Neurons: 50, Learning rate: 0.0001
Training Time: 1.1277 seconds
Validation Accuracy: 0.3792
3 / 27
Hidden Layers: 1, # Neurons: 50, Learning rate: 1e-05
Training Time: 1.1576 seconds
Validation Accuracy: 0.2100
4 / 27
Hidden Layers: 1, # Neurons: 200, Learning rate: 0.001
Training Time: 3.1678 seconds
Validation Accuracy: 0.4210
5 / 27
Hidden Layers: 1, # Neurons: 200, Learning rate: 0.0001
Training Time: 3.1278 seconds
Validation Accuracy: 0.4205
6 / 27
Hidden Layers: 1, # Neurons: 200, Learning rate: 1e-05
Training Time: 3.2031 seconds
Validation Accuracy: 0.2923
7 / 27
Hidden Layers: 1, # Neurons: 1000, Learning rate: 0.001
Training Time: 15.0989 seconds
Validation Accuracy: 0.4260
8 / 27
Hidden Layers: 1, # Neurons: 1000, Learning rate: 0.0001
Training Time: 14.9449 seconds
Validation Accuracy: 0.4538
9 / 27
Hidden Laye

In [3]:
# Reload the saved results of the repeated experiments and display one randomly
# chosen row as a quick sanity check
experiments_result = pd.read_excel('50 Experiments One Epoch.xlsx')
experiments_result.sample()

Unnamed: 0,hidden_layers,n_neurons,learning_rate,train_time,train_score,val_score,test_score,confusion_matrices,ID,Ranking
675,2,1000,0.0001,20.046958,0.529775,0.4692,0.477,[[519 66 50 52 41 25 15 54 131 66]\n [...,2_1000_0.0001,1


In [None]:
# Plot ranking distribution
