# Imports

In [78]:
# Data processing
import pandas as pd
# Preprocessing modules
import absenteeism_at_work_preprocessor
import students_dropout_and_academic_success_preprocessor
import loan_preprocessor
# Sci-kit learn
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.model_selection import train_test_split, KFold
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
# Other utilities
import random
import warnings

# Common functionalities

## Helper function to keep input data consistent across folds
To prevent data leakage, preprocessing is done while performing cross validation. Because not all categories are present in each fold, one hot encoding can result in a different number of columns. To keep the columns consistent between the train and test data in one fold, the intersection of the columns is used.

In [79]:
def take_common_columns(X_train, X_test):
    common_columns = list(set(X_train.columns) & set(X_test.columns))
    return X_train[common_columns], X_test[common_columns]

## Scoring function for comparison table
This function is the central comparison utility function. It takes a dictionary of networks and returns a summary for each network with various metrics.

In [80]:
def compare_networks(networks, X_test, y_test):
    results = []
    
    for network_name, network in networks.items():
        y_pred = network.predict(X_test)
        
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average="weighted", zero_division=0)
        recall = recall_score(y_test, y_pred, average="weighted", zero_division=0)
        f1 = f1_score(y_test, y_pred, average="weighted")
        
        results.append({
            "Network": network_name,
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1 Score": f1
        })

    return pd.DataFrame(results)

## K-fold network validation framework
The following function defines the framework for comparing multiple neural network tuners. In a k-fold validation loop, networks are generated with the strategies provided as input paramters. In order to keep this function dataset agnostic, a preprocessor must be provided. The preprocessor takes a subset of the data, cleans it and splits it into X (features) and y (target).

In [81]:
def compare_tuning_algorithms(dataset, preprocessor, network_generators, n_folds, shuffle_train_test):
    metrics_table = pd.DataFrame()

    folds = KFold(n_splits=n_folds, shuffle=shuffle_train_test, random_state=seed)

    for fold, (train_idx, test_idx) in enumerate(folds.split(dataset)):
        X_train, y_train = preprocessor(dataset.iloc[train_idx])
        X_test, y_test = preprocessor(dataset.iloc[test_idx])
        X_train, X_test = take_common_columns(X_train, X_test)
        
        networks = {
            name: generator(X_train, y_train, seed=seed, parameters=parameters) 
            for name, generator in network_generators.items() 
        }
        
        fold_comparison = compare_networks(networks, X_test, y_test)
        metrics_table = pd.concat([metrics_table, fold_comparison], axis=0)

        print("Fold %s" % fold)
        print(fold_comparison)

    return metrics_table.groupby("Network").mean()   

## Benchmark neural network

In [82]:
def fit_benchmark_neural_network(X_train, y_train, seed, parameters=None):
    return MLPClassifier(random_state=seed).fit(X_train, y_train)

## Benchmark support vector classifier

In [83]:
def fit_benchmark_support_vector_classifier(X_train, y_train, seed, parameters=None):
    return SVC(random_state=seed).fit(X_train, y_train)

## Custom random search algorith

In [84]:
def create_network(layers, nodes, activation, learning_rate=0.01, early_stopping=True, validation_fraction=0.1, n_iter_no_change=10):
    """
    Creates an MLP network with specified layers, nodes, activation function, learning rate, and early stopping parameters.
    """
    hidden_layer_sizes = tuple(nodes)
    model = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, activation=activation, 
                          max_iter=100, learning_rate_init=learning_rate, 
                          early_stopping=early_stopping, validation_fraction=validation_fraction, 
                          n_iter_no_change=n_iter_no_change)
    return model

def train_and_evaluate(model, X_train, y_train):
    """
    Trains the MLPClassifier model and evaluates its performance on the test set.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_train)
    accuracy = accuracy_score(y_train, y_pred)
    return accuracy

def random_configuration(max_layers, max_nodes, activation_functions):
    """
    Generates a random configuration for the neural network.
    """
    # Randomly select the number of layers within the allowed range
    layers = random.randint(1, max_layers)
    # Randomly select an activation function from the provided list
    activation = random.choice(activation_functions)
    # Generate a random number of nodes for each layer
    nodes = [random.randint(1, max_nodes) for _ in range(layers)]
    return layers, nodes, activation

def tune_custom_random_neural_network(X_train, y_train, parameters, seed, iterations=20):
    """
    Tunes a custom random neural network based on specified parameters.
    """
    random.seed(seed)
    best_performance = None
    # Extracting maximum permissible values for layers, nodes, and activation functions
    max_layers, max_nodes, activation_functions = parameters['max_layers'], parameters['max_nodes'], parameters["activation_functions"]

    for _ in range(iterations):
        layers, nodes, activation = random_configuration(max_layers, max_nodes, activation_functions)
        model = create_network(layers, nodes, activation)
        performance = train_and_evaluate(model, X_train, y_train)
        
        # Update the best performance and configuration if this model is better
        if best_performance is None or performance > best_performance:
            best_performance = performance
            best_layers = layers
            best_nodes = nodes
            best_activation = activation

    return create_network(best_layers, best_nodes, best_activation).fit(X_train, y_train)

## Custom local search algorithm

In [85]:
def create_networkh(layers, nodes, activation, learning_rate=0.01, early_stopping=True, validation_fraction=0.1, n_iter_no_change=10):
    """
    Creates an MLP network with specified layers, nodes, activation function, learning rate, and early stopping parameters.
    """
    hidden_layer_sizes = tuple([nodes] * layers)
    model = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, activation=activation, 
                          max_iter=100, learning_rate_init=learning_rate, 
                          early_stopping=early_stopping, validation_fraction=validation_fraction, 
                          n_iter_no_change=n_iter_no_change)
    return model

def train_and_evaluate(model, X_train, y_train):
    """
    Trains the MLPClassifier model and evaluates its performance on the test set.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_train)
    accuracy = accuracy_score(y_train, y_pred)
    return accuracy

def tune_custom_local_search_network(X_train, y_train, parameters, seed):
    """
    Performs hill climbing to find a better neural network configuration.
    """
    max_layers, max_nodes, activation_functions = parameters["max_layers"], parameters["max_nodes"], parameters["activation_functions"]
    #Initializing the current nodes with starting values
    current_layers, current_nodes, current_activation = 1, 1, "relu"

    best_performance = None
 
    while True:
        neighbors = []
    
        # Generating neighbors by varying one parameter at a time
        # Increase layer count if below max
        if current_layers < max_layers:
            neighbors.append((current_layers + 1, current_nodes, current_activation))
        
        # Increase node count if below max
        if current_nodes < max_nodes:
            neighbors.append((current_layers, current_nodes + 1, current_activation))
        
        # Change activation function to each alternative
        for activation in activation_functions:
            if activation != current_activation:
                neighbors.append((current_layers, current_nodes, activation))

        # Evaluating each neighboring configuration
        # if no configuration is better then best_neighbor is set/stays None
        best_neighbor = None
        for neighbor in neighbors:
            layers, nodes, activation = neighbor
            #creating network
            model = create_networkh(layers, nodes, activation)
            #evaluating its performance
            performance = train_and_evaluate(model, X_train, y_train)

            if best_performance is None or performance > best_performance:
                best_performance = performance
                best_neighbor = neighbor

        # Check if no improvement
        if best_neighbor is None:
            return create_networkh(current_layers, current_nodes, current_activation).fit(X_train, y_train)

        current_layers, current_nodes, current_activation = best_neighbor

# Experiment parameters

In [86]:
disable_warnings = True
n_folds = 3
seed = 0
shuffle_train_test = True
network_generators = {
    "Benchmark neural network": fit_benchmark_neural_network,
    "Benchmark support vector classifier": fit_benchmark_support_vector_classifier,
    "Random search tuned neural network": tune_custom_random_neural_network,
    "Local search tuned neural network": tune_custom_local_search_network
}
parameters = {
    "max_layers": 5,
    "max_nodes": 50,
    "activation_functions": ["relu", "tanh", "logistic"]
}

In [87]:
if disable_warnings:
    warnings.filterwarnings("ignore")

# Absenteeism at work

## Data loading

In [88]:
absenteeism_at_work = pd.read_csv("../../data/absenteeism-at-work/data.csv", delimiter=";", index_col="ID")

## Network generator comparison

In [89]:
compare_tuning_algorithms(
    dataset=absenteeism_at_work, 
    preprocessor=absenteeism_at_work_preprocessor.preprocess, 
    network_generators=network_generators, 
    n_folds=n_folds, 
    shuffle_train_test=shuffle_train_test
)

Fold 0
                               Network  Accuracy  Precision    Recall  \
0             Benchmark neural network  0.551020   0.560207  0.551020   
1  Benchmark support vector classifier  0.555102   0.334260  0.555102   
2   Random search tuned neural network  0.493878   0.518324  0.493878   
3    Local search tuned neural network  0.081633   0.006664  0.081633   

   F1 Score  
0  0.536881  
1  0.413446  
2  0.450855  
3  0.012322  
Fold 1
                               Network  Accuracy  Precision    Recall  \
0             Benchmark neural network  0.601626   0.585437  0.601626   
1  Benchmark support vector classifier  0.548780   0.329476  0.548780   
2   Random search tuned neural network  0.544715   0.517520  0.544715   
3    Local search tuned neural network  0.284553   0.109983  0.284553   

   F1 Score  
0  0.582313  
1  0.408462  
2  0.513438  
3  0.145939  
Fold 2
                               Network  Accuracy  Precision    Recall  \
0             Benchmark neural net

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Network,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Benchmark neural network,0.565787,0.548072,0.565787,0.541935
Benchmark support vector classifier,0.531917,0.314161,0.531917,0.391579
Local search tuned neural network,0.195233,0.058162,0.195233,0.082219
Random search tuned neural network,0.55216,0.54423,0.55216,0.518409


# Students' dropout and academic success

## Data loading

In [90]:
students_dropout_and_academic_success = pd.read_csv("../../data/predict-students-dropout-and-academic-success/data.csv", delimiter=";")

## Network generator comparison

In [91]:
compare_tuning_algorithms(
    dataset=students_dropout_and_academic_success, 
    preprocessor=students_dropout_and_academic_success_preprocessor.preprocess, 
    network_generators=network_generators, 
    n_folds=n_folds, 
    shuffle_train_test=shuffle_train_test
)

Fold 0
                               Network  Accuracy  Precision    Recall  \
0             Benchmark neural network  0.660339   0.579430  0.660339   
1  Benchmark support vector classifier  0.497627   0.247633  0.497627   
2   Random search tuned neural network  0.717966   0.623061  0.717966   
3    Local search tuned neural network  0.497627   0.247633  0.497627   

   F1 Score  
0  0.588525  
1  0.330700  
2  0.654084  
3  0.330700  
Fold 1
                               Network  Accuracy  Precision    Recall  \
0             Benchmark neural network  0.610847   0.610287  0.610847   
1  Benchmark support vector classifier  0.496949   0.246958  0.496949   
2   Random search tuned neural network  0.720678   0.708431  0.720678   
3    Local search tuned neural network  0.496949   0.246958  0.496949   

   F1 Score  
0  0.529830  
1  0.329949  
2  0.703918  
3  0.329949  
Fold 2
                               Network  Accuracy  Precision    Recall  \
0             Benchmark neural net

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Network,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Benchmark neural network,0.646705,0.622669,0.646705,0.573708
Benchmark support vector classifier,0.497288,0.28726,0.497288,0.342684
Local search tuned neural network,0.499323,0.249332,0.499323,0.332586
Random search tuned neural network,0.649155,0.595355,0.649155,0.570612


# Loan

## Data loading

In [92]:
loan = pd.read_csv("../../data/kaggle-competitions/loan/loan-10k.lrn.csv", index_col="ID")

## Network generator comparison

In [93]:
compare_tuning_algorithms(
    dataset=loan, 
    preprocessor=loan_preprocessor.preprocess, 
    network_generators=network_generators, 
    n_folds=n_folds, 
    shuffle_train_test=shuffle_train_test
)

Fold 0
                               Network  Accuracy  Precision    Recall  \
0             Benchmark neural network  0.302639   0.308589  0.302639   
1  Benchmark support vector classifier  0.335633   0.272581  0.335633   
2   Random search tuned neural network  0.317636   0.264662  0.317636   
3    Local search tuned neural network  0.304739   0.092866  0.304739   

   F1 Score  
0  0.190657  
1  0.270512  
2  0.234098  
3  0.142352  
Fold 1
                               Network  Accuracy  Precision    Recall  \
0             Benchmark neural network  0.330033   0.301583  0.330033   
1  Benchmark support vector classifier  0.322232   0.271679  0.322232   
2   Random search tuned neural network  0.321632   0.273878  0.321632   
3    Local search tuned neural network  0.309031   0.095500  0.309031   

   F1 Score  
0  0.298402  
1  0.260445  
2  0.221259  
3  0.145910  
Fold 2
                               Network  Accuracy  Precision    Recall  \
0             Benchmark neural net

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Network,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Benchmark neural network,0.282898,0.29582,0.282898,0.212536
Benchmark support vector classifier,0.326399,0.278644,0.326399,0.259962
Local search tuned neural network,0.298799,0.089415,0.298799,0.137605
Random search tuned neural network,0.3137,0.295249,0.3137,0.223686
