In [1]:
import numpy as np
import matplotlib.pyplot as plt
import time
import utils
import pandas as pd
import gc

from mlrose_hiive.algorithms.decay import ExpDecay
from mlrose_hiive.neural import NeuralNetwork

from sklearn.metrics import log_loss, classification_report
from sklearn.model_selection import train_test_split

class color:
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    END = '\033[0m'

In [2]:
#stratified sampling in sklearn
def stratified_sampling(vegito, target, size = 0.4, seed = 903949505):
    gc.collect()
    goku, vegita = train_test_split(vegito, test_size=size, stratify=vegito[target], random_state=seed)
    print(color.BOLD + color.UNDERLINE + color.DARKCYAN + "Original distribution" + color.END)
    c = vegito[target].value_counts(normalize=False)
    p = vegito[target].value_counts(normalize=True)*100
    display(pd.concat([c,p], axis=1, keys=['counts', '%']))
    print("\n" + "\n" + color.BOLD + color.UNDERLINE + color.WARNING + "Train distribution" + color.END)
    c = goku[target].value_counts(normalize=False)
    p = goku[target].value_counts(normalize=True)*100
    display(pd.concat([c,p], axis=1, keys=['counts', '%']))
    print("\n" + "\n" + color.BOLD + color.UNDERLINE + color.RED + "Test distribution" + color.END)
    c = vegita[target].value_counts(normalize=False)
    p = vegita[target].value_counts(normalize=True)*100
    display(pd.concat([c,p], axis=1, keys=['counts', '%']))
    
    #split goku, vegita and cell by target variable to make depent and indepent data seperately
    goku_X = goku.drop(target, axis=1)
    vegita_X = vegita.drop(target, axis=1)
    goku_y = pd.DataFrame(goku[target])
    vegita_y = pd.DataFrame(vegita[target])
    
    #return goku_X, goku_y, vegita_X, vegita_y
    print("\n" + "\n" + color.BOLD + color.UNDERLINE + color.BLUE + "Train_x shape" + color.END)
    print("Number of columns are " + color.BOLD + str(goku_X.shape[0]) + color.END + " and number of rows are " + color.BOLD + str(goku_X.shape[1]) + color.END)
    display(goku_X.head())
    print("\n" + "\n" + color.BOLD + color.UNDERLINE + color.BLUE + "Train_y shape" + color.END)
    print("Number of columns are " + color.BOLD + str(goku_y.shape[0]) + color.END + " and number of rows are " + color.BOLD + str(goku_y.shape[1]) + color.END)
    display(goku_y.head())
    print("\n" + "\n" + color.BOLD + color.UNDERLINE + color.BLUE + "Train_x shape" + color.END)
    print("Number of columns are " + color.BOLD + str(vegita_X.shape[0]) + color.END + " and number of rows are " + color.BOLD + str(vegita_X.shape[1]) + color.END)
    display(vegita_X.head())
    print("\n" + "\n" + color.BOLD + color.UNDERLINE + color.BLUE + "Train_y shape" + color.END)
    print("Number of columns are " + color.BOLD + str(vegita_y.shape[0]) + color.END + " and number of rows are " + color.BOLD + str(vegita_y.shape[1]) + color.END)
    display(vegita_y.head())
    return goku_X, goku_y, vegita_X, vegita_y

In [33]:
def plot_nn_perf(x_train, y_train, random_seeds, **kwargs):
    """Plot Neural Networks performances on the training set.

        Use different optimizations algorithms (RHC, SA, GA and GD) and compare results on the training set using
        k-fold cross-validation.

        Args:
        x_train (ndarray): training data.
        y_train (ndarray): training labels.
        random_seeds (list or array): random seeds for multiple random runs to use for k-fold cross-validation.
        kwargs (dict): additional arguments to pass for curves plotting:
                  - rhc_max_iters (list or ndarray): RHC list or array of maximum number of iterations to plot vs.
                  - sa_max_iters (list or ndarray): SA list or array of maximum number of iterations to plot vs.
                  - ga_max_iters (list or ndarray): GA list or array of maximum number of iterations to plot vs.
                  - gd_max_iters (list or ndarray): GD list or array of maximum number of iterations to plot vs.
                  - init_temp (float): SA initial temperature.
                  - exp_decay_rate (float): SA temperature exponential decay rate.
                  - min_temp (float): SA minimum temperature.
                  - pop_size (int): GA population size.
                  - mutation_prob (float): GA mutation probability.

        Returns:
        None.
           """

    # Initialize algorithms, corresponding acronyms and max number of iterations
    algorithms = ['random_hill_climb', 'simulated_annealing', 'genetic_alg', 'gradient_descent']
    acronyms = ['RHC', 'SA', 'GA', 'GD']
    max_iters = ['rhc_max_iters', 'sa_max_iters', 'ga_max_iters', 'gd_max_iters']

    # Initialize lists of training curves, validation curves and training times curves
    train_curves, val_curves, train_time_curves = [], [], []

    # Define SA exponential decay schedule
    exp_decay = ExpDecay(init_temp=kwargs['init_temp'],
                         exp_const=kwargs['exp_decay_rate'],
                         min_temp=kwargs['min_temp'])

    # For each of the optimization algorithms to test the Neural Network with
    for i, algorithm in enumerate(algorithms):
        print('\nAlgorithm = {}'.format(algorithm))

        # For multiple random runs
        for random_seed in random_seeds:

            # Initialize training losses, validation losses and training time lists for current random run
            train_losses, val_losses, train_times = [], [], []

            # Compute stratified k-fold
            x_train_fold, x_val_fold, y_train_fold, y_val_fold = train_test_split(x_train, y_train,
                                                                                  test_size=0.2, shuffle=True,
                                                                                  random_state=random_seed,
                                                                                  stratify=y_train)
            # For each max iterations to run for
            for max_iter in kwargs[max_iters[i]]:

                # Define Neural Network using current algorithm
                nn = NeuralNetwork(hidden_nodes=[50,], activation='relu',
                                   algorithm=algorithm, max_iters=int(max_iter),
                                   bias=True, is_classifier=True, learning_rate=0.001,
                                   early_stopping=False, clip_max=1e10, schedule=exp_decay,
                                   pop_size=kwargs['pop_size'], mutation_prob=kwargs['mutation_prob'],
                                   max_attempts=int(max_iter), random_state=random_seed, curve=False)

                # Train on current training fold and append training time
                start_time = time.time()
                nn.fit(x_train_fold, y_train_fold)
                train_times.append(time.time() - start_time)

                # Compute and append training and validation log losses
                train_loss = log_loss(y_train_fold, nn.predict(x_train_fold))
                val_loss = log_loss(y_val_fold, nn.predict(x_val_fold))
                train_losses.append(train_loss)
                val_losses.append(val_loss)
                print('{} - train loss = {:.3f}, val loss = {:.3f}'.format(max_iter, train_loss, val_loss))

            # Append curves for current random seed to corresponding lists of curves
            train_curves.append(train_losses)
            val_curves.append(val_losses)
            train_time_curves.append(train_times)
        display('{} train'.format(acronyms[i]))
        # Plot training and validation figure for current algorithm
        display(kwargs[max_iters[i]])
        display(train_curves)
        display(val_curves)
        display(train_time_curves)
#         utils.plot_helper(x_axis=kwargs[max_iters[i]], y_axis=np.array(train_curves), label='{} train'.format(acronyms[i]))
#         utils.plot_hel'{} train'.format(acronyms[i]per(x_axis=kwargs[max_iters[i]], y_axis=np.array(val_curves), label='{} val'.format(acronyms[i]))

#         # Plot training time figure for current algorithm
#         plt.figure(train_times_figure)
#         utils.plot_helper(x_axis=kwargs[max_iters[i]], y_axis=np.array(train_time_curves), label=acronyms[i])

#     # Set title and labels to training and validation figure
#     plt.figure(train_val_figure)
#     utils.set_plot_title_labels(title='Neural Network - Loss vs. iterations',
#                                 x_label='Iterations',
#                                 y_label='Loss')

#     # Save figure
#     plt.savefig(IMAGE_DIR + 'nn_objective_vs_iterations')

#     # Set title and labels to training time figure
#     plt.figure(train_times_figure)
#     utils.set_plot_title_labels(title='Neural Network - Time vs. iterations',
#                                 x_label='Iterations',
#                                 y_label='Time (seconds)')

#     # Save figure
#     plt.savefig(IMAGE_DIR + 'nn_time_vs_iterations')

In [17]:
def test_nn_performances(x_train, x_test, y_train, y_test, random_seed, **kwargs):
    """Test Neural Networks performances on the test set using different optimizations algorithms: RHC, SA, GA and GD.

        Args:
        x_train (ndarray): training data.
        x_test (ndarray): test data.
        y_train (ndarray): training labels.
        y_test (ndarray): test labels.
        random_seed (int): random seed.
        kwargs (dict): additional arguments to pass for curves plotting:
                   - max_iters (int): maximum number of iterations.
                   - init_temp (float): SA initial temperature.
                   - exp_decay_rate (float): SA temperature exponential decay rate.
                   - min_temp (float): SA minimum temperature.
                   - pop_size (int): GA population size.
                   - mutation_prob (float): GA mutation probability.

        Returns:
        None.
        """

    # Define SA exponential decay schedule
    exp_decay = ExpDecay(init_temp=kwargs['init_temp'],
                         exp_const=kwargs['exp_decay_rate'],
                         min_temp=kwargs['min_temp'])

    # Define Neural Network using RHC for weights optimization
    rhc_nn = NeuralNetwork(hidden_nodes=[50, 30], activation='relu',
                           algorithm='random_hill_climb', max_iters=kwargs['max_iters'],
                           bias=True, is_classifier=True, learning_rate=0.001,
                           early_stopping=False, clip_max=1e10,
                           max_attempts=kwargs['max_iters'], random_state=random_seed, curve=False)

    # Define Neural Network using SA for weights optimization
    sa_nn = NeuralNetwork(hidden_nodes=[50, 30], activation='relu',
                          algorithm='simulated_annealing', max_iters=kwargs['max_iters'],
                          bias=True, is_classifier=True, learning_rate=0.001,
                          early_stopping=False, clip_max=1e10, schedule=exp_decay,
                          max_attempts=kwargs['max_iters'], random_state=random_seed, curve=False)

    # Define Neural Network using GA for weights optimization
    ga_nn = NeuralNetwork(hidden_nodes=[50, 30], activation='relu',
                          algorithm='genetic_alg', max_iters=kwargs['max_iters'],
                          bias=True, is_classifier=True, learning_rate=0.001,
                          early_stopping=False, clip_max=1e10,
                          pop_size=kwargs['pop_size'], mutation_prob=kwargs['mutation_prob'],
                          max_attempts=kwargs['max_iters'], random_state=random_seed, curve=False)

    # Define Neural Network using GD for weights optimization
    gd_nn = NeuralNetwork(hidden_nodes=[50, 30], activation='relu',
                          algorithm='gradient_descent', max_iters=kwargs['max_iters'],
                          bias=True, is_classifier=True, learning_rate=0.001,
                          early_stopping=False, clip_max=1e10,
                          max_attempts=kwargs['max_iters'], random_state=random_seed, curve=False)

    # Fit each of the Neural Networks using the different optimization algorithms
    rhc_nn.fit(x_train, y_train)
    sa_nn.fit(x_train, y_train)
    ga_nn.fit(x_train, y_train)
    gd_nn.fit(x_train, y_train)

    # Print classification reports for all of the optimization algorithms
    print('RHC test classification report = \n {}'.format(classification_report(y_test, rhc_nn.predict(x_test))))
    print('SA test classification report = \n {}'.format(classification_report(y_test, sa_nn.predict(x_test))))
    print('GA test classification report = \n {}'.format(classification_report(y_test, ga_nn.predict(x_test))))
    print('GD test classification report = \n {}'.format(classification_report(y_test, gd_nn.predict(x_test))))

In [3]:
data = pd.read_csv("C:/Users/sagni/Documents/Personal Files/CS7641/CS7641/Data/Bank_churn/train.csv", index_col=0)
data.drop(['Surname','CustomerId'], axis=1, inplace=True)
data['HasCrCard'] = data['HasCrCard'].astype(int)
data['Age'] = data['Age'].astype(int)
data['IsActiveMember'] = data['IsActiveMember'].astype(int)
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
transformer = make_column_transformer((OneHotEncoder(handle_unknown='ignore'),['Geography','Gender']))
# Transforming
transformed = transformer.fit_transform(data.drop(['Exited'], axis=1))
print(transformed.dtype)
# Transformating back
transformed_df = pd.DataFrame(transformed, columns=transformer.get_feature_names())
# One-hot encoding removed an index. Let's put it back:
transformed_df.index = data.index
# Joining tables
data = pd.concat([data, transformed_df], axis=1)
# Dropping old categorical columns
data.drop(['Geography','Gender'], axis=1, inplace=True)
# CHecking result
data.head()
trainX, trainY, testX, testY = stratified_sampling(data, 'Exited', 0.9, 903949505)

# join testX and testY on index to create a daframe data
data = pd.concat([testX, testY], axis=1)
testX, testY, ignoreX, ignoreY = stratified_sampling(data, 'Exited', 0.5, 903949505)
#delete pandas dataframe ignoreX, ignoreY
del ignoreX
del ignoreY
del data

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(trainX)
trainX = pd.DataFrame(scaler.transform(trainX), columns=trainX.columns, index=trainX.index)
testX = pd.DataFrame(scaler.transform(testX), columns=testX.columns, index=testX.index)
display(trainX.head())
display(testX.head())

float64
[1m[4m[36mOriginal distribution[0m


Unnamed: 0,counts,%
0,130113,78.840118
1,34921,21.159882




[1m[4m[93mTrain distribution[0m


Unnamed: 0,counts,%
0,13011,78.840211
1,3492,21.159789




[1m[4m[91mTest distribution[0m


Unnamed: 0,counts,%
0,117102,78.840107
1,31429,21.159893




[1m[4m[94mTrain_x shape[0m
Number of columns are [1m16503[0m and number of rows are [1m13[0m


Unnamed: 0_level_0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,onehotencoder__x0_France,onehotencoder__x0_Germany,onehotencoder__x0_Spain,onehotencoder__x1_Female,onehotencoder__x1_Male
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
3638,669,32,7,0.0,2,1,0,93982.02,1.0,0.0,0.0,0.0,1.0
159233,672,29,9,0.0,2,1,1,134794.02,1.0,0.0,0.0,1.0,0.0
38435,644,32,2,0.0,2,1,0,103932.38,1.0,0.0,0.0,0.0,1.0
144063,724,24,7,142755.25,1,0,0,34231.48,0.0,1.0,0.0,0.0,1.0
52235,572,39,8,89047.74,1,1,1,178373.87,1.0,0.0,0.0,0.0,1.0




[1m[4m[94mTrain_y shape[0m
Number of columns are [1m16503[0m and number of rows are [1m1[0m


Unnamed: 0_level_0,Exited
id,Unnamed: 1_level_1
3638,0
159233,0
38435,0
144063,0
52235,0




[1m[4m[94mTrain_x shape[0m
Number of columns are [1m148531[0m and number of rows are [1m13[0m


Unnamed: 0_level_0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,onehotencoder__x0_France,onehotencoder__x0_Germany,onehotencoder__x0_Spain,onehotencoder__x1_Female,onehotencoder__x1_Male
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
65509,632,31,7,111388.18,1,0,1,173498.45,0.0,1.0,0.0,1.0,0.0
20875,641,34,8,0.0,2,0,0,124615.59,1.0,0.0,0.0,0.0,1.0
53648,726,63,6,123948.85,1,0,0,145560.38,1.0,0.0,0.0,1.0,0.0
40310,671,29,3,105229.53,1,1,1,131804.86,0.0,0.0,1.0,0.0,1.0
60496,711,40,7,0.0,2,1,0,180829.87,0.0,0.0,1.0,0.0,1.0




[1m[4m[94mTrain_y shape[0m
Number of columns are [1m148531[0m and number of rows are [1m1[0m


Unnamed: 0_level_0,Exited
id,Unnamed: 1_level_1
65509,0
20875,0
53648,1
40310,0
60496,0


[1m[4m[36mOriginal distribution[0m


Unnamed: 0,counts,%
0,117102,78.840107
1,31429,21.159893




[1m[4m[93mTrain distribution[0m


Unnamed: 0,counts,%
0,58551,78.840638
1,15714,21.159362




[1m[4m[91mTest distribution[0m


Unnamed: 0,counts,%
0,58551,78.839577
1,15715,21.160423




[1m[4m[94mTrain_x shape[0m
Number of columns are [1m74265[0m and number of rows are [1m13[0m


Unnamed: 0_level_0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,onehotencoder__x0_France,onehotencoder__x0_Germany,onehotencoder__x0_Spain,onehotencoder__x1_Female,onehotencoder__x1_Male
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
121133,675,29,6,121063.57,2,0,1,102076.92,0.0,1.0,0.0,0.0,1.0
39290,569,33,3,153058.1,1,1,1,102625.08,0.0,1.0,0.0,1.0,0.0
156352,629,32,7,137781.65,1,0,1,153921.32,1.0,0.0,0.0,1.0,0.0
77927,721,31,3,0.0,2,1,0,121151.1,1.0,0.0,0.0,1.0,0.0
93794,642,33,2,0.0,2,1,0,131736.23,1.0,0.0,0.0,1.0,0.0




[1m[4m[94mTrain_y shape[0m
Number of columns are [1m74265[0m and number of rows are [1m1[0m


Unnamed: 0_level_0,Exited
id,Unnamed: 1_level_1
121133,0
39290,1
156352,0
77927,0
93794,0




[1m[4m[94mTrain_x shape[0m
Number of columns are [1m74266[0m and number of rows are [1m13[0m


Unnamed: 0_level_0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,onehotencoder__x0_France,onehotencoder__x0_Germany,onehotencoder__x0_Spain,onehotencoder__x1_Female,onehotencoder__x1_Male
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
136760,706,37,9,0.0,1,0,1,159919.15,1.0,0.0,0.0,1.0,0.0
24160,745,36,2,114370.43,1,1,0,76582.95,0.0,1.0,0.0,0.0,1.0
52087,815,35,4,137455.99,1,1,1,184178.29,0.0,1.0,0.0,0.0,1.0
97925,802,29,6,0.0,2,0,0,166957.82,1.0,0.0,0.0,0.0,1.0
17721,646,41,4,126273.95,1,1,1,70400.86,0.0,1.0,0.0,1.0,0.0




[1m[4m[94mTrain_y shape[0m
Number of columns are [1m74266[0m and number of rows are [1m1[0m


Unnamed: 0_level_0,Exited
id,Unnamed: 1_level_1
136760,1
24160,1
52087,1
97925,0
17721,1


Unnamed: 0_level_0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,onehotencoder__x0_France,onehotencoder__x0_Germany,onehotencoder__x0_Spain,onehotencoder__x1_Female,onehotencoder__x1_Male
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
3638,0.613248,0.222222,0.7,0.0,0.333333,1.0,0.0,0.469689,1.0,0.0,0.0,0.0,1.0
159233,0.619658,0.174603,0.9,0.0,0.333333,1.0,1.0,0.673849,1.0,0.0,0.0,1.0,0.0
38435,0.559829,0.222222,0.2,0.0,0.333333,1.0,0.0,0.519465,1.0,0.0,0.0,0.0,1.0
144063,0.730769,0.095238,0.7,0.598837,0.0,0.0,0.0,0.17079,0.0,1.0,0.0,0.0,1.0
52235,0.405983,0.333333,0.8,0.373542,0.0,1.0,1.0,0.891854,1.0,0.0,0.0,0.0,1.0


Unnamed: 0_level_0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,onehotencoder__x0_France,onehotencoder__x0_Germany,onehotencoder__x0_Spain,onehotencoder__x1_Female,onehotencoder__x1_Male
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
121133,0.626068,0.174603,0.6,0.507843,0.333333,0.0,1.0,0.510183,0.0,1.0,0.0,0.0,1.0
39290,0.399573,0.238095,0.3,0.642056,0.0,1.0,1.0,0.512925,0.0,1.0,0.0,1.0,0.0
156352,0.527778,0.222222,0.7,0.577973,0.0,0.0,1.0,0.769532,1.0,0.0,0.0,1.0,0.0
77927,0.724359,0.206349,0.3,0.0,0.333333,1.0,0.0,0.605601,1.0,0.0,0.0,1.0,0.0
93794,0.555556,0.238095,0.2,0.0,0.333333,1.0,0.0,0.658552,1.0,0.0,0.0,1.0,0.0


In [18]:
def neural_network(x_train, x_test, y_train, y_test, random_seeds):
    """Define and experiment the Neural Network weights optimization problem.

        Training Neural Networks weights can be done using GD and backpropation, but also another RO
        optimization algorithm, like RHC, SA or GA, can be used.

        Args:
          x_train (ndarray): training data.
          x_test (ndarray): test data.
          y_train (ndarray): training labels.
          y_test (ndarray): test labels.
          random_seeds (list or ndarray): random seeds for get performances over multiple random runs.

        Returns:
          None.
        """
    # Maximum iterations to run the Neural Network for
    iterations = np.array([i for i in range(1, 10)] + [10 * i for i in range(1, 20, 2)])

    # Plot performances for RHC, SA, GA and GD with Neural Networks
    plot_nn_perf(x_train, y_train,
                            random_seeds=random_seeds,
                            rhc_max_iters=iterations, sa_max_iters=iterations,
                            ga_max_iters=iterations, gd_max_iters=iterations,
                            init_temp=100, exp_decay_rate=0.1, min_temp=0.001,
                            pop_size=100, mutation_prob=0.2)

    # Test performances for RHC, SA, GA and GD with Neural Networks
#     nn.test_nn_performances(x_train, x_test, y_train, y_test,
#                             random_seed=random_seeds[0], max_iters=200,
#                             init_temp=100, exp_decay_rate=0.1, min_temp=0.001,
#                             pop_size=100, mutation_prob=0.2)


In [36]:
np.array([i for i in range(1, 10)] + [10 * i for i in range(1, 20, 2)])

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  30,  50,  70,
        90, 110, 130, 150, 170, 190])

In [34]:
random_seeds = [5 + 5 * i for i in range(2)]  # random seeds for get performances over multiple random runs
neural_network(trainX, testX, trainY, testY, random_seeds=random_seeds)


Algorithm = random_hill_climb
1 - train loss = 7.830, val loss = 7.889
2 - train loss = 7.830, val loss = 7.889
3 - train loss = 7.830, val loss = 7.889
4 - train loss = 7.830, val loss = 7.889
5 - train loss = 7.830, val loss = 7.889
6 - train loss = 7.830, val loss = 7.889
7 - train loss = 7.828, val loss = 7.889
8 - train loss = 7.828, val loss = 7.889
9 - train loss = 7.828, val loss = 7.889
10 - train loss = 7.828, val loss = 7.889
30 - train loss = 7.828, val loss = 7.879
50 - train loss = 7.830, val loss = 7.879
70 - train loss = 7.843, val loss = 7.889
90 - train loss = 7.846, val loss = 7.889
110 - train loss = 7.846, val loss = 7.889
130 - train loss = 7.849, val loss = 7.900
150 - train loss = 7.851, val loss = 7.900
170 - train loss = 7.849, val loss = 7.900
190 - train loss = 7.856, val loss = 7.900
1 - train loss = 7.406, val loss = 7.460
2 - train loss = 7.406, val loss = 7.460
3 - train loss = 7.406, val loss = 7.460
4 - train loss = 7.406, val loss = 7.460
5 - train l

'RHC train'

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  30,  50,  70,
        90, 110, 130, 150, 170, 190])

[[7.830238813933336,
  7.830238813933336,
  7.830238813933336,
  7.830238813933336,
  7.830238813933336,
  7.830238813933336,
  7.827622636354567,
  7.827622636354567,
  7.827622636354567,
  7.827622636354567,
  7.827622636354567,
  7.830238874499723,
  7.843320125791886,
  7.845936363937041,
  7.845936363937041,
  7.848552602082196,
  7.851168840227352,
  7.848552602082196,
  7.856401437650435],
 [7.406404539868615,
  7.406404539868615,
  7.406404539868615,
  7.406404539868615,
  7.406404539868615,
  7.406404539868615,
  7.406404539868615,
  7.406404539868615,
  7.406404539868615,
  7.406404539868615,
  7.406404539868615,
  7.406404539868615,
  7.409020778013769,
  7.409020778013769,
  7.409020778013769,
  7.409020778013769,
  7.409020778013769,
  7.409020778013769,
  7.409020778013769]]

[[7.889216046396395,
  7.889216046396395,
  7.889216046396395,
  7.889216046396395,
  7.889216046396395,
  7.889216046396395,
  7.889216046396395,
  7.889216046396395,
  7.889216046396395,
  7.889216046396395,
  7.8787529211631595,
  7.8787529211631595,
  7.889216288625244,
  7.889216288625244,
  7.889216288625244,
  7.899679656087328,
  7.899679656087328,
  7.899679656087328,
  7.899679656087328],
 [7.460214831475897,
  7.460214831475897,
  7.460214831475897,
  7.460214831475897,
  7.460214831475897,
  7.460214831475897,
  7.460214831475897,
  7.460214831475897,
  7.460214831475897,
  7.460214831475897,
  7.460214831475897,
  7.470678198937981,
  7.470678198937981,
  7.470678198937981,
  7.470678198937981,
  7.470678198937981,
  7.470678198937981,
  7.470678198937981,
  7.470678198937981]]

[[0.019807815551757812,
  0.04709196090698242,
  0.06250262260437012,
  0.09639763832092285,
  0.1250629425048828,
  0.14277386665344238,
  0.15644288063049316,
  0.19094252586364746,
  0.29041337966918945,
  0.22489047050476074,
  0.5795326232910156,
  0.9521889686584473,
  1.3348267078399658,
  1.694962739944458,
  2.0849955081939697,
  2.3990731239318848,
  2.8372769355773926,
  3.105135679244995,
  3.377453565597534],
 [0.03125405311584473,
  0.03125786781311035,
  0.06250548362731934,
  0.07169890403747559,
  0.09382081031799316,
  0.1140291690826416,
  0.13067841529846191,
  0.1406409740447998,
  0.17751431465148926,
  0.17281603813171387,
  0.5332136154174805,
  0.881427526473999,
  1.2074389457702637,
  1.5277857780456543,
  1.9204864501953125,
  2.223926305770874,
  2.5859696865081787,
  2.870896816253662,
  3.425034523010254]]


Algorithm = simulated_annealing


ValueError: not enough values to unpack (expected 3, got 2)

In [24]:
import types
import __main__

# Get all items in the main module (your script)
items = __main__.__dict__.items()

# Filter out only the functions
functions = [obj for obj in items if isinstance(obj[1], types.FunctionType)]

# Print the names of the functions
for name, _ in functions:
    print(name)

_
log_loss
classification_report
train_test_split
stratified_sampling
make_column_transformer
neural_network
test_nn_performances
plot_nn_perf
