In [1]:
import os
import time

import numpy as np
import pandas as pd
import mlrose_hiive as ml_h
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt



In [2]:
SEED = 42
ml_h.NNGSRunner

mlrose_hiive.runners.nngs_runner.NNGSRunner

In [3]:
df = pd.read_csv('datasets/wine-quality/wine-quality-all.csv', delimiter=';', encoding='utf-8')
X = df.drop('quality', axis=1)
y = df['quality']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=SEED)

X_train_color = X_train['color'].values.reshape(-1, 1)
X_test_color = X_test['color'].values.reshape(-1, 1)

In [4]:
encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')

X_train_encoded_color = encoder.fit_transform(X_train_color)
X_test_encoded_color = encoder.transform(X_test_color)

# Step 4: Concatenate the encoded 'color' column with the original data
X_train_reset = X_train.drop(columns=['color']).reset_index(drop=True)
X_train_encoded_color_reset = pd.DataFrame(X_train_encoded_color,
                                           columns=encoder.get_feature_names_out(['color'])).reset_index(drop=True)
X_train_e = pd.concat([X_train_reset, X_train_encoded_color_reset], axis=1)

X_test_reset = X_test.drop(columns=['color']).reset_index(drop=True)
X_test_encoded_color_reset = pd.DataFrame(X_test_encoded_color,
                                          columns=encoder.get_feature_names_out(['color'])).reset_index(drop=True)
X_test_e = pd.concat([X_test_reset, X_test_encoded_color_reset], axis=1)

X_all_e = pd.concat((X_train_e, X_test_e), axis=0)

#
scaler = MinMaxScaler()

X_train_n = scaler.fit_transform(X_train_e)
X_test_n = scaler.transform(X_test_e)

X_all_n = np.concatenate((X_train_n, X_test_n), axis=0)


In [5]:
X_val_n, X_test_n, y_val, y_test = train_test_split(X_test_n, y_test, test_size=0.5, random_state=SEED)

# Encode labels
one_hot = OneHotEncoder()

y_train_hot = one_hot.fit_transform(y_train.values.reshape(-1, 1)).todense()
y_val_hot = one_hot.transform(y_val.values.reshape(-1, 1)).todense()
y_test_hot = one_hot.transform(y_test.values.reshape(-1, 1)).todense()


In [6]:
F1_SCORE = 'micro'

In [7]:
from itertools import product

algorithms = [
    'gradient_descent',
    'simulated_annealing',
    'random_hill_climb',
    'genetic_alg',
]
learning_rates = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-7]
mutation_probabilities = [0.1, 0.3, 0.5, 0.8]
pop_sizes = [25, 50, 100, 150]
restarts = [0, 1, 2, 4, 6, 8, 10, 15]
schedules = [0.05, 0.1, 0.5, 1, 2, 5, 10, 20]

grid_search_params = list(product(algorithms, learning_rates, mutation_probabilities, pop_sizes, restarts, schedules))

filtered_grid_search_params = []
for elem in grid_search_params:
    algo, lr, mut_prob, pop_sz, rest, schedule = elem
    if algo == 'gradient_descent' and mut_prob == 0.1 and pop_sz == 50 and rest == 0 and schedule == 0.1:
        filtered_grid_search_params.append(elem)
    elif algo == 'simulated_annealing' and mut_prob == 0.1 and pop_sz == 50 and rest == 0:
        filtered_grid_search_params.append(elem)
    elif algo == 'random_hill_climb' and mut_prob == 0.1 and pop_sz == 50 and schedule == 0.1:
        filtered_grid_search_params.append(elem)
    elif algo == 'genetic_alg' and schedule == 0.1 and rest == 0 and (lr == 1e-4):
        filtered_grid_search_params.append(elem)

# filtered_grid_search_params
len(filtered_grid_search_params)

118

In [8]:
from tqdm.notebook import tqdm

out = []
#
# filtered_grid_search_params = filtered_grid_search_params[0:5]

for params in tqdm(filtered_grid_search_params):
    algo, lr, mut_prob, pop_sz, rest, schedule = params
    nn_model1 = ml_h.NeuralNetwork(
        hidden_nodes=[50, 50],
        activation='relu',
        algorithm=algo,
        max_iters=500,
        bias=True,
        is_classifier=True,
        early_stopping=False,
        clip_max=5,
        max_attempts=200,
        random_state=SEED,
        curve=True,
        learning_rate=lr,
        mutation_prob=mut_prob,
        pop_size=pop_sz,
        restarts=rest,
        schedule=ml_h.GeomDecay(schedule),
    )
    t0 = time.time()
    nn_model1.fit(X_train_n, y_train_hot)
    t1 = time.time()
    duration = t1 - t0
    y_train_pred = nn_model1.predict(X_train_n)
    y_val_pred = nn_model1.predict(X_val_n)
    y_test_pred = nn_model1.predict(X_test_n)
    y_train_micro_f1 = f1_score(np.asarray(y_train_hot), np.asarray(y_train_pred), average=F1_SCORE)
    y_val_micro_f1 = f1_score(np.asarray(y_val_hot), np.asarray(y_val_pred), average=F1_SCORE)
    y_test_micro_f1 = f1_score(np.asarray(y_test_hot), np.asarray(y_test_pred), average=F1_SCORE)

    out.append({
        'algorithm': algo,
        'learning_rate': lr,
        'mutation_prob': mut_prob,
        'population_size': pop_sz,
        'restarts': rest,
        'schedule_init': schedule,
        'duration': duration,
        'train_score': f1_score(np.asarray(y_train_hot), np.asarray(y_train_pred), average=F1_SCORE),
        'val_score': f1_score(np.asarray(y_val_hot), np.asarray(y_val_pred), average=F1_SCORE),
        'test_score': f1_score(np.asarray(y_test_hot), np.asarray(y_test_pred), average=F1_SCORE),
        'fitness_curve_length': len(nn_model1.fitness_curve),
        # 'fitness_curve_final': len(nn_model1.fitness_curve[:, 0][-1])
    })

#
# print(f"Train micro F1-score: {y_train_micro_f1}")
# print(f"Val micro F1-score: {y_val_micro_f1}")
# print(f"Test micro F1-score: {y_test_micro_f1}")

df = pd.DataFrame(out)
df.to_csv('outputs-nn/hyperparameter_search.csv')

  0%|          | 0/118 [00:00<?, ?it/s]

In [9]:
# from tqdm.notebook import tqdm
#
# # Additional logging for Genetic Algorithm
# #
# algorithms_2 = ['genetic_alg']
# learning_rates_2 = [1e-4]
# mutation_probabilities_2 = [0.1, 0.3, 0.5, 0.7]
# pop_sizes_2 = [25, 50, 100, 150]
# restarts_2 = [0]
# schedules_2 = [0.1]
#
# for_genetic_algo = list(
#     product(algorithms_2, learning_rates_2, mutation_probabilities_2, pop_sizes_2, restarts_2, schedules_2))
# additional_logging = set(for_genetic_algo) - set(filtered_grid_search_params)
#
# print(len(additional_logging))
#
# out_extra = []
#
# for params in tqdm(additional_logging):
#     algo, lr, mut_prob, pop_sz, rest, schedule = params
#     nn_model1 = ml_h.NeuralNetwork(
#         hidden_nodes=[50, 50],
#         activation='relu',
#         algorithm=algo,
#         max_iters=500,
#         bias=True,
#         is_classifier=True,
#         early_stopping=False,
#         clip_max=20,
#         max_attempts=200,
#         random_state=SEED,
#         curve=True,
#         learning_rate=lr,
#         mutation_prob=mut_prob,
#         pop_size=pop_sz,
#         restarts=rest,
#         schedule=ml_h.GeomDecay(schedule),
#     )
#     t0 = time.time()
#     nn_model1.fit(X_train_n, y_train_hot)
#     t1 = time.time()
#     duration = t1 - t0
#     y_train_pred = nn_model1.predict(X_train_n)
#     y_val_pred = nn_model1.predict(X_val_n)
#     y_test_pred = nn_model1.predict(X_test_n)
#     y_train_micro_f1 = f1_score(np.asarray(y_train_hot), np.asarray(y_train_pred), average=F1_SCORE)
#     y_val_micro_f1 = f1_score(np.asarray(y_val_hot), np.asarray(y_val_pred), average=F1_SCORE)
#     y_test_micro_f1 = f1_score(np.asarray(y_test_hot), np.asarray(y_test_pred), average=F1_SCORE)
#
#     out.append({
#         'algorithm': algo,
#         'learning_rate': lr,
#         'mutation_prob': mut_prob,
#         'population_size': pop_sz,
#         'restarts': rest,
#         'schedule_init': schedule,
#         'duration': duration,
#         'train_score': f1_score(np.asarray(y_train_hot), np.asarray(y_train_pred), average=F1_SCORE),
#         'val_score': f1_score(np.asarray(y_val_hot), np.asarray(y_val_pred), average=F1_SCORE),
#         'test_score': f1_score(np.asarray(y_test_hot), np.asarray(y_test_pred), average=F1_SCORE),
#         'fitness_curve_length': len(nn_model1.fitness_curve),
#         # 'fitness_curve_final': len(nn_model1.fitness_curve[:, 0][-1])
#     })
#
# extra_df = pd.DataFrame(out_extra)
# extra_df.to_csv('outputs-nn/hyperparameter_search_genetic_extra.csv')


In [54]:
from pathlib import Path


def plot_validation_curves(x,
                           train_scores,
                           val_scores,
                           file_name: str,
                           algo_name: str,
                           x_label,
                           y_label,
                           test_scores=None,
                           x_log_scale=False,
                           ):
    plt.rcParams['figure.dpi'] = 300
    plt.rcParams['figure.figsize'] = (8, 6)
    plt.plot(x, train_scores, label='Train Score', color='blue')
    plt.scatter(x, train_scores, color='blue', s=20)
    plt.plot(x, val_scores, label='Validation Score', color='yellow')
    plt.scatter(x, val_scores, color='yellow', s=20)
    if test_scores is not None:
        plt.plot(x, test_scores, label='Test Score', color='red')
        plt.scatter(x, test_scores, color='red', s=20)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(f"{y_label} vs {x_label} ({algo_name.replace('_', ' ').title()})")
    if x_log_scale:
        plt.xscale('log')
    plt.legend()
    plt.grid(True)

    file_path = f"charts-nn/{algo_name}/{file_name}"

    os.makedirs(Path(file_path).parent.absolute(), exist_ok=True)
    plt.savefig(file_path)
    plt.clf()


def plot_time_curves(x,
                     training_times,
                     file_name: str,
                     algo_name: str,
                     x_label,
                     y_label='Training Time, secs',
                     x_log_scale=False,
                     ):
    plt.rcParams['figure.dpi'] = 300
    plt.rcParams['figure.figsize'] = (8, 6)
    plt.plot(x, training_times, label='Training Time', color='blue')
    plt.scatter(x, training_times, color='blue', s=20)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(f"Training time vs {x_label} ({algo_name.replace('_', ' ').title()})")
    if x_log_scale:
        plt.xscale('log')
    plt.legend()
    plt.grid(True)

    file_path = f"charts-nn/{algo_name}/{file_name}"

    os.makedirs(Path(file_path).parent.absolute(), exist_ok=True)
    plt.savefig(file_path)
    plt.clf()


def plot_fitness_curves(curves, labels, file_name, algo_name=None):
    assert len(curves) == len(labels)
    plt.rcParams['figure.dpi'] = 300
    plt.rcParams['figure.figsize'] = (8, 6)
    for curve, label in list(zip(curves, labels)):
        plt.plot(curve, label=label)

    plt.xlabel("Iterations")
    plt.ylabel("Fitness Curve")
    title = f"Fitness Curve vs Iterations"
    if algo_name is not None:
        title = f"{title} ({algo_name.replace('-', ' ').title()})"
    plt.title(title)
    plt.legend()
    plt.grid(True)
    file_path = f"charts-nn/{file_name}"

    os.makedirs(Path(file_path).parent.absolute(), exist_ok=True)
    plt.savefig(file_path)
    plt.clf()

In [11]:
df = pd.read_csv('outputs-nn/hyperparameter_search.csv')

In [12]:
df = df.reset_index(drop=True).drop(columns=['Unnamed: 0', 'index'], errors='ignore')

In [13]:
def get_best_setups():
    max_score_idx = df.groupby('algorithm')['val_score'].idxmax()
    result_df = df.loc[max_score_idx]

    # Reset the index if needed
    result_df = result_df.reset_index(drop=True)
    return result_df


get_best_setups()

Unnamed: 0,algorithm,learning_rate,mutation_prob,population_size,restarts,schedule_init,duration,train_score,val_score,test_score,fitness_curve_length
0,genetic_alg,0.0001,0.1,25,0,0.1,117.184864,0.51826,0.537594,0.5,500
1,gradient_descent,1e-05,0.1,50,0,0.1,17.221923,0.546992,0.546366,0.52381,500
2,random_hill_climb,0.1,0.1,50,0,0.1,5.273635,0.441998,0.417293,0.429825,500
3,simulated_annealing,0.1,0.1,50,0,0.05,11.00246,0.441998,0.417293,0.429825,500


In [14]:
from functools import reduce

interesting_params = {
    'gradient_descent': ['learning_rate'],
    'simulated_annealing': ['learning_rate', 'schedule_init'],
    'random_hill_climb': ['learning_rate', 'restarts'],
    'genetic_alg': ['learning_rate', 'mutation_prob', 'population_size'],
}

needs_log_scale = {
    'learning_rate': True,
    'schedule_init': False,
    'restarts': False,
    'mutation_prob': False,
    'population_size': False,

}


def get_algorithm_best_params(algorithm_name, except_col):
    best_setups = get_best_setups()
    algorithm_df = best_setups[best_setups['algorithm'] == algorithm_name]
    algorithm_row = algorithm_df.iloc[0]
    return [(col, algorithm_row[col]) for col in algorithm_row.index if
            col not in ['algorithm', 'duration', 'train_score', 'val_score', 'test_score', 'fitness_curve_length',
                        except_col]]


for algo_name in algorithms:
    for param in interesting_params[algo_name]:
        pairs = get_algorithm_best_params(algo_name, param)
        condition = reduce(lambda x, y: x & y, [(df[col] == value) for col, value in pairs])
        algo_df = df[df['algorithm'] == algo_name]
        train_scores = algo_df[condition]['train_score']
        val_scores = algo_df[condition]['val_score']
        test_score = algo_df[condition]['test_score']
        xs = algo_df[condition][param]
        plot_validation_curves(x=xs,
                               train_scores=train_scores,
                               val_scores=val_scores,
                               test_scores=test_score,
                               x_label=param,
                               algo_name=algo_name,
                               file_name=f'validation_{algo_name}_{param}',
                               y_label='Micro F1-Score',
                               x_log_scale=needs_log_scale[param])

<Figure size 2400x1800 with 0 Axes>

In [30]:
def get_model_for_best_setup(algo_name):
    best_df = get_best_setups()
    best_df = best_df[best_df['algorithm'] == algo_name]
    return ml_h.NeuralNetwork(
        hidden_nodes=[50, 50],
        activation='relu',
        algorithm=algo_name,
        max_iters=500,
        bias=True,
        is_classifier=True,
        early_stopping=False,
        clip_max=5,
        max_attempts=200,
        random_state=SEED,
        curve=True,
        learning_rate=best_df['learning_rate'].iloc[0],
        mutation_prob=best_df['mutation_prob'].iloc[0],
        pop_size=best_df['population_size'].iloc[0],
        restarts=best_df['restarts'].iloc[0],
        schedule=ml_h.GeomDecay(schedule),
    )


In [31]:
model_gr_descent = get_model_for_best_setup('gradient_descent')
model_gr_descent.fit(X_train_n, y_train_hot)

In [32]:
model_gr_descent.fitness_curve

array([-6.13158361, -2.9242725 , -2.46824326, -2.14002216, -1.94520053,
       -1.82029745, -1.7351754 , -1.67459795, -1.62984763, -1.59240958,
       -1.56245014, -1.53565522, -1.51306904, -1.49196264, -1.47401512,
       -1.45648188, -1.44163519, -1.42628553, -1.41363053, -1.4000033 ,
       -1.38897013, -1.3770544 , -1.36746023, -1.35677457, -1.34840514,
       -1.33887136, -1.33134126, -1.32288736, -1.31602537, -1.30846624,
       -1.30213771, -1.29537564, -1.28979747, -1.28372992, -1.27857796,
       -1.27312761, -1.26836144, -1.26351656, -1.25928835, -1.25498971,
       -1.25119696, -1.24744497, -1.24401109, -1.24053536, -1.23736733,
       -1.23421659, -1.23135507, -1.22850072, -1.22590252, -1.22328883,
       -1.22094138, -1.21855836, -1.21641396, -1.21424044, -1.21225024,
       -1.21028124, -1.20841649, -1.20657056, -1.20482983, -1.2030925 ,
       -1.20147325, -1.19987325, -1.19834495, -1.19686879, -1.19545679,
       -1.19409598, -1.19280173, -1.19155029, -1.19035777, -1.18

In [33]:
model_sa = get_model_for_best_setup('simulated_annealing')
model_sa.fit(X_train_n, y_train_hot)

In [34]:
model_rhc = get_model_for_best_setup('random_hill_climb')
model_rhc.fit(X_train_n, y_train_hot)

In [35]:
model_ga = get_model_for_best_setup('genetic_alg')
model_ga.fit(X_train_n, y_train_hot)

In [36]:
plot_fitness_curves(
    [model_gr_descent.fitness_curve,
     (-1) * model_sa.fitness_curve[:, 0],
     (-1) * model_rhc.fitness_curve[:, 0],
     (-1) * model_ga.fitness_curve[:, 0],
     ],
    labels=[
        'Gradient Descent',
        'Simulated Annealing',
        'Random Hill Climbing',
        'Genetic Algorithm'
    ],
    file_name='fitness_curves.png'

)

<Figure size 2400x1800 with 0 Axes>

In [37]:
plot_fitness_curves(
    [
        model_gr_descent.fitness_curve,
    ],
    labels=[
        'Gradient Descent',
    ],
    file_name='gd_fitness_curves.png',
    algo_name='Gradient Descent',
)

<Figure size 2400x1800 with 0 Axes>

In [38]:
plot_fitness_curves(
    [
        (-1) * model_sa.fitness_curve[:, 0],

    ],
    labels=[
        'Simulated Annealing',
    ],
    file_name='sa_fitness_curves.png',
    algo_name='Simulated Annealing',
)


<Figure size 2400x1800 with 0 Axes>

In [39]:
plot_fitness_curves(
    [
        (-1) * model_rhc.fitness_curve[:, 0],
    ],
    labels=[
        'Random Hill Climbing',
    ],
    file_name='rhc_fitness_curves.png',
    algo_name='Random Hill Climbing',
)


<Figure size 2400x1800 with 0 Axes>

In [40]:
plot_fitness_curves(
    [
        (-1) * model_ga.fitness_curve[:, 0],
    ],
    labels=[
        'Genetic Algorithm'
    ],
    file_name='ga_fitness_curves.png',
    algo_name='Genetic Algorithm'
)


<Figure size 2400x1800 with 0 Axes>

In [41]:
np.sum(np.abs(model_gr_descent.fitted_weights))

1785.8739078279116

In [42]:
np.sum(np.abs(model_rhc.fitted_weights))


1794.3759567793943

In [45]:
np.sum(np.abs(model_sa.fitted_weights))

1799.4306799416215

In [43]:
np.sum(np.abs(model_ga.fitted_weights))

8843.68045124751

In [48]:
plot_time_curves

In [55]:
for algo_name in algorithms:
    for param in interesting_params[algo_name]:
        pairs = get_algorithm_best_params(algo_name, param)
        condition = reduce(lambda x, y: x & y, [(df[col] == value) for col, value in pairs])
        algo_df = df[df['algorithm'] == algo_name]
        train_time = algo_df[condition]['duration']
        xs = algo_df[condition][param]
        plot_time_curves(x=xs,
                         training_times=train_time,
                         x_label=param,
                         algo_name=algo_name,
                         file_name=f'time_{algo_name}_{param}',
                         x_log_scale=needs_log_scale[param])

<Figure size 2400x1800 with 0 Axes>