In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import ShuffleSplit, train_test_split,\
      LearningCurveDisplay, learning_curve
from sklearn.compose import make_column_transformer

import mlrose_hiive as mlrose
from mlrose_hiive import MaxKColorGenerator, QueensGenerator, FlipFlopGenerator,\
      TSPGenerator, KnapsackGenerator, ContinuousPeaksGenerator
from mlrose_hiive import SARunner, GARunner, NNGSRunner, MIMICRunner, RHCRunner
from mlrose_hiive import SKMLPRunner

## Data Pre-processing

In [4]:
# --- Input files (paths are relative to the notebook's working directory) ---
red_wine = os.path.join('data', 'wine', 'winequality-red.csv')
white_wine = os.path.join('data', 'wine', 'winequality-white.csv')
turbine = os.path.join('data', 'turbine', 'gt_2011.csv')
mushrooms = os.path.join('data', 'mushroom', 'secondary_data.csv')

# Seed for every stochastic preprocessing step in this cell, so that
# Restart Kernel -> Run All always yields the same subset and the same split.
DATA_SEED = 123456
# Number of (shuffled) mushroom rows kept, to bound training time.
N_SHROOM_SAMPLES = 7000

# encoders to use
scale = StandardScaler()
s_split = ShuffleSplit()
ohe = OneHotEncoder(sparse_output=False)

# One-hot encode the listed columns; every other column is passed through unchanged.
transformer = make_column_transformer(
    (
        ohe,
        [
            'cap-shape', 'cap-surface', 'cap-color',
            'does-bruise-or-bleed', 'gill-attachment', 'gill-spacing', 'gill-color',
            'stem-root', 'stem-surface', 'stem-color', 'veil-type', 'veil-color',
            'has-ring', 'ring-type', 'spore-print-color', 'habitat', 'season',
        ],
    ),
    remainder='passthrough',
)

# Load and shuffle the rows; the seed keeps the shuffle (and therefore the
# subset taken below) reproducible.
shroom_df = (
    pd.read_csv(mushrooms, sep=';')
    .sample(frac=1, random_state=DATA_SEED)
    .reset_index(drop=True)
)

# Column 0 is the target, the remaining columns are the features.
x = shroom_df.iloc[:, 1:].copy()
x_shroom = pd.DataFrame(
    transformer.fit_transform(x),
    columns=transformer.get_feature_names_out(),
)
y = shroom_df.iloc[:, 0].copy()
# Boolean target: True where the label is 'p' (presumably "poisonous" - confirm against the dataset description).
y_shroom = (y == 'p')

# reduce the number of training examples
x_shroom = x_shroom[:N_SHROOM_SAMPLES]
y_shroom = y_shroom[:N_SHROOM_SAMPLES]

# 80/20 hold-out split, seeded for reproducibility.
x_shroom_train, x_shroom_test, y_shroom_train, y_shroom_test = train_test_split(
    x_shroom, y_shroom, test_size=0.2, random_state=DATA_SEED)


In [None]:
# Load both wine files and tag each row with its origin: 0 = white file, 1 = red file.
white_df = pd.read_csv(white_wine, sep=';').assign(type=0)
red_df = pd.read_csv(red_wine, sep=';').assign(type=1)

# Stack the two frames and shuffle the rows; the fixed seed keeps the order
# identical across kernel restarts.
wine_df = (
    pd.concat([white_df, red_df])
    .sample(frac=1, random_state=123456)
    .reset_index(drop=True)
)

# set x and y values
# remove 'quality' and 'type' column from x array (the last two columns)
wine_features = wine_df.iloc[:, :-2]
# scale x vals
# Build a new frame from the scaled array rather than writing through
# `.values`: that array is not guaranteed to be a writable view of the frame
# (it is read-only under pandas Copy-on-Write), so an in-place assignment can
# raise or silently leave the data unscaled.
x_wine = pd.DataFrame(
    scale.fit_transform(wine_features),
    columns=wine_features.columns,
    index=wine_features.index,
)
# set y array equal to 'type' column
y_wine = wine_df.iloc[:, -1].copy()

In [5]:
# Classifier with one hidden layer of 100 ReLU units whose weights are fitted
# by simulated annealing; curve=True asks mlrose to record the fitness curve.
nn = mlrose.NeuralNetwork(
    # architecture
    hidden_nodes=[100],
    activation='relu',
    bias=True,
    is_classifier=True,
    # optimiser
    algorithm='simulated_annealing',
    learning_rate=0.001,
    clip_max=1.0,
    # stopping criteria
    max_iters=10000,
    max_attempts=200,
    early_stopping=True,
    # bookkeeping
    curve=True,
    random_state=123456,
)

In [6]:
# Learning curve for `nn` on the mushroom data: 5-fold CV, F1 scoring, ten
# training-set fractions from 5% to 100%, with fit/score timings returned.
# NOTE(review): per the scikit-learn docs `random_state` is only used when
# `shuffle=True`; with the default shuffle=False it has no effect here.
(train_sizes,
 train_scores,
 test_scores,
 fit_times,
 score_times) = learning_curve(
    nn,
    x_shroom,
    y_shroom,
    train_sizes=np.linspace(0.05, 1.0, 10),
    cv=5,
    scoring='f1',
    return_times=True,
    n_jobs=-1,
    random_state=123456,
)

## Neural Network

In [None]:
# Simulated-annealing weight search through NNGSRunner.
# Every list below holds a single value, so exactly one configuration is evaluated.
grid_search_parameters = {
    "activation": [mlrose.neural.activation.relu],
    "is_classifier": [True],
    # neural-network settings
    'max_iters': [100],
    'learning_rate': [0.0001],
    # simulated-annealing setting
    'schedule': [mlrose.ArithDecay(100)],
}

nnr = NNGSRunner(
    # data
    x_train=x_shroom_train,
    y_train=y_shroom_train,
    x_test=x_shroom_test,
    y_test=y_shroom_test,
    # experiment definition
    experiment_name='nn_test',
    algorithm=mlrose.algorithms.sa.simulated_annealing,
    grid_search_parameters=grid_search_parameters,
    iteration_list=[100],
    hidden_layer_sizes=[[100]],
    # fixed network / optimiser options
    bias=True,
    early_stopping=False,
    clip_max=1e+10,
    max_attempts=500,
    generate_curves=True,
    seed=123456,
)

# The fourth return value is the fitted GridSearchCV instance.
run_stats_df, curves_df, cv_results_df, sr = nnr.run()

In [None]:
# Inspect the best estimator held by `sr`, the search object returned by the
# most recent `nnr.run()` call.
sr.best_estimator_

In [None]:
# Simulated-annealing grid: three learning rates x three arithmetic-decay
# schedules, combined below with three hidden-layer sizes.
grid_search_parameters = {
    "activation": [mlrose.neural.activation.relu],
    "is_classifier": [True],
    # neural-network settings
    'max_iters': [150],
    'learning_rate': [0.0001, 0.001, 0.01],
    # simulated-annealing setting
    'schedule': [
        mlrose.ArithDecay(20),
        mlrose.ArithDecay(40),
        mlrose.ArithDecay(60),
    ],
}

nnr = NNGSRunner(
    # data
    x_train=x_shroom_train,
    y_train=y_shroom_train,
    x_test=x_shroom_test,
    y_test=y_shroom_test,
    # experiment definition
    experiment_name='nn_test',
    algorithm=mlrose.algorithms.sa.simulated_annealing,
    grid_search_parameters=grid_search_parameters,
    iteration_list=[100, 500, 1000],
    hidden_layer_sizes=[[10], [50], [100]],
    # fixed network / optimiser options
    bias=True,
    early_stopping=False,
    clip_max=1e+10,
    max_attempts=500,
    generate_curves=True,
    seed=123456,
)

# The fourth return value is the fitted GridSearchCV instance.
run_stats_df, curves_df, cv_results_df, sr = nnr.run()

In [None]:
# Genetic-algorithm weight search through NNGSRunner (single configuration).
# NOTE(review): max_iters is 1 in the grid while iteration_list is [100] -
# confirm this combination is intentional.
grid_search_parameters = {
    "activation": [mlrose.neural.activation.relu],
    "is_classifier": [True],
    'max_iters': [1],
    'learning_rate': [0.0001],
}

nnr = NNGSRunner(
    # data
    x_train=x_shroom_train,
    y_train=y_shroom_train,
    x_test=x_shroom_test,
    y_test=y_shroom_test,
    # experiment definition
    experiment_name='nn_test',
    algorithm=mlrose.algorithms.ga.genetic_alg,
    grid_search_parameters=grid_search_parameters,
    iteration_list=[100],
    hidden_layer_sizes=[[100]],
    # fixed network / optimiser options
    bias=True,
    early_stopping=False,
    clip_max=1e+10,
    max_attempts=500,
    generate_curves=True,
    seed=123456,
)

run_stats_df, curves_df, cv_results_df, sr = nnr.run()

In [None]:
# Random-hill-climbing weight search through NNGSRunner (single configuration).
grid_search_parameters = {
    "activation": [mlrose.neural.activation.relu],
    "is_classifier": [True],
    'max_iters': [150],
    'learning_rate': [0.0001],
}

nnr = NNGSRunner(
    # data
    x_train=x_shroom_train,
    y_train=y_shroom_train,
    x_test=x_shroom_test,
    y_test=y_shroom_test,
    # experiment definition
    experiment_name='nn_test',
    algorithm=mlrose.algorithms.rhc.random_hill_climb,
    grid_search_parameters=grid_search_parameters,
    iteration_list=[100],
    hidden_layer_sizes=[[100]],
    # fixed network / optimiser options
    bias=True,
    early_stopping=False,
    clip_max=1e+10,
    max_attempts=500,
    generate_curves=True,
    seed=123456,
)

# The fourth return value is the fitted GridSearchCV instance.
run_stats_df, curves_df, cv_results_df, sr = nnr.run()

In [None]:
# Tune max_iters and learning_rate_init for the gradient-descent network.
grid_search = {
    "max_iters": [5000, 10000, 25000, 50000],
    # Each candidate appears exactly once: a repeated value would make the grid
    # search refit identical configurations for no gain.
    # NOTE(review): 0.01 fills the gap between 0.001 and 0.1 - confirm this is
    # the intended candidate.
    "learning_rate_init": [0.001, 0.01, 0.1, 0.5, 1],
    "activation": [mlrose.neural.activation.relu],
    "is_classifier": [True],
}

runner = NNGSRunner(
    # data
    x_train=x_shroom_train,
    y_train=y_shroom_train,
    x_test=x_shroom_test,
    y_test=y_shroom_test,
    # experiment definition
    experiment_name="full_grid_search",
    algorithm=mlrose.algorithms.gradient_descent,
    grid_search_parameters=grid_search,
    iteration_list=[1000, 2500, 5000, 10000],
    hidden_layer_sizes=[[6, 6]],
    # fixed network / optimiser options
    bias=True,
    early_stopping=True,
    clip_max=1,
    max_attempts=1000,
    generate_curves=True,
    seed=123456,
    # run the search on all available cores
    n_jobs=-1,
)
run_stats, curves, cv_results, best_est = runner.run()

## Define Fitness Functions

### N-Queens

In [None]:
# Fitness functions for the three discrete problems used below.

# N-Queens and Four Peaks take no arguments here.
fitness = mlrose.Queens()
four_fitness = mlrose.FourPeaks()

# Knapsack: eight items with values 1..8 and the weights listed;
# max_weight_pct is passed through as the third Knapsack argument.
max_weight_pct = 0.5
weights = [10, 5, 2, 8, 15, 13, 18, 25]
values = list(np.arange(1, 9))
knap_fitness = mlrose.Knapsack(weights, values, max_weight_pct)

In [None]:
# Discrete optimisation problems, all of length 8.

# N-Queens: the Queens fitness is minimised here (maximize=False); each of the
# 8 positions takes a value in 0..7.
problem = mlrose.DiscreteOpt(
    length=8, fitness_fn=fitness,
    maximize=False, max_val=8,
)

# Four Peaks: a bit-string problem (max_val=2) whose fitness is a score to be
# maximised, so maximize must be True - minimising it only drives the search
# towards the worst states.
four_problem = mlrose.DiscreteOpt(
    length=8, fitness_fn=four_fitness,
    maximize=True, max_val=2,
)

# Knapsack: the fitness is the value packed, which is also maximised.
# NOTE(review): max_val=8 allows 0..7 copies of every item; use max_val=2 if a
# 0/1 knapsack is intended.
knap_problem = mlrose.DiscreteOpt(
    length=8, fitness_fn=knap_fitness,
    maximize=True, max_val=8,
)

In [None]:
# Stand-alone sanity check of the Knapsack fitness on a small 5-item example.
# The demo uses its own variable names so it does not overwrite the global
# `fitness`, `weights`, `values` and `max_weight_pct` that the problem
# definitions rely on; re-running those cells after this one stays safe.
demo_weights = [10, 5, 2, 8, 15]
demo_values = [1, 2, 3, 4, 5]
demo_max_weight_pct = 0.6
demo_knapsack = mlrose.Knapsack(demo_weights, demo_values, demo_max_weight_pct)

# One copy of item 0, two of item 2, one of item 3:
# total weight 10 + 2*2 + 8 = 22, total value 1 + 2*3 + 4 = 11.
demo_state = np.array([1, 0, 2, 1, 0])
demo_knapsack.evaluate(demo_state)

In [None]:
# TODO: write a helper function that defines a fitness function and runs an
# N-Queens problem with a chosen optimization algorithm.

In [None]:
# Define decay schedule
schedule = mlrose.ExpDecay()

# Solve the Four Peaks problem using simulated annealing.
# No init_state is passed: `four_problem` is a bit-string problem (max_val=2),
# so a start vector such as [0, 1, ..., 7] is not a valid state for it.
# mlrose draws a random valid start state instead, and random_state keeps
# that draw reproducible.
best_state, best_fitness, curve = mlrose.simulated_annealing(
    four_problem, schedule=schedule,
    max_attempts=10, max_iters=1000,
    random_state=123321,
    curve=True,
)

print(best_state)
print(best_fitness)

# Plot curve column 0 against curve column 1.
# NOTE(review): the labels assume mlrose_hiive's (fitness, function evaluations)
# curve layout - confirm for the installed version.
fig, ax = plt.subplots()
ax.plot(curve[:, 1], curve[:, 0])
ax.set(
    xlabel='Function evaluations',
    ylabel='Fitness',
    title='Simulated annealing on Four Peaks',
)
plt.show()

In [None]:
# Run MIMIC on `four_problem` and keep the recorded curve.
best_state, best_fitness, curve = mlrose.mimic(
    four_problem,
    max_attempts=10,
    max_iters=1000,
    curve=True,
    random_state=123321,
)

# Report the best state found, then its fitness, one per line.
print(best_state, best_fitness, sep='\n')

# Curve column 0 plotted against curve column 1.
fig, ax = plt.subplots()
ax.plot(curve[:, 1], curve[:, 0])
plt.show()

In [None]:
# create a runner class and solve the problem
sa_run = SARunner(problem=four_problem,
              experiment_name='queens8_sa',
              output_directory=None, # note: specify an output directory to have results saved to disk
              seed=123456,
              iteration_list=2 ** np.arange(11),
              max_attempts=500,
              temperature_list=[0.1, 0.5, 0.75, 1.0, 2.0, 5.0],
              decay_list=[mlrose.GeomDecay])

# the two data frames will contain the results
df_run_stats, df_run_curves = sa_run.run()

In [None]:
# Display the curves frame from the most recent runner call
# (sa_run.run() when the cells are executed in order).
df_run_curves

In [None]:
# create a runner class and solve the problem
mimic_run = MIMICRunner(problem=problem,
              experiment_name='queens8_sa',
              output_directory=None, # note: specify an output directory to have results saved to disk
              seed=123456,
              iteration_list=2 ** np.arange(11),
              max_attempts=500,
              population_sizes = [200],
              keep_percent_list = [0.25, 0.5, 0.75]
              )

# the two data frames will contain the results
df_run_stats, df_run_curves = mimic_run.run()

In [None]:
# Display the curves frame from the most recent runner call
# (mimic_run.run() when the cells are executed in order).
df_run_curves

In [None]:
# Show what runner_name() returns for the simulated-annealing runner.
sa_run.runner_name()

In [None]:
# create a runner class and solve the problem
ga_run = GARunner(problem=problem,
              experiment_name='queens8_sa',
              output_directory=None, # note: specify an output directory to have results saved to disk
              seed=123456,
              iteration_list=2 ** np.arange(11),
              max_attempts=500,
              population_sizes = [200],
              mutation_rates = [0.25, 0.5, 0.75]
              )

# the two data frames will contain the results
df_run_stats, df_run_curves = ga_run.run()

In [None]:
# Display the curves frame from the most recent runner call
# (ga_run.run() when the cells are executed in order).
df_run_curves

In [None]:
# create a runner class and solve the problem
rhc_run = RHCRunner(problem=problem,
              experiment_name='queens8_sa',
              output_directory=None, # note: specify an output directory to have results saved to disk
              seed=123456,
              iteration_list=2 ** np.arange(11),
              max_attempts=500,
              restart_list=[25, 75, 100]
              )

# the two data frames will contain the results
df_run_stats, df_run_curves = rhc_run.run()

In [None]:
# Rows of the latest curves frame where Fitness is exactly 0 and the
# Iteration number is below 20.
df_run_curves.loc[
    lambda curves: curves['Fitness'].eq(0) & curves['Iteration'].lt(20)
]

In [None]:
# Start state for the 8-queens problem: one value per position, 0..7.
init_state = np.array([0, 1, 2, 3, 4, 5, 6, 7])

# Solve the problem using random hill climbing from that start state.
best_state, best_fitness, curve = mlrose.random_hill_climb(
    problem,
    init_state=init_state,
    max_attempts=10,
    max_iters=1000,
    curve=True,
    random_state=123321,
)

# Report the best state found, then its fitness, one per line.
print(best_state, best_fitness, sep='\n')

# Curve column 0 plotted against curve column 1.
fig, ax = plt.subplots()
ax.plot(curve[:, 1], curve[:, 0])
plt.show()

In [None]:
# Solve the 8-queens problem using a genetic algorithm.
# No initial state is defined in this cell: none is passed to genetic_alg,
# so a start vector here would be dead code.
best_state, best_fitness, curve = mlrose.genetic_alg(
    problem, pop_size=200, mutation_prob=0.1, max_attempts=10,
    max_iters=1000, curve=True, random_state=123321,
)

print(best_state)
print(best_fitness)

# Plot curve column 0 against curve column 1.
# NOTE(review): the labels assume mlrose_hiive's (fitness, function evaluations)
# curve layout - confirm for the installed version.
fig, ax = plt.subplots()
ax.plot(curve[:, 1], curve[:, 0])
ax.set(
    xlabel='Function evaluations',
    ylabel='Fitness',
    title='Genetic algorithm on 8-Queens',
)
plt.show()

In [None]:
# Solve the 8-queens problem using MIMIC.
# No initial state is defined in this cell: none is passed to mimic, so a
# start vector here would be dead code.
# NOTE(review): keep_pct=0.01 of a 50-sample population is less than one
# sample - confirm this setting is intended.
best_state, best_fitness, curve = mlrose.mimic(
    problem, pop_size=50, keep_pct=0.01, max_attempts=10,
    max_iters=100, curve=True, random_state=123321,
)

print(best_state)
print(best_fitness)

# Plot curve column 0 against curve column 1.
# NOTE(review): the labels assume mlrose_hiive's (fitness, function evaluations)
# curve layout - confirm for the installed version.
fig, ax = plt.subplots()
ax.plot(curve[:, 1], curve[:, 0])
ax.set(
    xlabel='Function evaluations',
    ylabel='Fitness',
    title='MIMIC on 8-Queens',
)
plt.show()

### Neural Network

In [None]:
# Classifier with no hidden layers (hidden_nodes=[]) and sigmoid activation,
# fitted by random hill climbing. This rebinds the global name `nn`.
nn = mlrose.NeuralNetwork(
    # architecture
    hidden_nodes=[],
    activation='sigmoid',
    bias=True,
    is_classifier=True,
    # optimiser
    algorithm='random_hill_climb',
    learning_rate=0.01,
    clip_max=5,
    # stopping criteria
    max_iters=1000,
    max_attempts=100,
    early_stopping=True,
    # reproducibility
    random_state=3,
)

In [None]:
# Keyword settings for an SGD-trained network: one hidden layer of 100 ReLU
# units and a constant learning rate of 0.01.
nn_params = dict(
    activation='relu',
    hidden_layer_sizes=(100,),
    learning_rate='constant',
    learning_rate_init=0.01,
    solver='sgd',
)