In [59]:
import mlrose_hiive
import numpy as np
import os
import matplotlib.pyplot as plt

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import ShuffleSplit, train_test_split
from sklearn.compose import make_column_transformer

import mlrose_hiive as mlrose
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from mlrose_hiive import MaxKColorGenerator, QueensGenerator, FlipFlopGenerator,\
      TSPGenerator, KnapsackGenerator, ContinuousPeaksGenerator
from mlrose_hiive import SARunner, GARunner, NNGSRunner, MIMICRunner, RHCRunner
from mlrose_hiive import SKMLPRunner

In [2]:
from sklearn.neural_network import MLPClassifier

In [3]:
# Paths to the raw data sets, relative to the notebook's working directory.
# Only `mushrooms` is read in this cell; the other three are merely defined.
red_wine = os.path.join('data','wine', 'winequality-red.csv')
white_wine = os.path.join('data','wine', 'winequality-white.csv')
turbine = os.path.join('data','turbine','gt_2011.csv')
mushrooms = os.path.join('data','mushroom','secondary_data.csv')

# One seed for every stochastic step in this cell (row shuffle, ShuffleSplit,
# train/test split) so that "Restart Kernel -> Run All" reproduces the same
# subset and the same partition.
DATA_SEED = 123456
# Number of shuffled rows kept (reduces the number of training examples).
N_SHROOM_ROWS = 7000

# encoders to use
scale = StandardScaler()
s_split = ShuffleSplit(random_state=DATA_SEED)
ohe = OneHotEncoder(sparse_output=False)

# One-hot encode the listed categorical columns; every other column is passed
# through unchanged.
transformer = make_column_transformer(
    (
        ohe,
        [
            'cap-shape', 'cap-surface', 'cap-color',
            'does-bruise-or-bleed', 'gill-attachment', 'gill-spacing', 'gill-color',
            'stem-root', 'stem-surface', 'stem-color', 'veil-type', 'veil-color',
            'has-ring', 'ring-type', 'spore-print-color', 'habitat', 'season',
        ],
    ),
    remainder='passthrough',
)

# Load the semicolon-separated file and shuffle the rows reproducibly.
shroom_df = (
    pd.read_csv(mushrooms, sep=';')
    .sample(frac=1, random_state=DATA_SEED)
    .reset_index(drop=True)
)

# Column 0 is the label; all remaining columns are features.
x = shroom_df.iloc[:, 1:].copy()
x_shroom = pd.DataFrame(transformer.fit_transform(x),
                columns=transformer.get_feature_names_out())
y = shroom_df.iloc[:, 0].copy()
# Boolean target: True where the label equals 'p'
# (presumably "poisonous" -- confirm against the data set description).
y_shroom = (y == 'p')


# reduce the number of training examples (explicit positional slicing)
x_shroom = x_shroom.iloc[:N_SHROOM_ROWS]
y_shroom = y_shroom.iloc[:N_SHROOM_ROWS]

# 80/20 train/test partition of the reduced data.
x_shroom_train, x_shroom_test, y_shroom_train, y_shroom_test = train_test_split(
    x_shroom, y_shroom, test_size=0.2, random_state=DATA_SEED)


In [4]:
# 80/20 train/test partition of the reduced mushroom data under short names.
# Seeded so that a kernel restart reproduces the same partition.
x_train, x_test, y_train, y_test = train_test_split(
    x_shroom, y_shroom, test_size=0.2, random_state=123456)

In [61]:
def run_rhc_optimization(problem, experiment_name='rhc_experiment', output_directory=None):
    """Run a randomized-hill-climbing sweep on ``problem`` via ``RHCRunner``.

    The runner is configured with a fixed seed (123456), an iteration list of
    the powers of two from 1 to 1024, ``max_attempts=500`` and restart counts
    of 25, 75 and 100.

    Parameters
    ----------
    problem
        Optimisation problem object handed straight to ``RHCRunner``.
    experiment_name : str, optional
        Label passed to the runner. The previous hard-coded value,
        ``'queens8_sa'``, named neither this algorithm nor the problems it is
        run on.
    output_directory : str or None, optional
        Passed to the runner unchanged; specify a directory to have results
        saved to disk. ``None`` (the default) keeps the previous behaviour.

    Returns
    -------
    tuple
        ``(df_run_stats, df_run_curves, name)``: the two result frames
        returned by ``RHCRunner.run()`` and the value of
        ``RHCRunner.runner_name()``.
    """
    rhc_run = RHCRunner(
        problem=problem,
        experiment_name=experiment_name,
        output_directory=output_directory,
        seed=123456,
        iteration_list=2 ** np.arange(11),
        max_attempts=500,
        restart_list=[25, 75, 100],
    )

    # the two data frames will contain the results
    df_run_stats, df_run_curves = rhc_run.run()
    name = rhc_run.runner_name()
    return df_run_stats, df_run_curves, name

def run_ga_optimization(problem, experiment_name='ga_experiment', output_directory=None):
    """Run a genetic-algorithm sweep on ``problem`` via ``GARunner``.

    The runner is configured with a fixed seed (123456), an iteration list of
    the powers of two from 1 to 1024, ``max_attempts=500``, population sizes
    of 200, 400 and 600, and mutation rates of 0.25, 0.5 and 0.75.

    Parameters
    ----------
    problem
        Optimisation problem object handed straight to ``GARunner``.
    experiment_name : str, optional
        Label passed to the runner. The previous hard-coded value,
        ``'queens8_sa'``, named neither this algorithm nor the problems it is
        run on.
    output_directory : str or None, optional
        Passed to the runner unchanged; specify a directory to have results
        saved to disk. ``None`` (the default) keeps the previous behaviour.

    Returns
    -------
    tuple
        ``(df_run_stats, df_run_curves, name)``: the two result frames
        returned by ``GARunner.run()`` and the value of
        ``GARunner.runner_name()``.
    """
    ga_run = GARunner(
        problem=problem,
        experiment_name=experiment_name,
        output_directory=output_directory,
        seed=123456,
        iteration_list=2 ** np.arange(11),
        max_attempts=500,
        population_sizes=[200, 400, 600],
        mutation_rates=[0.25, 0.5, 0.75],
    )

    # the two data frames will contain the results
    df_run_stats, df_run_curves = ga_run.run()
    name = ga_run.runner_name()
    return df_run_stats, df_run_curves, name

def run_sa_optimization(problem, experiment_name='sa_experiment', output_directory=None):
    """Run a simulated-annealing sweep on ``problem`` via ``SARunner``.

    The runner is configured with a fixed seed (123456), an iteration list of
    the powers of two from 1 to 1024, ``max_attempts=500``, initial
    temperatures of 0.1, 0.5, 0.75, 1.0, 2.0 and 5.0, and
    ``mlrose.GeomDecay`` as the only decay schedule.

    Parameters
    ----------
    problem
        Optimisation problem object handed straight to ``SARunner``.
    experiment_name : str, optional
        Label passed to the runner. The previous hard-coded value,
        ``'queens8_sa'``, did not match the other problems this helper is
        run on.
    output_directory : str or None, optional
        Passed to the runner unchanged; specify a directory to have results
        saved to disk. ``None`` (the default) keeps the previous behaviour.

    Returns
    -------
    tuple
        ``(df_run_stats, df_run_curves, name)``: the two result frames
        returned by ``SARunner.run()`` and the value of
        ``SARunner.runner_name()``.
    """
    sa_run = SARunner(
        problem=problem,
        experiment_name=experiment_name,
        output_directory=output_directory,
        seed=123456,
        iteration_list=2 ** np.arange(11),
        max_attempts=500,
        temperature_list=[0.1, 0.5, 0.75, 1.0, 2.0, 5.0],
        decay_list=[mlrose.GeomDecay],
    )

    # the two data frames will contain the results
    df_run_stats, df_run_curves = sa_run.run()
    name = sa_run.runner_name()
    return df_run_stats, df_run_curves, name


def run_mimic_optimization(problem, experiment_name='mimic_experiment', output_directory=None):
    """Run a MIMIC sweep on ``problem`` via ``MIMICRunner``.

    The runner is configured with a fixed seed (123456), an iteration list of
    the powers of two from 1 to 1024, ``max_attempts=500``, population sizes
    of 200, 400 and 600, and keep-percentages of 0.25, 0.5 and 0.75.

    Parameters
    ----------
    problem
        Optimisation problem object handed straight to ``MIMICRunner``.
    experiment_name : str, optional
        Label passed to the runner. The previous hard-coded value,
        ``'queens8_sa'``, named neither this algorithm nor the problems it is
        run on.
    output_directory : str or None, optional
        Passed to the runner unchanged; specify a directory to have results
        saved to disk. ``None`` (the default) keeps the previous behaviour.

    Returns
    -------
    tuple
        ``(df_run_stats, df_run_curves, name)``: the two result frames
        returned by ``MIMICRunner.run()`` and the value of
        ``MIMICRunner.runner_name()``.
    """
    mimic_run = MIMICRunner(
        problem=problem,
        experiment_name=experiment_name,
        output_directory=output_directory,
        seed=123456,
        iteration_list=2 ** np.arange(11),
        max_attempts=500,
        population_sizes=[200, 400, 600],
        keep_percent_list=[0.25, 0.5, 0.75],
    )

    # the two data frames will contain the results
    df_run_stats, df_run_curves = mimic_run.run()
    name = mimic_run.runner_name()
    return df_run_stats, df_run_curves, name


In [62]:
# Sweep every optimisation algorithm over every benchmark problem at each
# problem size, writing each run's two result frames to CSV files in the
# current working directory.
problem_size = [10, 20, 40, 60, 100]

# Runner helpers in the order they are executed for each problem. Each one
# takes `problem=` and returns (stats frame, curves frame, runner name).
optimizers = (
    run_rhc_optimization,    # randomized hill climbing
    run_ga_optimization,     # genetic algorithms
    run_sa_optimization,     # simulated annealing
    run_mimic_optimization,  # mimic
)

# 1-based position of the current size within `problem_size`; it appears in
# the output file names as "iteration".
count = 1
for size in problem_size:
    # Build one instance of each benchmark problem at this size, all with the
    # same fixed seed.
    flip_problem = FlipFlopGenerator().generate(size=size, seed=123456)
    queen_problem = QueensGenerator().generate(size=size, seed=123456)
    knap_problem = KnapsackGenerator().generate(number_of_items_types=size, seed=123456)
    kcolor = MaxKColorGenerator().generate(number_of_nodes=size, seed=123456)

    # (problem object, label used in the output file names)
    problem_lst = [
        (flip_problem, 'flip_flop'),
        (queen_problem, 'queen'),
        (knap_problem, 'knap'),
        (kcolor, 'kcolor'),
    ]

    for opt_problem, problem_name in problem_lst:
        for optimize in optimizers:
            df_stats, df_curves, name = optimize(problem=opt_problem)
            # File-name tail shared by the stats and curves files of this run.
            suffix = f'{problem_name}_size_{size}_iteration_{count}.csv'
            df_stats.to_csv(f'{name}_stats_{suffix}')
            df_curves.to_csv(f'{name}_curves_{suffix}')

    count = count + 1