In [1]:
# Numerical / data-analysis / plotting stack
import scipy as sp
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import time
import matplotlib
# Default size applied to every matplotlib figure created in this notebook
matplotlib.rc('figure', figsize=[10,5])
# Randomized-optimization library: fitness functions, problems, algorithms and runners
import mlrose_hiive as mlrose

# Seed passed to np.random.seed and to each runner below
SEED = 42

<h3>Utility Functions</h3>

In [2]:
# Planned plots: fitness curves, training time, and function evaluations
# (learning curves, loss per iteration, iterations to convergence / wall-clock time).

def create_plots():
    """Placeholder for the shared plotting helper; it currently does nothing.

    Returns:
        None
    """
    return None

<h1>Genetic Algorithm</h1>

In [3]:
# Four Peaks fitness function from mlrose, constructed with its default arguments
fitness = mlrose.FourPeaks()

In [4]:
# Build the bit-string optimization problem.
# NumPy's global RNG is seeded first so the 100-element starting state is reproducible.
np.random.seed(SEED)
init_state = np.random.randint(0, 2, size=100)
print(init_state)
problem = mlrose.DiscreteOpt(
    length=len(init_state),
    fitness_fn=fitness,
    maximize=True,
    max_val=2,
)

[0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 0 1
 0 0 0 0 0 1 1 1 1 1 0 1 1 0 1 0 1 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 1 1 1 0 1
 0 1 1 1 0 1 0 1 0 1 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0]


In [5]:
# Genetic algorithm: hyperparameter sweep.
#
# GARunner is configured with three population sizes and three mutation rates;
# iteration_list holds the checkpoints 1, 2, 4, ..., 2048.
ga = mlrose.GARunner(
    problem=problem,
    experiment_name="Four_Peaks",
    output_directory="experiment_results",
    seed=SEED,
    iteration_list=2 ** np.arange(12),
    max_attempts=1000,
    population_sizes=[150, 200, 300],
    mutation_rates=[0.4, 0.5, 0.6],
)

# Time the whole sweep; run() hands back two result data frames
# (run statistics and per-iteration curves).
t0 = time.time()
df_run_stats, df_run_curves = ga.run()
t1 = time.time()
print(t1 - t0)

176.78689575195312


In [8]:
# Show the first 25 rows of the sweep statistics
df_run_stats.iloc[:25]

Unnamed: 0,Iteration,Fitness,FEvals,Time,State,Population Size,Mutation Rate,max_iters
0,0,1.0,150,0.004778,"[0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, ...",150,0.4,2048
1,1,12.0,302,0.028043,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...",150,0.4,2048
2,2,12.0,453,0.043148,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, ...",150,0.4,2048
3,4,13.0,756,0.064778,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...",150,0.4,2048
4,8,20.0,1362,0.103529,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",150,0.4,2048
5,16,122.0,2575,0.197981,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",150,0.4,2048
6,32,123.0,4992,0.345503,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",150,0.4,2048
7,64,141.0,9832,0.650618,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",150,0.4,2048
8,128,163.0,19506,1.297281,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",150,0.4,2048
9,256,189.0,38840,2.769045,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",150,0.4,2048


In [7]:
# Show the last 280 rows of the per-iteration curve data
df_run_curves.iloc[-280:]

Unnamed: 0,Iteration,Time,Fitness,FEvals,Population Size,Mutation Rate,max_iters
10313,870,9.786546,189.0,262210.0,300,0.6,2048
10314,871,9.798553,189.0,262511.0,300,0.6,2048
10315,872,9.810551,189.0,262812.0,300,0.6,2048
10316,873,9.822352,189.0,263113.0,300,0.6,2048
10317,874,9.834285,189.0,263414.0,300,0.6,2048
...,...,...,...,...,...,...,...
10588,1145,13.209463,189.0,344985.0,300,0.6,2048
10589,1146,13.221455,189.0,345286.0,300,0.6,2048
10590,1147,13.233477,189.0,345587.0,300,0.6,2048
10591,1148,13.245457,189.0,345888.0,300,0.6,2048


In [9]:
# Initialize fitness function object using pre-defined class
fitness = mlrose.FourPeaks()

# Re-create the optimization problem inside this cell.
# init_state is printed for reference only; it is not passed to genetic_alg below.
np.random.seed(SEED)
init_state = np.random.randint(2, size=100)
print(init_state)
problem = mlrose.DiscreteOpt(length=len(init_state), fitness_fn=fitness, maximize=True, max_val=2)

# Final genetic-algorithm run with the chosen parameters (picked on run time).
# random_state reuses SEED so the whole notebook is driven by a single seed value.
t0 = time.time()
best_state, best_fitness, fitness_curve = mlrose.genetic_alg(problem,
                                                             pop_size=150,
                                                             mutation_prob=0.4,
                                                             max_attempts=1000,
                                                             max_iters=2048,
                                                             curve=True,
                                                             random_state=SEED)
t1 = time.time()

# Report elapsed seconds, the best state found, its fitness, and the fitness curve
print(t1 - t0)
print(best_state)
print(best_fitness)
print(fitness_curve)

[0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 0 1
 0 0 0 0 0 1 1 1 1 1 0 1 1 0 1 0 1 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 1 1 1 0 1
 0 1 1 1 0 1 0 1 0 1 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0]
13.562495946884155
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0]
189.0
[[1.20000e+01 3.02000e+02]
 [1.20000e+01 4.53000e+02]
 [1.20000e+01 6.04000e+02]
 ...
 [1.89000e+02 1.76703e+05]
 [1.89000e+02 1.76854e+05]
 [1.89000e+02 1.77005e+05]]


<h1>Simulated Annealing</h1>

In [10]:
# Four Peaks fitness function with its default arguments
fitness = mlrose.FourPeaks()

# Build the bit-string optimization problem.
# NumPy's global RNG is seeded first so the 100-element starting state is reproducible.
np.random.seed(SEED)
init_state = np.random.randint(0, 2, size=100)
print(init_state)
problem = mlrose.DiscreteOpt(
    length=len(init_state),
    fitness_fn=fitness,
    maximize=True,
    max_val=2,
)

[0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 0 1
 0 0 0 0 0 1 1 1 1 1 0 1 1 0 1 0 1 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 1 1 1 0 1
 0 1 1 1 0 1 0 1 0 1 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0]


In [11]:
# Simulated annealing: hyperparameter sweep.

print(init_state)

# SARunner is configured with ten starting temperatures and geometric decay;
# iteration_list holds the checkpoints 1, 2, 4, ..., 2048.
sa = mlrose.SARunner(
    problem=problem,
    experiment_name="Four_Peaks",
    output_directory="experiment_results",
    seed=SEED,
    iteration_list=2 ** np.arange(12),
    max_attempts=1000,
    temperature_list=[1, 10, 50, 100, 250, 500, 1000, 2500, 5000, 10000],
    decay_list=[mlrose.GeomDecay],
)

# Time the whole sweep; run() hands back two result data frames
# (run statistics and per-iteration curves).
t0 = time.time()
df_run_stats, df_run_curves = sa.run()
t1 = time.time()
print(t1 - t0)

[0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 0 1
 0 0 0 0 0 1 1 1 1 1 0 1 1 0 1 0 1 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 1 1 1 0 1
 0 1 1 1 0 1 0 1 0 1 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0]
23.683966159820557


In [16]:
# Keep only the sweep rows whose fitness reached at least 50
df_run_stats.query("Fitness >= 50")

Unnamed: 0,Iteration,Fitness,FEvals,Time,State,schedule_type,schedule_init_temp,schedule_decay,schedule_min_temp,schedule_current_value,Temperature,max_iters
12,2048,50.0,3539,1.669753,"[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...",geometric,1,0.99,0.001,0.983358,1,2048
25,2048,59.0,3572,1.708761,"[1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, ...",geometric,10,0.99,0.001,9.82973,10,2048
64,2048,58.0,3612,2.230028,"[1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, ...",geometric,250,0.99,0.001,244.459191,250,2048
77,2048,58.0,3612,2.341936,"[1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, ...",geometric,500,0.99,0.001,488.368801,500,2048
90,2048,70.0,3522,2.508594,"[0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, ...",geometric,1000,0.99,0.001,975.102958,1000,2048


In [17]:
# Show the first five rows of the per-iteration curve data
df_run_curves.iloc[:5]

Unnamed: 0,Iteration,Time,Fitness,FEvals,Temperature,max_iters
0,0,0.000113,1.0,0.0,1,2048
1,1,0.009155,1.0,2.0,1,2048
2,2,0.015654,1.0,4.0,1,2048
3,3,0.023511,1.0,6.0,1,2048
4,4,0.023584,1.0,8.0,1,2048


In [30]:
# Initialize fitness function object using pre-defined class
fitness = mlrose.FourPeaks()

# Re-create the optimization problem and the seeded starting state inside this cell
np.random.seed(SEED)
init_state = np.random.randint(2, size=100)
print(init_state)
problem = mlrose.DiscreteOpt(length=len(init_state), fitness_fn=fitness, maximize=True, max_val=2)

# Final simulated-annealing run with the chosen geometric-decay schedule.
# NOTE(review): the "Fitness >= 50" filter of the sweep results shown earlier lists
# no row for temperature 5000 -- confirm that init_temp=5000.0 is the intended choice.
# random_state reuses SEED so the whole notebook is driven by a single seed value.
t0 = time.time()
best_state, best_fitness, fitness_curve = mlrose.simulated_annealing(problem,
                                                                     schedule=mlrose.GeomDecay(init_temp=5000.0, decay=0.99, min_temp=0.001),
                                                                     max_attempts=1000,
                                                                     max_iters=2048,
                                                                     init_state=init_state,
                                                                     curve=True,
                                                                     random_state=SEED)
t1 = time.time()

# Report elapsed seconds, the best state found, its fitness, and the fitness curve
print(t1 - t0)
print(best_state)
print(best_fitness)
print(fitness_curve)

[0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 0 1
 0 0 0 0 0 1 1 1 1 1 0 1 1 0 1 0 1 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 1 1 1 0 1
 0 1 1 1 0 1 0 1 0 1 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0]
0.06899499893188477
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1 0
 0 0 1 1 1 0 0 0 0 1 0 0 1 1 0 0 1 0 0 1 0 0 1 0 1 1 0 1 1 1 0 0 1 1 1 0 1
 1 1 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
131.0
[[1.000e+00 3.000e+00]
 [1.000e+00 5.000e+00]
 [1.000e+00 7.000e+00]
 ...
 [1.310e+02 3.761e+03]
 [1.310e+02 3.762e+03]
 [1.310e+02 3.764e+03]]


<h1>Randomized Hill Climbing</h1>

In [31]:
# Four Peaks fitness function with its default arguments
fitness = mlrose.FourPeaks()

# Build the bit-string optimization problem.
# NumPy's global RNG is seeded first so the 100-element starting state is reproducible.
np.random.seed(SEED)
init_state = np.random.randint(0, 2, size=100)
print(init_state)
problem = mlrose.DiscreteOpt(
    length=len(init_state),
    fitness_fn=fitness,
    maximize=True,
    max_val=2,
)

[0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 0 1
 0 0 0 0 0 1 1 1 1 1 0 1 1 0 1 0 1 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 1 1 1 0 1
 0 1 1 1 0 1 0 1 0 1 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0]


In [None]:
# Randomized hill climbing: hyperparameter sweep.

print(init_state)

# RHCRunner is configured with four restart counts;
# iteration_list holds the checkpoints 1, 2, 4, ..., 2048.
rhc = mlrose.RHCRunner(
    problem=problem,
    experiment_name="Four_Peaks",
    output_directory="experiment_results",
    seed=SEED,
    iteration_list=2 ** np.arange(12),
    max_attempts=1000,
    restart_list=[0, 25, 75, 100],
)

# Time the whole sweep; run() hands back two result data frames
# (run statistics and per-iteration curves).
t0 = time.time()
df_run_stats, df_run_curves = rhc.run()
t1 = time.time()
print(t1 - t0)

[0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 0 1
 0 0 0 0 0 1 1 1 1 1 0 1 1 0 1 0 1 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 1 1 1 0 1
 0 1 1 1 0 1 0 1 0 1 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0]


In [None]:
# Display the run-statistics frame assigned by the preceding runner cell (rhc.run() when run in order)
df_run_stats

In [None]:
# Display the per-iteration curve frame assigned by the preceding runner cell (rhc.run() when run in order)
df_run_curves

In [None]:
# Initialize fitness function object using pre-defined class
fitness = mlrose.FourPeaks()

# Re-create the optimization problem and the seeded starting state inside this cell
np.random.seed(SEED)
init_state = np.random.randint(2, size=100)
print(init_state)
problem = mlrose.DiscreteOpt(length=len(init_state), fitness_fn=fitness, maximize=True, max_val=2)

# Number of random restarts for the final run.
# TODO: replace with the best value from the RHC sweep (restart_list=[0, 25, 75, 100]);
# 0 is the library default and is used here only so the cell is runnable.
RHC_RESTARTS = 0

# Final randomized-hill-climbing run.
# random_state reuses SEED so the whole notebook is driven by a single seed value.
t0 = time.time()
best_state, best_fitness, fitness_curve = mlrose.random_hill_climb(problem,
                                                                   max_attempts=1000,
                                                                   max_iters=2048,
                                                                   restarts=RHC_RESTARTS,
                                                                   init_state=init_state,
                                                                   curve=True,
                                                                   random_state=SEED)
t1 = time.time()

# Report elapsed seconds, the best state found, its fitness, and the fitness curve
print(t1 - t0)
print(best_state)
print(best_fitness)
print(fitness_curve)