In [1]:
import numpy as np
import time
import pandas as pd
import matplotlib.pyplot as plt
import mlrose_hiive
import seaborn as sns
import ro_ml_util as utl
import os
import pickle
import glob

# --- Notebook-wide configuration -------------------------------------------
# Problem identity and the output folder name (note the trailing slash).
problem_name = "KColors"
folder = "KColors/"
prob_size = "XL"

# Seed constant and run options shared by the cells below.
SEED = 42
verbose = True
n_iterations = np.arange(1, 201)

# Use the ggplot look for every matplotlib figure in this notebook.
plt.style.use("ggplot")

<Figure size 1600x1200 with 0 Axes>

In [2]:
# Verify the output folder through the project helper before anything is
# written to it. The helper reports when the folder already exists;
# presumably it creates the folder when missing — confirm in ro_ml_util.
utl.check_folder(folder)

C:\Users\joshu\OneDrive - Georgia Institute of Technology\Georgia-Tech\CS 7641 - Machine Learning\Assignments\Randomized Optimization\KColors/ folder already exists.


# K-Colors

In [3]:
def _doubling_grid(last_exponent):
    """Return [2**1, 2**2, ..., 2**last_exponent] as a fresh list of Python ints."""
    return [2 ** exponent for exponent in range(1, last_exponent + 1)]


def _rounded_float_grid(start, stop, step=0.05):
    """Return start, start+step, ... (stop excluded), each rounded to 2 decimals."""
    return np.round(np.arange(start, stop, step), 2).tolist()


# 10, 100, 1000 followed by 50, 500, 5000.
iteration_list = [scale * 10 ** exponent for scale in (1, 5) for exponent in range(1, 4)]

# Randomized hill climbing: restart counts and iteration budgets, 2 .. 256.
rhc_params = {
    "restart_list": _doubling_grid(8),
    "iterations": _doubling_grid(8),
}

# Simulated annealing: temperatures 2 .. 4096, iteration budgets 2 .. 256.
sa_params = {
    "temperature_list": _doubling_grid(12),
    "iterations": _doubling_grid(8),
}

# Genetic algorithm: populations 25 .. 400 (step 25), mutation rates 0.45 .. 0.90.
ga_params = {
    "population_sizes": list(range(25, 401, 25)),
    "iterations": _doubling_grid(8),
    "mutation_rates": _rounded_float_grid(0.45, 0.91),
}

# MIMIC: populations 2 .. 60 (step 2), keep percentages 0.10 .. 0.50.
mimic_params = {
    "population_sizes": list(range(2, 61, 2)),
    "keep_percent_list": _rounded_float_grid(0.1, 0.51),
    "iterations": _doubling_grid(8),
}

In [None]:
# Grid-search run (gridsearch=True) on the "m" problem size.
# One hyper-parameter grid per optimiser, keyed by the optimiser's short name.
algorithm_grids = {
    "RHC": rhc_params,
    "SA": sa_params,
    "GA": ga_params,
    "MIMIC": mimic_params,
}
# max_attempts candidates: 2, 4, ..., 128.
attempt_grid = [2 ** exponent for exponent in range(1, 8)]

utl.run_optimization_tests(
    prob_name="kcolors",
    parameters=algorithm_grids,
    size="m",
    iterations=iteration_list,
    maximize=True,
    gridsearch=True,
    gen_curves=True,
    cv=2,
    max_attempts=attempt_grid,
)


## Run with gridsearch=False

In [None]:
# Same experiment as the grid-search cell, but with gridsearch=False and the
# population options (change_pop / reset_pop) switched off.
problem, folder = utl.determine_problem(
    prob_name=problem_name, size="m", maximize=True, SEED=42,
)

# One hyper-parameter grid per optimiser, keyed by the optimiser's short name.
algorithm_grids = {
    "RHC": rhc_params,
    "SA": sa_params,
    "GA": ga_params,
    "MIMIC": mimic_params,
}
# max_attempts candidates: 2, 4, ..., 128.
attempt_grid = [2 ** exponent for exponent in range(1, 8)]

utl.run_optimization_tests(
    prob_name="kcolors",
    parameters=algorithm_grids,
    size="m",
    iterations=iteration_list,
    maximize=True,
    gridsearch=False,
    gen_curves=True,
    cv=2,
    max_attempts=attempt_grid,
    change_pop=False,
    reset_pop=False,
)


# Random Hill Climb

In [None]:
# Resolve the problem and its results folder for the "m" problem size.
problem, folder = utl.determine_problem(prob_name=problem_name, size="m", maximize=True, SEED=42)

# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load result files that were produced by this project.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(f"{os.getcwd()}/{folder}/All_RHC_Results.pkl", "rb") as input_file:
    data = pickle.load(input_file)

# Plot the stored RHC results with the project helper.
utl.plot_discrete(all_results=data, folder=folder, prob_name=problem_name, alg_name="RHC")
print()

# Genetic Algorithm

In [None]:
# Resolve the problem and its results folder for the "m" problem size.
problem, folder = utl.determine_problem(prob_name=problem_name, size="m", maximize=True, SEED=42)

# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load result files that were produced by this project.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(f"{os.getcwd()}/{folder}/All_GA_Results.pkl", "rb") as input_file:
    data = pickle.load(input_file)

# Plot the stored GA results with the project helper.
utl.plot_discrete(all_results=data, folder=folder, prob_name=problem_name, alg_name="GA")
print()

# Simulated Annealing

In [None]:
# Resolve the problem and its results folder for the "m" problem size.
problem, folder = utl.determine_problem(prob_name=problem_name, size="m", maximize=True, SEED=42)

# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load result files that were produced by this project.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(f"{os.getcwd()}/{folder}/All_SA_Results.pkl", "rb") as input_file:
    data = pickle.load(input_file)

# Plot the stored SA results with the project helper.
utl.plot_discrete(all_results=data, folder=folder, prob_name=problem_name, alg_name="SA")
print()

# MIMIC

In [None]:
# Resolve the problem and its results folder for the "m" problem size.
problem, folder = utl.determine_problem(prob_name=problem_name, size="m", maximize=True, SEED=42)

# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load result files that were produced by this project.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(f"{os.getcwd()}/{folder}/All_MIMIC_Results.pkl", "rb") as input_file:
    data = pickle.load(input_file)

# Plot the stored MIMIC results with the project helper.
utl.plot_discrete(all_results=data, folder=folder, prob_name=problem_name, alg_name="MIMIC")
print()

In [None]:
def get_rhc(time_df, count_df, iter_num, rhc_parameters, prob_name):
    """Time one random-hill-climb run on a freshly generated Max-K-Color problem.

    Args:
        time_df: DataFrame updated in place; the elapsed wall-clock seconds of
            the run are written to ``time_df.loc[iter_num, "RHC"]``.
        count_df: DataFrame updated in place; the problem's ``counter``
            attribute after the run is written to
            ``count_df.loc[iter_num, "RHC"]`` (presumably the number of
            fitness evaluations — confirm against mlrose_hiive).
        iter_num: Passed to the generator as ``number_of_nodes`` and used as
            the row label in both frames.
        rhc_parameters: Mapping providing ``"max_attempts"`` and ``"restarts"``.
        prob_name: Not used by this function.

    Returns:
        None. Results are stored in ``time_df`` and ``count_df``.
    """
    # The seed comes from the wall clock, so each call gets a different problem.
    kcolor_problem = mlrose_hiive.generators.MaxKColorGenerator.generate(
        seed=int(np.round(time.time())),
        number_of_nodes=iter_num,
        maximize=True,
    )

    started_at = time.time()
    # The three returned values are unpacked but not used; only the timing and
    # the problem's counter are recorded.
    _state, _fitness, _curve = mlrose_hiive.random_hill_climb(
        problem=kcolor_problem,
        max_attempts=rhc_parameters["max_attempts"],
        restarts=rhc_parameters["restarts"],
        curve=True,
        random_state=int(np.round(time.time())),
        return_results=True,
    )
    elapsed_seconds = time.time() - started_at

    time_df.loc[iter_num, "RHC"] = elapsed_seconds
    count_df.loc[iter_num, "RHC"] = kcolor_problem.counter

In [None]:
def get_sa(time_df, count_df, iter_num, sa_parameter, prob_name):
    """Time one simulated-annealing run on a freshly generated Max-K-Color problem.

    The settings are read from the ``sa_parameter`` argument. Previously the
    body read a notebook-global ``sa_parameters`` instead, which ignored the
    argument and raised ``NameError`` when that global had not been defined.

    Args:
        time_df: DataFrame updated in place; the elapsed wall-clock seconds of
            the run are written to ``time_df.loc[iter_num, "SA"]``.
        count_df: DataFrame updated in place; the problem's ``counter``
            attribute after the run is written to
            ``count_df.loc[iter_num, "SA"]`` (presumably the number of
            fitness evaluations — confirm against mlrose_hiive).
        iter_num: Passed to the generator as ``number_of_nodes`` and used as
            the row label in both frames.
        sa_parameter: Mapping providing ``"max_attempts"`` and ``"schedule"``.
        prob_name: Not used by this function.

    Returns:
        None. Results are stored in ``time_df`` and ``count_df``.
    """
    # The seed comes from the wall clock, so each call gets a different problem.
    temp_problem = mlrose_hiive.generators.MaxKColorGenerator.generate(
        seed=int(np.round(time.time())),
        number_of_nodes=iter_num,
        maximize=True,
    )

    temp_start_time = time.time()
    # The three returned values are unpacked but not used; only the timing and
    # the problem's counter are recorded.
    temp_state, temp_fitness, temp_fitness_curve = mlrose_hiive.simulated_annealing(
        problem=temp_problem,
        max_attempts=sa_parameter["max_attempts"],
        schedule=sa_parameter["schedule"],
        curve=True,
        random_state=int(np.round(time.time())),
        return_results=True,
    )
    temp_elapsed_time = time.time() - temp_start_time

    time_df.loc[iter_num, "SA"] = temp_elapsed_time
    count_df.loc[iter_num, "SA"] = temp_problem.counter

In [None]:
def get_ga(time_df, count_df, iter_num, ga_parameter, prob_name):
    """Time one genetic-algorithm run on a freshly generated Max-K-Color problem.

    The settings are read from the ``ga_parameter`` argument. Previously the
    body read a notebook-global ``ga_parameters`` instead, which ignored the
    argument and raised ``NameError`` when that global had not been defined.

    Args:
        time_df: DataFrame updated in place; the elapsed wall-clock seconds of
            the run are written to ``time_df.loc[iter_num, "GA"]``.
        count_df: DataFrame updated in place; the problem's ``counter``
            attribute after the run is written to
            ``count_df.loc[iter_num, "GA"]`` (presumably the number of
            fitness evaluations — confirm against mlrose_hiive).
        iter_num: Passed to the generator as ``number_of_nodes`` and used as
            the row label in both frames.
        ga_parameter: Mapping providing ``"max_attempts"``, ``"pop_size"`` and
            ``"mutation_prob"``.
        prob_name: Not used by this function.

    Returns:
        None. Results are stored in ``time_df`` and ``count_df``.
    """
    # The seed comes from the wall clock, so each call gets a different problem.
    temp_problem = mlrose_hiive.generators.MaxKColorGenerator.generate(
        seed=int(np.round(time.time())),
        number_of_nodes=iter_num,
        maximize=True,
    )

    temp_start_time = time.time()
    # The three returned values are unpacked but not used; only the timing and
    # the problem's counter are recorded.
    temp_state, temp_fitness, temp_fitness_curve = mlrose_hiive.genetic_alg(
        problem=temp_problem,
        max_attempts=ga_parameter["max_attempts"],
        curve=True,
        pop_size=ga_parameter["pop_size"],
        mutation_prob=ga_parameter["mutation_prob"],
        random_state=int(np.round(time.time())),
        return_results=True,
    )
    temp_elapsed_time = time.time() - temp_start_time

    time_df.loc[iter_num, "GA"] = temp_elapsed_time
    count_df.loc[iter_num, "GA"] = temp_problem.counter

In [None]:
def get_mimic(time_df, count_df, iter_num, mimic_parameters, prob_name):
    """Time one MIMIC run on a freshly generated Max-K-Color problem.

    Args:
        time_df: DataFrame updated in place; the elapsed wall-clock seconds of
            the run are written to ``time_df.loc[iter_num, "MIMIC"]``.
        count_df: DataFrame updated in place; the problem's ``counter``
            attribute after the run is written to
            ``count_df.loc[iter_num, "MIMIC"]`` (presumably the number of
            fitness evaluations — confirm against mlrose_hiive).
        iter_num: Passed to the generator as ``number_of_nodes`` and used as
            the row label in both frames.
        mimic_parameters: Mapping providing ``"max_attempts"``, ``"pop_size"``
            and ``"keep_pct"``.
        prob_name: Not used by this function.

    Returns:
        None. Results are stored in ``time_df`` and ``count_df``.
    """
    # The seed comes from the wall clock, so each call gets a different problem.
    kcolor_problem = mlrose_hiive.generators.MaxKColorGenerator.generate(
        seed=int(np.round(time.time())),
        number_of_nodes=iter_num,
        maximize=True,
    )

    started_at = time.time()
    # The three returned values are unpacked but not used; only the timing and
    # the problem's counter are recorded. The noise level is fixed at 0.05.
    _state, _fitness, _curve = mlrose_hiive.mimic(
        problem=kcolor_problem,
        max_attempts=mimic_parameters["max_attempts"],
        curve=True,
        random_state=int(np.round(time.time())),
        pop_size=mimic_parameters["pop_size"],
        keep_pct=mimic_parameters["keep_pct"],
        noise=0.05,
        return_results=True,
    )
    elapsed_seconds = time.time() - started_at

    time_df.loc[iter_num, "MIMIC"] = elapsed_seconds
    count_df.loc[iter_num, "MIMIC"] = kcolor_problem.counter

In [None]:
# Resolve the "l"-size problem and its folder; the seed comes from the wall
# clock, so it differs between runs.
clock_seed = int(np.round(time.time()))
temp_problem, _folder = utl.determine_problem(
    prob_name=problem_name.lower(),
    size="l",
    maximize=True,
    SEED=clock_seed,
)

# Read the stored parameters for that folder via the project helper.
best_parameters = utl.read_parameters_from_file(
    folder=_folder,
    cv=2,
    size="l",
    change_pop=False,
    reset_pop=False,
)

In [None]:
# Display the parameters returned by utl.read_parameters_from_file.
best_parameters

In [None]:
# Row labels for the timing study: 10, 20, ..., 100. Each value is later
# passed to the get_* functions as `iter_num`.
iteration_list = list(range(10, 101, 10))

best_parameters = utl.read_parameters_from_file(
    folder=_folder,
    cv=2,
    size="l",
    change_pop=False,
    reset_pop=False,
)
# Split the stored parameters per optimiser.
rhc_parameters, sa_parameters, ga_parameters, mimic_parameters = (
    best_parameters[algorithm] for algorithm in ("RHC", "SA", "GA", "MIMIC")
)

# Two independent zero-filled tables (one row per label, one column per
# optimiser) that the timing loop fills in.
times_df, count_df = (
    pd.DataFrame(
        data=np.zeros(shape=(len(iteration_list), 4)),
        index=iteration_list,
        columns=["RHC", "SA", "GA", "MIMIC"],
    )
    for _ in range(2)
)


In [None]:
# Each entry pairs a timing function with the parameters it receives; the
# order (RHC, SA, GA, MIMIC) is the order the runs execute for every label.
timing_jobs = (
    (get_rhc, rhc_parameters),
    (get_sa, sa_parameters),
    (get_ga, ga_parameters),
    (get_mimic, mimic_parameters),
)

for _iter in iteration_list:
    print(f"Current Iteration: {_iter}")
    for run_job, job_parameters in timing_jobs:
        # Each job writes its results into times_df / count_df in place.
        run_job(times_df, count_df, _iter, job_parameters, problem_name)
    

In [None]:
# Persist the two result tables as separate pickle files in the results folder.
# The `with` block closes each file on exit, so no explicit close() is needed.
with open(f"{os.getcwd()}/{folder}/Times_df.pkl", "wb") as f:
    pickle.dump(times_df, f)

with open(f"{os.getcwd()}/{folder}/Eval_Calls_df.pkl", "wb") as f:
    pickle.dump(count_df, f)

In [3]:
# Reload the combined results dict from disk.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load result files that were produced by this project.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(f"{os.getcwd()}/{folder}/Eval_Call_and_Times.pkl", "rb") as f:
    results_object = pickle.load(f)

count_df = results_object["Count_df"]
times_df = results_object["Times_df"]

In [4]:
# Figure titles: problem name on the first line, description on the second.
evaluations_title = f"{problem_name}\nFunction Evaluations Vs. Problem Size"
runtime_title = f"{problem_name}\nRun Times Vs. Problem Size"

# Function evaluations versus problem size (log y-axis requested).
utl.plot_count(
    count_df, "KColors", "Problem Size", "Function Evaluations",
    evaluations_title,
    use_log_y=True,
)
# Run times versus problem size (log y-axis requested, "_Times_" passed as f_name).
utl.plot_count(
    times_df, "KColors", "Problem Size", "Runtime",
    runtime_title,
    use_log_y=True, f_name="_Times_",
)

In [None]:
# Quick look at the run-time table using the default pandas plot.
times_df.plot()

In [None]:
# Bundle both tables under the keys "Count_df" and "Times_df".
results = dict(Count_df=count_df, Times_df=times_df)

In [None]:
# Display the bundled count and timing tables.
results

In [None]:
# Write the bundled results dict to a single pickle file. `folder` is joined
# without a separator here, so it is expected to end with "/".
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(f"{os.getcwd()}/{folder}Eval_Call_and_Times.pkl", "wb") as f:
    pickle.dump(results, f)