In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import sys
import subprocess
def install(package):
    """Install ``package`` with pip, using the interpreter that runs this notebook.

    Args:
        package: Requirement string handed to ``pip install`` as a single argument.

    Raises:
        subprocess.CalledProcessError: If the pip process exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pip_command)

In [None]:
import six
import sys
# Compatibility shim: register the standalone `six` package under the module
# name 'sklearn.externals.six' before mlrose is imported below.
# NOTE(review): presumably mlrose imports `sklearn.externals.six`, which the
# installed scikit-learn no longer provides — confirm.
sys.modules['sklearn.externals.six'] = six # hack: some library issue
# Install mlrose at run time through the install() helper; no version is pinned.
install("mlrose")
import mlrose
import numpy as np

In [None]:
import time
def run_random_search_impl(cb, labels, n_times=3):
    """Run ``cb`` repeatedly and report its mean best fitness and mean run time.

    Args:
        cb: Zero-argument callable returning a ``(best_state, best_fitness)``
            pair; only the fitness is used.
        labels: List of leading values for the result row. It is not modified.
        n_times: Number of times ``cb`` is invoked; must be at least 1.

    Returns:
        A new list: ``labels`` followed by the mean best fitness and the mean
        elapsed seconds per invocation.

    Raises:
        ValueError: If ``n_times`` is smaller than 1 (the averages would
            otherwise divide by zero).
    """
    if n_times < 1:
        raise ValueError(f'n_times must be at least 1, got {n_times}')

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments the way time.time() can.
    start = time.perf_counter()
    best_fitness_sum = 0
    for _ in range(n_times):
        _best_state, best_fitness = cb()
        best_fitness_sum += best_fitness
    elapsed = time.perf_counter() - start

    return labels + [best_fitness_sum / n_times, elapsed / n_times]

def run_random_search(cb, labels, n_times=3):
    """Time ``cb`` and return its averaged result row.

    Thin wrapper around ``run_random_search_impl``.

    Args:
        cb: Zero-argument callable returning ``(best_state, best_fitness)``.
        labels: Leading values of the result row.
        n_times: Number of repetitions to average over (defaults to 3, the
            value that was previously implied).

    Returns:
        Whatever ``run_random_search_impl`` returns for the same arguments.
    """
    return run_random_search_impl(cb, labels, n_times=n_times)
        

In [None]:
# Shared stopping limits, passed as max_attempts / max_iters by every callback
# factory below. The callbacks look these names up when they are invoked, so
# rebinding them later also affects callbacks that were built earlier.
k_max_attempts = 10
k_max_iters = 1000

def get_genetic_cb(problem, pop_size, mutation_prob):
    """Build a zero-argument callable that runs ``mlrose.genetic_alg`` on ``problem``.

    Args:
        problem: Problem object forwarded as the first positional argument.
        pop_size: Forwarded as ``pop_size``.
        mutation_prob: Forwarded as ``mutation_prob``.

    Returns:
        A callable that returns whatever ``mlrose.genetic_alg`` returns.
    """
    def run_genetic():
        # The global limits are read here, at call time, not when the
        # callback is created.
        return mlrose.genetic_alg(
            problem,
            pop_size=pop_size,
            mutation_prob=mutation_prob,
            max_attempts=k_max_attempts,
            max_iters=k_max_iters,
        )

    return run_genetic

def get_sa_cb(problem, schedule):
    """Build a zero-argument callable that runs ``mlrose.simulated_annealing``.

    Args:
        problem: Problem object forwarded as the first positional argument.
        schedule: Forwarded as ``schedule``.

    Returns:
        A callable that returns whatever ``mlrose.simulated_annealing`` returns.
    """
    def run_annealing():
        # The global limits are read here, at call time, not when the
        # callback is created.
        return mlrose.simulated_annealing(
            problem,
            schedule=schedule,
            max_attempts=k_max_attempts,
            max_iters=k_max_iters,
        )

    return run_annealing

def get_rhc_cb(problem, restarts=True):
    """Build a zero-argument callable that runs ``mlrose.random_hill_climb``.

    Args:
        problem: Problem object forwarded as the first positional argument.
        restarts: Forwarded unchanged as ``restarts``.
            NOTE(review): presumably mlrose treats this as a restart count, in
            which case ``True`` acts as 1 — confirm against the mlrose docs.

    Returns:
        A callable that returns whatever ``mlrose.random_hill_climb`` returns.
    """
    def run_hill_climb():
        # The global limits are read here, at call time, not when the
        # callback is created.
        return mlrose.random_hill_climb(
            problem,
            restarts=restarts,
            max_attempts=k_max_attempts,
            max_iters=k_max_iters,
        )

    return run_hill_climb

def get_mimic_cb(problem, pop_size, keep_pct):
    """Build a zero-argument callable that runs ``mlrose.mimic`` on ``problem``.

    Args:
        problem: Problem object forwarded as the first positional argument.
        pop_size: Forwarded as ``pop_size``.
        keep_pct: Forwarded as ``keep_pct``.

    Returns:
        A callable that returns whatever ``mlrose.mimic`` returns.
    """
    def run_mimic():
        # The global limits are read here, at call time, not when the
        # callback is created.
        return mlrose.mimic(
            problem,
            pop_size=pop_size,
            keep_pct=keep_pct,
            max_attempts=k_max_attempts,
            max_iters=k_max_iters,
        )

    return run_mimic

In [None]:
# Accumulated result rows, one list per algorithm. run() appends a row to each
# list and rewrites the matching CSV file after every append.
sa_results = []
genetic_results = []
rhc_results = []
mimic_results = []

def _save_results(rows, extra_columns, path):
    """Write the accumulated result rows to ``path`` as a CSV without the index."""
    columns = ['Problem', 'Size', 'Algorithm', 'Score', 'Time'] + extra_columns
    pd.DataFrame(rows, columns=columns).to_csv(path, index=False)


def run(func, labels):
    """Benchmark RHC, SA, the genetic algorithm and MIMIC on one problem.

    Each algorithm is timed through ``run_random_search``. Its result row is
    appended to the matching module-level list (``rhc_results``,
    ``sa_results``, ``genetic_results``, ``mimic_results``) and that list is
    rewritten to ``<algorithm>_results.csv`` after every append, so partial
    results survive an interrupted run.

    Args:
        func: The mlrose optimization problem to solve. Its state-vector
            length sizes the genetic and MIMIC populations.
        labels: Leading columns of every result row (problem name and size);
            the algorithm name is appended to a copy, never in place.
    """
    # Take the problem size from the problem object itself. The previous code
    # read the notebook-global loop variable `n`, which only existed when
    # run() was called from inside the driver loop.
    problem_size = func.get_length()

    rhc_results.append(
        run_random_search(get_rhc_cb(func, True), labels + ['rhc']))
    _save_results(rhc_results, [], 'rhc_results.csv')

    # Only one value is active per hyper-parameter; the trailing comments keep
    # the candidate values that were swept while tuning.
    for exp_const in (0.001,): #(0.001, 0.003, 0.005, 0.007, 0.009, 0.02): # Find a exp_const that seems to be the best
        schedule = mlrose.ExpDecay(exp_const=exp_const)
        sa_results.append(
            run_random_search(get_sa_cb(func, schedule), labels + ['sa']) + ['ExpDecay', f'{exp_const}'])
        _save_results(sa_results, ['Schedule', 'Decay'], 'sa_results.csv')

    for population_ratio in (4,): #(0.5, 1, 2, 4, 8):
        pop_size = population_ratio * problem_size
        for mutation_ratio in (0.01,): #(0.001, 0.01, 0.1, 0.2, 0.5, 0.8, 1):
            genetic_results.append(
                run_random_search(get_genetic_cb(func, pop_size, mutation_ratio), labels + ['genetic'])
                + [f'{pop_size}', f'{mutation_ratio}'])
            _save_results(genetic_results, ['Population', 'MutationRatio'], 'genetic_results.csv')

    for population_ratio in (4,): #(0.5, 1, 2, 4, 8):
        pop_size = population_ratio * problem_size
        for keep_pct in (0.2,): #(0.001, 0.01, 0.1, 0.2, 0.5, 0.8, 1):
            mimic_results.append(
                run_random_search(get_mimic_cb(func, pop_size, keep_pct), labels + ['mimic'])
                + [f'{pop_size}', f'{keep_pct}'])
            _save_results(mimic_results, ['Population', 'KeepPct'], 'mimic_results.csv')

In [None]:
# `randrange` builds the knapsack instances below; it was used without ever
# being imported, so this cell raised NameError on a fresh kernel.
from random import randrange

# Benchmark every algorithm on three problems at sizes 20, 40, ..., 100.
for n in range(20, 101, 20):
    four_peaks_problem = mlrose.DiscreteOpt(n, mlrose.FourPeaks(t_pct=0.15), max_val=2)
    labels = ['Four Peaks', f'{n}']
    run(four_peaks_problem, labels)

    one_max_problem = mlrose.DiscreteOpt(n, mlrose.OneMax(), max_val=2)
    labels = ['One Max', f'{n}']
    run(one_max_problem, labels)

    # Random knapsack instance: values drawn from 1..10, weights from 1..30.
    # NOTE(review): no random seed is set, so the instance differs between runs.
    values = [randrange(10) + 1 for _ in range(n)]
    weights = [randrange(30) + 1 for _ in range(n)]
    max_val = 5 #int(sum(weights) * 0.35 / min(weights))
    labels = ['Knapsack', f'{n}']
    # NOTE(review): the third positional argument (2) is presumably mlrose's
    # max_weight_pct — confirm that 2 is the intended capacity.
    knapsack_problem = mlrose.DiscreteOpt(n, mlrose.Knapsack(weights, values, 2), max_val=max_val)
    run(knapsack_problem, labels)

    # Checkpoint all four result tables once more after each problem size.
    sa_results_df = pd.DataFrame(sa_results, columns=['Problem', 'Size', 'Algorithm', 'Score', 'Time', 'Schedule', 'Decay'])
    sa_results_df.to_csv('sa_results.csv', index=False)

    genetic_results_df = pd.DataFrame(genetic_results, columns = ['Problem', 'Size', 'Algorithm', 'Score', 'Time', 'Population', 'MutationRatio'])
    genetic_results_df.to_csv('genetic_results.csv', index=False)

    rhc_results_df = pd.DataFrame(rhc_results, columns = ['Problem', 'Size', 'Algorithm', 'Score', 'Time'])
    rhc_results_df.to_csv('rhc_results.csv', index=False)

    mimic_results_df = pd.DataFrame(mimic_results, columns = ['Problem', 'Size', 'Algorithm', 'Score', 'Time', 'Population', 'KeepPct'])
    mimic_results_df.to_csv('mimic_results.csv', index=False)

In [None]:
# Older result set, e.g. ../input/randomsearcholdresults/genetic_results.csv
# (That path used to sit here as a bare line, which is a SyntaxError and kept
# the whole cell from running.)
# The next cell rebinds these same four names from a different dataset.

sa_results_df = pd.read_csv('/kaggle/input/randomsearcholdresults/sa_results.csv')
genetic_results_df = pd.read_csv('/kaggle/input/randomsearcholdresults/genetic_results.csv')
rhc_results_df = pd.read_csv('/kaggle/input/randomsearcholdresults/rhc_results.csv')
mimic_results_df = pd.read_csv('/kaggle/input/randomsearcholdresults/mimic_results.csv')


In [None]:
import pandas as pd

# Columns kept from every table when the per-algorithm results are merged.
columns = ['Problem', 'Size', 'Algorithm', 'Score', 'Time']

# Load the saved per-algorithm result tables.
sa_results_df = pd.read_csv('/kaggle/input/random-search-results-output/sa_results.csv')
genetic_results_df = pd.read_csv('/kaggle/input/random-search-results-output/genetic_results.csv')
rhc_results_df = pd.read_csv('/kaggle/input/random-search-results-output/rhc_results.csv')
mimic_results_df = pd.read_csv('/kaggle/input/random-search-results-output/mimic_results.csv')

# Trim each table to the kept columns, then stack them into one long frame
# with a fresh 0..N-1 index.
results_dfs = [
    frame.loc[:, columns]
    for frame in (sa_results_df, genetic_results_df, rhc_results_df, mimic_results_df)
]
results_df = pd.concat(results_dfs, ignore_index=True)


In [None]:

def get_results(problem):
    """Split the rows for one problem into one table per algorithm.

    Reads the module-level ``results_df``.

    Args:
        problem: Value to match against the ``'Problem'`` column.

    Returns:
        A dict mapping each algorithm name found for ``problem`` (in order of
        first appearance) to a DataFrame of its rows with a fresh 0-based
        index. The dict is empty when no row matches.
    """
    problem_rows = results_df.loc[results_df['Problem'] == problem].reset_index(drop=True)
    return {
        name: problem_rows.loc[problem_rows['Algorithm'] == name].reset_index(drop=True)
        for name in problem_rows['Algorithm'].unique()
    }

# Preview: display the per-algorithm tables for one problem as the cell output.
get_results('Four Peaks')

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
def plot_results(df_dict, title, xlabel, ylabel, skipped_algorithms=()):
    """Plot one line per algorithm of a result column against problem size.

    Args:
        df_dict: Mapping of algorithm name to a table with a ``'Size'`` column
            and a column named ``ylabel``.
        title: Figure title.
        xlabel: Label for the x axis (the x data is always the ``'Size'`` column).
        ylabel: Label for the y axis; also the name of the column that is plotted.
        skipped_algorithms: Algorithm names to leave out of the plot. Any
            container supporting ``in`` works; the default is an immutable
            empty tuple rather than a shared mutable list.
    """
    fig, ax = plt.subplots()
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

    for algorithm, frame in df_dict.items():
        if algorithm in skipped_algorithms:
            continue
        ax.plot(frame['Size'], frame[ylabel], marker='o', label=algorithm)
    ax.legend()
    plt.show()
    # Release the figure so repeated calls in a loop do not accumulate open figures.
    plt.close(fig)

In [None]:
# Time and score against problem size, all algorithms included.
for problem in ('Four Peaks', 'One Max', 'Knapsack'):
    df = get_results(problem)  # dict: algorithm name -> result rows
    for metric, title in (('Time', f'{problem} Time Comparing'),
                          ('Score', f'{problem} Score Comparing')):
        plot_results(df, title, 'Problem Size', metric)

In [None]:
# Same plots with MIMIC left out.
hidden = ['mimic']
for problem in ('Four Peaks', 'One Max', 'Knapsack'):
    df = get_results(problem)  # dict: algorithm name -> result rows
    for metric, title in (('Time', f'{problem} Time Comparing'),
                          ('Score', f'{problem} Score Comparing')):
        plot_results(df, title, 'Problem Size', metric, hidden)

In [None]:
# Same plots with both MIMIC and the genetic algorithm left out.
hidden = ['mimic', 'genetic']
for problem in ('Four Peaks', 'One Max', 'Knapsack'):
    df = get_results(problem)  # dict: algorithm name -> result rows
    for metric, title in (('Time', f'{problem} Time Comparing'),
                          ('Score', f'{problem} Score Comparing')):
        plot_results(df, title, 'Problem Size', metric, hidden)

In [None]:
# Second experiment: rebind the shared limits with a larger iteration cap
# (10000 here, 1000 in the first experiment). These are module-level names
# that the callbacks read when invoked.
k_max_attempts = 10
k_max_iters = 10000

def get_sa_cb(problem, schedule):
    """Build a zero-argument callable that runs ``mlrose.simulated_annealing``.

    Args:
        problem: Problem object forwarded as the first positional argument.
        schedule: Forwarded as ``schedule``.

    Returns:
        A callable that returns whatever ``mlrose.simulated_annealing`` returns.
    """
    def run_annealing():
        # The global limits are read here, at call time, not when the
        # callback is created.
        return mlrose.simulated_annealing(
            problem,
            schedule=schedule,
            max_attempts=k_max_attempts,
            max_iters=k_max_iters,
        )

    return run_annealing

def get_rhc_cb(problem, restarts=True):
    """Build a zero-argument callable that runs ``mlrose.random_hill_climb``.

    Args:
        problem: Problem object forwarded as the first positional argument.
        restarts: Forwarded unchanged as ``restarts``.
            NOTE(review): presumably mlrose treats this as a restart count, in
            which case ``True`` acts as 1 — confirm against the mlrose docs.

    Returns:
        A callable that returns whatever ``mlrose.random_hill_climb`` returns.
    """
    def run_hill_climb():
        # The global limits are read here, at call time, not when the
        # callback is created.
        return mlrose.random_hill_climb(
            problem,
            restarts=restarts,
            max_attempts=k_max_attempts,
            max_iters=k_max_iters,
        )

    return run_hill_climb


In [None]:
import pandas as pd
# Start the rerun with empty result lists. Rebinding these names drops the
# rows collected in them by the first experiment.
sa_results = []
rhc_results = []

def run_again(func, labels):
    """Benchmark random hill climbing and simulated annealing on one problem.

    Each algorithm is timed through ``run_random_search``. Its result row is
    appended to the module-level ``rhc_results`` / ``sa_results`` list, and
    the whole list is rewritten to ``rhc_results.csv`` / ``sa_results.csv``
    after every append.

    Args:
        func: The mlrose problem object handed to the callback factories.
        labels: Leading columns of each result row; the algorithm name is
            appended to a copy, never in place.
    """
    rhc_row = run_random_search(get_rhc_cb(func, True), labels + ['rhc'])
    rhc_results.append(rhc_row)
    rhc_table = pd.DataFrame(rhc_results, columns=['Problem', 'Size', 'Algorithm', 'Score', 'Time'])
    rhc_table.to_csv('rhc_results.csv', index=False)

    # A single decay constant is active; candidates tried while tuning were
    # (0.001, 0.003, 0.005, 0.007, 0.009, 0.02).
    for decay in (0.001,):
        sa_callback = get_sa_cb(func, mlrose.ExpDecay(exp_const=decay))
        sa_row = run_random_search(sa_callback, labels + ['sa'])
        sa_results.append(sa_row + ['ExpDecay', f'{decay}'])
        sa_table = pd.DataFrame(
            sa_results,
            columns=['Problem', 'Size', 'Algorithm', 'Score', 'Time', 'Schedule', 'Decay'])
        sa_table.to_csv('sa_results.csv', index=False)


In [None]:
# Rerun only One Max, at sizes 20, 40, ..., 100, with the RHC/SA-only runner.
for n in range(20, 101, 20):
    labels = ['One Max', f'{n}']
    one_max_problem = mlrose.DiscreteOpt(n, mlrose.OneMax(), max_val=2)
    run_again(one_max_problem, labels)

In [None]:
# Columns kept from both tables when the rerun results are merged.
columns = ['Problem', 'Size', 'Algorithm', 'Score', 'Time']

# Build the tables straight from the in-memory rows collected by the rerun.
rhc_results_df = pd.DataFrame(rhc_results, columns=['Problem', 'Size', 'Algorithm', 'Score', 'Time'])
sa_results_df = pd.DataFrame(
    sa_results,
    columns=['Problem', 'Size', 'Algorithm', 'Score', 'Time', 'Schedule', 'Decay'])

# Trim each table to the kept columns and stack them (SA rows first) with a
# fresh 0..N-1 index.
results_dfs = [frame.loc[:, columns] for frame in (sa_results_df, rhc_results_df)]
results_df = pd.concat(results_dfs, ignore_index=True)

In [None]:
# Plot time and score for the One Max rerun.
df = get_results('One Max')  # dict: algorithm name -> result rows
for metric, title in (('Time', 'One Max Time Comparing'),
                      ('Score', 'One Max Score Comparing')):
    plot_results(df, title, 'Problem Size', metric)