In [101]:
import pandas as pd
from tqdm import tqdm
import os
import yaml

import sys
sys.path.append('../mlrose')
from mlrose_hiive.generators.four_peaks_generator import FourPeaksGenerator
from mlrose_hiive import GARunner

In [102]:
# --- Sweep configuration for the genetic-algorithm (GA) experiments ---
# Short algorithm tag; the metrics CSV path is derived from it.
ALGORITHM = 'ga'

# Problem settings: bit-string lengths and the Four Peaks threshold fraction.
PROBLEM_SIZE_LIST = [5, 10, 20, 50, 100, 200, 500, 1000]
T_PCT = [0.25]

# GA hyper-parameters to sweep.
POPULATION_SIZES_LIST = [10, 50, 100, 200, 500, 1000]
MUTATION_RATE_LIST = [0.25, 0.5, 0.75, 0.9]

# Stopping criteria and number of repetitions (kept as one-element lists).
ITERATIONS_LIST = [1000]
MAX_ATTEMPTS_LIST = [1000]
NUM_RUNS = [3]

In [103]:
from mlrose_hiive.fitness.four_peaks import FourPeaks
# Scratch check of the Four Peaks fitness on a hand-built 50-bit state.
print(500*0.498)
problem = FourPeaks(t_pct=0.1)
# State: 13 leading ones followed by 37 zeros; the recorded output for this call is 87.
problem.evaluate([1]*13 + [0]*37)

249.0


87

In [98]:
# Settings for the single exploratory GA run (separate from the sweep lists).
# Four Peaks instance; t_pct is also read as a global by get_opt_fitness.
problem_size = 20
t_pct = 0.4
# GA hyper-parameters.
population_size = 500
mutation_rate = 0.1
# Stopping criteria.
iterations = 1000
max_attempts = 1_000_000

In [104]:
def get_opt_fitness(problem_size, t_pct=None):
    """Return the expected best Four Peaks fitness for a string of length ``problem_size``.

    The value is ``problem_size + R``, where ``R`` is the largest integer
    strictly below ``(1 - t_pct) * problem_size``.

    Args:
        problem_size: Length of the bit string.
        t_pct: Threshold fraction. When omitted, the notebook-level ``t_pct``
            variable is used, so existing one-argument calls keep working.

    Returns:
        int: ``problem_size + R``.

    Raises:
        NameError: If ``t_pct`` is omitted and no global ``t_pct`` is defined.
    """
    if t_pct is None:
        # The parameter shadows the global of the same name, so look it up explicitly.
        if 't_pct' not in globals():
            raise NameError("name 't_pct' is not defined")
        t_pct = globals()['t_pct']

    span = (1 - t_pct) * problem_size
    nearest = round(span)
    # Compare with a tolerance: e.g. (1 - 0.7) * 10 evaluates to 3.0000000000000004,
    # which an exact equality test would wrongly treat as a non-integer.
    if abs(span - nearest) < 1e-9:
        reward_run = nearest - 1
    else:
        reward_run = int(span)

    # NOTE(review): this assumes head/tail lengths are compared against the raw
    # t_pct * problem_size; if the fitness rounds that threshold up, non-integer
    # cases would be one lower — confirm against FourPeaks.evaluate.
    return problem_size + reward_run
# Expected optimum for the single-run settings (problem_size and the global t_pct);
# printed so it can be compared with the best fitness the GA reaches.
OPT_FITNESS = get_opt_fitness(problem_size)
print(OPT_FITNESS)

31


In [99]:
import time

# Time-based seed: every execution of this cell uses a different seed.
# SEED = 6
SEED = int(time.time())

# Single exploratory GA run on one Four Peaks instance built from the scalar settings.
problem = FourPeaksGenerator().generate(size=problem_size, t_pct=t_pct, seed=SEED)
sa = GARunner(
    problem=problem,
    seed=SEED,
    experiment_name='dontcare',
    output_directory='./',
    iteration_list=[iterations],
    max_attempts=max_attempts,
    population_sizes=[population_size],
    mutation_rates=[mutation_rate],
)
# run() yields two values; the second holds the per-iteration curve inspected below.
x, df_run_curves = sa.run()

In [100]:
# Best fitness reached during the single run.
df_run_curves['Fitness'].max()

31.0

In [105]:
# Last ten recorded iterations of the single run.
df_run_curves.tail(10)

Unnamed: 0,Iteration,Time,Fitness,FEvals,Population Size,Mutation Rate,max_iters
991,991,31.907346,31.0,496994.0,500,0.1,1000
992,992,31.939975,31.0,497495.0,500,0.1,1000
993,993,31.974642,31.0,497996.0,500,0.1,1000
994,994,32.008893,31.0,498497.0,500,0.1,1000
995,995,32.040354,31.0,498998.0,500,0.1,1000
996,996,32.070342,31.0,499499.0,500,0.1,1000
997,997,32.101837,31.0,500000.0,500,0.1,1000
998,998,32.133738,31.0,500501.0,500,0.1,1000
999,999,32.165477,31.0,501002.0,500,0.1,1000
1000,1000,32.197467,31.0,501503.0,500,0.1,1000


In [None]:
# Per-algorithm summary metrics file; created with headers only on first use.
df_path = f'metrics/{ALGORITHM}.csv'
if not os.path.exists(df_path):
    # to_csv does not create missing parent folders, so make sure 'metrics/' exists first.
    os.makedirs(os.path.dirname(df_path), exist_ok=True)
    df = pd.DataFrame(columns=[
        'problem_size', 'mutation_rate', 'population_size', 'iterations', 'max_attempts',
        'run_number', 'best_fitness', 'time', 'fevals'])
    df.to_csv(df_path, index=False)
# Always read back from disk so df reflects whatever the file currently contains.
df = pd.read_csv(df_path)

In [None]:
# Full GA sweep: one run per (problem size, iterations, max attempts, population size,
# mutation rate, t_pct, repetition). Each run's curve is tagged with its settings and
# all curves are stacked into all_df.

# NUM_RUNS is configured as a one-element list, but range() needs a plain integer.
num_runs = NUM_RUNS[0] if isinstance(NUM_RUNS, (list, tuple)) else NUM_RUNS
# T_PCT is likewise a list; the generator is given one threshold fraction at a time.
t_pct_values = T_PCT if isinstance(T_PCT, (list, tuple)) else [T_PCT]

run_frames = []  # per-run curves, concatenated once after the loops (avoids quadratic concat)
group_i = 0      # one id per hyper-parameter combination, shared by its repetitions
run_i = 0        # unique id per individual run
for problem_size in PROBLEM_SIZE_LIST:
    print(f'Problem Size: {problem_size}')
    for iterations in ITERATIONS_LIST:
        print(f'Iterations: {iterations}')
        for max_attempts in MAX_ATTEMPTS_LIST:
            print(f'Max Attempts: {max_attempts}')
            for population_size in POPULATION_SIZES_LIST:
                print(f'Population Size: {population_size}')
                for mutation_rate in MUTATION_RATE_LIST:
                    print(f"Mutation Rate: {mutation_rate}")
                    for t_pct_value in t_pct_values:
                        for i in tqdm(range(num_runs)):
                            # SEED comes from the single-run cell; SEED+i gives each
                            # repetition its own seed.
                            problem = FourPeaksGenerator().generate(
                                seed=SEED+i, size=problem_size, t_pct=t_pct_value)
                            ga_runner = GARunner(
                                problem=problem,
                                experiment_name='dontcare',
                                output_directory='./',
                                seed=SEED+i,
                                max_attempts=max_attempts,
                                iteration_list=[iterations],
                                population_sizes=[population_size],
                                mutation_rates=[mutation_rate]
                            )
                            _, df_run_curves = ga_runner.run()
                            # Tag every row so runs can be separated after stacking.
                            df_run_curves['group_number'] = group_i
                            df_run_curves['run_number'] = run_i
                            df_run_curves['problem_size'] = problem_size
                            df_run_curves['max_iterations'] = iterations
                            df_run_curves['max_attempts'] = max_attempts
                            df_run_curves['population_size'] = population_size
                            df_run_curves['mutation_rate'] = mutation_rate
                            df_run_curves['t_pct'] = t_pct_value

                            print(f"Max Fitness: {df_run_curves['Fitness'].max()}")
                            print(f"Max Iteration: {df_run_curves['Iteration'].max()}")

                            run_frames.append(df_run_curves)
                            run_i += 1
                        group_i += 1

# ignore_index=True renumbers rows 0..N-1, matching the former reset_index(drop=True).
# pd.concat rejects an empty list, so fall back to an empty frame when nothing ran.
all_df = pd.concat(run_frames, axis=0, ignore_index=True) if run_frames else pd.DataFrame()

In [None]:
# Range of fitness values observed across every run in the sweep.
print(f"Max: {all_df['Fitness'].max()}")
print(f"Min: {all_df['Fitness'].min()}")

In [None]:
# Persist every run's curve to <output_dir>/learning_curve.csv (row index omitted).
# NOTE(review): output_dir is not assigned in any cell visible here — confirm it is
# defined before this cell runs, otherwise this raises NameError on a fresh kernel.
all_df.to_csv(os.path.join(output_dir, 'learning_curve.csv'), index=False)