In [1]:
import pandas as pd
from tqdm import tqdm
import os
import time

import sys
sys.path.append('../mlrose')
from mlrose_hiive.generators.four_peaks_generator import FourPeaksGenerator
from mlrose_hiive import GARunner
from mlrose_hiive.fitness.four_peaks import FourPeaks 
from mlrose_hiive import GeomDecay, ExpDecay, ArithDecay

In [2]:
# Label of the optimiser under test; also used to name the metrics CSV files.
ALGORITHM = 'ga'

# Four Peaks problem definition (threshold fraction and bit-string length).
T_PCT = 0.4
PROBLEM_SIZE = 30

# Stopping criteria handed to the runner in the sweep.
MAX_ITERATIONS = 10_000
MAX_ATTEMPTS = 5 * PROBLEM_SIZE

# Number of seeded repetitions per hyper-parameter combination.
NUM_RUNS = 3

# Hyper-parameter grid explored by the sweep cell.
MUTATION_RATE_LIST = [0.1, 0.3, 0.5, 0.7]
POPULATION_SIZES_LIST = [10, 100, 500, 1000]

In [3]:
# Sanity check of the fitness function on a hand-built 50-bit state:
# 21 leading ones followed by 29 trailing zeros.
problem = FourPeaks(t_pct=0.4)
sample_state = [1] * 21 + [0] * 29
problem.evaluate(sample_state)

79

In [4]:
def get_opt_fitness(problem_size, t_pct):
    """Return the best attainable Four Peaks fitness for a bit string.

    Four Peaks scores ``max(leading ones, trailing zeros)`` and adds a bonus
    of ``problem_size`` when both runs are strictly longer than a threshold
    ``T``.  The optimum therefore keeps one run at the minimum qualifying
    length ``T + 1`` and gives every remaining bit to the other run.

    The threshold is computed with integer-safe ``ceil`` instead of testing a
    float product for integrality, which is fragile under rounding.

    NOTE(review): ``T = ceil(t_pct * problem_size)`` mirrors what mlrose's
    ``FourPeaks.evaluate`` is understood to do -- confirm against the
    installed version.

    Args:
        problem_size: Length of the bit string (number of bits).
        t_pct: Threshold expressed as a fraction of ``problem_size``.

    Returns:
        int: ``2 * problem_size - T - 1`` when the bonus is reachable,
        otherwise ``problem_size`` (a uniform string, no bonus).
    """
    import math  # local import: `math` is not among the notebook's top-level imports

    threshold = math.ceil(t_pct * problem_size)
    shorter_run = threshold + 1  # smallest run length that is strictly above T

    # Both runs must fit in the string without overlapping; if they cannot,
    # the bonus is unreachable and the best state is all ones or all zeros.
    if 2 * shorter_run > problem_size:
        return problem_size

    longer_run = problem_size - shorter_run
    return problem_size + longer_run
# Reference optimum for the configured problem, printed for comparison with
# the fitness values reached by the runs in later cells.
OPT_FITNESS = get_opt_fitness(problem_size=PROBLEM_SIZE, t_pct=T_PCT)
print(OPT_FITNESS)

47


In [5]:
# Single exploratory GA run with one fixed population size / mutation rate.
# The seed is taken from the wall clock, so it is printed to make the run
# replayable: paste the printed value into the commented assignment below.
# SEED = 6
SEED = int(time.time())
print(f'SEED: {SEED}')
problem = FourPeaksGenerator().generate(seed=SEED, size=PROBLEM_SIZE, t_pct=T_PCT)

runner = GARunner(
    problem=problem,
    experiment_name='GA',
    output_directory='.',
    seed=SEED,
    iteration_list=[1000],
    # Same value as before (PROBLEM_SIZE * 5), now read from the shared constant
    # so this cell cannot drift from the sweep configuration.
    max_attempts=MAX_ATTEMPTS,
    population_sizes=[500],
    mutation_rates=[0.2],
)
x, df_run_curves = runner.run()

In [6]:
# Best fitness reached by the exploratory run, followed by a preview of the
# per-iteration curve table.
best_fitness = df_run_curves['Fitness'].max()
print(best_fitness)
df_run_curves.head()

47.0


Unnamed: 0,Iteration,Time,Fitness,FEvals,Population Size,Mutation Rate,max_iters
0,0,0.003813,1.0,500.0,500,0.2,1000
1,1,0.016769,11.0,1002.0,500,0.2,1000
2,2,0.026377,12.0,1504.0,500,0.2,1000
3,3,0.036435,12.0,2005.0,500,0.2,1000
4,4,0.046143,14.0,2507.0,500,0.2,1000


In [7]:
# CSV that accumulates per-iteration curves across notebook sessions.
df_path = f'metrics/{ALGORITHM}.csv'
metrics_dir = os.path.dirname(df_path)
os.makedirs(metrics_dir, exist_ok=True)

# Column layout of the metrics file, grouped by where each value comes from.
runner_columns = [
    'Iteration', 'Time', 'Fitness', 'FEvals', 'Population Size', 'Mutation Rate', 'max_iters',
]
problem_columns = ['problem_size', 't_pct', 'max_attempts']
run_columns = ['run_number', 'group_timestamp', 'total_runtime']

# On first use, write a header-only file so the read below always succeeds.
if not os.path.exists(df_path):
    header_only = pd.DataFrame(columns=runner_columns + problem_columns + run_columns)
    header_only.to_csv(df_path, index=False)

all_df = pd.read_csv(df_path)

In [8]:
# Grid sweep over mutation rate x population size.  Each configuration is run
# NUM_RUNS times (seeded by the run index), its per-iteration curves are tagged
# with the configuration, and the accumulated table is written back to the
# metrics CSV after every configuration so partial progress survives a crash.
for mutation in MUTATION_RATE_LIST:
    print(f'Mutation: {mutation}')
    for population in POPULATION_SIZES_LIST:
        print(f"Population: {population}")

        # Skip configurations that already have rows in the stored metrics.
        already_run = (
            (all_df['problem_size'] == PROBLEM_SIZE) &
            (all_df['t_pct'] == T_PCT) &
            (all_df['Population Size'] == population) &
            (all_df['Mutation Rate'] == mutation) &
            (all_df['max_attempts'] == MAX_ATTEMPTS) &
            (all_df['max_iters'] == MAX_ITERATIONS)
        ).any()
        if already_run:
            print('Already run')
            continue

        # Whole-second timestamp used as the group key; sleeping one second
        # guarantees the next configuration gets a different value.
        group_timestamp = int(time.time())
        time.sleep(1)

        run_frames = []
        max_fitnesses = []
        for run_i in tqdm(range(NUM_RUNS)):
            t_start = time.perf_counter()
            problem = FourPeaksGenerator().generate(seed=run_i, size=PROBLEM_SIZE, t_pct=T_PCT)
            runner = GARunner(
                problem=problem,
                experiment_name='GA',
                output_directory='.',
                seed=run_i,
                iteration_list=[MAX_ITERATIONS],
                max_attempts=MAX_ATTEMPTS,
                population_sizes=[population],
                mutation_rates=[mutation],
            )
            _, df_run_curves = runner.run()

            # Problem configuration.
            df_run_curves['problem_size'] = PROBLEM_SIZE
            df_run_curves['t_pct'] = T_PCT
            df_run_curves['max_attempts'] = MAX_ATTEMPTS

            # Runner configuration, overwritten with the exact values requested
            # so the "already run" comparison above matches on reload.
            df_run_curves['Population Size'] = population
            df_run_curves['Mutation Rate'] = mutation
            df_run_curves['max_iters'] = MAX_ITERATIONS

            # Run bookkeeping.
            df_run_curves['run_number'] = run_i
            df_run_curves['group_timestamp'] = group_timestamp

            # Plain float keeps the printed summary free of np.float64(...) reprs.
            max_fitnesses.append(float(df_run_curves['Fitness'].max()))

            t_end = time.perf_counter()
            df_run_curves['total_runtime'] = t_end - t_start

            # Collected here and concatenated once per configuration instead of
            # re-copying the whole accumulator on every run.
            run_frames.append(df_run_curves)

        print(f"Max Fitnesses: {max_fitnesses}")

        # Empty frames (e.g. the header-only table on first use) are left out:
        # concatenating them is deprecated in pandas and can affect result dtypes.
        frames = [frame for frame in (all_df, *run_frames) if not frame.empty]
        if frames:
            all_df = pd.concat(frames, ignore_index=True)
        all_df.to_csv(df_path, index=False)

Mutation: 0.01
Population: 10


100%|██████████| 3/3 [00:00<00:00, 31.32it/s]


Max Fitnesses: [np.float64(5.0), np.float64(5.0), np.float64(5.0)]
Population: 100


100%|██████████| 3/3 [00:01<00:00,  1.78it/s]


Max Fitnesses: [np.float64(45.0), np.float64(30.0), np.float64(24.0)]
Population: 1000


100%|██████████| 3/3 [00:13<00:00,  4.36s/it]


Max Fitnesses: [np.float64(47.0), np.float64(47.0), np.float64(47.0)]
Mutation: 0.1
Population: 10


100%|██████████| 3/3 [00:00<00:00,  7.24it/s]


Max Fitnesses: [np.float64(23.0), np.float64(30.0), np.float64(27.0)]
Population: 100


100%|██████████| 3/3 [00:01<00:00,  2.69it/s]


Max Fitnesses: [np.float64(30.0), np.float64(30.0), np.float64(30.0)]
Population: 1000


100%|██████████| 3/3 [00:11<00:00,  3.70s/it]


Max Fitnesses: [np.float64(47.0), np.float64(47.0), np.float64(47.0)]
Mutation: 0.5
Population: 10


100%|██████████| 3/3 [00:00<00:00,  9.37it/s]


Max Fitnesses: [np.float64(30.0), np.float64(30.0), np.float64(30.0)]
Population: 100


100%|██████████| 3/3 [00:01<00:00,  2.82it/s]


Max Fitnesses: [np.float64(30.0), np.float64(30.0), np.float64(30.0)]
Population: 1000


100%|██████████| 3/3 [00:11<00:00,  3.97s/it]

Max Fitnesses: [np.float64(47.0), np.float64(30.0), np.float64(47.0)]





In [9]:
# Display the column layout of the accumulated metrics table.
# NOTE(review): the output saved with this cell does not list 'total_runtime',
# although the sweep cell assigns that column -- presumably a stale output;
# re-run the notebook to refresh it.
all_df.columns

Index(['Iteration', 'Time', 'Fitness', 'FEvals', 'Population Size',
       'Mutation Rate', 'max_iters', 'problem_size', 't_pct', 'max_attempts',
       'run_number', 'group_timestamp'],
      dtype='object')

In [10]:
# Collapse the per-iteration curves into one row per (group, run): the
# per-iteration columns are dropped, the remaining configuration rows are
# de-duplicated, and summary statistics are attached.
#
# Works on fresh frames instead of mutating a filtered slice of `all_df` in
# place (which triggers SettingWithCopyWarning), and concatenates once at the
# end instead of growing `agg_df` inside the loop.
#
# NOTE(review): the statistics are computed over the whole group, so every run
# row of a group carries the same max_fitness / total_iterations / max_fevals /
# runtime.  Confirm this is intended rather than per-run values.
per_iteration_columns = ['Fitness', 'Iteration', 'FEvals', 'Time']
group_summaries = []
for group_timestamp in all_df['group_timestamp'].unique():
    group_df = all_df[all_df['group_timestamp'] == group_timestamp]

    config_df = group_df.drop(columns=per_iteration_columns)
    # Each run contributes many iteration rows, so the group must be larger
    # than NUM_RUNS before de-duplication and exactly NUM_RUNS afterwards.
    assert len(config_df) > NUM_RUNS
    config_df = config_df.drop_duplicates()
    assert len(config_df) == NUM_RUNS

    group_summaries.append(
        config_df.assign(
            max_fitness=group_df['Fitness'].max(),
            total_iterations=group_df['Iteration'].max(),
            max_fevals=group_df['FEvals'].max(),
            runtime=group_df['Time'].max(),
        )
    )

# pd.concat raises on an empty list, so fall back to an empty frame when no
# groups have been recorded yet.
agg_df = pd.concat(group_summaries, ignore_index=True) if group_summaries else pd.DataFrame()

In [11]:
# Persist the per-group summary next to the raw metrics file.
# A dedicated variable is used so `df_path` keeps pointing at the raw metrics
# CSV: rebinding it here would make a later re-run of the sweep cell overwrite
# the aggregate file with raw curves.
agg_df_path = f'metrics/{ALGORITHM}_agg.csv'
agg_df.to_csv(agg_df_path, index=False)