In [1]:
import pandas as pd
from tqdm import tqdm
import os
import time

import sys
sys.path.append('../mlrose')
from mlrose_hiive.generators.queens_generator import QueensGenerator
from mlrose_hiive import GARunner

In [2]:
# Short tag for the algorithm under test; interpolated into the metrics CSV paths.
ALGORITHM = 'ga'
# Problem sizes iterated by the experiment loop (passed as `size` to QueensGenerator).
PROBLEM_SIZE_LIST = [10, 30, 50, 100, 200]
# Passed to GARunner as the single entry of `iteration_list`.
MAX_ITERATIONS = 10000
# Number of repetitions per problem size; the run index is also used as the seed.
NUM_RUNS = 3

# GA hyperparameters, passed to GARunner as single-element lists
# (`mutation_rates` / `population_sizes`).
MUTATION_RATE = 0.5
POPULATION_SIZE = 10

In [3]:
def get_max_size(problem_size):
    """Return the reference fitness value for a problem of ``problem_size``.

    The result is stored by the experiment loop in the ``optimal_fitness``
    column. Presumably this mirrors the maximum fitness of mlrose_hiive's
    Queens problem -- TODO confirm against the library.

    Args:
        problem_size: Size of the problem (number of queens).

    Returns:
        ``0`` when ``problem_size <= 1``, ``1`` when ``problem_size == 2``,
        otherwise ``3 * (problem_size - 2)``.
    """
    # The size-2 case is the only one that does not follow either general rule.
    if problem_size == 2:
        return 1
    return 0 if problem_size <= 1 else 3 * (problem_size - 2)

In [4]:
# Load the accumulated run curves for this algorithm. On first use, create an
# empty CSV that carries only the header so the read below always succeeds.
df_path = f'metrics_problem_size/{ALGORITHM}.csv'
os.makedirs(os.path.dirname(df_path), exist_ok=True)
if not os.path.exists(df_path):
    all_df = pd.DataFrame(columns=[
        # runner columns
        'Iteration', 'Time', 'Fitness', 'FEvals', 'Population Size', 'Mutation Rate', 'max_iters',
        # problem config
        'problem_size', 'max_attempts',
        # run config
        'run_number', 'group_timestamp', 'total_runtime',
        # reference value; the experiment loop writes this column for every row,
        # so it belongs in the initial schema as well
        'optimal_fitness',
    ])
    all_df.to_csv(df_path, index=False)
# Always re-read from disk so the in-memory frame matches what is persisted.
all_df = pd.read_csv(df_path)

In [5]:
# Run the GA NUM_RUNS times for every problem size, tag each run's fitness curve
# with its configuration, and persist the accumulated curves after every size.
for problem_size in PROBLEM_SIZE_LIST:
    print(f"Problem Size: {problem_size}")
    MAX_ATTEMPTS = problem_size * 5

    # Skip configurations that already have rows in the metrics frame.
    already_run = (
        (all_df['problem_size'] == problem_size) &
        (all_df['Population Size'] == POPULATION_SIZE) &
        (all_df['Mutation Rate'] == MUTATION_RATE) &
        (all_df['max_attempts'] == MAX_ATTEMPTS) &
        (all_df['max_iters'] == MAX_ITERATIONS)
    ).any()
    if already_run:
        print('Already run')
        continue

    # A whole-second timestamp identifies this group of runs. The one-second
    # sleep presumably keeps consecutive groups from sharing a timestamp.
    group_timestamp = int(time.time())
    time.sleep(1)

    max_fitnesses = []
    # Curves are collected here and only merged into `all_df` once every run of
    # the group has finished. If a run is interrupted, `all_df` stays untouched,
    # so re-running this cell does not report the unfinished group as
    # 'Already run'.
    group_curves = []
    for run_i in tqdm(range(NUM_RUNS)):
        t_start = time.perf_counter()
        # The run index doubles as the seed for both the problem and the runner.
        problem = QueensGenerator().generate(seed=run_i, size=problem_size, maximize=True)
        runner = GARunner(
            problem=problem,
            experiment_name='GA',
            output_directory='.',
            seed=run_i,
            iteration_list=[MAX_ITERATIONS],
            max_attempts=MAX_ATTEMPTS,
            population_sizes=[POPULATION_SIZE],
            mutation_rates=[MUTATION_RATE],
        )
        _, df_run_curves = runner.run()

        # Problem configuration.
        df_run_curves['problem_size'] = problem_size
        df_run_curves['max_attempts'] = MAX_ATTEMPTS

        # Algorithm configuration (written explicitly so the columns always exist).
        df_run_curves['Population Size'] = POPULATION_SIZE
        df_run_curves['Mutation Rate'] = MUTATION_RATE
        df_run_curves['max_iters'] = MAX_ITERATIONS

        # Run identification.
        df_run_curves['run_number'] = run_i
        df_run_curves['group_timestamp'] = group_timestamp

        max_fitnesses.append(df_run_curves['Fitness'].max())

        # Wall-clock time of the whole run, including problem generation.
        t_end = time.perf_counter()
        df_run_curves['total_runtime'] = t_end - t_start
        df_run_curves['optimal_fitness'] = get_max_size(problem_size)

        group_curves.append(df_run_curves)

    print(f"Max Fitnesses: {max_fitnesses}")

    # Single concat per group instead of re-copying `all_df` after every run.
    all_df = pd.concat([all_df, *group_curves], ignore_index=True)
    all_df.to_csv(df_path, index=False)

Problem Size: 10


100%|██████████| 3/3 [00:00<00:00,  8.32it/s]


Max Fitnesses: [22.0, 23.0, 23.0]
Problem Size: 30


100%|██████████| 3/3 [00:03<00:00,  1.22s/it]


Max Fitnesses: [82.0, 81.0, 80.0]
Problem Size: 50


100%|██████████| 3/3 [00:13<00:00,  4.40s/it]


Max Fitnesses: [139.0, 138.0, 137.0]
Problem Size: 100


100%|██████████| 3/3 [01:01<00:00, 20.61s/it]


Max Fitnesses: [283.0, 285.0, 286.0]
Problem Size: 200


100%|██████████| 3/3 [06:48<00:00, 136.19s/it]

Max Fitnesses: [575.0, 585.0, 580.0]





In [6]:
# Display the columns of the accumulated metrics frame.
all_df.columns

Index(['Iteration', 'Time', 'Fitness', 'FEvals', 'Population Size',
       'Mutation Rate', 'max_iters', 'problem_size', 'max_attempts',
       'run_number', 'group_timestamp', 'total_runtime', 'optimal_fitness'],
      dtype='object')

In [7]:
# Collapse the iteration-level curves of every experiment group into one summary
# row per run (NUM_RUNS rows per group).
group_summaries = []
for group_timestamp in all_df['group_timestamp'].unique():
    group_rows = all_df[all_df['group_timestamp'] == group_timestamp]
    # A group holds full curves, so it must have more rows than runs.
    assert len(group_rows) > NUM_RUNS

    # NOTE(review): these statistics are computed over ALL rows of the group, so
    # every run in the group receives the same values. Confirm that per-group
    # (rather than per-run) maxima are what is wanted.
    max_fit = group_rows['Fitness'].max()
    total_iters = group_rows['Iteration'].max()
    max_fevals = group_rows['FEvals'].max()

    # Dropping the per-iteration columns leaves only per-run constants, so
    # de-duplicating yields exactly one row per run. The non-mutating chain
    # avoids modifying a slice of `all_df` in place (SettingWithCopyWarning).
    run_summary = (
        group_rows
        .drop(columns=['Fitness', 'Iteration', 'FEvals', 'Time'])
        .drop_duplicates()
        .assign(
            max_fitness=max_fit,
            total_iterations=total_iters,
            max_fevals=max_fevals,
        )
    )
    assert len(run_summary) == NUM_RUNS
    group_summaries.append(run_summary)

# One concat at the end; `pd.concat` rejects an empty list, so fall back to an
# empty frame when there are no groups at all.
agg_df = pd.concat(group_summaries, ignore_index=True) if group_summaries else pd.DataFrame()

In [8]:
# Persist the per-run summary next to the raw curves. A dedicated variable is
# used instead of rebinding `df_path`: the experiment loop writes `all_df` to
# `df_path`, so overwriting it here would make a later re-run of that loop
# clobber the aggregated CSV with raw curves.
agg_df_path = f'metrics_problem_size/{ALGORITHM}_agg.csv'
agg_df.to_csv(agg_df_path, index=False)