In [13]:
import pandas as pd
from tqdm import tqdm
import os
import time

import sys
sys.path.append('../mlrose')
from mlrose_hiive.generators.four_peaks_generator import FourPeaksGenerator
from mlrose_hiive import RHCRunner
from mlrose_hiive.fitness.four_peaks import FourPeaks 

In [14]:
# --- Experiment configuration -------------------------------------------
# Label used to build the metrics file names.
ALGORITHM = 'rhc'

# Four Peaks instance: bit-string length and the t_pct handed to the generator.
PROBLEM_SIZE = 30
T_PCT = 0.4

# Runner limits; the attempt budget scales with the problem size.
MAX_ITERATIONS = 10_000
MAX_ATTEMPTS = 5 * PROBLEM_SIZE

# Number of seeded runs per restart setting, and the restart counts to sweep.
NUM_RUNS = 3
RESTARTS = [1, 20, 50, 100, 200]

In [15]:
# Sanity check of the fitness function on a hand-built 20-bit state
# (nine leading 1s followed by eleven 0s) with t_pct=0.1. The evaluated
# fitness is the cell's displayed value.
problem = FourPeaks(t_pct=0.1)
problem.evaluate([1]*9 + [0]*11)

31

In [16]:
def get_opt_fitness(problem_size, t_pct):
    """Return the fitness value this notebook uses as the Four Peaks optimum.

    The result is ``problem_size + k``, where ``k`` is the largest integer
    strictly below ``(1 - t_pct) * problem_size``.

    Args:
        problem_size: Length of the bit string.
        t_pct: Threshold fraction of the Four Peaks problem.

    Returns:
        The optimum fitness as described above.
    """
    R = (1 - t_pct) * problem_size
    nearest = int(round(R))
    # Binary floating point can land a hair off an exact integer, e.g.
    # (1 - 0.7) * 10 evaluates to 3.0000000000000004. Compare with a small
    # tolerance so such values still take the "exact integer" branch.
    tolerance = 1e-9 * max(1.0, abs(R))
    if abs(R - nearest) <= tolerance:
        R = nearest - 1
    else:
        # NOTE(review): for a non-integer R this equals
        # problem_size - floor(t_pct * problem_size) - 1, i.e. it assumes the
        # fitness function floors its threshold. If the FourPeaks
        # implementation rounds the threshold up, the true optimum is one
        # lower - confirm against the library.
        R = int(R)
    return problem_size + R
# Optimum fitness for the configured problem, printed for a quick visual check.
OPT_FITNESS = get_opt_fitness(PROBLEM_SIZE, T_PCT)
print(OPT_FITNESS)

47


In [27]:
# Echo the problem configuration currently held by the kernel.
PROBLEM_SIZE, T_PCT

(30, 0.4)

In [31]:
# Single exploratory RHC run on one generated Four Peaks instance.
# The seed is time-based so every execution tries a different instance; it is
# printed so an interesting run can be reproduced by hard-coding that value.
SEED = int(time.time())
print(f"seed: {SEED}")
problem = FourPeaksGenerator().generate(seed=SEED, size=PROBLEM_SIZE, t_pct=T_PCT)

runner = RHCRunner(
    problem=problem,
    experiment_name='RHC',
    output_directory='.',
    seed=SEED,
    # Take the limits from the shared configuration rather than repeating
    # the literals, so this trial cannot drift from the main experiment.
    iteration_list=[MAX_ITERATIONS],
    max_attempts=MAX_ATTEMPTS,
    restart_list=[50],
)
# Only the per-iteration curve frame is used by the cells that follow.
_, df_run_curves = runner.run()

In [30]:
# Total time over all restarts: take the largest 'Time' recorded within each
# restart and add those maxima up. A single groupby pass replaces one
# full-frame boolean scan per restart; sort=False keeps the restarts in their
# order of appearance.
t = df_run_curves.groupby('current_restart', sort=False)['Time'].max().sum()
t

np.float64(1.762976374331629)

In [18]:
# Echo the runner limits currently held by the kernel.
MAX_ATTEMPTS, MAX_ITERATIONS

(150, 10000)

In [19]:
# Best fitness reached in the exploratory run, followed by a preview of the
# per-iteration curve rows.
print(df_run_curves['Fitness'].max())
df_run_curves.head()

30.0


Unnamed: 0,Iteration,Time,Fitness,FEvals,Restarts,max_iters,current_restart
0,0,0.0001,2.0,0.0,1,10000,0
1,1,0.003099,2.0,1.0,1,10000,0
2,2,0.003109,2.0,2.0,1,10000,0
3,3,0.003117,2.0,3.0,1,10000,0
4,4,0.003123,2.0,4.0,1,10000,0


In [20]:
# Location of the accumulated per-iteration metrics for this algorithm.
df_path = f'metrics/{ALGORITHM}.csv'
metrics_dir = os.path.dirname(df_path)
os.makedirs(metrics_dir, exist_ok=True)

# First use: write a header-only CSV so the file can always be read back below.
if not os.path.exists(df_path):
    runner_columns = [
        'Iteration', 'Time', 'Fitness', 'FEvals', 'Restarts', 'max_iters', 'current_restart',
    ]
    problem_config_columns = ['problem_size', 't_pct', 'max_attempts']
    run_config_columns = ['run_number', 'group_timestamp', 'total_runtime']
    header_only = pd.DataFrame(
        columns=runner_columns + problem_config_columns + run_config_columns
    )
    header_only.to_csv(df_path, index=False)

# Always load from disk so `all_df` reflects what has been persisted so far.
all_df = pd.read_csv(df_path)

In [21]:
# Sweep the restart counts. For each one, execute NUM_RUNS seeded RHC runs, tag
# every curve row with the configuration that produced it, and persist the
# accumulated curves to `df_path` once the whole group has finished.
for restarts in RESTARTS:
    print(f"restarts: {restarts}")

    # Skip configurations that already have rows in the metrics file.
    # `max_iters` is part of the key so that changing MAX_ITERATIONS triggers
    # a fresh run instead of silently reusing results from another budget.
    already_run = (
        (all_df['problem_size'] == PROBLEM_SIZE) &
        (all_df['t_pct'] == T_PCT) &
        (all_df['Restarts'] == restarts) &
        (all_df['max_attempts'] == MAX_ATTEMPTS) &
        (all_df['max_iters'] == MAX_ITERATIONS)
    ).any()
    if already_run:
        print('Already run')
        continue

    # One identifier shared by every run of this restart setting.
    group_timestamp = int(time.time())
    # Presumably guarantees that the next group gets a distinct one-second
    # timestamp even when a group finishes very quickly.
    time.sleep(1)

    # Collect the runs of this group and concatenate once, rather than
    # re-copying the ever-growing `all_df` after every single run.
    group_frames = []
    for run_i in tqdm(range(NUM_RUNS)):
        t_start = time.perf_counter()
        # The run index doubles as the seed for both the problem and the runner.
        problem = FourPeaksGenerator().generate(seed=run_i, size=PROBLEM_SIZE, t_pct=T_PCT)
        runner = RHCRunner(
            problem=problem,
            experiment_name='RHC',
            output_directory='.',
            seed=run_i,
            iteration_list=[MAX_ITERATIONS],
            max_attempts=MAX_ATTEMPTS,
            restart_list=[restarts],
        )
        _, df_run_curves = runner.run()

        # Problem configuration.
        df_run_curves['problem_size'] = PROBLEM_SIZE
        df_run_curves['t_pct'] = T_PCT
        df_run_curves['max_attempts'] = MAX_ATTEMPTS

        # Runner configuration (overwrites whatever the runner reported).
        df_run_curves['Restarts'] = restarts
        df_run_curves['max_iters'] = MAX_ITERATIONS

        # Run bookkeeping.
        df_run_curves['run_number'] = run_i
        df_run_curves['group_timestamp'] = group_timestamp

        print(f"Max Fitness: {df_run_curves['Fitness'].max()}")
        print(f"Max Iteration: {df_run_curves['Iteration'].max()}")

        # Wall-clock time of the whole run, including problem generation and
        # the tagging above.
        t_end = time.perf_counter()
        df_run_curves['total_runtime'] = t_end - t_start

        group_frames.append(df_run_curves)

    all_df = pd.concat([all_df, *group_frames], ignore_index=True)
    all_df.to_csv(df_path, index=False)

restarts: 200


 33%|███▎      | 1/3 [17:56<35:52, 1076.36s/it]

Max Fitness: 30.0
Max Iteration: 1011


 67%|██████▋   | 2/3 [35:47<17:53, 1073.15s/it]

Max Fitness: 30.0
Max Iteration: 927


100%|██████████| 3/3 [8:36:21<00:00, 10327.01s/it]

Max Fitness: 30.0
Max Iteration: 984





In [22]:
# Show which columns the accumulated metrics frame currently has.
all_df.columns

Index(['Iteration', 'Time', 'Fitness', 'FEvals', 'Restarts', 'max_iters',
       'current_restart', 'problem_size', 't_pct', 'max_attempts',
       'run_number', 'group_timestamp'],
      dtype='object')

In [23]:
# Collapse the per-iteration curves into one summary row per run.
# Columns that vary from iteration to iteration are dropped so that the
# remaining (configuration) columns deduplicate to exactly one row per run.
per_iteration_columns = ['Fitness', 'Iteration', 'FEvals', 'Time', 'current_restart']

group_summaries = []
for group_ts in all_df['group_timestamp'].unique():
    group_rows = all_df[all_df['group_timestamp'] == group_ts]
    assert len(group_rows) > NUM_RUNS, (
        f"group {group_ts}: expected more than {NUM_RUNS} curve rows, got {len(group_rows)}"
    )

    # Build new frames instead of mutating a slice of `all_df` in place, which
    # avoids pandas' SettingWithCopy hazards and keeps this cell re-runnable.
    run_rows = group_rows.drop(columns=per_iteration_columns).drop_duplicates()
    assert len(run_rows) == NUM_RUNS, (
        f"group {group_ts}: expected {NUM_RUNS} distinct runs, got {len(run_rows)}"
    )

    # NOTE(review): these metrics are taken over the whole group, so every run
    # row of a group receives the same values. If per-run metrics were
    # intended, compute them per 'run_number' instead - confirm.
    group_summaries.append(run_rows.assign(
        max_fitness=group_rows['Fitness'].max(),
        total_iterations=group_rows['Iteration'].max(),
        max_fevals=group_rows['FEvals'].max(),
        runtime=group_rows['Time'].max(),
    ))

# pd.concat rejects an empty list, so fall back to an empty frame when there
# are no groups yet.
agg_df = pd.concat(group_summaries, ignore_index=True) if group_summaries else pd.DataFrame()

In [24]:
# Persist the per-run summary. A dedicated variable is used on purpose:
# `df_path` must keep pointing at the raw metrics file, otherwise re-running the
# experiment cell after this one would write the raw curves over the summary.
agg_df_path = f'metrics/{ALGORITHM}_agg.csv'
agg_df.to_csv(agg_df_path, index=False)