In [1]:
import pandas as pd
from tqdm import tqdm
import os
import time

import sys
sys.path.append('../mlrose')
from mlrose_hiive.generators.four_peaks_generator import FourPeaksGenerator
from mlrose_hiive import SARunner
from mlrose_hiive import GeomDecay, ExpDecay, ArithDecay
import numpy as np

In [2]:
# Experiment configuration: every value a reader might tune lives in this cell.

# Short algorithm tag; it is interpolated into the metrics CSV file names.
ALGORITHM = 'sa'
# Passed as `t_pct` when generating each Four Peaks problem and when computing
# the reference optimum.
T_PCT = 0.4
# Problem sizes to benchmark; the experiment loop runs one group per entry.
PROBLEM_SIZE_LIST = [10, 30, 50, 100, 200, 500]
# Fixed iteration budget, disabled: the experiment loop derives
# MAX_ITERATIONS from the problem size instead.
# MAX_ITERATIONS = 10000
# Passed to the runner as `max_attempts`.
# NOTE(review): presumably chosen so large that only the iteration budget ends
# a run -- confirm against the runner's stopping rule.
MAX_ATTEMPTS = 1e9
# Number of repeated runs per problem size; the run index is also used as the seed.
NUM_RUNS = 3
# Temperature floor handed to every decay schedule as `min_temp`; the schedule
# helpers also use it as the target temperature when deriving the decay rate.
MIN_T = 0.001

# Initial temperature handed to the selected decay schedule.
TEMPERATURE = 10
# Name of the decay schedule to use: 'ExpDecay', 'GeomDecay' or 'ArithDecay'.
DECAY = 'ExpDecay'

In [3]:
def get_opt_fitness(problem_size, t_pct):
    """Return the global-optimum fitness of a Four Peaks instance.

    The computation mirrors the Four Peaks fitness as implemented in mlrose:
    the threshold is ``T = ceil(t_pct * n)`` and the bonus of ``n`` is awarded
    only when both the run of leading 1s and the run of trailing 0s are
    *strictly* longer than ``T``.

    Args:
        problem_size: Length ``n`` of the bit string.
        t_pct: Threshold expressed as a fraction of ``problem_size``.

    Returns:
        The best achievable fitness. When the bonus is reachable this is
        ``2 * n - T - 1`` (one run of length ``T + 1``, the other run taking
        the remaining ``n - T - 1`` bits, plus the bonus ``n``). When the two
        runs of length ``T + 1`` cannot both fit in ``n`` bits the bonus is
        unreachable and the optimum is simply ``n`` (a uniform string).
    """
    # Same rounding as the fitness function itself, so any floating-point
    # quirk in `t_pct * n` affects both sides identically.
    threshold = int(np.ceil(t_pct * problem_size))

    # Shortest run that is strictly longer than the threshold.
    shortest_bonus_run = threshold + 1

    # Both runs must fit side by side for the bonus to be attainable.
    if 2 * shortest_bonus_run > problem_size:
        return problem_size

    # Keep one run as short as the bonus allows and give the rest to the other.
    return 2 * problem_size - shortest_bonus_run

In [4]:
def get_exp_decay(init_temp, max_iters):
    """Build an ``ExpDecay`` schedule spanning ``init_temp`` down to ``MIN_T``.

    The exponent constant ``k`` is chosen so that
    ``init_temp * exp(-k * max_iters) == MIN_T``, i.e. an exponential curve
    starting at ``init_temp`` touches the floor after ``max_iters`` steps.

    Args:
        init_temp: Starting temperature.
        max_iters: Number of iterations over which to cool.

    Returns:
        An ``ExpDecay`` instance with ``min_temp=MIN_T``.
    """
    cooling_ratio = MIN_T / init_temp
    exp_const = -np.log(cooling_ratio) / max_iters
    return ExpDecay(init_temp=init_temp, exp_const=exp_const, min_temp=MIN_T)
def get_geom_decay(init_temp, max_iters):
    """Build a ``GeomDecay`` schedule spanning ``init_temp`` down to ``MIN_T``.

    The per-step factor ``d`` is chosen so that
    ``init_temp * d ** max_iters == MIN_T``.

    Args:
        init_temp: Starting temperature.
        max_iters: Number of iterations over which to cool.

    Returns:
        A ``GeomDecay`` instance with ``min_temp=MIN_T``.
    """
    cooling_ratio = MIN_T / init_temp
    step_exponent = 1 / max_iters
    decay_factor = cooling_ratio ** step_exponent
    return GeomDecay(init_temp=init_temp, decay=decay_factor, min_temp=MIN_T)
def get_arith_decay(init_temp, max_iters):
    """Build an ``ArithDecay`` schedule spanning ``init_temp`` down to ``MIN_T``.

    The per-step decrement ``d`` is chosen so that
    ``init_temp - d * max_iters == MIN_T``.

    Args:
        init_temp: Starting temperature.
        max_iters: Number of iterations over which to cool.

    Returns:
        An ``ArithDecay`` instance with ``min_temp=MIN_T``.
    """
    temp_span = init_temp - MIN_T
    step_decrement = temp_span / max_iters
    return ArithDecay(init_temp=init_temp, decay=step_decrement, min_temp=MIN_T)

In [5]:
# CSV that accumulates the raw per-iteration curves of every run.
df_path = f'metrics_problem_size/{ALGORITHM}.csv'
os.makedirs(os.path.dirname(df_path), exist_ok=True)

# Full schema of the metrics file. `optimal_fitness` is listed as well so a
# freshly created (header-only) file has the same columns as a populated one.
METRIC_COLUMNS = [
    # runner columns
    'Iteration', 'Time', 'Fitness', 'FEvals', 'Temperature', 'max_iters',
    # problem config
    'problem_size', 't_pct', 'decay_type', 'max_attempts',
    # run config
    'run_number', 'group_timestamp', 'total_runtime',
    # reference value
    'optimal_fitness',
]

if not os.path.exists(df_path):
    print('Creating new metrics file..')
    pd.DataFrame(columns=METRIC_COLUMNS).to_csv(df_path, index=False)

# Always load from disk so results from earlier sessions are picked up and the
# experiment loop can skip configurations that were already run.
all_df = pd.read_csv(df_path)

Creating new metrics file..


In [6]:
# Maps each supported DECAY name to the helper that builds its schedule.
DECAY_BUILDERS = {
    'ExpDecay': get_exp_decay,
    'GeomDecay': get_geom_decay,
    'ArithDecay': get_arith_decay,
}

for problem_size in PROBLEM_SIZE_LIST:
    print(f"Problem Size: {problem_size}")

    # The iteration budget scales linearly with the problem size.
    MAX_ITERATIONS = 100 * problem_size

    build_decay = DECAY_BUILDERS.get(DECAY)
    if build_decay is None:
        raise ValueError('Invalid decay type')

    # Skip configurations whose results are already in the metrics frame, so
    # the cell can be re-run (or resumed) without repeating finished work.
    already_run = (
        (all_df['problem_size'] == problem_size) &
        (all_df['t_pct'] == T_PCT) &
        (all_df['decay_type'] == DECAY) &
        (all_df['Temperature'] == TEMPERATURE) &
        (all_df['max_attempts'] == MAX_ATTEMPTS)
    )
    if already_run.any():
        print('Already run')
        continue

    # Only build the schedule once we know the configuration will actually run.
    decay_type = build_decay(TEMPERATURE, MAX_ITERATIONS)
    opt_fitness = get_opt_fitness(problem_size, T_PCT)

    # All runs of one problem size share this id. It has one-second
    # resolution, so the sleep presumably exists to keep consecutive groups
    # from ever sharing a timestamp.
    group_timestamp = int(time.time())
    time.sleep(1)

    # Collect the runs and append them to `all_df` in one concat below,
    # instead of re-copying the whole accumulated history after every run.
    run_frames = []
    for run_i in tqdm(range(NUM_RUNS)):
        t_start = time.perf_counter()
        # The run index doubles as the seed for both the problem and the runner.
        problem = FourPeaksGenerator().generate(seed=run_i, size=problem_size, t_pct=T_PCT)
        runner = SARunner(
            problem=problem,
            experiment_name='SA',
            output_directory='.',
            seed=run_i,
            iteration_list=[MAX_ITERATIONS],
            max_attempts=MAX_ATTEMPTS,
            temperature_list=[decay_type],
        )
        _, df_run_curves = runner.run()

        # Tag every curve row with the configuration that produced it.
        # 'Temperature' is set to the plain initial temperature so the skip
        # check above can compare it against TEMPERATURE.
        df_run_curves['problem_size'] = problem_size
        df_run_curves['t_pct'] = T_PCT
        df_run_curves['decay_type'] = DECAY
        df_run_curves['max_attempts'] = MAX_ATTEMPTS
        df_run_curves['Temperature'] = TEMPERATURE
        df_run_curves['max_iters'] = MAX_ITERATIONS
        df_run_curves['run_number'] = run_i
        df_run_curves['group_timestamp'] = group_timestamp

        print(f"Max Fitness: {df_run_curves['Fitness'].max()}")
        print(f"Max Iteration: {df_run_curves['Iteration'].max()}")

        t_end = time.perf_counter()
        df_run_curves['total_runtime'] = t_end - t_start
        df_run_curves['optimal_fitness'] = opt_fitness

        run_frames.append(df_run_curves)

    # Checkpoint after every problem size so an interrupted session keeps the
    # groups that already finished.
    all_df = pd.concat([all_df, *run_frames]).reset_index(drop=True)
    all_df.to_csv(df_path, index=False)

Problem Size: 10


 67%|██████▋   | 2/3 [00:00<00:00,  9.45it/s]

Max Fitness: 10.0
Max Iteration: 1000
Max Fitness: 15.0
Max Iteration: 1000
Max Fitness: 10.0
Max Iteration: 1000


100%|██████████| 3/3 [00:00<00:00,  9.57it/s]


Problem Size: 30


 33%|███▎      | 1/3 [00:00<00:01,  1.30it/s]

Max Fitness: 30.0
Max Iteration: 3000


 67%|██████▋   | 2/3 [00:01<00:00,  1.32it/s]

Max Fitness: 30.0
Max Iteration: 3000


100%|██████████| 3/3 [00:02<00:00,  1.32it/s]

Max Fitness: 30.0
Max Iteration: 3000
Problem Size: 50



 33%|███▎      | 1/3 [00:02<00:04,  2.05s/it]

Max Fitness: 50.0
Max Iteration: 5000


 67%|██████▋   | 2/3 [00:04<00:02,  2.06s/it]

Max Fitness: 50.0
Max Iteration: 5000


100%|██████████| 3/3 [00:06<00:00,  2.05s/it]

Max Fitness: 50.0
Max Iteration: 5000
Problem Size: 100



 33%|███▎      | 1/3 [00:07<00:15,  7.97s/it]

Max Fitness: 100.0
Max Iteration: 10000


 67%|██████▋   | 2/3 [00:15<00:08,  8.00s/it]

Max Fitness: 100.0
Max Iteration: 10000


100%|██████████| 3/3 [00:23<00:00,  7.98s/it]

Max Fitness: 100.0
Max Iteration: 10000





Problem Size: 200


 33%|███▎      | 1/3 [00:32<01:04, 32.18s/it]

Max Fitness: 163.0
Max Iteration: 20000


 67%|██████▋   | 2/3 [01:04<00:32, 32.34s/it]

Max Fitness: 190.0
Max Iteration: 20000


100%|██████████| 3/3 [01:37<00:00, 32.46s/it]

Max Fitness: 133.0
Max Iteration: 20000





Problem Size: 500


 33%|███▎      | 1/3 [03:32<07:05, 212.56s/it]

Max Fitness: 158.0
Max Iteration: 50000


 67%|██████▋   | 2/3 [07:20<03:41, 221.73s/it]

Max Fitness: 175.0
Max Iteration: 50000


100%|██████████| 3/3 [11:16<00:00, 225.61s/it]

Max Fitness: 175.0
Max Iteration: 50000





In [7]:
# Show which columns the accumulated metrics frame ended up with.
all_df.columns

Index(['Iteration', 'Time', 'Fitness', 'FEvals', 'Temperature', 'max_iters',
       'problem_size', 't_pct', 'decay_type', 'max_attempts', 'run_number',
       'group_timestamp', 'total_runtime', 'optimal_fitness'],
      dtype='object')

In [8]:
# Collapse the per-iteration curves into one summary row per run.
# Each experiment group (one problem size) is identified by `group_timestamp`.
group_summaries = []
for group_timestamp in all_df['group_timestamp'].unique():
    group_curves = all_df[all_df['group_timestamp'] == group_timestamp]

    # NOTE: these maxima are taken over the whole group, so every run of the
    # group receives the same values.
    max_fit = group_curves['Fitness'].max()
    total_iters = group_curves['Iteration'].max()
    max_fevals = group_curves['FEvals'].max()

    # Drop the per-iteration columns without `inplace`, so the slice of
    # `all_df` is never mutated (avoids pandas' SettingWithCopy pitfalls).
    run_config = group_curves.drop(columns=['Fitness', 'Iteration', 'FEvals', 'Time'])
    assert len(run_config) > NUM_RUNS, 'expected several curve rows per run'

    # What remains is constant within a run, so de-duplicating leaves exactly
    # one row per run.
    run_config = run_config.drop_duplicates()
    assert len(run_config) == NUM_RUNS, 'expected exactly one row per run'

    group_summaries.append(
        run_config.assign(
            max_fitness=max_fit,
            total_iterations=total_iters,
            max_fevals=max_fevals,
        )
    )

# Concatenate once; `pd.concat` rejects an empty list, so fall back to an
# empty frame when there are no groups yet.
agg_df = pd.concat(group_summaries) if group_summaries else pd.DataFrame()
agg_df = agg_df.reset_index(drop=True)

In [9]:
# Use a dedicated name for the summary file. Rebinding `df_path` here would
# make a later re-run of the experiment cell write the raw curves into the
# summary CSV, because that cell saves `all_df` to `df_path`.
agg_df_path = f'metrics_problem_size/{ALGORITHM}_agg.csv'
agg_df.to_csv(agg_df_path, index=False)

In [10]:
# Display the per-run summary table.
agg_df

Unnamed: 0,Temperature,max_iters,problem_size,t_pct,decay_type,max_attempts,run_number,group_timestamp,total_runtime,optimal_fitness,max_fitness,total_iterations,max_fevals
0,10,1000,10,0.4,ExpDecay,1000000000.0,0,1719007945,0.111252,15.0,15.0,1000,1221.0
1,10,1000,10,0.4,ExpDecay,1000000000.0,1,1719007945,0.100554,15.0,15.0,1000,1221.0
2,10,1000,10,0.4,ExpDecay,1000000000.0,2,1719007945,0.098667,15.0,15.0,1000,1221.0
3,10,3000,30,0.4,ExpDecay,1000000000.0,0,1719007946,0.768637,47.0,30.0,3000,3940.0
4,10,3000,30,0.4,ExpDecay,1000000000.0,1,1719007946,0.753081,47.0,30.0,3000,3940.0
5,10,3000,30,0.4,ExpDecay,1000000000.0,2,1719007946,0.74858,47.0,30.0,3000,3940.0
6,10,5000,50,0.4,ExpDecay,1000000000.0,0,1719007950,2.050576,79.0,50.0,5000,7002.0
7,10,5000,50,0.4,ExpDecay,1000000000.0,1,1719007950,2.067503,79.0,50.0,5000,7002.0
8,10,5000,50,0.4,ExpDecay,1000000000.0,2,1719007950,2.013933,79.0,50.0,5000,7002.0
9,10,10000,100,0.4,ExpDecay,1000000000.0,0,1719007957,7.968158,159.0,100.0,10000,15453.0
