In [None]:
import pandas as pd
from tqdm import tqdm
import os
import yaml
import time
import numpy as np

import sys
sys.path.append('../mlrose')
from mlrose_hiive.generators.max_k_color_generator import MaxKColorGenerator
from mlrose_hiive import SARunner
from mlrose_hiive import GeomDecay, ExpDecay, ArithDecay
import numpy as np

In [None]:
# --- Experiment identity ---
ALGORITHM = 'sa'                # used to build the metrics CSV file names

# --- Max-k-colour problem passed to the generator ---
PROBLEM_SIZE = 100              # number_of_nodes
CONNECTIONS_PER_NODE = 20       # max_connections_per_node
MAX_COLORS = 4                  # max_colors

# --- Search budget ---
MAX_ITERATIONS = 10_000         # single entry of the runner's iteration_list
MAX_ATTEMPTS = 1_000            # max_attempts handed to the runner
NUM_RUNS = 3                    # seeded runs per (temperature, decay) pair
MIN_T = 1e-3                    # min_temp given to every decay schedule

# --- Sweep grid: initial temperatures x decay schedule names ---
TEMPERATURE_LIST = [1, 10, 100, 1_000]
DECAY_LIST = ['ExpDecay', 'GeomDecay', 'ArithDecay']

In [None]:
def get_exp_decay(init_temp):
    """Return an ``ExpDecay`` schedule scaled to the configured iteration budget.

    The exponent constant ``k`` is chosen so that
    ``init_temp * exp(-k * MAX_ITERATIONS) == MIN_T``.

    NOTE(review): this assumes ``ExpDecay`` evaluates
    ``T(t) = init_temp * exp(-exp_const * t)`` -- confirm against the
    mlrose_hiive documentation.

    Parameters
    ----------
    init_temp : number
        Starting temperature of the schedule.
    """
    temp_ratio = MIN_T / init_temp
    exp_const = -np.log(temp_ratio) / MAX_ITERATIONS
    return ExpDecay(init_temp=init_temp, exp_const=exp_const, min_temp=MIN_T)
def get_geom_decay(init_temp):
    """Return a ``GeomDecay`` schedule scaled to the configured iteration budget.

    The per-step factor ``d`` is chosen so that
    ``init_temp * d ** MAX_ITERATIONS == MIN_T``.

    NOTE(review): this assumes ``GeomDecay`` evaluates
    ``T(t) = init_temp * decay ** t`` -- confirm against the mlrose_hiive
    documentation.

    Parameters
    ----------
    init_temp : number
        Starting temperature of the schedule.
    """
    per_step_exponent = 1 / MAX_ITERATIONS
    decay = (MIN_T / init_temp) ** per_step_exponent
    return GeomDecay(init_temp=init_temp, decay=decay, min_temp=MIN_T)
def get_arith_decay(init_temp):
    """Return an ``ArithDecay`` schedule scaled to the configured iteration budget.

    The per-step decrement ``d`` is chosen so that
    ``init_temp - d * MAX_ITERATIONS == MIN_T``.

    NOTE(review): this assumes ``ArithDecay`` evaluates
    ``T(t) = init_temp - decay * t`` -- confirm against the mlrose_hiive
    documentation.

    Parameters
    ----------
    init_temp : number
        Starting temperature of the schedule.
    """
    temp_span = init_temp - MIN_T
    decay = temp_span / MAX_ITERATIONS
    return ArithDecay(init_temp=init_temp, decay=decay, min_temp=MIN_T)

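# Disabled alternative: build each schedule from `init_temp` only, leaving the
# remaining schedule parameters at the library defaults.
# def get_exp_decay(init_temp):
#     return ExpDecay(init_temp=init_temp)
# def get_geom_decay(init_temp):
#     return GeomDecay(init_temp=init_temp)
# def get_arith_decay(init_temp):
#     return ArithDecay(init_temp=init_temp)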

In [None]:
# Single trial run: one generated problem solved with one geometric schedule
# that starts at temperature 1000.

# SEED = 6
# Wall-clock seed (second resolution), so it changes between executions.
SEED = int(time.time())

problem = MaxKColorGenerator().generate(
    seed=SEED,
    number_of_nodes=PROBLEM_SIZE,
    max_connections_per_node=CONNECTIONS_PER_NODE,
    maximize=True,
    max_colors=MAX_COLORS,
)

runner = SARunner(
    problem=problem,
    experiment_name='SA',
    output_directory='.',
    seed=SEED,
    iteration_list=[MAX_ITERATIONS],
    max_attempts=MAX_ATTEMPTS,
    temperature_list=[get_geom_decay(1000)],
)

# `run()` returns two objects; only the second (the per-iteration curve frame)
# is inspected in the following cells.
x, df_run_curves = runner.run()

In [None]:
# Display the generated problem's `length` attribute (the same attribute the
# experiment loop records in the `num_edges` column).
problem.length

In [None]:
# Best fitness value recorded during the trial run, followed by the last ten
# rows of its curve.
print(df_run_curves['Fitness'].max())
df_run_curves.tail(10)
# df_run_curves.head()

In [None]:
# Raw-curve metrics file for this algorithm; make sure its folder exists.
df_path = f'metrics/{ALGORITHM}.csv'
os.makedirs(os.path.dirname(df_path), exist_ok=True)

# First use: write a header-only CSV so the read below always finds a file
# with the expected columns.
if not os.path.exists(df_path):
    print('Creating new metrics file..')
    pd.DataFrame(columns=(
        # runner columns
        ['Iteration', 'Time', 'Fitness', 'FEvals', 'Temperature', 'max_iters']
        # problem config
        + ['problem_size', 'connections_per_node', 'decay_type', 'max_attempts', 'num_edges']
        # run config
        + ['run_number', 'group_timestamp', 'total_runtime']
    )).to_csv(df_path, index=False)

# Always load from disk, whether the file was just created or already existed.
all_df = pd.read_csv(df_path)

In [None]:
# Sweep every (initial temperature, decay schedule) pair. Each pair forms one
# "group" of NUM_RUNS seeded runs; the per-iteration curves of a completed
# group are appended to `all_df` and the metrics file is rewritten.

# Factory for each schedule name that may appear in DECAY_LIST.
decay_factories = {
    'ExpDecay': get_exp_decay,
    'GeomDecay': get_geom_decay,
    'ArithDecay': get_arith_decay,
}

for temperature in TEMPERATURE_LIST:
    print(f'Temp: {temperature}')
    for decay_str in DECAY_LIST:
        if decay_str not in decay_factories:
            raise ValueError('Invalid decay type')
        decay_type = decay_factories[decay_str](temperature)
        print(f"Decay Type: {decay_str}")

        # Skip configurations already stored in the metrics file. `max_iters`
        # is part of the key because every schedule is derived from
        # MAX_ITERATIONS: results recorded for another iteration budget are
        # not results for this configuration.
        already_run = (
            (all_df['problem_size'] == PROBLEM_SIZE) &
            (all_df['connections_per_node'] == CONNECTIONS_PER_NODE) &
            (all_df['decay_type'] == decay_str) &
            (all_df['Temperature'] == temperature) &
            (all_df['max_attempts'] == MAX_ATTEMPTS) &
            (all_df['max_iters'] == MAX_ITERATIONS)
        ).any()
        if already_run:
            print('Already run')
            continue

        # Second-resolution id shared by all runs of this group; the sleep
        # guarantees the next group cannot be assigned the same id.
        group_timestamp = int(time.time())
        time.sleep(1)

        run_frames = []
        max_fitnesses = []
        for run_i in tqdm(range(NUM_RUNS)):
            t_start = time.perf_counter()
            # The run index doubles as the seed for both the problem and the runner.
            problem = MaxKColorGenerator().generate(seed=run_i, number_of_nodes=PROBLEM_SIZE,
                                                    max_connections_per_node=CONNECTIONS_PER_NODE,
                                                    maximize=True, max_colors=MAX_COLORS)
            runner = SARunner(
                problem=problem,
                experiment_name='SA',
                output_directory='.',
                seed=run_i,
                iteration_list=[MAX_ITERATIONS],
                max_attempts=MAX_ATTEMPTS,
                temperature_list=[decay_type],
                # decay_list=[decay_type],
            )
            _, df_run_curves = runner.run()

            # Tag every curve row with the configuration that produced it.
            df_run_curves['problem_size'] = PROBLEM_SIZE
            df_run_curves['connections_per_node'] = CONNECTIONS_PER_NODE
            df_run_curves['decay_type'] = decay_str
            df_run_curves['max_attempts'] = MAX_ATTEMPTS

            # Overwrite the runner's own values with the plain numbers used as
            # lookup keys by the "already run" check above.
            df_run_curves['Temperature'] = temperature
            df_run_curves['max_iters'] = MAX_ITERATIONS

            df_run_curves['run_number'] = run_i
            df_run_curves['group_timestamp'] = group_timestamp
            # NOTE(review): `problem.length` is presumably the state-vector
            # length (one entry per node) rather than an edge count -- confirm
            # before relying on the `num_edges` column.
            df_run_curves['num_edges'] = problem.length

            max_fitnesses.append(df_run_curves['Fitness'].max())

            # Wall time of the whole run, including problem generation.
            t_end = time.perf_counter()
            df_run_curves['total_runtime'] = t_end - t_start

            run_frames.append(df_run_curves)

        # Mean over runs of each run's best fitness.
        print(f'Max Fitness: {np.mean(max_fitnesses)}')

        # Append the finished group in one concat (instead of re-copying
        # `all_df` once per run) so a failed run never leaves a partial group
        # in memory, then persist.
        all_df = pd.concat([all_df, *run_frames], ignore_index=True)
        all_df.to_csv(df_path, index=False)

In [None]:
# List the columns of the accumulated metrics frame.
all_df.columns

In [None]:
# Collapse the per-iteration curves into one summary row per run.
#
# For every group (rows sharing a `group_timestamp`) the per-iteration columns
# are dropped and the remaining, per-run-constant rows are de-duplicated, which
# must leave exactly NUM_RUNS rows. The group-wide maxima of Fitness, Iteration
# and FEvals are then attached to each of those rows.
group_summaries = []
for group_timestamp in all_df['group_timestamp'].unique():
    group_df = all_df[all_df['group_timestamp'] == group_timestamp]
    # A group must hold more curve rows than runs, otherwise it is truncated.
    assert len(group_df) > NUM_RUNS

    # Chained, non-mutating calls: the original in-place edits operated on a
    # filtered slice of `all_df`, which triggers SettingWithCopyWarning.
    run_summary = (
        group_df
        .drop(columns=['Fitness', 'Iteration', 'FEvals', 'Time'])
        .drop_duplicates()
        .assign(
            max_fitness=group_df['Fitness'].max(),
            total_iterations=group_df['Iteration'].max(),
            max_fevals=group_df['FEvals'].max(),
        )
    )
    assert len(run_summary) == NUM_RUNS
    group_summaries.append(run_summary)

# Concatenate once; `pd.concat` rejects an empty list, so fall back to an empty
# frame when no group has been recorded yet.
if group_summaries:
    agg_df = pd.concat(group_summaries, ignore_index=True)
else:
    agg_df = pd.DataFrame()

In [None]:
# Write the per-run summary next to the raw metrics file. A dedicated name is
# used so `df_path` keeps pointing at the raw-curve CSV: the experiment loop
# writes `all_df` to `df_path`, and rebinding it here would make a later re-run
# of that cell overwrite the summary file with raw curves.
agg_df_path = f'metrics/{ALGORITHM}_agg.csv'
agg_df.to_csv(agg_df_path, index=False)

In [None]:
# Display the aggregated per-run summary table.
agg_df