In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
# Load the GA metrics log; the path is relative to the notebook's working directory.
ga_df = pd.read_csv('metrics/ga.csv')

In [3]:
# Sanity check: number of rows read from metrics/ga.csv.
len(ga_df)

28194

In [4]:
# Inspect the available columns before deriving labels and summaries from them.
ga_df.columns

Index(['Iteration', 'Time', 'Fitness', 'FEvals', 'Population Size',
       'Mutation Rate', 'max_iters', 'problem_size', 'max_attempts',
       'run_number', 'group_timestamp', 'total_runtime', 'num_edges',
       'optimal_fitness'],
      dtype='object')

In [5]:
# Configuration label ("Pop=<int>, Mut=<rate>") used as the grouping key for the
# summary table. Built column-wise: a row-wise DataFrame.apply(axis=1) constructs
# a Series for every row just to read two values.
ga_df['X'] = [f"Pop={int(pop_size)}, Mut={mut_rate}"
              for pop_size, mut_rate in zip(ga_df['Population Size'], ga_df['Mutation Rate'])]

In [6]:
# Summary table: one row per GA configuration (population size, mutation rate).
data = {'Population Size': [], 'Mutation Rate': [], 'Best Fitness Mean': [], 'Best Time Mean': [],
        'Best FEvals Mean': [], 'Best Iterations Mean': []}
def sort_fn(x):
    """Return the population size parsed from a 'Pop=<int>, Mut=<rate>' label."""
    return int(x.split('=')[1].split(',')[0])
# sorted() is stable, so labels sharing a population size keep their first-seen order.
for group in sorted(ga_df['X'].unique(), key=sort_fn):
    group_df = ga_df[ga_df['X'] == group]
    # Each configuration must carry exactly one group_timestamp value.
    assert len(group_df['group_timestamp'].unique()) == 1
    # Runs can stop at different iterations; only iterations reached by every run
    # are compared, so the shortest run sets the cutoff.
    last_iter = group_df.groupby('run_number')['Iteration'].max().min()
    # Iterations are considered from 1 up to and including the cutoff.
    shared_df = group_df[group_df['Iteration'].between(1, last_iter)]
    # Single pass over the group: mean of each metric across runs, indexed by iteration.
    per_iter = shared_df.groupby('Iteration')[['Fitness', 'total_runtime', 'FEvals']].mean()
    assert not per_iter.empty, f"{group}: no iteration >= 1 is shared by all runs"
    # idxmax returns the first maximum, i.e. the earliest iteration with the best mean fitness.
    best_iters = per_iter['Fitness'].idxmax()
    best_row = per_iter.loc[best_iters]
    data['Population Size'].append(group_df['Population Size'].iloc[0])
    data['Mutation Rate'].append(f"{group_df['Mutation Rate'].iloc[0]:.2f}")
    data['Best Fitness Mean'].append(f"{best_row['Fitness']:.2f}")
    # NOTE(review): this averages total_runtime, not the per-row Time column -- confirm intended.
    data['Best Time Mean'].append(f"{best_row['total_runtime']:.2f}")
    data['Best FEvals Mean'].append(f"{best_row['FEvals']:.0f}")
    data['Best Iterations Mean'].append(f"{best_iters:.0f}")
df = pd.DataFrame(data)
df.to_csv('metrics/ga_table.csv', index=False)
df.style.hide(axis='index')

Population Size,Mutation Rate,Best Fitness Mean,Best Time Mean,Best FEvals Mean,Best Iterations Mean
10,0.1,255.0,5.01,5556,501
10,0.3,266.0,4.82,4308,387
10,0.5,269.67,4.66,5093,458
10,0.7,267.33,5.41,4155,373
20,0.1,265.0,12.16,10918,517
20,0.3,275.67,11.01,11237,532
20,0.5,275.0,11.09,9598,454
20,0.7,281.0,13.22,12753,604
50,0.1,271.0,23.94,15135,295
50,0.3,283.33,23.75,27128,530


In [7]:
# Load the SA metrics log; the path is relative to the notebook's working directory.
sa_df = pd.read_csv('metrics/sa.csv')

In [8]:
# Inspect the available columns before deriving labels and summaries from them.
sa_df.columns

Index(['Iteration', 'Time', 'Fitness', 'FEvals', 'Temperature', 'max_iters',
       'decay_type', 'problem_size', 'max_attempts', 'run_number',
       'group_timestamp', 'total_runtime', 'num_edges', 'optimal_fitness'],
      dtype='object')

In [9]:
# Configuration label ("Temp=<t>, DecayType=<name>") used as the grouping key for
# the summary table. Built column-wise: a row-wise DataFrame.apply(axis=1)
# constructs a Series for every row just to read two values.
sa_df['X'] = [f"Temp={temperature}, DecayType={decay}"
              for temperature, decay in zip(sa_df['Temperature'], sa_df['decay_type'])]

In [10]:
# Summary table: one row per SA configuration (temperature, decay type).
data = {'Temperature': [], 'Decay Type': [],
        'Best Fitness Mean': [], 'Best Time Mean': [],
        'Best FEvals Mean': [], 'Best Iterations Mean': []}
# Order by the numeric temperature parsed from the "Temp=<t>, DecayType=<name>" label,
# then by the label itself. A plain string sort would misplace e.g. "Temp=10.0"
# before "Temp=2.0".
for group in sorted(sa_df['X'].unique(),
                    key=lambda label: (float(label.split('=')[1].split(',')[0]), label)):
    group_df = sa_df[sa_df['X'] == group]
    # Each configuration must carry exactly one group_timestamp value.
    assert len(group_df['group_timestamp'].unique()) == 1
    # Runs can stop at different iterations; only iterations reached by every run
    # are compared, so the shortest run sets the cutoff.
    last_iter = group_df.groupby('run_number')['Iteration'].max().min()
    # Iterations are considered from 1 up to and including the cutoff.
    shared_df = group_df[group_df['Iteration'].between(1, last_iter)]
    # Single pass over the group: mean of each metric across runs, indexed by iteration.
    per_iter = shared_df.groupby('Iteration')[['Fitness', 'total_runtime', 'FEvals']].mean()
    assert not per_iter.empty, f"{group}: no iteration >= 1 is shared by all runs"
    # idxmax returns the first maximum, i.e. the earliest iteration with the best mean fitness.
    best_iters = per_iter['Fitness'].idxmax()
    best_row = per_iter.loc[best_iters]
    data['Temperature'].append(group_df['Temperature'].iloc[0])
    data['Decay Type'].append(f"{group_df['decay_type'].iloc[0]}")
    data['Best Fitness Mean'].append(f"{best_row['Fitness']:.2f}")
    # NOTE(review): this averages total_runtime, not the per-row Time column -- confirm intended.
    data['Best Time Mean'].append(f"{best_row['total_runtime']:.2f}")
    data['Best FEvals Mean'].append(f"{best_row['FEvals']:.0f}")
    data['Best Iterations Mean'].append(f"{best_iters:.0f}")
df = pd.DataFrame(data)
df.to_csv('metrics/sa_table.csv', index=False)
df.style.hide(axis='index')

Temperature,Decay Type,Best Fitness Mean,Best Time Mean,Best FEvals Mean,Best Iterations Mean
0.01,ArithDecay,289.67,15.89,9431,8938
0.01,ExpDecay,289.67,15.76,9431,8938
0.01,GeomDecay,289.67,15.77,9431,8938
0.1,ArithDecay,289.67,16.3,9431,8938
0.1,ExpDecay,289.67,15.95,9431,8938
0.1,GeomDecay,289.67,16.05,9431,8938
1.0,ArithDecay,285.33,17.85,11574,9307
1.0,ExpDecay,289.33,16.62,9482,8685
1.0,GeomDecay,289.33,16.93,9482,8685
10.0,ArithDecay,284.67,22.17,16478,9924


In [11]:
# Load the RHC metrics log; the path is relative to the notebook's working directory.
rhc_df = pd.read_csv('metrics/rhc.csv')

In [12]:
# Store Restarts as integers. astype(int) raises if the column holds NaN or
# other values that cannot be converted, so this doubles as a validity check.
rhc_df['Restarts'] = rhc_df['Restarts'].astype(int)

In [13]:
# Inspect the available columns before deriving labels and summaries from them.
rhc_df.columns

Index(['Iteration', 'Time', 'Fitness', 'FEvals', 'Restarts', 'max_iters',
       'current_restart', 'problem_size', 'max_attempts', 'run_number',
       'group_timestamp', 'total_runtime', 'num_edges', 'optimal_fitness'],
      dtype='object')

In [14]:
# NOTE(review): this repeats the integer cast already applied to Restarts in an
# earlier cell; on an already-int column it changes nothing, so this cell looks
# safe to delete.
rhc_df['Restarts'] = rhc_df['Restarts'].astype(int)

In [15]:
# Configuration label ("Restarts=<int>") used as the grouping key for the summary
# table. Built column-wise: a row-wise DataFrame.apply(axis=1) constructs a
# Series for every row just to read one value.
rhc_df['X'] = [f"Restarts={restarts:.0f}" for restarts in rhc_df['Restarts']]

In [16]:
# Preview the generated configuration labels.
rhc_df['X']

0          Restarts=1
1          Restarts=1
2          Restarts=1
3          Restarts=1
4          Restarts=1
             ...     
190809    Restarts=50
190810    Restarts=50
190811    Restarts=50
190812    Restarts=50
190813    Restarts=50
Name: X, Length: 190814, dtype: object

In [17]:
# Summary table: one row per RHC configuration (number of restarts).
data = {'Restarts': [],
        'Best Fitness Mean': [], 'Best Time Mean': [],
        'Best FEvals Mean': [], 'Best Iterations Mean': []}
def sort_fn(x):
    """Return the restart count parsed from a 'Restarts=<int>' label."""
    return int(x.split('=')[1])
for group in sorted(rhc_df['X'].unique(), key=sort_fn):
    group_df = rhc_df[rhc_df['X'] == group]
    # Each configuration must carry exactly one group_timestamp value.
    assert len(group_df['group_timestamp'].unique()) == 1
    # Runs can stop at different iterations; only iterations reached by every run
    # are compared, so the shortest run sets the cutoff.
    last_iter = group_df.groupby('run_number')['Iteration'].max().min()
    # Iterations are considered from 1 up to and including the cutoff.
    shared_df = group_df[group_df['Iteration'].between(1, last_iter)]
    # Single pass over the group: mean of each metric across all rows of an
    # iteration, indexed by iteration.
    per_iter = shared_df.groupby('Iteration')[['Fitness', 'total_runtime', 'FEvals']].mean()
    assert not per_iter.empty, f"{group}: no iteration >= 1 is shared by all runs"
    # idxmax returns the first maximum, i.e. the earliest iteration with the best mean fitness.
    best_iters = per_iter['Fitness'].idxmax()
    best_row = per_iter.loc[best_iters]
    data['Restarts'].append(group_df['Restarts'].iloc[0])
    data['Best Fitness Mean'].append(f"{best_row['Fitness']:.2f}")
    # NOTE(review): this averages total_runtime, not the per-row Time column -- confirm intended.
    data['Best Time Mean'].append(f"{best_row['total_runtime']:.2f}")
    data['Best FEvals Mean'].append(f"{best_row['FEvals']:.0f}")
    data['Best Iterations Mean'].append(f"{best_iters:.0f}")
df = pd.DataFrame(data)
df.to_csv('metrics/rhc_table.csv', index=False)
df.style.hide(axis='index')

Restarts,Best Fitness Mean,Best Time Mean,Best FEvals Mean,Best Iterations Mean
1,264.4,1.4,966,632
5,270.25,5.34,3187,849
10,270.0,12.07,4234,849
20,270.22,33.23,8441,898
50,270.71,152.52,17395,984
