In [19]:
import os
import pandas as pd
import numpy as np

In [20]:
ga_df = pd.read_csv('saving_this_shit_metrics/ga.csv')

In [21]:
len(ga_df)

7169

In [22]:
ga_df.columns

Index(['Iteration', 'Time', 'Fitness', 'FEvals', 'Population Size',
       'Mutation Rate', 'max_iters', 'problem_size', 't_pct', 'max_attempts',
       'run_number', 'group_timestamp'],
      dtype='object')

In [23]:
ga_df['X'] = ga_df.apply(lambda row: f"Pop={int(row['Population Size'])}, Mut={row['Mutation Rate']}", axis=1)

In [24]:
data = {'Population Size': [], 'Mutation Rate': [], 'Best Fitness Mean': [], 'Best Time Mean': [], 
        'Best FEvals Mean': [], 'Best Iterations Mean': []}
for group in sorted(ga_df['X'].unique()):
    df = ga_df[ga_df['X'] == group]
    assert(len(df['group_timestamp'].unique()) == 1)
    last_iter = np.inf
    for run_i in df['run_number'].unique():
        run_df = df[df['run_number'] == run_i]
        max_iter = run_df['Iteration'].max()
        last_iter = min(max_iter, last_iter)
    best_fitness = -np.inf
    best_time = None 
    best_feval = None
    best_iters = None
    for iter_i in range(1, last_iter+1):
        iter_df = df[df['Iteration'] == iter_i]
        mean_fit = iter_df['Fitness'].mean()
        if mean_fit > best_fitness:
            best_fitness = mean_fit
            best_time = iter_df['Time'].mean()
            best_feval = iter_df['FEvals'].mean()
            best_iters = iter_i
    data['Population Size'].append(df['Population Size'].iloc[0])
    data['Mutation Rate'].append(f"{df['Mutation Rate'].iloc[0]:.2f}")
    data['Best Fitness Mean'].append(f"{best_fitness:.2f}")
    data['Best Time Mean'].append(f"{best_time:.2f}")
    data['Best FEvals Mean'].append(f"{best_feval:.0f}")
    data['Best Iterations Mean'].append(f"{best_iters:.0f}")
df = pd.DataFrame(data)
df.to_csv('saving_this_shit_metrics/ga_table.csv', index=False)
df.style.hide(axis='index')

Population Size,Mutation Rate,Best Fitness Mean,Best Time Mean,Best FEvals Mean,Best Iterations Mean
10,0.01,5.0,0.0,22,1
10,0.1,25.33,0.1,5157,467
10,0.5,28.67,0.06,3347,302
100,0.01,31.33,0.26,15155,149
100,0.1,30.0,0.13,7787,76
100,0.5,30.0,0.09,4757,46
1000,0.01,47.0,2.13,94100,93
1000,0.1,47.0,0.36,17023,16
1000,0.5,41.33,0.51,23030,22


In [25]:
sa_df = pd.read_csv('saving_this_shit_metrics/sa.csv')

In [26]:
sa_df.columns

Index(['Iteration', 'Time', 'Fitness', 'FEvals', 'Temperature', 'max_iters',
       'problem_size', 't_pct', 'decay_type', 'max_attempts', 'run_number',
       'group_timestamp'],
      dtype='object')

In [27]:
sa_df['X'] = sa_df.apply(lambda row: f"Temp={row['Temperature']}, DecayType={row['decay_type']}", axis=1)

In [28]:
data = {'Temperature': [], 'Decay Type': [], 
        'Best Fitness Mean': [], 'Best Time Mean': [], 
        'Best FEvals Mean': [], 'Best Iterations Mean': []}
for group in sorted(sa_df['X'].unique()):
    df = sa_df[sa_df['X'] == group]
    assert(len(df['group_timestamp'].unique()) == 1)
    last_iter = np.inf
    for run_i in df['run_number'].unique():
        run_df = df[df['run_number'] == run_i]
        max_iter = run_df['Iteration'].max()
        last_iter = min(max_iter, last_iter)
    best_fitness = -np.inf
    best_time = None 
    best_feval = None
    best_iters = None
    for iter_i in range(1, last_iter+1):
        iter_df = df[df['Iteration'] == iter_i]
        mean_fit = iter_df['Fitness'].mean()
        if mean_fit > best_fitness:
            best_fitness = mean_fit
            best_time = iter_df['Time'].mean()
            best_feval = iter_df['FEvals'].mean()
            best_iters = iter_i
    data['Temperature'].append(df['Temperature'].iloc[0])
    data['Decay Type'].append(f"{df['decay_type'].iloc[0]}")
    data['Best Fitness Mean'].append(f"{best_fitness:.2f}")
    data['Best Time Mean'].append(f"{best_time:.2f}")
    data['Best FEvals Mean'].append(f"{best_feval:.0f}")
    data['Best Iterations Mean'].append(f"{best_iters:.0f}")
df = pd.DataFrame(data)
df.to_csv('saving_this_shit_metrics/sa_table.csv', index=False)
df.style.hide(axis='index')

Temperature,Decay Type,Best Fitness Mean,Best Time Mean,Best FEvals Mean,Best Iterations Mean
1,arith_decay,30.0,0.15,1629,1298
1,exp_decay,28.33,0.02,664,461
1,geom_decay,30.0,0.02,577,403
10,arith_decay,8.67,4.88,15390,7738
10,exp_decay,26.33,0.07,1439,863
10,geom_decay,26.33,0.03,870,548
100,arith_decay,5.67,1.78,9338,4671
100,exp_decay,27.0,0.18,2594,1442
100,geom_decay,27.33,0.06,1307,775
1000,arith_decay,8.67,3.61,13276,6638


In [49]:
rhc_df = pd.read_csv('saving_this_shit_metrics/rhc.csv')

In [50]:
rhc_df.columns

Index(['Iteration', 'Time', 'Fitness', 'FEvals', 'Restarts', 'max_iters',
       'current_restart', 'problem_size', 't_pct', 'max_attempts',
       'run_number', 'group_timestamp'],
      dtype='object')

In [51]:
rhc_df['X'] = rhc_df.apply(lambda row: f"Restarts={row['Restarts']}", axis=1)

In [52]:
gt = rhc_df.iloc[-1]['group_timestamp']
df = rhc_df[rhc_df['group_timestamp'] == gt]
len(df)

81918

In [53]:
df['Restarts'].unique(), df['max_iters'].unique(), df['run_number'].unique()

(array([50]), array([10000]), array([0, 1, 2]))

In [54]:
df1 = df[df['run_number'] == 1]
len(df1)

25780

In [55]:
t = 0
for cr in df1['current_restart'].unique():
    temp = df1[df1['current_restart'] == cr]
    # t += temp['Time'].max()
    t += temp['Iteration'].max()
print(t)

25779


In [56]:
df.iloc[-1]

Iteration                    460
Time                    0.020226
Fitness                     30.0
FEvals                   28702.0
Restarts                      50
max_iters                  10000
current_restart               50
problem_size                  30
t_pct                        0.4
max_attempts                 150
run_number                     2
group_timestamp       1718949969
X                  Restarts=50.0
Name: 297502, dtype: object

In [42]:
from tqdm import tqdm
data = {'Restarts': [], 
        'Best Fitness Mean': [], 'Best Time Mean': [], 
        'Best FEvals Mean': [], 'Best Iterations Mean': []}
# for group in sorted(rhc_df['X'].unique()):
    # df = rhc_df[rhc_df['X'] == group]
for rest in sorted(rhc_df['Restarts'].unique()):
    print(f"rest: {rest}")
    df = rhc_df[rhc_df['Restarts'] == rest]
    assert(len(df['group_timestamp'].unique()) == 1)

    for run_i in df['run_number'].unique():
        print(f"run_i: {run_i}")
        run_df = df[df['run_number'] == run_i]
        restart_dict = {}
        for i, row in tqdm(run_df.iterrows(), total=len(run_df)):
            current_restart = row['current_restart']
            count_iters = 0
            count_time = 0
            if current_restart not in restart_dict:
                for prev_rest in range(current_restart):
                    temp_df = run_df[run_df['current_restart'] == prev_rest]
                    count_iters += temp_df['Iteration'].max()
                    count_time += temp_df['Time'].max()
                restart_dict[current_restart] = (count_iters, count_time)
            else:
                count_iters, count_time = restart_dict[current_restart]
            df.loc[i, 'count_iterations'] = count_iters + row['Iteration']
            df.loc[i, 'count_time'] = count_time + row['Time']

    last_iter = np.inf
    for run_i in df['run_number'].unique():
        run_df = df[df['run_number'] == run_i]
        max_iter = run_df['count_iterations'].max()
        last_iter = min(max_iter, last_iter)
    best_fitness = -np.inf
    best_time = None 
    best_feval = None
    best_iters = None
    for iter_i in range(1, int(last_iter)+1):
        iter_df = df[df['count_iterations'] == iter_i]
        mean_fit = iter_df['Fitness'].mean()
        if mean_fit > best_fitness:
            best_fitness = mean_fit
            best_time = iter_df['count_time'].mean()
            best_feval = iter_df['FEvals'].mean()
            best_iters = iter_i
    data['Restarts'].append(df['Restarts'].iloc[0])
    data['Best Fitness Mean'].append(f"{best_fitness:.2f}")
    data['Best Time Mean'].append(f"{best_time:.2f}")
    data['Best FEvals Mean'].append(f"{best_feval:.0f}")
    data['Best Iterations Mean'].append(f"{best_iters:.0f}")
df = pd.DataFrame(data)
df.to_csv('saving_this_shit_metrics/rhc_table.csv', index=False)
df.style.hide(axis='index')

rest: 1
run_i: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[i, 'count_iterations'] = count_iters + row['Iteration']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[i, 'count_time'] = count_time + row['Time']
100%|██████████| 1111/1111 [00:00<00:00, 7712.61it/s]


run_i: 1


100%|██████████| 992/992 [00:00<00:00, 9383.97it/s]


run_i: 2


100%|██████████| 861/861 [00:00<00:00, 9378.92it/s]


rest: 10
run_i: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[i, 'count_iterations'] = count_iters + row['Iteration']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[i, 'count_time'] = count_time + row['Time']
100%|██████████| 5724/5724 [00:00<00:00, 9591.91it/s]


run_i: 1


100%|██████████| 6014/6014 [00:00<00:00, 9812.30it/s]


run_i: 2


100%|██████████| 5652/5652 [00:00<00:00, 9704.95it/s]


rest: 20
run_i: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[i, 'count_iterations'] = count_iters + row['Iteration']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[i, 'count_time'] = count_time + row['Time']
100%|██████████| 11544/11544 [00:01<00:00, 9723.94it/s]


run_i: 1


100%|██████████| 10153/10153 [00:01<00:00, 9689.76it/s]


run_i: 2


100%|██████████| 11067/11067 [00:01<00:00, 9539.29it/s]


rest: 50
run_i: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[i, 'count_iterations'] = count_iters + row['Iteration']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[i, 'count_time'] = count_time + row['Time']
100%|██████████| 28171/28171 [00:03<00:00, 9096.12it/s]


run_i: 1


100%|██████████| 25780/25780 [00:02<00:00, 9245.56it/s]


run_i: 2


100%|██████████| 27967/27967 [00:03<00:00, 9241.63it/s]


rest: 100
run_i: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[i, 'count_iterations'] = count_iters + row['Iteration']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[i, 'count_time'] = count_time + row['Time']
100%|██████████| 53608/53608 [00:06<00:00, 8715.75it/s]


run_i: 1


100%|██████████| 53098/53098 [00:06<00:00, 8763.51it/s]


run_i: 2


100%|██████████| 55761/55761 [00:06<00:00, 8884.61it/s]


Restarts,Best Fitness Mean,Best Time Mean,Best FEvals Mean,Best Iterations Mean
1,26.67,0.01,339,328
10,29.67,0.24,3978,3875
20,29.67,0.28,3978,3875
50,30.0,1.34,19398,18898
100,30.0,1.16,19398,18898


In [None]:
np.mean([28702, 26425, 28923])