In [35]:
## Load numbers


import os
import numpy as np
import pandas as pd


# Directory that holds one sub-directory per task; it is scanned with
# os.listdir() further down. The default is the original hard-coded location;
# set the OVERSHOOT_TABLE1_ROOT environment variable to point the notebook at
# a different copy of the logs without editing this cell.
# root = '/home/kopi/kinit/table1/'
root = os.environ.get('OVERSHOOT_TABLE1_ROOT', '/home/kopal/overshoot/lightning_logs/table1')


# Tasks to include in the analysis. The keys are matched against the entry
# names found directly under `root`; any entry whose name is not a key here is
# skipped. The values look like short display labels -- they are not read by
# any of the cells visible here, so confirm their use against later cells.
task_name_mapping = {
    "mlp_housing": "MLP-CA",
    "vae_f-mnist": "VAE-FM",
    "vae_mnist": "VAE-M",
    "2c2d_f-mnist": "2c2d-FM",
    "3c3d_cifar10": "3c3d-C10",
    "resnet18_cifar100": "ResNet-C100",
    "gpt_hf_qqp": "GPT-2",
}

# Optimizer runs to include. The keys are matched against the entry names
# found inside each task directory (see process_task); entries with any other
# name are ignored. The `*_baseline` and `*_overshoot_{3,5,7}` keys are the
# ones looked up by the step-reduction cell. The values look like short display
# labels -- they are not read by any of the cells visible here (TODO confirm).
optimizers_names_mapping = {
    "sgd_baseline": "CM",
    "nesterov": "NAG",
    "sgd_overshoot_3": "SGD3",
    "sgd_overshoot_5": "SGD5",
    "sgd_overshoot_7": "SGD7",
    "adam_baseline": "Adam",
    "nadam": "Nadam",
    "adam_overshoot_3": "Adam3",
    "adam_overshoot_5": "Adam5",
    "adam_overshoot_7": "Adam7",
}



def average_convergance(path, column='base_loss_1', stats_file='training_stats.csv'):
    """Average one logged column element-wise over every seed run under `path`.

    Each immediate entry of `path` that contains `stats_file` is treated as one
    seed run. Entries without that file (stray files, notebook checkpoint
    folders, runs that never wrote their stats) are skipped instead of
    aborting the whole load.

    Args:
        path: Directory containing one sub-directory per seed.
        column: Name of the CSV column to average.
        stats_file: Name of the CSV file expected inside each seed directory.

    Returns:
        1-D ``np.ndarray`` whose i-th entry is the mean of row i of `column`
        across all seed runs that were found.

    Raises:
        FileNotFoundError: If no entry of `path` contains `stats_file`.
        KeyError: If a stats file has no column named `column`.
        ValueError: If the seed runs logged different numbers of rows
            (raised by ``np.stack``).
    """
    curves = []
    # Sorted so the summation order -- and therefore the floating-point result --
    # does not depend on the arbitrary order os.listdir() returns.
    for seed in sorted(os.listdir(path)):
        stats_path = os.path.join(path, seed, stats_file)
        if not os.path.isfile(stats_path):
            continue
        curves.append(pd.read_csv(stats_path)[column].to_numpy())

    if not curves:
        # np.mean([]) would only emit a warning and return nan, which then
        # fails much later with an unrelated-looking error.
        raise FileNotFoundError(
            f"no '{stats_file}' found in any sub-directory of {path!r}"
        )
    return np.mean(np.stack(curves), axis=0)

def process_task(path):
    """Load the seed-averaged curve of every recognised optimizer run of a task.

    Only entries of `path` whose name is a key of `optimizers_names_mapping`
    and which are directories are loaded; everything else is ignored.

    Args:
        path: Directory of one task, containing one sub-directory per method.

    Returns:
        Dict mapping method directory name -> result of
        ``average_convergance`` for that directory.
    """
    recognised = (
        entry for entry in os.listdir(path) if entry in optimizers_names_mapping
    )

    curves_by_method = {}
    for method_name in recognised:
        method_dir = os.path.join(path, method_name)
        if not os.path.isdir(method_dir):
            continue
        curves_by_method[method_name] = average_convergance(method_dir)
    return curves_by_method



# Build `tasks`: task directory name -> {method name -> seed-averaged curve},
# restricted to the directories under `root` that are listed in
# `task_name_mapping`. Progress is printed once per task that gets loaded.
tasks = {}
for task_name in os.listdir(root):
    if task_name in task_name_mapping:
        print("Processing", task_name)
        tasks[task_name] = process_task(os.path.join(root, task_name))

Processing resnet18_cifar100
Processing 3c3d_cifar10
Processing vae_f-mnist
Processing vae_mnist
Processing mlp_housing
Processing gpt_hf_qqp
Processing 2c2d_f-mnist


In [56]:
# Smooth every seed-averaged curve with a moving average of `avg_size`
# consecutive entries. Window k covers values[k : k + avg_size]. The windows
# stop one short of the end of the curve, so the very last entry never takes
# part in any window, and a curve with at most `avg_size` entries produces an
# empty array.
avg_size = 400
task_running_avg = {}
for task_name, task in tasks.items():
    task_running_avg[task_name] = {
        method_name: np.array([
            np.mean(values[start:start + avg_size])
            for start in range(len(values) - avg_size)
        ])
        for method_name, values in task.items()
    }

In [61]:

# For each task and optimizer family, measure how many (smoothed) steps each
# overshoot variant needs to reach the loss level at which the baseline has
# completed `loss_drop_fraction` of its total first-to-last loss decrease, and
# express that as a percentage reduction relative to the baseline's step count.

def _first_index_at_or_below(curve, threshold):
    """Return the first index i with curve[i] <= threshold, or None if there is none."""
    hits = np.where(np.asarray(curve) <= threshold)[0]
    return int(hits[0]) if hits.size else None


# Never filled in by this cell; kept only so the names stay defined.
reduction_sgd = [[], [], []]
reduction_adam = [[], [], []]
# resutls[family][k] collects one percentage per task for overshoot_variants[k].
resutls = {"sgd":[[], [], []], "adam":[[], [], []]}

loss_drop_fraction = 0.95      # share of the baseline's total loss decrease that defines the threshold
overshoot_variants = [3, 5, 7]  # numeric suffixes of the '<family>_overshoot_<c>' keys

for task_name, task in task_running_avg.items():
    for t in ["sgd", "adam"]:
        baseline = task[f'{t}_baseline']
        if len(baseline) == 0:
            raise ValueError(f"{task_name}: smoothed curve '{t}_baseline' is empty")

        baseline_threshold = baseline[0] - loss_drop_fraction * (baseline[0] - baseline[-1])
        baseline_steps = _first_index_at_or_below(baseline, baseline_threshold)
        if not baseline_steps:
            # None -> the curve contains NaN; 0 -> the loss did not decrease, so
            # the ratio below would divide by zero and yield inf/nan silently.
            raise ValueError(
                f"{task_name}: '{t}_baseline' reaches its own threshold at step "
                f"{baseline_steps}; relative step reduction is undefined"
            )

        for r, c in zip(resutls[t], overshoot_variants):
            steps = _first_index_at_or_below(task[f'{t}_overshoot_{c}'], baseline_threshold)
            if steps is None:
                raise ValueError(
                    f"{task_name}: '{t}_overshoot_{c}' never reaches the baseline "
                    f"threshold {baseline_threshold}"
                )
            # Positive: the variant got there in fewer steps than the baseline.
            r.append(100 * (baseline_steps - steps) / baseline_steps)




print([np.mean(r) for r in resutls["sgd"]])
print([np.mean(r) for r in resutls["adam"]])






[23.770347902564946, 26.19080263139313, 26.539956544187213]
[15.273993986447236, 19.53347561166684, 20.109127850947406]
