In [2]:
import jax
jax.config.update('jax_platform_name', 'cpu')
import pickle as pkl
import matplotlib.pyplot as plt
import jax.numpy as jnp
import numpy as np 
import os
import numpy as np
import pickle as pkl
import jax.numpy as jnp
from tqdm import tqdm
from jax.tree_util import tree_map,tree_flatten,tree_flatten_with_path,keystr,tree_map_with_path
import matplotlib.pyplot as plt
from jax.numpy.linalg import matrix_norm,vector_norm


def compare_stats_settings(path):
    """Summarise every setting under ``path`` and pick the one with the best mean test accuracy.

    Expected layout: ``path/<setting>/<run>/stats.pkl``. Each ``stats.pkl`` is a pickled
    mapping with ``"test_acc"`` and ``"train_acc"`` entries. Both are flattened with
    ``tree_flatten_with_path``; their leaves are stacked along a new leading axis and the
    key of each leaf must parse as an integer.
    NOTE(review): presumably the keys are training steps and the last axis enumerates
    independently trained models/seeds -- confirm against the code that writes stats.pkl.
    The stacked arrays are indexed as 2-D, so every leaf is assumed to be 1-D.

    Entries of ``path`` that are not directories, and runs without a ``stats.pkl``, are
    skipped.

    Args:
        path: Directory containing one sub-directory per setting.

    Returns:
        ``(best, settings)`` where ``settings`` maps each setting name to a tuple
        ``(argmax_keys, max_test_accs, train_accs_at_argmax)`` of NumPy arrays concatenated
        over all runs of that setting, and ``best`` is
        ``(mean(max_test_accs), setting_name, settings[setting_name])`` for the setting with
        the highest mean. Returns ``(None, None)`` when ``path`` is not a directory or no
        setting contained any ``stats.pkl``.
    """
    if not os.path.isdir(path):
        return None, None

    settings = {}
    for setting in tqdm(os.listdir(path)):
        setting_dir = os.path.join(path, setting)
        # Stray files next to the setting folders (.DS_Store, logs, ...) would make the
        # inner os.listdir raise NotADirectoryError, so only descend into directories.
        if not os.path.isdir(setting_dir):
            continue

        best_keys = []
        best_test_accs = []
        matching_train_accs = []
        for run in os.listdir(setting_dir):
            stats_file = os.path.join(setting_dir, run, "stats.pkl")
            if not os.path.isfile(stats_file):
                continue
            # NOTE: unpickling can execute arbitrary code -- only use on trusted experiment output.
            with open(stats_file, "rb") as f:
                stats = pkl.load(f)

            test_entries = tree_flatten_with_path(stats["test_acc"])[0]
            train_entries = tree_flatten_with_path(stats["train_acc"])[0]

            keys = [key_path for key_path, _ in test_entries]
            test_acc = jnp.stack([leaf for _, leaf in test_entries])
            train_acc = jnp.stack([leaf for _, leaf in train_entries])

            # Row (i.e. key) with the highest test accuracy for every column; computed once
            # and reused for the key lookup and for both gathers below.
            best_rows = np.asarray(jnp.argmax(test_acc, axis=0))
            columns = np.arange(test_acc.shape[-1])

            # keystr renders a single dict key as "[<key>]"; strip the brackets and parse it.
            best_keys.append(np.asarray([int(keystr(keys[row])[1:-1]) for row in best_rows]))
            best_test_accs.append(np.asarray(test_acc[best_rows, columns]))
            # Train accuracy is read at the position of the best *test* accuracy.
            matching_train_accs.append(np.asarray(train_acc[best_rows, columns]))

        if best_keys:
            settings[setting] = (
                np.concatenate(best_keys),
                np.concatenate(best_test_accs),
                np.concatenate(matching_train_accs),
            )

    if not settings:
        return None, None

    best_setting = max(settings, key=lambda name: np.mean(settings[name][1]))
    best = (np.mean(settings[best_setting][1]), best_setting, settings[best_setting])
    return best, settings

def _stat_axis_values(entries):
    """Convert the key paths of flattened stats entries into x-axis values (ints when possible)."""
    # keystr renders a single dict key as "[<key>]"; strip the surrounding brackets.
    labels = [keystr(key_path)[1:-1] for key_path, _ in entries]
    try:
        # Numeric x values give a proportionally spaced axis with sane ticks; leaving them
        # as strings makes matplotlib draw a categorical axis with one tick per key.
        return np.asarray([int(label) for label in labels])
    except ValueError:
        # Non-integer keys: fall back to the categorical labels.
        return np.asarray(labels)


def plot_stats(*paths):
    """Plot mean train/test accuracy curves for one or more runs, one panel per run.

    For every ``path`` the file ``path/stats.pkl`` is loaded; its ``"train_acc"`` and
    ``"test_acc"`` entries are flattened, averaged over their last axis, and drawn against
    their keys together with the train-minus-test gap. Paths without a ``stats.pkl`` leave
    their panel empty. The y-axis is fixed to ``[0, 1]``.

    Args:
        *paths: Run directories, each expected to contain a ``stats.pkl``.

    Returns:
        None. Calling with no paths is a no-op.
    """
    if not paths:
        # plt.subplots rejects ncols=0, so there is nothing to draw.
        return

    # squeeze=False always yields a 2-D array of axes, so the single-path case needs no
    # special handling.
    fig, axs = plt.subplots(nrows=1, ncols=len(paths), squeeze=False)
    fig.set_size_inches(len(paths) * 4, 6)

    for path, ax in zip(paths, axs[0]):
        stats_file = os.path.join(path, "stats.pkl")
        if not os.path.isfile(stats_file):
            continue

        # NOTE: unpickling can execute arbitrary code -- only use on trusted experiment output.
        with open(stats_file, "rb") as f:
            stats = pkl.load(f)

        train_entries = tree_flatten_with_path(stats["train_acc"])[0]
        test_entries = tree_flatten_with_path(stats["test_acc"])[0]

        train_x = _stat_axis_values(train_entries)
        train_y = jnp.mean(jnp.stack([leaf for _, leaf in train_entries]), axis=-1)
        test_x = _stat_axis_values(test_entries)
        test_y = jnp.mean(jnp.stack([leaf for _, leaf in test_entries]), axis=-1)

        ax.plot(train_x, train_y, label="train acc", c="blue")
        ax.plot(test_x, test_y, label="test acc", c="green")
        # Generalisation gap; requires train and test curves to have the same length.
        ax.plot(test_x, train_y - test_y, label="dif", c="red")
        ax.set_ylim(0.0, 1.0)
        ax.set_title(path)
        ax.set_ylabel("accuracy")
        ax.legend()
        

In [2]:

# Scan each experiment family's result directory. For every family, `max_*` holds the
# best setting as (mean best test acc, setting name, per-run arrays) and `settings_*`
# holds the per-setting arrays; both are None when the directory is missing or
# contains no stats.pkl files.
max_standard,settings_standard = compare_stats_settings("./exps_adam/standard")
max_wd,settings_wd = compare_stats_settings("./exps_adam/wd")
max_norm,settings_norm = compare_stats_settings("./exps_adam/norm")
max_mean_norm,settings_mean_norm = compare_stats_settings("./exps_adam/mean_norm")
max_mean_std,settings_mean_std = compare_stats_settings("./exps_adam/mean_std")
max_reverse_mean_norm,settings_reverse_mean_norm = compare_stats_settings("./exps_adam/reverse_mean_norm")
max_svd_static_exp_fit,settings_svd_static_exp_fit = compare_stats_settings("./exps_adam/svd_static_exp_fit")
max_denseSVD,settings_denseSVD = compare_stats_settings("./sidequest/DenseSVD")

100%|██████████| 3/3 [00:00<00:00,  7.08it/s]
100%|██████████| 42/42 [00:01<00:00, 30.45it/s]
100%|██████████| 30/30 [00:00<00:00, 72.17it/s]
100%|██████████| 30/30 [00:00<00:00, 71.04it/s]
100%|██████████| 30/30 [00:00<00:00, 78.91it/s]
100%|██████████| 23/23 [00:00<00:00, 91.97it/s]
100%|██████████| 80/80 [00:02<00:00, 30.21it/s]
100%|██████████| 2/2 [00:00<00:00, 43.25it/s]


In [None]:
# Show the best setting found for each experiment family. Each value is either None or
# (mean best test acc, setting name, (argmax keys, best test accs, train accs at argmax)).
print("max_standard: ", max_standard)
print("max_wd: ", max_wd)
print("max_norm: ", max_norm)
print("max_mean_norm: ", max_mean_norm)
print("max_mean_std: ", max_mean_std)
print("max_reverse_mean_norm: ", max_reverse_mean_norm)
print("max_svd_static_exp_fit: ", max_svd_static_exp_fit)
print("max_denseSVD: ", max_denseSVD)

max_standard:  (0.55133337, 'standard0.0001', (array([ 94285, 146516,  12884]), array([0.5514    , 0.55020005, 0.55240005], dtype=float32), array([0.99988, 1.     , 0.84012], dtype=float32)))
max_wd:  (0.56270003, '0.01', (array([ 73810, 116181, 107870]), array([0.56350005, 0.56380004, 0.5608    ], dtype=float32), array([0.7115601 , 0.73210007, 0.72648025], dtype=float32)))
max_norm:  (0.5727667, '0.5_100', (array([144295, 126159,  37094]), array([0.5766    , 0.57000005, 0.57170004], dtype=float32), array([0.75571996, 0.75999993, 0.7449    ], dtype=float32)))
max_mean_norm:  (0.5722334, '0.5_100', (array([99785, 99785, 71930]), array([0.5727    , 0.5715    , 0.57250005], dtype=float32), array([0.75540006, 0.74758   , 0.7473401 ], dtype=float32)))
max_mean_std:  (0.40440002, '0.9_100', (array([ 92797, 135496,    280]), array([0.40230003, 0.40739998, 0.40350005], dtype=float32), array([0.40379995, 0.4064    , 0.4099801 ], dtype=float32)))
max_reverse_mean_norm:  (0.57366675, '0.5_1', (ar

In [3]:
# Compare the best setting of the plain "norm" family against its step-scale variants.
# A printed None means the directory does not exist or holds no stats.pkl files.
print(compare_stats_settings("./exps_adam/norm")[0])
print(compare_stats_settings("./exps_adam/norm_stepscale")[0])
print(compare_stats_settings("./exps_adam/norm_stepscale_reverse")[0])
print(compare_stats_settings("./exps_adam/norm_layerwise_stepscale")[0])

100%|██████████| 30/30 [00:02<00:00, 14.15it/s]

(0.5727667, '0.5_100', (array([144295, 126159,  37094]), array([0.5766    , 0.57000005, 0.57170004], dtype=float32), array([0.75571996, 0.75999993, 0.7449    ], dtype=float32)))
None
None
None



