#### Experiment: 

Evaluate hebbian growth

#### Motivation:

Control all other variables and evaluate hebbian growth alone

#### Conclusions:

- Hebbian growth reaches slightly lower accuracy, about 0.2 percentage points (mean best validation accuracy 0.979 vs 0.981), but it converges faster (mean best-validation epoch 18 vs 24) compared to random growth.
- Results are consistent with expectation: it accelerates learning, but focusing early on a few specific units might prevent the neural network from finding a different set of connections that can lead to higher performance.


In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Put the directory two levels above the notebook on the import path.
# Presumably this is the project root that provides `dynamic_sparse` --
# TODO confirm against the repository layout.
sys.path.append("../../")

In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *
from dynamic_sparse.common.browser import *

## Load and check data

In [4]:
# Names of the experiment result directories to analyse; each one is
# resolved under ~/nta/results with "~" expanded to the home directory.
exps = ['neurips_debug_test13', ]
paths = [os.path.expanduser("~/nta/results/{}".format(e)) for e in exps]
# `load_many` is not defined in this notebook; presumably it comes from the
# wildcard import of dynamic_sparse.common.browser -- TODO confirm.
# The cells below use its result as a pandas DataFrame (one row per trial).
df = load_many(paths)

In [5]:
df.head(5)

Unnamed: 0,Experiment Name,train_acc_max,train_acc_max_epoch,train_acc_min,train_acc_min_epoch,train_acc_median,train_acc_last,val_acc_max,val_acc_max_epoch,val_acc_min,...,momentum,network,num_classes,on_perc,optim_alg,pruning_early_stop,test_noise,use_kwinners,weight_decay,weight_prune_perc
0,0_hebbian_grow=True,0.98965,27,0.926333,0,0.98745,0.989083,0.9783,16,0.9618,...,0.9,MLPHeb,10,0.2,SGD,0,False,False,0.0001,0.3
1,1_hebbian_grow=False,0.992483,27,0.923767,0,0.989317,0.991917,0.981,19,0.9622,...,0.9,MLPHeb,10,0.2,SGD,0,False,False,0.0001,0.3
2,2_hebbian_grow=True,0.990167,28,0.925483,0,0.986908,0.9891,0.9795,22,0.9648,...,0.9,MLPHeb,10,0.2,SGD,0,False,False,0.0001,0.3
3,3_hebbian_grow=False,0.992417,25,0.926733,0,0.989283,0.992067,0.9813,18,0.9605,...,0.9,MLPHeb,10,0.2,SGD,0,False,False,0.0001,0.3
4,4_hebbian_grow=True,0.989933,23,0.92735,0,0.986875,0.989267,0.978,7,0.9625,...,0.9,MLPHeb,10,0.2,SGD,0,False,False,0.0001,0.3


In [6]:
# Replace missing pruning percentages with 0.0 so these columns contain no
# NaN values (presumably a missing value means no pruning was configured --
# TODO confirm). `fillna` is used instead of `replace(np.nan, 0.0, regex=True)`:
# the regex flag is only meaningful for string patterns and had no effect here.
df['hebbian_prune_perc'] = df['hebbian_prune_perc'].fillna(0.0)
df['weight_prune_perc'] = df['weight_prune_perc'].fillna(0.0)

In [7]:
df.columns

Index(['Experiment Name', 'train_acc_max', 'train_acc_max_epoch',
       'train_acc_min', 'train_acc_min_epoch', 'train_acc_median',
       'train_acc_last', 'val_acc_max', 'val_acc_max_epoch', 'val_acc_min',
       'val_acc_min_epoch', 'val_acc_median', 'val_acc_last', 'epochs',
       'experiment_file_name', 'trial_time', 'mean_epoch_time', 'batch_norm',
       'data_dir', 'dataset_name', 'debug_sparse', 'debug_weights', 'device',
       'hebbian_grow', 'hebbian_prune_perc', 'hidden_sizes', 'input_size',
       'learning_rate', 'lr_gamma', 'lr_milestones', 'lr_scheduler', 'model',
       'momentum', 'network', 'num_classes', 'on_perc', 'optim_alg',
       'pruning_early_stop', 'test_noise', 'use_kwinners', 'weight_decay',
       'weight_prune_perc'],
      dtype='object')

In [8]:
df.shape

(16, 42)

In [9]:
df.iloc[1]

Experiment Name                                      1_hebbian_grow=False
train_acc_max                                                    0.992483
train_acc_max_epoch                                                    27
train_acc_min                                                    0.923767
train_acc_min_epoch                                                     0
train_acc_median                                                 0.989317
train_acc_last                                                   0.991917
val_acc_max                                                         0.981
val_acc_max_epoch                                                      19
val_acc_min                                                        0.9622
val_acc_min_epoch                                                       0
val_acc_median                                                    0.97855
val_acc_last                                                       0.9797
epochs                                

In [10]:
# Count the trials for each distinct value of the `model` column.
df.groupby('model')['model'].count()

model
DSNNMixedHeb    16
Name: model, dtype: int64

 ## Analysis

Experiment Details

In [11]:
# Did any trials fail? Count the trials that recorded fewer than 30 epochs.
df.loc[df["epochs"] < 30, "epochs"].count()

0

In [12]:
# Keep a copy of every loaded trial in `df_origin`, then restrict `df` to the
# trials that recorded at least 30 epochs (failed/incomplete ones are dropped).
df_origin = df.copy()
df = df_origin.loc[df_origin["epochs"] >= 30]
df.shape

(16, 42)

In [13]:
# Which trials failed -- or are they still running?
# Flag every trial with fewer than 30 recorded epochs and list their epoch counts.
df_origin['failed'] = df_origin["epochs"].lt(30)
df_origin.loc[df_origin['failed'], 'epochs']

Series([], Name: epochs, dtype: int64)

In [17]:
# helper functions
def mean_and_std(s):
    """Format the mean and standard deviation of ``s`` as ``"mean ± std"``.

    Both numbers are rendered with three decimals. ``s`` must provide
    ``mean()`` and ``std()`` methods (e.g. a pandas Series).
    """
    center = s.mean()
    spread = s.std()
    return "{:.3f} ± {:.3f}".format(center, spread)

def round_mean(s):
    """Return the mean of ``s`` rounded to the nearest integer, as a string.

    ``s`` must provide a ``mean()`` method (e.g. a pandas Series).
    """
    nearest = round(s.mean())
    return "{:.0f}".format(nearest)

# Summary statistics reported for each validation-accuracy column.
stats = ['min', 'max', 'mean', 'std']

def agg(columns, filter=None, round=3):
    """Summarise validation accuracy per group of trials.

    Groups the notebook-level ``df`` by ``columns`` and reports, for each
    group: the rounded mean of ``val_acc_max_epoch``, the ``stats`` of
    ``val_acc_max`` and ``val_acc_last``, and the number of trials (counted
    on the ``model`` column).

    Args:
        columns: column name(s) handed to ``DataFrame.groupby``.
        filter: optional row selector applied as ``df[filter]`` before
            grouping; ``None`` keeps every row.
        round: number of decimals handed to ``DataFrame.round``.

    Returns:
        The aggregated DataFrame, rounded to ``round`` decimals.
    """
    # Pick the rows once; the aggregation itself is the same in both cases.
    selected = df if filter is None else df[filter]
    spec = {
        'val_acc_max_epoch': round_mean,
        'val_acc_max': stats,
        'val_acc_last': stats,
        'model': ['count'],
    }
    summary = selected.groupby(columns).agg(spec)
    return summary.round(round)


##### Does hebbian growth improve on random growth?

In [18]:
# Compare the trials grouped by whether hebbian growth was enabled.
agg(['hebbian_grow'])

Unnamed: 0_level_0,val_acc_max_epoch,val_acc_max,val_acc_max,val_acc_max,val_acc_max,val_acc_last,val_acc_last,val_acc_last,val_acc_last,model
Unnamed: 0_level_1,round_mean,min,max,mean,std,min,max,mean,std,count
hebbian_grow,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
False,24,0.979,0.981,0.981,0.001,0.977,0.98,0.979,0.001,8
True,18,0.978,0.98,0.979,0.001,0.974,0.98,0.976,0.002,8


- Hebbian growth reaches slightly lower accuracy, about 0.2 percentage points (mean best validation accuracy 0.979 vs 0.981), but it converges faster (mean best-validation epoch 18 vs 24) compared to random growth.
- Results are consistent with expectation: it accelerates learning, but focusing early on a few specific units might prevent the neural network from finding a different set of connections that can lead to higher performance.