#### Experiment: 

Compare pruning by Hebbian Learning and by Weight Magnitude.

#### Motivation.

Verify whether pruning by Hebbian Learning outperforms pruning by weight magnitude.

#### Conclusions:
- Results are referenced as (hebbian_prune_perc, weight_prune_perc) pairs
- No pruning (0,0) leads to an accuracy of 0.976
- Pruning all connections at every epoch (1,0) leads to an accuracy of 0.964
- The best performing model still uses no hebbian pruning, with weight pruning set to 0.2 (0.981)
- Pruning only by hebbian learning decreases accuracy
- Combining hebbian and weight magnitude pruning is not an improvement over simple weight magnitude pruning


In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("../../")

In [26]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *
from dynamic_sparse.common.browser import *

## Load and check data

In [27]:
# Experiments to analyse; each name is expanded to its results folder under ~/nta/results.
exps = ['neurips_debug_test6']
results_dir_template = "~/nta/results/{}"
paths = [os.path.expanduser(results_dir_template.format(name)) for name in exps]
df = load_many(paths)

In [28]:
df.head(5)

Unnamed: 0,Experiment Name,train_acc_max,train_acc_max_epoch,train_acc_min,train_acc_min_epoch,train_acc_median,train_acc_last,val_acc_max,val_acc_max_epoch,val_acc_min,...,momentum,network,num_classes,on_perc,optim_alg,pruning_early_stop,test_noise,use_kwinners,weight_decay,weight_prune_perc
0,"0_hebbian_prune_perc=None,weight_prune_perc=None",0.988333,28,0.92345,0,0.985358,0.988,0.9768,29,0.9614,...,0.9,MLPHeb,10,0.2,SGD,0,False,False,0.0001,
1,"1_hebbian_prune_perc=0.2,weight_prune_perc=None",0.974583,27,0.924417,0,0.970733,0.974483,0.9753,5,0.9609,...,0.9,MLPHeb,10,0.2,SGD,0,False,False,0.0001,
2,"2_hebbian_prune_perc=0.4,weight_prune_perc=None",0.96825,25,0.926067,0,0.963083,0.967533,0.971,20,0.9623,...,0.9,MLPHeb,10,0.2,SGD,0,False,False,0.0001,
3,"3_hebbian_prune_perc=0.6,weight_prune_perc=None",0.957933,23,0.926083,0,0.952508,0.957533,0.9673,23,0.9589,...,0.9,MLPHeb,10,0.2,SGD,0,False,False,0.0001,
4,"4_hebbian_prune_perc=0.8,weight_prune_perc=None",0.943033,22,0.923467,2,0.936533,0.935983,0.9665,9,0.9514,...,0.9,MLPHeb,10,0.2,SGD,0,False,False,0.0001,


In [30]:
df['on_perc'].unique()

array([0.2])

In [6]:
# Trials launched without a pruning percentage have NaN in these columns;
# treat a missing value as 0.0 (no pruning of that kind).
df['hebbian_prune_perc'] = df['hebbian_prune_perc'].fillna(0.0)
df['weight_prune_perc'] = df['weight_prune_perc'].fillna(0.0)

In [7]:
df.columns

Index(['Experiment Name', 'train_acc_max', 'train_acc_max_epoch',
       'train_acc_min', 'train_acc_min_epoch', 'train_acc_median',
       'train_acc_last', 'val_acc_max', 'val_acc_max_epoch', 'val_acc_min',
       'val_acc_min_epoch', 'val_acc_median', 'val_acc_last', 'epochs',
       'experiment_file_name', 'trial_time', 'mean_epoch_time', 'batch_norm',
       'data_dir', 'dataset_name', 'debug_sparse', 'debug_weights', 'device',
       'hebbian_grow', 'hebbian_prune_perc', 'hidden_sizes', 'input_size',
       'learning_rate', 'lr_gamma', 'lr_milestones', 'lr_scheduler', 'model',
       'momentum', 'network', 'num_classes', 'on_perc', 'optim_alg',
       'pruning_early_stop', 'test_noise', 'use_kwinners', 'weight_decay',
       'weight_prune_perc'],
      dtype='object')

In [8]:
df.shape

(108, 42)

In [9]:
df.iloc[1]

Experiment Name           1_hebbian_prune_perc=0.2,weight_prune_perc=None
train_acc_max                                                    0.974583
train_acc_max_epoch                                                    27
train_acc_min                                                    0.924417
train_acc_min_epoch                                                     0
train_acc_median                                                 0.970733
train_acc_last                                                   0.974483
val_acc_max                                                        0.9753
val_acc_max_epoch                                                       5
val_acc_min                                                        0.9609
val_acc_min_epoch                                                       0
val_acc_median                                                     0.9694
val_acc_last                                                       0.9672
epochs                                

In [10]:
df.groupby('model')['model'].count()

model
DSNNMixedHeb    108
Name: model, dtype: int64

 ## Analysis

Experiment Details

In [11]:
# Did any trials fail? Count the trials that ran for fewer than 30 epochs.
df.loc[df["epochs"] < 30, "epochs"].count()

0

In [12]:
# Keep an untouched copy of every trial in df_origin, then retain in df
# only the trials that ran for at least 30 epochs.
df_origin = df.copy()
completed = df_origin["epochs"] >= 30
df = df_origin[completed]
df.shape

(108, 42)

In [13]:
# Which trials failed (or are still ongoing)?
# Flag every trial with fewer than 30 epochs and list their epoch counts.
df_origin['failed'] = df_origin["epochs"] < 30
df_origin.loc[df_origin['failed'], 'epochs']

Series([], Name: epochs, dtype: int64)

In [14]:
# helper functions
def mean_and_std(s):
    """Format ``s`` as "mean ± std", each value shown with three decimals.

    ``s`` must expose ``mean()`` and ``std()`` (e.g. a pandas Series).
    """
    template = "{:.3f} ± {:.3f}"
    average = s.mean()
    spread = s.std()
    return template.format(average, spread)

def round_mean(s):
    """Return the mean of ``s``, rounded to a whole number, as a string."""
    nearest = round(s.mean())
    return "{:.0f}".format(nearest)

stats = ['min', 'max', 'mean', 'std']

def agg(columns, filter=None, round=3):
    """Summarise validation accuracy of the notebook-level ``df`` per group.

    Args:
        columns: column name(s) passed to ``groupby``.
        filter: optional indexer applied as ``df[filter]`` before grouping;
            when None the whole ``df`` is used.
        round: number of decimals passed to ``DataFrame.round`` on the result.

    Returns:
        A DataFrame with, per group, the rounded mean of ``val_acc_max_epoch``
        (via ``round_mean``), the ``stats`` aggregations of ``val_acc_max``
        and the count of ``model``.
    """
    # Both the filtered and unfiltered paths share the same aggregation.
    frame = df if filter is None else df[filter]
    summary = frame.groupby(columns).agg({
        'val_acc_max_epoch': round_mean,
        'val_acc_max': stats,
        'model': ['count'],
    })
    return summary.round(round)


##### What are the optimal levels of hebbian and weight pruning?

In [15]:
# Exclude trials where weight_prune_perc = 1: their results are not reliable.
filter = df['weight_prune_perc'] < 1

In [16]:
agg(['hebbian_prune_perc'], filter)

Unnamed: 0_level_0,val_acc_max_epoch,val_acc_max,val_acc_max,val_acc_max,val_acc_max,model
Unnamed: 0_level_1,round_mean,min,max,mean,std,count
hebbian_prune_perc,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
0.0,23,0.976,0.982,0.979,0.002,15
0.2,17,0.973,0.982,0.978,0.003,15
0.4,21,0.97,0.981,0.977,0.004,15
0.6,20,0.967,0.982,0.977,0.004,15
0.8,18,0.964,0.981,0.977,0.006,15
1.0,19,0.963,0.982,0.977,0.007,15


* No relevant difference: mean accuracy stays between 0.977 and 0.979 across all hebbian pruning levels

In [18]:
# Keep only trials with weight_prune_perc < 1, then summarise per weight pruning level.
filter = df['weight_prune_perc'] < 1
agg(columns=['weight_prune_perc'], filter=filter)

Unnamed: 0_level_0,val_acc_max_epoch,val_acc_max,val_acc_max,val_acc_max,val_acc_max,model
Unnamed: 0_level_1,round_mean,min,max,mean,std,count
weight_prune_perc,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
0.0,13,0.963,0.977,0.97,0.005,18
0.2,23,0.978,0.982,0.98,0.001,18
0.4,22,0.977,0.982,0.98,0.001,18
0.6,20,0.978,0.981,0.979,0.001,18
0.8,20,0.976,0.98,0.978,0.001,18


* Optimal level between 0.2 and 0.4 (consistent with previous experiments and SET paper, where 0.3 is an optimal value)

In [24]:
# Magnitude-only trials: no hebbian pruning, weight pruning below 0.6.
no_hebbian = df['hebbian_prune_perc'] == 0.0
low_weight_prune = df['weight_prune_perc'] < 0.6
magonly = no_hebbian & low_weight_prune
agg(columns=['weight_prune_perc'], filter=magonly)

Unnamed: 0_level_0,val_acc_max_epoch,val_acc_max,val_acc_max,val_acc_max,val_acc_max,model
Unnamed: 0_level_1,round_mean,min,max,mean,std,count
weight_prune_perc,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
0.0,22,0.976,0.977,0.976,0.0,3
0.2,25,0.979,0.982,0.981,0.001,3
0.4,24,0.98,0.981,0.98,0.0,3


##### What is the optimal combination of both?

In [20]:
# Best validation accuracy (mean ± std) for every hebbian/weight pruning combination.
df[filter].pivot_table(
    index='hebbian_prune_perc',
    columns='weight_prune_perc',
    values='val_acc_max',
    aggfunc=mean_and_std,
)

weight_prune_perc,0.0,0.2,0.4,0.6,0.8
hebbian_prune_perc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.0,0.976 ± 0.000,0.981 ± 0.001,0.980 ± 0.000,0.980 ± 0.001,0.979 ± 0.001
0.2,0.974 ± 0.001,0.979 ± 0.001,0.979 ± 0.003,0.979 ± 0.002,0.977 ± 0.001
0.4,0.971 ± 0.001,0.980 ± 0.001,0.979 ± 0.001,0.979 ± 0.001,0.978 ± 0.001
0.6,0.969 ± 0.001,0.979 ± 0.001,0.980 ± 0.001,0.979 ± 0.001,0.978 ± 0.001
0.8,0.966 ± 0.001,0.980 ± 0.001,0.980 ± 0.002,0.980 ± 0.001,0.978 ± 0.000
1.0,0.964 ± 0.001,0.981 ± 0.001,0.981 ± 0.001,0.980 ± 0.001,0.980 ± 0.001


In [25]:
# Final-epoch validation accuracy (mean ± std) for every hebbian/weight pruning combination.
df[filter].pivot_table(
    index='hebbian_prune_perc',
    columns='weight_prune_perc',
    values='val_acc_last',
    aggfunc=mean_and_std,
)

weight_prune_perc,0.0,0.2,0.4,0.6,0.8
hebbian_prune_perc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.0,0.974 ± 0.003,0.980 ± 0.001,0.977 ± 0.002,0.978 ± 0.001,0.976 ± 0.001
0.2,0.969 ± 0.002,0.978 ± 0.002,0.976 ± 0.001,0.977 ± 0.001,0.975 ± 0.001
0.4,0.967 ± 0.000,0.977 ± 0.002,0.978 ± 0.002,0.975 ± 0.002,0.977 ± 0.003
0.6,0.967 ± 0.003,0.977 ± 0.001,0.978 ± 0.001,0.976 ± 0.002,0.975 ± 0.001
0.8,0.961 ± 0.000,0.979 ± 0.001,0.978 ± 0.001,0.978 ± 0.002,0.976 ± 0.002
1.0,0.954 ± 0.001,0.979 ± 0.000,0.979 ± 0.001,0.978 ± 0.001,0.976 ± 0.002


In [21]:
df.shape

(108, 42)

#### Conclusions:
- Results are referenced as (hebbian_prune_perc, weight_prune_perc) pairs
- No pruning (0,0) leads to an accuracy of 0.976
- Pruning all connections at every epoch (1,0) leads to an accuracy of 0.964
- The best performing model still uses no hebbian pruning, with weight pruning set to 0.2 (0.981)
- Pruning only by hebbian learning decreases accuracy
- Combining hebbian and weight magnitude pruning is not an improvement over simple weight magnitude pruning
