This notebook spot-checks the learned weight patterns of the models trained via `scripts/training/yuanyuan_8k_a_3day/feature_approximation/local_pcn_recurrent/submit_sep2.py`.

In [1]:
from thesis_v2 import dir_dict
from thesis_v2.training.training_aux import load_training_results

In [2]:
from sys import path
from os.path import join, exists, dirname
from os import makedirs



In [3]:
folder_to_check = 'scripts/training/yuanyuan_8k_a_3day/feature_approximation/local_pcn_recurrent'
path.insert(0, join(dir_dict['root'], folder_to_check))
from submit_sep2 import param_iterator

In [4]:
from numpy.linalg import norm
import numpy as np

In [5]:
from sklearn.decomposition import PCA
from pickle import dump

In [6]:


def calc_raw_stats(weight_matrix):
    """Print summary statistics of the per-filter mean and standard deviation.

    weight_matrix is N * D, where
    N = number of filters and
    D = size of a (flattened) filter.

    Two lines are printed: the mean and std (across filters) of the per-filter
    means, then the mean and std (across filters) of the per-filter stds.
    Nothing is returned.
    """
    # compute both per-filter statistics up front, then report them in order.
    per_filter_stats = (
        ('mean_all', weight_matrix.mean(axis=1)),
        ('std_all', weight_matrix.std(axis=1)),
    )
    for label, values in per_filter_stats:
        print(label, values.mean(), values.std())
    
def calc_normed_pca(weight_matrix, verbose=False):
    """Fit a full PCA on the filters after scaling every row to unit L2 norm.

    `normed` here means PCA after normalization of weight_matrix. There are
    multiple ways to define `normalization`; the simple one used here gives
    each flattened filter (each row) unit norm.

    Parameters
    ----------
    weight_matrix : 2D array, one flattened filter per row.
    verbose : when True, additionally assert that the normalized rows really
        have unit norm, and print the cumulative explained variance ratio of
        the first 20 components plus the shape of the component matrix.

    Returns
    -------
    dict with keys
        'weight_matrix_norm' : per-row L2 norms, kept as a column (keepdims).
        'weight_matrix' : the input matrix as passed in (NOT the normalized one).
        'components' : `components_` of the fitted PCA object.
        'explained_variance_ratio' : `explained_variance_ratio_` of the fitted PCA object.
    """
    row_norms = norm(weight_matrix, axis=1, keepdims=True)
    unit_rows = weight_matrix / row_norms

    if verbose:
        # sanity check of the normalization; only run on verbose calls.
        assert np.allclose(norm(unit_rows, axis=1), 1.0)

    # PCA with all components kept, on the unit-norm rows.
    pca_obj = PCA(svd_solver='full')
    pca_obj.fit(unit_rows)

    if verbose:
        # author's note from an earlier check: top 10 explains 60%, top 20 explains 85%;
        # judged fine compared to https://doi.org/10.1101/677237
        # "Recurrent networks can recycle neural resources to flexibly trade speed for accuracy in visual recognition"
        cumulative_ratio = np.cumsum(pca_obj.explained_variance_ratio_)
        print(cumulative_ratio[:20])
        print(pca_obj.components_.shape)

    # everything the caller needs to save.
    result = dict()
    result['weight_matrix_norm'] = row_norms
    result['weight_matrix'] = weight_matrix
    result['components'] = pca_obj.components_
    result['explained_variance_ratio'] = pca_obj.explained_variance_ratio_
    return result

def construct_back_by_interleave(x1, x2, by=16):
    """Interleave the rows of two equally shaped 2D arrays in chunks of `by` rows.

    The output starts with the first `by` rows of x1, then the first `by` rows
    of x2, then the next `by` rows of x1, and so on.

    Both inputs must be 2D with identical shapes, and their row count must be
    a multiple of `by`; these conditions are checked with assertions.

    Returns an array of shape (rows(x1) + rows(x2), columns).
    """
    rows_a, cols_a = x1.shape
    rows_b, cols_b = x2.shape
    assert rows_a % by == 0
    assert rows_b % by == 0
    assert cols_a == cols_b and rows_a == rows_b

    chunks_a = x1.reshape(rows_a // by, by, cols_a)
    chunks_b = x2.reshape(rows_b // by, by, cols_b)
    # pair up chunk i of x1 with chunk i of x2: (n_chunks, 2, by, cols);
    # flattening the leading three axes then yields the interleaved row order.
    paired = np.stack((chunks_a, chunks_b), axis=1)
    return paired.reshape((rows_a + rows_b, cols_a))


def save_one_result(filename, data):
    """Pickle `data` to `filename`, creating the parent directory when needed.

    Parameters
    ----------
    filename : path of the pickle file to write; an existing file is overwritten.
    data : any picklable object.

    A bare file name (no directory component) is written relative to the
    current working directory.
    """
    parent_dir = dirname(filename)
    # dirname() is empty for a bare file name, and makedirs('') raises
    # FileNotFoundError, so only create a directory when there is one to create.
    if parent_dir:
        makedirs(parent_dir, exist_ok=True)
    with open(filename, 'wb') as f:
        dump(data, f)

In [7]:
# create dir
# notebook-wide settings; 'save_dir' is the directory under which results are saved.
global_vars = dict(
    save_dir=join(
        dir_dict['analyses'],
        'yuanyuan_8k_a_3day+feature_approximation+local_pcn_recurrent+submit_sep2',
        'pca',
    ),
)

def prepare_dir():
    """Make sure the directory `global_vars['save_dir']` exists.

    Missing parent directories are created as well. Calling this when the
    directory already exists is a no-op.
    """
    # exist_ok=True replaces the previous exists()-then-makedirs() pair: it is
    # free of the check/create race and matches how save_one_result creates
    # directories. A non-directory sitting at this path now raises instead of
    # being silently accepted.
    makedirs(global_vars['save_dir'], exist_ok=True)
# create the output directory as soon as this cell is executed.
prepare_dir()

In [8]:
def collect_all():
    """Run unit-norm PCA on the `conv0` weights of every configuration and save the results.

    For every configuration yielded by `param_iterator()` (enumerated as `idx`):

    1. load the checkpoint for `data['key_this_original']` and take
       `moduledict.conv0.weight` as a float64 array of shape (16, 32, k, k),
       with (k, k) being (7, 7) or (9, 9);
    2. split it along the second axis into a `ff` half (first 16 channels) and
       a `lateral` half (remaining 16 channels), and flatten `ff`, `lateral`
       and the full tensor (`all`) to one row per (first-axis, second-axis)
       channel pair;
    3. run `calc_normed_pca` on each of the three matrices;
    4. pickle each result to `<save_dir>/<idx>/unit_norm/<name>.pkl`, with
       `<name>` in {ff, lateral, all}.

    A configuration is skipped only when all three of its files already exist;
    if some are missing, all three are recomputed and overwritten. Diagnostics
    are printed for every 50th configuration, and the total number of
    configurations iterated (skipped ones included) is printed at the end.
    """
    save_dir = global_vars['save_dir']
    # a tuple rather than a set, so the three files are always written in the same order.
    result_names = ('ff', 'lateral', 'all')
    # leading two dimensions of the full conv0 weight, and of each of its two halves.
    channel_shape_all = (16, 32)
    channel_shape = (16, 16)

    count = 0
    for idx, data in enumerate(param_iterator()):
        count += 1
        # only every 50th configuration prints diagnostics.
        verbose = idx % 50 == 0

        key = data['key_this_original']

        def filename_gen(x):
            return join(save_dir, f'{idx}/unit_norm', f'{x}.pkl')

        if all(exists(filename_gen(x)) for x in result_names):
            # right now if partially done, the partial results will be fully overwritten.
            continue

        # then load weights.
        result = load_training_results(key, return_model=False, return_checkpoint=True)
        # use float64 for all analysis to retain precision.
        weight_np = result['checkpoint']['model']['moduledict.conv0.weight'].numpy().astype(np.float64)
        assert len(weight_np.shape) == 4 and weight_np.shape[:2] == channel_shape_all

        kernel_shape = weight_np.shape[2:]
        assert kernel_shape in {(7, 7), (9, 9)}
        kernel_numel = kernel_shape[0] * kernel_shape[1]

        # according to the way the model is written
        # (scripts/training/yuanyuan_8k_a_3day/feature_approximation/local_pcn_recurrent/master.py),
        # in the `32` part of (16, 32, k, k), first 16 channels are input from lower layer.
        weight_np_ff = weight_np[:, :channel_shape[1]]
        # rest 16 channels are input from recurrent layer.
        weight_np_lateral = weight_np[:, channel_shape[1]:]
        assert weight_np_ff.shape == weight_np_lateral.shape == channel_shape + kernel_shape

        # flatten to (number of filters, size of a flattened filter).
        new_shape = (channel_shape[0] * channel_shape[1], kernel_numel)
        weight_np_ff = weight_np_ff.reshape(new_shape)
        weight_np_lateral = weight_np_lateral.reshape(new_shape)

        new_shape_all = (channel_shape_all[0] * channel_shape_all[1], kernel_numel)
        weight_np_all = weight_np.reshape(new_shape_all)

        # sanity check: interleaving the two halves in groups of 16 rows
        # must give back exactly the flattened full weight.
        weight_np_all_debug = construct_back_by_interleave(weight_np_ff, weight_np_lateral)
        assert np.array_equal(
            weight_np_all,
            weight_np_all_debug,
        )

        if verbose:
            print('idx', idx)
            print('key', key)
            # show some raw stats.
            calc_raw_stats(weight_np_ff)
            calc_raw_stats(weight_np_lateral)
            calc_raw_stats(weight_np_all)

        pca_res_dict = {
            'ff': calc_normed_pca(weight_np_ff, verbose),
            'lateral': calc_normed_pca(weight_np_lateral, verbose),
            'all': calc_normed_pca(weight_np_all, verbose),
        }

        # simply store each result as its own pickle, laid out as
        # `<idx>/unit_norm/<ff|lateral|all>.pkl`; other kinds of normalization
        # can later be added as sibling folders of `unit_norm`.
        for fn in result_names:
            save_one_result(filename_gen(fn), pca_res_dict[fn])

    print(count)


In [9]:
# run the whole sweep; diagnostics are printed for every 50th configuration (idx % 50 == 0),
# followed by the total number of configurations iterated.
collect_all()

idx 0
key yuanyuan_8k_a_3day/feature_approximation_lpcn_recurrent_sep2/baseidx10/actrelu/lossmse/k7/bn_preTrue/model_seed0
mean_all -0.00018880494135170006 0.006698544208436294
std_all 0.01944413025675884 0.007153826594466825
mean_all 0.00042352848567291416 0.007041900223180945
std_all 0.02207085077286015 0.009327162321586481
mean_all 0.00011736177216060698 0.006879183431866645
std_all 0.020757490514809497 0.00841495758313068
[0.16479427 0.258138   0.34052148 0.40926025 0.46463458 0.51096948
 0.5451942  0.57807923 0.60954633 0.63958864 0.66515607 0.68836399
 0.71070547 0.73058255 0.7496009  0.76711914 0.78359972 0.79837939
 0.81274754 0.82636937]
(49, 49)
[0.16905511 0.26684794 0.3489334  0.40692816 0.45874946 0.50389226
 0.54580793 0.5813355  0.61249352 0.64184447 0.66929289 0.69448178
 0.71745058 0.73707575 0.75649949 0.77319779 0.78946152 0.80391945
 0.81763972 0.83058278]
(49, 49)
[0.15808512 0.25203545 0.32993069 0.38984277 0.43881649 0.48198359
 0.52020868 0.55481284 0.58485809 0