In [1]:
%matplotlib inline
from matplotlib import pyplot as plt 

In [2]:
from thesis_v2 import dir_dict
from os.path import join

import pandas as pd
import numpy as np

In [3]:
from thesis_v2.configs.model.maskcnn_polished_with_rcnn_k_bl import (
    explored_models_20200218 as param_iterator_obj_k_bl,
    keygen as keygen_k_bl
)

from thesis_v2.configs.model.maskcnn_polished_with_local_pcn import (
    explored_models_summer_2019_certain as param_iterator_obj_local_pcn,
    keygen as keygen_local_pcn
)

In [4]:
from thesis_v2.data.prepared.yuanyuan_8k import get_data, get_indices, get_neural_data
from thesis_v2.data.raw import load_data
from thesis_v2.training.training_aux import load_training_results

In [5]:
# TODO: verify my param_iterator_obj_local_pcn is the same as the original one.
# well, you can achieve this by comparing the output of 
# scripts/training/yuanyuan_8k_a_3day/maskcnn_polished_with_local_pcn/submit_certain_configs_refactored.py
# AND
# scripts/training/yuanyuan_8k_a_3day/maskcnn_polished_with_local_pcn/submit_certain_configs.py

In [19]:
global_vars = {
    'cases': {
        'k_bl': {
            'metric_file': join(dir_dict['analyses'],
                                'metrics_yuanyuan_8k_a_3day+maskcnn_polished_with_rcnn_k_bl+20200218.pkl'),
            'param_iterator': param_iterator_obj_k_bl,
            'keygen': keygen_k_bl,
            'cls_level': 'rcnn_bl_cls',
            'base_key': 2,
            'improved_key': 4,
            'fc_size': 14,
            'dir_response': join(
                dir_dict['analyses'],
                'responses_yuanyuan_8k_a_3day+maskcnn_polished_with_rcnn_k_bl+20200218'
            ),
        },
        'local_pcn_50': {
            'metric_file': join(dir_dict['analyses'],
                                'metrics_yuanyuan_8k_a_3day+maskcnn_polished_with_local_pcn+certain_configs.pkl'),
            'metric_filter': {'input_size': 50},
            'param_iterator': param_iterator_obj_local_pcn,
            'param_filter': lambda x: x['input_size'] == 50,
            'keygen': keygen_local_pcn,
            'cls_level': 'pcn_cls',
            'base_key': 1,
            'improved_key': (2,3,4,5),
            'fc_size': 14,
            'dir_response': join(
                dir_dict['analyses'],
                'responses_yuanyuan_8k_a_3day+maskcnn_polished_with_local_pcn+certain_configs'
            ),
        },
        'local_pcn_100': {
            'metric_file': join(dir_dict['analyses'],
                                'metrics_yuanyuan_8k_a_3day+maskcnn_polished_with_local_pcn+certain_configs.pkl'),
            # only use subset of rows, using pd.DataFrame.xs
            'metric_filter': {'input_size': 100},
            'param_iterator': param_iterator_obj_local_pcn,
            'param_filter': lambda x: x['input_size'] == 100,
            'keygen': keygen_local_pcn,
            'cls_level': 'pcn_cls',
            'base_key': 1,
            'improved_key': (2,3,4,5),
            'fc_size': 31,
            'dir_response': join(
                dir_dict['analyses'],
                'responses_yuanyuan_8k_a_3day+maskcnn_polished_with_local_pcn+certain_configs'
            ),
        }
    }
}

In [20]:
def load_shared_ground_truth(debug=False):
    # load neural reponses on all.
    neural_data = get_neural_data(('042318', '043018', '051018'), scale=0.5)
    idx_train, idx_val, idx_test = get_indices('a', 'legacy')
    images = load_data('yuanyuan_8k_images', 'a')['images']
    
    print(neural_data.shape, images.shape)
    
    if debug:
        data_gt = get_data('a', 200, 50,
                 ('042318', '043018', '051018'),
                 scale=0.5,
                 seed='legacy')


        assert np.array_equal(data_gt[5], neural_data[idx_test])
        assert np.array_equal(data_gt[3], neural_data[idx_val])
        assert np.array_equal(data_gt[1], neural_data[idx_train])

    return {
        'neural_data': neural_data,
        'idx_train': idx_train,
        'idx_val': idx_val,
        'idx_test': idx_test,
        'images': images,
    }

In [23]:
def load_ground_truth(case_name, case_config):
    
    # load neural responses on test data.
    # go over each config,
    # load responses on test data, collected over each cls.
    # load masks averaged over all configs.
    
    param_iterator_obj = case_config['param_iterator']
    keygen = case_config['keygen']
    fc_size = case_config['fc_size']
    dir_response = case_config['dir_response']
    
    rf_map_all = []
    
    response_map = {}
    param_filter = case_config.get('param_filter', lambda _: True)
    
    count = 0
    
    
    for idx, param in enumerate(param_iterator_obj().generate()):
        if idx % 20 == 0:
            print(idx)
        if not param_filter(param):
            continue
        count += 1
        key = keygen(**{k: v for k, v in param.items() if k not in {'scale', 'smoothness'}})
        
        # load model params.
        result = load_training_results(key, return_model=False,return_checkpoint=True)
        rf_map = result['checkpoint']['model']['moduledict.fc.weight_spatial'].numpy()
        assert rf_map.shape == (79, fc_size, fc_size)
        
        rf_map = abs(rf_map)
        # make each mask sum to one.
        rf_map = rf_map/np.sum(rf_map,axis=(1,2),keepdims=True)
#         print(rf_map.sum())
        rf_map_all.append(rf_map)
    
    
        # get response.
        file_to_load_this = join(dir_response, key, 'test' + '.npy')
        resp = np.load(file_to_load_this)
        assert resp.shape == (1600, 79)
        
        
        cls_this = param[case_config['cls_level']]
        if cls_this not in response_map:
            response_map[cls_this] = []
            
        response_map[cls_this].append(resp)
    
    for cls_value in response_map:
        response_map[cls_value] = np.asarray(response_map[cls_value])
    
    for kk, vv in response_map.items():
        print(kk, vv.shape)
    
    # average over all normalized map
    rf_map_all = np.asarray(rf_map_all).mean(axis=0)
#     print(rf_map_all.sum())
    print(rf_map_all.shape)

    return {
        'nunm_cases': count,
        # all 79 maps.
        'rf_map': rf_map_all,
    }
    
load_ground_truth(None,global_vars['cases']['local_pcn_50'])  

0


FileNotFoundError: [Errno 2] No such file or directory: '/my_data/thesis-yimeng-v2/results/analyses/responses_yuanyuan_8k_a_3day+maskcnn_polished_with_rcnn_k_bl+20200218/yuanyuan_8k_a_3day/maskcnn_polished_with_local_pcn/s_selegacy/in_sz50/out_ch16/num_l2/k_l19/k_p3/ptavg/bn_b_actTrue/bn_a_fcFalse/actrelu/p_c0/p_bypassFalse/p_n_actFalse/p_bn_pFalse/p_actTrue/p_bnTrue/p_biasTrue/sc0.01/sm0.000005/lmse/m_se0/test.npy'

In [24]:
load_ground_truth(None,global_vars['cases']['local_pcn_100'])  

0
20
40
60
80
100
120
140
160
180
200
220
240
260
280
300
320
340
360
380
400
420
440
460
480
500
520
540
560
580
600
620
640
660
680
700
720
740
760
780
800
820
840
860
880
900
920
940
960
980
1000
1020
1040
1060
1080
1100
1120
1140
1160
1180
1200
1220
1240
1260
1280
1300
1320
1340
1360
1380
1400
1420
1440
1460
1480
1500
1520
1540
1560
1580
1600
1620
1640
1660
1680
1700
1720
1740
1760
1780
1800
1820
1840
1860
1880
1900
1920
1940
1960
1980
2000
2020
2040
2060
2080
2100
2120
2140
2160
2180
2200
2220
2240
2260
2280
2300
0 (192, 1600, 79)
1 (192, 1600, 79)
2 (192, 1600, 79)
3 (192, 1600, 79)
4 (192, 1600, 79)
5 (192, 1600, 79)
(79, 31, 31)


{'nunm_cases': 1152,
 'rf_map': array([[[9.60995851e-04, 3.13948095e-03, 4.78348549e-04, ...,
          3.72561975e-04, 3.78090335e-04, 4.11142450e-04],
         [4.60950250e-04, 6.13679353e-04, 2.92069861e-04, ...,
          1.66415062e-03, 2.25735130e-04, 1.90143022e-04],
         [2.22078263e-04, 2.59687105e-04, 2.50283891e-04, ...,
          8.09564022e-04, 3.11436015e-04, 2.01943592e-04],
         ...,
         [3.40137485e-04, 4.45728394e-04, 2.59066437e-04, ...,
          3.42535757e-04, 8.11291859e-03, 4.84541385e-03],
         [3.97733995e-04, 2.91021803e-04, 5.79404819e-04, ...,
          2.88065698e-04, 3.91666498e-03, 2.11987947e-03],
         [2.54744955e-04, 3.34982644e-04, 4.86117846e-04, ...,
          3.62338353e-04, 1.74468150e-03, 4.14092210e-04]],
 
        [[4.59937059e-04, 1.95439818e-04, 1.49128027e-04, ...,
          2.33823404e-04, 2.93978403e-04, 4.02704463e-04],
         [1.89184822e-04, 4.33573150e-04, 1.30390195e-04, ...,
          2.58776156e-04, 6.4660538

In [15]:
def process_one_case(case_name, case_config):
    # load data
    print(f'process case {case_name}')
    df = pd.read_pickle(case_config['metric_file'])
    
    # only work on test data.
    df = df.xs('test', level='subset')
    
    
    # collect neurons top ranked and worst ranked under various measures.
    # I will use of two: mse, and avg(mse, cc, cc2)
    # this part is easy to config.
    
    neuron_ranking_info = None
    
    
    
    # load relevance ground truth data, such as neural response,
    # mask of each network (averaged across different circles),
    # (not upsampled yet)
    gt_dict = load_ground_truth(case_name, case_config)
    
    
    
    # for each ranking mode,
    # then plot.
    
    # I will calculate everything,
    # but visualize top 10 neurons, bottom 10 neurons.
    #
    # for each neuron, I show
    #  0) image mask.
    #  1) the best responding images over all 8K images
    #  2) response distribution over all data
    #  3) top 20 images that contribute most to recurrence, and response improvement
    #  4) top 20 images that contribute least to recurrence, and response improvement
    #  
    # all the visualization is done on an area centered around mask and covers 90% of the energy in the mask.