TODO: draw a scatter plot of the paired ranks, like the examples in
https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient

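Also, it seems that I do not need to compute ranks myself before computing Spearman's r: `scipy.stats.spearmanr` ranks its inputs internally, so the raw deltas could be passed directly (negating the deltas of metrics where larger is better, to keep the same sign convention as the reversed ranks).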

In [1]:
from thesis_v2 import dir_dict
from os.path import join

import pandas as pd
import numpy as np

In [2]:
# Path of the pickled metrics file used for the `rcnn_bl_cls` comparisons below;
# it lives under the project's 'analyses' directory taken from `dir_dict`.
metric_pkl_loc_k_bl = join(
    dir_dict['analyses'],
    'metrics_yuanyuan_8k_a_3day+maskcnn_polished_with_rcnn_k_bl+20200218.pkl'
)

In [3]:
# Load the pickled metrics table; its 'val_per' entry is what the analysis below consumes.
# NOTE(review): unpickling can execute arbitrary code, so only load files from a trusted source.
df_all_k_bl = pd.read_pickle(metric_pkl_loc_k_bl)

In [4]:
def find_sensitive_neurons(series_this, measure, cls_level, baseline_key, improved_key, num_neuron=None):
    """Average, per neuron, the relative change of a metric between two settings.

    For every row shared by the baseline and the improved selection, the
    relative change ``(improved - baseline) / baseline`` is computed for each
    neuron. Entries whose baseline value is exactly zero are skipped (counted
    as "bad cases"); the remaining entries are averaged per neuron.

    Parameters
    ----------
    series_this : pandas.Series
        Series with a MultiIndex that has at least the levels `cls_level` and
        ``'measure'``. Every value must be a 1-D ``numpy.ndarray`` holding one
        number per neuron. It should be generated by
        ``df_all.xs(...).xs(...)...['val_per']``.
    measure : hashable
        Key selected on the ``'measure'`` index level.
    cls_level : str
        Name of the index level that distinguishes baseline from improved.
    baseline_key : hashable
        Value of `cls_level` used as baseline (e.g. ``rcnn_bl_cls=2``).
    improved_key : int or sequence
        Value of `cls_level` used as the improved setting (e.g.
        ``rcnn_bl_cls=4``). If a sequence of values is given, the improved
        performance is the element-wise mean over all of them.
    num_neuron : int, optional
        Expected number of neurons per row. If omitted, it is inferred from
        the first baseline array; every row is still checked against it.

    Returns
    -------
    numpy.ndarray
        Float64 array of shape ``(num_neuron,)`` with the mean relative change
        per neuron. A neuron with no valid (nonzero-baseline) row gets NaN.

    Notes
    -----
    Prints one line: total number of (row, neuron) cases, number of bad cases,
    number of good cases, and the fraction of bad cases.
    """

    def _select(key):
        # Per-model performance arrays for one value of `cls_level`, restricted
        # to `measure`; sorted so different selections line up row by row.
        return series_this.xs(key=key, level=cls_level).xs(key=measure, level='measure').sort_index()

    baseline_perf = _select(baseline_key)

    # NumPy integers (e.g. values taken from an index) count as a single key too.
    if isinstance(improved_key, (int, np.integer)):
        improved_perf = _select(improved_key)
    else:
        # Several keys: average them (addition and division work at the
        # level of the per-row numpy arrays).
        improved_keys = list(improved_key)
        if not improved_keys:
            raise ValueError('improved_key must contain at least one key')
        improved_perf = _select(improved_keys[0])
        for improved_key_this in improved_keys[1:]:
            improved_perf = improved_perf + _select(improved_key_this)
        improved_perf = improved_perf / len(improved_keys)

    assert baseline_perf.index.equals(improved_perf.index)

    data = pd.DataFrame({
        'baseline': baseline_perf,
        'improved': improved_perf,
    })
    if len(data) == 0:
        raise ValueError('no rows selected for the given keys')

    if num_neuron is None:
        # Infer the neuron count from the data instead of hard-coding it.
        first_baseline = data['baseline'].iloc[0]
        assert isinstance(first_baseline, np.ndarray) and first_baseline.ndim == 1
        num_neuron = first_baseline.shape[0]

    delta_all = np.zeros((num_neuron,), dtype=np.float64)
    bad_case_count = np.zeros((num_neuron,), dtype=np.int64)
    good_case_count = np.zeros((num_neuron,), dtype=np.int64)
    all_case_count = 0

    for _, row in data.iterrows():
        baseline_this = row['baseline']
        improved_this = row['improved']

        assert isinstance(baseline_this, np.ndarray)
        assert isinstance(improved_this, np.ndarray)
        assert baseline_this.shape == (num_neuron,)
        assert improved_this.shape == (num_neuron,)

        # Relative gain, evaluated only where the baseline is nonzero so that
        # no division by zero (and no inf/nan intermediate) is ever produced.
        mask_this = baseline_this != 0
        bad_case_count += np.logical_not(mask_this).astype(np.int64)
        good_case_count += mask_this.astype(np.int64)
        delta_all[mask_this] += (
            (improved_this[mask_this] - baseline_this[mask_this]) / baseline_this[mask_this]
        )
        all_case_count += num_neuron

    # Mean over the valid rows of each neuron; neurons without any valid row
    # are reported as NaN explicitly rather than through a 0/0 warning.
    has_good_case = good_case_count > 0
    delta_mean = np.full((num_neuron,), np.nan, dtype=np.float64)
    delta_mean[has_good_case] = delta_all[has_good_case] / good_case_count[has_good_case]

    print(all_case_count, bad_case_count.sum(), good_case_count.sum(), bad_case_count.sum()/all_case_count)
    return delta_mean

In [5]:
from scipy.stats import spearmanr

In [6]:
def gen_rank(array_this, reverse):
    """Return the 0-based rank of every element of a 1-D array.

    With ``reverse=False`` the smallest value gets rank 0; with
    ``reverse=True`` the largest value gets rank 0. The output has the same
    length as `array_this`, and ``ranks[i]`` is the rank of ``array_this[i]``.
    """
    assert array_this.ndim == 1
    # Indices of the elements, listed from rank 0 upwards.
    ascending = np.argsort(array_this)
    by_rank = ascending[::-1] if reverse else ascending
    # Scatter the positions back: the element at index by_rank[r] has rank r.
    ranks = np.empty(by_rank.shape, dtype=by_rank.dtype)
    ranks[by_rank] = np.arange(len(array_this))
    return ranks

def find_sensitive_neurons_wrapper(series_this, cls_level, baseline_key, improved_key):
    """Rank neurons by relative improvement under three metrics and compare the rankings.

    The per-neuron relative deltas are computed with `find_sensitive_neurons`
    for 'mse_normed', 'cc2_normed' and 'cc_normed' (in that order), turned
    into ranks with `gen_rank`, and the pairwise Spearman correlations
    (mse-cc, cc-cc2, mse-cc2) are printed on one line.

    Returns a dict with the rank arrays under the keys 'mse', 'cc' and
    'rank_cc2'.
    """
    # Original author's note: whether the normed or un-normed variant of a
    # metric is used should not matter, as that does not affect the delta.
    deltas = {
        measure: find_sensitive_neurons(series_this, measure, cls_level, baseline_key, improved_key)
        for measure in ('mse_normed', 'cc2_normed', 'cc_normed')
    }

    # Rank 0 is always the "best" neuron. For mse a smaller delta is better;
    # for cc and cc2 a bigger delta is better, hence the reversed ranking.
    # Ranking approach:
    # https://stackoverflow.com/questions/5284646/rank-items-in-an-array-using-python-numpy-without-sorting-array-twice/
    rank_mse = gen_rank(deltas['mse_normed'], reverse=False)
    rank_cc = gen_rank(deltas['cc_normed'], reverse=True)
    rank_cc2 = gen_rank(deltas['cc2_normed'], reverse=True)

    # Pairwise agreement between the rankings: mse-cc, cc-cc2, mse-cc2.
    compared_pairs = (
        (rank_mse, rank_cc),
        (rank_cc, rank_cc2),
        (rank_mse, rank_cc2),
    )
    print(*[spearmanr(first, second)[0] for first, second in compared_pairs])

    return {
        'mse': rank_mse,
        'cc': rank_cc,
        'rank_cc2': rank_cc2,
    }

In [7]:
# Baseline rcnn_bl_cls=2 vs. improved rcnn_bl_cls=4.
find_sensitive_neurons_wrapper(
    series_this=df_all_k_bl['val_per'],
    cls_level='rcnn_bl_cls',
    baseline_key=2,
    improved_key=4,
)

60672 0 60672 0.0
60672 0 60672 0.0
60672 0 60672 0.0
0.7921616358325219 0.9223466407010711 0.7725170399221032


{'mse': array([46, 32, 65,  0, 49, 31, 63,  5, 56, 76,  4, 70, 20, 75, 39, 50, 60,
        24, 21, 30, 22, 34, 15, 55,  3, 10, 51, 35,  2, 17, 29, 69, 25,  7,
        57, 74, 72, 54, 47, 67, 52, 58, 73, 71, 40, 37, 77, 19, 27, 59, 68,
        28,  8,  6, 44, 53,  1, 43, 45, 12, 61, 78, 33, 18, 38, 48, 26, 23,
        66, 64, 62,  9, 13, 36, 42, 41, 16, 14, 11]),
 'cc': array([20, 34, 64,  5, 48, 42, 54, 18, 57, 68, 24, 70, 33, 72, 56, 52, 59,
        41, 39, 46, 23, 36,  0,  3, 29, 25, 55, 14, 13, 26,  7, 67, 30, 16,
        60, 73, 69, 50, 45, 66, 44, 58, 76, 71, 47, 32, 74,  4,  1,  8, 65,
        28, 10, 21, 27, 17,  9, 53, 43, 22, 61, 77, 35, 37, 40, 51, 38, 31,
        63, 75, 62, 11,  6,  2, 78, 49, 19, 12, 15]),
 'rank_cc2': array([19, 36, 65,  8, 50, 45, 53, 20, 58, 70, 25, 73, 32, 74, 55, 56, 59,
        44, 41, 49, 23, 37,  1,  4, 31, 26, 57, 13, 15, 28,  9, 69, 30, 17,
        61, 76, 71, 48, 46, 68, 42, 60, 77, 72, 47, 34, 75,  6,  3,  5, 66,
        29, 11, 24, 27, 16, 10,

In [8]:
# Baseline rcnn_bl_cls=2 vs. the mean of rcnn_bl_cls=3 and rcnn_bl_cls=4.
find_sensitive_neurons_wrapper(
    series_this=df_all_k_bl['val_per'],
    cls_level='rcnn_bl_cls',
    baseline_key=2,
    improved_key=[3, 4],
)

60672 0 60672 0.0
60672 0 60672 0.0
60672 0 60672 0.0
0.7221032132424537 0.9195715676728335 0.6865141187925998


{'mse': array([54, 33, 60,  0, 51, 26, 68,  6, 55, 76,  4, 72, 19, 75, 36, 44, 45,
        29, 20, 27, 24, 32, 18, 58,  3,  8, 47, 34,  2, 17, 28, 65, 30, 11,
        53, 73, 70, 39, 43, 67, 42, 59, 74, 71, 40, 35, 77, 23, 38, 63, 69,
        31,  7,  5, 48, 62,  1, 41, 56, 15, 57, 78, 25, 14, 37, 50, 21, 22,
        61, 64, 66, 10, 12, 52, 49, 46, 16, 13,  9]),
 'cc': array([22, 40, 63,  6, 49, 43, 60, 25, 56, 69, 26, 71, 30, 72, 53, 54, 57,
        45, 41, 47, 18, 39,  0,  1, 33, 23, 55, 12, 13, 24,  7, 65, 34, 17,
        58, 74, 68, 44, 42, 66, 35, 61, 76, 70, 48, 31, 75,  4,  2,  9, 67,
        29,  8, 20, 21, 15, 10, 51, 46, 27, 59, 77, 32, 36, 37, 52, 38, 28,
        62, 73, 64, 14,  5,  3, 78, 50, 19, 11, 16]),
 'rank_cc2': array([20, 41, 65,  8, 50, 45, 59, 24, 60, 71, 27, 73, 29, 74, 53, 55, 58,
        46, 42, 49, 18, 40,  1,  2, 35, 25, 56, 10, 15, 26,  9, 68, 36, 19,
        61, 76, 70, 43, 44, 67, 33, 64, 77, 72, 48, 34, 75,  6,  4,  5, 69,
        30, 11, 23, 22, 14, 13,

In [9]:
# Path of the pickled metrics file used for the `pcn_cls` comparisons below;
# it lives under the project's 'analyses' directory taken from `dir_dict`.
metric_pkl_loc_pcn = join(
    dir_dict['analyses'],
    'metrics_yuanyuan_8k_a_3day+maskcnn_polished_with_local_pcn+certain_configs.pkl'
)

# Load the pickled metrics table; its 'val_per' entry is what the analysis below consumes.
# NOTE(review): unpickling can execute arbitrary code, so only load files from a trusted source.
df_all_pcn = pd.read_pickle(metric_pkl_loc_pcn)

In [10]:
# Baseline pcn_cls=1 vs. the mean over pcn_cls=2, 3, 4 and 5.
# The rankings from the different metrics are poorly consistent with each
# other (see the Spearman correlations printed below), so this comparison is
# not studied further.
find_sensitive_neurons_wrapper(
    series_this=df_all_pcn['val_per'],
    cls_level='pcn_cls',
    baseline_key=1,
    improved_key=(2, 3, 4, 5),
)

30336 0 30336 0.0
30336 0 30336 0.0
30336 0 30336 0.0
0.29776046738072054 0.8749269717624149 0.08704965920155795


{'mse': array([62, 28, 19,  0, 51, 41, 76, 21, 57, 69,  9, 74, 11, 77,  6, 46,  4,
        52, 16, 13, 22, 38, 43, 64,  3, 24, 34, 40,  1, 37, 50, 61, 54, 36,
        26, 65, 14,  2, 44, 56, 27, 42, 39, 59,  5, 47, 60, 29, 73, 68, 15,
        33, 10,  7, 66, 71, 35, 75, 31, 30, 53, 78, 25, 32, 49, 63,  8, 45,
        48, 72, 55, 58, 18, 67, 70, 12, 23, 20, 17]),
 'cc': array([18, 15, 54, 27, 38,  3, 77, 41, 31, 72, 50, 74, 13, 76, 25, 68, 12,
        71, 44, 47, 35, 46,  7,  0, 52, 49, 20, 24, 30, 55, 14, 39, 75, 56,
        29, 36, 26,  6, 48, 64, 23, 67, 21, 70, 17, 58, 66,  8,  2,  5, 34,
        11, 16, 45, 69, 28, 60, 32, 61, 51, 42, 73,  9, 63, 59, 40, 37, 65,
        53, 78, 57, 62, 10,  1,  4, 33, 19, 22, 43]),
 'rank_cc2': array([19, 20, 58, 37, 36,  3, 78, 45, 10, 75, 43, 55, 13, 77, 31, 74, 14,
        76, 46, 52, 42, 48,  9,  1, 61, 53, 25, 32, 39, 63, 17, 38, 72, 62,
        21, 18, 28,  7, 51, 57, 30, 73, 23, 71, 22, 64, 69, 11,  5,  8, 40,
        16, 26, 50, 60, 29, 65,

In [11]:
# Baseline pcn_cls=1 vs. improved pcn_cls=3.
find_sensitive_neurons_wrapper(
    series_this=df_all_pcn['val_per'],
    cls_level='pcn_cls',
    baseline_key=1,
    improved_key=3,
)

30336 0 30336 0.0
30336 0 30336 0.0
30336 0 30336 0.0
0.35107108081791627 0.8829113924050633 0.13770691333982474


{'mse': array([60, 29, 28,  0, 52, 48, 74, 14, 34, 71, 42, 68, 15, 77,  7, 58,  6,
        65, 25, 27, 19, 35, 49, 55,  4, 24, 20, 37,  1, 36, 47, 66, 56, 26,
        18, 61, 22,  3, 45, 53, 31, 41, 44, 64,  2, 51, 62, 33, 78, 59, 11,
        32, 16,  5, 63, 73, 23, 75, 39, 13, 67, 76, 30, 50, 40, 54, 12, 46,
        43, 69, 57, 38,  8, 72, 70, 10, 17, 21,  9]),
 'cc': array([18, 19, 59, 26, 41,  3, 78, 40, 21, 73, 68, 65, 12, 76, 27, 69, 17,
        71, 46, 57, 36, 45, 16,  1, 56, 49, 11, 30, 34, 55, 15, 47, 75, 43,
        28, 22, 23,  6, 48, 63, 20, 60, 29, 74, 14, 61, 62, 10,  4,  5, 33,
         9, 24, 42, 70, 52, 50, 35, 72, 38, 53, 67,  8, 66, 54, 31, 39, 64,
        51, 77, 58, 44,  7,  2,  0, 32, 13, 25, 37]),
 'rank_cc2': array([22, 25, 64, 35, 37,  4, 78, 44, 11, 74, 62, 48, 15, 75, 30, 73, 16,
        77, 52, 60, 45, 51, 14,  1, 61, 56, 18, 29, 41, 58, 19, 42, 72, 50,
        23, 10, 24,  8, 54, 63, 26, 68, 28, 76, 21, 66, 67, 17,  5,  7, 40,
        12, 31, 49, 65, 33, 53,

In [12]:
# Baseline pcn_cls=1 vs. improved pcn_cls=2.
find_sensitive_neurons_wrapper(
    series_this=df_all_pcn['val_per'],
    cls_level='pcn_cls',
    baseline_key=1,
    improved_key=2,
)

30336 0 30336 0.0
30336 0 30336 0.0
30336 0 30336 0.0
0.45384615384615384 0.8879746835443038 0.2687439143135346


{'mse': array([46, 32,  9,  0, 26, 30, 74, 19, 27, 73, 67, 78,  4, 70, 23, 33, 34,
        22, 12,  5, 40, 16, 21, 59,  7, 36, 29, 58,  2, 51, 57, 49, 56, 17,
         8, 31, 76,  1, 45, 39, 25, 69, 52, 60,  3, 24, 75, 18, 50, 53, 35,
        14, 48,  6, 65, 68, 15, 71, 28, 13, 55, 77, 64, 66, 54, 43, 42, 41,
        38, 63, 62, 72, 20, 47, 61, 37, 10, 44, 11]),
 'cc': array([14, 23, 46, 29, 25,  4, 76, 42, 11, 68, 66, 77, 10, 69, 32, 57, 24,
        60, 35, 33, 56, 31,  5,  1, 52, 53, 27, 65, 30, 59, 18, 39, 64, 51,
        17, 13, 62,  7, 54, 37, 21, 75, 34, 70, 20, 40, 71,  8,  3,  6, 48,
        12, 47, 41, 67, 15, 38, 28, 55, 43, 26, 74,  9, 73, 63, 19, 44, 58,
        45, 78, 61, 72, 16,  2,  0, 49, 22, 50, 36]),
 'rank_cc2': array([17, 26, 53, 33, 28,  2, 74, 45,  7, 71, 47, 78, 12, 64, 31, 68, 21,
        69, 41, 38, 65, 37, 10,  1, 63, 60, 29, 54, 36, 66, 19, 35, 56, 59,
        16, 11, 49,  8, 61, 32, 25, 77, 30, 72, 24, 44, 75, 14,  5,  9, 52,
        18, 42, 48, 58, 20, 39,

In [13]:
# Baseline pcn_cls=1 vs. improved pcn_cls=4.
find_sensitive_neurons_wrapper(
    series_this=df_all_pcn['val_per'],
    cls_level='pcn_cls',
    baseline_key=1,
    improved_key=4,
)

30336 0 30336 0.0
30336 0 30336 0.0
30336 0 30336 0.0
0.38186465433300876 0.8805014605647518 0.1702775073028238


{'mse': array([67, 30, 18,  0, 66, 41, 76, 28, 75, 64,  3, 61, 12, 78,  5, 47,  4,
        60, 22, 15, 19, 39, 42, 62,  2, 31, 33, 36,  1, 34, 55, 53, 50, 45,
        16, 58, 10,  7, 40, 57, 24, 46, 35, 52,  8, 44, 51, 26, 72, 70, 27,
        43,  6, 14, 68, 71, 32, 74, 29, 38, 37, 77, 11, 17, 54, 63,  9, 48,
        49, 73, 56, 69, 21, 59, 65, 20, 25, 13, 23]),
 'cc': array([27, 16, 49, 30, 54,  3, 77, 40, 74, 71, 34, 58, 15, 76, 24, 69, 12,
        72, 44, 46, 31, 45,  5,  1, 48, 55, 25, 18, 32, 53, 17, 37, 75, 57,
        26, 28, 19,  8, 39, 61, 22, 70, 21, 64, 20, 56, 63,  7,  4,  9, 41,
        14, 11, 47, 73, 36, 60, 29, 62, 51, 38, 68,  6, 52, 65, 42, 33, 67,
        50, 78, 59, 66, 10,  2,  0, 35, 23, 13, 43]),
 'rank_cc2': array([24, 21, 55, 36, 44,  3, 78, 46, 76, 73, 38, 40, 13, 77, 29, 74, 15,
        75, 49, 54, 37, 47,  9,  1, 57, 60, 31, 25, 39, 59, 17, 35, 70, 62,
        20, 12, 19,  7, 45, 50, 27, 72, 26, 65, 28, 58, 67, 10,  5,  8, 43,
        16, 18, 56, 68, 33, 63,

In [14]:
# Baseline pcn_cls=1 vs. improved pcn_cls=5.
find_sensitive_neurons_wrapper(
    series_this=df_all_pcn['val_per'],
    cls_level='pcn_cls',
    baseline_key=1,
    improved_key=5,
)

30336 0 30336 0.0
30336 0 30336 0.0
30336 0 30336 0.0
0.34532619279454724 0.862682570593963 0.11755111976630965


{'mse': array([58, 20, 22,  0, 49, 41, 75, 21, 65, 60,  5, 66, 23, 78,  4, 38,  1,
        51, 15, 16, 19, 42, 47, 68,  3, 14, 37, 34,  2, 32, 52, 57, 63, 39,
        45, 74,  7,  6, 43, 64, 25, 33, 29, 56, 12, 46, 53, 30, 71, 72, 11,
        35, 10, 13, 59, 61, 50, 76, 24, 36, 62, 77, 31, 18, 44, 69,  8, 40,
        48, 70, 54, 55, 27, 67, 73,  9, 28, 17, 26]),
 'cc': array([17, 10, 52, 31, 37,  2, 76, 41, 34, 67, 38, 61, 21, 77, 19, 66,  8,
        70, 40, 50, 30, 55,  7,  0, 47, 39, 29, 14, 32, 45, 11, 35, 74, 56,
        44, 75, 26,  5, 51, 72, 28, 59, 15, 65, 20, 58, 68,  4,  3,  6, 25,
        16, 12, 43, 57, 24, 69, 36, 48, 63, 53, 73,  9, 60, 49, 71, 33, 62,
        54, 13, 46, 64, 22,  1, 78, 27, 23, 18, 42]),
 'rank_cc2': array([17, 13, 55, 39, 36,  3, 77, 48,  9, 72, 41, 47, 15, 78, 22, 73, 11,
        74, 44, 57, 38, 60, 10,  1, 56, 45, 32, 18, 40, 52, 14, 37, 75, 63,
        34, 76, 28,  7, 53, 71, 31, 65, 19, 68, 24, 61, 66, 12,  5,  8, 33,
        20, 21, 51, 43, 23, 70,