# Analysis of correlation networks using all correlations removing the mean-corr matrix


In [None]:
import seaborn as sns
from tqdm.notebook import tqdm
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%pylab inline
%config InlineBackend.figure_format = 'retina'

In [None]:
import pickle as pk
# Load the resampled correlation matrices. Passing the path lets pandas open
# and close the file itself instead of leaving an unclosed handle behind.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
corrs = pd.read_pickle('../data/resampled_corrs.pck')

In [None]:
# Region labels are taken from the columns of a single entry of `corrs`
# (first-level key 4, group ('P', 'A'), index tuple (0, 1, 2, 3)).
# NOTE(review): presumably every matrix shares these columns — confirm.
regions = corrs[4][('P', 'A')][(0, 1, 2, 3)].columns
num_regions = len(regions)

In [None]:
# Display the region labels in the notebook output.
regions 

In [None]:
# Clean up a few region labels. The list is built in a single pass, which
# replaces the repeated list.index() lookups; a label listed in
# `substitutes` but absent from `regions` is skipped instead of raising
# ValueError.
substitutes = {'ATN I_l': 'ATN_l', 'ATN I_r': 'ATN_r', 'VL I_l': 'VL_l', 'VL I_r': 'VL_r'}
good_regions = [substitutes.get(r, r) for r in regions]

In [None]:
# From here on `regions` is the plain list of cleaned-up labels.
regions = good_regions;

In [None]:
def extract_tensor_graph(graph_tower):
    """Stack the matrices stored in ``graph_tower`` into one 3-D float array.

    ``graph_tower`` is a mapping whose values are matrices; the result has
    shape ``(len(graph_tower), x, x)`` where ``x`` is the number of rows of
    the first value, and layers follow the mapping's iteration order.
    """
    layers = list(graph_tower.values())
    side = layers[0].shape[0]
    tensor = np.zeros((len(layers), side, side))
    for idx, layer in enumerate(layers):
        tensor[idx] = layer
    return tensor


def density_threshold(mat, density, binarized=False):
    """Zero out the weakest entries of ``mat`` so a given density survives.

    The cut-off is the ``1 - density`` quantile of the upper triangle of
    ``mat`` (diagonal included); every entry below it is set to zero.

    Parameters
    ----------
    mat : numpy.ndarray
        2-D array to threshold; it is not modified.
    density : float
        Fraction of upper-triangle values to keep, passed to ``np.quantile``
        as ``1 - density``.
    binarized : bool, optional
        If true, entries at or above the cut-off are set to 1.

    Returns
    -------
    numpy.ndarray
        Thresholded copy of ``mat``.
    """
    values = mat[np.triu_indices_from(mat)]
    thr_value = np.quantile(values, 1.0 - density)
    thr_mat = mat.copy()
    # The masks are computed on the untouched input, not on the copy.
    thr_mat[mat < thr_value] = 0
    if binarized:
        thr_mat[mat >= thr_value] = 1
    return thr_mat

In [None]:
# Threshold every resampled matrix at the chosen density, once keeping the
# weights and once binarised.
chosen_rho = 0.07
sz = 4
groups = list(corrs[sz].keys())

corrs_rho = {}
for grp in tqdm(groups):
    corrs_rho[grp] = {
        key: density_threshold(frame.values, chosen_rho)
        for key, frame in corrs[sz][grp].items()
    }

bin_corrs_rho = {}
for grp in tqdm(groups):
    bin_corrs_rho[grp] = {
        key: density_threshold(frame.values, chosen_rho, binarized=True)
        for key, frame in corrs[sz][grp].items()
    }

In [None]:
# Per-group average over all resampled matrices, labelled with the region
# names: thresholded, thresholded + binarised, and unthresholded.
av_corrs_rho = {}
for grp in tqdm(groups):
    mean_mat = extract_tensor_graph(corrs_rho[grp]).mean(axis=0)
    av_corrs_rho[grp] = pd.DataFrame(mean_mat, index=regions, columns=regions)

av_binarized_corrs_rho = {}
for grp in tqdm(groups):
    mean_mat = extract_tensor_graph(bin_corrs_rho[grp]).mean(axis=0)
    av_binarized_corrs_rho[grp] = pd.DataFrame(
        mean_mat, index=regions, columns=regions)

av_corrs_full = {}
for grp in tqdm(groups):
    mean_mat = extract_tensor_graph(corrs[sz][grp]).mean(axis=0)
    av_corrs_full[grp] = pd.DataFrame(mean_mat, index=regions, columns=regions)

In [None]:
# Save the thresholded average matrices; the density is encoded in the file
# name with '.' replaced by '_'. The context manager closes the file so the
# pickle is completely written to disk.
out_path = '../data/av_corrs_rho_by_group_density' + str(chosen_rho).replace('.', '_') + '.pck'
with open(out_path, 'wb') as fh:
    pk.dump(av_corrs_rho, fh)

## visualize the matrices 

In [None]:
# Plotting order of the groups: the 3rd and 4th keys first, then the 1st and
# 2nd. The key list is indexed directly; going through a NumPy array turned
# every label into np.str_ and would coerce non-string key entries to text.
group_keys = list(av_corrs_rho.keys())
ordered_keys = [tuple(group_keys[j]) for j in (2, 3, 0, 1)]
ordered_keys


In [None]:
# One heatmap per group on a 2x2 grid.
fig = plt.figure(figsize=(28, 29))
for i, n in enumerate(ordered_keys):
    plt.subplot(2, 2, i + 1)
    # Subtract the identity from the averaged matrix before plotting.
    df_corr = av_corrs_rho[n] - np.eye(av_corrs_rho[n].shape[0])
    # Hide the upper triangle (diagonal included) and entries with |value| < 0.1.
    # The builtin `bool` is used because the `np.bool` alias was removed from NumPy.
    mask = np.logical_or(np.triu(np.ones_like(df_corr, dtype=bool), k=0),
                         np.abs(df_corr.values) < 0.1)
#     cmap = sns.diverging_palette(0, 230, 90, 60, center='light', as_cmap=True)
    sns.heatmap(df_corr, mask=mask, square=True, linewidths=2, cmap='coolwarm',
                vmin=0.0, vmax=.3, cbar=False)
    plt.title(''.join(n), fontsize=30)
# plt.tight_layout()

plt.savefig('../data/viz-data/julia-adjacency_matrices_high_sig_spearman.pdf')


In [None]:
# Stand-alone horizontal colorbar drawn with the coolwarm colormap.
# NOTE(review): `mpl` is never imported explicitly in this notebook;
# presumably it comes from the `%pylab inline` namespace — confirm.
# NOTE(review): the range here is 0 to .4 whereas the heatmaps are drawn with
# vmax=.3 — confirm which one is intended.
fig = plt.figure(figsize=(15, 1))
ax1 = fig.add_axes([0.05, 0.80, 0.9, 0.15])
cmap = mpl.cm.coolwarm
norm = mpl.colors.Normalize(vmin=0, vmax=.4)

# ColorbarBase derives from ScalarMappable and puts a colorbar
# in a specified axes, so it has everything needed for a
# standalone colorbar.  There are many more kwargs, but the
# following gives a basic continuous colorbar with ticks
# and labels.
cb1 = mpl.colorbar.ColorbarBase(ax1, cmap=cmap,
                                norm=norm,
                                orientation='horizontal')
# plt.savefig('../data/viz-data/colorbar.pdf')

In [None]:
# Histogram (29 bins on [-1, 1]) of the entries of `df_corr`, i.e. the matrix
# left over from the last iteration of the heatmap loop.
plt.hist(np.ravel(df_corr.values), np.linspace(-1,1,30))

# Node egonetwork similarities
In the following we compute the similarity of node neighbourhoods between conditions. We do this by computing the 
cosine similarity 
\begin{align}
s_i^{x,y} = \cos(\theta) = \frac{\sum_j a^x_{ij} a^y_{ij}}{\sqrt{\sum_j (a^x_{ij})^2} \sqrt{\sum_j (a^y_{ij})^2}}
\end{align}

for node $i$ between condition/pairing $x$ and $y$, where $a^x_{ij}$ is the $(i,j)$ entry of the correlation matrix of condition/pairing $x$.   
The idea here is that if $s_i^{x,y}$ is large for $(x,y)$, then node $i$ has the same behaviour in these two conditions and hence plays the same role. In other words, since it does not change its behaviour between conditions, it can be considered not to be relevant for the transition.  
Conversely, when $s_i^{x,y}$ is small, node $i$ changes its behaviour to a large degree between the two conditions and should therefore be considered important in that transition. 

In [None]:
from itertools import combinations
# The two labels that make up a group key such as ('P', 'A'): first entry
# from `conditions`, second from `social`.
conditions = ['P', 'U']
social = ['A', 'S']

In [None]:
def _cosine(x, y):
    """Return the cosine similarity of two 1-D vectors as a plain float.

    If either vector has zero norm the similarity is defined as 0.0.
    """
    denom = np.linalg.norm(x) * np.linalg.norm(y)
    if denom == 0:
        return 0.0
    return float(np.dot(x, y) / denom)


def egosine_sim(g, gg):
    """Column-wise cosine similarity between two DataFrames.

    Parameters
    ----------
    g, gg : pandas.DataFrame
        Matrices to compare; ``gg`` must contain every column of ``g``.

    Returns
    -------
    pandas.Series
        Float series indexed by ``g.columns``; entry ``c`` is the cosine
        similarity between ``g[c]`` and ``gg[c]``.
    """
    sims = [
        _cosine(np.asarray(g[c], dtype=float), np.asarray(gg[c], dtype=float))
        for c in g.columns
    ]
    return pd.Series(sims, index=g.columns, dtype=float)


def egosine_sim_randomized(g, gg):
    """Like :func:`egosine_sim`, but each column is shuffled beforehand.

    For every column of ``g`` the two vectors are copied and permuted
    independently with ``np.random.shuffle`` (the inputs are left untouched),
    and the cosine similarity of the permuted vectors is returned.

    Returns
    -------
    pandas.Series
        Float series indexed by ``g.columns``.
    """
    sims = []
    for c in g.columns:
        # np.array copies, so the in-place shuffles never touch g or gg.
        x = np.array(g[c], dtype=float)
        y = np.array(gg[c], dtype=float)
        np.random.shuffle(x)
        np.random.shuffle(y)
        sims.append(_cosine(x, y))
    return pd.Series(sims, index=g.columns, dtype=float)

In [None]:
# Column-wise cosine similarity for every unordered pair of groups; each
# DataFrame column is keyed by the pair of group keys.
ego_sim_df = pd.DataFrame({
    (key_a, key_b): egosine_sim(mat_a, mat_b)
    for (key_a, mat_a), (key_b, mat_b) in combinations(av_corrs_rho.items(), 2)
})

In [None]:
# Same pairwise table as above, computed on shuffled columns.
ego_sim_df_random = pd.DataFrame({
    (key_a, key_b): egosine_sim_randomized(mat_a, mat_b)
    for (key_a, mat_a), (key_b, mat_b) in combinations(av_corrs_rho.items(), 2)
})

In [None]:
def k_select(d, k):
    """Return the first ``k`` items of ``d`` (``k > 0``) or the last ``|k|`` (``k < 0``).

    ``k == 0`` selects nothing and yields an empty slice rather than ``None``.
    """
    if k < 0:
        return d[k:]
    return d[:k]


topk = 20

## Trying to isolate control effects 

In [None]:
# Show which group keys are available.
print(av_corrs_rho.keys())

# 'P' minus 'U' average matrices, within the 'S' groups and within the 'A' groups.
deltaS = av_corrs_rho[('P', 'S')] - av_corrs_rho[('U', 'S')]
deltaA = av_corrs_rho[('P', 'A')] - av_corrs_rho[('U', 'A')]

# Column-wise cosine similarity between the two difference matrices.
ego_sim_df[('DeltaS', 'DeltaA')] = egosine_sim(deltaS, deltaA)

In [None]:
# 'S' minus 'A' average matrices, within the 'P' groups and within the 'U' groups.
deltaP = av_corrs_rho[('P', 'S')] - av_corrs_rho[('P', 'A')]  # social learning
deltaU = av_corrs_rho[('U', 'S')] - av_corrs_rho[('U', 'A')]  # pure sociality

# Column-wise cosine similarity between the two difference matrices.
ego_sim_df[('DeltaP', 'DeltaU')] = egosine_sim(deltaP, deltaU)

## Effects on regional similarity of the Social and Asocial treatments 

In [None]:
from scipy.stats import pearsonr
# Scatter of the column norms of deltaS against those of deltaA, coloured by
# the ('DeltaS', 'DeltaA') similarity; the dashed line is the diagonal y = x.
fig = plt.figure(figsize=(8, 6))
plt.plot(np.linspace(0, 1.5, 10), np.linspace(0, 1.5, 10), 'k--', alpha=.4)
plt.scatter(deltaS.apply(np.linalg.norm), deltaA.apply(
    np.linalg.norm), c=ego_sim_df[('DeltaS', 'DeltaA')])
plt.colorbar()
plt.xlabel(r'$|\Delta_i^S|$', fontsize=30)
plt.ylabel(r'$|\Delta_i^A|$', fontsize=30)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
# plt.ylim(-.6, .6)
plt.tight_layout()
# plt.savefig('../data/viz-data/deltaA_deltaS_scatterplot.pdf')

# Pearson correlation between the two sets of column norms (cell output).
pearsonr(deltaS.apply(np.linalg.norm), deltaA.apply(np.linalg.norm))

### construction of expected values for norms of columns

In [None]:
# Null distribution of column norms, to be used below to identify large/small
# changes: each sample is the norm of `num_regions` values drawn from all
# entries of the difference matrix.
n_samples = 10000
vals = deltaS.values.reshape(num_regions*num_regions)
norms_S = [np.linalg.norm(choice(vals, num_regions)) for _ in range(n_samples)]

n_samples = 10000
vals = deltaA.values.reshape(num_regions*num_regions)
norms_A = [np.linalg.norm(choice(vals, num_regions)) for _ in range(n_samples)]

In [None]:
import bootstrapped.stats_functions as bs_stats
import bootstrapped.bootstrap as bs
# Column norms of deltaS (top) and deltaA (bottom) against the band between
# the alpha and 1-alpha quantiles of the matching null distribution.
fig = plt.figure(figsize=(20, 8))
plt.subplot(211)
alpha = 0.05
plt.fill_between(range(num_regions), np.quantile(
    norms_S, 1-alpha), np.quantile(norms_S, alpha), alpha=.3)
plt.plot(deltaS.apply(np.linalg.norm), 'o')
plt.xticks([])
# The horizontal line marks the bootstrapped mean of the null norms.
deh = bs.bootstrap(np.array(norms_S), stat_func=bs_stats.mean)
plt.hlines(deh.value, 0, num_regions, 'k', alpha=.4)
plt.ylabel(r'$|\Delta^S_i|$', fontsize=30)
plt.yticks(fontsize=15)
plt.xlim(-.2, num_regions-.5)


plt.subplot(212)
# Both edges of this band come from norms_A; the lower edge used to be taken
# from norms_S, mixing the two null distributions.
plt.fill_between(range(num_regions), np.quantile(
    norms_A, 1-alpha), np.quantile(norms_A, alpha), alpha=.3)
plt.plot(deltaA.apply(np.linalg.norm), 'o')
plt.xticks(rotation=90)

deh = bs.bootstrap(np.array(norms_A), stat_func=bs_stats.mean)
plt.hlines(deh.value, 0, num_regions, 'k', alpha=.4)
plt.ylabel(r'$|\Delta^A_i|$', fontsize=30)
plt.xticks(fontsize=14)
plt.yticks(fontsize=15)
plt.xlim(-.2, num_regions-.5)
plt.tight_layout()
plt.savefig('../data/viz-data/deltaA_deltaS_significances_edit_2.pdf')

In [None]:
import bootstrapped.stats_functions as bs_stats
import bootstrapped.bootstrap as bs
# Same figure as the previous cell with a flatter aspect ratio, saved under a
# different file name.
fig = plt.figure(figsize=(20, 6))
plt.subplot(211)
alpha = 0.05
plt.fill_between(range(num_regions), np.quantile(
    norms_S, 1-alpha), np.quantile(norms_S, alpha), alpha=.3)
plt.plot(deltaS.apply(np.linalg.norm), 'o')
plt.xticks([])
# The horizontal line marks the bootstrapped mean of the null norms.
deh = bs.bootstrap(np.array(norms_S), stat_func=bs_stats.mean)
plt.hlines(deh.value, 0, num_regions, 'k', alpha=.4)
plt.ylabel(r'$|\Delta^S_i|$', fontsize=30)
plt.yticks(fontsize=15)
plt.xlim(-.2, num_regions-.5)


plt.subplot(212)
# Both edges of this band come from norms_A; the lower edge used to be taken
# from norms_S, mixing the two null distributions.
plt.fill_between(range(num_regions), np.quantile(
    norms_A, 1-alpha), np.quantile(norms_A, alpha), alpha=.3)
plt.plot(deltaA.apply(np.linalg.norm), 'o')
plt.xticks(rotation=90)

deh = bs.bootstrap(np.array(norms_A), stat_func=bs_stats.mean)
plt.hlines(deh.value, 0, num_regions, 'k', alpha=.4)
plt.ylabel(r'$|\Delta^A_i|$', fontsize=30)
plt.xticks(fontsize=14)
plt.yticks(fontsize=15)
plt.xlim(-.2, num_regions-.5)
plt.tight_layout()
plt.savefig('../data/viz-data/deltaA_deltaS_significances_edit_no_lab.pdf')

In [None]:
from scipy.stats import percentileofscore

# Regions whose column norm falls outside the [alpha, 1-alpha] quantile band
# of the null norms, stored as [label, tail fraction] pairs.
list_high_intensity = {'S': [], 'A': []}
list_low_intensity = {'S': [], 'A': []}

for label, null_norms, delta in (('S', norms_S, deltaS), ('A', norms_A, deltaA)):
    up, lo = np.quantile(null_norms, 1-alpha), np.quantile(null_norms, alpha)
    d = delta.apply(np.linalg.norm)
    for region, value in d.items():
        if value > up:
            # fraction of null samples above the observed norm
            tail = (100 - percentileofscore(null_norms, value))/100
            list_high_intensity[label].append([region, tail])
        elif value < lo:
            # fraction of null samples below the observed norm
            tail = percentileofscore(null_norms, value)/100
            list_low_intensity[label].append([region, tail])

In [None]:
# Show the regions found above / below the null band.
print(list_high_intensity)
print(list_low_intensity)

In [None]:
# Turn the [label, value] pairs into {label: value} mappings per condition.
new_high_intensity = {
    cond: {entry[0]: entry[1] for entry in entries}
    for cond, entries in list_high_intensity.items()
}

In [None]:
# Export the high-amplitude table (regions as rows, conditions as columns).
# NOTE(review): recent pandas releases may no longer be able to write the
# legacy '.xls' format — confirm with the installed version.
pd.DataFrame(new_high_intensity).to_excel('../data/p-values-high-amplitude-changes.xls')

In [None]:
# Turn the [label, value] pairs into {label: value} mappings per condition.
new_low_intensity = {
    cond: {entry[0]: entry[1] for entry in entries}
    for cond, entries in list_low_intensity.items()
}

In [None]:
# Export the low-amplitude table and print it as LaTeX with '-' for gaps.
# NOTE(review): recent pandas releases may no longer be able to write the
# legacy '.xls' format — confirm with the installed version.
pd.DataFrame(new_low_intensity).to_excel('../data/p-values-low-amplitude-changes.xls')
print(pd.DataFrame(new_low_intensity).fillna('-').to_latex())

### Norm residual analysis

In [None]:
# Per-region difference between the column norm of deltaS and that of deltaA.
residuals = deltaS.apply(np.linalg.norm) - deltaA.apply(np.linalg.norm)

#### Residual null model for significance

In [None]:
# Null distribution of the norm residual, to be used below to identify
# large/small changes: the difference between the norms of two independent
# random draws of `num_regions` entries from deltaS and from deltaA.
residual = []
valS = deltaS.values.reshape(num_regions*num_regions)
valA = deltaA.values.reshape(num_regions*num_regions)
n_samples = 10000
null_residual = [
    np.linalg.norm(choice(valS, num_regions))
    - np.linalg.norm(choice(valA, num_regions))
    for _ in range(n_samples)
]

In [None]:
# Stem plot of the norm residuals on top of the [alpha, 1-alpha] quantile
# band of the null residuals.
fig = plt.figure(figsize=(15, 4))
# `use_line_collection` is not passed any more: the keyword was removed from
# matplotlib's stem(), and plt.setp below handles either kind of stem lines.
(markers, stemlines, baseline) = plt.stem(residuals.values)
plt.setp(stemlines, linestyle="-", color="blue", linewidth=0.5)
markers.set_markerfacecolor('none')
plt.xticks(range(len(residuals)), residuals.index, rotation=90)
plt.ylim(-.8, .8)
plt.ylabel(r'$|\Delta^S_i| - |\Delta^A_i|$', fontsize=20)
alpha = 0.05

plt.fill_between(range(num_regions), np.quantile(
    null_residual, 1-alpha), np.quantile(null_residual, alpha), alpha=.3)

plt.savefig('../data/viz-data/residual_deltaA_deltaS_significances.pdf')

In [None]:
# Regions whose residual lies above / below the null quantile band.
up, lo = np.quantile(null_residual, 1-alpha), np.quantile(null_residual, alpha)
d = deltaS.apply(np.linalg.norm)
high_residual = [r for r in residuals.index if residuals[r] > up]
# A region already counted as high is never counted as low.
low_residual = [r for r in residuals.index
                if not residuals[r] > up and residuals[r] < lo]

In [None]:
# Show the regions with a residual outside the null band.
print('High: ', high_residual)
print('Low: ', low_residual)

### Significances of similarities 

In [None]:
# Per-region ('DeltaS', 'DeltaA') similarity with a dashed zero line.
inds = list(ego_sim_df.columns)
fig = plt.figure(figsize=(15, 5))

plt.plot(ego_sim_df[('DeltaS', 'DeltaA')], 'o')
plt.title("Similarity ('DeltaS','DeltaA')")
plt.xticks(rotation=90)
# NOTE(review): nothing plotted here carries a label, so this call has no
# entries to show — confirm whether a legend is wanted.
plt.legend()
# The zero line spans all regions instead of a hard-coded 70.
plt.hlines(0, 0, num_regions, linestyles='dashed', alpha=.4)
plt.ylim(-.6, .6)

In [None]:
# Expected values of the DeltaS/DeltaA similarity under column shuffling:
# one randomised similarity profile per iteration (rows = iterations,
# columns = regions). The frame is assembled in one call because inserting
# thousands of columns one at a time fragments the DataFrame.
num_iter = 5000
ego_sim_expectation = pd.DataFrame(
    {n: egosine_sim_randomized(deltaS, deltaA) for n in tqdm(range(num_iter))}
).T

In [None]:
import bootstrapped.stats_functions as bs_stats
import bootstrapped.bootstrap as bs
from scipy.stats import ttest_1samp
# Observed per-region similarity that is compared with the shuffled null.
observed = ego_sim_df[('DeltaS', 'DeltaA')]

# 1 when the observed value is at or beyond the 5% / 95% null quantiles.
significance_color = []
for c in regions:
    null = ego_sim_expectation[c]
    low, high = np.quantile(null, 0.05), np.quantile(null, 0.95)
    significance_color.append(
        1 if (observed[c] <= low or observed[c] >= high) else 0)

# 1 when a one-sample t-test of the null samples against the observed value
# gives p < 0.05.
t_significance_color = []
for c in regions:
    t, p = ttest_1samp(ego_sim_expectation[c], observed[c])
    t_significance_color.append(1 if p < 0.05 else 0)

# Absolute z-score of the observed value with respect to the null samples.
z_significance_color = []
for c in regions:
    mu, std = np.mean(ego_sim_expectation[c]), np.std(ego_sim_expectation[c])
    z_significance_color.append(np.abs((observed[c] - mu) / std))

# Bootstrap intervals for the mean and the standard deviation of the null;
# the colour is the |z| distance when the observed value lies outside the
# interval of the mean and 0 otherwise.
boot_significance_color = []
conf_intervs = []
std_intervs = []
for c in regions:
    null_values = ego_sim_expectation[c].values
    deh = bs.bootstrap(null_values, stat_func=bs_stats.mean)
    std = np.std(ego_sim_expectation[c])
    mu = np.mean(observed[c])
    outside = mu < deh.lower_bound or mu >= deh.upper_bound
    boot_significance_color.append(
        np.abs((mu - deh.value) / std) if outside else 0)
    conf_intervs.append([deh.lower_bound, deh.upper_bound])
    deh = bs.bootstrap(null_values, stat_func=bs_stats.std)
    std_intervs.append([deh.lower_bound, deh.upper_bound])

In [None]:
# Observed similarities (coloured by the bootstrap score) over box plots of
# the shuffled null, one box per region. Positions and axis limits follow
# `num_regions` instead of a hard-coded 70.
fig = plt.figure(figsize=(15, 5))
plt.scatter(range(num_regions), ego_sim_df[('DeltaS', 'DeltaA')],
            c=boot_significance_color, vmax=5)
plt.title("Similarity ('DeltaS','DeltaA')")
plt.boxplot(ego_sim_expectation, positions=list(range(num_regions)),
            showfliers=False)
plt.hlines(0, 0, num_regions, linestyles='dashed', alpha=.4)
plt.ylim(-.6, .6)
plt.colorbar()
plt.xticks(range(num_regions), regions, rotation=90)
plt.xlim(-1, num_regions + 1)
plt.tight_layout()

In [None]:
# Observed DeltaS/DeltaA similarity per region: grey level encodes the
# z-score, and only regions flagged in `significance_color` get a visible
# marker (size 100, the others size 0).
fig = plt.figure(figsize=(20, 5))

plt.scatter(range(num_regions), ego_sim_df[('DeltaS', 'DeltaA')], c=z_significance_color,
            s=100*np.array(significance_color), vmax=5,  vmin=1, cmap='Greys', alpha=1)
plt.ylabel(r" $\chi(\Delta^S_i, \Delta^A_i)$", fontsize=25)
# Vertical bar per region: midpoint of the bootstrap interval of the mean,
# plus/minus the upper bootstrap bound of the standard deviation.
for n in range(num_regions):
    centre = (conf_intervs[n][0]+conf_intervs[n][1])/2
    plt.vlines(n, centre - std_intervs[n][1], centre + std_intervs[n][1],
               'k', linestyles='solid', lw=2)
# plt.boxplot(ego_sim_expectation.T, positions=range(70), showfliers=False);
# The zero line spans all regions instead of a hard-coded 70.
plt.hlines(0, 0, num_regions, linestyles='dashed', alpha=.4)
plt.ylim(-.6, .6)
plt.colorbar(label=r"z-score")
plt.xticks(range(num_regions), regions, rotation=90)
plt.xlim(-1, num_regions+1)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.tight_layout()
plt.savefig('../data/viz-data/similarity_SA_vs_random_expection.pdf')

In [None]:
# List the significant regions and their up/down regulation: for each one the
# tail fraction of the null and the z-score are recorded.
from scipy.stats import percentileofscore
down_regulation, up_regulation = [], []
observed = ego_sim_df[('DeltaS', 'DeltaA')]

for c in regions:
    null = ego_sim_expectation[c]
    mu, std = np.mean(null), np.std(null)
    low, high = np.quantile(null, 0.05), np.quantile(null, 1-0.05)
    obs = observed[c]
    if obs <= low:
        # fraction of null samples below the observed value
        perc = percentileofscore(null, obs)/100.0
        z = (obs - mu)/std
        down_regulation.append((c, perc, z))
    if obs >= high:
        # fraction of null samples above the observed value
        perc = (100.0 - percentileofscore(null, obs))/100.0
        z = (obs - mu)/std
        up_regulation.append((c, perc, z))

In [None]:
# Table of the down-regulated regions, indexed by region, written to disk.
# NOTE(review): recent pandas releases may no longer be able to write the
# legacy '.xls' format — confirm with the installed version.
down_r_df = pd.DataFrame(down_regulation, columns=['region', 'p-value', 'z-score']).set_index('region')
down_r_df.to_excel('../data/p-values-significant-dissimilarities.xls')

In [None]:
# Print the down-regulation table as LaTeX.
print(down_r_df.to_latex())

In [None]:
print(up_regulation)
# NOTE(review): the value of the next expression is discarded because it is
# not the last statement of the cell, and np.min / np.max raise on an empty
# list — confirm whether it is still needed.
np.min([x[2] for x in up_regulation]), np.max([x[1] for x in up_regulation])
# Labels of the up-regulated regions only.
print([x[0] for x in up_regulation])

In [None]:
# Table of the up-regulated regions, indexed by region, written to disk.
# NOTE(review): recent pandas releases may no longer be able to write the
# legacy '.xls' format — confirm with the installed version.
up_r_df = pd.DataFrame(up_regulation, columns=['region', 'p-value', 'z-score']).set_index('region')
up_r_df.to_excel('../data/p-values-significant-similarities.xls')

In [None]:
# Print the up-regulation table as LaTeX.
print(up_r_df.to_latex())

## Effects on regional similarity of the Paired and Unpaired treatments 

In [None]:
from scipy.stats import pearsonr
# Scatter of the column norms of deltaP against those of deltaU, coloured by
# the ('DeltaP', 'DeltaU') similarity; the dashed line is the diagonal y = x.
fig = plt.figure(figsize=(8, 6))
plt.plot(np.linspace(0, 1.5, 10), np.linspace(0, 1.5, 10), 'k--', alpha=.4)
plt.scatter(deltaP.apply(np.linalg.norm), deltaU.apply(
    np.linalg.norm), c=ego_sim_df[('DeltaP', 'DeltaU')])
plt.colorbar()
plt.xlabel(r'$|\Delta_i^P|$', fontsize=30)
plt.ylabel(r'$|\Delta_i^U|$', fontsize=30)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
# plt.ylim(-.6, .6)
plt.tight_layout()
plt.savefig('../data/viz-data/deltaP_deltaU_scatterplot.pdf')
# Pearson correlation between the two sets of column norms (cell output).
pearsonr(deltaP.apply(np.linalg.norm), deltaU.apply(np.linalg.norm))

In [None]:
# Null distribution of column norms, to be used below to identify large/small
# changes: each sample is the norm of `num_regions` values drawn from all
# entries of the difference matrix.
n_samples = 10000
vals = deltaP.values.reshape(num_regions*num_regions)
norms_P = [np.linalg.norm(choice(vals, num_regions)) for _ in range(n_samples)]

n_samples = 10000
vals = deltaU.values.reshape(num_regions*num_regions)
norms_U = [np.linalg.norm(choice(vals, num_regions)) for _ in range(n_samples)]

In [None]:
import bootstrapped.stats_functions as bs_stats
import bootstrapped.bootstrap as bs
# Column norms of deltaP (top) and deltaU (bottom) against the band between
# the alpha and 1-alpha quantiles of the matching null distribution.
fig = plt.figure(figsize=(15, 6))
plt.subplot(211)
alpha = 0.05
plt.fill_between(range(num_regions), np.quantile(
    norms_P, 1-alpha), np.quantile(norms_P, alpha), alpha=.3)
plt.plot(deltaP.apply(np.linalg.norm), 'o')
plt.xticks([])
# The horizontal line marks the bootstrapped mean of the null norms.
deh = bs.bootstrap(np.array(norms_P), stat_func=bs_stats.mean)
plt.hlines(deh.value, 0, num_regions, 'k', alpha=.4)
plt.ylabel(r'$|\Delta^P_i|$', fontsize=30)
plt.yticks(fontsize=15)


plt.subplot(212)
plt.fill_between(range(num_regions), np.quantile(
    norms_U, 1-alpha), np.quantile(norms_U, alpha), alpha=.3)
plt.plot(deltaU.apply(np.linalg.norm), 'o')
plt.xticks(rotation=90)

deh = bs.bootstrap(np.array(norms_U), stat_func=bs_stats.mean)
plt.hlines(deh.value, 0, num_regions, 'k', alpha=.4)
plt.ylabel(r'$|\Delta^U_i|$', fontsize=30)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)

plt.tight_layout()
plt.savefig('../data/viz-data/deltaP_deltaU_significances.pdf')

In [None]:
# Regions whose deltaP / deltaU column norm falls outside the
# [alpha, 1-alpha] quantile band of the null norms (labels only).
for label in ('P', 'U'):
    list_high_intensity[label] = []
    list_low_intensity[label] = []

for label, null_norms, delta in (('P', norms_P, deltaP), ('U', norms_U, deltaU)):
    up, lo = np.quantile(null_norms, 1-alpha), np.quantile(null_norms, alpha)
    d = delta.apply(np.linalg.norm)
    for region, value in d.items():
        if value > up:
            list_high_intensity[label].append(region)
        elif value < lo:
            list_low_intensity[label].append(region)

In [None]:
# Show the regions found above / below the null band (all conditions so far).
print(list_high_intensity)

print(list_low_intensity)

In [None]:
# Expected values of the DeltaU/DeltaP similarity under column shuffling:
# one randomised similarity profile per iteration (rows = iterations,
# columns = regions). The frame is assembled in one call because inserting
# many columns one at a time fragments the DataFrame.
num_iter = 1000
ego_sim_expectation_UP = pd.DataFrame(
    {n: egosine_sim_randomized(deltaU, deltaP) for n in tqdm(range(num_iter))}
).T

In [None]:
import bootstrapped.stats_functions as bs_stats
import bootstrapped.bootstrap as bs
from scipy.stats import ttest_1samp
# Observed per-region similarity that is compared with the shuffled null.
observed = ego_sim_df[('DeltaP', 'DeltaU')]

# 1 when the observed value is at or beyond the 5% / 95% null quantiles.
significance_color = []
for c in regions:
    null = ego_sim_expectation_UP[c]
    low, high = np.quantile(null, 0.05), np.quantile(null, 0.95)
    significance_color.append(
        1 if (observed[c] <= low or observed[c] >= high) else 0)

# Absolute z-score of the observed value with respect to the null samples.
z_significance_color = []
for c in regions:
    mu, std = np.mean(ego_sim_expectation_UP[c]), np.std(
        ego_sim_expectation_UP[c])
    z_significance_color.append(np.abs((observed[c] - mu) / std))

# Bootstrap intervals for the mean and the standard deviation of the null.
conf_intervs = []
std_intervs = []
for c in regions:
    null_values = ego_sim_expectation_UP[c].values
    deh = bs.bootstrap(null_values, stat_func=bs_stats.mean)
    conf_intervs.append([deh.lower_bound, deh.upper_bound])
    deh = bs.bootstrap(null_values, stat_func=bs_stats.std)
    std_intervs.append([deh.lower_bound, deh.upper_bound])

In [None]:
# Observed DeltaP/DeltaU similarity per region: grey level encodes the
# z-score, and only regions flagged in `significance_color` get a visible
# marker (size 100, the others size 0).
fig = plt.figure(figsize=(20, 5))

plt.scatter(range(num_regions), ego_sim_df[('DeltaP', 'DeltaU')], c=z_significance_color,
            s=100*np.array(significance_color), vmax=5,  vmin=1, cmap='Greys', alpha=1)
plt.ylabel(r" $\chi(\Delta^P_i, \Delta^U_i)$", fontsize=25)
# Vertical bar per region: midpoint of the bootstrap interval of the mean,
# plus/minus the upper bootstrap bound of the standard deviation.
for n in range(num_regions):
    centre = (conf_intervs[n][0]+conf_intervs[n][1])/2
    plt.vlines(n, centre - std_intervs[n][1], centre + std_intervs[n][1],
               'k', linestyles='solid', lw=2)
# plt.boxplot(ego_sim_expectation.T, positions=range(70), showfliers=False);
# The zero line spans all regions instead of a hard-coded 70.
plt.hlines(0, 0, num_regions, linestyles='dashed', alpha=.4)
plt.ylim(-.6, .6)
plt.colorbar(label=r"z-score")
plt.xticks(range(num_regions), regions, rotation=90)
plt.xlim(-1, num_regions+1)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.tight_layout()
plt.savefig('../data/viz-data/similarity_UP_vs_random_expection.pdf')

In [None]:
# List the significant regions and their up/down regulation for the
# DeltaP/DeltaU similarity: for each one the tail fraction of the null and
# the z-score are recorded.
from scipy.stats import percentileofscore
down_regulation, up_regulation = [], []
observed = ego_sim_df[('DeltaP', 'DeltaU')]

for c in regions:
    null = ego_sim_expectation_UP[c]
    mu, std = np.mean(null), np.std(null)
    low, high = np.quantile(null, 0.05), np.quantile(null, 1-0.05)
    obs = observed[c]
    # Both tests use the DeltaP/DeltaU similarity; the lower-tail test used
    # to read the ('DeltaS', 'DeltaA') column by mistake.
    if obs <= low:
        perc = percentileofscore(null, obs)/100.0
        z = (obs - mu)/std
        down_regulation.append((c, perc, z))
    if obs >= high:
        perc = (100.0 - percentileofscore(null, obs))/100.0
        z = (obs - mu)/std
        up_regulation.append((c, perc, z))

In [None]:
# (region, tail fraction, z-score) tuples below the lower null quantile.
print(down_regulation)

In [None]:
# (region, tail fraction, z-score) tuples above the upper null quantile.
print(up_regulation)

# Centralities

In [None]:
import operator
# Eigenvector centrality per group; prints the `topk` highest-ranked nodes.
# NOTE(review): no `weight` argument is passed to the centrality call, so
# presumably edge weights are ignored — confirm against the networkx docs.
eigen = {}
topk = 10
for grp, adjacency in av_corrs_rho.items():
    g = nx.from_pandas_adjacency(adjacency)
    eigen[grp] = nx.eigenvector_centrality(g)
    sorted_eig = sorted(eigen[grp].items(), key=lambda kv: kv[1])
    print(grp, k_select(sorted_eig, -topk), '\n')

In [None]:
# Degree centrality per group; prints the `topk` highest-ranked nodes.
degree = {}
topk = 10
for grp, adjacency in av_corrs_rho.items():
    g = nx.from_pandas_adjacency(adjacency)
    degree[grp] = nx.degree_centrality(g)
    sorted_deg = sorted(degree[grp].items(), key=lambda kv: kv[1])
    print(grp, k_select(sorted_deg, -topk), '\n')

In [None]:
# Degree centrality again, this time printing the `topk` lowest-ranked nodes.
for grp, adjacency in av_corrs_rho.items():
    g = nx.from_pandas_adjacency(adjacency)
    degree[grp] = nx.degree_centrality(g)
    sorted_deg = sorted(degree[grp].items(), key=lambda kv: kv[1])
    print(grp, k_select(sorted_deg, topk), '\n')