In [None]:
import seaborn as sns
from tqdm.notebook import tqdm
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%pylab inline
%config InlineBackend.figure_format = 'retina'

In [None]:
import pickle as pk
# Hand pandas the path so it opens AND closes the file itself; the previous
# `open(..., 'rb')` handle was never closed.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# from a trusted source.
av_corrs_rho = pd.read_pickle('../data/av_corrs_rho_by_group_density0_07.pck')

In [None]:
# Edge density value. NOTE(review): presumably matches the `density0_07` suffix
# of the input file name; it is not passed to any call in the cells visible here.
density = 0.07

In [None]:
def density_threshold(mat, density, binarized=False):
    """Keep only the strongest entries of ``mat``, chosen by a quantile cut-off.

    The cut-off is the ``1.0 - density`` quantile of the values on and above
    the main diagonal of ``mat``. A copy of ``mat`` is returned in which every
    entry strictly below the cut-off is 0; when ``binarized`` equals ``True``
    the remaining entries (>= cut-off) are set to 1. ``mat`` is left untouched.
    """
    cutoff = np.quantile(mat[np.triu_indices_from(mat)], 1.0 - density)
    result = mat.copy()
    result[mat < cutoff] = 0
    if not (binarized == True):
        return result
    # Binarization: every surviving entry becomes 1.
    result[mat >= cutoff] = 1
    return result


def correlation_threshold(mat, thr_corr, binarized=False):
    """Zero out every entry of ``mat`` that is below a fixed threshold.

    Parameters
    ----------
    mat : numpy.ndarray
        Array of values to threshold. It is not modified.
    thr_corr : float
        Entries strictly smaller than this value are set to 0.
    binarized : bool, optional
        When true, entries greater than or equal to ``thr_corr`` are set to 1.

    Returns
    -------
    numpy.ndarray
        Thresholded copy of ``mat``.
    """
    # The previous version also extracted the upper-triangle values here but
    # never used them; that dead code is removed.
    thr_mat = mat.copy()
    thr_mat[mat < thr_corr] = 0
    if binarized:
        thr_mat[mat >= thr_corr] = 1  # binarization
    return thr_mat


# Correlations below this value are dropped from the matrices used for visualisation.
correlation_thr = 0.1
effective_viz_matrices = {}

# Top row: full matrices; bottom row: thresholded matrices (one column per condition).
n_conditions = len(av_corrs_rho)
fig = plt.figure(figsize=(16, 6))
for k, i in enumerate(av_corrs_rho.keys()):
    full_matrix = av_corrs_rho[i]
    # Identity sized from the data (previously a hard-coded 70x70); subtracting
    # it removes 1 from each diagonal entry before plotting.
    identity = np.eye(full_matrix.shape[0])

    plt.subplot(2, n_conditions, k + 1)
    plt.imshow(full_matrix - identity)
    print(nx.density(nx.from_pandas_adjacency(full_matrix)))

    effective_viz_matrices[i] = pd.DataFrame(
        correlation_threshold(full_matrix.values, correlation_thr),
        columns=full_matrix.columns, index=full_matrix.index)
    print(nx.density(nx.from_pandas_adjacency(effective_viz_matrices[i])))

    plt.subplot(2, n_conditions, k + 1 + n_conditions)
    plt.imshow(effective_viz_matrices[i] - identity)

In [None]:
import networkx as nx

In [None]:
import igraph as ig
# Run repeated Leiden community detection (via igraph) on the largest connected
# component of each condition's graph and store every resulting partition.

# When True, nodes outside the largest connected component are appended to
# every partition, each one as its own new community label.
add_isolated = True
# IG_comms[condition][iteration] -> {node id: community label}
IG_comms = {}
# Populated by a later cell with one networkx graph per condition.
ig_final = {}


# Number of Leiden runs per condition.
partition_iterations = 100;

for i in tqdm(av_corrs_rho):
    IG_comms[i] = {}
    G = nx.from_pandas_adjacency(av_corrs_rho[i], create_using=nx.Graph())
    ## detect only on giant component
    G.remove_edges_from(nx.selfloop_edges(G))
    # The graph is handed to igraph through a GraphML file on disk.
    # NOTE(review): `g` (the full graph loaded into igraph) is not used again in this cell.
    nx.write_graphml(G, '../data/test-ig.graphml')
    g = ig.load('../data/test-ig.graphml')
    GC = nx.subgraph(G, max(nx.connected_components(G), key=len))
    ## keep track of isolated nodes
    isolated_nodes = set(list(G.nodes())) - set(list(GC.nodes()));
    ## setup (awkwardly) the comm detection
    nx.write_graphml(GC, '../data/test-ig-gc.graphml')
    gc = ig.load('../data/test-ig-gc.graphml')
    for it in range(partition_iterations):
        IG_comms[i][it] = gc.community_leiden('modularity', weights='weight', n_iterations = 2)
#         print(len(set(IG_comms[i][it].membership)))
        # Replace the igraph result by a plain {vertex 'id': membership label} dict.
        IG_comms[i][it] = dict(zip(gc.vs['id'], IG_comms[i][it].membership))
        if add_isolated==True:
            # Give each left-out node a fresh label above the current maximum.
            max_comm = np.max(list(IG_comms[i][it].values()))
            for n in isolated_nodes:
                max_comm += 1;
                IG_comms[i][it][n] = max_comm;
    # Number of communities found in each run for this condition.
    print(i, [len(set(IG_comms[i][x].values())) for x in range(partition_iterations)])

In [None]:
# Node name -> integer position (in G.nodes() order) and the inverse mapping.
# NOTE: `G` is whatever graph the previous loop built last.
relabel_parts = {node: position for position, node in enumerate(G.nodes())}
inv_relabel_parts = dict(enumerate(G.nodes()))

In [None]:
def comm_to_nodes(cludict, relabel=None):
    """Invert a node -> community mapping into community -> list of nodes.

    Parameters
    ----------
    cludict : dict
        Maps each node to its community label.
    relabel : mapping or sequence, optional
        When given, each node ``n`` is stored as ``relabel[n]`` instead of ``n``.

    Returns
    -------
    collections.defaultdict
        Maps each community label to the list of its (optionally relabelled)
        nodes, in the iteration order of ``cludict``.
    """
    from collections import defaultdict

    ctn_dict = defaultdict(list)
    # Identity test: `relabel == None` is element-wise (and then ambiguous in an
    # `if`) when `relabel` is a numpy array or similar container.
    if relabel is None:
        for n, community in cludict.items():
            ctn_dict[community].append(n)
    else:
        for n, community in cludict.items():
            ctn_dict[community].append(relabel[n])
    return ctn_dict

In [None]:
# ctn[condition][iteration] -> {community label: [relabelled nodes]}
ctn = {}
for i, runs in IG_comms.items():
    ctn[i] = {
        it: comm_to_nodes(membership, relabel_parts)
        for it, membership in runs.items()
    }

In [None]:
import graph_tool.all as gt

# One consensus ("center") partition per condition, computed by graph-tool from
# the label lists of all stored runs.
# NOTE(review): each label list follows the insertion order of its membership
# dict; later cells pair the result with G.nodes() -- confirm the orders agree.
IG_comms_center = {}
for i in tqdm(ctn):
    parts = [list(membership.values()) for membership in IG_comms[i].values()]
    IG_comms_center[i] = gt.partition_overlap_center(parts)

In [None]:
# Summarise the center partition of every condition.
for i in IG_comms:
    center_labels = IG_comms_center[i][0]
    confidence = IG_comms_center[i][1]
    print('Best partition for ', i , 'Num comms:', len(set(center_labels)), 'Nodes covered: ',
          len(center_labels), ' \n', center_labels)
    print('Confidence ', confidence, '\n')

In [None]:
# Build one visualisation graph per condition from the thresholded matrix and
# tag each node with its center-partition label.
for i, center in IG_comms_center.items():
    relabel = dict(zip(G.nodes(), center[0]))
    viz_graph = nx.from_pandas_adjacency(effective_viz_matrices[i], create_using=nx.Graph())
    viz_graph.remove_edges_from(nx.selfloop_edges(viz_graph))
    nx.set_node_attributes(viz_graph, relabel, 'spin_comms')
    ig_final[i] = viz_graph
    # The ('U', 'A') partition is kept as the reference labelling for all graphs.
    if i == ('U', 'A'):
        regular_comm = relabel.copy()

# Attach the reference labelling to every graph and export it as GEXF.
for i, viz_graph in ig_final.items():
    nx.set_node_attributes(viz_graph, regular_comm, 'control_spin_comms')
    nx.write_gexf(viz_graph, '../data/viz-data/finalized_spinglass_comms_graphs_'+str(i).replace("'", "_")+'.gexf')

In [None]:
# center_ctn[condition] -> {community label: [node names]} for the center partition.
center_ctn = {}
for i, center in IG_comms_center.items():
    dic = dict(zip(G.nodes(), center[0]))
    center_ctn[i] = comm_to_nodes(dic)

In [None]:
from itertools import combinations 
def intra_edges_weight(gg, comm_nodes, measure = 'weighted'):
    """Measure the edges of ``gg`` that lie inside the node set ``comm_nodes``.

    With ``measure == 'weighted'`` the result is the sum of the ``'weight'``
    attributes of the edges of the induced subgraph. For any other value it is
    ``2 * (number of weighted edges) / len(comm_nodes) ** 2``.
    """
    edge_weights = nx.get_edge_attributes(nx.subgraph(gg, comm_nodes), 'weight')
    if measure != 'weighted':
        return 2 * len(edge_weights) / (len(comm_nodes)**2)
    return np.sum(list(edge_weights.values()))

def inter_edges_weight(gg, comm1, comm2, measure='weighted'):
    """Measure the edges of ``gg`` that join a node of ``comm1`` to a node of ``comm2``.

    With ``measure == 'weighted'`` the result is the sum of the ``'weight'``
    attribute over all such edges. For any other value it is the number of such
    edges divided by ``len(comm1) * len(comm2)``.
    """
    # Every (n, m) pair across the two groups that is connected in gg.
    linked_pairs = [(n, m) for n in comm1 for m in comm2 if gg.has_edge(n, m)]
    if measure == 'weighted':
        return sum(gg[n][m]['weight'] for n, m in linked_pairs)
    return len(linked_pairs) / (len(comm1) * len(comm2))

# comm_structure[condition] is a square matrix over the communities of the
# center partition: the diagonal holds the total intra-community edge weight,
# the upper triangle the total edge weight between each pair of communities.
comm_structure = {}
for i in av_corrs_rho:
    print(i)
    G = nx.from_pandas_adjacency(av_corrs_rho[i], create_using=nx.Graph())
    G.remove_edges_from(nx.selfloop_edges(G))

    # Rows/columns follow the sorted community labels. Both the diagonal and
    # the off-diagonal entries go through the same label -> position map; the
    # previous version indexed the diagonal by enumeration order and the rest
    # by raw label, which only agree when labels first appear as 0, 1, 2, ...
    comm_labels = sorted(center_ctn[i].keys())
    position = {label: k for k, label in enumerate(comm_labels)}

    comm_structure[i] = np.zeros((len(comm_labels), len(comm_labels)))
    for label in comm_labels:
        k = position[label]
        comm_structure[i][k, k] = intra_edges_weight(G, center_ctn[i][label])
    for j, jj in combinations(comm_labels, 2):
        comm_structure[i][position[j], position[jj]] = inter_edges_weight(
            G, center_ctn[i][j], center_ctn[i][jj])

In [None]:
# One heatmap of the community-structure matrix per condition, on a 2x2 grid.
fig = plt.figure(figsize=(10, 6))
for count, i in enumerate(comm_structure, start=1):
    plt.subplot(2, 2, count)
    sns.heatmap(comm_structure[i])
    plt.title(i)
plt.tight_layout()

In [None]:
# Same heatmaps in a fixed condition order on a single row, saved as PDF.
fig = plt.figure(figsize=(18, 4))

names = [('U', 'A'), ('U', 'S'), ('P', 'A'), ('P', 'S')]
cmap = None
for count, i in enumerate(names, start=1):
    plt.subplot(1, 4, count)
    # Only the first 5x5 block is drawn for ('U', 'S').
    shown = comm_structure[i][:5, :5] if i == ('U', 'S') else comm_structure[i]
    sns.heatmap(shown, cmap=cmap, vmin=0, alpha=1)
    plt.title(i, fontsize=20)
plt.tight_layout()
plt.savefig('../data/viz-data/comm_structure_plots.pdf')

# Compute r values

In [None]:
# r = mean positive diagonal entry / mean positive upper-triangle entry.
for i in comm_structure:
    di = np.diag_indices_from(comm_structure[i])
    ind = np.triu_indices_from(comm_structure[i], 1)
    intra = comm_structure[i][di]
    inter = comm_structure[i][ind]
    print(i, np.mean(intra[intra>0]) / np.mean(inter[inter>0]))


In [None]:
# random model for the r ratios
def compute_random_comm_structure(data, cond, comms):
    """Community-structure matrix for randomly drawn node groups.

    Builds the graph of ``data[cond]`` (self-loops removed) and, for every
    community in ``comms[cond]``, draws a random node set of the same size
    (without replacement) to fill the diagonal with its intra-group weight.
    For every pair of communities, two fresh random node sets of the matching
    sizes are drawn to fill the upper-triangle entry with their inter-group
    weight.

    Parameters
    ----------
    data : mapping
        Maps a condition to its adjacency DataFrame.
    cond : hashable
        Condition key into ``data`` and ``comms``.
    comms : mapping
        ``comms[cond]`` maps community labels to node lists; only the sizes
        are used.

    Returns
    -------
    numpy.ndarray
        Square matrix with one row/column per community, ordered by sorted label.

    Notes
    -----
    NOTE(review): the two sets drawn for an off-diagonal entry are sampled
    independently and may share nodes -- confirm this is the intended null model.
    """
    G = nx.from_pandas_adjacency(data[cond], create_using=nx.Graph())
    G.remove_edges_from(nx.selfloop_edges(G))
    # Address the matrix by position rather than by raw label, so labels that
    # are not exactly 0..k-1 neither misplace entries nor raise IndexError.
    comm_labels = sorted(comms[cond].keys())
    position = {label: k for k, label in enumerate(comm_labels)}
    random_comm_structure = np.zeros((len(comm_labels), len(comm_labels)))
    nodes = list(G.nodes())
    for label, comm in comms[cond].items():
        rcomm = np.random.choice(nodes, size=len(comm), replace=False)
        random_comm_structure[position[label], position[label]] = intra_edges_weight(G, rcomm)
    for j, jj in combinations(comm_labels, 2):
        rcomm1 = np.random.choice(nodes, size=len(comms[cond][j]), replace=False)
        rcomm2 = np.random.choice(nodes, size=len(comms[cond][jj]), replace=False)
        random_comm_structure[position[j], position[jj]] = inter_edges_weight(
            G, rcomm1, rcomm2)
    return random_comm_structure


def r_ratio(cs):
    """Ratio of the mean positive diagonal entry to the mean positive upper-triangle entry.

    ``cs`` is a square array; entries that are not strictly positive are
    excluded from both means. The strict upper triangle (offset 1) is used.
    """
    intra = cs[np.diag_indices_from(cs)]
    inter = cs[np.triu_indices_from(cs, 1)]
    mean_intra = np.mean(intra[intra > 0])
    mean_inter = np.mean(inter[inter > 0])
    return mean_intra / mean_inter

# Null distribution of r for every condition: num_iter random community structures each.
num_iter = 4000
random_r_distribs = {}
for i in tqdm(av_corrs_rho):
    null_r_values = []
    for it in range(num_iter):
        random_structure = compute_random_comm_structure(av_corrs_rho, i, center_ctn)
        null_r_values.append(r_ratio(random_structure))
    random_r_distribs[i] = null_r_values

In [None]:
from scipy.stats import percentileofscore

# Compare each observed r with its null distribution: print, plot and tabulate.
fig = plt.figure(figsize=(16,3))
record = []
for count, i in enumerate(comm_structure, start=1):
    r = r_ratio(comm_structure[i])
    null_r_values = random_r_distribs[i]
    percentile = percentileofscore(null_r_values, r)
    print(i, r, np.quantile(null_r_values, 0.95), percentile)
    # NOTE(review): the value stored under 'p-value' is percentile/100, i.e.
    # the percentile rank of r in the null as a fraction -- confirm the label.
    record.append([i, r, percentile/100])

    plt.subplot(1,4,count)
    plt.title(i)
    plt.hist(null_r_values, np.linspace(0,2.5,400), density=True, stacked=True,
             label='null', alpha=.5, histtype='step', linewidth=2)
    plt.vlines(r, 0, 4.5, color='orange', linestyle='dashed', label=r'$r$')
    plt.xlim(0,2.6)
    plt.ylim(0,6)
    plt.legend(loc=0)
plt.tight_layout()
plt.savefig('../data/viz-data/r-significance-plots.pdf')

df = pd.DataFrame(record, columns = ['cond', r'$r$', 'p-value'] )
df.to_excel('../data/r-significance.xls')

In [None]:
# Emit the current `df` table as LaTeX source.
print(df.to_latex())

## Significance of differences between r values

In [None]:
# For every pair of conditions, compare the observed difference of r values
# with differences of values resampled from the two null distributions.
diff_sample = 10000
fig = plt.figure(figsize=(16,6))
record = []
for count, (i, ii) in enumerate(combinations(list(comm_structure.keys()), 2), start=1):
    plt.subplot(2,3,count)
    first_draws = np.random.choice(random_r_distribs[i],diff_sample)
    second_draws = np.random.choice(random_r_distribs[ii],diff_sample)
    diffs = first_draws - second_draws
    real_diff =  r_ratio(comm_structure[i]) - r_ratio(comm_structure[ii])
    # NOTE(review): stored under 'p-value' but it is the percentile rank / 100.
    rank_fraction = percentileofscore(diffs, real_diff)/100
    print(i, ii, real_diff, rank_fraction)
    record.append([i, ii, real_diff, rank_fraction])
    plt.title((i,ii))
    plt.hist(diffs, np.linspace(-.4,.4,100), density=True, stacked=True,
             label='null', alpha=.5, histtype='step', linewidth=2)
    plt.vlines(real_diff, 0, 4.5, color='orange', linestyle='dashed', label=r'$\Delta r$')
    plt.ylim(0,6)
    plt.legend(loc=0)
plt.tight_layout()
plt.savefig('../data/viz-data/delta-r-significance-plots.pdf')

df = pd.DataFrame(record, columns = ['cond1', 'cond2', r'$r_1$ - $r_2$', 'p-value'] )
df.to_excel('../data/delta-r-significance.xls')

In [None]:
# Emit the current `df` table as LaTeX source.
print(df.to_latex())

# Community overlap

In [None]:
def comm_jaccard(a, b):
    """Jaccard index of two collections: |a ∩ b| / |a ∪ b| over their distinct elements."""
    members_a, members_b = set(a), set(b)
    return len(members_a & members_b) / len(members_a | members_b)


def partition_overlap(ctn1, ctn2):
    """Pairwise Jaccard overlap between the communities of two partitions.

    ``ctn1`` and ``ctn2`` map community labels to node collections. The
    returned DataFrame has one column per community of ``ctn1`` and one row
    per community of ``ctn2``.
    """
    return pd.DataFrame({
        c: {cc: comm_jaccard(ctn1[c], ctn2[cc]) for cc in ctn2}
        for c in ctn1
    })


def z_comm_jaccard(a, b, nodes, num_samples=1000, thr=None):
    """Z-score of the Jaccard overlap of ``a`` and ``b`` against a random null.

    The null is built from ``num_samples`` pairs of node sets drawn from
    ``nodes`` without replacement, with the same sizes as ``a`` and ``b``.

    Parameters
    ----------
    a, b : sized collections
        The two communities to compare.
    nodes : sequence
        Population the random sets are drawn from.
    num_samples : int, optional
        Number of random pairs used for the null distribution.
    thr : float or None, optional
        When given, z-scores within ``[-thr, thr]`` are reported as 0.

    Returns
    -------
    float or int
        The z-score, or 0 when ``thr`` is set and the z-score does not exceed
        it in absolute value.
    """
    real_jacc = comm_jaccard(a, b)
    samples = []
    for _ in range(num_samples):
        x = np.random.choice(nodes, len(a), replace=False)
        y = np.random.choice(nodes, len(b), replace=False)
        samples.append(comm_jaccard(x, y))
    # Computed once; it used to be re-evaluated in every branch below.
    z_score = (real_jacc - np.mean(samples)) / np.std(samples)
    if thr is None:
        return z_score
    if z_score < -thr or z_score > thr:
        return z_score
    return 0

def z_partition_overlap(ctn1, ctn2, nodes, num_samples=1000, thr=None):
    """Table of ``z_comm_jaccard`` values for every pair of communities.

    Columns correspond to the communities of ``ctn1`` and rows to those of
    ``ctn2``; ``nodes``, ``num_samples`` and ``thr`` are forwarded unchanged.
    """
    return pd.DataFrame({
        c: {
            cc: z_comm_jaccard(ctn1[c], ctn2[cc], nodes, num_samples=num_samples, thr=thr)
            for cc in ctn2
        }
        for c in ctn1
    })


def alpha_comm_jaccard(a, b, nodes, num_samples=1000, alpha=0.05):
    """Excess Jaccard overlap of ``a`` and ``b`` over a random null, gated by quantiles.

    The null is built from ``num_samples`` pairs of node sets drawn from
    ``nodes`` without replacement, with the same sizes as ``a`` and ``b``.

    Parameters
    ----------
    a, b : sized collections
        The two communities to compare.
    nodes : sequence
        Population the random sets are drawn from.
    num_samples : int, optional
        Number of random pairs used for the null distribution.
    alpha : float or None, optional
        The observed overlap counts as significant when it lies below the
        ``alpha`` quantile or above the ``1 - alpha`` quantile of the null
        (so ``alpha`` is applied to each tail separately). With ``None`` the
        plain z-score is returned instead.

    Returns
    -------
    float or int
        Observed overlap minus the null mean when significant, otherwise 0;
        or the z-score when ``alpha`` is ``None``.
    """
    real_jacc = comm_jaccard(a, b)
    samples = []
    for _ in range(num_samples):
        x = np.random.choice(nodes, len(a), replace=False)
        y = np.random.choice(nodes, len(b), replace=False)
        samples.append(comm_jaccard(x, y))
    excess = real_jacc - np.mean(samples)
    if alpha is None:
        return excess / np.std(samples)
    lower_quantile = np.quantile(samples, alpha)
    upper_quantile = np.quantile(samples, 1 - alpha)
    if real_jacc < lower_quantile or real_jacc > upper_quantile:
        return excess
    return 0

def alpha_partition_overlap(ctn1, ctn2, nodes, num_samples=1000, alpha=0.05):
    """Table of ``alpha_comm_jaccard`` values for every pair of communities.

    Columns correspond to the communities of ``ctn1`` and rows to those of
    ``ctn2``; ``nodes``, ``num_samples`` and ``alpha`` are forwarded unchanged.
    """
    return pd.DataFrame({
        c: {
            cc: alpha_comm_jaccard(ctn1[c], ctn2[cc], nodes, num_samples=num_samples, alpha=alpha)
            for cc in ctn2
        }
        for c in ctn1
    })

In [None]:
import seaborn as sns
from itertools import combinations

# Raw Jaccard overlap between the center partitions of every pair of conditions.
fig = plt.figure(figsize=(18, 8))
for count, (i, ii) in enumerate(combinations(center_ctn.keys(), 2), start=1):
    plt.subplot(2, 3, count)
    df = partition_overlap(center_ctn[i], center_ctn[ii])
    sns.heatmap(df, vmin=0, vmax=.5)
    plt.title([i, ii, np.mean(np.mean(df))])

plt.tight_layout()

In [None]:
import seaborn as sns
from itertools import combinations

# Thresholded z-score overlap tables for every pair of conditions, kept in zdf.
fig = plt.figure(figsize=(18, 8))
zdf = {}
for count, (i, ii) in enumerate(combinations(center_ctn.keys(), 2), start=1):
    plt.subplot(2, 3, count)
    z_table = z_partition_overlap(center_ctn[i], center_ctn[ii],
                                  range(G.number_of_nodes()), thr=3, num_samples=5000)
    zdf[(i, ii)] = z_table
    sns.heatmap(z_table, cmap='BrBG')  # vmin=-3, vmax=3,
    # Title: the pair, the mean absolute value and the mean value of the table.
    plt.title([i, ii, np.mean(np.mean(np.abs(z_table))),
               np.mean(np.mean(z_table))])

plt.tight_layout()

In [None]:
import seaborn as sns
from itertools import combinations

# Quantile-gated overlap tables for every pair of conditions, kept in alpha_df.
fig = plt.figure(figsize=(18, 8))
alpha_df = {}
for count, (i, ii) in enumerate(combinations(center_ctn.keys(), 2), start=1):
    plt.subplot(2, 3, count)
    alpha_table = alpha_partition_overlap(center_ctn[i], center_ctn[ii], range(G.number_of_nodes()),
                                          num_samples=5000, alpha=0.01)
    alpha_df[(i, ii)] = alpha_table
    sns.heatmap(alpha_table, cmap='BrBG')  # vmin=-3, vmax=3,
    # Title: the pair, the mean absolute value and the mean value of the table.
    plt.title([i, ii, np.mean(np.mean(np.abs(alpha_table))),
               np.mean(np.mean(alpha_table))])

plt.tight_layout()

In [None]:
# Named pairs of condition keys. Later cells convert each list to a tuple and
# use it as a key into `zdf` and `common_nodes_dict`.
social_learning = [('P', 'A'), ('P', 'S')]
pure_learning = [('P', 'A'), ('U', 'A')]
pure_sociality = [('U', 'A'), ('U', 'S')]
complete_task  = [('P', 'S'), ('U', 'S')]
maximum_weird = [('P', 'S'), ('U', 'A')]
even_weirder = [('P', 'A'), ('U', 'S')]

In [None]:
# Alternative names for three of the condition pairs.
# NOTE(review): these variables are not referenced by the cells visible here.
only_visual_stimulus = [('U', 'A'), ('U', 'S')]
consimilar_recon = [('P', 'S'), ('U', 'S')]
learned_response = [('P', 'A'), ('P', 'S')]

In [None]:
def common_nodes(a, b):
    """Return the set of elements present in both ``a`` and ``b``."""
    shared = set(a) & set(b)
    return shared

In [None]:
# For each comparison, find the pair of communities with the largest z-score
# and record the nodes they share.
common_nodes_dict = {}
for cond in [social_learning, pure_learning, pure_sociality, maximum_weird, complete_task, even_weirder]:
    test = zdf[tuple(cond)]
    arr = np.array(test)
    # Row/column POSITIONS of the maximum; translate them to the table's labels
    # before using them as community keys (rows index the second condition's
    # communities, columns the first condition's).
    a, b = np.where(arr == arr.max())
    row_label = test.index[a[0]]
    col_label = test.columns[b[0]]
    # `zdf` was built from `center_ctn`, so its labels must be looked up there
    # (not in `ctn`, whose second-level keys are iteration numbers).
    first_comm = center_ctn[cond[0]][col_label]
    second_comm = center_ctn[cond[1]][row_label]
    print(cond, test.loc[row_label, col_label], comm_jaccard(first_comm, second_comm))
    common_nodes_dict[tuple(cond)] = list(common_nodes(first_comm, second_comm))

In [None]:
# Z-score a community pair must exceed for its shared nodes to be recorded.
chosen_sigma = 3

# common_nodes_dict[comparison] -> list of shared-node lists, one per
# community pair whose z-score exceeds chosen_sigma.
common_nodes_dict = {}
for cond in [social_learning, pure_learning, pure_sociality, maximum_weird, complete_task, even_weirder]:
    print(cond)
    test = zdf[tuple(cond)]
    common_nodes_dict[tuple(cond)] = []
    # Iterate over the table's actual labels (sorted) instead of range()
    # positions: columns are communities of cond[0], rows those of cond[1].
    # For labels 0..k-1 this visits the pairs in the same order as before.
    for col_label in sorted(test.columns):
        for row_label in sorted(test.index):
            z_value = test.loc[row_label, col_label]
            if z_value > chosen_sigma:
                first_comm = center_ctn[cond[0]][col_label]
                second_comm = center_ctn[cond[1]][row_label]
                print(z_value, comm_jaccard(first_comm, second_comm))
                common_nodes_dict[tuple(cond)].append(list(common_nodes(first_comm, second_comm)))
    print('\n')

In [None]:
# List, per comparison, the size and members of every recorded shared module.
for deh, shared_modules in common_nodes_dict.items():
    print(deh)
    for nodes in shared_modules:
        print(len(nodes), nodes)
    print('\n')

In [None]:
# Human-readable name for each condition pair that is exported to the module graph.
comparison_names = {
    (('U', 'A'), ('U', 'S')): 'visual_stimulus',
    (('P', 'S'), ('U', 'S')): 'consimilar_recognition',
    (('P', 'A'), ('P', 'S')): 'learned_response',
}

In [None]:
# Edge-less graph over the nodes of G that carries, for every named comparison,
# a module id per node plus a label attribute (node name for module members,
# empty string otherwise).
module_viz_graph = nx.Graph()
module_viz_graph.add_nodes_from(G.nodes())
for deh in common_nodes_dict:
    if deh in comparison_names:
        labels = dict.fromkeys(list(G.nodes()), '')
        comm_part = dict.fromkeys(module_viz_graph.nodes(), 0)
        # `deh` is a pair of condition keys, so it must be translated through
        # comparison_names before being compared with a comparison name; the
        # previous `deh == 'visual_stimulus'` test could never be true.
        if comparison_names[deh] == 'visual_stimulus':
            # Reference comparison: modules are numbered 1, 2, 3, ...
            for num_comm, comm_nodes in enumerate(common_nodes_dict[deh]):
                for node in comm_nodes:
                    comm_part[node] = num_comm + 1
                    labels[node] = node
        else:
            # Other comparisons: nodes that belong to the first two reference
            # modules keep ids 1 and 2; remaining module nodes get ids from 3 up.
            reference_modules = common_nodes_dict[('U', 'A'), ('U', 'S')]
            for num_comm, comm_nodes in enumerate(common_nodes_dict[deh]):
                for node in comm_nodes:
                    if node in reference_modules[0]:
                        comm_part[node] = 1
                    if node in reference_modules[1]:
                        comm_part[node] = 2
                    if comm_part[node] == 0:
                        comm_part[node] = num_comm + 3
                    labels[node] = node

        nx.set_node_attributes(module_viz_graph, comm_part, comparison_names[deh])
        nx.set_node_attributes(module_viz_graph, labels, comparison_names[deh]+ 'labels')

nx.set_node_attributes(module_viz_graph, regular_comm, 'control_spin_comms')

In [None]:
# Display the shared modules recorded for the ('U', 'A') vs ('U', 'S') comparison.
common_nodes_dict[('U', 'A'), ('U', 'S')]

In [None]:
# Comparison names in the insertion order of `comparison_names`.
# NOTE(review): not referenced by the cells visible here.
ordered_keys = list(comparison_names.values())

In [None]:
# Write the module graph, with its node attributes, to a GEXF file.
nx.write_gexf(module_viz_graph, '../data/viz-data/module_viz_graph.gexf')