In [2]:
import graspy as gp
import glob
import os, sys
import numpy as np
import pandas as pd
import multiprocessing as multiproc
import dask
from dask.distributed import Client, progress
import dask.dataframe as ddf
from scipy.stats import mannwhitneyu
from matplotlib.pyplot import savefig
import networkx as nx

In [170]:
# Load the graph, its dense adjacency matrix, and the per-node annotations.
gr = nx.read_graphml('./data/G.graphml')
# nx.to_numpy_matrix is deprecated (removed in networkx 3.0) and returns np.matrix;
# nx.to_numpy_array gives the same adjacency values as a plain 2-D ndarray.
A = nx.to_numpy_array(gr)
# Dicts mapping node id -> attribute value for the 'Hemisphere' and 'Pair' attributes.
node_hemi = nx.get_node_attributes(gr, 'Hemisphere')
node_pair = nx.get_node_attributes(gr, 'Pair')
# Number of vertices (A is n x n).
n = A.shape[0]

In [158]:
def make_edgecomm_from_dict(dict_nodes):
    """Build an edge-community matrix from a node -> label mapping.

    Parameters
    ----------
    dict_nodes : dict
        Maps each node to a label; only the values and their iteration
        order are used.

    Returns
    -------
    numpy.ndarray
        Square float array with one row/column per entry of ``dict_nodes``.
        Off-diagonal entry ``[i, j]`` is 1 when the i-th and j-th labels
        compare equal and 2 otherwise; the diagonal stays 0.
    """
    labels = list(dict_nodes.values())
    size = len(labels)
    C = np.zeros((size, size))
    for row, row_label in enumerate(labels):
        for col, col_label in enumerate(labels):
            if row == col:
                # Self-pairs are left at 0 so they belong to neither group.
                continue
            C[row, col] = 1 if row_label == col_label else 2
    return C

In [159]:
# 1-based vertex indices, broadcast so that Rowmtx[i, j] == i + 1 and
# Colmtx[i, j] == j + 1 (stored as floats, cast to int when exported).
one_based = np.arange(1, n + 1, dtype=float)
Rowmtx = np.tile(one_based[:, np.newaxis], (1, n))
Colmtx = np.tile(one_based, (n, 1))
vtx_order = node_hemi

# Edge-community codes: 1 = endpoints share the label, 2 = they differ, 0 = diagonal.
C_hemi = make_edgecomm_from_dict(node_hemi)
C_pair = make_edgecomm_from_dict(node_pair)

# One row per ordered vertex pair, flattened in row-major order.
edge_columns = {
    "Value": np.asarray(A).ravel(),
    "Row": Rowmtx.astype(int).ravel(),
    "Column": Colmtx.astype(int).ravel(),
    "Homophilic": C_hemi.astype(int).ravel(),
    "Homotopic": C_pair.astype(int).ravel(),
}
A_df = pd.DataFrame(edge_columns)

A_df.to_csv('./data/dros_edges.csv')

In [160]:
def mww(G, C, alternative='two-sided'):
    """Mann-Whitney U p-value comparing two groups of entries of ``G``.

    Parameters
    ----------
    G : numpy.ndarray
        Array of values, indexed with boolean masks derived from ``C``.
    C : numpy.ndarray
        Group codes; entries equal to 1 form the first sample and entries
        equal to 2 form the second. Other codes are ignored.
    alternative : str
        Passed through to ``scipy.stats.mannwhitneyu``.

    Returns
    -------
    float
        The p-value (second element of the ``mannwhitneyu`` result).
    """
    first_sample = G[C == 1]
    second_sample = G[C == 2]
    _, p_value = mannwhitneyu(first_sample, second_sample, alternative=alternative)
    return p_value

In [161]:
# One-sided test: are adjacency entries between same-'Hemisphere' vertices
# (C_hemi == 1) greater than those between different-hemisphere vertices (C_hemi == 2)?
mww(np.asarray(A), C_hemi, alternative="greater")

0.0

In [162]:
# One-sided test: are adjacency entries between vertices sharing a 'Pair' label
# (C_pair == 1) greater than those between vertices with different labels (C_pair == 2)?
mww(np.asarray(A), C_pair, alternative="greater")

0.0

In [163]:
# One record per vertex, keyed by node id; "Region" is filled from the 'Pair' attribute.
vertex_dict = {
    node_id: {"Name": node_id, "Hemisphere": hemi, "Region": node_pair[node_id]}
    for node_id, hemi in node_hemi.items()
}

In [164]:
# Pivot the per-vertex records into column lists: field name -> values in vertex order.
vertex_data = {}
for record in vertex_dict.values():
    for field, entry in record.items():
        if field not in vertex_data:
            vertex_data[field] = []
        vertex_data[field].append(entry)

In [165]:
# Assemble the per-vertex table (one column per field) and preview the first rows.
vertex_df = pd.DataFrame.from_dict(vertex_data)
vertex_df.head()

Unnamed: 0,Name,Hemisphere,Region
0,4493199,left,13a PN
1,8244823,right,13a PN
2,7865696,left,1a PN
3,8252067,right,1a PN
4,7865652,left,22c PN


In [166]:
# Persist the vertex table (the DataFrame index is written as the first column).
vertex_df.to_csv(path_or_buf='./data/dros_vertices.csv')

In [190]:
e = gp.models.SBMEstimator()
vlabs = np.array(list(node_hemi.values()))
# True for vertices whose hemisphere label is "left" or "right"; all others are dropped.
left_right_vertices = np.array([vlab in ["left", "right"] for vlab in vlabs])
print(A.shape)
if A.shape[0] == left_right_vertices.size:
    # A still has one row/column per labelled vertex: keep only left/right ones.
    keep = np.flatnonzero(left_right_vertices)
    A = A[np.ix_(keep, keep)]
elif A.shape[0] != np.count_nonzero(left_right_vertices):
    # Neither the full nor the already-filtered size: the mask cannot be applied safely.
    raise ValueError(
        "A has %d rows, expected %d (unfiltered) or %d (filtered)"
        % (A.shape[0], left_right_vertices.size, np.count_nonzero(left_right_vertices))
    )
# Otherwise A was already filtered by an earlier run of this cell; because the cell
# rebinds A, re-applying the mask would silently remove additional vertices.
print(A.shape)

(321, 321)
(319, 319)


In [191]:
# Integer block label per retained vertex: "left" -> 0, any other kept label -> 1.
kept_labels = vlabs[left_right_vertices]
vlabs_int = [int(vlab != "left") for vlab in kept_labels]
len(vlabs_int)

319

In [196]:
# Evaluate the candidate block structures with test_method="anova".
# NOTE(review): presumably each 4-letter string encodes which of the 2x2
# hemisphere blocks share a parameter - confirm against the estimator's docs.
candidate_structures = ["abba", "abca", "abbd", "abcd"]
e.estimate_block_structure(
    A, np.asarray(vlabs_int), candidate_structures, test_method="anova"
)

(0.0, 'abba')

In [197]:
# Same candidate block structures, evaluated with test_method="kw".
candidate_structures = ["abba", "abca", "abbd", "abcd"]
e.estimate_block_structure(
    A, np.asarray(vlabs_int), candidate_structures, test_method="kw"
)

(0.0, 'abba')

In [198]:
# Same candidate block structures, evaluated with test_method="mgc".
# NOTE(review): the recorded run failed with MemoryError allocating a
# (101761, 101761) float64 array; 101761 == 319**2, so this path appears to
# build a matrix over all pairs of adjacency entries - TODO confirm and
# subsample or skip before re-running.
candidate_structures = ["abba", "abca", "abbd", "abcd"]
e.estimate_block_structure(
    A, np.asarray(vlabs_int), candidate_structures, test_method="mgc"
)

MemoryError: Unable to allocate 77.2 GiB for an array with shape (101761, 101761) and data type float64