In [1]:
from functools import partial
from itertools import product
import graspy as gp
from graspy.simulations import er_np, sbm
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import ttest_ind
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

from twins import load_dataset
%matplotlib inline



In [2]:
graphs = load_dataset(modality='dmri', parcellation='desikan', preprocess=None, ptr=None)[0]

df = pd.read_csv('../../../../twins/data/raw/unrestricted_jaewonc78_1_20_2019_23_7_58.csv')

gender = []
for sub in graphs.keys():
    gender.append(df[df.Subject == int(sub)]['Gender'].values[0])
    
le = LabelEncoder()
labels = le.fit_transform(gender)
# 0 is female, 1 is male

node_df = pd.read_csv("../../../data/desikan_labels.csv")
node_df = node_df.sort_values('name')
hemispheres = node_df.hemisphere.values

In [3]:
male_graphs = []
female_graphs = []

for idx, (sub, graph) in enumerate(graphs.items()):
    if labels[idx] == 1:
        male_graphs.append(graph)
    else:
        female_graphs.append(graph)

male_graphs = (np.array(male_graphs) > 0) * 1
female_graphs = (np.array(female_graphs) > 0) * 1

In [7]:
def generate_binary_sbms(m, n, block_1, block_2, p, q):
    total_n = block_1 + block_2
    p2 = [[q, p], [p, p]]

    pop1 = np.array([er_np(total_n, p, directed=False) for _ in np.arange(m)])
    pop2 = np.array([sbm(
        [block_1, block_2],
        p2, directed=False) for _ in np.arange(n)])

    labels = np.array([0] * block_1 + [1] * block_2)

    return pop1, pop2, labels


def run_experiment(m1, m2, block_1, block_2, p, q, reps):
    total_n = block_1 + block_2
    r, c = np.triu_indices(total_n, k=1)

    res = np.zeros((reps))

    for i in np.arange(reps).astype(int):
        pop1, pop2, true_labels = generate_binary_sbms(m1, m2, block_1, block_2, p, q)

        pop1_edges = pop1[:, r, c]
        pop2_edges = pop2[:, r, c]
        true_edges = (true_labels[:, None] + true_labels[None, :])[r, c]

        pvals = np.zeros(3)
        for j in np.unique(true_edges):
            tmp_labels = true_edges == j

            statistics, pval = ttest_ind(
                pop1_edges[:, tmp_labels].ravel(), pop2_edges[:, tmp_labels].ravel(),
            )
            
            pvals[j] = pval
        
        if np.argsort(pvals)[0] == 0:
            res[i] = 1
        else:
            res[i] = 0

    res = np.nanmean(res, axis=0).reshape(-1)

    to_append = [m1, m2, p, q, *res]
    return to_append

In [8]:
def estimate_p(X, node_labels):
    r, c = np.triu_indices(len(node_labels), k=1)
    true_edges = (node_labels[:, None] + node_labels[None, :])[r, c]
    edges = X[:, r, c]
    
    p = np.zeros(len(np.unique(true_edges)))
    for i, val in enumerate(np.unique(true_edges)):
        p[i] = edges[:,true_edges == val].mean()
        
    return p

In [9]:
males = (labels == 1).sum()
females = (labels == 0).sum()

male_p = estimate_p(male_graphs, np.array([0]*35 + [1]*35))
female_p = estimate_p(female_graphs, np.array([0]*35 + [1]*35))

block_1 = 25
block_2 = 25

res = []
for p, q in zip(male_p, female_p):
    res.append(run_experiment(
        males, females, block_1, block_2, p, q, 100
    ))

In [17]:
cols = ['male_n', 'female_n', 'block_11', 'block_12', 'block_22']
df = pd.DataFrame(res, columns=cols)
df.to_csv("../results/exp3_HCP.csv")

In [16]:
df

Unnamed: 0,male_n,female_n,block_11,block_12,block_22
0,449,535,0.606947,0.621996,1.0
1,449,535,0.28034,0.29957,1.0
2,449,535,0.60694,0.613251,0.99
