In [1]:
%load_ext nb_black
%load_ext rpy2.ipython

<IPython.core.display.Javascript object>

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from graspologic.datasets import load_mice
from statsmodels.stats.multitest import multipletests

<IPython.core.display.Javascript object>

In [3]:
# Fetch the complete mouse connectome dataset
mice = load_mice()

# Sample parameters reported in the dataset metadata
n_subjects, n_vertices = (mice.meta[key] for key in ("n_subjects", "n_vertices"))

# Combine the per-subject adjacency matrices into a single 3D numpy array
graphs = np.array(mice.graphs)

<IPython.core.display.Javascript object>

## Identifying Signal Edges

In [4]:
from hyppo.ksample import KSample

<IPython.core.display.Javascript object>

In [5]:
# Partition the stacked graphs into one array per genotype label
genotype_order = ("BTBR", "B6", "CAST", "DBA2")
btbr, b6, cast, dba2 = (graphs[mice.labels == label] for label in genotype_order)

# Keep the per-genotype groups together for the k-sample tests below
connectomes = [btbr, b6, cast, dba2]

<IPython.core.display.Javascript object>

In [6]:
# Collect the (i, j) index pairs of the upper triangle of the connectome
# (diagonal excluded). The pairs are materialised as a list because a bare
# `zip` is a one-shot iterator: once a loop has consumed it, re-running that
# loop without re-running this cell would silently iterate over nothing.
indices = list(zip(*np.triu_indices(n_vertices, 1)))

<IPython.core.display.Javascript object>

In [7]:
def _dcorr_edge_test(row, col):
    """Run a k-sample Dcorr test on edge (row, col) across the genotype groups.

    Returns ``[row + 1, col + 1, statistic, pvalue]`` so the ROIs are reported
    with 1-based numbering.
    """
    # Weights of this edge for every subject, grouped by genotype
    weights_by_genotype = [group[:, row, col] for group in connectomes]

    try:
        stat, pval = KSample("Dcorr").test(*weights_by_genotype)
    except ValueError:
        # A ValueError is thrown when any of the samples have equal edge
        # weights (i.e. one of the inputs has 0 variance); record the edge
        # as carrying no signal instead of aborting the whole sweep.
        stat, pval = np.nan, 1

    return [row + 1, col + 1, stat, pval]


# One result row per edge in the upper triangle of the connectome
edge_pvals = [_dcorr_edge_test(roi_i, roi_j) for roi_i, roi_j in indices]

<IPython.core.display.Javascript object>

In [8]:
# Assemble the per-edge test results into a table ordered by raw p-value
edge_columns = ["ROI_1", "ROI_2", "stat", "pvalue"]
signal_edges = pd.DataFrame(edge_pvals, columns=edge_columns).sort_values(
    by="pvalue", ignore_index=True
)

# Holm correction of the p-values across all tested edges
reject, holm_pvalue = multipletests(
    signal_edges["pvalue"], alpha=0.05, method="holm"
)[:2]
signal_edges["holm_pvalue"] = holm_pvalue
signal_edges["significant"] = reject

# Rank the edges by corrected p-value, save the table, and preview it
signal_edges = signal_edges.sort_values(by="holm_pvalue", ignore_index=True)
signal_edges.to_csv("../results/signal_edges.csv", index=False)
signal_edges.head()

Unnamed: 0,ROI_1,ROI_2,stat,pvalue,holm_pvalue,significant
0,121,230,0.717036,9.911903e-07,0.054462,False
1,121,293,0.699473,1.327371e-06,0.072932,False
2,121,244,0.698197,1.355858e-06,0.074496,False
3,121,243,0.685735,1.668308e-06,0.091662,False
4,228,287,0.670809,2.139082e-06,0.117525,False


<IPython.core.display.Javascript object>

In [9]:
def lookup_roi_name(roi, atlas=None, n_rois_per_hemisphere=166):
    """Translate a 1-based ROI number into a readable "<structure> (<L|R>)" label.

    ROI numbers ``1..n_rois_per_hemisphere`` are labelled as left hemisphere
    and the next ``n_rois_per_hemisphere`` numbers as right hemisphere; both
    halves index the same list of structure names.

    Parameters
    ----------
    roi : int
        1-based ROI number. Integer-valued floats are accepted and truncated
        with ``int()``.
    atlas : mapping or DataFrame, optional
        Object whose ``atlas["Structure"].values`` holds the structure names.
        Defaults to ``mice.atlas`` from the notebook namespace.
    n_rois_per_hemisphere : int, optional
        Number of ROIs in each hemisphere (default 166).

    Returns
    -------
    str
        Structure name with underscores replaced by spaces, followed by the
        hemisphere in parentheses.

    Raises
    ------
    ValueError
        If ``roi`` is outside ``1..2 * n_rois_per_hemisphere``. Without this
        check an out-of-range ROI would silently wrap around to a wrong label.
    """
    if atlas is None:
        atlas = mice.atlas

    roi = int(roi)
    n_total = 2 * n_rois_per_hemisphere
    if not 1 <= roi <= n_total:
        raise ValueError(f"ROI must be between 1 and {n_total}, got {roi}")

    # Zero-based position: the quotient picks the hemisphere, the remainder
    # the structure within it.
    hemisphere_index, structure_index = divmod(roi - 1, n_rois_per_hemisphere)
    hemisphere = "R" if hemisphere_index else "L"

    structure = atlas["Structure"].values[structure_index]
    structure = structure.replace("_", " ")
    return f"{structure} ({hemisphere})"

<IPython.core.display.Javascript object>

In [10]:
# Get the top 20 strongest signal edges.
# Take an explicit copy: assigning columns on the result of `.head()` is
# chained assignment, which triggers pandas' SettingWithCopyWarning and makes
# it ambiguous whether `signal_edges` itself is modified.
strong_signal_edges = signal_edges.head(20).copy()

# Replace the numeric ROI identifiers with readable structure names
strong_signal_edges["ROI_1"] = strong_signal_edges["ROI_1"].apply(lookup_roi_name)
strong_signal_edges["ROI_2"] = strong_signal_edges["ROI_2"].apply(lookup_roi_name)
strong_signal_edges

Unnamed: 0,ROI_1,ROI_2,stat,pvalue,holm_pvalue,significant
0,Corpus Callosum (L),Striatum (R),0.717036,9.911903e-07,0.054462,False
1,Corpus Callosum (L),Internal Capsule (R),0.699473,1.327371e-06,0.072932,False
2,Corpus Callosum (L),Reticular Nucleus of Thalamus (R),0.698197,1.355858e-06,0.074496,False
3,Corpus Callosum (L),Zona Incerta (R),0.685735,1.668308e-06,0.091662,False
4,Septum (R),Corpus Callosum (R),0.670809,2.139082e-06,0.117525,False
5,Lateral Ventricle (L),Striatum (R),0.667327,2.266866e-06,0.124544,False
6,Striatum (L),Striatum (R),0.663875,2.401064e-06,0.131914,False
7,Corpus Callosum (L),Ventral Thalamic Nuclei (R),0.66338,2.420987e-06,0.133007,False
8,Hippocampus (L),Middle Cerebellar Peduncle (L),0.658415,2.629842e-06,0.144478,False
9,Caudomedial Entorhinal Cortex (R),Ventral Hippocampal Commissure (R),0.655984,2.738604e-06,0.150451,False


<IPython.core.display.Javascript object>

## Identifying Signal Vertices

In [11]:
from itertools import product

from graspologic.embed import OmnibusEmbed

<IPython.core.display.Javascript object>

In [12]:
# Embed all graphs into a shared latent space with the omnibus embedding
embedder = OmnibusEmbed()
omni_latent = embedder.fit_transform(graphs)

# Flatten to one row per embedded vertex, keeping the latent dimensions as columns
n_components = omni_latent.shape[-1]
omni_flat = omni_latent.reshape(-1, n_components)
print(f"Omnibus embedding shape is {omni_flat.shape}")

# Wrap the flattened array in a dataframe with 1-based component names
component_names = [f"omni_{k}" for k in range(1, n_components + 1)]
omni_embedding = pd.DataFrame(omni_flat, columns=component_names).astype(np.float64)
omni_embedding.head()

Omnibus embedding shape is (10624, 5)


Unnamed: 0,omni_1,omni_2,omni_3,omni_4,omni_5
0,12.730154,-5.338366,-2.503565,14.369667,4.277382
1,9.080038,-3.107737,-0.569771,14.679973,2.005405
2,3.491072,-2.343092,-5.636349,11.416039,-0.373804
3,0.761835,-0.652198,-1.929811,3.537828,-0.247422
4,9.124708,-2.755428,-2.402753,17.376237,0.947563


<IPython.core.display.Javascript object>

In [13]:
# Construct identifiers for each embedded vertex.
# The right-hemisphere ROI numbers are the left-hemisphere ones offset by 166.
left = mice.atlas["ROI"].unique()
right = left + 166
rois = np.append(left, right)

# Keep only the part of each participant ID after the first "-"
# (e.g. "sub-54776" -> "54776").
participants = mice.participants["participant_id"]
participants = participants.apply(lambda x: x.split("-")[1])
genotypes = mice.participants["genotype"]

# One row per (participant, ROI) pair, participant-major -- the same ordering
# as itertools.product(participants, rois). Participant ID and genotype are
# repeated row-for-row from the participants table, which avoids running one
# DataFrame.query per embedded vertex and avoids round-tripping the ROI
# numbers through a string array.
# NOTE(review): assumes participant IDs are unique and that the participants
# table is in the same order as the embedded graphs -- confirm.
identifiers = pd.DataFrame(
    {
        "participant_id": np.repeat(participants.to_numpy(), len(rois)),
        "ROI": np.tile(rois, len(participants)).astype(np.int64),
        "genotype": np.repeat(genotypes.to_numpy(), len(rois)),
    }
)

# pd.concat(axis=1) aligns on the index and would silently pad with NaN if
# the two tables had different lengths, so fail loudly instead.
if len(identifiers) != len(omni_embedding):
    raise ValueError(
        f"Expected {len(omni_embedding)} identifier rows to match the embedding, "
        f"got {len(identifiers)}"
    )

omni = pd.concat([omni_embedding, identifiers], axis=1)
omni.head()

Unnamed: 0,omni_1,omni_2,omni_3,omni_4,omni_5,participant_id,ROI,genotype
0,12.730154,-5.338366,-2.503565,14.369667,4.277382,54776,1,DBA2
1,9.080038,-3.107737,-0.569771,14.679973,2.005405,54776,2,DBA2
2,3.491072,-2.343092,-5.636349,11.416039,-0.373804,54776,3,DBA2
3,0.761835,-0.652198,-1.929811,3.537828,-0.247422,54776,4,DBA2
4,9.124708,-2.755428,-2.402753,17.376237,0.947563,54776,5,DBA2


<IPython.core.display.Javascript object>

In [14]:
%%R -i omni -i n_vertices -o signal_vertices

suppressPackageStartupMessages(require(tidyverse))

col1 <- which(grepl("omni", names(omni))) # column indices for the embeddings
col2 <- which(grepl("genotype", names(omni))) # column index for the genotype

# Build the MANOVA formula: cbind(<embedding columns>) ~ genotype
embedding <- colnames(omni)[col1]
genotype <- colnames(omni)[col2]
form <- as.formula(paste0("cbind(", paste(embedding, collapse=", "), ") ~ ", genotype))

# Iterate over the ROI labels that are actually present, so that each p-value
# is stored next to the ROI it was computed for. Looping over 1:n_vertices and
# labelling with unique(omni$ROI) afterwards only agrees when the ROIs happen
# to be exactly 1..n_vertices in order of appearance.
rois <- unique(omni$ROI)
stopifnot(length(rois) == n_vertices)

pvec <- numeric(length(rois))
for (k in seq_along(rois)) {
    # Embeddings of this vertex across all subjects
    omni.v <- omni[which(omni$ROI == rois[k]), ]
    ans <- manova(form, data=omni.v)
    # p-value of the first model term (genotype)
    pvec[k] <- summary(ans)$stats[1, "Pr(>F)"]
}

signal_vertices <- data.frame(ROI=rois, pvalue=pvec)

<IPython.core.display.Javascript object>

In [15]:
# Order the vertices by raw p-value before applying the correction
signal_vertices = signal_vertices.sort_values(by="pvalue", ignore_index=True)

# Holm correction of the p-values across all vertices
reject, holm_pvalue = multipletests(
    signal_vertices["pvalue"], alpha=0.05, method="holm"
)[:2]
signal_vertices["holm_pvalue"] = holm_pvalue
signal_vertices["significant"] = reject

# Rank the vertices by corrected p-value and save the table
signal_vertices = signal_vertices.sort_values(by="holm_pvalue", ignore_index=True)
signal_vertices.to_csv("../results/signal_vertices.csv", index=False)

<IPython.core.display.Javascript object>

In [16]:
# Get the top 10 strongest signal vertices.
# Take an explicit copy: assigning a column on the result of `.head()` is
# chained assignment, which triggers pandas' SettingWithCopyWarning and makes
# it ambiguous whether `signal_vertices` itself is modified.
strong_signal_vertices = signal_vertices.head(10).copy()

# Replace the numeric ROI identifiers with readable structure names
strong_signal_vertices["ROI"] = strong_signal_vertices["ROI"].apply(lookup_roi_name)
strong_signal_vertices

Unnamed: 0,ROI,pvalue,holm_pvalue,significant
0,Corpus Callosum (L),1.5327630000000001e-27,5.0887740000000005e-25,True
1,Corpus Callosum (R),3.30428e-26,1.0937170000000001e-23,True
2,Fimbria (L),2.2019170000000003e-22,7.266325e-20,True
3,Secondary Motor Cortex (L),2.495654e-22,8.210702e-20,True
4,Midbrain Reticular Nucleus (R),4.2089250000000003e-22,1.3805279999999998e-19,True
5,Substantia Nigra (R),6.725453e-19,2.199223e-16,True
6,Internal Capsule (R),7.016915999999999e-19,2.287515e-16,True
7,Secondary Motor Cortex (R),1.045814e-18,3.398894e-16,True
8,Cerebral Peduncle (R),1.3383340000000001e-17,4.336201e-15,True
9,Internal Capsule (L),2.077169e-17,6.709257e-15,True


<IPython.core.display.Javascript object>