In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from graspologic.datasets import load_mice
from statsmodels.stats.multitest import multipletests

In [2]:
# Zero-based indices of the ventricle ROIs that are excluded from the analysis.
# The first three entries are offset by 166 to obtain their counterparts in the
# second half of the vertex ordering (166 ROIs per hemisphere).
ventricles = [147, 151, 160]
ventricles += [roi + 166 for roi in ventricles]
ventricles = np.array(ventricles)


def mask_rois(graphs, rois=ventricles):
    """Remove all connections to and from the specified ROIs.

    Parameters
    ----------
    graphs : array-like of shape (n_graphs, n_vertices, n_vertices)
        Stack of adjacency matrices.
    rois : array-like of int, default=ventricles
        Zero-based vertex indices to remove from every graph.

    Returns
    -------
    numpy.ndarray
        Copy of ``graphs`` with the rows (axis 1) and columns (axis 2) listed
        in ``rois`` deleted.
    """
    # Use the `rois` argument rather than the module-level `ventricles`, so
    # that callers passing a custom ROI list actually get that list removed.
    graphs = np.delete(graphs, rois, axis=1)
    graphs = np.delete(graphs, rois, axis=2)
    return graphs

In [3]:
# Fetch the complete mouse connectome dataset
mice = load_mice()

# Build one 3D array out of the adjacency matrices and drop the masked ROIs
graphs = mask_rois(np.array(mice.graphs))

# argsort groups the subjects by genotype label (4 groups of 8); swapping the
# first two groups puts BTBR ahead of B6
label_indices = np.argsort(mice.labels).reshape(4, 8)[[1, 0, 2, 3]].ravel()
graphs, labels = graphs[label_indices], mice.labels[label_indices]

# Sample sizes; the vertex count excludes the masked ROIs
n_subjects = mice.meta["n_subjects"]
n_vertices = mice.meta["n_vertices"] - len(ventricles)

In [4]:
# One stack of connectomes per genotype, selected by label
btbr, b6, cast, dba2 = (
    graphs[labels == genotype] for genotype in ("BTBR", "B6", "CAST", "DBA2")
)

# Same stacks collected in a fixed order for the k-sample tests
connectomes = [btbr, b6, cast, dba2]

In [5]:
from hyppo.ksample import KSample

In [6]:
# Sanity check: show the dimensions of the stacked BTBR connectomes
btbr.shape

(8, 326, 326)

In [7]:
# One [ROI, statistic, p-value] row per vertex
mdmr_pvals = []

for vertex in range(n_vertices):
    # For each genotype, take the row of edge weights incident to this vertex
    # from every subject's adjacency matrix
    samples = [genotype[:, vertex, :] for genotype in connectomes]

    # k-sample Dcorr test for this vertex across the genotypes
    try:
        statistic, pvalue = KSample("Dcorr").test(*samples)
    except ValueError:
        # Raised when one of the inputs has zero variance (all edge weights
        # equal); record the vertex as showing no evidence of a difference
        statistic, pvalue = np.nan, 1

    # ROI ids are reported 1-based
    mdmr_pvals.append([vertex + 1, statistic, pvalue])

In [8]:
# Convert the nested list to a dataframe
mdmr_vertices = pd.DataFrame(mdmr_pvals, columns=["ROI", "stat", "pvalue"])
mdmr_vertices.sort_values(by="pvalue", inplace=True, ignore_index=True)

# Correct p-values
reject, holm_pvalue, _, _ = multipletests(
    mdmr_vertices["pvalue"], alpha=0.05, method="holm"
)
mdmr_vertices["holm_pvalue"] = holm_pvalue
mdmr_vertices["significant"] = reject
mdmr_vertices.sort_values(by="holm_pvalue", inplace=True, ignore_index=True)
mdmr_vertices.to_csv("../results/mdmr_vertices.csv", index=False)
mdmr_vertices.head()

Unnamed: 0,ROI,stat,pvalue,holm_pvalue,significant
0,83,0.855008,1.007113e-07,3.3e-05,True
1,307,0.851743,1.062976e-07,3.5e-05,True
2,127,0.850835,1.079052e-07,3.5e-05,True
3,246,0.844896,1.190383e-07,3.8e-05,True
4,267,0.840283,1.284743e-07,4.1e-05,True


In [9]:
def lookup_roi_name(roi, rois=None, structures=None):
    """Translate a 1-based ROI id from the masked graphs into a readable name.

    The masked graphs had some vertices deleted with ``np.delete``, so an id
    in the masked numbering must be shifted back to the original numbering
    before the atlas can be consulted.

    Parameters
    ----------
    roi : int
        1-based vertex id in the masked (post-deletion) numbering.
    rois : array-like of int, optional
        Zero-based original indices that were deleted. Defaults to the
        module-level ``ventricles``.
    structures : sequence of str, optional
        Structure names indexed by within-hemisphere position. Defaults to
        ``mice.atlas["Structure"].values``.

    Returns
    -------
    str
        ``"<structure> (<L|R>)"`` with underscores in the structure name
        replaced by spaces.
    """
    removed = np.unique(np.asarray(ventricles if rois is None else rois))
    names = mice.atlas["Structure"].values if structures is None else structures

    # Invert np.delete: walk the removed indices in ascending order and step
    # past each one that sits at or before the current position. A single
    # comparison of the 1-based id against the removed indices is off by one
    # and miscounts when several removed indices lie close together.
    index = int(roi) - 1
    for removed_index in removed:
        if removed_index <= index:
            index += 1

    # The first 166 original indices are reported as "L", the rest as "R"
    hemisphere = "R" if index // 166 else "L"
    structure = " ".join(names[index % 166].split("_"))
    return f"{structure} ({hemisphere})"

In [10]:
# Take the first 35 rows of the vertex table. The explicit copy means the
# column assignments below act on an independent frame, which avoids pandas'
# SettingWithCopyWarning and leaves `mdmr_vertices` untouched.
strong_mdmr_vertices = mdmr_vertices.head(35).copy()

# Replace numeric ROI ids with readable structure names
strong_mdmr_vertices["ROI"] = strong_mdmr_vertices["ROI"].apply(lookup_roi_name)

# Keep only the name, the test statistic and the Holm-corrected p-value
strong_mdmr_vertices = strong_mdmr_vertices.drop(columns=["pvalue", "significant"])
strong_mdmr_vertices.columns = ["Vertex", "statistic", "p-value"]

# Render the numbers as fixed-precision strings for the formatted table
strong_mdmr_vertices["statistic"] = strong_mdmr_vertices["statistic"].map("{:.3f}".format)
strong_mdmr_vertices["p-value"] = strong_mdmr_vertices["p-value"].map("{:.3g}".format)
strong_mdmr_vertices

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-ve

Unnamed: 0,Vertex,statistic,p-value
0,Substantia Nigra (L),0.855,3.28e-05
1,Middle Cerebellar Peduncle (R),0.852,3.45e-05
2,Internal Capsule (L),0.851,3.5e-05
3,Substantia Nigra (R),0.845,3.84e-05
4,Pontine Reticular Nucleus (R),0.84,4.14e-05
5,Pontine Reticular Nucleus (L),0.838,4.25e-05
6,Parasubiculum (L),0.838,4.25e-05
7,Ventral Tegmental Area (R),0.837,4.32e-05
8,Retro Rubral Field (L),0.829,4.91e-05
9,Fastigial Medial Nucleus of Cerebellum (R),0.824,5.3e-05


In [11]:
# Write the formatted vertex table to disk, omitting the row index
strong_mdmr_vertices.to_csv("../results/formatted_tables/mdmr_vertices.csv", index=False)