# Vertex Alternatives

Do other strategies for embedding vertices successfully recover the corpus callosum as a strong signal vertex?

## Vertex Weighting - doesn't work

Represent a vertex as the vector of edge weight connections to other vertices (i.e., a row of the adjacency matrix).
Compare vectors across strains via a Distance Correlation (this is effectively multivariate distance matrix regression, MDMR).

## Graph Statistics - doesn't work

Use 5 common vertex-level graph statistics: degree, clustering, betweenness centrailty, closeness centrailty, and the number of triangles. 

In [13]:
from collections import defaultdict

import numpy as np
import pandas as pd
import networkx as nx
from hyppo.ksample import KSample
from graspologic.datasets import load_mice
from statsmodels.stats.multitest import multipletests

In [2]:
ventricles = [147, 151, 160]
ventricles += [roi + 166 for roi in ventricles]
ventricles = np.array(ventricles)

def mask_rois(graphs, rois=ventricles):
    """Remove all connections to and from specified ROIs."""
    graphs = np.delete(graphs, rois, axis=1)
    graphs = np.delete(graphs, rois, axis=2)
    return graphs

In [3]:
# Load the full mouse dataset
mice = load_mice()

# Stack all adjacency matrices in a 3D numpy array
graphs = np.array(mice.graphs)
graphs = mask_rois(graphs)

# Sort the connectomes and genotype labels so BTBR is first
label_indices = np.argsort(mice.labels).reshape(4, 8)
label_indices = label_indices[[1, 0, 2, 3]].reshape(-1)
labels = mice.labels[label_indices]
graphs = graphs[label_indices]

# Get sample parameters
n_subjects = mice.meta["n_subjects"]
n_vertices = mice.meta["n_vertices"] - len(ventricles)

In [4]:
# Split the set of graphs by genotype
btbr = graphs[labels == "BTBR"]
b6 = graphs[labels == "B6"]
cast = graphs[labels == "CAST"]
dba2 = graphs[labels == "DBA2"]

connectomes = [btbr, b6, cast, dba2]

In [5]:
btbr.shape

(8, 326, 326)

In [6]:
mdmr_pvals = []

for vertex in range(n_vertices):

    samples = [genotype[:, vertex, :] for genotype in connectomes]
    
    # Calculate the p-value for the i-th edge
    try:
        statistic, pvalue = KSample("Dcorr").test(*samples)
    except ValueError:
        # A ValueError is thrown when any of the samples have equal edge
        # weights (i.e. one of the inputs has 0 variance)
        statistic = np.nan
        pvalue = 1

    mdmr_pvals.append([vertex + 1, statistic, pvalue])

In [7]:
def summarize(graph):

    # Define the network statistics to compute
    functions = [
        nx.degree,
        nx.clustering,
        nx.betweenness_centrality,
        nx.closeness_centrality,
        nx.triangles,
    ]

    # Get a dictionary of network statistics for each node in the graph
    G = nx.from_numpy_array(graph)
    stats = defaultdict(list)
    for func in functions:
        if func == nx.degree:
            for node, degree in G.degree():
                stats[node].append(degree)
        else:
            for node, scalar in func(G).items():
                stats[node].append(scalar)

    # Organize statistics in a pandas df
    df = pd.DataFrame.from_dict(stats, orient="index")
    df["node"] = df.index
    df.columns = [func.__name__ for func in functions] + ["node"]

    return df


# %% Embed all graphs
def embed(graphs):
    frames = [summarize(graph) for graph in graphs]
    df = pd.concat(frames)
    df.reset_index(level=0, inplace=True, drop=True)
    return df


graph_stats = []
for graph in connectomes:
    graph_stats.append(embed(graph))


vertex_pvals = []

for roi in range(332):

    # Get the (i,j)-th edge for each connectome
    samples = [Xhat.query(f"node == {roi}").drop(columns=["node"]).values for Xhat in graph_stats]

    # Calculate the p-value for the (i,j)-th edge
    try:
        statistic, pvalue = KSample("Dcorr").test(*samples, reps=10000000)
    except ValueError:
        # A ValueError is thrown when any of the samples have equal edge
        # weights (i.e. one of the inputs has 0 variance)
        statistic = np.nan
        pvalue = 1

    vertex_pvals.append([roi + 1, statistic, pvalue])

In [28]:
# Convert the nested list to a dataframe
alpha = 0.05
correction = correction = sum((1 / i for i in range(1, 326 + 1)))

mdmr_vertices = pd.DataFrame(mdmr_pvals, columns=["ROI", "mdmr_stat", "mdmr_pvalue"])
mdmr_vertices.sort_values(by="mdmr_pvalue", inplace=True, ignore_index=True)
# _, pvalues, _, _ = multipletests(mdmr_vertices["mdmr_pvalue"], alpha=alpha / correction, method="fdr_bh", is_sorted=True)
# mdmr_vertices["mdmr_pvalue"] = pvalues

graph_vertices = pd.DataFrame(vertex_pvals, columns=["ROI", "graph_stat", "graph_pvalue"])
graph_vertices.sort_values(by="graph_pvalue", inplace=True, ignore_index=True)
_, pvalues, _, _ = multipletests(graph_vertices["graph_pvalue"], alpha=alpha / correction, method="fdr_bh", is_sorted=True)
graph_vertices["graph_pvalue"] = pvalues

df = pd.merge(mdmr_vertices, graph_vertices, on="ROI")
df.head()

Unnamed: 0,ROI,mdmr_stat,mdmr_pvalue,graph_stat,graph_pvalue
0,83,0.855008,1.007113e-07,0.188709,0.014878
1,307,0.851743,1.062976e-07,0.111955,0.042052
2,127,0.850835,1.079052e-07,0.208054,0.012143
3,246,0.844896,1.190383e-07,0.272471,0.007505
4,267,0.840283,1.284743e-07,0.278887,0.007446


In [33]:
graph_vertices.query("ROI == 121")

Unnamed: 0,ROI,graph_stat,graph_pvalue
92,121,0.255187,0.008738


In [29]:
def lookup_roi_name(roi):
    roi += np.sum(roi >= ventricles) # Adjust for removing the ventricles
    roi -= 1
    hemisphere = "R" if roi // 166 else "L"
    roi = roi % 166
    structure = mice.atlas["Structure"].values[roi]
    structure = " ".join(structure.split("_"))
    return f"{structure} ({hemisphere})"

In [30]:
# Get the top 20 strongest signal edges
strong_mdmr_vertices = df.head(35)
strong_mdmr_vertices["ROI"] = strong_mdmr_vertices["ROI"].apply(lookup_roi_name)
# strong_mdmr_vertices.drop(["pvalue", "significant"], axis=1, inplace=True)
strong_mdmr_vertices.columns = ["Vertex", "mstatistic", "mp-value", "gstatistic", "gp-value"]

# pd.set_option('display.float_format', '{:.3g}'.format)
strong_mdmr_vertices["mstatistic"] = strong_mdmr_vertices["mstatistic"].apply(lambda x: f"{x:.3f}")
strong_mdmr_vertices["gstatistic"] = strong_mdmr_vertices["gstatistic"].apply(lambda x: f"{x:.3f}")
strong_mdmr_vertices["mp-value"] = strong_mdmr_vertices["mp-value"].apply(lambda x: f"{x:.3g}")
strong_mdmr_vertices["gp-value"] = strong_mdmr_vertices["gp-value"].apply(lambda x: f"{x:.3g}")
strong_mdmr_vertices

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  strong_mdmr_vertices["ROI"] = strong_mdmr_vertices["ROI"].apply(lookup_roi_name)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  strong_mdmr_vertices["mstatistic"] = strong_mdmr_vertices["mstatistic"].apply(lambda x: f"{x:.3f}")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  strong_mdmr_vertices["gs

Unnamed: 0,Vertex,mstatistic,mp-value,gstatistic,gp-value
0,Substantia Nigra (L),0.855,1.01e-07,0.189,0.0149
1,Middle Cerebellar Peduncle (R),0.852,1.06e-07,0.112,0.0421
2,Internal Capsule (L),0.851,1.08e-07,0.208,0.0121
3,Substantia Nigra (R),0.845,1.19e-07,0.272,0.0075
4,Pontine Reticular Nucleus (R),0.84,1.28e-07,0.279,0.00745
5,Pontine Reticular Nucleus (L),0.838,1.33e-07,0.205,0.0121
6,Parasubiculum (L),0.838,1.33e-07,0.334,0.00524
7,Ventral Tegmental Area (R),0.837,1.36e-07,0.247,0.00925
8,Retro Rubral Field (L),0.829,1.54e-07,0.334,0.00524
9,Fastigial Medial Nucleus of Cerebellum (R),0.824,1.67e-07,0.215,0.0117


In [23]:
strong_mdmr_vertices.to_csv("../results/formatted_tables/alt_vertices.csv", index=False)