In [1]:
import datetime
import time
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.spatial.distance import squareform

from hyppo.conditional import ConditionalDcorr
from hyppo.independence import Dcorr

In [2]:
# Number of permutations; passed as `reps` to the Dcorr permutation tests below.
reps = 25_000

# Parcellation names, grouped by name prefix. Each name is also used as the
# sub-directory of the results folder that holds that parcellation's files.
parcellations = (
    ["AAL", "CPAC200", "DKT", "Desikan", "Glasser"]
    + ["Yeo-17-liberal", "Yeo-17", "Yeo-7-liberal", "Yeo-7"]
    + ["Schaefer200", "Schaefer300", "Schaefer400", "Schaefer1000"]
)

In [3]:
# Subject table and the same-sex twin subset used by the analyses below.
all_subjects_df = pd.read_csv("../restricted_data/690subjects.csv")
twins_df = pd.read_csv("../restricted_data/same_sex_twins.csv")


# Accumulator for the test results; every analysis cell appends rows with
# exactly these columns.
pval_cols = ["parcellation", "model", "pvalue", "hypothesis", "gender"]
pvalues = pd.DataFrame(columns=pval_cols)


data_path = Path("../results/")
# np.load on an .npz returns an NpzFile that keeps the archive open. Read the
# array inside a context manager so the file handle is released right away.
with np.load(data_path / "genome_distance.npz") as genome_archive:
    genome_distance = genome_archive["genome"]

# Boolean masks over the rows of the subject table.
female_idx = all_subjects_df.Gender == "F"
male_idx = all_subjects_df.Gender == "M"

## Compute permutation blocks

In [4]:
def compute_permutation_blocks(subjects):
    """Build the permutation-block table for a subject table.

    Parameters
    ----------
    subjects : pandas.DataFrame
        Must provide ``Family_ID`` and ``Zygosity`` columns. Every family is
        expected to contain exactly two subjects.

    Returns
    -------
    numpy.ndarray of int, shape ``(len(subjects), 3)``
        Column 0 is the zygosity block (-1: both ``"MZ"``, -2: both ``"DZ"``,
        -3: differing labels or both ``"NotTwin"``), column 1 is the 1-based
        family number in sorted ``Family_ID`` order, and column 2 is the
        1-based position of the subject within its family.

    Raises
    ------
    ValueError
        If a family does not contain exactly two subjects, or if its two
        subjects share a zygosity label other than MZ, DZ or NotTwin.
    """
    family_ids = subjects["Family_ID"].to_numpy()
    zygosity = subjects["Zygosity"].to_numpy()

    perm_blocks = np.zeros((len(subjects), 3), dtype=int)
    for fdx, fam in enumerate(np.unique(family_ids)):
        # Work with row *positions*, not index labels: the result is a plain
        # numpy array, so labels are only correct for a default RangeIndex.
        members = np.flatnonzero(family_ids == fam)
        if members.size != 2:
            raise ValueError(
                f"family {fam!r} has {members.size} subjects; expected exactly 2"
            )
        zyg1, zyg2 = zygosity[members]

        if zyg1 == "MZ" and zyg2 == "MZ":
            first_block = -1
        elif zyg1 == "DZ" and zyg2 == "DZ":
            first_block = -2
        elif zyg1 != zyg2 or (zyg1 == "NotTwin" and zyg2 == "NotTwin"):
            first_block = -3
        else:
            raise ValueError(
                f"family {fam!r}: unexpected zygosity pair ({zyg1!r}, {zyg2!r})"
            )

        perm_blocks[members] = [
            [first_block, fdx + 1, 1],
            [first_block, fdx + 1, 2],
        ]
    return perm_blocks


nrows = all_subjects_df.shape[0]
blocks = compute_permutation_blocks(all_subjects_df)  # cols: zygosity, family, subject

## Part A) Monozygotic vs. dizygotic twins

In [5]:
# Mask of subjects that also appear in the same-sex twin table.
twin_idx = all_subjects_df["Subject"].isin(twins_df["Subject"])

# Twin mask intersected with each gender mask.
twin_female_idx = twin_idx & female_idx
twin_male_idx = twin_idx & male_idx

# (label, row selector) pairs iterated by the Part A tests.
gender_idx = [
    ["All", twin_idx],
    ["F", twin_female_idx],
    ["M", twin_male_idx],
]

In [None]:
# Part A: Dcorr between genome and connectome distances, restricted to the
# subjects selected by `gender_idx`, with zygosity used as permutation blocks.
res = []
models = ["exact", "glob", "vertex"]

# squareform(genome_distance) does not depend on the parcellation, gender or
# model, so expand it once instead of inside the innermost loop.
genome_square = squareform(genome_distance)

for parcellation in parcellations:
    print(f"running {parcellation}")
    parc_path = data_path / parcellation
    # np.load returns an NpzFile that keeps the archive open and reads an array
    # from disk on every key access; read each model once, then close the file.
    with np.load(parc_path / "connectome_distances.npz") as connectome_distances:
        connectome_square = {
            model: squareform(connectome_distances[model]) for model in models
        }

    for gender, gender_index in gender_idx:
        indices = np.asarray(gender_index)
        for model in models:
            # Boolean/integer-array indexing copies, so each test gets fresh inputs.
            cdist = connectome_square[model][indices, :][:, indices]
            gdist = genome_square[indices, :][:, indices]
            twin_blocks = blocks[:, 0][indices]

            # compute_distance=None: the inputs are already distance matrices.
            dcorr = Dcorr(compute_distance=None)
            stat, pval = dcorr.test(
                gdist,
                cdist,
                perm_blocks=twin_blocks,
                reps=reps,
                auto=False,
                workers=88,
            )
            res.append([parcellation, model, pval, "dcorr_twins_only", gender])

# Replace (rather than append to) any rows left by an earlier run of this cell,
# so re-running it cannot duplicate tests and distort the later FDR correction.
pvalues = pd.concat(
    [
        pvalues[pvalues["hypothesis"] != "dcorr_twins_only"],
        pd.DataFrame(res, columns=pval_cols),
    ],
    ignore_index=True,
    axis=0,
)
pvalues.to_csv("../results/pvalues.csv", index=False)

## Part B) Dcorr of all relationships

In [None]:
# "All" selects every subject by position. Derive the count from the subject
# table instead of hard-coding 690, so it cannot drift from the loaded data.
gender_idx = [
    ["All", np.arange(len(all_subjects_df))],
    ["F", female_idx],
    ["M", male_idx],
]

In [None]:
# Part B: Dcorr between genome and connectome distances over the subjects
# selected by `gender_idx`, with zygosity used as permutation blocks.
res = []
models = ["exact", "glob", "vertex"]

# squareform(genome_distance) does not depend on the parcellation, gender or
# model, so expand it once instead of inside the innermost loop.
genome_square = squareform(genome_distance)

for parcellation in parcellations:
    print(f"running {parcellation}")
    parc_path = data_path / parcellation
    # np.load returns an NpzFile that keeps the archive open and reads an array
    # from disk on every key access; read each model once, then close the file.
    with np.load(parc_path / "connectome_distances.npz") as connectome_distances:
        connectome_square = {
            model: squareform(connectome_distances[model]) for model in models
        }

    for gender, gender_index in gender_idx:
        indices = np.asarray(gender_index)
        for model in models:
            # Boolean/integer-array indexing copies, so each test gets fresh inputs.
            cdist = connectome_square[model][indices, :][:, indices]
            gdist = genome_square[indices, :][:, indices]

            # compute_distance=None: the inputs are already distance matrices.
            dcorr = Dcorr(compute_distance=None)
            stat, pval = dcorr.test(
                gdist,
                cdist,
                perm_blocks=blocks[:, 0][indices],
                reps=reps,
                auto=False,
                workers=90,
            )
            res.append([parcellation, model, pval, "dcorr_all", gender])

# Replace (rather than append to) any rows left by an earlier run of this cell,
# so re-running it cannot duplicate tests and distort the later FDR correction.
pvalues = pd.concat(
    [
        pvalues[pvalues["hypothesis"] != "dcorr_all"],
        pd.DataFrame(res, columns=pval_cols),
    ],
    ignore_index=True,
    axis=0,
)
pvalues.to_csv("../results/pvalues.csv", index=False)

## Part C) Neuroanatomy

In [None]:
# "All" selects every subject by position. Derive the count from the subject
# table instead of hard-coding 690, so it cannot drift from the loaded data.
gender_idx = [
    ["All", np.arange(len(all_subjects_df))],
    ["F", female_idx],
    ["M", male_idx],
]

In [None]:
# Part C: Dcorr between genome distances and the per-parcellation
# "neuroanatomy" distances, with zygosity used as permutation blocks.
data_path = Path("../results/")

# np.load on an .npz returns an NpzFile that keeps the archive open; read the
# array inside a context manager so the handle is released right away.
with np.load(data_path / "genome_distance.npz") as genome_archive:
    genome_distance = genome_archive["genome"]

# Loop-invariant: expand the genome distances to a square matrix once.
genome_square = squareform(genome_distance)

res = []
for parcellation in parcellations:
    print(f"running {parcellation}")
    parc_path = data_path / parcellation
    # Read and expand the neuroanatomy distances once per parcellation instead
    # of once per gender, and close the archive afterwards.
    with np.load(parc_path / "covariate_distance.npz") as neuro_distance:
        neuro_square = squareform(neuro_distance["neuroanatomy"])

    for gender, gender_index in gender_idx:
        indices = np.asarray(gender_index)

        # Boolean/integer-array indexing copies, so each test gets fresh inputs.
        cdist = neuro_square[indices, :][:, indices]
        gdist = genome_square[indices, :][:, indices]

        # compute_distance=None: the inputs are already distance matrices.
        dcorr = Dcorr(compute_distance=None)
        stat, pval = dcorr.test(
            gdist,
            cdist,
            perm_blocks=blocks[:, 0][indices],
            reps=reps,
            auto=False,
            workers=90,
        )
        res.append([parcellation, "neuroanatomy", pval, "dcorr_neuroanatomy", gender])

# Replace (rather than append to) any rows left by an earlier run of this cell,
# so re-running it cannot duplicate tests and distort the later FDR correction.
pvalues = pd.concat(
    [
        pvalues[pvalues["hypothesis"] != "dcorr_neuroanatomy"],
        pd.DataFrame(res, columns=pval_cols),
    ],
    ignore_index=True,
    axis=0,
)
pvalues.to_csv("../results/pvalues.csv", index=False)

## Part D) Conditional Dcorr

In [None]:
# Part D: conditional Dcorr between genome and connectome distances, given the
# per-parcellation "covariates" matrix.
data_path = Path("../results/")
# np.load on an .npz returns an NpzFile that keeps the archive open; read the
# array inside a context manager so the handle is released right away.
with np.load(data_path / "genome_distance.npz") as genome_archive:
    genome_distance = genome_archive["genome"]

# Loop-invariant: expand the genome distances to a square matrix once.
genome_square = squareform(genome_distance)

res = []
models = ["exact", "glob", "vertex"]

# "All" selects every subject by position. Derive the count from the subject
# table instead of hard-coding 690, so it cannot drift from the loaded data.
gender_idx = [
    ["All", np.arange(len(all_subjects_df))],
    ["F", female_idx],
    ["M", male_idx],
]

for parcellation in parcellations:
    print(f"running {parcellation}")

    parc_path = data_path / parcellation
    # Read every array once per parcellation and close both archives.
    with np.load(parc_path / "connectome_distances.npz") as connectome_distances:
        connectome_square = {
            model: squareform(connectome_distances[model]) for model in models
        }
    with np.load(parc_path / "covariate_kde.npz") as covariate_archive:
        # Used as-is (no squareform), so it is already a square matrix.
        covariate_distances = covariate_archive["covariates"]

    for gender, gender_index in gender_idx:
        indices = np.asarray(gender_index)
        for model in models:
            # Boolean/integer-array indexing copies, so each test gets fresh inputs.
            cdist = connectome_square[model][indices, :][:, indices]
            gdist = genome_square[indices, :][:, indices]
            vdist = covariate_distances[indices, :][:, indices]

            cd = ConditionalDcorr(compute_distance=None)
            # NOTE(review): uses 10000 permutations rather than the notebook-wide
            # `reps`, and no perm_blocks -- confirm both are intentional.
            stat, pval = cd.test(gdist, cdist, vdist, reps=10000, workers=90)
            res.append([parcellation, model, pval, "cdcorr_all", gender])

# Replace (rather than append to) any rows left by an earlier run of this cell,
# so re-running it cannot duplicate tests and distort the later FDR correction.
pvalues = pd.concat(
    [
        pvalues[pvalues["hypothesis"] != "cdcorr_all"],
        pd.DataFrame(res, columns=pval_cols),
    ],
    ignore_index=True,
    axis=0,
)
pvalues.to_csv("../results/pvalues.csv", index=False)

running AAL


## Vertex

## Correct p-values for multiple comparisons

In [30]:
from statsmodels.stats.multitest import multipletests

In [39]:
# Benjamini-Hochberg FDR correction across every p-value collected above.
# Cast to float first: the column may be object dtype (depending on the pandas
# version) because results were concatenated onto an initially empty DataFrame.
significant, corrected, _, _ = multipletests(
    pvalues["pvalue"].astype(float), method="fdr_bh"
)

In [40]:
# Attach the corrected p-values and the significance flags as two new columns,
# in this order.
pvalues = pvalues.assign(
    corrected_pvalue=corrected,
    is_significant=significant,
)

In [41]:
# Overwrite the results file with the table that now includes the corrected columns.
pvalues.to_csv(path_or_buf="../results/pvalues.csv", index=False)