In [None]:
%matplotlib qt
import os
from pathlib import Path
import pickle
from tqdm.contrib.itertools import product
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind, ttest_rel
from statsmodels.stats.multitest import multipletests
from mne import read_labels_from_annot
from hyppo.ksample import KSample
from mne.stats import f_mway_rm, fdr_correction
from scipy.stats import ttest_rel

Load the dataframe / define responders

In [None]:
## load power in labels and add whole brain activation
# For every CSV in the "powers" folder, one extra row per frequency band and
# hemisphere is appended whose label is "whole-<hemi>" and whose power is the sum
# over all labels of that hemisphere. The per-file frames are then stacked into `df`.
folder_name = Path.cwd().parent / "data" / "dataframes" / "powers"
band_names = ["delta", "theta", "alpha", "beta", "gamma"]
hemi_codes = ["lh", "rh"]

dfs_list = []
for file in tqdm(sorted(os.listdir(folder_name))):
    if not file.endswith(".csv"):
        continue
    df_file = pd.read_csv(folder_name / file, index_col=[0])
    column_names = list(df_file.columns)

    ## add whole brain power
    pieces = []
    for frange in band_names:
        in_band = df_file[df_file["frequency_band"] == frange]
        for hemi in hemi_codes:
            in_hemi = in_band[in_band["brain_label"].str.contains(f"-{hemi}", case=False, na=False)]
            # Every column except the last two copies its first value from the
            # selection; the last two columns receive the synthetic label and the
            # summed power (assumes they are the label and power columns -- TODO confirm).
            summary_row = {name: in_hemi[name].unique()[0] for name in column_names[:-2]}
            summary_row[column_names[-2]] = [f"whole-{hemi}"]
            summary_row[column_names[-1]] = [in_hemi["power"].sum()]
            pieces.append(pd.concat([in_hemi, pd.DataFrame(summary_row)]))
    dfs_list.append(pd.concat(pieces))

dfs = pd.concat(dfs_list)
df = dfs.reset_index().drop(columns=["index", "Unnamed: 0"])

## define responders and non responders
# For each (protocol, hemisphere) pair, collect the subjects whose mean over the
# columns containing "pre" does not exceed their mean over the columns containing
# "post" (diff_TL <= 0). The result is stored in `subjects_to_drop_dict` under the
# key "<hemi>_<protocol>", e.g. "left_1 Hz".
# NOTE(review): absolute, machine-specific path -- consider deriving it from
# Path.cwd().parent / "data" like the other data folders used in this notebook.
fname = "/Users/payamsadeghishabestari/codes/regTMS/data/behavioral_data/TL_data.xlsx"
prots = ["1 Hz", "10 Hz", "20 Hz"]
hemis = ["left", "right"]

# The spreadsheet does not depend on the loop variables, so read it once.
df_tl = pd.read_excel(fname)
# Harmonise the one protocol label that is written without a space.
df_tl["protcol"] = df_tl["protcol"].replace("1Hz", "1 Hz")

# Build the differences on a fresh frame instead of assigning new columns onto a
# `query()` slice (which triggers SettingWithCopyWarning and relies on implicit
# index alignment with the unfiltered frame).
df_tl_diff = pd.DataFrame(
    {
        "subject_ID": df_tl["ID"],
        "protcol": df_tl["protcol"],
        "hemisphere": df_tl["hemisphere"],
        "diff_TL": df_tl.filter(like="pre").mean(axis=1) - df_tl.filter(like="post").mean(axis=1),
    }
)

subjects_to_drop_dict = {}
for protocol, hemi in product(prots, hemis):
    in_condition = (df_tl_diff["protcol"] == protocol) & (df_tl_diff["hemisphere"] == hemi)
    no_improvement = df_tl_diff["diff_TL"] <= 0
    subjects_to_drop_dict[f"{hemi}_{protocol}"] = list(
        df_tl_diff.loc[in_condition & no_improvement, "subject_ID"].values
    )

Which brain-label power differs significantly (pre vs post), and at which frequency range and protocol?

In [None]:
## which brain label power is significantly different (pre vs post) at which frequency range and protocol?
# For every (hemisphere, protocol, frequency band) a paired test compares pre and
# post power in each brain label. Labels with an uncorrected p < p_thr are stored
# in `sig_brain_regions`; labels surviving FDR (Benjamini-Hochberg) are printed.
hemis = ["left", "right"]
prots = ["0.1 Hz", "1 Hz", "10 Hz", "20 Hz"][1:]  # the first entry ("0.1 Hz") is skipped
franges = ["delta", "theta", "alpha", "beta", "gamma"]
# NOTE(review): [:-1] discards the last label of the parcellation -- presumably an
# "unknown" label; confirm against the fsaverage annotation.
brain_labels = read_labels_from_annot(subject="fsaverage", parc="aparc", verbose=False)[:-1]
bls = [bl.name for bl in brain_labels] + ["whole-lh", "whole-rh"]
p_thr = 0.05
t_test = True
k_sample_test = False
drop_non_responders = False  # True: exclude the subjects listed in subjects_to_drop_dict

sig_brain_regions = {}
for hemi, prot, frange in product(hemis, prots, franges):
    key = f"{hemi}_{prot}_{frange}"
    sig_brain_regions[key] = []

    df_1 = df
    if drop_non_responders:
        df_1 = df[~df["subject_ID"].isin(subjects_to_drop_dict[f"{hemi}_{prot}"])]

    p_vals = []
    for bl in bls:
        df_sub = df_1.query(f'hemisphere == "{hemi}" & protocol == "{prot}" & frequency_band == "{frange}" & brain_label == "{bl}"')
        # Pair pre and post by subject rather than by row order; subjects that
        # lack one of the two runs are left out of the paired test.
        paired = (
            df_sub.pivot(index="subject_ID", columns="run", values="power")
            .reindex(columns=["pre", "post"])
            .dropna()
        )
        group_pre = paired["pre"].to_numpy()
        group_post = paired["post"].to_numpy()

        # As before, the k-sample test takes precedence when both flags are set.
        if k_sample_test:
            stat, p_value = KSample("Dcorr").test(group_pre, group_post)
        elif t_test:
            t_stat, p_value = ttest_rel(group_pre, group_post)
        else:
            raise ValueError("enable either t_test or k_sample_test")

        if p_value < p_thr:
            print(f"{hemi}_{prot}_{frange}_{bl}: uncorrected : {round(p_value, 4)}")
            sig_brain_regions[key].append(bl)

        p_vals.append(p_value)

    # Correct only the finite p-values: a single NaN would otherwise propagate
    # through the step-up procedure and turn every corrected value into NaN.
    p_vals = np.asarray(p_vals, dtype=float)
    adjusted_p_vals = np.full(p_vals.shape, np.nan)
    finite = np.isfinite(p_vals)
    if finite.any():
        adjusted_p_vals[finite] = multipletests(p_vals[finite], alpha=p_thr, method='fdr_bh')[1]

    # Report each label that survives correction with its own corrected p-value
    # (previously the last loop label and its uncorrected p-value were printed).
    for idx in np.where(adjusted_p_vals < p_thr)[0]:
        print(f"{hemi}_{prot}_{frange}_{bls[idx]}: {round(adjusted_p_vals[idx], 4)}")

In [27]:
## save the dictionary
# Pickle the per-condition label lists collected in `sig_brain_regions`.
whole_bl_path = Path.cwd().parent / "data" / "dataframes" / "results_new" / "whole_bl.pkl"
whole_bl_path.write_bytes(pickle.dumps(sig_brain_regions))

2. contrasts between stimulation protocols (pre-to-post changes; ∆rsEEG at each protocol)

In [None]:
# Compare the pre-to-post power change (post - pre) of each active protocol with
# the change under the "0.1 Hz" protocol, for the same stimulated hemisphere,
# per frequency band and brain label. Labels with an uncorrected p < p_thr are
# stored in `sig_brain_regions`; labels surviving FDR correction are printed.
protocols = ["0.1 Hz", "1 Hz", "10 Hz", "20 Hz"]
franges = ["delta", "theta", "alpha", "beta", "gamma"]
hemis = ["left", "right"]
# NOTE(review): [:-1] discards the last label of the parcellation -- presumably an
# "unknown" label; confirm against the fsaverage annotation.
brain_labels = read_labels_from_annot(subject="fsaverage", parc="aparc", verbose=False)[:-1]
bls = [bl.name for bl in brain_labels] + ["whole-lh", "whole-rh"]
p_thr = 0.05
t_test = True
k_sample_test = False
drop_non_responders = False  # True: exclude the subjects listed in subjects_to_drop_dict


def _power_change(frame, hemi, prot, frange, bl):
    """Return the post - pre power per subject as a Series indexed by subject_ID."""
    selection = frame.query(f'hemisphere == "{hemi}" & protocol == "{prot}" & frequency_band == "{frange}" & brain_label == "{bl}"')
    wide = selection.pivot(index='subject_ID', columns='run', values='power').reindex(columns=["pre", "post"])
    return wide["post"] - wide["pre"]


sig_brain_regions = {}
# Only same-hemisphere contrasts against the first protocol are evaluated, so the
# loop runs over exactly those combinations instead of filtering a 5-way product.
for hemi_1, prot_1, frange in product(hemis, protocols[1:], franges):
    hemi_2, prot_2 = hemi_1, protocols[0]
    key = f"{hemi_1}_{prot_1}_{frange}"
    sig_brain_regions[key] = []

    df_1 = df
    if drop_non_responders:
        df_1 = df[~df["subject_ID"].isin(subjects_to_drop_dict[f"{hemi_1}_{prot_1}"])]

    p_vals = []
    for bl in bls:
        # Align both conditions on subject_ID so the paired test compares the same
        # subject in each row; subjects missing from either condition are dropped.
        paired = pd.concat(
            [
                _power_change(df_1, hemi_1, prot_1, frange, bl).rename("active"),
                _power_change(df_1, hemi_2, prot_2, frange, bl).rename("reference"),
            ],
            axis=1,
            join="inner",
        ).dropna()
        power_diff_prot_1 = paired["active"].to_numpy()
        power_diff_prot_2 = paired["reference"].to_numpy()

        # As before, the k-sample test takes precedence when both flags are set.
        if k_sample_test:
            stat, p_value = KSample("Dcorr").test(power_diff_prot_1, power_diff_prot_2)
        elif t_test:
            t_stat, p_value = ttest_rel(power_diff_prot_1, power_diff_prot_2)
        else:
            raise ValueError("enable either t_test or k_sample_test")

        if p_value < p_thr:
            print(f"{hemi_1}_{prot_1}_{prot_2}_{frange}_{bl}: uncorrected: {round(p_value, 4)}")
            sig_brain_regions[key].append(bl)
        p_vals.append(p_value)

    # Correct only the finite p-values: a single NaN would otherwise propagate
    # through the step-up procedure and turn every corrected value into NaN.
    p_vals = np.asarray(p_vals, dtype=float)
    adjusted_p_vals = np.full(p_vals.shape, np.nan)
    finite = np.isfinite(p_vals)
    if finite.any():
        adjusted_p_vals[finite] = multipletests(p_vals[finite], alpha=p_thr, method='fdr_bh')[1]

    p_idxs = np.where(adjusted_p_vals < p_thr)[0]
    if len(p_idxs) > 0:
        print(f"{prot_1}_{hemi_1} vs {prot_2}_{hemi_2} at {frange}")
        for idx in p_idxs:
            print(bls[idx], round(adjusted_p_vals[idx], 4))


In [35]:
## save the dictionary
# The file name is the one the notebook reads back later
# ("whole_compared_to_sham_bl.pkl"); the previous name carried a trailing
# underscore, so the freshly computed results were never the ones loaded.
with open(Path.cwd().parent / "data" / "dataframes" / "results_new" / "whole_compared_to_sham_bl.pkl", 'wb') as pickle_file:
    pickle.dump(sig_brain_regions, pickle_file)

Brain labels that change significantly from pre to post and also differ significantly from sham

In [None]:
## load the dictionaries
# dict_1 and dict_2 are read from the two pickles below; both are indexed with
# the same keys in the loop that follows.
folder_name = Path.cwd().parent / "data" / "dataframes" / "results_new"

loaded_dicts = []
for pkl_name in ("whole_bl.pkl", "whole_compared_to_sham_bl.pkl"):
    with open(folder_name / pkl_name, 'rb') as pickle_file:
        loaded_dicts.append(pickle.load(pickle_file))
dict_1, dict_2 = loaded_dicts

## loop over brain labels to find commons
for key, vals_1 in dict_1.items():
    vals_2 = dict_2[key]
    common_elements = set(vals_1) & set(vals_2)
    if len(common_elements) == 0:
        continue
    print(f"{key}: {list(common_elements)}")

In [None]:
# Show only the entries of dict_2 that hold at least one element.
non_empty = {key: value for key, value in dict_2.items() if len(value) > 0}
for key in non_empty:
    print(f"{key}: {non_empty[key]}")

3. contrasts between stimulation protocols (pre-to-post changes; ∆rsEEG = pre/post vs left/right)

In [None]:
# 2 x 2 repeated-measures ANOVA (hemisphere x run) on the power of every brain
# label, per protocol and frequency band. Labels whose interaction term survives
# FDR correction are printed together with their corrected p-values.
# Protocol names carry a space ("1 Hz"), matching the values used by every other
# query on `df` in this notebook; the previous unspaced names ("1Hz") selected
# no rows. NOTE(review): confirm against df["protocol"].unique().
protocols = ["0.1 Hz", "1 Hz", "10 Hz", "20 Hz"]
franges = ["delta", "theta", "alpha", "beta", "gamma"]
p_thr = 0.05
subjects_to_drop = ["3dx2e", "6sjul", "musky", "8kmc7", "cmh15", "uvxfg", "2fjeu", "dws0m", "gigcm", "6wms4"]
df_1 = df[~df['subject_ID'].isin(subjects_to_drop)]
n_conditions = 4  # left/right * post/pre

for protocol, frange in product(protocols, franges):
    df1 = df_1.query(f'protocol == "{protocol}" & frequency_band == "{frange}"')
    if df1.empty:
        print(f"{protocol}_{frange}: no rows selected, skipping")
        continue

    pivoted_df = df1.pivot(index="subject_ID", columns=["hemisphere", "run", "brain_label"], values='power')
    pivoted_df = pivoted_df.dropna()  # keep subjects with a value in every column
    if len(pivoted_df) < 2 or pivoted_df.shape[1] % n_conditions:
        print(f"{protocol}_{frange}: incomplete design, skipping")
        continue

    n_labels = pivoted_df.shape[1] // n_conditions
    # Take the label names from the pivoted columns themselves, so they follow the
    # column order of the data instead of a list left over from another cell.
    label_grid = pivoted_df.columns.get_level_values("brain_label").to_numpy().reshape(n_conditions, n_labels)
    if not (label_grid == label_grid[0]).all():
        raise ValueError("brain labels are not ordered identically in every hemisphere/run block")
    label_names = label_grid[0]

    data = pivoted_df.to_numpy().reshape(len(pivoted_df), n_conditions, n_labels)
    fvals, pvals = f_mway_rm(data, factor_levels=[2, 2], effects="A*B")
    p_vals = pvals[-1]  # last effect of "A*B", i.e. the interaction term
    if p_vals.min() < p_thr:
        reject, p_adj = fdr_correction(p_vals, alpha=p_thr)
        idx = np.flatnonzero(reject)
        # Report only when at least one label survives the correction.
        if len(idx) > 0:
            bl = label_names[idx]
            print(f"{protocol}_{frange}_{bl}_{p_adj[idx]}")

Connectivity

In [None]:
## connectivity analysis (pre vs post)
# For every (protocol, hemisphere, frequency band) the lower-triangular edges of
# the pre and post connectivity matrices are compared with a paired t-test across
# subjects, followed by a multiple-comparison correction over edges.
method = "coh"
stat_method = "fdr_bh"
# Position of each measure inside the per-band entry of the pickled dictionaries.
method_index = {"wpli": 0, "coh": 1}[method]
p_thr = 0.05

protocols = ["_0.1Hz", "_1Hz", "_10Hz", "_20Hz"][1:]
franges = ["delta", "theta", "alpha", "beta", "gamma"]
hemi_names = {"_L_": "left", "_R_": "right"}
n_labels = 68
low_tri_idxs = np.tril_indices(n_labels, k=-1)
folder_name = Path.cwd().parent / "data" / "dataframes" / "conns"

for protocol, hemisphere, freq in product(protocols, ["_L_", "_R_"], franges):
    hemi = hemi_names[hemisphere]
    subjects_to_drop = subjects_to_drop_dict[f"{hemi}_{protocol[1:-2]} Hz"]

    # Key the matrices by subject ID (first five characters of the file name) so
    # that pre and post are paired by subject and not merely by file order.
    cons_pre, cons_post = {}, {}
    for file in sorted(os.listdir(folder_name)):
        if not file.endswith(".pkl"):
            continue
        subject = file[:5]
        if subject in subjects_to_drop:
            continue
        if protocol not in file or hemisphere not in file:
            continue
        targets = [store for tag, store in (("_pre", cons_pre), ("_post", cons_post)) if tag in file]
        if not targets:
            continue
        # NOTE: pickle executes arbitrary code on load -- only use trusted files.
        with open(folder_name / file, "rb") as pickle_file:
            my_dict = pickle.load(pickle_file)
        for store in targets:
            store[subject] = my_dict[freq][method_index]

    subjects = sorted(set(cons_pre) & set(cons_post))
    if len(subjects) < 2:
        print(f"{hemi}{protocol}_{freq}: fewer than two subjects with both runs, skipping")
        continue

    vectors_pre = np.array([cons_pre[subject][low_tri_idxs] for subject in subjects])
    vectors_post = np.array([cons_post[subject][low_tri_idxs] for subject in subjects])

    # removing very small connections: per subject, edges below 1 % of that
    # subject's largest edge are set to zero
    for vectors in (vectors_pre, vectors_post):
        vectors[vectors < vectors.max(axis=1, keepdims=True) * 0.01] = 0

    stat, p_values = ttest_rel(vectors_pre, vectors_post)

    # Edges whose pre/post difference is constant give NaN p-values; correct only
    # the finite ones, otherwise every corrected p-value becomes NaN and the
    # significance check below can never succeed.
    finite = np.isfinite(p_values)
    if not finite.any():
        continue
    p_corrected = np.full(p_values.shape, np.nan)
    reject_null = np.zeros(p_values.shape, dtype=bool)
    reject_null[finite], p_corrected[finite], _, _ = multipletests(pvals=p_values[finite], alpha=p_thr, method=stat_method)

    p_min = np.nanmin(p_corrected)
    if p_min < p_thr:
        print(f"{hemi}{protocol}_{freq}: {p_min}")

Graph

In [None]:
# Graph analysis (pre vs post): for every (protocol, hemisphere, frequency band)
# the lower-triangular edges of the Frobenius-normalised pre and post graphs are
# compared with a paired t-test across subjects, followed by a
# multiple-comparison correction over edges.
stat_method = "fdr_bh"
p_thr = 0.05
protocols = ["_0.1Hz", "_1Hz", "_10Hz", "_20Hz"][1:]
franges = ["delta", "theta", "alpha", "beta", "gamma"]
hemi_names = {"_L_": "left", "_R_": "right"}
n_labels = 68
low_tri_idxs = np.tril_indices(n_labels, k=-1)
folder_name = Path.cwd().parent / "data" / "dataframes" / "graphs"

for protocol, hemisphere, freq in product(protocols, ["_L_", "_R_"], franges):
    hemi = hemi_names[hemisphere]
    subjects_to_drop = subjects_to_drop_dict[f"{hemi}_{protocol[1:-2]} Hz"]

    # Key the graphs by subject ID (first five characters of the file name) so
    # that pre and post are paired by subject and not merely by file order.
    graphs_pre, graphs_post = {}, {}
    for file in sorted(os.listdir(folder_name)):
        if not file.endswith(".pkl"):
            continue
        subject = file[:5]
        if subject in subjects_to_drop:
            continue
        if protocol not in file or hemisphere not in file:
            continue
        targets = [store for tag, store in (("_pre", graphs_pre), ("_post", graphs_post)) if tag in file]
        if not targets:
            continue
        # NOTE: pickle executes arbitrary code on load -- only use trusted files.
        with open(folder_name / file, "rb") as pickle_file:
            my_dict = pickle.load(pickle_file)
        graph = my_dict[freq]
        normalised = graph / np.linalg.norm(graph, 'fro')
        for store in targets:
            store[subject] = normalised

    subjects = sorted(set(graphs_pre) & set(graphs_post))
    if len(subjects) < 2:
        print(f"{hemi}{protocol}_{freq}: fewer than two subjects with both runs, skipping")
        continue

    vectors_pre = np.array([graphs_pre[subject][low_tri_idxs] for subject in subjects])
    vectors_post = np.array([graphs_post[subject][low_tri_idxs] for subject in subjects])

    # removing very small connections: per subject, edges below 1 % of that
    # subject's largest edge are set to zero
    for vectors in (vectors_pre, vectors_post):
        vectors[vectors < vectors.max(axis=1, keepdims=True) * 0.01] = 0

    stat, p_values = ttest_rel(vectors_pre, vectors_post)

    # Edges whose pre/post difference is constant give NaN p-values; correct only
    # the finite ones, otherwise every corrected p-value becomes NaN and the
    # significance check below can never succeed.
    finite = np.isfinite(p_values)
    if not finite.any():
        continue
    p_corrected = np.full(p_values.shape, np.nan)
    reject_null = np.zeros(p_values.shape, dtype=bool)
    reject_null[finite], p_corrected[finite], _, _ = multipletests(pvals=p_values[finite], alpha=p_thr, method=stat_method)

    p_min = np.nanmin(p_corrected)
    if p_min < p_thr:
        print(f"{hemi}{protocol}_{freq}: {p_min}")

Responders vs non-responders

In [None]:
# Compare the pre-to-post power change (post - pre) between responders and
# non-responders with an independent-samples t-test, one violin plot per tested
# label, followed by a multiple-comparison correction over the tested labels.
# The slices below restrict the run to one protocol, one band and one label.
hemis = ["left", "right"]
prots = ["0.1 Hz", "1 Hz", "10 Hz", "20 Hz"][1:2]
franges = ["delta", "theta", "alpha", "beta", "gamma"][-1:]
brain_labels = read_labels_from_annot(subject="fsaverage", parc="aparc", verbose=False)[:-1]
bls = [bl.name for bl in brain_labels]
bls.append("whole-lh")
bls.append("whole-rh")
tested_bls = bls[-1:]  # labels actually tested; results are indexed against this list
combs = product(hemis, prots, franges)
p_thr = 0.05
stat_method = "fdr_bh"

for hemi, prot, frange in combs:
    # Keys of subjects_to_drop_dict have the form "<hemi>_<protocol>".
    non_responders = subjects_to_drop_dict[f"{hemi}_{prot}"]
    df["type"] = df["subject_ID"].isin(non_responders).map({True: "non-responder", False: "responder"})
    p_vals = []
    for bl in tested_bls:
        df_sub = df.query(f'hemisphere == "{hemi}" & protocol == "{prot}" & frequency_band == "{frange}" & brain_label == "{bl}"')
        df_pivot = df_sub.pivot(index='subject_ID', columns='run', values='power')
        # Subjects lacking one of the two runs have no defined difference.
        power_difference = (df_pivot['post'] - df_pivot['pre']).dropna()
        is_non_responder = power_difference.index.isin(non_responders)
        power_diff_prot_1 = power_difference[~is_non_responder].values
        power_diff_prot_2 = power_difference[is_non_responder].values

        if len(power_diff_prot_1) == 0 or len(power_diff_prot_2) == 0:
            print(f"{prot}, {hemi}, {frange}_{bl}: one of the groups is empty, skipping")
            p_vals.append(np.nan)  # keeps p_vals aligned with tested_bls
            continue

        fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
        ax.violinplot([power_diff_prot_1, power_diff_prot_2], showmeans=False, showmedians=True)
        ax.set_xticks([1, 2])
        ax.set_xticklabels(["responder", "non-responder"])
        ax.set_ylabel("power difference (post - pre)")
        ax.set_title(f"{prot}, {hemi}, {frange}_{bl}")

        t_stat, p_value = ttest_ind(power_diff_prot_1, power_diff_prot_2)
        p_vals.append(p_value)

    # Correct only the finite p-values so that a NaN cannot invalidate the rest.
    p_vals = np.asarray(p_vals, dtype=float)
    p_corrected = np.full(p_vals.shape, np.nan)
    finite = np.isfinite(p_vals)
    if finite.any():
        p_corrected[finite] = multipletests(pvals=p_vals[finite], alpha=p_thr, method=stat_method)[1]

    # Report each significant label with its own corrected p-value.
    for idx in np.flatnonzero(p_corrected < p_thr):
        print(f"{prot}, {hemi}, {frange}_{tested_bls[idx]}_{p_corrected[idx]}")

Check behavioural data

In [None]:
## define responders and non responders
# Load the behavioural spreadsheet, average the columns containing "pre" and the
# columns containing "post" per row, and reshape to long format with one row per
# (subject, hemisphere, protocol, run) and the average in "tl_avg".
# NOTE(review): absolute, machine-specific path -- consider deriving it from
# Path.cwd().parent / "data" like the other data folders used in this notebook.
fname = "/Users/payamsadeghishabestari/codes/regTMS/data/behavioral_data/TL_data.xlsx"
df_tl = (
    pd.read_excel(fname)
    .rename(columns={'ID': 'subject_ID', 'protcol': 'protocol'})
    .drop(columns="NO.")
)
# Assign the result back: calling replace(..., inplace=True) on a selected column
# is a chained in-place operation and leaves the frame unchanged under pandas
# Copy-on-Write.
df_tl["protocol"] = df_tl["protocol"].replace("1Hz", "1 Hz")
df_tl['pre_avg'] = df_tl.filter(like='pre').mean(axis=1)
df_tl['post_avg'] = df_tl.filter(like='post').mean(axis=1)
df_tl = df_tl.melt(id_vars=["subject_ID", "hemisphere", "protocol"],
                    value_vars=['pre_avg', 'post_avg'],
                    var_name='run',
                    value_name='tl_avg')
# "pre_avg" / "post_avg" -> "pre" / "post"
df_tl['run'] = df_tl['run'].str.replace('_avg', '', regex=False)

In [None]:
# Box plots with the individual observations overlaid: tl_avg per protocol,
# split by run (hue), one panel per hemisphere.
col_order = ["left", "right"]
order = ["0.1 Hz", "1 Hz", "10 Hz", "20 Hz"]
hue_order = ["pre", "post"]
cl1 = sns.cubehelix_palette(10, rot=2.5, light=.7, reverse=True)[7]
cl2 = sns.cubehelix_palette(10, rot=-2*np.pi/10, light=.7, reverse=True)[3]

# keyword arguments common to both layers
shared_kwargs = dict(x="protocol", y="tl_avg", hue="run", order=order,
                     hue_order=hue_order, palette=[cl1, cl2])

g = sns.FacetGrid(data=df_tl, col="hemisphere", col_order=col_order, height=4, aspect=2)
g.map_dataframe(sns.boxplot, fill=False, gap=0.2, linewidth=2, **shared_kwargs)
g.map_dataframe(sns.stripplot, dodge=True, **shared_kwargs)

In [None]:
# One-sided paired t-test (pre greater than post) on tl_avg for every protocol,
# followed by a Bonferroni correction across protocols.
protocols = df_tl['protocol'].unique()
p_values = []
for protocol in protocols:
    in_protocol = df_tl.loc[df_tl['protocol'] == protocol]
    tl_by_run = {
        run: in_protocol.loc[in_protocol['run'] == run, "tl_avg"]
        for run in ("pre", "post")
    }
    t_stat, p_value = ttest_rel(tl_by_run["pre"], tl_by_run["post"], alternative='greater')
    print(f'Stimulus: {protocol}, t-statistic: {t_stat}, p-value: {p_value}')
    p_values.append(p_value)

corrected_p_values = multipletests(p_values, method='bonferroni')
print('Bonferroni corrected p-values:', corrected_p_values[1])