In [None]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import seaborn as sb
import matplotlib.pyplot as plt
from datetime import datetime
import math
from IPython.display import display, HTML
from datetime import date
import os
import statsmodels.formula.api as smf
import pytz
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import math

In [None]:
# Workspace settings supplied by the environment; either value is None when
# the corresponding variable is not set.
_env = os.environ
my_bucket = _env.get('WORKSPACE_BUCKET')
CDR_version = _env.get("WORKSPACE_CDR")

### Get demographics of all individuals in All of Us

In [None]:
# Demographics query: one row per person with gender, birth datetime, race,
# ethnicity and sex at birth, each concept id resolved to its concept name
# through a LEFT JOIN on the concept table.
# os.environ[...] (not os.getenv) is used so a missing WORKSPACE_CDR raises
# KeyError here instead of producing a malformed table name.
_cdr = os.environ["WORKSPACE_CDR"]

dataset_69461310_person_sql = f"""
    SELECT
        person.person_id,
        p_gender_concept.concept_name as gender,
        person.birth_datetime as date_of_birth,
        p_race_concept.concept_name as race,
        p_ethnicity_concept.concept_name as ethnicity,
        p_sex_at_birth_concept.concept_name as sex_at_birth 
    FROM
        `{_cdr}.person` person 
    LEFT JOIN
        `{_cdr}.concept` p_gender_concept 
            ON person.gender_concept_id = p_gender_concept.concept_id 
    LEFT JOIN
        `{_cdr}.concept` p_race_concept 
            ON person.race_concept_id = p_race_concept.concept_id 
    LEFT JOIN
        `{_cdr}.concept` p_ethnicity_concept 
            ON person.ethnicity_concept_id = p_ethnicity_concept.concept_id 
    LEFT JOIN
        `{_cdr}.concept` p_sex_at_birth_concept 
            ON person.sex_at_birth_concept_id = p_sex_at_birth_concept.concept_id"""

# The BigQuery Storage API is used only when the workspace advertises it.
_use_bqstorage = "BIGQUERY_STORAGE_API_ENABLED" in os.environ

# Run the query, then rename the key to 'indiv' so it matches the join key
# used by the PGS tables.
dataset_69461310_person_df = pd.read_gbq(
    dataset_69461310_person_sql,
    dialect="standard",
    use_bqstorage_api=_use_bqstorage,
    progress_bar_type="tqdm_notebook",
).rename(columns={'person_id': 'indiv'})

### Format Tables

In [None]:
# Case list: keep one row per person and flag every listed person as MDD = 1.
mdd_cohort = pd.read_csv("phecode_counts_allofUs.csv")
mdd_cohort_unique = (
    mdd_cohort
    .drop_duplicates(subset='person_id')
    .assign(MDD=1)
    .rename(columns={'person_id': 'indiv'})
)

# Polygenic scores: IID becomes the join key, SCORESUM the raw score (WEIGHT).
pgs = (
    pd.read_csv("als_pgs.profile", sep=",")
    .rename(columns={'IID': 'indiv', 'SCORESUM': 'WEIGHT'})
)

# Left join keeps every scored person; anyone absent from the case list has
# no MDD value after the merge and is coded 0.
pgs_mdd = pd.merge(pgs, mdd_cohort_unique, how="left", on="indiv")
pgs_mdd["MDD"] = pgs_mdd["MDD"].fillna(0)

In [None]:
# Attach demographics to every scored person (left join on 'indiv').
pgs_mdd_cov = pd.merge(pgs_mdd, dataset_69461310_person_df, on="indiv", how="left")

In [None]:
# Numeric coding of sex at birth: 0 = Male, 1 = Female, 2 = Intersex,
# 3 = missing / skipped / unmatched / declined.
_SEX_AT_BIRTH_CODES = {
    'Male': 0,
    'Female': 1,
    'PMI: Skip': 3,
    'No matching concept': 3,
    'Intersex': 2,
    'I prefer not to answer': 3,
    'None': 3,
}

# Missing values are coded 3 first, then the text labels are replaced.
_sex_filled = pgs_mdd_cov['sex_at_birth'].fillna(3)
pgs_mdd_cov['sex_at_birth'] = _sex_filled.replace(_SEX_AT_BIRTH_CODES)

In [None]:
# Keep only rows coded 0 or 1 for sex at birth.
_is_binary_sex = pgs_mdd_cov['sex_at_birth'].isin([0, 1])
pgs_mdd_cov = pgs_mdd_cov.loc[_is_binary_sex]

In [None]:
# Age in whole years at notebook run time.
# NOTE(review): because the reference point is "now", ages (and everything
# derived from them) change each time the notebook is re-run; consider
# pinning a fixed reference date.
now = datetime.now(tz=pytz.UTC)

# np.timedelta64(1, 'Y') is an ambiguous calendar unit that recent pandas
# refuses to divide by. Use an explicit fixed-length year instead; 365.2425
# days is the length NumPy assigns to its 'Y' unit, so results are unchanged
# where the old expression still worked.
_DAYS_PER_YEAR = 365.2425

# pgs_mdd_cov comes from a boolean filter; take an explicit copy so the
# column assignments below do not write to a slice of another frame.
pgs_mdd_cov = pgs_mdd_cov.copy()

pgs_mdd_cov['diff'] = now - pgs_mdd_cov["date_of_birth"]
_age_years = pgs_mdd_cov['diff'] / pd.Timedelta(days=_DAYS_PER_YEAR)

# NOTE(review): a missing date of birth is imputed as age 0 (kept from the
# original). That value later enters the regressions as a covariate; confirm
# this is intended rather than dropping those rows.
pgs_mdd_cov['current_age'] = _age_years.fillna(0).astype('int')

In [None]:
# Load the ancestry-prediction table and derive a per-person frame holding
# the predicted ancestry label plus the first five PCA features.
# NOTE(review): `hl` is not imported in any cell visible above (presumably
# `import hail as hl`, plus whatever Hail initialisation the workspace
# needs) — on a fresh kernel this cell raises NameError; confirm and add it
# to the import cell.
ancestry_pred_path = "gs://fc-aou-datasets-controlled/v7/wgs/short_read/snpindel/aux/ancestry/ancestry_preds.tsv"
ancestry_pred = hl.import_table(ancestry_pred_path,
                               key="research_id", 
                               impute=True, 
                               types={"research_id":"tstr","pca_features":hl.tarray(hl.tfloat)})
ancestry_pred_df = ancestry_pred.to_pandas(flatten=True)
# Expand the array column into one column per component (pca_0, pca_1, ...).
# `pop` removes "pca_features" from ancestry_pred_df in place before the
# concat runs, so the array column does not appear in the result.
ancestry_pred_df = pd.concat([ancestry_pred_df,
                              ancestry_pred_df.pop("pca_features").apply(pd.Series).add_prefix("pca_")], axis=1)
# research_id was imported as a string; cast to int before joining on it.
ancestry_pred_df['research_id'] = ancestry_pred_df['research_id'].astype('int')

# Keep the label and the first five components; rename the key to 'indiv'
# to match the join key of the PGS/covariate frame.
ancestry_pcs = ancestry_pred_df[['research_id', 'ancestry_pred', 'pca_0', 'pca_1', 'pca_2', 'pca_3', 'pca_4']]
ancestry_pcs = ancestry_pcs.rename(columns={'research_id': 'indiv'})

In [None]:
# Add predicted ancestry and PCs; people without a prediction keep NaN there.
pgs_mdd_cov_anc_pcs = pd.merge(pgs_mdd_cov, ancestry_pcs, on="indiv", how="left")

In [None]:
# Insurance class per person, stored as a categorical covariate.
insurance = (
    pd.read_csv('insurance_class_allofUs.csv')
    .rename(columns={'person_id': 'indiv', 'case_adjusted': 'insurance'})
)
insurance['insurance'] = insurance['insurance'].astype('category')

# Left join: people without an insurance record keep a missing value.
pgs_mdd_cov_anc_pcs = pd.merge(pgs_mdd_cov_anc_pcs, insurance, on="indiv", how="left")

In [None]:
# Split the merged frame on the MDD flag (1 = case, 0 = control).
_mdd_flag = pgs_mdd_cov_anc_pcs["MDD"]
mdd_cases = pgs_mdd_cov_anc_pcs[_mdd_flag == 1]
controls = pgs_mdd_cov_anc_pcs[_mdd_flag == 0]

In [None]:
def _ancestry_subset(code):
    """Return the rows of pgs_mdd_cov_anc_pcs predicted as ancestry `code`.

    The key column is renamed from 'indiv' to 'research_id', and two columns
    are added: 'age_2' (current_age squared) and 'PGS_Z' (WEIGHT standardised
    with the mean and standard deviation of this ancestry subset only).
    """
    in_group = pgs_mdd_cov_anc_pcs['ancestry_pred'] == code
    subset = pgs_mdd_cov_anc_pcs[in_group].rename(columns={'indiv': 'research_id'})
    subset['age_2'] = subset['current_age'] * subset['current_age']
    raw_score = subset['WEIGHT']
    subset['PGS_Z'] = (raw_score - raw_score.mean()) / raw_score.std()
    return subset


# One frame per predicted ancestry, each standardised within its own group.
eur_data, afr_data, amr_data, eas_data, sas_data = (
    _ancestry_subset(code) for code in ("eur", "afr", "amr", "eas", "sas")
)

### Load in TRD, nonresponder, and responder files

In [None]:
# ID lists for the three treatment-outcome cohorts. The first CSV column is
# consumed as the index; the TRD and nonresponder files must then have exactly
# one remaining column, which is named 'research_id'. The responder file is
# used with its own header and has to provide a 'research_id' column already.
TRD = pd.read_csv("TRD", index_col=0).set_axis(["research_id"], axis=1)
nonresponders = pd.read_csv("nonresponder.csv", index_col=0).set_axis(["research_id"], axis=1)
responder = pd.read_csv('responder.csv', index_col=0)

# A responder must not also appear in the TRD or nonresponder lists.
_in_trd = responder['research_id'].isin(TRD['research_id'])
_in_nonresponders = responder['research_id'].isin(nonresponders['research_id'])
responder = responder[~_in_trd & ~_in_nonresponders]

In [None]:
def _mdd_members(cohort_ids, ancestry_frame):
    """Inner-join a cohort ID list to one ancestry frame; keep MDD == 1 rows."""
    joined = cohort_ids.merge(ancestry_frame, on="research_id", how="inner")
    return joined[joined['MDD'] == 1]


# Fixed ancestry order used for every unpacking below: eur, amr, afr, eas, sas.
_frames_by_ancestry = (eur_data, amr_data, afr_data, eas_data, sas_data)

# Treatment-resistant depression
TRD_eur, TRD_amr, TRD_afr, TRD_eas, TRD_sas = (
    _mdd_members(TRD, frame) for frame in _frames_by_ancestry
)

# Treatment responders
responder_eur, responder_amr, responder_afr, responder_eas, responder_sas = (
    _mdd_members(responder, frame) for frame in _frames_by_ancestry
)

# Treatment nonresponders
nonresponder_eur, nonresponder_amr, nonresponder_afr, nonresponder_eas, nonresponder_sas = (
    _mdd_members(nonresponders, frame) for frame in _frames_by_ancestry
)

### Table for Baseline Characteristics

In [None]:
# Stack the per-ancestry frames back into one frame per cohort
# (row order: eur, amr, afr, eas, sas; original indexes are kept).
_total_parts = [eur_data, amr_data, afr_data, eas_data, sas_data]
_trd_parts = [TRD_eur, TRD_amr, TRD_afr, TRD_eas, TRD_sas]
_responder_parts = [responder_eur, responder_amr, responder_afr,
                    responder_eas, responder_sas]
_nonresponder_parts = [nonresponder_eur, nonresponder_amr, nonresponder_afr,
                       nonresponder_eas, nonresponder_sas]

total_df = pd.concat(_total_parts)
trd_df = pd.concat(_trd_parts)
responder_df = pd.concat(_responder_parts)
nonresponder_df = pd.concat(_nonresponder_parts)

In [None]:
def _baseline_stats(frame):
    """Summarise one cohort: size, female count, and age median / quartiles.

    'Female' is the sum of sex_at_birth, which equals the number of females
    because the column is restricted to 0 (male) / 1 (female) upstream.
    """
    age = frame['current_age']
    return {'n': len(frame),
            'Female': frame['sex_at_birth'].sum(),
            'Age(median)': age.median(),
            'q1': age.quantile(0.25),
            'q3': age.quantile(0.75)}


# One entry per cohort. The three treatment cohorts and 'MDDcases' are
# restricted to MDD == 1, 'Controls' to MDD == 0, 'Total' is unrestricted.
table = {
    'Treatment Responder': _baseline_stats(responder_df[responder_df['MDD'] == 1]),
    'Treatment Nonresponder': _baseline_stats(nonresponder_df[nonresponder_df['MDD'] == 1]),
    'Treatment Resistant': _baseline_stats(trd_df[trd_df['MDD'] == 1]),
    'MDDcases': _baseline_stats(total_df[total_df['MDD'] == 1]),
    'Controls': _baseline_stats(total_df[total_df['MDD'] == 0]),
    'Total': _baseline_stats(total_df),
}

In [None]:
def _ancestry_counts(frame):
    """Return {ancestry label: row count} for one cohort frame."""
    return frame['ancestry_pred'].value_counts().to_dict()


# Per-cohort ancestry breakdowns, using the same MDD filters as the
# baseline table.
mdd_dict = _ancestry_counts(total_df[total_df['MDD'] == 1])
control_dict = _ancestry_counts(total_df[total_df['MDD'] == 0])
total_dict = _ancestry_counts(total_df)
responder_dict = _ancestry_counts(responder_df[responder_df['MDD'] == 1])
nonresponder_dict = _ancestry_counts(nonresponder_df[nonresponder_df['MDD'] == 1])
trd_dict = _ancestry_counts(trd_df[trd_df['MDD'] == 1])

# Append the ancestry counts to the matching entry of the baseline table.
for _cohort, _counts in (('MDDcases', mdd_dict),
                         ('Controls', control_dict),
                         ('Total', total_dict),
                         ('Treatment Responder', responder_dict),
                         ('Treatment Nonresponder', nonresponder_dict),
                         ('Treatment Resistant', trd_dict)):
    table[_cohort].update(_counts)

In [None]:
# Cohorts become columns and statistics become rows after the transpose.
_cohorts_as_rows = pd.DataFrame.from_dict(table, orient='index')
table_df = _cohorts_as_rows.transpose()

### MDD plot

In [None]:
# Logistic regression of MDD status on the standardised PGS, fitted separately
# within each predicted ancestry and adjusted for sex, insurance class, age,
# age squared and the first five PCs.
_MDD_FORMULA = 'MDD ~ PGS_Z + C(sex_at_birth) + insurance + current_age + age_2 + pca_0 + pca_1 + pca_2 + pca_3 + pca_4'

pgs_alldata = pd.concat([eur_data, amr_data, afr_data, eas_data, sas_data])
results = {
    ancestry: smf.logit(_MDD_FORMULA, group).fit()
    for ancestry, group in pgs_alldata.groupby('ancestry_pred')
}

In [None]:
# For each ancestry-specific fit, move the coefficients and their confidence
# bounds onto the odds-ratio scale and keep only the PGS_Z row.
odds_ratios = {}
for ancestry, result in results.items():
    # exp() of the log-odds bounds gives the bounds of the odds ratio.
    summary = np.exp(result.conf_int())
    summary.columns = ['2.5%', '97.5%']
    summary['Odds Ratio'] = np.exp(result.params)
    summary['p-value'] = result.pvalues

    odds_ratios[ancestry] = summary.loc['PGS_Z']

In [None]:
# One row per ancestry, ordered eur, amr, afr, eas, sas for plotting.
gia = ['eur', "amr", "afr", 'eas', "sas"]

plot_data = (
    pd.DataFrame(odds_ratios)
    .T
    .reset_index()
    .set_axis(['Ancestry', 'Lower CI', 'Upper CI', 'Odds Ratio', 'p-value'], axis=1)
)
# An ordered categorical makes sort_values follow `gia` instead of the alphabet.
plot_data['Ancestry'] = pd.Categorical(plot_data['Ancestry'], categories=gia, ordered=True)
plot_data = plot_data.sort_values(by="Ancestry")

In [None]:
# Function to classify p-values
def significance_category(p_value):
    """Map a p-value to a star label.

    Returns "****" for p <= 0.0001, "***" for p <= 0.001, "**" for p <= 0.01,
    "*" for p <= 0.05 and "ns" otherwise. Every bound is inclusive. A NaN
    p-value fails every comparison and therefore yields "ns".
    """
    # Checked from the strictest cutoff down; the first match wins.
    cutoffs = ((0.0001, "****"), (0.001, "***"), (0.01, "**"), (0.05, "*"))
    for upper_bound, label in cutoffs:
        if p_value <= upper_bound:
            return label
    return "ns"

# Label every row with the star category of its p-value.
plot_data['Significance'] = plot_data['p-value'].map(significance_category)

In [None]:
from matplotlib.transforms import Affine2D
from matplotlib.lines import Line2D

# Forest plot: one odds ratio with its 95% CI per ancestry, annotated with
# the significance label computed for that row.
fig, ax = plt.subplots(nrows=1, sharex=True, sharey=True, figsize=(5, 3), dpi=150)
odds = plot_data.set_index('Ancestry')

# Vertical offsets in data units: markers sit 0.1 above the category
# position, the significance label 0.19 above it. Both are loop-invariant,
# so they are built once instead of once per row.
marker_offset = Affine2D().translate(0.0, +0.1) + ax.transData
label_offset = Affine2D().translate(0.0, +0.19) + ax.transData

# Rows are drawn in reverse so the first ancestry ends up at the top of the
# categorical y axis.
for _, row in odds.iloc[::-1].iterrows():
    odds_ratio = row['Odds Ratio']
    # errorbar expects distances from the point, not absolute bounds.
    ci = [[odds_ratio - row['Lower CI']], [row['Upper CI'] - odds_ratio]]
    plt.errorbar(x=[odds_ratio], y=[row.name], xerr=ci,
                 ecolor='tab:red', capsize=3, linestyle='None', linewidth=1, marker="o",
                 markersize=5, mfc="tab:red", mec="tab:red", transform=marker_offset)
    # The annotation text is the row's own significance label. The text
    # position is a single point, so y is passed as a scalar category name
    # rather than a one-element list.
    plt.text(odds_ratio, row.name, row['Significance'], color='black', ha='center',
             transform=label_offset, fontsize=6)

# Reference line at OR = 1 (no association).
plt.axvline(x=1, linewidth=0.8, linestyle='--', color='dimgray')

plt.tick_params(axis='both', which='major', labelsize=7, colors='dimgray')

# NOTE(review): the legend lists a red "ATLAS" and a blue "AoU" series, but
# every point drawn in this cell is red and nothing is drawn in blue —
# confirm the labels/colours match the data actually shown.
c1 = "tab:red"
c2 = "tab:blue"
leg = plt.legend(title='Cohort',
                 loc="center",
                 handles=[
                     Line2D([], [], c=c1, label="ATLAS"),
                     Line2D([], [], c=c2, label="AoU")],
                 fontsize=6,
                 frameon=False,
                 bbox_to_anchor=(1.08, 0.5), title_fontsize=7)
# Private matplotlib attribute; used to left-align the legend title.
leg._legend_box.align = "left"

plt.title("MDD-PGS Predicts MDD \n For Case Individuals", fontsize=7)
plt.xlabel('Odds Ratio (95% CI)', fontsize=8)
plt.ylabel('GIA', fontsize=8)
# Hide the right and top spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tight_layout()

### TRD, nonresponder, responder model

In [None]:
def run_regression_trd(df):
    """Fit the covariate-adjusted logistic model for the 'Resistant' outcome.

    Parameters
    ----------
    df : frame providing 'Resistant', 'PGS_Z', 'sex_at_birth', 'insurance',
        'current_age', 'age_2' and 'pca_0' .. 'pca_4'.

    Returns
    -------
    The fitted result object returned by `smf.logit(...).fit()`.
    """
    formula = 'Resistant ~ PGS_Z + C(sex_at_birth) + insurance + current_age + age_2 + pca_0 + pca_1 + pca_2 + pca_3 + pca_4'
    model = smf.logit(formula, df)
    return model.fit()

In [None]:
# Outcome coding for the two comparisons: TRD and nonresponder frames are the
# "resistant" side (1), responder frames the reference side (0).
# Only eur / amr / afr / eas are labelled and stacked; the sas frames are not
# used in this cell.
for _frame in (TRD_eur, TRD_amr, TRD_afr, TRD_eas,
               nonresponder_eur, nonresponder_amr, nonresponder_afr, nonresponder_eas):
    _frame['Resistant'] = 1

for _frame in (responder_eur, responder_amr, responder_afr, responder_eas):
    _frame['Resistant'] = 0

_responders = (responder_eur, responder_amr, responder_afr, responder_eas)

# TRD vs responder, one stacked frame per ancestry.
TRD_v_TR_eur, TRD_v_TR_amr, TRD_v_TR_afr, TRD_v_TR_eas = (
    pd.concat([resistant, reference])
    for resistant, reference in zip((TRD_eur, TRD_amr, TRD_afr, TRD_eas), _responders)
)

# Nonresponder vs responder, one stacked frame per ancestry.
nonresponder_v_TR_eur, nonresponder_v_TR_amr, nonresponder_v_TR_afr, nonresponder_v_TR_eas = (
    pd.concat([resistant, reference])
    for resistant, reference in zip(
        (nonresponder_eur, nonresponder_amr, nonresponder_afr, nonresponder_eas),
        _responders)
)

In [None]:
# Fit the adjusted 'Resistant' model once per ancestry and comparison.

# TRD vs Responder
eur_trd = run_regression_trd(TRD_v_TR_eur)
amr_trd = run_regression_trd(TRD_v_TR_amr)
afr_trd = run_regression_trd(TRD_v_TR_afr)
eas_trd = run_regression_trd(TRD_v_TR_eas)

# Nonresponder vs Responder
eur_nonresp = run_regression_trd(nonresponder_v_TR_eur)
amr_nonresp = run_regression_trd(nonresponder_v_TR_amr)
afr_nonresp = run_regression_trd(nonresponder_v_TR_afr)
# Was `nonresponder_v_TR_eas2`, a name that is never defined (NameError on a
# fresh kernel); the stacked EAS frame is `nonresponder_v_TR_eas`.
eas_nonresp = run_regression_trd(nonresponder_v_TR_eas)

In [None]:
def _pgs_z_summary(fit):
    """Odds ratio, CI bounds and p-value of the PGS_Z term of one fitted model.

    NOTE(review): the key '7.5' holds the upper confidence bound (column 1 of
    conf_int()) and looks like a typo for '97.5'. It is kept unchanged because
    later cells may refer to this label — confirm before renaming.
    """
    bounds = fit.conf_int()
    return {'OddsRatio': np.exp(fit.params['PGS_Z']),
            '2.5': np.exp(bounds.loc['PGS_Z', 0]),
            '7.5': np.exp(bounds.loc['PGS_Z', 1]),
            'p-value': fit.pvalues['PGS_Z']}


# comparison -> ancestry label -> summary of the PGS_Z effect
_fits_by_comparison = {
    'Treatment Resistant Depression vs. Treatment Responder': (
        ('EUR', eur_trd), ('AMR', amr_trd), ('AFR', afr_trd), ('EAS', eas_trd)),
    'Treatment Nonresponder vs. Treatment Responder': (
        ('EUR', eur_nonresp), ('AMR', amr_nonresp), ('AFR', afr_nonresp), ('EAS', eas_nonresp)),
}
data = {
    comparison: {label: _pgs_z_summary(fit) for label, fit in labelled_fits}
    for comparison, labelled_fits in _fits_by_comparison.items()
}

# Flatten to one row per (comparison, ancestry) pair.
_rows = {
    (comparison, label): summary
    for comparison, per_ancestry in data.items()
    for label, summary in per_ancestry.items()
}
data_df = pd.DataFrame.from_dict(_rows, orient='index')

In [None]:
# Per-ancestry case (MDD == 1.0) and control (MDD == 0.0) frames,
# unpacked in the order eur, amr, afr, eas, sas.
_per_ancestry = (eur_data, amr_data, afr_data, eas_data, sas_data)

MDD_eur, MDD_amr, MDD_afr, MDD_eas, MDD_sas = (
    frame[frame['MDD'] == 1.0] for frame in _per_ancestry
)

control_eur, control_amr, control_afr, control_eas, control_sas = (
    frame[frame['MDD'] == 0.0] for frame in _per_ancestry
)

In [None]:
import scipy.stats as stats
import scipy.stats as st  # same module under a second alias; later cells use both names


def _mean_ci(scores, confidence=0.95):
    """Summarise a series of scores as its mean plus a Student-t interval.

    Parameters
    ----------
    scores : pandas.Series
        Values to summarise (here, a cohort's 'PGS_Z' column).
    confidence : float, default 0.95
        Coverage passed to ``stats.t.interval``.

    Returns
    -------
    dict
        ``{'mean': ..., 'ci_low': ..., 'ci_high': ...}`` where the interval is
        centred on the mean, scaled by ``st.sem(scores)`` and uses
        ``len(scores) - 1`` degrees of freedom.
    """
    mean = scores.mean()
    # Evaluate the interval once and unpack both bounds, instead of calling
    # stats.t.interval separately for the lower and the upper bound.
    ci_low, ci_high = stats.t.interval(confidence, len(scores) - 1,
                                       loc=mean, scale=st.sem(scores))
    return {'mean': mean, 'ci_low': ci_low, 'ci_high': ci_high}


# Group label -> ancestry label -> cohort frame. The labels (including the
# embedded newlines) become the index of `treatment_group_df`.
_cohorts_by_group = {
    "MDD Patients": {'EUR': MDD_eur, 'AMR': MDD_amr,
                     'AFR': MDD_afr, 'EAS': MDD_eas},
    "Control": {'EUR': control_eur, 'AMR': control_amr,
                'AFR': control_afr, 'EAS': control_eas},
    "Treatment \n Resistant Depression": {'EUR': TRD_eur, 'AMR': TRD_amr,
                                          'AFR': TRD_afr, 'EAS': TRD_eas},
    "Treatment \n Nonresponder": {'EUR': nonresponder_eur, 'AMR': nonresponder_amr,
                                  'AFR': nonresponder_afr, 'EAS': nonresponder_eas},
    "Treatment Responder": {'EUR': responder_eur, 'AMR': responder_amr,
                            'AFR': responder_afr, 'EAS': responder_eas},
}

# Mean PGS_Z and its 95% confidence interval for every (group, ancestry) cohort.
treatment_groupings = {
    group: {ancestry: _mean_ci(cohort['PGS_Z'])
            for ancestry, cohort in cohorts.items()}
    for group, cohorts in _cohorts_by_group.items()
}

# One row per (group, ancestry) pair; columns are mean / ci_low / ci_high.
treatment_group_df = pd.DataFrame.from_dict(
    {(group, ancestry): summary
     for group, by_ancestry in treatment_groupings.items()
     for ancestry, summary in by_ancestry.items()},
    orient='index')

### Antidepressant class models

In [None]:
# ---- Nonresponders: one frame per value of the 'Class' column ----
ssri_nonresponder = nonresponders.loc[nonresponders['Class'].eq('SSRI')]
snri_nonresponder = nonresponders.loc[nonresponders['Class'].eq('SNRI')]
atypical_nonresponder = nonresponders.loc[nonresponders['Class'].eq('Atypical')]
tricyclic_nonresponder = nonresponders.loc[nonresponders['Class'].eq('Tricyclic')]
serotonin_nonresponder = nonresponders.loc[nonresponders['Class'].eq('Serotonin')]

# ---- Responders: one frame per value of the 'Class' column ----
ssri_responder = responder.loc[responder['Class'].eq('SSRI')]
snri_responder = responder.loc[responder['Class'].eq('SNRI')]
atypical_responder = responder.loc[responder['Class'].eq('Atypical')]
tricyclic_responder = responder.loc[responder['Class'].eq('Tricyclic')]
serotonin_responder = responder.loc[responder['Class'].eq('Serotonin')]

In [None]:
def make_antidepclass_df(class_df):
    """Inner-join `class_df` with each ancestry-specific MDD case frame.

    The join key is "research_id". The notebook-level frames MDD_eur,
    MDD_amr, MDD_afr and MDD_eas are read from the enclosing namespace.

    Returns
    -------
    tuple
        Four merged frames in the order (eur, amr, afr, eas).
    """
    merged_frames = []
    for ancestry_df in (MDD_eur, MDD_amr, MDD_afr, MDD_eas):
        merged_frames.append(
            pd.merge(class_df, ancestry_df, on="research_id", how="inner")
        )
    return tuple(merged_frames)

In [None]:
# Responders: for each antidepressant class, unpack the (EUR, AMR, AFR, EAS)
# frames returned by make_antidepclass_df.
(ssri_resp_eur, ssri_resp_amr,
 ssri_resp_afr, ssri_resp_eas) = make_antidepclass_df(ssri_responder)
(snri_resp_eur, snri_resp_amr,
 snri_resp_afr, snri_resp_eas) = make_antidepclass_df(snri_responder)
(atypical_resp_eur, atypical_resp_amr,
 atypical_resp_afr, atypical_resp_eas) = make_antidepclass_df(atypical_responder)
(tricyclic_resp_eur, tricyclic_resp_amr,
 tricyclic_resp_afr, tricyclic_resp_eas) = make_antidepclass_df(tricyclic_responder)
(serotonin_resp_eur, serotonin_resp_amr,
 serotonin_resp_afr, serotonin_resp_eas) = make_antidepclass_df(serotonin_responder)

In [None]:
# Nonresponders: for each antidepressant class, unpack the (EUR, AMR, AFR, EAS)
# frames returned by make_antidepclass_df.
(ssri_nonresp_eur, ssri_nonresp_amr,
 ssri_nonresp_afr, ssri_nonresp_eas) = make_antidepclass_df(ssri_nonresponder)
(snri_nonresp_eur, snri_nonresp_amr,
 snri_nonresp_afr, snri_nonresp_eas) = make_antidepclass_df(snri_nonresponder)
(atypical_nonresp_eur, atypical_nonresp_amr,
 atypical_nonresp_afr, atypical_nonresp_eas) = make_antidepclass_df(atypical_nonresponder)
(tricyclic_nonresp_eur, tricyclic_nonresp_amr,
 tricyclic_nonresp_afr, tricyclic_nonresp_eas) = make_antidepclass_df(tricyclic_nonresponder)
(serotonin_nonresp_eur, serotonin_nonresp_amr,
 serotonin_nonresp_afr, serotonin_nonresp_eas) = make_antidepclass_df(serotonin_nonresponder)

In [None]:
import scipy.stats as st
antidep_class_groupings = {"SSRI Responder": {'EUR': {'mean': ssri_resp_eur['PGS_Z'].mean(), 
                                                 'ci_low': stats.t.interval(0.95, len(ssri_resp_eur['PGS_Z'])-1, 
                                                                            loc=ssri_resp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(ssri_resp_eur["PGS_Z"]))[0],
                                                'ci_high': stats.t.interval(0.95, len(ssri_resp_eur['PGS_Z'])-1,
                                                                            loc=ssri_resp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(ssri_resp_eur["PGS_Z"]))[1]},
                                          'AMR': {'mean': ssri_resp_amr['PGS_Z'].mean(), 
                                                  'ci_low': stats.t.interval(0.95, len(ssri_resp_amr['PGS_Z'])-1, 
                                                                            loc=ssri_resp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(ssri_resp_amr["PGS_Z"]))[0],
                                                'ci_high': stats.t.interval(0.95, len(ssri_resp_amr['PGS_Z'])-1,
                                                                            loc=ssri_resp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(ssri_resp_amr["PGS_Z"]))[1]},
                                          'AFR': {'mean': ssri_resp_afr['PGS_Z'].mean(), 
                                                  'ci_low': stats.t.interval(0.95, len(ssri_resp_afr['PGS_Z'])-1, 
                                                                            loc=ssri_resp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(ssri_resp_afr["PGS_Z"]))[0],
                                                'ci_high': stats.t.interval(0.95, len(ssri_resp_afr['PGS_Z'])-1,
                                                                            loc=ssri_resp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(ssri_resp_afr["PGS_Z"]))[1]},
                                          'EAS': {'mean': ssri_resp_eas['PGS_Z'].mean(), 
                                                  'ci_low': stats.t.interval(0.95, len(ssri_resp_eas['PGS_Z'])-1, 
                                                                            loc=ssri_resp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(ssri_resp_eas["PGS_Z"]))[0],
                                                'ci_high': stats.t.interval(0.95, len(ssri_resp_eas['PGS_Z'])-1,
                                                                            loc=ssri_resp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(ssri_resp_eas["PGS_Z"]))[1]}},
                        
                         "SSRI Nonresponder": {'EUR': {'mean': ssri_nonresp_eur['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(ssri_nonresp_eur['PGS_Z'])-1, 
                                                                            loc=ssri_nonresp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(ssri_nonresp_eur["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(ssri_nonresp_eur['PGS_Z'])-1,
                                                                            loc=ssri_nonresp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(ssri_nonresp_eur["PGS_Z"]))[1]},
                                     'AMR': {'mean': ssri_nonresp_amr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(ssri_nonresp_amr['PGS_Z'])-1, 
                                                                            loc=ssri_nonresp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(ssri_nonresp_amr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(ssri_nonresp_amr['PGS_Z'])-1,
                                                                            loc=ssri_nonresp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(ssri_nonresp_amr["PGS_Z"]))[1]},
                                     'AFR': {'mean': ssri_nonresp_afr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(ssri_nonresp_afr['PGS_Z'])-1, 
                                                                            loc=ssri_nonresp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(ssri_nonresp_afr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(ssri_nonresp_afr['PGS_Z'])-1,
                                                                            loc=ssri_nonresp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(ssri_nonresp_afr["PGS_Z"]))[1]},
                                     'EAS': {'mean': ssri_nonresp_eas['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(ssri_nonresp_eas['PGS_Z'])-1, 
                                                                            loc=ssri_nonresp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(ssri_nonresp_eas["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(ssri_nonresp_eas['PGS_Z'])-1,
                                                                            loc=ssri_nonresp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(ssri_nonresp_eas["PGS_Z"]))[1]}},
                        
                         "SNRI Responder": {'EUR': {'mean': snri_resp_eur['PGS_Z'].mean(), 
                                                         'ci_low': stats.t.interval(0.95, len(snri_resp_eur['PGS_Z'])-1, 
                                                                            loc=snri_resp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(snri_resp_eur["PGS_Z"]))[0],
                                                         'ci_high': stats.t.interval(0.95, len(snri_resp_eur['PGS_Z'])-1,
                                                                            loc=snri_resp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(snri_resp_eur["PGS_Z"]))[1]},
                                     'AMR': {'mean': snri_resp_amr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(snri_resp_amr['PGS_Z'])-1, 
                                                                            loc=snri_resp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(snri_resp_amr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(snri_resp_amr['PGS_Z'])-1,
                                                                            loc=snri_resp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(snri_resp_amr["PGS_Z"]))[1]},
                                     'AFR': {'mean': snri_resp_afr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(snri_resp_afr['PGS_Z'])-1, 
                                                                            loc=snri_resp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(snri_resp_afr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(snri_resp_afr['PGS_Z'])-1,
                                                                            loc=snri_resp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(snri_resp_afr["PGS_Z"]))[1]},
                                     'EAS': {'mean': snri_resp_eas['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(snri_resp_eas['PGS_Z'])-1, 
                                                                            loc=snri_resp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(snri_resp_eas["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(snri_resp_eas['PGS_Z'])-1,
                                                                            loc=snri_resp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(snri_resp_eas["PGS_Z"]))[1]}},
                        
                         "SNRI Nonresponder": {'EUR': {'mean': snri_nonresp_eur['PGS_Z'].mean(), 
                                                            'ci_low': stats.t.interval(0.95, len(snri_nonresp_eur['PGS_Z'])-1, 
                                                                            loc=snri_nonresp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(snri_nonresp_eur["PGS_Z"]))[0],
                                                            'ci_high': stats.t.interval(0.95, len(snri_nonresp_eur['PGS_Z'])-1,
                                                                            loc=snri_nonresp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(snri_nonresp_eur["PGS_Z"]))[1]},
                                     'AMR': {'mean': snri_nonresp_amr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(snri_nonresp_amr['PGS_Z'])-1, 
                                                                            loc=snri_nonresp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(snri_nonresp_amr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(snri_nonresp_amr['PGS_Z'])-1,
                                                                            loc=snri_nonresp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(snri_nonresp_amr["PGS_Z"]))[1]},
                                     'AFR': {'mean': snri_nonresp_afr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(snri_nonresp_afr['PGS_Z'])-1, 
                                                                            loc=snri_nonresp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(snri_nonresp_afr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(snri_nonresp_afr['PGS_Z'])-1,
                                                                            loc=snri_nonresp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(snri_nonresp_afr["PGS_Z"]))[1]},
                                     'EAS': {'mean': snri_nonresp_eas['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(snri_nonresp_eas['PGS_Z'])-1, 
                                                                            loc=snri_nonresp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(snri_nonresp_eas["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(snri_nonresp_eas['PGS_Z'])-1,
                                                                            loc=snri_nonresp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(snri_nonresp_eas["PGS_Z"]))[1]}},
                        
                         "Atypical Responder": {'EUR': {'mean': atypical_resp_eur['PGS_Z'].mean(), 
                                               'ci_low': stats.t.interval(0.95, len(atypical_resp_eur['PGS_Z'])-1, 
                                                                            loc=atypical_resp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(atypical_resp_eur["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(atypical_resp_eur['PGS_Z'])-1,
                                                                            loc=atypical_resp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(atypical_resp_eur["PGS_Z"]))[1]},
                                     'AMR': {'mean': atypical_resp_amr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(atypical_resp_amr['PGS_Z'])-1, 
                                                                            loc=atypical_resp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(atypical_resp_amr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(atypical_resp_amr['PGS_Z'])-1,
                                                                            loc=atypical_resp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(atypical_resp_amr["PGS_Z"]))[1]},
                                     'AFR': {'mean': atypical_resp_afr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(atypical_resp_afr['PGS_Z'])-1, 
                                                                            loc=atypical_resp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(atypical_resp_afr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(atypical_resp_afr['PGS_Z'])-1,
                                                                            loc=atypical_resp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(atypical_resp_afr["PGS_Z"]))[1]},
                                     'EAS': {'mean': atypical_resp_eas['PGS_Z'].mean(), 
                                            'ci_low': stats.t.interval(0.95, len(atypical_resp_eas['PGS_Z'])-1, 
                                                                            loc=atypical_resp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(atypical_resp_eas["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(atypical_resp_eas['PGS_Z'])-1,
                                                                            loc=atypical_resp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(atypical_resp_eas["PGS_Z"]))[1]}},
                        
                         "Atypical Nonresponder": {'EUR': {'mean': atypical_nonresp_eur['PGS_Z'].mean(), 
                                               'ci_low': stats.t.interval(0.95, len(atypical_nonresp_eur['PGS_Z'])-1, 
                                                                            loc=atypical_nonresp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(atypical_nonresp_eur["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(atypical_nonresp_eur['PGS_Z'])-1,
                                                                            loc=atypical_nonresp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(atypical_nonresp_eur["PGS_Z"]))[1]},
                                     'AMR': {'mean': atypical_nonresp_amr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(atypical_nonresp_amr['PGS_Z'])-1, 
                                                                            loc=atypical_nonresp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(atypical_nonresp_amr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(atypical_nonresp_amr['PGS_Z'])-1,
                                                                            loc=atypical_nonresp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(atypical_nonresp_amr["PGS_Z"]))[1]},
                                     'AFR': {'mean': atypical_nonresp_afr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(atypical_nonresp_afr['PGS_Z'])-1, 
                                                                            loc=atypical_nonresp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(atypical_nonresp_afr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(atypical_nonresp_afr['PGS_Z'])-1,
                                                                            loc=atypical_nonresp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(atypical_nonresp_afr["PGS_Z"]))[1]},
                                     'EAS': {'mean': atypical_nonresp_eas['PGS_Z'].mean(), 
                                            'ci_low': stats.t.interval(0.95, len(atypical_nonresp_eas['PGS_Z'])-1, 
                                                                            loc=atypical_nonresp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(atypical_nonresp_eas["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(atypical_nonresp_eas['PGS_Z'])-1,
                                                                            loc=atypical_nonresp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(atypical_nonresp_eas["PGS_Z"]))[1]}},
                        
                         "Tricyclic Responder": {'EUR': {'mean': tricyclic_resp_eur['PGS_Z'].mean(), 
                                               'ci_low': stats.t.interval(0.95, len(tricyclic_resp_eur['PGS_Z'])-1, 
                                                                            loc=tricyclic_resp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(tricyclic_resp_eur["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(tricyclic_resp_eur['PGS_Z'])-1,
                                                                            loc=tricyclic_resp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(tricyclic_resp_eur["PGS_Z"]))[1]},
                                     'AMR': {'mean': tricyclic_resp_amr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(tricyclic_resp_amr['PGS_Z'])-1, 
                                                                            loc=tricyclic_resp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(tricyclic_resp_amr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(tricyclic_resp_amr['PGS_Z'])-1,
                                                                            loc=tricyclic_resp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(tricyclic_resp_amr["PGS_Z"]))[1]},
                                     'AFR': {'mean': tricyclic_resp_afr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(tricyclic_resp_afr['PGS_Z'])-1, 
                                                                            loc=tricyclic_resp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(tricyclic_resp_afr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(tricyclic_resp_afr['PGS_Z'])-1,
                                                                            loc=tricyclic_resp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(tricyclic_resp_afr["PGS_Z"]))[1]},
                                     'EAS': {'mean': tricyclic_resp_eas['PGS_Z'].mean(), 
                                            'ci_low': stats.t.interval(0.95, len(tricyclic_resp_eas['PGS_Z'])-1, 
                                                                            loc=tricyclic_resp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(tricyclic_resp_eas["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(tricyclic_resp_eas['PGS_Z'])-1,
                                                                            loc=tricyclic_resp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(tricyclic_resp_eas["PGS_Z"]))[1]}},
                        
                         "Tricyclic Nonresponder": {'EUR': {'mean': tricyclic_nonresp_eur['PGS_Z'].mean(), 
                                               'ci_low': stats.t.interval(0.95, len(tricyclic_nonresp_eur['PGS_Z'])-1, 
                                                                            loc=tricyclic_nonresp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(tricyclic_nonresp_eur["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(tricyclic_nonresp_eur['PGS_Z'])-1,
                                                                            loc=tricyclic_nonresp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(tricyclic_nonresp_eur["PGS_Z"]))[1]},
                                     'AMR': {'mean': tricyclic_nonresp_amr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(tricyclic_nonresp_amr['PGS_Z'])-1, 
                                                                            loc=tricyclic_nonresp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(tricyclic_nonresp_amr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(tricyclic_nonresp_amr['PGS_Z'])-1,
                                                                            loc=tricyclic_nonresp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(tricyclic_nonresp_amr["PGS_Z"]))[1]},
                                     'AFR': {'mean': tricyclic_nonresp_afr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(tricyclic_nonresp_afr['PGS_Z'])-1, 
                                                                            loc=tricyclic_nonresp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(tricyclic_nonresp_afr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(tricyclic_nonresp_afr['PGS_Z'])-1,
                                                                            loc=tricyclic_nonresp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(tricyclic_nonresp_afr["PGS_Z"]))[1]},
                                     'EAS': {'mean': tricyclic_nonresp_eas['PGS_Z'].mean(), 
                                            'ci_low': stats.t.interval(0.95, len(tricyclic_nonresp_eas['PGS_Z'])-1, 
                                                                            loc=tricyclic_nonresp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(tricyclic_nonresp_eas["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(tricyclic_nonresp_eas['PGS_Z'])-1,
                                                                            loc=tricyclic_nonresp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(tricyclic_nonresp_eas["PGS_Z"]))[1]}},
                        
                         "Serotonin Responder": {'EUR': {'mean': serotonin_resp_eur['PGS_Z'].mean(), 
                                               'ci_low': stats.t.interval(0.95, len(serotonin_resp_eur['PGS_Z'])-1, 
                                                                            loc=serotonin_resp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(serotonin_resp_eur["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(serotonin_resp_eur['PGS_Z'])-1,
                                                                            loc=serotonin_resp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(serotonin_resp_eur["PGS_Z"]))[1]},
                                     'AMR': {'mean': serotonin_resp_amr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(serotonin_resp_amr['PGS_Z'])-1, 
                                                                            loc=serotonin_resp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(serotonin_resp_amr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(serotonin_resp_amr['PGS_Z'])-1,
                                                                            loc=serotonin_resp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(serotonin_resp_amr["PGS_Z"]))[1]},
                                     'AFR': {'mean': serotonin_resp_afr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(serotonin_resp_afr['PGS_Z'])-1, 
                                                                            loc=serotonin_resp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(serotonin_resp_afr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(serotonin_resp_afr['PGS_Z'])-1,
                                                                            loc=serotonin_resp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(serotonin_resp_afr["PGS_Z"]))[1]},
                                     'EAS': {'mean': serotonin_resp_eas['PGS_Z'].mean(), 
                                            'ci_low': stats.t.interval(0.95, len(serotonin_resp_eas['PGS_Z'])-1, 
                                                                            loc=serotonin_resp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(serotonin_resp_eas["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(serotonin_resp_eas['PGS_Z'])-1,
                                                                            loc=serotonin_resp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(serotonin_resp_eas["PGS_Z"]))[1]}},
                        
                         "Serotonin Nonresponder": {'EUR': {'mean': serotonin_nonresp_eur['PGS_Z'].mean(), 
                                               'ci_low': stats.t.interval(0.95, len(serotonin_nonresp_eur['PGS_Z'])-1, 
                                                                            loc=serotonin_nonresp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(serotonin_nonresp_eur["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(serotonin_nonresp_eur['PGS_Z'])-1,
                                                                            loc=serotonin_nonresp_eur['PGS_Z'].mean(), 
                                                                            scale=st.sem(serotonin_nonresp_eur["PGS_Z"]))[1]},
                                     'AMR': {'mean': serotonin_nonresp_amr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(serotonin_nonresp_amr['PGS_Z'])-1, 
                                                                            loc=serotonin_nonresp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(serotonin_nonresp_amr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(serotonin_nonresp_amr['PGS_Z'])-1,
                                                                            loc=serotonin_nonresp_amr['PGS_Z'].mean(), 
                                                                            scale=st.sem(serotonin_nonresp_amr["PGS_Z"]))[1]},
                                     'AFR': {'mean': serotonin_nonresp_afr['PGS_Z'].mean(), 
                                             'ci_low': stats.t.interval(0.95, len(serotonin_nonresp_afr['PGS_Z'])-1, 
                                                                            loc=serotonin_nonresp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(serotonin_nonresp_afr["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(serotonin_nonresp_afr['PGS_Z'])-1,
                                                                            loc=serotonin_nonresp_afr['PGS_Z'].mean(), 
                                                                            scale=st.sem(serotonin_nonresp_afr["PGS_Z"]))[1]},
                                     'EAS': {'mean': serotonin_nonresp_eas['PGS_Z'].mean(), 
                                            'ci_low': stats.t.interval(0.95, len(serotonin_nonresp_eas['PGS_Z'])-1, 
                                                                            loc=serotonin_nonresp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(serotonin_nonresp_eas["PGS_Z"]))[0],
                                            'ci_high': stats.t.interval(0.95, len(serotonin_nonresp_eas['PGS_Z'])-1,
                                                                            loc=serotonin_nonresp_eas['PGS_Z'].mean(), 
                                                                            scale=st.sem(serotonin_nonresp_eas["PGS_Z"]))[1]}}}


# Flatten the nested {class label: {group key: stats}} mapping into one row per
# (class label, group key) pair; each row's columns come from the innermost
# dict ('mean', 'ci_low', 'ci_high').
antidep_class_group_df = pd.DataFrame.from_dict(
    {
        (class_label, group_key): summary
        for class_label, per_group in antidep_class_groupings.items()
        for group_key, summary in per_group.items()
    },
    orient='index',
)

In [None]:
def run_regression_class(response_df, nonresponder_df):
    """Fit a logistic regression of treatment response on PGS_Z plus covariates.

    Rows of ``response_df`` are labelled ``Response = 1`` and rows of
    ``nonresponder_df`` are labelled ``Response = 0``. The two labelled frames
    are stacked and handed to ``smf.logit`` with the formula below.

    Parameters
    ----------
    response_df : pandas.DataFrame
        Rows treated as responders. Must provide every column named in the
        formula (PGS_Z, sex_at_birth, insurance, current_age, age_2,
        pca_0 .. pca_4).
    nonresponder_df : pandas.DataFrame
        Rows treated as non-responders; same column requirements.

    Returns
    -------
    The fitted results object returned by ``smf.logit(...).fit()``.

    Notes
    -----
    The inputs are left untouched. The label is added to copies produced by
    ``DataFrame.assign`` instead of being written into the caller's frames, so
    a cohort frame reused across several comparisons is never relabelled as a
    side effect of an earlier call.
    """
    # Label copies rather than the caller's frames, then stack responders
    # first and non-responders second.
    labelled = pd.concat([
        response_df.assign(Response=1),
        nonresponder_df.assign(Response=0),
    ])

    formula = 'Response ~ PGS_Z + C(sex_at_birth) + insurance + current_age + age_2 + pca_0 + pca_1 + pca_2 + pca_3 + pca_4'
    return smf.logit(formula, labelled).fit()

In [None]:
def _pgs_odds_ratio_summary(response_df, nonresponder_df):
    """Fit the responder-vs-nonresponder model once and summarise the PGS_Z term.

    Returns a dict with the exponentiated PGS_Z coefficient ('OddsRatio'), the
    exponentiated lower and upper bounds from ``conf_int()`` ('2.5' and '7.5'),
    and the PGS_Z p-value ('p-value').
    """
    # One fit per comparison: all four statistics come from the same model.
    fit = run_regression_class(response_df, nonresponder_df)
    bounds = fit.conf_int()
    return {
        'OddsRatio': np.exp(fit.params['PGS_Z']),
        '2.5': np.exp(bounds.loc['PGS_Z', 0]),
        # NOTE(review): this is the upper bound (conf_int column 1); the '7.5'
        # label looks like a typo for '97.5' but is kept because later cells
        # may index the resulting column by this name -- confirm before renaming.
        '7.5': np.exp(bounds.loc['PGS_Z', 1]),
        'p-value': fit.pvalues['PGS_Z'],
    }


def _missing_pgs_summary():
    """Return an all-NaN placeholder with the same keys as a fitted summary."""
    return {'OddsRatio': np.nan, '2.5': np.nan, '7.5': np.nan, 'p-value': np.nan}


# Per antidepressant class and per group (EUR / AMR / AFR / EAS): odds ratio,
# interval bounds and p-value for PGS_Z in the responder-vs-nonresponder model.
# NOTE(review): the NaN placeholders mark comparisons that are deliberately not
# fitted -- presumably too few samples in those groups; confirm.
class_data = {
    'SSRI Responder vs. SSRI Nonresponder': {
        'EUR': _pgs_odds_ratio_summary(ssri_resp_eur, ssri_nonresp_eur),
        'AMR': _pgs_odds_ratio_summary(ssri_resp_amr, ssri_nonresp_amr),
        'AFR': _pgs_odds_ratio_summary(ssri_resp_afr, ssri_nonresp_afr),
        'EAS': _pgs_odds_ratio_summary(ssri_resp_eas, ssri_nonresp_eas),
    },

    'SNRI Responder vs. SNRI Nonresponder': {
        'EUR': _pgs_odds_ratio_summary(snri_resp_eur, snri_nonresp_eur),
        'AMR': _pgs_odds_ratio_summary(snri_resp_amr, snri_nonresp_amr),
        'AFR': _pgs_odds_ratio_summary(snri_resp_afr, snri_nonresp_afr),
        'EAS': _missing_pgs_summary(),
    },

    'Atypical Responder vs. Atypical Nonresponder': {
        'EUR': _pgs_odds_ratio_summary(atypical_resp_eur, atypical_nonresp_eur),
        'AMR': _pgs_odds_ratio_summary(atypical_resp_amr, atypical_nonresp_amr),
        'AFR': _missing_pgs_summary(),
        'EAS': _missing_pgs_summary(),
    },

    'Tricyclic Responder vs. Tricyclic Nonresponder': {
        'EUR': _pgs_odds_ratio_summary(tricyclic_resp_eur, tricyclic_nonresp_eur),
        'AMR': _pgs_odds_ratio_summary(tricyclic_resp_amr, tricyclic_nonresp_amr),
        'AFR': _pgs_odds_ratio_summary(tricyclic_resp_afr, tricyclic_nonresp_afr),
        'EAS': _missing_pgs_summary(),
    },

    'Serotonin Modulator Responder vs. Serotonin Modulator Nonresponder': {
        'EUR': _pgs_odds_ratio_summary(serotonin_resp_eur, serotonin_nonresp_eur),
        'AMR': _pgs_odds_ratio_summary(serotonin_resp_amr, serotonin_nonresp_amr),
        'AFR': _pgs_odds_ratio_summary(serotonin_resp_afr, serotonin_nonresp_afr),
        'EAS': _missing_pgs_summary(),
    },
}

# One row per (comparison, group) pair; columns are the summary keys above.
class_data_df = pd.DataFrame.from_dict(
    {
        (comparison, group_key): summary
        for comparison, per_group in class_data.items()
        for group_key, summary in per_group.items()
    },
    orient='index',
)

In [None]:
def _pgs_odds_ratio_row(responders, comparison_group):
    """Summarise the PGS_Z effect from one responder-vs-comparison regression.

    The model is fitted a single time and every reported statistic is read
    from that one fit.

    Parameters
    ----------
    responders, comparison_group
        Cohorts passed straight through to ``run_regression_class``. Pass
        ``None`` for either one to get an all-NaN placeholder row for a
        comparison that is not fitted.

    Returns
    -------
    dict
        Keys 'OddsRatio', '2.5', '7.5' and 'p-value'. The odds ratio and the
        two interval bounds are exponentiated values taken from the fit; the
        p-value is reported exactly as the fit provides it.
    """
    if responders is None or comparison_group is None:
        return {'OddsRatio': np.nan, '2.5': np.nan, '7.5': np.nan, 'p-value': np.nan}

    # NOTE(review): assumes run_regression_class is deterministic and does not
    # depend on how many times it has been called, so reading all statistics
    # from one fit gives the same numbers as fitting once per statistic.
    fit = run_regression_class(responders, comparison_group)
    conf_bounds = fit.conf_int()
    return {
        'OddsRatio': np.exp(fit.params['PGS_Z']),
        '2.5': np.exp(conf_bounds.loc['PGS_Z', 0]),
        # NOTE(review): '7.5' holds the upper bound (column 1 of conf_int());
        # the label is presumably meant to be '97.5'. It is left unchanged
        # because later cells may select this column by name - confirm before
        # renaming.
        '7.5': np.exp(conf_bounds.loc['PGS_Z', 1]),
        'p-value': fit.pvalues['PGS_Z'],
    }


# Responder cohort for each drug class and ancestry group. None marks a
# comparison that is reported as NaN instead of being fitted.
_trd_responder_cohorts = {
    'SSRI Responder vs. Treatment Resistant': {
        'EUR': ssri_resp_eur,
        'AMR': ssri_resp_amr,
        'AFR': ssri_resp_afr,
        'EAS': None,
    },
    'SNRI Responder vs. Treatment Resistant': {
        'EUR': snri_resp_eur,
        'AMR': snri_resp_amr,
        'AFR': snri_resp_afr,
        'EAS': None,
    },
    'Atypical Responder vs. Treatment Resistant': {
        'EUR': None,
        'AMR': atypical_resp_amr,
        'AFR': None,
        'EAS': None,
    },
    'Tricyclic Responder vs. Treatment Resistant': {
        'EUR': tricyclic_resp_eur,
        'AMR': tricyclic_resp_amr,
        'AFR': tricyclic_resp_afr,
        'EAS': None,
    },
    'Serotonin Modulator Responder vs. Treatment Resistant': {
        'EUR': serotonin_resp_eur,
        'AMR': serotonin_resp_amr,
        'AFR': serotonin_resp_afr,
        'EAS': None,
    },
}

# Treatment-resistant comparison cohort for each ancestry group.
_trd_cohorts = {'EUR': TRD_eur, 'AMR': TRD_amr, 'AFR': TRD_afr, 'EAS': None}

# Nested {comparison label: {ancestry: statistics}} mapping; insertion order of
# both levels follows the cohort table above.
class_data_withTRD = {
    comparison: {
        ancestry: _pgs_odds_ratio_row(responders, _trd_cohorts[ancestry])
        for ancestry, responders in cohorts.items()
    }
    for comparison, cohorts in _trd_responder_cohorts.items()
}

# One row per (comparison, ancestry) pair, statistics as columns.
class_data_withTRD_df = pd.DataFrame.from_dict(
    {
        (comparison, ancestry): stats
        for comparison, by_ancestry in class_data_withTRD.items()
        for ancestry, stats in by_ancestry.items()
    },
    orient='index',
)