In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import pingouin as pg
from main import create_dataset
import os

from statsmodels.stats.contingency_tables import mcnemar
import statsmodels.api as sm
from statsmodels.stats.contingency_tables import Table

In [2]:
# Create the output folders for the "young" McNemar exports.
# os.makedirs also creates any missing parent folder (e.g. mcnemar_data itself),
# and exist_ok=True makes re-running this cell a no-op instead of an error.
for output_dir in (
    "Liat graphs/data/mcnemar_data/young",
    "Liat graphs/data/mcnemar_data/young/short_effect",
    "Liat graphs/data/mcnemar_data/young/long_effect",
):
    os.makedirs(output_dir, exist_ok=True)

    
# Read the three processed CSV files into a dict keyed by time-point name.
df_times = dict(
    intake=pd.read_csv(r'Liat graphs/data/intake_processed.csv'),
    time2=pd.read_csv(r'Liat graphs/data/time2_processed.csv'),
    time3=pd.read_csv(r'Liat graphs/data/time3_processed.csv'),
)

# Tag every row with the key of the frame it came from, so the frames can be
# concatenated later and still be told apart through the 'time' column.
for time, frame in df_times.items():
    frame['time'] = time

In [3]:

# Outcome columns listed for the intake frame.
intake_target_variables = [
    'suicidal_ideation',
    'suicidal_behavior',
    'suicidal_attempt',
    'ER',
    'NSSI',
]

# Outcome columns listed for the follow-up frames (time2 and time3 share this list).
time2_target_variables = [
    'suicidal_ideation',
    'suicidal_behavior',
    'suicidal_attempt',
    'ER',
    'Psychiatric',
    'NSSI',
    'finished_treatment',
]

target_variables = {
    'intake': intake_target_variables,
    'time2': time2_target_variables,
    'time3': time2_target_variables,
}

# Restrict every frame to rows with age_child_pre below 12.5 whose group is
# either 'ipt' or 'control'.
for time in ('intake', 'time2', 'time3'):
    is_young = df_times[time]['age_child_pre'] < 12.5
    df = df_times[time][is_young]
    in_study_arm = df['group'].isin(['ipt', 'control'])
    df_times[time] = df[in_study_arm]


In [4]:
# Stack the intake rows on top of each follow-up frame: time2 for the short
# effect, time3 for the long effect.
df_short = pd.concat([df_times['intake'], df_times['time2']])
df_long = pd.concat([df_times['intake'], df_times['time3']])

# Outcome columns analysed in the rest of the notebook.
current_target_vars = ['suicidal_ideation',
 'suicidal_behavior', 'NSSI']

# Identifying / descriptive columns exported next to the outcomes.
info_cols = ['group', 'id', 'age_child_pre', 'gender', 'redcap_event_name']

# The export paths end in .xlsx, so write real Excel workbooks with to_excel.
# (to_csv would put plain CSV text behind an .xlsx name, which spreadsheet
# applications reject as an invalid workbook.)
df_long[current_target_vars + info_cols].to_excel("Liat graphs/data/mcnemar_data/young/long_effect_raw_data.xlsx", index=False)
df_short[current_target_vars + info_cols].to_excel("Liat graphs/data/mcnemar_data/young/short_effect_raw_data.xlsx", index=False)


In [5]:
# Show df_long (the intake rows concatenated with the time3 rows) as the cell output.
df_long

Unnamed: 0,id,gender,redcap_event_name,age_child_pre,parents_born_m,parents_born_2_m,born_child_m,born_child_2_m,parent_religion_m,parent_religion_other_m,...,group___3,suicidal_ideation,suicidal_behavior,suicidal_attempt,ER,Psychiatric,NSSI,finished_treatment,group,time
8,i6535,,intake_arm_1,10.2,,,,,,,...,1.0,0,0,0,0,0,0,0,control,intake
9,N4125‏,,intake_arm_1,12.0,,,,,,,...,0.0,0,0,0,0,0,0,0,ipt,intake
13,E5541,2.0,intake_arm_1,12.0,1.0,,1.0,,1.0,,...,1.0,1,0,0,0,0,0,0,control,intake
19,N7245,1.0,intake_arm_1,9.0,2.0,אירן,1.0,,4.0,,...,0.0,1,0,0,0,0,0,0,ipt,intake
22,H0740,2.0,intake_arm_1,12.0,1.0,,1.0,,3.0,,...,1.0,1,1,0,0,0,0,0,control,intake
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,T7894,,control_3month_arm_1,11.0,,,,,,,...,0.0,0,0,0,1,0,0,0,ipt,time3
246,D4440,,control_3month_arm_1,9.0,,,,,,,...,0.0,0,0,0,0,0,0,0,ipt,time3
254,M4805,,control_6month_arm_1,9.0,,,,,,,...,1.0,0,0,0,0,0,0,0,control,time3
256,9624,,control_6month_arm_1,11.5,,,,,,,...,1.0,0,0,0,0,0,0,0,control,time3


In [6]:
def do_mcnemar_test(df, target_variable):
    """Run statsmodels' ``mcnemar`` on a group-by-time table of summed outcomes.

    The table is built with ``pd.crosstab``: rows are the values of
    ``df['group']``, columns are the values of ``df['time']`` and each cell
    holds the sum of ``df[target_variable]`` for that combination. The test
    result is printed and the table is returned together with the p-value.

    NOTE(review): McNemar's test is defined for a *paired* 2x2 table (the same
    subjects cross-classified on two occasions). The table built here holds
    group x time totals instead -- confirm that this is the intended analysis.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'group'``, ``'time'`` and ``target_variable``.
    target_variable : str
        Name of the outcome column summed inside each cell of the table.

    Returns
    -------
    tuple
        ``(contingency_table, pvalue)``: the crosstab DataFrame and the
        p-value reported by ``mcnemar``.

    Raises
    ------
    ValueError
        If the crosstab is not 2x2. ``mcnemar`` only reads the two
        off-diagonal cells, so a larger table would otherwise be tested
        silently on a fragment of the data.
    """
    contingency_table = pd.crosstab(df['group'], df['time'], values=df[target_variable], aggfunc='sum')

    if contingency_table.shape != (2, 2):
        raise ValueError(
            f"McNemar's test needs a 2x2 table, got shape {contingency_table.shape} "
            f"for target variable {target_variable!r}"
        )

    # Compute the test once and reuse the result for printing and for the return value.
    results = mcnemar(contingency_table.values)

    print(f'{target_variable = }\nmcnemar(contingency_table.values) =\n')
    print(results)
    print('\t---------------------\n\n\n\n\n')

    return contingency_table, results.pvalue

## Short effect

In [7]:
# Short effect (intake vs. time2): test every outcome and export its table.
for target_variable in current_target_vars:
    contingency_table, pval = do_mcnemar_test(df_short, target_variable)

    is_significant = pval < 0.05
    if is_significant:
        # For significant outcomes, also print the number of distinct ids per
        # time / group / outcome value.
        in_study_arm = df_short.group.isin(['ipt', 'control'])
        df_short = df_short[in_study_arm]
        ids_per_cell = df_short.groupby(['time', 'group', target_variable]).id.nunique()
        print(ids_per_cell)
        print('\n\n\n\n\n\n\n')

    # The rounded p-value is embedded in the file name; index=False means the
    # row labels (the group values) are not written to the sheet.
    rounded_pval = pval.round(decimals=3)
    contingency_table.to_excel(
        f"Liat graphs/data/mcnemar_data/young/short_effect/{target_variable} - pval = {rounded_pval}.xlsx",
        index=False,
    )
    
    

target_variable = 'suicidal_ideation'
mcnemar(contingency_table.values) =

pvalue      0.14330665429588407
statistic   14.0
	---------------------





target_variable = 'suicidal_behavior'
mcnemar(contingency_table.values) =

pvalue      1.0
statistic   4.0
	---------------------





target_variable = 'NSSI'
mcnemar(contingency_table.values) =

pvalue      1.0
statistic   4.0
	---------------------







## Long effect

In [8]:
# Long effect (intake vs. time3): test every outcome and export its table.
for target_variable in current_target_vars:
    contingency_table, pval = do_mcnemar_test(df_long, target_variable)

    if pval < 0.05:
        # For significant outcomes, also print the number of distinct ids per
        # time / group / outcome value.
        df_long = df_long[df_long.group.isin(['ipt', 'control'])]
        print(df_long.groupby(['time', 'group', target_variable]).id.nunique())
        print('\n\n\n\n\n\n\n')

    # to_excel selects its writer engine from the file extension, so the path
    # must end in .xlsx -- a ".csv" suffix raises
    # "ValueError: No engine for filetype: 'csv'". This also matches the
    # short-effect export. index=False means the row labels (the group values)
    # are not written to the sheet.
    contingency_table.to_excel(f"Liat graphs/data/mcnemar_data/young/long_effect/{target_variable} - pval = {pval.round(decimals=3)}.xlsx", index=False)
    

target_variable = 'suicidal_ideation'
mcnemar(contingency_table.values) =

pvalue      0.0070003666914999485
statistic   8.0
	---------------------





time    group    suicidal_ideation
intake  control  0                    19
                 1                    17
        ipt      0                    14
                 1                    24
time3   control  0                    15
                 1                     8
        ipt      0                    22
                 1                     9
Name: id, dtype: int64








target_variable = 'suicidal_behavior'
mcnemar(contingency_table.values) =

pvalue      0.125
statistic   0.0
	---------------------





target_variable = 'NSSI'
mcnemar(contingency_table.values) =

pvalue      0.6875
statistic   2.0
	---------------------







## Test

In [9]:
# Tiny hand-made example: five rows with a group label and a boolean target.
toy_groups = ['X', 'X', 'Y', 'Y', 'Y']
toy_targets = [True, False, True, True, False]
data = dict(group=toy_groups, target=toy_targets)

df = pd.DataFrame(data)

In [10]:
# Count the rows of the toy frame for every (group, target) combination.
contingency_table = pd.crosstab(index=df['group'], columns=df['target'])

In [11]:
#contingency_table.to_csv('meowmeow.csv')