In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import pingouin as pg
from main import create_dataset
import os

from statsmodels.stats.contingency_tables import mcnemar
import statsmodels.api as sm
from statsmodels.stats.contingency_tables import Table

In [3]:
# Make sure the output folders for the McNemar tables exist.
# `os.makedirs(..., exist_ok=True)` replaces the repeated check-then-mkdir pairs:
# it does nothing when the folder is already there and also creates a missing
# parent folder instead of raising.
for output_dir in (
    "Liat graphs/data/mcnemar_data",
    "Liat graphs/data/mcnemar_data/short_time",
    "Liat graphs/data/mcnemar_data/long_time",
):
    os.makedirs(output_dir, exist_ok=True)

# One processed CSV per time point, keyed by the time-point label.
df_times = {
    'intake': pd.read_csv(r'Liat graphs/data/intake_processed.csv'),
    'time2' : pd.read_csv(r'Liat graphs/data/time2_processed.csv'),
    'time3' : pd.read_csv(r'Liat graphs/data/time3_processed.csv')
}

# Tag every row with the label of the frame it came from, so the frames can be
# concatenated and still be told apart through the 'time' column.
for time, frame in df_times.items():
    frame['time'] = time

In [4]:

# Outcome columns listed for the 'intake' time point.
intake_target_variables = [
    'suicidal_ideation',
    'suicidal_behavior',
    'suicidal_attempt',
    'ER',
    'NSSI',
]

# Outcome columns listed for the follow-up time points.
time2_target_variables = [
    'suicidal_ideation',
    'suicidal_behavior',
    'suicidal_attempt',
    'ER',
    'Psychiatric',
    'NSSI',
    'finished_treatment',
]

# Time-point label -> outcome columns; 'time3' reuses the very same list as 'time2'.
target_variables = dict(
    intake=intake_target_variables,
    time2=time2_target_variables,
    time3=time2_target_variables,
)
        

In [5]:
# Stack the intake rows on top of the rows of one later time point:
# intake + time2 gives `df_short`, intake + time3 gives `df_long`.
df_short, df_long = (
    pd.concat([df_times['intake'], df_times[follow_up]])
    for follow_up in ('time2', 'time3')
)

## Short time: intake vs. time2

In [6]:
def do_mcnemar_test(df, groups, target_variable):
    """Cross-tabulate one outcome by group and time, then run `mcnemar` on it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'group', 'time' and `target_variable`.
    groups : list
        Values of the 'group' column to keep; all other rows are dropped.
    target_variable : str
        Column whose values are summed inside every group x time cell.

    Returns
    -------
    tuple
        ``(contingency_table, pvalue)``: the group-by-time table of sums and
        the ``pvalue`` attribute of the `mcnemar` result.

    Side effects
    ------------
    Prints the inputs, the test result and a separator line.
    """
    # NOTE(review): statsmodels documents `mcnemar` as a test for paired nominal
    # data that looks only at the two off-diagonal cells of the table. The table
    # built here holds per-group sums at each time point rather than paired
    # before/after counts per subject -- confirm this is the intended input.
    selected = df[df.group.isin(groups)]
    contingency_table = pd.crosstab(
        selected['group'],
        selected['time'],
        values=selected[target_variable],
        aggfunc='sum',
    )

    # Run the test once and reuse the result for both the printout and the
    # returned p-value (it was previously recomputed just for printing).
    results = mcnemar(contingency_table.values)
    print(f'{groups = }\n {target_variable = }')
    print(results)
    print('\t---------------------\n\n\n\n\n')

    return contingency_table, results.pvalue

In [8]:
# Run the test on `df_short` for every intake outcome, keeping only the 'ipt'
# and 'control' groups, and save each table to a CSV whose file name carries
# the p-value rounded to three decimals.
groups = ['ipt', 'control']
for target_variable in intake_target_variables:
    contingency_table, pval = do_mcnemar_test(df_short, groups, target_variable)
    csv_path = f"Liat graphs/data/mcnemar_data/short_time/{target_variable} - pval = {pval.round(decimals=3)}.csv"
    contingency_table.to_csv(csv_path)
    
    

groups = ['ipt', 'control']
 target_variable = 'suicidal_ideation'
pvalue      1.4694252456726812e-05
statistic   47.0
	---------------------





groups = ['ipt', 'control']
 target_variable = 'suicidal_behavior'
pvalue      0.3367836351899315
statistic   16.0
	---------------------





groups = ['ipt', 'control']
 target_variable = 'suicidal_attempt'
pvalue      0.01181793212890625
statistic   4.0
	---------------------





groups = ['ipt', 'control']
 target_variable = 'ER'
pvalue      0.7265625
statistic   3.0
	---------------------





groups = ['ipt', 'control']
 target_variable = 'NSSI'
pvalue      0.009853448195259268
statistic   20.0
	---------------------







## Long time: intake vs. time3

In [9]:
# Run the test on `df_long` for every intake outcome, keeping only the 'ipt'
# and 'control' groups, and save each table to a CSV whose file name carries
# the p-value rounded to three decimals.
groups = ['ipt', 'control']
for target_variable in intake_target_variables:
    contingency_table, pval = do_mcnemar_test(df_long, groups, target_variable)
    csv_path = f"Liat graphs/data/mcnemar_data/long_time/{target_variable} - pval = {pval.round(decimals=3)}.csv"
    contingency_table.to_csv(csv_path)
    

groups = ['ipt', 'control']
 target_variable = 'suicidal_ideation'
pvalue      6.990213957185692e-08
statistic   37.0
	---------------------





groups = ['ipt', 'control']
 target_variable = 'suicidal_behavior'
pvalue      0.08953107893466952
statistic   12.0
	---------------------





groups = ['ipt', 'control']
 target_variable = 'suicidal_attempt'
pvalue      0.001312255859375
statistic   2.0
	---------------------





groups = ['ipt', 'control']
 target_variable = 'ER'
pvalue      0.7265625
statistic   3.0
	---------------------





groups = ['ipt', 'control']
 target_variable = 'NSSI'
pvalue      0.0062176026593266705
statistic   19.0
	---------------------







## Test: `pd.crosstab` on a small hand-made frame

In [10]:
# Small hand-made frame: a two-level 'group' column and a boolean 'target' column.
data = dict(
    group=list('XXYYY'),
    target=[True, False, True, True, False],
)

df = pd.DataFrame(data)

In [11]:
# Count the rows of `df` for every (group, target) combination.
contingency_table = pd.crosstab(index=df['group'], columns=df['target'])

In [13]:
# Write the table to a CSV file in the current working directory.
contingency_table.to_csv(path_or_buf='meowmeow.csv')