In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import pingouin as pg
from main import create_dataset
import dtale
import os
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
from statsmodels.stats.contingency_tables import mcnemar


In [2]:
times = {'intake': ['intake_arm_1', 'pre_treatment_arm_1'],
         'time2' : ['5th_session_arm_1', 'control_5weeks_arm_1'],
         'time3' : ['followup_3month_arm_1', 'control_3month_arm_1', 'control_6month_arm_1'],
        }
if not os.path.exists("Liat graphs/plots"):
    os.mkdir("Liat graphs/plots")

group_names = {
    '0': 'invalid',
    '1': 'ipt', # חירום
    '2' : 'tau', # רגיל
    '3': 'control' # מינימלית
}

# for time in times.keys():
#     print(time)
#     create_dataset(event_names = times[time], path = f"Liat graphs/data/{time}.csv")


In [3]:
df_times = {
    'intake': pd.read_csv(r'Liat graphs/data/intake.csv'),
    'time2' : pd.read_csv(r'Liat graphs/data/time2.csv'),
    'time3' : pd.read_csv(r'Liat graphs/data/time3.csv')
         }

intake_target_variables =  ['suicidal_ideation', 'suicidal_behavior',
        'suicidal_attempt', 'ER', 'NSSI']

time2_target_variables = ['suicidal_ideation', 'suicidal_behavior', 
        'suicidal_attempt', 'ER', 'Psychiatric', 
        'NSSI', 'finished_treatment']

target_variables = {
    'intake': intake_target_variables,
    'time2': time2_target_variables, 
    'time3': time2_target_variables}
        

In [4]:
def rename_groups(df, group_names):
    df['group'] = '0'
    df.loc[df['group___1'] == 1, 'group'] = '1'
    df.loc[df['group___2'] == 1, 'group'] = '2'
    df.loc[df['group___3'] == 1, 'group'] = '3'
    df['group'] = df['group'].map(group_names)
    return df

for time in times:
    df = df_times[time]
    df_times[time] = rename_groups(df, group_names)

In [5]:

def plot (df, target, stat='anova'):
    # remove any pre-existing indices for ease of use in the D-Tale code, but this is not required
    df = df.reset_index().drop('index', axis=1, errors='ignore')
    
    
    df.columns = [str(c) for c in df.columns]  # update columns to strings in case they are numbers
    
    if stat == 'anova':
        anova_result = pg.anova(data=df, dv=target, between='group')[['Source', 'F', 'p-unc']]
        anova_str = anova_result.round(decimals=3).to_string(index=False).split('\n')
        stat_text = f"<b>ANOVA Result:</b><br>{anova_str[0]}</b><br>{anova_str[1]}"
    elif stat == 'chi_square':
        expected, observed, stats = pg.chi2_independence(data=df, x=target, y='group')
        stats = stats[stats.test == 'pearson'].round(3)[['pval', 'power']].to_string(index=False).split('\n')
        stat_text = f"<b>chi_square Result:</b><br>{stats[0]}</b><br>{stats[1]}"
        
        
    chart_data = pd.concat([
        pd.Series(df.index, index=df.index, name='__index__'),
        df['group'],
        df[target],
    ], axis=1)
    chart_data = chart_data.query(f"""(`{target}` == 1) or (`{target}` == 0)""")
    chart_data = chart_data.sort_values([target, 'group'])
    chart_data = chart_data.rename(columns={'group': 'x'})
    chart_data_pctct = chart_data.groupby([target, 'x'])[['__index__']].count()
    chart_data_pctct = chart_data_pctct / chart_data_pctct.groupby(['x']).count()
    chart_data_pctct.columns = ['__index__|pctct']
    chart_data = chart_data_pctct.reset_index()
    chart_data = chart_data.dropna()
    
    chart_data = chart_data.query(f"""{target} == 1""")


    charts = []
    charts.append(go.Bar(
        x=chart_data['x'],
        y=chart_data['__index__|pctct'],
        name=f'({target}: 1)',
        marker_color = 'red'
    ))


    chart_data = pd.concat([
        pd.Series(df.index, index=df.index, name='__index__'),
        df['group'],
        df[target],
    ], axis=1)
    chart_data = chart_data.query(f"""(`{target}` == 1) or (`{target}` == 0)""")
    chart_data = chart_data.sort_values([target, 'group'])
    chart_data = chart_data.rename(columns={'group': 'x'})
    chart_data_pctct = chart_data.groupby([target, 'x'])[['__index__']].count()
    chart_data_pctct = chart_data_pctct / chart_data_pctct.groupby(['x']).count()
    chart_data_pctct.columns = ['__index__|pctct']
    chart_data = chart_data_pctct.reset_index()
    chart_data = chart_data.dropna()
    # WARNING: This is not taking into account grouping of any kind, please apply filter associated with
    #          the group in question in order to replicate chart. For this we're using '"""`gender` == 'man'"""'
    chart_data = chart_data.query(f"""`{target}` == 0""")

    charts.append(go.Bar(
        x=chart_data['x'],
        y=chart_data['__index__|pctct'],
        name=f'({target}: 0)',
        marker_color = 'green'
    ))

    figure = go.Figure(data=charts, layout=go.Layout({
        'barmode': 'group',
        'legend': {'orientation': 'h'},
        'title': {'text': f'{target} x group'},
        'xaxis': {'tickformat': '0:g', 'title': {'text': 'group'}},
        'yaxis': {'tickformat': '0:g', 'title': {'text': 'Count'}, 'type': 'linear'},
    }))
    figure.add_annotation(
        x=1,
        y=1,
        text=stat_text,
        showarrow=False,
        font=dict(size=11, color='black'),
        bgcolor='lightgray',
        bordercolor='black',
        borderwidth=1,
        borderpad=12,
        xref='paper',
        yref='paper'
    )
    return figure


In [6]:
for time in times:
    df = df_times[time]
    
    df[df.group == 'invalid'][['id', 'group']].to_csv(f'Liat graphs/data/{time}_invalid_group.csv')
    
    df = df[df.group.isin(['ipt', 'control'])]
    df = df[target_variables[time] + ['group']]
    df.to_csv(f'Liat graphs/data/{time}_processed.csv')
    
    for target in target_variables[time]:

        figure = plot(df, target, stat='chi_square')
        figure.write_html(f"Liat graphs/plots/{time}_{target}.html")



Low count on observed frequencies.


Low count on expected frequencies.


Low count on observed frequencies.


Low count on observed frequencies.


Low count on expected frequencies.


Low count on observed frequencies.


Low count on expected frequencies.


Low count on observed frequencies.


Low count on expected frequencies.


Low count on observed frequencies.


Low count on expected frequencies.



In [10]:
for target in target_variables['intake']:
    intake = df_times['intake']
    time2 = df_times['time2']
    
    intake = intake[['group', target]]
    time2 = time2[['group', target]]

    intake_values = intake.groupby('group')[target].sum()[['ipt', 'control']].values
    time2_values = time2.groupby('group')[target].sum()[['ipt', 'control']].values

    df = pd.DataFrame({'intake': intake_values, 'time2': time2_values})
    print('\n\n\n\n\n', target)
    print(mcnemar(df.values, exact=False))






 suicidal_ideation
pvalue      0.36330214088689783
statistic   0.8264462809917356





 suicidal_behavior
pvalue      0.032762645078859856
statistic   4.558139534883721





 suicidal_attempt
pvalue      1.0
statistic   0.0





 ER
pvalue      0.22779999398822554
statistic   1.4545454545454546





 NSSI
pvalue      0.3221988061625787
statistic   0.98


In [9]:
for target in target_variables['intake']:
    intake = df_times['intake']
    time3 = df_times['time3']
    
    intake = intake[['group', target]]
    time3 = time3[['group', target]]

    intake_values = intake.groupby('group')[target].sum()[['ipt', 'control']].values
    time3_values = time3.groupby('group')[target].sum()[['ipt', 'control']].values

    df = pd.DataFrame({'intake': intake_values, 'time3': time3_values})
    print('\n\n\n\n\n', target)
    print(mcnemar(df.values, exact=False))






 suicidal_ideation
pvalue      0.1845725528398791
statistic   1.7604166666666667





 suicidal_behavior
pvalue      0.48623432138829725
statistic   0.48484848484848486





 suicidal_attempt
pvalue      0.21129954733370696
statistic   1.5625





 ER
pvalue      0.502334954360502
statistic   0.45





 NSSI
pvalue      0.07363827012030258
statistic   3.2
