In [15]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import pingouin as pg
from main import create_dataset
import dtale
import os
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
from statsmodels.stats.contingency_tables import mcnemar
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
import statsmodels.api as sm

In [2]:
times = {'intake': ['intake_arm_1', 'pre_treatment_arm_1'],
         'time2' : ['5th_session_arm_1', 'control_5weeks_arm_1'],
         'time3' : ['followup_3month_arm_1', 'control_3month_arm_1', 'control_6month_arm_1'],
        }
if not os.path.exists("Liat graphs/young/plots"):
    os.mkdir("Liat graphs/young/plots")
    
if not os.path.exists("Liat graphs/young/data"):
    os.mkdir("Liat graphs/young/data")

group_names = {
    '0': 'invalid',
    '1': 'ipt', # חירום
    '2' : 'tau', # רגיל
    '3': 'control' # מינימלית
}

# for time in times.keys():
#     print(time)
#     create_dataset(event_names = times[time], path = f"Liat graphs/data/{time}.csv")


In [3]:
df_times = {
    'intake': pd.read_csv(r'Liat graphs/data/intake.csv'),
    'time2' : pd.read_csv(r'Liat graphs/data/time2.csv'),
    'time3' : pd.read_csv(r'Liat graphs/data/time3.csv')
         }

intake_target_variables =  ['suicidal_ideation', 'suicidal_behavior', 'NSSI']

time2_target_variables = ['suicidal_ideation', 'suicidal_behavior', 'NSSI']

target_variables = {
    'intake': intake_target_variables,
    'time2': time2_target_variables, 
    'time3': time2_target_variables}
        

In [4]:
for time in times:
    df = df_times[time]
    df_times[time] = df[df['age_child_pre'] < 12.5]
    df_times[time]['time'] = time




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [5]:
def rename_groups(df, group_names):
    df['group'] = '0'
    df.loc[df['group___1'] == 1, 'group'] = '1'
    df.loc[df['group___2'] == 1, 'group'] = '2'
    df.loc[df['group___3'] == 1, 'group'] = '3'
    df['group'] = df['group'].map(group_names)
    return df

for time in times:
    df = df_times[time]
    df_times[time] = rename_groups(df, group_names)
    df_times[time] = df[df['group'].isin(['ipt', 'control'])]

In [6]:
def plot (df, target, stat='anova'):
    # remove any pre-existing indices for ease of use in the D-Tale code, but this is not required
    df = df.reset_index().drop('index', axis=1, errors='ignore')
    
    
    df.columns = [str(c) for c in df.columns]  # update columns to strings in case they are numbers
    
    if stat == 'anova':
        anova_result = pg.anova(data=df, dv=target, between='group')[['Source', 'F', 'p-unc']]
        anova_str = anova_result.round(decimals=3).to_string(index=False).split('\n')
        stat_text = f"<b>ANOVA Result:</b><br>{anova_str[0]}</b><br>{anova_str[1]}"
    elif stat == 'chi_square':
        expected, observed, stats = pg.chi2_independence(data=df, x=target, y='group')
        stats = stats[stats.test == 'pearson'].round(3)[['pval', 'power']].to_string(index=False).split('\n')
        stat_text = f"<b>chi_square Result:</b><br>{stats[0]}</b><br>{stats[1]}"
        
        
    chart_data = pd.concat([
        pd.Series(df.index, index=df.index, name='__index__'),
        df['group'],
        df[target],
    ], axis=1)
    chart_data = chart_data.query(f"""(`{target}` == 1) or (`{target}` == 0)""")
    chart_data = chart_data.sort_values([target, 'group'])
    chart_data = chart_data.rename(columns={'group': 'x'})
    chart_data_pctct = chart_data.groupby([target, 'x'])[['__index__']].count()
    chart_data_pctct = chart_data_pctct / chart_data_pctct.groupby(['x']).count()
    chart_data_pctct.columns = ['__index__|pctct']
    chart_data = chart_data_pctct.reset_index()
    chart_data = chart_data.dropna()
    
    chart_data = chart_data.query(f"""{target} == 1""")


    charts = []
    charts.append(go.Bar(
        x=chart_data['x'],
        y=chart_data['__index__|pctct'],
        name=f'({target}: 1)',
        marker_color = 'red'
    ))


    chart_data = pd.concat([
        pd.Series(df.index, index=df.index, name='__index__'),
        df['group'],
        df[target],
    ], axis=1)
    chart_data = chart_data.query(f"""(`{target}` == 1) or (`{target}` == 0)""")
    chart_data = chart_data.sort_values([target, 'group'])
    chart_data = chart_data.rename(columns={'group': 'x'})
    chart_data_pctct = chart_data.groupby([target, 'x'])[['__index__']].count()
    chart_data_pctct = chart_data_pctct / chart_data_pctct.groupby(['x']).count()
    chart_data_pctct.columns = ['__index__|pctct']
    chart_data = chart_data_pctct.reset_index()
    chart_data = chart_data.dropna()
    # WARNING: This is not taking into account grouping of any kind, please apply filter associated with
    #          the group in question in order to replicate chart. For this we're using '"""`gender` == 'man'"""'
    chart_data = chart_data.query(f"""`{target}` == 0""")

    charts.append(go.Bar(
        x=chart_data['x'],
        y=chart_data['__index__|pctct'],
        name=f'({target}: 0)',
        marker_color = 'green'
    ))

    figure = go.Figure(data=charts, layout=go.Layout({
        'barmode': 'group',
        'legend': {'orientation': 'h'},
        'title': {'text': f'{target} x group'},
        'xaxis': {'tickformat': '0:g', 'title': {'text': 'group'}},
        'yaxis': {'tickformat': '0:g', 'title': {'text': 'Count'}, 'type': 'linear'},
    }))
    figure.add_annotation(
        x=1,
        y=1,
        text=stat_text,
        showarrow=False,
        font=dict(size=11, color='black'),
        bgcolor='lightgray',
        bordercolor='black',
        borderwidth=1,
        borderpad=12,
        xref='paper',
        yref='paper'
    )
    figure.show()
    return figure


In [7]:
if False:
    df_intake = df_times['intake']
    df_intake = df_intake[df_intake.group.isin(['ipt', 'control'])]
    df_intake = df_intake[target_variables[time] + ['group', 'age_child_pre']]
    df_intake.to_csv(f'Liat graphs/young/data/{time}_groups_targets_data.csv')


    for target in target_variables['intake']:

        figure = plot(df_intake, target, stat='chi_square')
        figure.write_html(f"Liat graphs/young/plots/intake_{target} X groups.html")


In [8]:

current_target_vars = ['suicidal_ideation', 'suicidal_behavior', 'NSSI']

info_cols = ['group', 'age_child_pre', 'gender', 'time']

df_short = pd.concat([df_times['intake'], df_times['time2']])[current_target_vars + info_cols]
df_long = pd.concat([df_times['intake'], df_times['time3']])[current_target_vars + info_cols]

In [9]:
d = dtale.show(df_long)
d.open_browser()

2023-09-23 16:46:51,523 - ERROR    - Exception occurred while processing request: 
Image export using the "kaleido" engine requires the kaleido package,
which can be installed using pip:
    $ pip install -U kaleido
Traceback (most recent call last):
  File "C:\Users\nogur\anaconda3\Lib\site-packages\dtale\views.py", line 119, in _handle_exceptions
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\nogur\anaconda3\Lib\site-packages\dtale\views.py", line 3708, in chart_export
    output = export_png(data_id, params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\nogur\anaconda3\Lib\site-packages\dtale\dash_application\charts.py", line 4156, in export_png
    write_image(chart, file=image_buffer, format="png")
  File "C:\Users\nogur\anaconda3\Lib\site-packages\plotly\io\_kaleido.py", line 267, in write_image
    img_data = to_image(
               ^^^^^^^^^
  File "C:\Users\nogur\anaconda3\Lib\site-packages\plotly\io\_kaleido.py", line 133, in t

In [22]:
def plot_treatment_improvement (df, target):

    # remove any pre-existing indices for ease of use in the D-Tale code, but this is not required
    df = df.reset_index().drop('index', axis=1, errors='ignore')
    df.columns = [str(c) for c in df.columns]  # update columns to strings in case they are numbers

    chart_data = pd.concat([
        df['group'],
        df['suicidal_ideation'],
        df['time'],
    ], axis=1)
    chart_data = chart_data.query("""(`time` == 'intake') or (`time` == 'time3')""")
    chart_data = chart_data.sort_values(['time', 'group'])
    chart_data = chart_data.rename(columns={'group': 'x'})
    chart_data_mean = chart_data.groupby(['time','x'], dropna=True)[[target]].mean()
    chart_data_mean.columns = [f'{target}||mean']
    chart_data = chart_data_mean.reset_index()
    chart_data = chart_data.dropna()
    # WARNING: This is not taking into account grouping of any kind, please apply filter associated with
    #          the group in question in order to replicate chart. For this we're using '"""`time` == 'intake'"""'

    import plotly.graph_objs as go

    charts = []

    intake_chart_data = chart_data.query("""`time` == 'intake'""")
    charts.append(go.Bar(
        x=intake_chart_data['x'],
        y=intake_chart_data[f'{target}||mean'],
        name='(time: intake)'
    ))
    
    
    time3_chart_data = chart_data.query("""`time` == 'time3'""")
    charts.append(go.Bar(
        x=time3_chart_data['x'],
        y=time3_chart_data[f'{target}||mean'],
        name='(time: long follow up)'
    ))
    
    figure = go.Figure(data=charts, layout=go.Layout({
        'barmode': 'group',
        'legend': {'orientation': 'h', 'y': -0.3},
        'title': {'text': f'Rate of {target} by treatment group'},
        'xaxis': {'title': {'text': 'group'}},
        'yaxis': {'title': {'text': f'Rate of {target}'}, 'type': 'linear'}
    }))

    figure.show()
    return figure


In [23]:
def logistic_regresstion_test(df, target):
    X = df[['time', 'group']]
    Y = df[target]

    label_encoder_of_time = LabelEncoder()
    label_encoder_of_group = LabelEncoder()
    standard_scaler = StandardScaler()
    
    X['group'] = label_encoder_of_group.fit_transform(X['group'])
    X['time'] = label_encoder_of_group.fit_transform(X['time'])
    X[['group', 'time']] = standard_scaler.fit_transform(X[['group', 'time']])
    X['interaction'] = X['time'] * X['group']
    
    model = sm.Logit(Y, X).fit()
    
    wald_test_with_interation = model.wald_test('time + group + interaction = 0')
    wald_test_linear = model.wald_test('time + group = 0')
    
    print(f"{wald_test_with_interation = }")
    print(f"{wald_test_linear = }")



In [24]:
figure = plot_treatment_improvement (df_long, 'suicidal_ideation')



df_long.to_csv(f'Liat graphs/young/data/treatment_effect_data.csv')
figure.write_html(f"Liat graphs/young/plots/treatment_effect_suicidal_ideation.html")


In [16]:
for target in target_variables['intake']:
    print(f'{target = }')
    logistic_regresstion_test(df_long, target)

target = 'suicidal_ideation'
Optimization terminated successfully.
         Current function value: 0.656563
         Iterations 4
wald_test_with_interation = <class 'statsmodels.stats.contrast.ContrastResults'>
<Wald test (chi2): statistic=[[4.05770164]], p-value=0.04397030721853156, df_denom=1>
wald_test_linear = <class 'statsmodels.stats.contrast.ContrastResults'>
<Wald test (chi2): statistic=[[2.47984302]], p-value=0.11531376172741112, df_denom=1>
target = 'suicidal_behavior'
Optimization terminated successfully.
         Current function value: 0.689344
         Iterations 4
wald_test_with_interation = <class 'statsmodels.stats.contrast.ContrastResults'>
<Wald test (chi2): statistic=[[0.37425232]], p-value=0.540695465228956, df_denom=1>
wald_test_linear = <class 'statsmodels.stats.contrast.ContrastResults'>
<Wald test (chi2): statistic=[[0.39680343]], p-value=0.5287447338495318, df_denom=1>
target = 'NSSI'
Optimization terminated successfully.
         Current function value: 0.69



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [17]:
for target in target_variables['intake']:
    print(f'{target = }')
    logistic_regresstion_test(df_short, target)

target = 'suicidal_ideation'
Optimization terminated successfully.
         Current function value: 0.681658
         Iterations 4
wald_test_with_interation = <class 'statsmodels.stats.contrast.ContrastResults'>
<Wald test (chi2): statistic=[[1.3637821]], p-value=0.24288308485540322, df_denom=1>
wald_test_linear = <class 'statsmodels.stats.contrast.ContrastResults'>
<Wald test (chi2): statistic=[[0.23750775]], p-value=0.626011969616308, df_denom=1>
target = 'suicidal_behavior'
Optimization terminated successfully.
         Current function value: 0.687183
         Iterations 4
wald_test_with_interation = <class 'statsmodels.stats.contrast.ContrastResults'>
<Wald test (chi2): statistic=[[0.61722165]], p-value=0.43208161442900206, df_denom=1>
wald_test_linear = <class 'statsmodels.stats.contrast.ContrastResults'>
<Wald test (chi2): statistic=[[1.22992159]], p-value=0.2674222869475166, df_denom=1>
target = 'NSSI'
Optimization terminated successfully.
         Current function value: 0.691



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/