# Descriptive Statistics for Study 3

In [3]:
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency
from IPython.display import display

#Allowing each cell to display multiple outputs
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [4]:
df=pd.read_csv(r"C:\Users\z5291979\OneDrive - UNSW\Documents\lsac-data\df_alls.csv")

#Examining frequencies of combinations of different suicide-related outcomes
df[['si', 'nssi', 'att']].value_counts().reset_index(name='count')

Unnamed: 0,si,nssi,att,count
0,0.0,0,0.0,2025
1,1.0,0,0.0,160
2,1.0,1,0.0,78
3,0.0,1,0.0,60
4,1.0,1,1.0,47
5,1.0,0,1.0,37
6,0.0,0,1.0,19
7,0.0,1,1.0,2


In [5]:
#Examining prior SITBs
#SITB variables were initially coded as 1 Yes 2 No; recode to 0 No 1 Yes
#Also recode missing values so there aren't multiple types of missing values (previously had -9 and -3)
for col in df[['hs54a',	'hs54b',	'hs54c',	'hs54d',	'hs54f']]:
    df[col]=df[col].replace({2:0})
    df[col]=df[col].mask(df[col] <0, -9)
#SI
#Creating the SI variable using variables measured in Wave 7 as defined in the paper and in data_clean notebook
#hs54c- seriously consider suicide, hs54d- suicide plan
df['si7']=df['hs54c']
df['si7']=np.where(df['hs54d']==1, 1, df['si7'])
#Renaming attempt variable and recoding it so that it becomes a binary variable as previously defined in data_clean and paper
df['att7']=df['hs54e'].replace([2,3,4],1)
#Creating the NSSI variable 
df['nssi7']=df['hs54b']


#Generating crosstabs and running the chi2 test on the prior SITB variables
sre7=['si7', 'nssi7', 'att7']
df['att7']=df['att7'].mask(df['att7'] <0, -9)

for s in sre7:
    locals()[f'{s}_cross']=pd.crosstab(index=df[s], columns=df['sitbs'], dropna=False, margins=True)
    locals()[f'{s}_cross']
    pd.crosstab(index=df[s], columns=df['sitbs'], dropna=False, normalize='columns')*100
    stat, p, dof, expected=chi2_contingency(locals()[f'{s}_cross'])
    print(f'chi2 p-value= {p}')

sitbs,0,1,All
si7,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-9.0,62,19,81
0.0,1814,214,2028
1.0,149,170,319
All,2025,403,2428


sitbs,0,1
si7,Unnamed: 1_level_1,Unnamed: 2_level_1
-9.0,3.061728,4.71464
0.0,89.580247,53.101737
1.0,7.358025,42.183623


chi2 p-value= 4.274124808642867e-76


sitbs,0,1,All
nssi7,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-9.0,62,20,82
0.0,1853,251,2104
1.0,110,132,242
All,2025,403,2428


sitbs,0,1
nssi7,Unnamed: 1_level_1,Unnamed: 2_level_1
-9.0,3.061728,4.962779
0.0,91.506173,62.282878
1.0,5.432099,32.754342


chi2 p-value= 2.4280551624983583e-59


sitbs,0,1,All
att7,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-9.0,62,18,80
0.0,1901,328,2229
1.0,62,57,119
All,2025,403,2428


sitbs,0,1
att7,Unnamed: 1_level_1,Unnamed: 2_level_1
-9.0,3.061728,4.466501
0.0,93.876543,81.389578
1.0,3.061728,14.143921


chi2 p-value= 1.1944361393926347e-17


In [6]:
#Create function that produces cross-tabs and chi2 p-value
#Note that the display function is used instead of print so that the output is a nicely formatted table
def desc(var):
    locals()[f'{var}_cross']=pd.crosstab(index=df[var], columns=df['sitbs'], dropna=False, margins=True)
    display(locals()[f'{var}_cross'])
    display(pd.crosstab(index=df[var], columns=df['sitbs'], dropna=False, normalize='columns')*100)
    stat, p, dof, expected=chi2_contingency(locals()[f'{var}_cross'])
    print(f'chi sq statistic={stat}')
    print(f'p-value for chi2 test= {p}')


In [12]:
sexcont=pd.crosstab(index=df['f02m1_2.0'], columns=df['sitbs'], dropna=False)
sexcont
stat, p, dof, expected=chi2_contingency(sexcont)
stat
p

sitbs,0,1
f02m1_2.0,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1054,173
1,971,230


10.824954389025681

0.0010014116114932334

In [13]:
adhdcont=pd.crosstab(index=df['hs17l'], columns=df['sitbs'], dropna=False)
adhdcont
stat, p, dof, expected=chi2_contingency(adhdcont)
stat
p

sitbs,0,1
hs17l,Unnamed: 1_level_1,Unnamed: 2_level_1
-9.0,27,6
0.0,1947,384
1.0,51,13


0.7237435978296194

0.6963716377190524

In [7]:
#Sex
desc('f02m1_2.0')

sitbs,0,1,All
f02m1_2.0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1054,173,1227
1,971,230,1201
All,2025,403,2428


sitbs,0,1
f02m1_2.0,Unnamed: 1_level_1,Unnamed: 2_level_1
0,52.049383,42.92804
1,47.950617,57.07196


chi sq statistic=11.186874394160657
p-value for chi2 test= 0.024542170238459768


In [8]:
#Depression/ anxiety
desc('hs17v')
#ADHD
desc('hs17l')
#Two parents who are partners
desc('parpart_2.0')
#Parent 1 has anxiety/ depression
df['p1da']=df['hs48a32'].replace([2,3,4],2)
df['p1da']=df['p1da'].mask(df['p1da'] <0, -1)
desc('p1da')


sitbs,0,1,All
hs17v,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-9.0,27,6,33
0.0,1805,307,2112
1.0,193,90,283
All,2025,403,2428


sitbs,0,1
hs17v,Unnamed: 1_level_1,Unnamed: 2_level_1
-9.0,1.333333,1.488834
0.0,89.135802,76.17866
1.0,9.530864,22.332506


chi sq statistic=53.80491981677449
p-value for chi2 test= 8.076488348500428e-10


sitbs,0,1,All
hs17l,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-9.0,27,6,33
0.0,1947,384,2331
1.0,51,13,64
All,2025,403,2428


sitbs,0,1
hs17l,Unnamed: 1_level_1,Unnamed: 2_level_1
-9.0,1.333333,1.488834
0.0,96.148148,95.28536
1.0,2.518519,3.225806


chi sq statistic=0.7237435978296194
p-value for chi2 test= 0.9939642453758665


sitbs,0,1,All
parpart_2.0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,390,90,480
1,1635,313,1948
All,2025,403,2428


sitbs,0,1
parpart_2.0,Unnamed: 1_level_1,Unnamed: 2_level_1
0,19.259259,22.332506
1,80.740741,77.667494


chi sq statistic=2.0014441842089705
p-value for chi2 test= 0.7354932395260891


sitbs,0,1,All
p1da,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-1.0,70,20,90
1.0,1248,212,1460
2.0,707,171,878
All,2025,403,2428


sitbs,0,1
p1da,Unnamed: 1_level_1,Unnamed: 2_level_1
-1.0,3.45679,4.962779
1.0,61.62963,52.605459
2.0,34.91358,42.431762


chi sq statistic=11.862052679959227
p-value for chi2 test= 0.06511794931170066
