In [1]:
## Import packages
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [2]:
## Load data
sirs_demo = pd.read_excel('../data/Dartmouth Data Set- SIRS .xlsx', sheet_name = 'Demographics')
sirs_law_enf = pd.read_excel('../data/Dartmouth Data Set- SIRS .xlsx', sheet_name = 'Law Enforcement')
sirs_ed = pd.read_excel('../data/Dartmouth Data Set- SIRS .xlsx', sheet_name = 'ED Use')
sirs_patient = pd.read_excel('../data/Dartmouth Data Set- SIRS .xlsx', sheet_name = 'In-patient admissions')
sirs_living_change = pd.read_excel('../data/Dartmouth Data Set- SIRS .xlsx', sheet_name = 'living Situation change')
sirs_crisis = pd.read_excel('../data/Dartmouth Data Set- SIRS .xlsx', sheet_name = 'Emergency Crisis Services')

## Recategorizing Race Data

In [3]:
# Examining top race categories
race_value_counts = sirs_demo.Race.value_counts()
print(race_value_counts)

# Examining top race categories (by proprotion)
race_norm = sirs_demo['Race'].value_counts(normalize = True)

# Calculating cumulative sum of proportions and showing top 30
race_norm_cumsum = race_norm.cumsum().head(30)
print(race_norm_cumsum)

Race
White                                        2980
Black or African American                     994
Unknown, not collected                        277
Other: Hispanic                               191
Asian                                         147
                                             ... 
Other: Ecuadorian                               1
Other: Pakistani                                1
Other: Pakistani-Muslim                         1
Other: White and Black or African America       1
Other: mexican                                  1
Name: count, Length: 162, dtype: int64
Race
White                                                0.597673
Black or African American                            0.797032
Unknown, not collected                               0.852587
Other: Hispanic                                      0.890895
Asian                                                0.920377
Other                                                0.927597
Black or African American, Wh

In [60]:
# Taking top 96% of data
def categorize_race(race):
    categories = {
        'White': ['White', 'Other: Middle Eastern'],
        'Black/African American': ['Black or African American'],
        'Other/Unknown': ['Unknown, not collected', 'Other', 'Unknown, not collected, White', 'Other: Biracial'],
        'Hispanic/Latinx': ['Other: Hispanic', 'Other: Latino', 'Other: hispanic', 'Other: Mexican'],
        'Asian/Pacific Islander': ['Asian', 'Native Hawaiian or Other Pacific Islander'],
        'Mixed': ['Black or African American, White', 'Asian, White'],
        'American Indian or Alaska Native': ['American Indian or Alaska Native'],
    }

    for category, labels in categories.items():
        if race in labels:
            return category
    return 'Other/Unknown'

In [61]:
# Applying function to data
sirs_demo['race_clean'] = sirs_demo['Race'].apply(categorize_race)
sirs_demo.race_clean.value_counts()

race_clean
White                               2984
Black/African American               994
Other/Unknown                        529
Hispanic/Latinx                      250
Asian/Pacific Islander               155
Mixed                                 47
American Indian or Alaska Native      27
Name: count, dtype: int64

## Merging cleaned race data with crisis data

In [62]:
# Merging data frames
crisis_merge = pd.merge(sirs_crisis, sirs_demo, how = 'left', left_on = 'Individual ID', right_on = 'Local ID')
crisis_merge.head()
crisis_merge.columns

Unnamed: 0,Individual ID,Date/Time of Contact,Source of Contact,Reason for Contact,Type of Emergency Response,Total episode length (initial call to disposition),Total response time (initial call to arrival on-site),Travel Time,Police Response,If Police Responded,...,Jailed in the past year (at enrollment to START),Jailed in the past 1 - 5 years (at enrollment to START),Employment Status at Enrollment,Grade In School,Current IEP,Current 504 Plan,School Setting,Custody Status,Child Lives With,race_clean
0,11153125,2016-08-16 08:40:00,Case Manager/Service Coordinator,"At risk of losing placement, Other, Self-injur...",Phone consultation only,15.0,,,No,,...,No,No,Employment (PT),,No,No,,,,White
1,11153125,2018-04-27 15:00:00,Case Manager/Service Coordinator,"Mental health symptoms, Self-injurious",In-person: Emergency room,60.0,240.0,60.0,No,,...,No,No,Employment (PT),,No,No,,,,White
2,11153125,2018-06-05 16:00:00,Residential provider - Community,"Mental health symptoms, Suicidal ideation/beha...",Other: phone and email,120.0,,,No,,...,No,No,Employment (PT),,No,No,,,,White
3,11158347,2015-06-01 14:00:00,Family member,"Diagnosis and treatment plan assistance, Other...",Phone consultation only,254.0,,,No,,...,No,No,Not employed,,No,No,,,,White
4,11158347,2015-06-01 16:00:00,Family member,"Aggression (physical, verbal, property destruc...",Phone consultation only,70.0,,,No,,...,No,No,Not employed,,No,No,,,,White


Index(['Individual ID', 'Date/Time of Contact', 'Source of Contact',
       'Reason for Contact', 'Type of Emergency Response',
       'Total episode length (initial call to disposition)',
       'Total response time (initial call to arrival on-site)', 'Travel Time',
       'Police Response', 'If Police Responded', 'Who Contacted Police?',
       'Mobile Crisis', 'If Mobile Crisis', 'Restraints Used',
       'If Restraints Were Used', 'Disposition', 'Disposition Information',
       'Admitted through ED/CPEP', 'Date of Admission',
       'Disposition Information (ER)',
       'Did START response prevent higher level of care (hospital diversion)',
       'RSQ Completed?', 'Local ID', 'Date Enrolled in START', 'Status',
       'Status Date', 'Time Enrolled in START', 'Source of referral to START',
       'Suitability of enrollment in START',
       'Individual/Caregiver reliable access to technology',
       'Presenting problems at time of enrollment', 'Services at Enrollment',
       'D

In [63]:
# Cleaning data frame
crisis_col = ['Individual ID', 'Date/Time of Contact', 'Type of Emergency Response', 
             'Reason for Contact', 'Type of Emergency Response', 'Total episode length (initial call to disposition)',
             'Total response time (initial call to arrival on-site)', 'If Police Responded',
             'Disposition', 'Disposition Information', 'race_clean']
crisis_select = crisis_merge[crisis_col]

crisis_select

Unnamed: 0,Individual ID,Date/Time of Contact,Type of Emergency Response,Reason for Contact,Type of Emergency Response.1,Total episode length (initial call to disposition),Total response time (initial call to arrival on-site),If Police Responded,Disposition,Disposition Information,race_clean
0,11153125,2016-08-16 08:40:00,Phone consultation only,"At risk of losing placement, Other, Self-injur...",Phone consultation only,15.0,,,Other: outpatient MH appt,,White
1,11153125,2018-04-27 15:00:00,In-person: Emergency room,"Mental health symptoms, Self-injurious",In-person: Emergency room,60.0,240.0,,START emergency resource center admission,,White
2,11153125,2018-06-05 16:00:00,Other: phone and email,"Mental health symptoms, Suicidal ideation/beha...",Other: phone and email,120.0,,,Psychiatric Hospital Admission,,White
3,11158347,2015-06-01 14:00:00,Phone consultation only,"Diagnosis and treatment plan assistance, Other...",Phone consultation only,254.0,,,START emergency resource center admission,,White
4,11158347,2015-06-01 16:00:00,Phone consultation only,"Aggression (physical, verbal, property destruc...",Phone consultation only,70.0,,,Maintain current setting,Planned Resource Center referral,White
...,...,...,...,...,...,...,...,...,...,...,...
18163,108482C,2021-12-03 14:05:00,Other: In-person School,"Aggression (physical, verbal, property destruc...",Other: In-person School,40.0,12.0,,Other: Picked up from school and transported t...,,Black/African American
18164,817731C,2021-12-04 14:21:00,Telehealth response,"Family needs assistance, Mental health symptoms",Telehealth response,120.0,,Assist on Site,Emergency Department,,White
18165,471473,2021-12-19 15:00:00,Phone consultation only,Mental health symptoms,Phone consultation only,20.0,,,Maintain current setting,Follow-up with coordinator,
18166,780879W,2021-12-17 11:00:00,In-person: Emergency room,"At risk of losing placement, Decrease in abili...",In-person: Emergency room,210.0,150.0,Transport to ED,Psychiatric Hospital Admission,,Black/African American


In [77]:
# Examining top reasons for contact 
crisis_select['Reason for Contact'].value_counts()

# Examining top reasons for contact (by proportion)
contact_rzn_norm = crisis_select['Reason for Contact'].value_counts(normalize = True)

# Calculating cumulative sum of proportions and showing top 30
contact_rzn_norm_cumsum = contact_rzn_norm.cumsum().head(50)
print(contact_rzn_norm_cumsum)

Reason for Contact
Aggression (physical, verbal, property destruction, threats)                                                                                                                                        3759
Mental health symptoms                                                                                                                                                                              1893
Aggression (physical, verbal, property destruction, threats), Family needs assistance                                                                                                               1297
Aggression (physical, verbal, property destruction, threats), Mental health symptoms                                                                                                                1063
Family needs assistance                                                                                                                                                          

Reason for Contact
Aggression (physical, verbal, property destruction, threats)                                                                                                                0.209031
Mental health symptoms                                                                                                                                                      0.314297
Aggression (physical, verbal, property destruction, threats), Family needs assistance                                                                                       0.386421
Aggression (physical, verbal, property destruction, threats), Mental health symptoms                                                                                        0.445532
Family needs assistance                                                                                                                                                     0.474837
Suicidal ideation/behaviors                                                 

In [65]:
# Recategorizing reasons for contact (BASIC)
crisis_select['is_suicidal'] = np.where(crisis_select['Reason for Contact'].str.contains('suicidal', case = False, na = False), True, False)
crisis_select['is_aggression'] = np.where(crisis_select['Reason for Contact'].str.contains('aggression', case = False, na = False), True, False)
crisis_select['is_mh_symp'] = np.where(crisis_select['Reason for Contact'].str.contains('Mental health symptoms', 
                                                                                        case = False, na = False), True, False)
crisis_select

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  crisis_select['is_suicidal'] = np.where(crisis_select['Reason for Contact'].str.contains('suicidal', case = False, na = False), True, False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  crisis_select['is_aggression'] = np.where(crisis_select['Reason for Contact'].str.contains('aggression', case = False, na = False), True, False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.

Unnamed: 0,Individual ID,Date/Time of Contact,Type of Emergency Response,Reason for Contact,Type of Emergency Response.1,Total episode length (initial call to disposition),Total response time (initial call to arrival on-site),If Police Responded,Disposition,Disposition Information,race_clean,is_suicidal,is_aggression,is_mh_symp
0,11153125,2016-08-16 08:40:00,Phone consultation only,"At risk of losing placement, Other, Self-injur...",Phone consultation only,15.0,,,Other: outpatient MH appt,,White,False,False,False
1,11153125,2018-04-27 15:00:00,In-person: Emergency room,"Mental health symptoms, Self-injurious",In-person: Emergency room,60.0,240.0,,START emergency resource center admission,,White,False,False,True
2,11153125,2018-06-05 16:00:00,Other: phone and email,"Mental health symptoms, Suicidal ideation/beha...",Other: phone and email,120.0,,,Psychiatric Hospital Admission,,White,True,False,True
3,11158347,2015-06-01 14:00:00,Phone consultation only,"Diagnosis and treatment plan assistance, Other...",Phone consultation only,254.0,,,START emergency resource center admission,,White,False,False,False
4,11158347,2015-06-01 16:00:00,Phone consultation only,"Aggression (physical, verbal, property destruc...",Phone consultation only,70.0,,,Maintain current setting,Planned Resource Center referral,White,False,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18163,108482C,2021-12-03 14:05:00,Other: In-person School,"Aggression (physical, verbal, property destruc...",Other: In-person School,40.0,12.0,,Other: Picked up from school and transported t...,,Black/African American,False,True,True
18164,817731C,2021-12-04 14:21:00,Telehealth response,"Family needs assistance, Mental health symptoms",Telehealth response,120.0,,Assist on Site,Emergency Department,,White,False,False,True
18165,471473,2021-12-19 15:00:00,Phone consultation only,Mental health symptoms,Phone consultation only,20.0,,,Maintain current setting,Follow-up with coordinator,,False,False,True
18166,780879W,2021-12-17 11:00:00,In-person: Emergency room,"At risk of losing placement, Decrease in abili...",In-person: Emergency room,210.0,150.0,Transport to ED,Psychiatric Hospital Admission,,Black/African American,True,False,True


In [66]:
# Finding proportions of race
print(sirs_demo.race_clean.value_counts())
total_count = sirs_demo['race_clean'].count()

race_clean
White                               2984
Black/African American               994
Other/Unknown                        529
Hispanic/Latinx                      250
Asian/Pacific Islander               155
Mixed                                 47
American Indian or Alaska Native      27
Name: count, dtype: int64


In [67]:
# # Filter by white
# crisis_select_white = crisis_select[crisis_select['race_clean'] == 'White']

# # total white
# cs_white_total = crisis_select_white['race_clean'].count()

# # prop. of suicidal ideation
# white_suicidal = crisis_select_white['is_suicidal'].value_counts().get('Y')
# white_suicidal_prop = white_suicidal / cs_white_total

# print(white_suicidal_prop)

# # prop. of aggression
# white_aggression = crisis_select_white['is_aggression'].value_counts().get('Y')
# white_aggression_prop = white_aggression / cs_white_total

# print(white_aggression_prop)

# # prop. of mental health symptoms
# white_mh = crisis_select_white['is_mh_symp'].value_counts().get('Y')
# white_mh_prop = white_mh / cs_white_total

# print(white_mh_prop)

# # DELETE

In [15]:
# # Define function to calculate white crisis proportions
# def race_crisis(race, colname):
#     race_df = crisis_select[crisis_select['race_clean'] == race] # filter by race

#     race_total = race_df['race_clean'].count() # total number of individuals of that race

#     # find proprtion of crisis by race
#     race_crisis_type = race_df[colname].value_counts().get('Y')
#     race_crisis_prop = race_crisis_type / race_total

#     return race_crisis_prop


# white_suicidal = race_crisis('White', 'is_suicidal')
# white_aggression = race_crisis('White', 'is_aggression')
# white_mh = race_crisis('White', 'is_mh_symp')

# black_suicidal = race_crisis('Black/African American', 'is_suicidal')
# black_aggression = race_crisis('Black/African American', 'is_aggression')
# black_mh = race_crisis('Black/African American')

In [68]:
# Suicidal Proportions
suicidal_crisis = crisis_select.groupby('race_clean')['is_suicidal'].sum()
suicidal_total = suicidal_crisis.sum()
suicidal_prop = suicidal_crisis / suicidal_total

suicidal_prop

race_clean
American Indian or Alaska Native    0.007326
Asian/Pacific Islander              0.005495
Black/African American              0.185592
Hispanic/Latinx                     0.022589
Mixed                               0.005495
Other/Unknown                       0.045788
White                               0.727717
Name: is_suicidal, dtype: float64

In [69]:
# Aggression Proportions
aggression_crisis = crisis_select.groupby('race_clean')['is_aggression'].sum()
aggression_total = aggression_crisis.sum()
aggression_prop = aggression_crisis / aggression_total

aggression_prop

race_clean
American Indian or Alaska Native    0.011739
Asian/Pacific Islander              0.016388
Black/African American              0.264877
Hispanic/Latinx                     0.052417
Mixed                               0.007787
Other/Unknown                       0.066248
White                               0.580544
Name: is_aggression, dtype: float64

In [70]:
# Mental Health Symptoms Proportions
mh_crisis = crisis_select.groupby('race_clean')['is_mh_symp'].sum()
mh_total = mh_crisis.sum()
mh_prop = mh_crisis / mh_total

mh_prop

race_clean
American Indian or Alaska Native    0.004911
Asian/Pacific Islander              0.011786
Black/African American              0.225300
Hispanic/Latinx                     0.030053
Mixed                               0.005696
Other/Unknown                       0.057553
White                               0.664702
Name: is_mh_symp, dtype: float64

In [71]:
race_crisis_df = pd.DataFrame({
    'suicidal_ideation': suicidal_prop,
    'aggression': aggression_prop,
    'mental_health': mh_prop
})

race_crisis_df

Unnamed: 0_level_0,suicidal_ideation,aggression,mental_health
race_clean,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
American Indian or Alaska Native,0.007326,0.011739,0.004911
Asian/Pacific Islander,0.005495,0.016388,0.011786
Black/African American,0.185592,0.264877,0.2253
Hispanic/Latinx,0.022589,0.052417,0.030053
Mixed,0.005495,0.007787,0.005696
Other/Unknown,0.045788,0.066248,0.057553
White,0.727717,0.580544,0.664702


In [72]:
import networkx as nx

import urllib, json

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [73]:
rc = race_crisis_df
race_crisis_df[['suicidal_ideation', 'aggression', 'mental_health']].values

rc

array([[0.00732601, 0.01173873, 0.00491063],
       [0.00549451, 0.01638773, 0.0117855 ],
       [0.18559219, 0.2648768 , 0.22529955],
       [0.02258852, 0.05241748, 0.03005303],
       [0.00549451, 0.00778708, 0.00569633],
       [0.04578755, 0.06624826, 0.05755254],
       [0.72771673, 0.58054393, 0.66470242]])

Unnamed: 0_level_0,suicidal_ideation,aggression,mental_health
race_clean,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
American Indian or Alaska Native,0.007326,0.011739,0.004911
Asian/Pacific Islander,0.005495,0.016388,0.011786
Black/African American,0.185592,0.264877,0.2253
Hispanic/Latinx,0.022589,0.052417,0.030053
Mixed,0.005495,0.007787,0.005696
Other/Unknown,0.045788,0.066248,0.057553
White,0.727717,0.580544,0.664702


In [74]:
rc = rc.reset_index()

In [75]:
# Pivot table to prepare to create each row into a list
rc_long = rc.melt(id_vars = ['race_clean'], value_vars = ['suicidal_ideation', 'aggression', 'mental_health'], var_name = 'Condition')

rc_long

Unnamed: 0,race_clean,Condition,value
0,American Indian or Alaska Native,suicidal_ideation,0.007326
1,Asian/Pacific Islander,suicidal_ideation,0.005495
2,Black/African American,suicidal_ideation,0.185592
3,Hispanic/Latinx,suicidal_ideation,0.022589
4,Mixed,suicidal_ideation,0.005495
5,Other/Unknown,suicidal_ideation,0.045788
6,White,suicidal_ideation,0.727717
7,American Indian or Alaska Native,aggression,0.011739
8,Asian/Pacific Islander,aggression,0.016388
9,Black/African American,aggression,0.264877


In [44]:
# Make each row into a sublist of a list
def df_to_lol(df):
    return df.apply(lambda row: row.tolist(), axis=1).tolist()

race_crisis_lol = df_to_lol(rc_long)

race_crisis_lol

[['American Indian or Alaska Native',
  'suicidal_ideation',
  0.007326007326007326],
 ['Asian/Pacific Islander', 'suicidal_ideation', 0.005494505494505495],
 ['Black/African American', 'suicidal_ideation', 0.1855921855921856],
 ['Hispanic/Latinx', 'suicidal_ideation', 0.022588522588522588],
 ['Mixed', 'suicidal_ideation', 0.005494505494505495],
 ['Other/Unknown', 'suicidal_ideation', 0.045787545787545784],
 ['White', 'suicidal_ideation', 0.7277167277167277],
 ['American Indian or Alaska Native', 'aggression', 0.011738726173872617],
 ['Asian/Pacific Islander', 'aggression', 0.016387726638772665],
 ['Black/African American', 'aggression', 0.26487680148768017],
 ['Hispanic/Latinx', 'aggression', 0.05241748024174803],
 ['Mixed', 'aggression', 0.007787075778707578],
 ['Other/Unknown', 'aggression', 0.06624825662482567],
 ['White', 'aggression', 0.5805439330543933],
 ['American Indian or Alaska Native', 'mental_health', 0.004910626595953644],
 ['Asian/Pacific Islander', 'mental_health', 0.0

In [53]:
edge_list = [(row[0], row[1], row[2]) for row in race_crisis_lol]

# Print edge list
for edge in edge_list:
    print(edge)

('American Indian or Alaska Native', 'suicidal_ideation', 0.007326007326007326)
('Asian/Pacific Islander', 'suicidal_ideation', 0.005494505494505495)
('Black/African American', 'suicidal_ideation', 0.1855921855921856)
('Hispanic/Latinx', 'suicidal_ideation', 0.022588522588522588)
('Mixed', 'suicidal_ideation', 0.005494505494505495)
('Other/Unknown', 'suicidal_ideation', 0.045787545787545784)
('White', 'suicidal_ideation', 0.7277167277167277)
('American Indian or Alaska Native', 'aggression', 0.011738726173872617)
('Asian/Pacific Islander', 'aggression', 0.016387726638772665)
('Black/African American', 'aggression', 0.26487680148768017)
('Hispanic/Latinx', 'aggression', 0.05241748024174803)
('Mixed', 'aggression', 0.007787075778707578)
('Other/Unknown', 'aggression', 0.06624825662482567)
('White', 'aggression', 0.5805439330543933)
('American Indian or Alaska Native', 'mental_health', 0.004910626595953644)
('Asian/Pacific Islander', 'mental_health', 0.011785503830288745)
('Black/African 

In [78]:
## Create network analysis

# Create a graph
G_race_crisis = nx.Graph()

# Add edges
G_race_crisis.add_weighted_edges_from(edge_list)

# Creates biggest subgraph
CC_nodes = list(nx.connected_components(G_race_crisis))[0] # generates list of connected components, extracts the largest
GG = nx.subgraph(G_race_crisis, CC_nodes) # creates the subgraph of the largest CC

In [79]:
from netwulf import visualize
import netwulf as nw

In [80]:
stylized_network, config = visualize(GG,)