In [2]:
## Import packages
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import datetime

In [3]:
## Load data
sirs_demo = pd.read_excel('../data/Dartmouth Data Set- SIRS .xlsx', sheet_name = 'Demographics')
sirs_crisis = pd.read_excel('../data/Dartmouth Data Set- SIRS .xlsx', sheet_name = 'Emergency Crisis Services')

In [4]:
# Examining top race categories
race_value_counts = sirs_demo.Race.value_counts()
print(race_value_counts)

# Examining top race categories (by proprotion)
race_norm = sirs_demo['Race'].value_counts(normalize = True)

# Calculating cumulative sum of proportions and showing top 30
race_norm_cumsum = race_norm.cumsum().head(30)
print(race_norm_cumsum)

Race
White                                        2980
Black or African American                     994
Unknown, not collected                        277
Other: Hispanic                               191
Asian                                         147
                                             ... 
Other: Ecuadorian                               1
Other: Pakistani                                1
Other: Pakistani-Muslim                         1
Other: White and Black or African America       1
Other: mexican                                  1
Name: count, Length: 162, dtype: int64
Race
White                                                0.597673
Black or African American                            0.797032
Unknown, not collected                               0.852587
Other: Hispanic                                      0.890895
Asian                                                0.920377
Other                                                0.927597
Black or African American, Wh

In [12]:
# Taking top 96% of data
def categorize_race(race):
    categories = {
        'White': ['White', 'Other: Middle Eastern']

    }

    for category, labels in categories.items():
        if race in labels:
            return category
    return 'Non-White'

In [13]:
sirs_demo['race_clean'] = sirs_demo['Race'].apply(categorize_race)

In [15]:
sirs_demo['race_clean'].value_counts()

race_clean
White        2984
Non-White    2002
Name: count, dtype: int64

In [16]:
# Merging data frames
crisis_merge = pd.merge(sirs_crisis, sirs_demo, how = 'left', left_on = 'Individual ID', right_on = 'Local ID')

In [19]:
# Recategorizing reasons for contact (BASIC)
crisis_select['is_suicidal'] = np.where(crisis_select['Reason for Contact'].str.contains('suicidal', case = False, na = False), True, False)
crisis_select['is_aggression'] = np.where(crisis_select['Reason for Contact'].str.contains('aggression', case = False, na = False), True, False)
crisis_select['is_mh_symp'] = np.where(crisis_select['Reason for Contact'].str.contains('Mental health symptoms', 
                                                                                        case = False, na = False), True, False)
crisis_select

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  crisis_select['is_suicidal'] = np.where(crisis_select['Reason for Contact'].str.contains('suicidal', case = False, na = False), True, False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  crisis_select['is_aggression'] = np.where(crisis_select['Reason for Contact'].str.contains('aggression', case = False, na = False), True, False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.

Unnamed: 0,Status Date,Reason for Contact,Type of Emergency Response,If Police Responded,race_clean,is_suicidal,is_aggression,is_mh_symp
0,2019-07-18,"At risk of losing placement, Other, Self-injur...",Phone consultation only,,White,False,False,False
1,2019-07-18,"Mental health symptoms, Self-injurious",In-person: Emergency room,,White,False,False,True
2,2019-07-18,"Mental health symptoms, Suicidal ideation/beha...",Other: phone and email,,White,True,False,True
3,2021-08-27,"Diagnosis and treatment plan assistance, Other...",Phone consultation only,,White,False,False,False
4,2021-08-27,"Aggression (physical, verbal, property destruc...",Phone consultation only,,White,False,True,True
...,...,...,...,...,...,...,...,...
18163,2021-07-14,"Aggression (physical, verbal, property destruc...",Other: In-person School,,Non-White,False,True,True
18164,2020-10-07,"Family needs assistance, Mental health symptoms",Telehealth response,Assist on Site,White,False,False,True
18165,NaT,Mental health symptoms,Phone consultation only,,,False,False,True
18166,2018-01-03,"At risk of losing placement, Decrease in abili...",In-person: Emergency room,Transport to ED,Non-White,True,False,True


In [20]:
# Suicidal Proportions
suicidal_crisis = crisis_select.groupby('race_clean')['is_suicidal'].sum()
total_race_counts = crisis_select['race_clean'].value_counts()
suicidal_prop = suicidal_crisis / total_race_counts

suicidal_prop

race_clean
Non-White    0.082091
White        0.131567
dtype: float64

In [21]:
# Aggression Proportions
aggression_crisis = crisis_select.groupby('race_clean')['is_aggression'].sum()
total_race_counts = crisis_select['race_clean'].value_counts()
aggression_prop = aggression_crisis / total_race_counts

aggression_prop

race_clean
Non-White    0.664274
White        0.551325
dtype: float64

In [22]:
# Mental Health Symptoms Proportions
mh_crisis = crisis_select.groupby('race_clean')['is_mh_symp'].sum()
total_race_counts = crisis_select['race_clean'].value_counts()
mh_prop = mh_crisis / total_race_counts

mh_prop

race_clean
Non-White    0.314191
White        0.373510
dtype: float64

In [23]:
race_crisis_df = pd.DataFrame({
    'suicidal_ideation': suicidal_prop,
    'aggression': aggression_prop,
    'mental_health': mh_prop
})

race_crisis_df

Unnamed: 0_level_0,suicidal_ideation,aggression,mental_health
race_clean,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Non-White,0.082091,0.664274,0.314191
White,0.131567,0.551325,0.37351


In [27]:
# Step 1: Extract and aggregate START encounters by date and race
start_encounters_by_race = crisis_select.groupby([crisis_select['Status Date'].dt.date, 'race_clean', 'is_suicidal', 
                                                    'is_aggression', 'is_mh_symp']).size().reset_index(name='START Encounters')

by_aggression = start_encounters_by_race[start_encounters_by_race['is_aggression'] == True]
by_aggression

Unnamed: 0,Status Date,race_clean,is_suicidal,is_aggression,is_mh_symp,START Encounters
0,2012-05-21,White,False,True,False,1
1,2013-05-09,White,False,True,True,1
2,2013-05-09,White,True,True,True,1
5,2014-07-01,White,False,True,False,1
6,2014-07-01,White,False,True,True,8
...,...,...,...,...,...,...
3567,2021-12-15,Non-White,False,True,False,1
3569,2021-12-15,White,False,True,False,4
3570,2021-12-15,White,False,True,True,1
3573,2021-12-16,White,False,True,False,13
