In [19]:
##00_Data_Wrangling_for_Analysis
import pandas as pd
import numpy as np
import datetime
import random
import re
import os
from dateutil import relativedelta
import math
import plotnine
from plotnine import *

# Load the raw sentencing extract; all cleaning happens on a copy so the raw
# frame stays available for comparison.
sentencing_raw = pd.read_csv('sentencing.csv')
sentencing_cleaned = sentencing_raw.copy()

# A bare expression in the middle of a cell is evaluated and then discarded,
# so the counts must be printed explicitly to show up under the header.
print("Distribution of original `RACE` variable:")
print(sentencing_cleaned.RACE.value_counts())
# Race indicator columns: each flag is True when RACE equals one of its labels.
race_labels = {
    'is_black': ['Black', 'White/Black [Hispanic or Latino]'],
    'is_hisp': ['HISPANIC', 'White [Hispanic or Latino]'],
    'is_white': ['White'],
}
for flag_name, labels in race_labels.items():
    sentencing_cleaned[flag_name] = sentencing_raw.RACE.isin(labels).to_numpy()

# Rows whose RACE is 'Unknown' or 'Biracial' get a missing value in all three flags.
cond = sentencing_cleaned.RACE.isin(['Unknown', 'Biracial'])
sentencing_cleaned.loc[cond, list(race_labels)] = np.nan

# is_female: False for the two male labels; among the rest, NaN where GENDER
# contains 'Unknown' and True for the remaining labels.
# NOTE(review): a missing GENDER is handled by whatever str.contains returns
# for it -- confirm that this is the intended treatment.
gender = sentencing_cleaned.GENDER
unknown_or_female = np.where(gender.str.contains('Unknown'), np.nan, True)
sentencing_cleaned['is_female'] = np.where(gender.isin(['Male', 'Male name, no gender given']),
                                           False,
                                           unknown_or_female)
# Cap AGE_AT_INCIDENT at its 99.99th percentile: values at or above the cap are
# replaced by the cap, everything else (including missing ages) passes through.
incident_age = sentencing_cleaned.AGE_AT_INCIDENT
age_cap = incident_age.quantile(0.9999)
sentencing_cleaned['age_cleaned'] = np.where(incident_age >= age_cap, age_cap, incident_age)
# Drop the constant midnight time-of-day text from SENTENCE_DATE.
date_text = sentencing_cleaned.SENTENCE_DATE.str.replace("12:00:00 AM ", "")

# Any digit run of the form '2', a non-zero digit, then more digits is rewritten
# to start with '20' (e.g. '2916' -> '2016'); strings without such a run are
# left unchanged by the substitution.
year_typo = re.compile(r'2[1-9]([0-9]+)')
sentencing_cleaned['sentence_date'] = [year_typo.sub(r'20\1', str(value)) for value in date_text]
sentencing_cleaned['sentence_date'] = pd.to_datetime(sentencing_cleaned["sentence_date"])

# Calendar components and a monthly period derived from the parsed date.
parsed_date = sentencing_cleaned['sentence_date']
sentencing_cleaned['sentence_year'] = parsed_date.dt.year
sentencing_cleaned['sentence_month'] = parsed_date.dt.month
sentencing_cleaned['sentence_day'] = parsed_date.dt.day
sentencing_cleaned['sentence_ym'] = parsed_date.dt.to_period('M')
# SA Foxx assumed office in Dec 1, 2016: flag sentences from 2016-12 onward.
sentencing_cleaned['sa_office_period'] = (sentencing_cleaned.sentence_ym >= "2016-12").to_numpy()

# Whole months between the sentence month and December 2016 (negative = before).
sentencing_cleaned['sa_timedelta'] = (12 * (sentencing_cleaned.sentence_year - 2016)
                                      + (sentencing_cleaned.sentence_month - 12))

# Days between the sentence date and 2016-12-01.
sa_start = pd.to_datetime("2016-12-01")
sentencing_cleaned['sa_timedelta_days'] = (sentencing_cleaned['sentence_date'] - sa_start).dt.days

# Weeks, truncated toward zero: 2.14 weeks -> 2, -3.14 weeks -> -3; missing stays NaN.
sentencing_cleaned['sa_timedelta_wk'] = [math.trunc(delta_days / 7) if pd.notna(delta_days) else np.nan
                                         for delta_days in sentencing_cleaned.sa_timedelta_days]
# Bail Reform Act: flag sentences from 2017-06 onward.
sentencing_cleaned['BRA_period'] = (sentencing_cleaned.sentence_ym >= "2017-06").to_numpy()

# Whole months between the sentence month and June 2017 (negative = before).
sentencing_cleaned['BRA_timedelta'] = (12 * (sentencing_cleaned.sentence_year - 2017)
                                       + (sentencing_cleaned.sentence_month - 6))

# Days between the sentence date and 2017-06-12 (the month flag above uses the
# whole of June, the day count is anchored on the 12th).
bra_start = pd.to_datetime("2017-06-12")
sentencing_cleaned['BRA_timedelta_days'] = (sentencing_cleaned['sentence_date'] - bra_start).dt.days

# Weeks, truncated toward zero: 2.14 weeks -> 2, -3.14 weeks -> -3; missing stays NaN.
sentencing_cleaned['BRA_timedelta_wk'] = [math.trunc(delta_days / 7) if pd.notna(delta_days) else np.nan
                                          for delta_days in sentencing_cleaned.BRA_timedelta_days]
# Text fragments found in COMMITMENT_TERM that stop it from parsing as a number,
# mapped to their replacements (applied as regular expressions).
replace_dict = {
    'wrap': "",
    "two": "2",
    "months": "",
    "1,154.00": "1154",
    "`": "",
}
commitment_term_text = sentencing_cleaned['COMMITMENT_TERM'].replace(replace_dict, regex=True)
sentencing_cleaned['sentencing_num'] = pd.to_numeric(commitment_term_text)

# Convert the numeric commitment term into days according to COMMITMENT_UNIT.
# `units` and `days_equiv` are parallel lists: the i-th condition selects the
# i-th expression.
commitment_unit = sentencing_cleaned.COMMITMENT_UNIT
term_value = sentencing_cleaned.sentencing_num

units = [commitment_unit == "Year(s)",
         commitment_unit == "Months",
         commitment_unit == "Days",
         commitment_unit == "Weeks",
         commitment_unit == "Hours",
         commitment_unit == "Natural Life",
         commitment_unit.isin(['Term', 'Dollars', 'Pounds', 'Ounces', 'Kilos'])]

days_equiv = [term_value * 365,
              term_value * 30.5,                            # a month counted as 30.5 days
              term_value * 1,
              term_value * 7,
              term_value * 1/24,
              (100 - sentencing_cleaned.age_cleaned) * 365,  # natural life: days from age at incident up to age 100
              np.nan]                                        # units that are not a length of time

# np.select falls back to 0 when no condition matches, which would code rows
# with a missing or unlisted unit as a zero-day sentence. Using NaN as the
# default marks every such row as missing; this also covers the rows where both
# COMMITMENT_TERM and COMMITMENT_UNIT are null, so no separate patch is needed.
sentencing_cleaned['sentencing_term_d'] = np.select(units, days_equiv, default=np.nan)
sentencing_cleaned['sentencing_term_y'] = sentencing_cleaned.sentencing_term_d / 365

commitment_type = sentencing_cleaned['COMMITMENT_TYPE']

# Incarcerated = committed to the Illinois Department of Corrections.
sentencing_cleaned['is_incarcerated'] = (commitment_type == "Illinois Department of Corrections").to_numpy()

# defining whether is_on_probation - based on the sentencing_data_glossary
probation_types = [
    "Probation",
    "710/410 Probation",
    "Intensive Probation Services",
    "Mental Health Probation",
    "Intensive Drug Probation Services",
    "Drug Court Probation",
    "Sex Offender Probation",
    "Gang Probation",
    "2nd Chance Probation",
    "Veteran's Court Probation",
    "Repeat Offender Probation",
    "Domestic Violence Probation",
]
sentencing_cleaned['is_on_probation'] = commitment_type.isin(probation_types).to_numpy()
# Fold "Aggravated X" into "X" by removing the prefix text from the category.
offense_category = sentencing_cleaned.UPDATED_OFFENSE_CATEGORY
sentencing_cleaned['regrouped_offense'] = offense_category.str.replace("Aggravated ", "")

# Offenses whose disposition charge class is one of the listed classes.
eligible_classes = ['A', 'B', 'C', '4']
sentencing_cleaned['eligible_offense'] = sentencing_cleaned.DISPOSITION_CHARGED_CLASS.isin(eligible_classes).to_numpy()
# Row filters for the analysis sample, each evaluated on the cleaned frame:
#   * sentenced no later than September 2022,
#   * commitment term not equal to 0 and a non-missing commitment unit,
#   * primary charge and current sentence flags both True.
in_window = sentencing_cleaned.sentence_ym <= "2022-09"
has_term = sentencing_cleaned.sentencing_num.ne(0) & sentencing_cleaned.COMMITMENT_UNIT.notna()
primary_and_current = (sentencing_cleaned.PRIMARY_CHARGE_FLAG.eq(True)
                       & sentencing_cleaned.CURRENT_SENTENCE_FLAG.eq(True))

sentencing_analysis = sentencing_cleaned[in_window & has_term & primary_and_current].copy()
# For first-time CJARS users: export the participant id and charged offense
# title as the CJARS input file.
cjars_input_columns = ['CASE_PARTICIPANT_ID', 'DISPOSITION_CHARGED_OFFENSE_TITLE']
sentencing_CJARS = sentencing_analysis[cjars_input_columns].copy()
sentencing_CJARS.to_csv('../data/csv/sentencing_CJARS_input.csv', index = False)

# NOTE(review): CJARS_results is not assigned anywhere in this notebook text;
# presumably it is the CJARS output loaded in an earlier session -- confirm
# where it comes from, otherwise this merge raises NameError on a fresh run.
# Inner join: only participants present in CJARS_results are kept.
sentencing_analysis = sentencing_analysis.merge(CJARS_results,
                                                how = 'inner',
                                                left_on = "CASE_PARTICIPANT_ID",
                                                right_on = "CASE_PARTICIPANT_ID")
sentencing_analysis.to_csv('../data/csv/sentencing_analysis.csv', index = False)

# Load the raw intake extract; all cleaning happens on a copy.
intake_raw = pd.read_csv('../data/csv/intake.csv')
intake_cleaned = intake_raw.copy()

# Race indicator columns: each flag is True when RACE equals one of its labels.
intake_cleaned['is_black'] = np.where(intake_cleaned.RACE.isin(['Black', 'White/Black [Hispanic or Latino]']),
                                      True, False)

intake_cleaned['is_hispanic'] = np.where(intake_cleaned.RACE.isin(['HISPANIC', 'White [Hispanic or Latino]']),
                                         True, False)

intake_cleaned['is_white'] = np.where(intake_cleaned.RACE.isin(['White', 'CAUCASIAN']),
                                      True, False)

# Rows with an uninformative RACE label get a missing value in all three flags.
# The column list must name 'is_hispanic' (the column created above): the
# previous 'is_hisp' label does not exist in this frame, so the Hispanic flag
# was never nulled for these rows.
cond = intake_cleaned.RACE.isin(['Unknown', 'Biracial', 'Albino'])
intake_cleaned.loc[cond, ['is_black', 'is_hispanic', 'is_white']] = np.nan

# is_female: False for the two male labels; among the rest, NaN where GENDER
# contains 'Unknown' and True for the remaining labels. NaN (not "") is used
# for unknowns so the column stays numeric instead of being coerced to strings,
# matching the sentencing table's is_female.
intake_cleaned['is_female'] = np.where(intake_cleaned.GENDER.isin(['Male', 'Male name, no gender given']),
                                       False, np.where(intake_cleaned.GENDER.str.contains('Unknown'), np.nan, True))
# Cap AGE_AT_INCIDENT at its 99.95th percentile: values at or above the cap are
# replaced by the cap, everything else (including missing ages) passes through.
intake_age = intake_cleaned.AGE_AT_INCIDENT
intake_age_cap = intake_age.quantile(0.9995)
intake_cleaned['age_cleaned'] = np.where(intake_age >= intake_age_cap, intake_age_cap, intake_age)
# Any digit run of the form '2', a non-zero digit, then more digits is rewritten
# to start with '20' (e.g. '2916' -> '2016'); strings without such a run are
# left unchanged by the substitution.
review_year_typo = re.compile(r'2[1-9]([0-9]+)')
intake_cleaned['felony_review_date'] = [review_year_typo.sub(r'20\1', str(value))
                                        for value in intake_cleaned.FELONY_REVIEW_DATE]
intake_cleaned['felony_review_date'] = pd.to_datetime(intake_cleaned["felony_review_date"])

# Calendar components and a monthly period derived from the parsed date.
review_date = intake_cleaned['felony_review_date']
intake_cleaned['felony_review_year'] = review_date.dt.year
intake_cleaned['felony_review_month'] = review_date.dt.month
intake_cleaned['felony_review_day'] = review_date.dt.day
intake_cleaned['felony_review_ym'] = review_date.dt.to_period('M')
# SA Foxx assumed office in Dec 1, 2016: flag felony reviews from 2016-12 onward.
intake_cleaned['sa_office_period'] = (intake_cleaned.felony_review_ym >= "2016-12").to_numpy()

# Whole months between the review month and December 2016 (negative = before).
intake_cleaned['sa_timedelta'] = (12 * (intake_cleaned.felony_review_year - 2016)
                                  + (intake_cleaned.felony_review_month - 12))

# Days between the review date and 2016-12-01.
intake_sa_start = pd.to_datetime("2016-12-01")
intake_cleaned['sa_timedelta_days'] = (intake_cleaned['felony_review_date'] - intake_sa_start).dt.days

# Weeks, truncated toward zero: 2.14 weeks -> 2, -3.14 weeks -> -3; missing stays NaN.
intake_cleaned['sa_timedelta_wk'] = [math.trunc(delta_days / 7) if pd.notna(delta_days) else np.nan
                                     for delta_days in intake_cleaned.sa_timedelta_days]

# Bail Reform Act: flag felony reviews from 2017-06 onward.
intake_cleaned['BRA_period'] = (intake_cleaned.felony_review_ym >= "2017-06").to_numpy()

# Whole months between the review month and June 2017 (negative = before).
intake_cleaned['BRA_timedelta'] = (12 * (intake_cleaned.felony_review_year - 2017)
                                   + (intake_cleaned.felony_review_month - 6))

# Days between the review date and 2017-06-12 (the month flag above uses the
# whole of June, the day count is anchored on the 12th).
intake_bra_start = pd.to_datetime("2017-06-12")
intake_cleaned['BRA_timedelta_days'] = (intake_cleaned['felony_review_date'] - intake_bra_start).dt.days

# Weeks, truncated toward zero: 2.14 weeks -> 2, -3.14 weeks -> -3; missing stays NaN.
intake_cleaned['BRA_timedelta_wk'] = [math.trunc(delta_days / 7) if pd.notna(delta_days) else np.nan
                                      for delta_days in intake_cleaned.BRA_timedelta_days]
## Cleaning felony review-related variables
# Each flag is True when FELONY_REVIEW_RESULT equals one of the listed outcomes.
intake_cleaned['fr_is_approved'] = np.where(intake_cleaned.FELONY_REVIEW_RESULT.isin(['Approved',
                                                                                      'Charge(S) Approved']),
                                            True, False)
intake_cleaned['fr_is_rejected'] = np.where(intake_cleaned.FELONY_REVIEW_RESULT.isin(['Rejected',
                                                                                      'Disregard']),
                                            True, False)
intake_cleaned['fr_is_investigated'] = np.where(intake_cleaned.FELONY_REVIEW_RESULT == 'Continued Investigation',
                                                True, False)

# index=False, like the sentencing exports: without it the row index is written
# as an extra unnamed column that reappears whenever the file is read back.
intake_cleaned.to_csv('../data/csv/intake_analysis.csv', index = False)





Distribution of original `RACE` variable:


In [21]:
##01_Data_Exploration_Graphs
import matplotlib.pyplot as plt
!pip install gender_guesser
import gender_guesser.detector as gender
!pip install rdrobust
from rdrobust import rdrobust, rdbwselect, rdplot


# Intake records restricted to rows flagged white or black.
intake_analysis = pd.read_csv('../Desktop/DS/intake_analysis.csv')
intake_is_bw = intake_analysis.is_white.eq(True) | intake_analysis.is_black.eq(True)
intake_bw = intake_analysis.loc[intake_is_bw].copy()

# Sentencing records restricted to rows flagged black or white.
sentencing_analysis = pd.read_csv('sentencing_analysis.csv')
sentencing_is_bw = sentencing_analysis.is_black.eq(True) | sentencing_analysis.is_white.eq(True)
sentencing_bw = sentencing_analysis.loc[sentencing_is_bw].copy()



#Graph 1
# Felony-review rejection rate per month (sa_timedelta) and race.
# `prop` is the share of reviews rejected. `n` is the number of reviews in the
# cell ('size'), the same definition the sentencing summaries use; 'sum' would
# instead count only the rejected reviews.
intake_bw_summary = (
    intake_bw
    .groupby(['sa_timedelta', 'is_black'])['fr_is_rejected']
    .agg(prop='mean', n='size')
    .reset_index()
)
# Keep 24 months on each side of the cutoff.
intake_bw_summary = intake_bw_summary[(intake_bw_summary.sa_timedelta >= -24) &
                                      (intake_bw_summary.sa_timedelta <= 24)].copy()
intake_bw_graph = (

    ggplot(intake_bw_summary, aes(x = 'sa_timedelta', y = 'prop',
                                  group = 'is_black', color = 'is_black')) +
    geom_point() +
    # Separate smoothers before and after the cutoff so the fit may break at 0.
    geom_smooth(data = intake_bw_summary[intake_bw_summary.sa_timedelta < 0]) +
    geom_smooth(data = intake_bw_summary[intake_bw_summary.sa_timedelta >= 0]) +
    geom_vline(xintercept = 0, linetype = "dashed") +
    labs(x = "Months relative to S.A. Foxx's entry",
         y = 'Proportion of felony reviews resulting in rejection',
         title = 'Proportion of Rejected Felonies Before/After Kim Foxx, \nby Race') +
    scale_color_discrete(labels = ['White', 'Black']) +
    theme_classic() +
    guides(color = guide_legend(title = 'Race'))
)
intake_bw_graph




#Graph 2
# Five most frequent offense categories among black/white intake records.
# value_counts() sorts by frequency, so its index holds the categories in rank
# order; reading them from the index avoids depending on the column name that
# reset_index() produces, which is no longer 'index' in pandas >= 2.0.
intake_top5_offense = list(intake_bw.UPDATE_OFFENSE_CATEGORY.value_counts().index[:5])
intake_bw_top5_summary = intake_bw[intake_bw.UPDATE_OFFENSE_CATEGORY.isin(intake_top5_offense)].copy()
# Shorten the long weapon-offense label for the facet titles.
intake_bw_top5_summary['UPDATE_OFFENSE_CATEGORY'] = intake_bw_top5_summary.UPDATE_OFFENSE_CATEGORY.str.replace('UUW - Unlawful Use of Weapon',
                                                                                                               'UUW')
# Rejection rate per month, offense and race; `n` is the number of reviews in
# the cell ('size'), consistent with the sentencing summaries.
intake_bw_top5_summary = (
    intake_bw_top5_summary
    .groupby(['sa_timedelta', 'UPDATE_OFFENSE_CATEGORY', 'is_black'])['fr_is_rejected']
    .agg(prop='mean', n='size')
    .reset_index()
)
# Keep 24 months on each side of the cutoff.
intake_bw_top5_summary = intake_bw_top5_summary[(intake_bw_top5_summary.sa_timedelta >= -24) &
                                                (intake_bw_top5_summary.sa_timedelta <= 24)].copy()

intake_bw_top5_summary = intake_bw_top5_summary.sort_values(['UPDATE_OFFENSE_CATEGORY', 'is_black', 'sa_timedelta'])
intake_bw_top5_graph = (

    ggplot(intake_bw_top5_summary, aes(x = 'sa_timedelta', y = 'prop',
                                       group = 'is_black', color = 'is_black')) +
    geom_point() +
    # Separate smoothers before and after the cutoff so the fit may break at 0.
    geom_smooth(data = intake_bw_top5_summary[intake_bw_top5_summary.sa_timedelta < 0]) +
    geom_smooth(data = intake_bw_top5_summary[intake_bw_top5_summary.sa_timedelta >= 0]) +
    geom_vline(xintercept = 0, linetype = "dashed") +
    labs(x = "Months relative to Kim Foxx's entry level",
         y = 'Proportion',
         title = "Proportion of Rejected Felonies \nBefore/After Kim Foxx's Entry, by Offense Groups") +
    scale_color_discrete(labels = ['White', 'Black']) +
    facet_wrap('UPDATE_OFFENSE_CATEGORY') +
    theme_classic() +
    guides(color = guide_legend(title = 'Race'))
)
intake_bw_top5_graph





#Graph 3
# Monthly incarceration share by race (`prop`) with the number of sentences in
# each month/race cell (`n`).
incarc_bw_summary = (
    sentencing_bw
    .groupby(['sa_timedelta', 'is_black'])['is_incarcerated']
    .agg(prop='mean', n='size')
    .reset_index()
)
# Keep 24 months on each side of the cutoff.
incarc_bw_summary = incarc_bw_summary[incarc_bw_summary.sa_timedelta.between(-24, 24)].copy()

incarc_bw_graph = (
    ggplot(incarc_bw_summary, aes(x = 'sa_timedelta', y = 'prop', color = 'is_black'))
    + geom_point()
    # One smoother per side of the cutoff.
    + geom_smooth(data = incarc_bw_summary.loc[lambda d: d['sa_timedelta'] < 0])
    + geom_smooth(data = incarc_bw_summary.loc[lambda d: d['sa_timedelta'] >= 0])
    + theme_bw()
    + labs(x = "Months since S.A. Foxx's entry",
           y = "Proportion",
           title = "Incarceration Rate Before and After \nState Attorney's Entry")
    + scale_color_discrete(labels = ['White', 'Black'])
    + guides(color = guide_legend(title = 'Race'))
    + geom_vline(xintercept = 0)
)
incarc_bw_graph





#Graph 4
# Five most frequent offense categories among black/white sentencing records.
# value_counts() sorts by frequency, so its index holds the categories in rank
# order; reading them from the index avoids depending on the column name that
# reset_index() produces, which is no longer 'index' in pandas >= 2.0.
incarc_top5_offense = list(sentencing_bw.UPDATED_OFFENSE_CATEGORY.value_counts().index[:5])
incarc_bw_top5_summary = sentencing_bw[sentencing_bw.UPDATED_OFFENSE_CATEGORY.isin(incarc_top5_offense)].copy()
# Shorten the long weapon-offense label for the facet titles.
incarc_bw_top5_summary['UPDATED_OFFENSE_CATEGORY'] = incarc_bw_top5_summary.UPDATED_OFFENSE_CATEGORY.str.replace('UUW - Unlawful Use of Weapon',
                                                                                                                 'UUW')
# Incarceration share per month, offense and race; `n` is the number of
# sentences in the cell ('size'), as in the un-faceted incarceration summary.
incarc_bw_top5_summary = (
    incarc_bw_top5_summary
    .groupby(['sa_timedelta', 'UPDATED_OFFENSE_CATEGORY', 'is_black'])['is_incarcerated']
    .agg(prop='mean', n='size')
    .reset_index()
)
# Keep 24 months on each side of the cutoff.
incarc_bw_top5_summary = incarc_bw_top5_summary[(incarc_bw_top5_summary.sa_timedelta >= -24) &
                                                (incarc_bw_top5_summary.sa_timedelta <= 24)].copy()

incarc_bw_top5_summary = incarc_bw_top5_summary.sort_values(['UPDATED_OFFENSE_CATEGORY', 'is_black', 'sa_timedelta'])
incarc_bw_top5_graph = (

    ggplot(incarc_bw_top5_summary, aes(x = 'sa_timedelta', y = 'prop',
                                       group = 'is_black', color = 'is_black')) +
    geom_point() +
    # Separate smoothers before and after the cutoff so the fit may break at 0.
    geom_smooth(data = incarc_bw_top5_summary[incarc_bw_top5_summary.sa_timedelta < 0]) +
    geom_smooth(data = incarc_bw_top5_summary[incarc_bw_top5_summary.sa_timedelta >= 0]) +
    geom_vline(xintercept = 0, linetype = "dashed") +
    labs(x = "Months relative to Kim Foxx's entry level",
         y = 'Proportion',
         title = "Incarceration Rate Before/After Kim Foxx's Entry,\n by Offense Groups") +
    scale_color_discrete(labels = ['White', 'Black']) +
    facet_wrap('UPDATED_OFFENSE_CATEGORY') +
    theme_classic() +
    guides(color = guide_legend(title = 'Race'))
)

incarc_bw_top5_graph







#Graph 5
# Incarceration share per month, sex and race. `n` is the number of sentences
# in the cell ('size'), the same definition used by the un-faceted
# incarceration summary; 'sum' would count only the incarcerated rows.
incarc_bw_sex_summary = (
    sentencing_bw
    .groupby(['sa_timedelta', 'is_female', 'is_black'])['is_incarcerated']
    .agg(prop='mean', n='size')
    .reset_index()
)

# Keep 24 months on each side of the cutoff.
incarc_bw_sex_summary = incarc_bw_sex_summary[(incarc_bw_sex_summary.sa_timedelta >= -24) &
                                              (incarc_bw_sex_summary.sa_timedelta <= 24)].copy()

incarc_bw_sex_summary = incarc_bw_sex_summary.sort_values(['is_female', 'is_black', 'sa_timedelta'])
# Readable facet label derived from the is_female indicator.
incarc_bw_sex_summary['sex'] = np.where(incarc_bw_sex_summary.is_female == 1, "Female", "Male")
incarc_bw_sex_graph = (

    ggplot(incarc_bw_sex_summary, aes(x = 'sa_timedelta', y = 'prop',
                                       group = 'is_black', color = 'is_black')) +
    geom_point() +
    # Separate smoothers before and after the cutoff so the fit may break at 0.
    geom_smooth(data = incarc_bw_sex_summary[incarc_bw_sex_summary.sa_timedelta < 0]) +
    geom_smooth(data = incarc_bw_sex_summary[incarc_bw_sex_summary.sa_timedelta >= 0]) +
    geom_vline(xintercept = 0, linetype = "dashed") +
    labs(x = "Months relative to Kim Foxx's entry level",
         y = 'Proportion',
         title = "Incarceration Rate Before/After Kim Foxx's Entry,\n by Sex") +
    scale_color_discrete(labels = ['White', 'Black']) +
    facet_wrap('sex') +
    theme_classic() +
    guides(color = guide_legend(title = 'Race'))
)

incarc_bw_sex_graph







#Graph 6
# Monthly probation share by race (`prop`) with the number of sentences in each
# month/race cell (`n`).
sentencing_bw_summary = (
    sentencing_bw
    .groupby(['sa_timedelta', 'is_black'])['is_on_probation']
    .agg(prop='mean', n='size')
    .reset_index()
)

# Keep 24 months on each side of the cutoff.
sentencing_bw_summary = sentencing_bw_summary[sentencing_bw_summary.sa_timedelta.between(-24, 24)].copy()

probation_gap_bw = (
    ggplot(sentencing_bw_summary, aes(x = 'sa_timedelta', y = 'prop', color = 'is_black'))
    + geom_point()
    # One smoother per side of the cutoff.
    + geom_smooth(data = sentencing_bw_summary.loc[lambda d: d['sa_timedelta'] < 0])
    + geom_smooth(data = sentencing_bw_summary.loc[lambda d: d['sa_timedelta'] >= 0])
    + theme_bw()
    + labs(x = "Months since S.A. Foxx's entry",
           y = "Proportion of defendants on probation",
           title = "Probation Probability Before and After \nState Attorney's Entry")
    + scale_color_discrete(labels = ['White', 'Black'])
    + guides(color = guide_legend(title = 'Race'))
    + geom_vline(xintercept = 0)
)

probation_gap_bw









#Graph 7
# Monthly probation share and cell size by race, in long form.
monthly_probation = (
    sentencing_bw
    .groupby(['sa_timedelta', 'is_black'])['is_on_probation']
    .agg(prop='mean', n='size')
    .reset_index()
)
# Keep 24 months on each side of the cutoff.
monthly_probation = monthly_probation[monthly_probation.sa_timedelta.between(-24, 24)].copy()

# One row per month, with separate prop/n columns for each is_black value.
sentencing_bw_summary = monthly_probation.pivot(index=['sa_timedelta'],
                                                columns='is_black',
                                                values=['prop', 'n']).reset_index()
# Flat names are assigned by position.
# NOTE(review): this assumes the pivot puts is_black == False before True within
# each value block -- confirm against the pivoted column order.
new_names = ['sa_timedelta', 'prop_white', 'prop_black', 'n_white', 'n_black']
sentencing_bw_summary.columns = new_names
# Gap = black probation share minus white probation share.
sentencing_bw_summary['prop_bw_gap'] = sentencing_bw_summary['prop_black'].sub(sentencing_bw_summary['prop_white'])

probation_gap_bw2 = (
    ggplot(sentencing_bw_summary, aes(x = 'sa_timedelta', y = 'prop_bw_gap'))
    + geom_point()
    # One smoother per side of the cutoff.
    + geom_smooth(data = sentencing_bw_summary.loc[lambda d: d['sa_timedelta'] < 0])
    + geom_smooth(data = sentencing_bw_summary.loc[lambda d: d['sa_timedelta'] >= 0])
    + theme_bw()
    + labs(x = "Months since S.A. Foxx's entry",
           y = "Probability gap",
           title = "Black-White Probation Probability Gaps,\nBefore and After State Attorney's Entry")
    # No color aesthetic is mapped in this plot; the color scale and legend
    # title below are kept exactly as in the other graphs.
    + scale_color_discrete(labels = ['White', 'Black'])
    + guides(color = guide_legend(title = 'Race'))
    + geom_vline(xintercept = 0)
)

probation_gap_bw2






#Graph 8
# Probation share for the top-5 sentencing offense categories
# (incarc_top5_offense is computed in the Graph 4 section).
prob_bw_top5_summary = sentencing_bw[sentencing_bw.UPDATED_OFFENSE_CATEGORY.isin(incarc_top5_offense)].copy()

# Shorten the long weapon-offense label for the facet titles.
prob_bw_top5_summary['UPDATED_OFFENSE_CATEGORY'] = prob_bw_top5_summary.UPDATED_OFFENSE_CATEGORY.str.replace('UUW - Unlawful Use of Weapon',
                                                                                                             'UUW')
# `prop` is the share on probation; `n` is the number of sentences in the cell
# ('size'), matching the un-faceted probation summary. 'sum' would count only
# the probation rows.
prob_bw_top5_summary = (
    prob_bw_top5_summary
    .groupby(['sa_timedelta', 'UPDATED_OFFENSE_CATEGORY', 'is_black'])['is_on_probation']
    .agg(prop='mean', n='size')
    .reset_index()
)
# Keep 24 months on each side of the cutoff.
prob_bw_top5_summary = prob_bw_top5_summary[(prob_bw_top5_summary.sa_timedelta >= -24) &
                                            (prob_bw_top5_summary.sa_timedelta <= 24)].copy()

prob_bw_top5_summary = prob_bw_top5_summary.sort_values(['UPDATED_OFFENSE_CATEGORY', 'is_black', 'sa_timedelta'])
prob_bw_top5_graph = (

    ggplot(prob_bw_top5_summary, aes(x = 'sa_timedelta', y = 'prop',
                                     group = 'is_black', color = 'is_black')) +
    geom_point() +
    # Separate smoothers before and after the cutoff so the fit may break at 0.
    geom_smooth(data = prob_bw_top5_summary[prob_bw_top5_summary.sa_timedelta < 0]) +
    geom_smooth(data = prob_bw_top5_summary[prob_bw_top5_summary.sa_timedelta >= 0]) +
    geom_vline(xintercept = 0, linetype = "dashed") +
    labs(x = "Months relative to Kim Foxx's entry level",
         y = 'Proportion',
         title = "Probation Rate Before/After Kim Foxx's Entry,\n by Offense Groups") +
    scale_color_discrete(labels = ['White', 'Black']) +
    # Each offense panel gets its own y-axis range.
    facet_wrap('UPDATED_OFFENSE_CATEGORY', scales = "free_y") +
    theme_classic() +
    theme(panel_spacing=.25) +
    guides(color = guide_legend(title = 'Race'))
)
prob_bw_top5_graph








#Graph 9
# Probation share per month, sex and race. `n` is the number of sentences in
# the cell ('size'), matching the un-faceted probation summary; 'sum' would
# count only the probation rows.
prob_bw_sex_summary = (
    sentencing_bw
    .groupby(['sa_timedelta', 'is_female', 'is_black'])['is_on_probation']
    .agg(prop='mean', n='size')
    .reset_index()
)
# Keep 24 months on each side of the cutoff.
prob_bw_sex_summary = prob_bw_sex_summary[(prob_bw_sex_summary.sa_timedelta >= -24) &
                                              (prob_bw_sex_summary.sa_timedelta <= 24)].copy()
prob_bw_sex_summary = prob_bw_sex_summary.sort_values(['is_female', 'is_black', 'sa_timedelta'])
# Readable facet label derived from the is_female indicator.
prob_bw_sex_summary['sex'] = np.where(prob_bw_sex_summary.is_female == 1, "Female", "Male")
prob_bw_sex_graph = (

    ggplot(prob_bw_sex_summary, aes(x = 'sa_timedelta', y = 'prop',
                                       group = 'is_black', color = 'is_black')) +
    geom_point() +
    # Separate smoothers before and after the cutoff so the fit may break at 0.
    geom_smooth(data = prob_bw_sex_summary[prob_bw_sex_summary.sa_timedelta < 0]) +
    geom_smooth(data = prob_bw_sex_summary[prob_bw_sex_summary.sa_timedelta >= 0]) +
    geom_vline(xintercept = 0, linetype = "dashed") +
    labs(x = "Months relative to Kim Foxx's entry level",
         y = 'Proportion',
         title = "Probation Rate Before/After Kim Foxx's Entry,\n by Sex") +
    scale_color_discrete(labels = ['White', 'Black']) +
    facet_wrap('sex') +
    theme_classic() +
    guides(color = guide_legend(title = 'Race'))
)

prob_bw_sex_graph








#Graph 10
# Mean and median sentence length (in years) per month and race, plus the
# number of sentences in each cell.
sentence_time_summary = (
    sentencing_bw
    .groupby(['sa_timedelta', 'is_black'])['sentencing_term_y']
    .agg(mean_sentence='mean', median_sentence='median', n='size')
    .reset_index()
)
# Keep 24 months on each side of the cutoff.
sentence_time_summary = sentence_time_summary[sentence_time_summary.sa_timedelta.between(-24, 24)]

sentencing_term_bw = (
    ggplot(sentence_time_summary, aes(x = "sa_timedelta", y = "mean_sentence", color = "is_black"))
    + geom_point()
    # One smoother per side of the cutoff.
    + geom_smooth(data = sentence_time_summary.loc[lambda d: d.sa_timedelta < 0])
    + geom_smooth(data = sentence_time_summary.loc[lambda d: d.sa_timedelta >= 0])
    + geom_vline(aes(xintercept = 0))
    + labs(x = "Months relative to State Attorney Kim Foxx's entry",
           y = "Mean sentence term (in years)",
           title = "Length of Sentence Term, \nBefore/After State Attorney's Entry")
    + scale_color_discrete(labels = ['White', 'Black'])
    + guides(color = guide_legend(title = "Race"))
    + theme_classic()
)
sentencing_term_bw







#Graph 11
# Mean sentence length (in years) per month, race and offense category, plus
# the number of sentences in each cell.
sentencing_time_summary_byoff = (
    sentencing_bw
    .groupby(['sa_timedelta', 'is_black', 'UPDATED_OFFENSE_CATEGORY'])['sentencing_term_y']
    .agg(mean_sentence='mean', n='size')
    .reset_index()
)
# Restrict to the top-5 offense categories (incarc_top5_offense comes from the
# Graph 4 section) and to 24 months on each side of the cutoff.
in_top5 = sentencing_time_summary_byoff.UPDATED_OFFENSE_CATEGORY.isin(incarc_top5_offense)
in_window = sentencing_time_summary_byoff.sa_timedelta.between(-24, 24)
sentencing_time_summary_byoff = sentencing_time_summary_byoff[in_top5 & in_window].copy()

# Shorten the long weapon-offense label for the facet titles.
sentencing_time_summary_byoff['UPDATED_OFFENSE_CATEGORY'] = sentencing_time_summary_byoff.UPDATED_OFFENSE_CATEGORY.str.replace('UUW - Unlawful Use of Weapon',
                                                                                                                               'UUW')
sentencing_term_bw2 = (
    ggplot(sentencing_time_summary_byoff, aes(x = "sa_timedelta", y = 'mean_sentence', color = "is_black"))
    + geom_point()
    # One smoother per side of the cutoff.
    + geom_smooth(data = sentencing_time_summary_byoff.loc[lambda d: d.sa_timedelta < 0])
    + geom_smooth(data = sentencing_time_summary_byoff.loc[lambda d: d.sa_timedelta >= 0])
    + geom_vline(aes(xintercept = 0))
    + labs(x = "Months relative to State Attorney Kim Foxx's entry",
           y = "Mean sentence term (in years)",
           title = "Length of Sentence Term by Offense Groups, \nBefore/After State Attorney's Entry")
    + scale_color_discrete(labels = ['White', 'Black'])
    # Each offense panel gets its own y-axis range.
    + facet_wrap("UPDATED_OFFENSE_CATEGORY", scales = "free_y")
    + theme_classic()
    + theme(panel_spacing=.25)
    + guides(color = guide_legend(title = "Race"))
)
sentencing_term_bw2






#Graph 12
# Mean sentence term and row count for every (month offset, race, defendant gender) cell.
gender_cells = sentencing_bw.groupby(['sa_timedelta', 'is_black', 'GENDER'])
sentencing_gender_summary_byoff = (
    gender_cells['sentencing_term_y']
    .agg([('mean_sentence', 'mean'), ('n', 'size')])
    .reset_index()
)

# Keep +/- 24 months around the cutoff and only the two explicit gender labels.
gender_in_window = sentencing_gender_summary_byoff.sa_timedelta.between(-24, 24)
gender_is_known = sentencing_gender_summary_byoff.GENDER.isin(["Male", "Female"])
sentencing_gender_summary_byoff = sentencing_gender_summary_byoff[gender_in_window & gender_is_known].copy()

# Separate smoothers are fitted on each side of the cutoff (month 0).
gender_before_entry = sentencing_gender_summary_byoff[sentencing_gender_summary_byoff.sa_timedelta < 0]
gender_after_entry = sentencing_gender_summary_byoff[sentencing_gender_summary_byoff.sa_timedelta >= 0]

sentencing_term_bw2 = (
    ggplot(sentencing_gender_summary_byoff,
           aes(x="sa_timedelta", y='mean_sentence', color="is_black"))
    + geom_point()
    + geom_smooth(data=gender_before_entry)
    + geom_smooth(data=gender_after_entry)
    + geom_vline(aes(xintercept=0))
    + labs(x="Months relative to State Attorney Kim Foxx's entry",
           y="Mean sentence term (in years)",
           title="Length of Sentence Term by defendant's gender, \nBefore/After State Attorney's Entry")
    + scale_color_discrete(labels=['White', 'Black'])
    + facet_wrap("GENDER", scales="free_y")
    + theme_classic()
    + theme(panel_spacing=.25)
    + guides(color=guide_legend(title="Race"))
)
sentencing_term_bw2




#?Graph 13
# Judge first name = first space-separated token of SENTENCE_JUDGE.
# (The original ran this derivation twice back-to-back; once is enough.)
sentencing_bw['judge_fname'] = sentencing_bw.SENTENCE_JUDGE.str.split(" ").str[0]
sentencing_bw[['SENTENCE_JUDGE', 'judge_fname']].head()

unique_names = sentencing_bw.judge_fname.unique()
"""Originally there were {} rows; only {} unique first names""".format(sentencing_bw.shape[0],
                                                                      len(unique_names))

# Guess a gender for every distinct first name, then attach it to each sentencing row.
# NOTE(review): `gender` is not imported in the visible part of this file --
# presumably `import gender_guesser.detector as gender`; confirm.
d = gender.Detector()
guess_gender = [d.get_gender(one_name) for one_name in unique_names]
names_wgender = pd.DataFrame({'judge_fname': unique_names, 'gender_guess': guess_gender})
names_wgender.sample(n = 5)
sentencing_wjudge = pd.merge(sentencing_bw, names_wgender,
                             on = 'judge_fname',
                             how = 'left')

# Mean sentence term and row count for every (month offset, race, guessed judge gender) cell.
sentencing_judge = (
    sentencing_wjudge
    .groupby(['sa_timedelta', 'is_black', 'gender_guess'])['sentencing_term_y']
    .agg([('mean_sentence', 'mean'), ('n', 'size')])
    .reset_index()
)

# Keep +/- 24 months around the cutoff and only the two exact guess labels
# "male" / "female"; every other guess value is dropped.
sentencing_judge = sentencing_judge[(sentencing_judge.sa_timedelta >= -24) &
                                    (sentencing_judge.sa_timedelta <= 24) &
                                    (sentencing_judge.gender_guess.isin(["male", "female"]))].copy()

sentencing_term_bw2 = (
    ggplot(sentencing_judge, aes(x = "sa_timedelta", y = 'mean_sentence', color = "is_black")) +
    geom_point() +
    # separate smoothers on each side of the cutoff (month 0)
    geom_smooth(data = sentencing_judge[sentencing_judge.sa_timedelta < 0]) +
    geom_smooth(data = sentencing_judge[sentencing_judge.sa_timedelta >= 0]) +
    geom_vline(aes(xintercept = 0)) +
    labs(x = "Months relative to State Attorney Kim Foxx's entry", 
         y = "Mean sentence term (in years)", 
         title = "Length of Sentence Term by Judges gender, \nBefore/After State Attorney's Entry") +
    scale_color_discrete(labels = ['White', 'Black']) +
    facet_wrap("gender_guess", scales = "free_y") +
    theme_classic() +
    theme(panel_spacing=.25) +
    guides(color = guide_legend(title = "Race"))
)
sentencing_term_bw2


FileNotFoundError: [Errno 2] No such file or directory: '../Desktop/DS/intake_analysis.csv'

In [14]:
##02_Data_Exploration_Regression
def rd_regress(yvar, xvar, sample_type):
    """Run `rdrobust` and gather its estimates into a single DataFrame.

    Parameters
    ----------
    yvar
        Outcome passed to `rdrobust` as `y`. When it carries a `.name`
        attribute (e.g. a pandas Series) that name is recorded in the
        `outcome_var` column; otherwise `outcome_var` is None.
    xvar
        Running variable passed to `rdrobust` as `x`.
    sample_type
        Label stored verbatim in the `sample_type` column; callers in this
        file use it as the x-axis group of the coefficient plots.

    Returns
    -------
    DataFrame
        `result.coef` joined on its index with `result.se`, `result.t`,
        `result.pv` and `result.ci`, plus the columns `obs_left`,
        `obs_right`, `bandwidth_days`, `outcome_var` and `sample_type`.
    """
    # all = True requests every estimate type from rdrobust
    result = rdrobust(y = yvar, x = xvar, all = True)

    # start from the coefficients and attach the other statistics by index
    result_df = result.coef
    for piece in (result.se, result.t, result.pv, result.ci):
        result_df = result_df.merge(piece, left_index = True, right_index = True)

    result_df['obs_left'] = result.N[0]
    result_df['obs_right'] = result.N[1]
    # first value of the 'h' bandwidth row
    result_df['bandwidth_days'] = result.bws.loc['h'].values[0]
    # tolerate outcomes without a .name (e.g. plain lists) instead of raising
    # AttributeError after the estimation has already run
    result_df['outcome_var'] = getattr(yvar, 'name', None)
    result_df['sample_type'] = sample_type

    return result_df

def rd_plot(yvar, xvar, ytitle, xtitle, title):
    """Draw an `rdplot` limited to the bandwidth that `rdrobust` selects.

    `yvar` and `xvar` are passed to both `rdrobust` and `rdplot`; `ytitle`,
    `xtitle` and `title` become the plot's axis labels and title. Returns
    whatever `rdplot` returns.
    """
    estimate = rdrobust(y=yvar, x=xvar)

    # the 'h' row of the bandwidth table unpacks into two values (left, right)
    left_bw, right_bw = estimate.bws.loc['h', :].values

    # observations with -left_bw <= x <= right_bw, as a plain boolean array
    in_bandwidth = ((xvar >= -left_bw) & (xvar <= right_bw)).values

    return rdplot(y=yvar,
                  x=xvar,
                  subset=in_bandwidth,
                  title=title,
                  x_label=xtitle,
                  y_label=ytitle)

# Split the sentencing sample by defendant gender (is_female compared against 0 / 1).
gender_male = sentencing_bw[sentencing_bw['is_female'] == 0]
gender_female = sentencing_bw[sentencing_bw['is_female'] == 1]

# RD estimates for the incarceration indicator; sample_type '1' = male, '2' = female, '3' = full sample.
gender_male_regression = rd_regress(yvar= gender_male.is_incarcerated, xvar= gender_male.sa_timedelta, sample_type= '1')
rd_female_gender= rd_regress(yvar= gender_female.is_incarcerated, xvar= gender_female.sa_timedelta, sample_type= '2')
rd_total_gender=rd_regress(yvar= sentencing_bw.is_incarcerated, xvar= sentencing_bw.sa_timedelta, sample_type= '3')
gender_results= pd.concat([gender_male_regression, rd_female_gender, rd_total_gender]).reset_index()

# The outcome here is is_incarcerated, so the title names incarceration
# (it previously said "Felony Rejection", copied from the intake analysis).
rd_plot_gender = (
    ggplot(gender_results, aes(x = 'sample_type', y = 'Coeff', color = 'index')) +
    geom_pointrange(aes(ymin = 'CI Lower', ymax = 'CI Upper'), position = position_dodge(width = .3)) +
    labs(x = 'Sample group', y = 'RD Coefficients', title = 'Incarceration Probability Regression Discontinuity Coefficients') +
    scale_x_discrete(labels = ['Male', 'Female', 'Full']) +
    theme_classic() +
    guides(color = guide_legend(title = 'Estimate type'))
)

# RD estimates for the probation indicator; sample_type '1' = male, '2' = female, '3' = full sample.
male_probation = rd_regress(yvar= gender_male.is_on_probation, xvar= gender_male.sa_timedelta, sample_type= '1')
female_probation= rd_regress(yvar= gender_female.is_on_probation, xvar= gender_female.sa_timedelta, sample_type= '2')
total_probation= rd_regress(yvar= sentencing_bw.is_on_probation, xvar= sentencing_bw.sa_timedelta, sample_type= '3')
probation = pd.concat([male_probation, female_probation, total_probation]).reset_index()

# x labels follow the sample_type codes above (they previously read
# Full/Black/White, which mislabelled the male/female/full groups).
rd_plot_gender_probation = (
    ggplot(probation, aes(x = 'sample_type', y = 'Coeff', color = 'index')) +
    geom_pointrange(aes(ymin = 'CI Lower', ymax = 'CI Upper'), position = position_dodge(width = .3)) +
    labs(x = 'Sample group', y = 'RD Coefficients', title = 'Probation Probability Regression Discontinuity Coefficients') +
    scale_x_discrete(labels = ['Male', 'Female', 'Full']) +
    theme_classic() +
    guides(color = guide_legend(title = 'Estimate type'))
)
# RD estimates for sentence length (sentencing_term_d); sample_type '1' = male, '2' = female, '3' = full sample.
male_sentence = rd_regress(yvar= gender_male.sentencing_term_d, xvar= gender_male.sa_timedelta, sample_type= '1')
female_sentence= rd_regress(yvar= gender_female.sentencing_term_d, xvar= gender_female.sa_timedelta, sample_type= '2')
total_sentence= rd_regress(yvar= sentencing_bw.sentencing_term_d, xvar= sentencing_bw.sa_timedelta, sample_type= '3')
sentence = pd.concat([male_sentence, female_sentence, total_sentence]).reset_index()

# x labels follow the sample_type codes above (they previously read
# Full/Black/White, which mislabelled the male/female/full groups).
rd_plot_gender_sentence = (
    ggplot(sentence, aes(x = 'sample_type', y = 'Coeff', color = 'index')) +
    geom_pointrange(aes(ymin = 'CI Lower', ymax = 'CI Upper'), position = position_dodge(width = .3)) +
    labs(x = 'Sample group', y = 'RD Coefficients', title = 'Sentence Length Regression Discontinuity Coefficients') +
    scale_x_discrete(labels = ['Male', 'Female', 'Full']) +
    theme_classic() +
    guides(color = guide_legend(title = 'Estimate type'))
)
# Plot: "Sentence Length Regression Discontinuity Coefficients"
rd_plot_gender_sentence


# Regression Results for you to click on 
gender_male_regression
rd_female_gender
rd_total_gender
gender_results
rd_plot_gender
male_probation
female_probation
total_probation
rd_plot_gender_probation
male_sentence
female_sentence
total_sentence


# Five most frequent offense categories. value_counts() sorts by descending
# count, so the first five index entries are the top five; reading them from
# the index avoids depending on the column name reset_index() produces,
# which differs between pandas versions.
incarc_top5_offense = list(sentencing_bw.UPDATED_OFFENSE_CATEGORY.value_counts().index[:5])
incarc_bw_top5_summary = sentencing_bw[sentencing_bw.UPDATED_OFFENSE_CATEGORY.isin(incarc_top5_offense)].copy()

# Shorten the long weapon-offense label (the original call was missing its
# replacement argument and closing parenthesis, which is a syntax error).
incarc_bw_top5_summary['UPDATED_OFFENSE_CATEGORY'] = incarc_bw_top5_summary.UPDATED_OFFENSE_CATEGORY.str.replace('UUW - Unlawful Use of Weapon',
                                                                                                                 'UUW')

incarc_bw_top5_summary.UPDATED_OFFENSE_CATEGORY.unique()

# One sub-frame per offense category.
# NOTE(review): these category names are hard-coded; verify they match the
# values actually present in incarc_top5_offense.
dui_df= incarc_bw_top5_summary[incarc_bw_top5_summary['UPDATED_OFFENSE_CATEGORY']== "Aggravated DUI"]
uuw_df= incarc_bw_top5_summary[incarc_bw_top5_summary['UPDATED_OFFENSE_CATEGORY']== "UUW"]
theft_df= incarc_bw_top5_summary[incarc_bw_top5_summary['UPDATED_OFFENSE_CATEGORY']== "Retail Theft"]
burglary_df= incarc_bw_top5_summary[incarc_bw_top5_summary['UPDATED_OFFENSE_CATEGORY']== "Burglary"]
narcotics_df= incarc_bw_top5_summary[incarc_bw_top5_summary['UPDATED_OFFENSE_CATEGORY']== "Narcotics"]
                                                        
                        
# Sentence-length RD for the pooled top-5 sample and for each offense
# separately; sample_type runs '1'..'6' in the order listed here.
top5_sample_frames = [incarc_bw_top5_summary, dui_df, uuw_df,
                      theft_df, burglary_df, narcotics_df]
top5_rd, dui_rd, uuw_rd, theft_rd, burglary_rd, narcotics_rd = [
    rd_regress(yvar=sample_frame.sentencing_term_d,
               xvar=sample_frame.sa_timedelta,
               sample_type=str(sample_code))
    for sample_code, sample_frame in enumerate(top5_sample_frames, start=1)
]
top5_full = pd.concat(
    [top5_rd, dui_rd, uuw_rd, theft_rd, burglary_rd, narcotics_rd]
).reset_index()

# Coefficient plot with confidence intervals and a zero reference line.
rd_plot_top5 = (
    ggplot(top5_full, aes(x='sample_type', y='Coeff', color='index'))
    + geom_pointrange(aes(ymin='CI Lower', ymax='CI Upper'),
                      position=position_dodge(width=.9))
    + labs(x='Sample group',
           y='RD Coefficients',
           title='Top 5 Crimes Regression Discontinuity Coefficients')
    + scale_x_discrete(labels=['Full', 'Aggravated DUI', 'UUW',
                               'Retail Theft', 'Burglary', 'Narcotics'])
    + theme_classic()
    + geom_hline(aes(yintercept=0))
    + guides(color=guide_legend(title='Estimate type'))
)

# Plot: "Top 5 Crimes Regression Discontinuity Coefficients"
rd_plot_top5

# Regression Results for you to click on
top5_rd
dui_rd
uuw_rd
theft_rd
burglary_rd
narcotics_rd

count    268527.00000
mean         32.35875
std          11.74226
min          17.00000
25%          23.00000
50%          29.00000
75%          40.00000
max          81.00000
Name: age_cleaned, dtype: float64

In [16]:
##03_Data_Exploration_Regression_Exploration

# defining a function to get a dataframe of the results (each containing three RD estimates:
# conventional, bias-corrected, and robust)
def rd_regress(yvar, xvar, sample_type):
    """Run `rdrobust` and gather its estimates into a single DataFrame.

    Parameters
    ----------
    yvar
        Outcome passed to `rdrobust` as `y`. When it carries a `.name`
        attribute (e.g. a pandas Series) that name is recorded in the
        `outcome_var` column; otherwise `outcome_var` is None.
    xvar
        Running variable passed to `rdrobust` as `x`.
    sample_type
        Label stored verbatim in the `sample_type` column; callers in this
        file use it as the x-axis group of the coefficient plots.

    Returns
    -------
    DataFrame
        `result.coef` joined on its index with `result.se`, `result.t`,
        `result.pv` and `result.ci`, plus the columns `obs_left`,
        `obs_right`, `bandwidth_days`, `outcome_var` and `sample_type`.
    """
    result = rdrobust(y = yvar, x = xvar, all = True)

    # initializing the dataframe
    result_df = result.coef

    # merging with other columns (matched on the index) to complete the df
    for piece in (result.se, result.t, result.pv, result.ci):
        result_df = result_df.merge(piece, left_index = True, right_index = True)

    result_df['obs_left'] = result.N[0]
    result_df['obs_right'] = result.N[1]
    # first value of the 'h' bandwidth row
    result_df['bandwidth_days'] = result.bws.loc['h'].values[0]
    # tolerate outcomes without a .name (e.g. plain lists) instead of raising
    # AttributeError after the estimation has already run
    result_df['outcome_var'] = getattr(yvar, 'name', None)
    result_df['sample_type'] = sample_type

    # returning the result_df 
    return result_df

# next, we define a function to create RD plot
def rd_plot(yvar, xvar, ytitle, xtitle, title):
    """Draw an `rdplot` limited to the bandwidth that `rdrobust` selects.

    `yvar` and `xvar` are passed to both `rdrobust` and `rdplot`; `ytitle`,
    `xtitle` and `title` become the plot's axis labels and title. Returns
    whatever `rdplot` returns.
    """
    # estimation is run only to obtain the bandwidths
    estimate = rdrobust(y=yvar, x=xvar)

    # the 'h' row of the bandwidth table unpacks into two values (left, right)
    left_bw, right_bw = estimate.bws.loc['h', :].values

    # observations with -left_bw <= x <= right_bw, as a plain boolean array
    in_bandwidth = ((xvar >= -left_bw) & (xvar <= right_bw)).values

    # plot only the observations inside the bandwidth
    return rdplot(y=yvar,
                  x=xvar,
                  subset=in_bandwidth,
                  title=title,
                  x_label=xtitle,
                  y_label=ytitle)

# descriptive RD plot of the felony-rejection indicator (cast to int first)
fr_rejected_as_int = list(map(int, intake_bw.fr_is_rejected))
plot1 = rd_plot(yvar=fr_rejected_as_int,
                xvar=intake_bw.sa_timedelta_days,
                ytitle="Prop. of felonies rejected",
                xtitle="Days since Attorney Foxx assumed office",
                title="RD Plot: Foxx's Entry and Proportion of Felonies Rejected")

# write the figure to disk
plot1.rdplot.save('../output/rd_plot_fr_is_rejected.png', width=5, height=5, dpi=125)

# RD estimates of felony rejection: '1' = full sample, '2' = Black, '3' = White
reject_black_rows = intake_bw[intake_bw.is_black == True]
reject_white_rows = intake_bw[intake_bw.is_black == False]

fl_reject_full = rd_regress(yvar=intake_bw.fr_is_rejected,
                            xvar=intake_bw.sa_timedelta_days,
                            sample_type='1')
fl_reject_black = rd_regress(yvar=reject_black_rows['fr_is_rejected'],
                             xvar=reject_black_rows['sa_timedelta_days'],
                             sample_type='2')
fl_reject_white = rd_regress(yvar=reject_white_rows['fr_is_rejected'],
                             xvar=reject_white_rows['sa_timedelta_days'],
                             sample_type='3')

# stack the three result tables; the estimate type moves into an 'index' column
fl_reject = pd.concat([fl_reject_full, fl_reject_black, fl_reject_white]).reset_index()

# coefficient plot with confidence intervals per sample group
rd_plot_fl_reject_by_race = (
    ggplot(fl_reject, aes(x='sample_type', y='Coeff', color='index'))
    + geom_pointrange(aes(ymin='CI Lower', ymax='CI Upper'),
                      position=position_dodge(width=.3))
    + labs(x='Sample group',
           y='RD Coefficients',
           title='Felony Rejection Probability Regression Discontinuity Coefficients')
    + scale_x_discrete(labels=['Full', 'Black', 'White'])
    + theme_classic()
    + guides(color=guide_legend(title='Estimate type'))
)

# display, then save
rd_plot_fl_reject_by_race

rd_plot_fl_reject_by_race.save('../output/rd_plot_fl_reject_by_race.png', width=5, height=5, dpi=125)

# getting the top 5 felonies in the intake data
# value_counts() sorts by descending count, so the first five index entries
# are the top five; reading them from the index avoids depending on the
# column name reset_index() produces, which differs between pandas versions.
intake_top5_offense = list(intake_bw.UPDATE_OFFENSE_CATEGORY.value_counts().index[:5])
intake_top5_offense

# analyzing the RD impact on probability of felony rejection, one regression
# per top-5 offense; sample_type '1'..'5' follows the order of intake_top5_offense
fl_reject_narcs, fl_reject_uuw, fl_reject_rtheft, fl_reject_burg, fl_reject_agdui = [
    rd_regress(yvar = intake_bw[intake_bw.UPDATE_OFFENSE_CATEGORY == offense]['fr_is_rejected'],
               xvar = intake_bw[intake_bw.UPDATE_OFFENSE_CATEGORY == offense]['sa_timedelta_days'],
               sample_type = str(rank))
    for rank, offense in enumerate(intake_top5_offense, start = 1)
]

# appending all results
fl_reject_allcrime = pd.concat([fl_reject_narcs, fl_reject_uuw, 
                                fl_reject_rtheft, fl_reject_burg, 
                                fl_reject_agdui]).reset_index()

fl_reject_allcrime

# plotting the results 
# NOTE(review): the x labels are hard-coded and assume intake_top5_offense is
# ordered Narcotics, UUW, Retail Theft, Burglary, Aggravated DUI -- verify
# against the list displayed above.
rd_plot_fl_reject_by_crime = (
    ggplot(fl_reject_allcrime, aes(x = 'sample_type', y = 'Coeff', color = 'index')) +
    geom_pointrange(aes(ymin = 'CI Lower', ymax = 'CI Upper'), position = position_dodge(width = .3)) +
    labs(x = 'Sample group', y = 'RD Coefficients', title = 'Felony Rejection Probability Regression Discontinuity Coefficients') +
    scale_x_discrete(labels = ['Narcotics', 'UUW', 'Retail Theft', 'Burglary', 'Aggravated DUI']) +
    theme_classic() +
    guides(color = guide_legend(title = 'Estimate type'))
)

# showing the plot
rd_plot_fl_reject_by_crime

rd_plot_fl_reject_by_crime.save('../output/rd_plot_fl_reject_by_crime.png', width = 5, height = 5, dpi = 125)

# getting the descriptive plot (incarceration indicator cast to int first)
plot2 = rd_plot(yvar = list(map(int, sentencing_bw.is_incarcerated)), 
                xvar = sentencing_bw.sa_timedelta_days, 
                ytitle = "Prop. of incarcerated defendants", 
                xtitle = "Days since Attorney Foxx assumed office", 
                title = "RD Plot: Foxx's Entry and Incarceration Rate")

# saving the plot
plot2.rdplot.save('../output/rd_plot_incarcerated_prop.png', width = 5, height = 5, dpi = 125)

# analyzing the RD impact on probability of incarceration
# sample_type: '1' = full sample, '2' = Black, '3' = White
inc_black_rows = sentencing_bw[sentencing_bw.is_black == True]
inc_white_rows = sentencing_bw[sentencing_bw.is_black == False]

inc_full =  rd_regress(yvar = sentencing_bw.is_incarcerated, 
                       xvar = sentencing_bw.sa_timedelta_days, 
                       sample_type='1')

inc_black =  rd_regress(yvar = inc_black_rows['is_incarcerated'], 
                        xvar = inc_black_rows['sa_timedelta_days'], 
                        sample_type='2')

inc_white =  rd_regress(yvar = inc_white_rows['is_incarcerated'], 
                        xvar = inc_white_rows['sa_timedelta_days'], 
                        sample_type='3')

# appending all results
inc_df = pd.concat([inc_full, inc_black, inc_white]).reset_index()
inc_df

# plotting the results 
rd_plot_incarcerated_by_race = (
    ggplot(inc_df, aes(x = 'sample_type', y = 'Coeff', color = 'index')) +
    geom_pointrange(aes(ymin = 'CI Lower', ymax = 'CI Upper'), position = position_dodge(width = .3)) +
    geom_hline(aes(yintercept = 0)) +
    labs(x = 'Sample group', y = 'RD Coefficients', title = 'Incarceration Rate Regression Discontinuity Coefficients') +
    scale_x_discrete(labels = ['Full', 'Black', 'White']) +
    theme_classic() +
    guides(color = guide_legend(title = 'Estimate type'))
)

# showing the plot
rd_plot_incarcerated_by_race

# saving the plot (explicit .png extension, consistent with the other saved figures)
rd_plot_incarcerated_by_race.save('../output/rd_plot_incarcerated_by_race.png', width = 5, height = 5, dpi = 125)

# getting the descriptive plot
plot3 = rd_plot(yvar = sentencing_bw.sentencing_term_d, 
                xvar = sentencing_bw.sa_timedelta_days, 
                ytitle = "Sentence length (in days)", 
                xtitle = "Days since Attorney Foxx assumed office", 
                title = "RD Plot: Foxx's Entry and Sentence Length")

# saving the plot
plot3.rdplot.save('../output/rd_plot_sentence_length.png', width = 5, height = 5, dpi = 125)

# analyzing the RD impact on sentencing length
# sample_type: '1' = full sample, '2' = Black, '3' = White
sen_black_rows = sentencing_bw[sentencing_bw.is_black == True]
sen_white_rows = sentencing_bw[sentencing_bw.is_black == False]

sen_length_full =  rd_regress(yvar = sentencing_bw.sentencing_term_d, 
                              xvar = sentencing_bw.sa_timedelta_days, 
                              sample_type='1')

sen_length_black =  rd_regress(yvar = sen_black_rows['sentencing_term_d'], 
                               xvar = sen_black_rows['sa_timedelta_days'], 
                               sample_type='2')

sen_length_white =  rd_regress(yvar = sen_white_rows['sentencing_term_d'], 
                               xvar = sen_white_rows['sa_timedelta_days'], 
                               sample_type='3')

# appending all results
sen_length_df = pd.concat([sen_length_full, sen_length_black, sen_length_white]).reset_index()
sen_length_df

# plotting the results 
rd_plot_sen_length_by_race = (
    ggplot(sen_length_df, aes(x = 'sample_type', y = 'Coeff', color = 'index')) +
    geom_pointrange(aes(ymin = 'CI Lower', ymax = 'CI Upper'), position = position_dodge(width = .3)) +
    geom_hline(aes(yintercept = 0)) +
    labs(x = 'Sample group', y = 'RD Coefficients', title = 'Sentence Length Regression Discontinuity Coefficients') +
    scale_x_discrete(labels = ['Full', 'Black', 'White']) +
    theme_classic() +
    guides(color = guide_legend(title = 'Estimate type'))
)

# showing the plot
rd_plot_sen_length_by_race

# saving the plot (explicit .png extension, consistent with the other saved figures)
rd_plot_sen_length_by_race.save('../output/rd_plot_sen_length_by_race.png', width = 5, height = 5, dpi = 125)

# getting the descriptive plot (probation indicator cast to int first)
plot4 = rd_plot(yvar = list(map(int, sentencing_bw.is_on_probation)), 
                xvar = sentencing_bw.sa_timedelta_days, 
                ytitle = "Prop. of defendants assigned to probation", 
                xtitle = "Days since Attorney Foxx assumed office", 
                title = "RD Plot: Foxx's Entry and Probation Rate")

# saving the plot
plot4.rdplot.save('../output/rd_plot_probation_prop.png', width = 5, height = 5, dpi = 125)

# analyzing the RD impact on probability of probation
# sample_type: '1' = full sample, '2' = Black, '3' = White
prob_black_rows = sentencing_bw[sentencing_bw.is_black == True]
prob_white_rows = sentencing_bw[sentencing_bw.is_black == False]

prob_full =  rd_regress(yvar = sentencing_bw.is_on_probation, 
                        xvar = sentencing_bw.sa_timedelta_days, 
                        sample_type='1')

prob_black =  rd_regress(yvar = prob_black_rows['is_on_probation'], 
                         xvar = prob_black_rows['sa_timedelta_days'], 
                         sample_type='2')

prob_white =  rd_regress(yvar = prob_white_rows['is_on_probation'], 
                         xvar = prob_white_rows['sa_timedelta_days'], 
                         sample_type='3')

# appending all results
prob_df = pd.concat([prob_full, prob_black, prob_white]).reset_index()

# plotting the results 
rd_plot_probation_by_race = (
    ggplot(prob_df, aes(x = 'sample_type', y = 'Coeff', color = 'index')) +
    geom_pointrange(aes(ymin = 'CI Lower', ymax = 'CI Upper'), position = position_dodge(width = .3)) +
    geom_hline(aes(yintercept = 0)) +
    labs(x = 'Sample group', y = 'RD Coefficients', title = 'Probation Rate Regression Discontinuity Coefficients') +
    scale_x_discrete(labels = ['Full', 'Black', 'White']) +
    theme_classic() +
    guides(color = guide_legend(title = 'Estimate type'))
)

# showing the plot
rd_plot_probation_by_race

# saving the plot (explicit .png extension, consistent with the other saved figures)
rd_plot_probation_by_race.save('../output/rd_plot_probation_by_race.png', width = 5, height = 5, dpi = 125)

# converting bool to numeric as RDrobust doesnt work nicely with bool?
# (anything that is not exactly True, including missing values, becomes 0)
for bool_col in ('is_black', 'is_female'):
    intake_bw[bool_col + '_int'] = np.where(intake_bw[bool_col] == True, 1, 0)

# generating 0/1 indicators for top 5 crimes ?
offense_indicator_labels = {
    'is_narc_offense': 'Narcotics',
    'is_uuw_offense': 'UUW - Unlawful Use of Weapon',
    'is_rtheft_offense': 'Retail Theft',
    'is_burg_offense': 'Burglary',
    'is_agdui_offense': 'Aggravated DUI',
}
for indicator_col, offense_label in offense_indicator_labels.items():
    intake_bw[indicator_col] = np.where(intake_bw.UPDATE_OFFENSE_CATEGORY == offense_label, 1, 0)

# RD checks on defendant characteristics.
# sample_type: '1' = full sample, '2' = Black, '3' = White
chars_black_rows = intake_bw[intake_bw.is_black == True]
chars_white_rows = intake_bw[intake_bw.is_black == False]
chars_running_full = intake_bw.sa_timedelta_days
chars_running_black = chars_black_rows['sa_timedelta_days']
chars_running_white = chars_white_rows['sa_timedelta_days']

# discontinuity in black proportion ? (full sample only)
race_full = rd_regress(yvar=intake_bw.is_black_int, xvar=chars_running_full, sample_type='1')

# discontinuity in age ?
age_full = rd_regress(yvar=intake_bw.age_cleaned, xvar=chars_running_full, sample_type='1')
age_black = rd_regress(yvar=chars_black_rows['age_cleaned'], xvar=chars_running_black, sample_type='2')
age_white = rd_regress(yvar=chars_white_rows['age_cleaned'], xvar=chars_running_white, sample_type='3')

# discontinuity in gender prop?
sex_full = rd_regress(yvar=intake_bw.is_female_int, xvar=chars_running_full, sample_type='1')
sex_black = rd_regress(yvar=chars_black_rows['is_female_int'], xvar=chars_running_black, sample_type='2')
sex_white = rd_regress(yvar=chars_white_rows['is_female_int'], xvar=chars_running_white, sample_type='3')

# stacking all of the above into one table
chars_result_tables = [
    race_full,
    age_full, age_black, age_white,
    sex_full, sex_black, sex_white,
]
chars_df = pd.concat(chars_result_tables)

# discontinuity in crime types ?
cols = ['is_narc_offense', 'is_uuw_offense', 'is_rtheft_offense', 
        'is_burg_offense', 'is_agdui_offense']

# the race subsets do not change between iterations, so build them once
offense_black_rows = intake_bw[intake_bw.is_black == True]
offense_white_rows = intake_bw[intake_bw.is_black == False]

# collect every result table and concatenate once at the end, instead of
# growing a DataFrame (starting from an empty one) inside the loop
offense_results = []

# one full / Black / White regression per offense indicator
# sample_type: '1' = full sample, '2' = Black, '3' = White
for offense in cols:
    
    # for full sample regression
    df_full = rd_regress(yvar = intake_bw[offense], 
                         xvar = intake_bw['sa_timedelta_days'], 
                         sample_type='1')
    
    # for blacks sample regression
    df_black = rd_regress(yvar = offense_black_rows[offense], 
                          xvar = offense_black_rows['sa_timedelta_days'], 
                          sample_type='2')
    
    # for white sample regression
    df_white = rd_regress(yvar = offense_white_rows[offense], 
                          xvar = offense_white_rows['sa_timedelta_days'], 
                          sample_type='3')
    
    offense_results.extend([df_full, df_black, df_white])

# rows appear in the same order as the original incremental concat
offenses_df = pd.concat(offense_results)
    
# facet label for each outcome: Black share, age, or (everything else) female share
chars_facet_conditions = [
    chars_df.outcome_var == "is_black_int",
    chars_df.outcome_var == "age_cleaned",
]
chars_df['facets'] = np.select(chars_facet_conditions,
                               ["Prop. Black", "Age"],
                               default="Prop. Female")

# coefficient plot, one facet per characteristic, with a zero reference line
chars_plot_data = chars_df.reset_index()
rd_plot_discont_chars = (
    ggplot(chars_plot_data, aes(x='sample_type', y='Coeff', color='index'))
    + geom_pointrange(aes(ymin='CI Lower', ymax='CI Upper'),
                      position=position_dodge(width=.3))
    + geom_hline(aes(yintercept=0))
    + facet_wrap("facets", scales="free_y")
    + labs(x='Sample group',
           y='RD Coefficients',
           title='Characteristics Regression Discontinuity Coefficients')
    + scale_x_discrete(labels=['Full', 'Black', 'White'])
    + theme_classic()
    + theme(panel_spacing=.35)
    + guides(color=guide_legend(title='Estimate type'))
)

# display
rd_plot_discont_chars

# save
rd_plot_discont_chars.save('../output/rd_plot_discont_chars.png', width=5, height=5, dpi=125)

# facet label for each offense indicator; anything not matched falls back to "Agg. DUI"
offense_facet_conditions = [
    offenses_df.outcome_var == "is_narc_offense",
    offenses_df.outcome_var == "is_uuw_offense",
    offenses_df.outcome_var == "is_rtheft_offense",
    offenses_df.outcome_var == "is_burg_offense",
]
offense_facet_names = ["Narcs", "UUW", "Retail theft", "Burglary"]
offenses_df['facets'] = np.select(offense_facet_conditions,
                                  offense_facet_names,
                                  default="Agg. DUI")

# coefficient plot, one facet per offense (two rows), with a zero reference line
offense_plot_data = offenses_df.reset_index()
rd_plot_discont_offense = (
    ggplot(offense_plot_data, aes(x='sample_type', y='Coeff', color='index'))
    + geom_pointrange(aes(ymin='CI Lower', ymax='CI Upper'),
                      position=position_dodge(width=.3))
    + geom_hline(aes(yintercept=0))
    + facet_wrap("facets", scales="free_y", nrow=2)
    + labs(x='Sample group',
           y='RD Coefficients',
           title='Crime Type Regression Discontinuity Coefficients')
    + scale_x_discrete(labels=['Full', 'Black', 'White'])
    + theme_classic()
    + theme(panel_spacing=.35)
    + guides(color=guide_legend(title='Estimate type'))
)

# display
rd_plot_discont_offense

# save
rd_plot_discont_offense.save('../output/rd_plot_discont_offense.png', width=5, height=5, dpi=125)


Unnamed: 0,CASE_ID,CASE_PARTICIPANT_ID,RECEIVED_DATE,OFFENSE_CATEGORY,PRIMARY_CHARGE_FLAG,CHARGE_ID,CHARGE_VERSION_ID,DISPOSITION_CHARGED_OFFENSE_TITLE,CHARGE_COUNT,DISPOSITION_DATE,...,is_black,is_hisp,is_white,is_female,age_cleaned,sentence_date,sentence_year,sentence_month,sentence_day,sentence_ym
0,198055620664,85937621020,08/15/1984 12:00:00 AM,PROMIS Conversion,False,1242195814523,155656315869,FIRST DEGREE MURDER,2,12/17/2014 12:00:00 AM,...,True,False,False,0.0,27.0,1986-06-02,1986,6,2,1986-06
1,198055620664,85937621020,08/15/1984 12:00:00 AM,PROMIS Conversion,False,1242198287388,131513547452,HOME INVASION,14,12/17/2014 12:00:00 AM,...,True,False,False,0.0,27.0,1986-06-02,1986,6,2,1986-06
2,198055620664,85937621020,08/15/1984 12:00:00 AM,PROMIS Conversion,False,1242351605056,176626576281,FIRST DEGREE MURDER,4,12/17/2014 12:00:00 AM,...,True,False,False,0.0,27.0,1986-06-02,1986,6,2,1986-06
3,198055620664,85937621020,08/15/1984 12:00:00 AM,PROMIS Conversion,False,1242352841488,176617824190,FIRST DEGREE MURDER,5,12/17/2014 12:00:00 AM,...,True,False,False,0.0,27.0,1986-06-02,1986,6,2,1986-06
4,198055620664,85937621020,08/15/1984 12:00:00 AM,PROMIS Conversion,False,1242356550787,131238606761,HOME INVASION,13,12/17/2014 12:00:00 AM,...,True,False,False,0.0,27.0,2014-10-16,2014,10,16,2014-10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
272289,597320386308,452719800958,07/18/2022 12:00:00 AM,Retail Theft,True,11294724616133,1148300237684,RETAIL THEFT,1,09/02/2022 12:00:00 AM,...,True,False,False,0.0,54.0,2022-09-02,2022,9,2,2022-09
272290,597328542979,452727325242,07/18/2022 12:00:00 AM,Aggravated Fleeing and Eluding,True,11295052270826,1148334245809,AGGRAVATED FLEEING OR ATTEMPT TO ELUDE A PEACE...,1,09/06/2022 12:00:00 AM,...,True,False,False,0.0,19.0,2022-09-06,2022,9,6,2022-09
272291,597341166398,452739650297,07/19/2022 12:00:00 AM,Criminal Damage to Property,True,11295731072434,1148405887926,CRIMINAL DAMAGE TO GOVERNMENT SUPPORTED PROPERTY,1,08/23/2022 12:00:00 AM,...,True,False,False,0.0,27.0,2022-08-24,2022,8,24,2022-08
272292,597341166398,452739650297,07/19/2022 12:00:00 AM,Criminal Damage to Property,True,11295731072434,1148405887926,CRIMINAL DAMAGE TO GOVERNMENT SUPPORTED PROPERTY,1,08/23/2022 12:00:00 AM,...,True,False,False,0.0,27.0,2022-08-23,2022,8,23,2022-08
