# Pre-amble

In [1]:
# loading the required packages
import pandas as pd
import numpy as np
import datetime
import random
import re
import os
import plotnine
from plotnine import *
import plotly.graph_objects as go

# for repeated printouts 
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# to custom displays of row-column df printouts
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# Data import

Here, we'll load our analysis-ready sentencing data, and print out some of the basic information about the data:

In [2]:
# loading the analysis-ready dataset
intake_analysis = pd.read_csv('../data/intake_analysis.csv')

# then, we filter only for the black/white defendants 
intake = intake_analysis[(intake_analysis.is_white == True) | 
                            (intake_analysis.is_black == True)].copy()

In [3]:
# printing out basic dataset info
# printing out the basic info
intake.shape
intake.dtypes

(369399, 40)

Unnamed: 0                   int64
CASE_ID                      int64
CASE_PARTICIPANT_ID          int64
RECEIVED_DATE               object
OFFENSE_CATEGORY            object
PARTICIPANT_STATUS          object
AGE_AT_INCIDENT            float64
RACE                        object
GENDER                      object
INCIDENT_CITY               object
INCIDENT_BEGIN_DATE         object
INCIDENT_END_DATE           object
LAW_ENFORCEMENT_AGENCY      object
LAW_ENFORCEMENT_UNIT        object
ARREST_DATE                 object
FELONY_REVIEW_DATE          object
FELONY_REVIEW_RESULT        object
UPDATE_OFFENSE_CATEGORY     object
is_black                    object
is_hispanic                   bool
is_white                    object
is_hisp                    float64
is_female                   object
age_cleaned                float64
felony_review_date          object
felony_review_year         float64
felony_review_month        float64
felony_review_day          float64
felony_review_ym    

In [4]:
# loading the analysis-ready dataset
sentencing_analysis = pd.read_csv('../data/sentencing_analysis.csv')

# we then filter for only black/white defendants
sentencing = sentencing_analysis[(sentencing_analysis.is_black == True) | 
                                    (sentencing_analysis.is_white == True)].copy()



In [5]:
# what's the shape of the data? 
print("Data shape: " + str(sentencing_analysis.shape))

Data shape: (186824, 76)


In [6]:
sentencing.dtypes

CASE_ID                                  int64
CASE_PARTICIPANT_ID                      int64
RECEIVED_DATE                           object
OFFENSE_CATEGORY                        object
PRIMARY_CHARGE_FLAG                       bool
CHARGE_ID                                int64
CHARGE_VERSION_ID                        int64
DISPOSITION_CHARGED_OFFENSE_TITLE_x     object
CHARGE_COUNT                             int64
DISPOSITION_DATE                        object
DISPOSITION_CHARGED_CHAPTER             object
DISPOSITION_CHARGED_ACT                 object
DISPOSITION_CHARGED_SECTION             object
DISPOSITION_CHARGED_CLASS               object
DISPOSITION_CHARGED_AOIC                object
CHARGE_DISPOSITION                      object
CHARGE_DISPOSITION_REASON               object
SENTENCE_JUDGE                          object
SENTENCE_COURT_NAME                     object
SENTENCE_COURT_FACILITY                 object
SENTENCE_PHASE                          object
SENTENCE_DATE

## Summary Tables

In [18]:
## recode gender and race for summary stats
intake['Gender'] = np.where(intake.is_female, 'Female', 'Male')
                            
intake['Race'] = np.where(intake.is_black, 'Black',
                         np.where(intake.is_white, 'White', 'Other'))
intake.GENDER.value_counts()
intake.Race.value_counts()

intake.fr_is_rejected.value_counts()

Male                          317418
Female                         51738
Unknown Gender                    16
Unknown                           16
Male name, no gender given        13
Name: GENDER, dtype: int64

Black    303727
White     65672
Name: Race, dtype: int64

False    344890
True      24509
Name: fr_is_rejected, dtype: int64

In [28]:
## get summary stats for different vars in intake and sentencing data

## intake

int_age = pd.DataFrame(intake['age_cleaned'].describe())
int_gender = pd.DataFrame(intake.Gender.value_counts())
int_race = pd.DataFrame(intake.Race.value_counts())
int_reject = pd.DataFrame(intake.fr_is_rejected.value_counts())

int_age.rename(columns = {'age_cleaned':'Age'},inplace = True)
int_reject.rename(columns = {'fr_is_rejected':'Felonies Rejected'},inplace = True)


int_age
int_gender
int_race
int_reject

Unnamed: 0,Age
count,361098.0
mean,34.179455
std,12.622652
min,17.0
25%,24.0
50%,31.0
75%,44.0
max,81.0


Unnamed: 0,Gender
Male,317431
Female,51968


Unnamed: 0,Race
Black,303727
White,65672


Unnamed: 0,Felonies Rejected
False,344890
True,24509


In [29]:
st_labs = ['Count', 'Mean', 'Standard Dev.', 'Minimum', '25th Percentile', 'Median', '75th Percentile', 'Maximum']
st_labs

['Count',
 'Mean',
 'Standard Dev.',
 'Minimum',
 '25th Percentile',
 'Median',
 '75th Percentile',
 'Maximum']

In [30]:
##


## age table
int_age_table = go.Figure(data=[go.Table(
    columnwidth=[1,2],
    header=dict(values=[['<b>Intake - Age</b>']],
                fill_color='ForestGreen',
                line_color='ForestGreen',
                align='right',
                font=dict(color='white')),
    cells=dict(values=[st_labs, int_age.Age],
               fill_color='FloralWhite',
               align='left'))
])
int_age_table.show()

## gender table
int_gender_table = go.Figure(data=[go.Table(
    columnwidth=[1,2],
    header=dict(values=[['<b>Intake - Gender</b>']],
                fill_color='ForestGreen',
                line_color='ForestGreen',
                align='right',
                font=dict(color='white')),
    cells=dict(values=[gender.index, int_gender.Gender],
               fill_color='FloralWhite',
               align='left'))
])
int_gender_table.show()

## race table
int_race_table = go.Figure(data=[go.Table(
    columnwidth=[1,2],
    header=dict(values=[['<b>Intake - Race</b>']],
                fill_color='ForestGreen',
                line_color='ForestGreen',
                align='right',
                font=dict(color='white')),
    cells=dict(values=[race.index, int_race.Race],
               fill_color='FloralWhite',
               align='left'))
])
int_race_table.show()

## rejected table
int_reject_table = go.Figure(data=[go.Table(
    columnwidth=[1,2],
    header=dict(values=[['<b>Intake - Felonies Rejected</b>']],
                fill_color='ForestGreen',
                line_color='ForestGreen',
                align='right',
                font=dict(color='white')),
    cells=dict(values=[race.index, int_reject.Race],
               fill_color='FloralWhite',
               align='left'))
])
int_race_table.show()

AttributeError: 'DataFrame' object has no attribute 'Race'

In [31]:
## recode gender and race for summary stats
sentencing['Gender'] = np.where(sentencing.is_female, 'Female', 'Male')
sentencing['Race'] = np.where(sentencing.is_black, 'Black',
                         np.where(sentencing.is_white, 'White', 'Other'))
sentencing.GENDER.value_counts()
sentencing.Race.value_counts()

Male                          130067
Female                         20137
Unknown                            7
Male name, no gender given         3
Unknown Gender                     3
Name: GENDER, dtype: int64

Black    122898
White     27347
Name: Race, dtype: int64

In [32]:


## get summary stats for different vars in sentencing and sentencing data

## sentencing

sent_age = pd.DataFrame(sentencing['age_cleaned'].describe())
sent_gender = pd.DataFrame(sentencing.Gender.value_counts())
sent_race = pd.DataFrame(sentencing.Race.value_counts())
sent_incarc = pd.DataFrame(sentencing.is_incarcerated.value_counts())
sent_probat = pd.DataFrame(sentencing.is_on_probation.value_counts())
sent_term = pd.DataFrame(sentencing.sentencing_term_d.describe())

age.rename(columns = {'age_cleaned':'Age'},inplace = True)

sent_age
sent_gender
sent_race
sent_incarc
sent_probat
sent_term

Unnamed: 0,age_cleaned
count,148328.0
mean,33.079055
std,12.193864
min,17.0
25%,23.0
50%,30.0
75%,42.0
max,81.0


Unnamed: 0,Gender
Male,130070
Female,20175


Unnamed: 0,Race
Black,122898
White,27347


Unnamed: 0,is_incarcerated
True,80840
False,69405


Unnamed: 0,is_on_probation
False,92023
True,58222


Unnamed: 0,sentencing_term_d
count,149221.0
mean,1115.991079
std,1693.549778
min,0.041667
25%,549.0
50%,730.0
75%,1095.0
max,147825.0


In [33]:
##


## age table
sent_age_table = go.Figure(data=[go.Table(
    columnwidth=[1,2],
    header=dict(values=[['<b>Sentencing - Age</b>']],
                fill_color='ForestGreen',
                line_color='ForestGreen',
                align='right',
                font=dict(color='white')),
    cells=dict(values=[st_labs, age.Age],
               fill_color='FloralWhite',
               align='left'))
])
sent_age_table.show()

## gender table
sent_gender_table = go.Figure(data=[go.Table(
    columnwidth=[1,2],
    header=dict(values=[['<b>Sentencing - Gender</b>']],
                fill_color='ForestGreen',
                line_color='ForestGreen',
                align='right',
                font=dict(color='white')),
    cells=dict(values=[gender.index, gender.Gender],
               fill_color='FloralWhite',
               align='left'))
])
sent_gender_table.show()

## race table
sent_race_table = go.Figure(data=[go.Table(
    columnwidth=[1,2],
    header=dict(values=[['<b>Sentencing - Race</b>']],
                fill_color='ForestGreen',
                line_color='ForestGreen',
                align='right',
                font=dict(color='white')),
    cells=dict(values=[race.index, race.Race],
               fill_color='FloralWhite',
               align='left'))
])
sent_race_table.show()

In [34]:
## save the stuff

tables = [int_age_table, int_gender_table, int_race_table, 
         sent_age_table, sent_gender_table, sent_race_table]

##int
int_age_table.write_image("../output/int_age_table.png")
int_gender_table.write_image("../output/int_gender_table.png")
int_race_table.write_image("../output/int_race_table.png")

sent_age_table.write_image("../output/sent_age_table.png")
sent_gender_table.write_image("../output/sent_gender_table.png")
sent_race_table.write_image("../output/sent_race_table.png")



