In [2]:
## imports 
import pandas as pd
import numpy as np
import yaml
import plotnine
from plotnine import *

## way to connect to mysql 
import mysql.connector

## function to feed path name to load
## credentials
def load_creds(path: str):
    """Read a YAML credentials file and return its parsed contents.

    Parameters
    ----------
    path : str
        Path of the YAML file to open.

    Returns
    -------
    object
        Whatever ``yaml.safe_load`` produces for the file's contents.

    Raises
    ------
    OSError
        If the file cannot be opened (e.g. it does not exist).
    yaml.YAMLError
        If the file is not valid YAML. The error is printed and then
        re-raised so the caller sees the real parse problem rather than
        an unrelated failure on an unbound variable.
    """
    with open(path, 'r') as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            ## keep the printed message, but do not swallow the error:
            ## there is nothing valid to return at this point
            print(exc)
            raise


# Preliminary: define connection and read sample of data

In [3]:
## load the database credentials from the yaml file;
## edit the path below if your copy is stored somewhere else
creds = load_creds(path="../private_data/creds_forclass.yaml")

In [4]:
## open a connection to the database, using the settings
## stored under the 'practice_database' key of the creds
db_settings = creds['practice_database']
cnx = mysql.connector.connect(
    user=db_settings['db_user'],
    password=db_settings['db_password'],
    port=db_settings['port'],
    database=db_settings['database'],
    host=db_settings['host'],
)

## show the connection object as the cell output
cnx

<mysql.connector.connection.MySQLConnection at 0x7f8a49d22a60>

In [5]:
## query that pulls the first 5 rows of the caseinit table;
## written line by line so the whitespace inside the query
## text is explicit
sample_case_q = (
    "\n"
    "select * \n"
    "from caseinit \n"
    "limit 5\n"
)

In [6]:
## run the sample query over the open database connection
## and collect the result as a dataframe
read_sample_d = pd.read_sql_query(sql=sample_case_q, con=cnx)

## list the column names, then display the first rows
print(read_sample_d.columns)
read_sample_d.head()

Index(['CASE_ID', 'CASE_PARTICIPANT_ID', 'RECEIVED_DATE', 'OFFENSE_CATEGORY',
       'PRIMARY_CHARGE_FLAG', 'CHARGE_ID', 'CHARGE_VERSION_ID',
       'CHARGE_OFFENSE_TITLE', 'CHARGE_COUNT', 'CHAPTER', 'ACT', 'SECTION',
       'CLASS', 'AOIC', 'EVENT', 'EVENT_DATE', 'FINDING_NO_PROBABLE_CAUSE',
       'ARRAIGNMENT_DATE', 'BOND_DATE_INITIAL', 'BOND_DATE_CURRENT',
       'BOND_TYPE_INITIAL', 'BOND_TYPE_CURRENT', 'BOND_AMOUNT_INITIAL',
       'BOND_AMOUNT_CURRENT', 'BOND_ELECTRONIC_MONITOR_FLAG_INITIAL',
       'BOND_ELECTROINIC_MONITOR_FLAG_CURRENT', 'AGE_AT_INCIDENT', 'RACE',
       'GENDER', 'INCIDENT_CITY', 'INCIDENT_BEGIN_DATE', 'INCIDENT_END_DATE',
       'LAW_ENFORCEMENT_AGENCY', 'LAW_ENFORCEMENT_UNIT', 'ARREST_DATE',
       'FELONY_REVIEW_DATE', 'FELONY_REVIEW_RESULT',
       'UPDATED_OFFENSE_CATEGORY', 'is_in_diversion'],
      dtype='object')


Unnamed: 0,CASE_ID,CASE_PARTICIPANT_ID,RECEIVED_DATE,OFFENSE_CATEGORY,PRIMARY_CHARGE_FLAG,CHARGE_ID,CHARGE_VERSION_ID,CHARGE_OFFENSE_TITLE,CHARGE_COUNT,CHAPTER,...,INCIDENT_CITY,INCIDENT_BEGIN_DATE,INCIDENT_END_DATE,LAW_ENFORCEMENT_AGENCY,LAW_ENFORCEMENT_UNIT,ARREST_DATE,FELONY_REVIEW_DATE,FELONY_REVIEW_RESULT,UPDATED_OFFENSE_CATEGORY,is_in_diversion
0,403856809175,772672439302,12/14/2013 12:00:00 AM,Retail Theft,True,376264971402,700415450556,RETAIL THEFT,1,720,...,Wheeling,12/14/2013 12:00:00 AM,,WHEELING PD,,12/14/2013 5:04:00 PM,12/14/2013 12:00:00 AM,Approved,Retail Theft,True
1,428224258020,850034064887,12/12/2017 12:00:00 AM,Aggravated Battery Police Officer Firearm,True,419642559930,782745872976,AGGRAVATED BATTERY,1,720,...,Elmwood Park,12/11/2017 12:00:00 AM,,ELMWOOD PARK PD,,12/11/2017 6:29:00 AM,12/12/2017 12:00:00 AM,Approved,Aggravated Battery Police Officer,True
2,408610348048,787586552750,8/27/2014 12:00:00 AM,Narcotics,True,383801260109,714683573367,POSSESSION OF A CONTROLLED SUBSTANCE,1,720,...,Chicago,8/25/2014 12:00:00 AM,,CHICAGO PD,District 9 - Deering,8/25/2014 10:05:00 PM,,,Narcotics,True
3,432664409023,863972841464,9/12/2018 12:00:00 AM,Narcotics,True,427773058046,798230331537,POSSESSION OF A CONTROLLED SUBSTANCE,1,720,...,Chicago,9/5/2018 12:00:00 AM,,CHICAGO PD,District 10 - Ogden,9/5/2018 5:09:00 PM,,,Narcotics,True
4,424815026813,839098495399,3/31/2017 12:00:00 AM,Identity Theft,False,413714945261,771399945421,IDENTITY THEFT,19,720,...,Orland Hills,2/13/2017 12:00:00 AM,,ORLAND PARK PD,,5/8/2017 8:30:00 AM,05/09/2017 12:00:00 AM,Approved,Theft,True


# Activity 1

- When pulling from the `caseinit` table, create a new column, `in_chicago`, that takes the value "YES" if `INCIDENT_CITY` = Chicago and "NO" otherwise (the latter represents incidents in Cook County suburbs outside the city limits)
- Use that column, along with the `is_in_diversion` column, to find the rate of diversions by whether the incident took place in Chicago or the suburbs
- Similarly, find the rate of diversions by city versus suburb and race 


# Activity 2 

- Use the following crosswalk and `CASE` to create a new variable `DIVERSION_PROGRAM_TEXT` that spells out the diversion programs

DC: Drug Court

DPPP: Drug Deferred Prosecution

DS: Drug School

RJCC: Restorative Justice

MHC: Mental Health Court

VC: Veteran Court

- Filter to rows where `UPDATED_OFFENSE_CATEGORY` is Narcotics and the defendant's `RACE` is Black or White

- Among defendants offered diversion (that is, defendants from `caseinit` who also appear in the diversions table), find the percent sent to each diversion program, separately by `RACE`
