# Prosecution severity - Based on sentencing Days

### Prosecution severity
##### Data:
1. case_offense_v01.csv (merged in bonds_analysis_01.ipynb, but ideally can be merged anywhere?)
2. ACIS_offence_codes_FIXED.csv (from the 00_raw_data folder that Clarissa emailed)
 
##### Steps
1. Load Data 
    * Load the files listed above
 
2. Create sentencing days for each offense class
    * Use the felony chart, adjusting for priors?
    * Create a dataframe of sentencing days keyed by offense code, stacking the rows for convicted and charged offense codes. 
    * Group by offense code and take the minimum of both the min and the max sentencing days.  
    * **What is the right metric when there are multiple sentencing days for one offense class? Currently using the minimum.**
    * **Should I be referring to the felony chart instead? How should priors be adjusted for?**

3. Merge with convicted and charged offense classes
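    * Rename the columns of the sentencing df (step #2 output) to min_char_offense_sentences, max_char_offense_sentences and charged_offense_code, then left-merge onto the offenses on charged_offense_code. 
    * Repeat the same for convicted offenses (min_conv_offense_sentences, max_conv_offense_sentences, convicted_offense_code). 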

4. Data Quality Fixes
    * Fix Offense class strings ("1.0" to "1" etc.)
    * Filter for gender values (M/F/U). Drop "X" (~500 cases). 
 
5.  Determining Delta
    * Based on sentencing days for an offense code
        * "delta_min" is "min_conv_offense_sentences" minus "min_char_offense_sentences", i.e. the minimum sentencing days of the convicted offense minus the minimum sentencing days of the charged offense
        * "delta_max" is "max_conv_offense_sentences" minus "max_char_offense_sentences", i.e. the maximum sentencing days of the convicted offense minus the maximum sentencing days of the charged offense
    * In cases where the charged offense code matches the convicted offense code, set delta_min and delta_max to zero

In [21]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler
pd.set_option('display.max_columns', None) 
# I am blind without this
%config InlineBackend.figure_format = 'retina' 

### Load the required files
# Both inputs are read from the same intermediate-data directory.
file_dir = "/workspaces/esg-controversy-tracker/nc_acis-main/20_intermediate_data"
offenses_path = os.path.join(file_dir, "case_offense_v01.csv")
offense_codes_path = os.path.join(file_dir, 'ACIS_offence_codes_FIXED.csv')

# The first CSV column becomes the row index. low_memory=False makes pandas
# parse the file in one pass rather than in chunks, which avoids per-chunk
# (mixed) dtype inference.
offenses = pd.read_csv(offenses_path, index_col=[0], low_memory=False)
# Offense-code descriptions; not referenced by the cells shown in this section.
offense_desc_final = pd.read_csv(offense_codes_path)

# subset
# Columns carried forward into the severity analysis, in display order.
keep_columns = [
    "case_id",
    "date_of_birth",
    "key_county_num",
    "key_year",
    "court_type",
    "race",
    "sex",
    "process_served",
    "case_creation_date",
    "case_trial_date",
    "court_attorney_type",
    "bond_type",
    "bond_amount",
    "charged_offense_date",
    "charged_offense_code",
    "offense_class",
    "min_sentence",
    "max_sentence",
    "convicted_offense_code",
    "disposition",
    "disposition_date",
    "min_sentence_day",
    "max_sentence_day",
]
offs = offenses[keep_columns]
offs.head(5)

### 2. Create sentencing days for each offense class


In [None]:
# Build a lookup table: one row per offense code with the smallest observed
# min/max sentence days for that code.

# Shared renames so convicted and charged rows end up with identical columns
# and can be stacked.
sentence_day_renames = {
    "min_sentence_day": "min_offense_code_sentencing",
    "max_sentence_day": "max_offense_code_sentencing",
}

convicted_sentencing = (
    offs[["min_sentence_day", "max_sentence_day", "convicted_offense_code"]]
    .copy()
    .rename(columns={"convicted_offense_code": "offense_code", **sentence_day_renames})
)

# NOTE(review): the same min/max sentence-day columns are paired with the
# charged code here as well - confirm that the recorded sentence days are
# meaningful for the charged offense when it differs from the convicted one.
charged_sentencing = (
    offs[["min_sentence_day", "max_sentence_day", "charged_offense_code"]]
    .copy()
    .rename(columns={"charged_offense_code": "offense_code", **sentence_day_renames})
)

# Stack both views, discard rows with any missing value (code or sentence
# days), and renumber the rows from 0.
stacked_sentencing = (
    pd.concat([convicted_sentencing, charged_sentencing])
    .dropna()
    .reset_index(drop=True)
)

# Per offense code, take the minimum of each sentencing column (so the "max"
# column holds the smallest observed max sentence for that code).
lookup_columns = ["offense_code", "min_offense_code_sentencing", "max_offense_code_sentencing"]
sentencing_df = stacked_sentencing[lookup_columns].groupby("offense_code").min().reset_index()
sentencing_df.head()

Unnamed: 0,offense_code,min_offense_code_sentencing,max_offense_code_sentencing
0,920.0,1140.0,1740.0
1,922.0,420.0,600.0
2,930.0,5040.0,6330.0
3,935.0,1320.0,1950.0
4,940.0,480.0,600.0


### 3. Merge with charged and convicted offense codes

In [None]:
# Attach the per-code sentencing lookup to every case twice: once keyed on the
# charged offense code and once keyed on the convicted offense code.

# Lookup renamed so its columns describe the *charged* offense.
charged_sentencing = sentencing_df.rename(
    columns={
        "min_offense_code_sentencing": "min_char_offense_sentences",
        "max_offense_code_sentencing": "max_char_offense_sentences",
        "offense_code": "charged_offense_code",
    }
)

# The join key is named explicitly: without `on=`, pandas joins on every column
# the two frames share, so an accidental name overlap would silently change the
# join. validate="many_to_one" raises if the lookup ever holds a duplicated
# code, which would otherwise duplicate case rows.
offenses_ranked = pd.merge(
    offs,
    charged_sentencing,
    how="left",
    on="charged_offense_code",
    validate="many_to_one",
)

# Same lookup, renamed to describe the *convicted* offense.
convicted_sentencing = sentencing_df.rename(
    columns={
        "min_offense_code_sentencing": "min_conv_offense_sentences",
        "max_offense_code_sentencing": "max_conv_offense_sentences",
        "offense_code": "convicted_offense_code",
    }
)

# Left join keeps every case; codes absent from the lookup get NaN sentences.
offenses_ranked = pd.merge(
    offenses_ranked,
    convicted_sentencing,
    how="left",
    on="convicted_offense_code",
    validate="many_to_one",
)
offenses_ranked.sample(5)

Unnamed: 0,case_id,date_of_birth,key_county_num,key_year,court_type,race,sex,process_served,case_creation_date,case_trial_date,court_attorney_type,bond_type,bond_amount,charged_offense_date,charged_offense_code,offense_class,min_sentence,max_sentence,convicted_offense_code,disposition,disposition_date,min_sentence_day,max_sentence_day,min_char_offense_sentences,max_char_offense_sentences,min_conv_offense_sentences,max_conv_offense_sentences
436255,202014700221,2a70b352abcacba0265ebfce2ab338b4a58aba43320fab...,20,14,CR,W,M,C,2014-05-11,2014-06-24,R,,,2014-05-10,5441,3.0,,,5441.0,WC,2018-04-06,,,60.0,60.0,60.0,60.0
55180,2012058495,db6d60f3a28383d2b1c64737e234c0eb498be43b50803c...,0,12,CR,W,M,W,2012-12-31,2013-06-26,A,SEC,1.0,2012-06-01,3609,,,,,VD,2013-05-16,,,120.0,420.0,,
18326,2004020069,da6f3fdb519c29e074d2cce3e7f99c6f39c5b7dca956d4...,0,4,CR,H,M,C,2004-09-08,2005-03-01,,CSH,200.0,2004-09-02,5418,,,,,VD,2019-01-31,,,19.0,19.0,,
492539,302014703909,a9b2a01122fa20e36d883739bf817f6b5c58c887cdbbe1...,30,14,CR,W,M,C,2014-11-24,2015-01-26,,,,2014-11-23,4716,,,,,VD,2015-01-26,,,,,,
349588,2021050946,092ac5f304840970bc4133bae2ba4b6d469d85ab61a524...,0,21,CR,B,M,M,2021-02-25,2021-07-21,W,UNS,5000.0,2021-02-25,3400,,,,,,,,,,,,


### 5. Create delta: convicted-offense sentencing days minus charged-offense sentencing days

In [None]:

# Delta = convicted-offense sentencing days minus charged-offense sentencing
# days, computed separately from the min and the max lookup columns. The result
# is NaN wherever either side of the subtraction is missing.
min_conv_days = offenses_ranked["min_conv_offense_sentences"]
min_char_days = offenses_ranked["min_char_offense_sentences"]
offenses_ranked["delta_min"] = min_conv_days - min_char_days

max_conv_days = offenses_ranked["max_conv_offense_sentences"]
max_char_days = offenses_ranked["max_char_offense_sentences"]
offenses_ranked["delta_max"] = max_conv_days - max_char_days

# Where the charged and convicted codes are equal, both deltas are set to 0;
# this also overwrites any NaN left by a code missing from the lookup. Rows
# with a missing convicted code never satisfy the equality and are untouched.
same_code = offenses_ranked["charged_offense_code"] == offenses_ranked["convicted_offense_code"]
offenses_ranked.loc[same_code, ["delta_min", "delta_max"]] = 0
offenses_ranked.sample(5)

Unnamed: 0,case_id,date_of_birth,key_county_num,key_year,court_type,race,sex,process_served,case_creation_date,case_trial_date,court_attorney_type,bond_type,bond_amount,charged_offense_date,charged_offense_code,offense_class,min_sentence,max_sentence,convicted_offense_code,disposition,disposition_date,min_sentence_day,max_sentence_day,min_char_offense_sentences,max_char_offense_sentences,min_conv_offense_sentences,max_conv_offense_sentences,delta_min,delta_max
236800,2017711368,7f28ad62dea212a3413879c87209c5ebb44720b74f2586...,0,17,CR,W,F,C,2017-11-10,2018-07-20,R,,,2017-11-10,5461,,,,,VD,2018-05-24,,,,,,,,
230817,2017707135,b9963b28cf2fdf8327cfaad5bbb0f26e52bd473521b55c...,0,17,CR,W,M,C,2017-07-18,2017-08-30,,,,2017-07-18,5450,,,,,VD,2017-08-22,,,,,,,,
391177,102015051303,d4844c0335e702527c490e69b1d36d7ea90cfe89b5973e...,10,15,CRS,W,M,W,2015-10-01,2019-12-09,A,,,2015-09-23,2212,H,8.0,19.0,2212.0,JU,2016-06-16,240.0,570.0,90.0,150.0,90.0,150.0,0.0,0.0
350070,2021051221,3f1f1d0dcaad20fcd2c253610085cc5b545e10cb2b6981...,0,21,CR,B,M,M,2021-03-11,2021-07-28,A,SEC,1.0,2021-03-10,5641,,,,,,,,,45.0,45.0,,,,
223668,2017702017,cfdb6c698e398521f71dfeac27d2b2186962a7668e598a...,0,17,CR,H,M,C,2017-02-21,2017-04-07,,,,2017-02-21,5441,,,,,VL,2017-09-07,,,60.0,60.0,,,,


In [None]:
# Frequency of each distinct delta_min value, most common first.
# value_counts() excludes NaN by default, so rows without a delta are not shown.
offenses_ranked['delta_min'].value_counts()

In [None]:
# Frequency of each distinct delta_max value, most common first.
# value_counts() excludes NaN by default, so rows without a delta are not shown.
offenses_ranked['delta_max'].value_counts()