Computing average and median sentences for minors compared to adults for each charge type

### Importing libraries

In [3]:
import pandas as pd
from collections import Counter

### Importing datasets

In [5]:
### columns that are forced to str when the CSVs are read
data_types = dict.fromkeys(
    ["ID", "CT_FLST_CHAP", "CT_FLST_SECT", "CT_FLST_SUBSECT"],
    str,
)

### columns removed from both cohorts right after loading
columns_to_drop = [
    "MESSAGE_TYPE",
    "ARREST_NAME",
    "FINAL_NAME",
    "PR_FLST_CHAP",
    "PR_FLST_SECT",
    "PR_FLST_SUBSECT",
    "ST_PROB_LEN",
    "ST_COMM_LEN",
    "ST_FINE",
    "ST_COURT_COST",
    "ST_RESTITUTION",
]

In [6]:
### loads each cohort, then discards every column listed in columns_to_drop
minors = (
    pd.read_csv("data/obts/minors_analysissentences.csv", dtype = data_types)
    .drop(columns = columns_to_drop)
    .reset_index(drop = True)
)
adults = (
    pd.read_csv("data/obts/adults_analysissentences.csv", dtype = data_types)
    .drop(columns = columns_to_drop)
    .reset_index(drop = True)
)

  minors = pd.read_csv("data/obts/minors_analysissentences.csv", dtype = data_types)
  adults = pd.read_csv("data/obts/adults_analysissentences.csv", dtype = data_types)


In [7]:
def _prepare_felony_sentences(df):
    """Filter one cohort and attach its CHARGE citation.

    Keeps rows whose CT_CHARGE_LEVEL is "F", whose ST_IMPOSED_YR is greater
    than 2007 and whose CT_ACTION_TAKEN is not "K", tidies the statute
    columns, and returns the result as a new frame with a fresh 0..n-1 index.
    """
    ### filters for felonies
    out = df[df["CT_CHARGE_LEVEL"] == "F"]

    ### filters for specific years
    out = out[out["ST_IMPOSED_YR"] > 2007]

    ### removes juvenile sanctions
    out = out[out["CT_ACTION_TAKEN"] != "K"].reset_index(drop = True)

    ### modifies statute citations: chapter keeps only the text before the first "."
    ### with leading zeros stripped; subsection loses parentheses, is lower-cased,
    ### and missing values become empty strings
    out["CT_FLST_CHAP"] = out["CT_FLST_CHAP"].str.split(".").str[0].str.lstrip("0")
    subsection = out["CT_FLST_SUBSECT"]
    for paren in ("(", ")"):
        subsection = subsection.str.replace(paren, "", regex = False)
    out["CT_FLST_SUBSECT"] = subsection.str.lower().fillna("")

    ### creates and stores complete citation as DEGREE-CHAPTER-SECTION-SUBSECTION
    ### NOTE(review): CT_FLST_SECT is used exactly as read; the displayed results contain
    ### both "04" and "4.0" style sections, so one statute may be split across several
    ### CHARGE values -- confirm whether that is intended
    citation = out["CT_CHARGE_DEGREE"]
    for part in ("CT_FLST_CHAP", "CT_FLST_SECT", "CT_FLST_SUBSECT"):
        citation = citation + "-" + out[part]
    out["CHARGE"] = citation

    return out


minors = _prepare_felony_sentences(minors)
adults = _prepare_felony_sentences(adults)

### Analyzing data

#### All felonies

In [10]:
### the all-felonies comparison works on copies of the prepared cohorts
minors_df, adults_df = minors.copy(), adults.copy()

### distinct charges seen among minors drive the comparison
minor_charges = minors_df["CHARGE"].unique()
charges = minor_charges.tolist()

In [11]:
### column layout of the per-charge summary table
result_columns = ["CHARGE", "MINORS_AVG_DAYS", "ADULTS_AVG_DAYS", "MINORS_MEDIAN_DAYS", "ADULTS_MEDIAN_DAYS",
                  "MINORS_AVG_YEARS", "ADULTS_AVG_YEARS", "MINORS_MEDIAN_YEARS", "ADULTS_MEDIAN_YEARS",
                  "MINORS_INSTANCES", "ADULTS_INSTANCES"]

### collects one summary row per charge that appears in both cohorts
results = []
for charge in charges:

    ### filters by charge for adults
    dfa = adults_df[adults_df["CHARGE"] == charge].reset_index(drop = True)

    ### skips charges that have no adult counterpart
    if dfa.empty:
        continue

    ### filters by charge for minors
    dfm = minors_df[minors_df["CHARGE"] == charge].reset_index(drop = True)

    ### computes average sentences (years are derived from the already-rounded day count)
    m_avg_days = round(dfm["ST_DAYS"].mean())
    m_avg_years = round(m_avg_days/365, 1)
    a_avg_days = round(dfa["ST_DAYS"].mean())
    a_avg_years = round(a_avg_days/365, 1)

    ### computes median sentences
    m_med_days = round(dfm["ST_DAYS"].median())
    m_med_years = round(m_med_days/365, 1)
    a_med_days = round(dfa["ST_DAYS"].median())
    a_med_years = round(a_med_days/365, 1)

    ### storing datapoints in the same order as result_columns
    results.append([charge, m_avg_days, a_avg_days, m_med_days, a_med_days,
                    m_avg_years, a_avg_years, m_med_years, a_med_years,
                    len(dfm), len(dfa)])

### creating dataframe once, after the loop: rebuilding it per charge was quadratic, and
### when no charge matched it left results_df undefined (or stale from an earlier cell)
results_df = pd.DataFrame(results, columns = result_columns)

### removing rows with no corresponding figures
results_df = results_df[results_df["MINORS_AVG_DAYS"] != 0].reset_index(drop = True)
results_df = results_df[results_df["ADULTS_AVG_DAYS"] != 0].reset_index(drop = True)

### computing difference of average sentences
results_df["AVG_DIFF_YEARS"] = results_df["MINORS_AVG_YEARS"] - results_df["ADULTS_AVG_YEARS"]
results_df = results_df.sort_values(by = "AVG_DIFF_YEARS", ascending = False).reset_index(drop = True)

### exports dataframe
results_df.to_csv("data/obts/results/charges_all.csv", index = False)

results_df

Unnamed: 0,CHARGE,MINORS_AVG_DAYS,ADULTS_AVG_DAYS,MINORS_MEDIAN_DAYS,ADULTS_MEDIAN_DAYS,MINORS_AVG_YEARS,ADULTS_AVG_YEARS,MINORS_MEDIAN_YEARS,ADULTS_MEDIAN_YEARS,MINORS_INSTANCES,ADULTS_INSTANCES,AVG_DIFF_YEARS
0,S-316-193-3c3a,51645,5077,1825,3770,141.5,13.9,5.0,10.3,7,614,127.6
1,P-810-02-1 &,61598,23594,1198,1825,168.8,64.6,3.3,5.0,6,17,104.2
2,C-782-4.0-1a2,78659,46890,7300,12775,215.5,128.5,20.0,35.0,5,12,87.0
3,F-812-135-2b,46924,21606,3650,2920,128.6,59.2,10.0,8.0,8,60,69.4
4,S-800-4.0-4a1,16005,3063,2920,2920,43.8,8.4,8.0,8.0,27,774,35.4
...,...,...,...,...,...,...,...,...,...,...,...,...
1424,C-782-4.0-1a,17796,59068,15512,34683,48.8,161.8,42.5,95.0,4,24,-113.0
1425,L-812-13-1,11862,56915,11862,5475,32.5,155.9,32.5,15.0,2,7,-123.4
1426,F-787-1.0-,4015,59588,3650,10038,11.0,163.3,10.0,27.5,5,14,-152.3
1427,F-810-2.0-1b1,4380,62942,4380,3070,12.0,172.4,12.0,8.4,1,6,-160.4


#### Capital felonies

In [13]:
### restricts both cohorts to charge degree "C"
minors_df = minors.loc[minors["CT_CHARGE_DEGREE"].eq("C")].reset_index(drop = True)
adults_df = adults.loc[adults["CT_CHARGE_DEGREE"].eq("C")].reset_index(drop = True)

### distinct charges seen among minors drive the comparison
minor_charges = minors_df["CHARGE"].unique()
charges = minor_charges.tolist()

In [14]:
### column layout of the per-charge summary table
result_columns = ["CHARGE", "MINORS_AVG_DAYS", "ADULTS_AVG_DAYS", "MINORS_MEDIAN_DAYS", "ADULTS_MEDIAN_DAYS",
                  "MINORS_AVG_YEARS", "ADULTS_AVG_YEARS", "MINORS_MEDIAN_YEARS", "ADULTS_MEDIAN_YEARS",
                  "MINORS_INSTANCES", "ADULTS_INSTANCES"]

### collects one summary row per charge that appears in both cohorts
results = []
for charge in charges:

    ### filters by charge for adults
    dfa = adults_df[adults_df["CHARGE"] == charge].reset_index(drop = True)

    ### skips charges that have no adult counterpart
    if dfa.empty:
        continue

    ### filters by charge for minors
    dfm = minors_df[minors_df["CHARGE"] == charge].reset_index(drop = True)

    ### computes average sentences (years are derived from the already-rounded day count)
    m_avg_days = round(dfm["ST_DAYS"].mean())
    m_avg_years = round(m_avg_days/365, 1)
    a_avg_days = round(dfa["ST_DAYS"].mean())
    a_avg_years = round(a_avg_days/365, 1)

    ### computes median sentences
    m_med_days = round(dfm["ST_DAYS"].median())
    m_med_years = round(m_med_days/365, 1)
    a_med_days = round(dfa["ST_DAYS"].median())
    a_med_years = round(a_med_days/365, 1)

    ### storing datapoints in the same order as result_columns
    results.append([charge, m_avg_days, a_avg_days, m_med_days, a_med_days,
                    m_avg_years, a_avg_years, m_med_years, a_med_years,
                    len(dfm), len(dfa)])

### creating dataframe once, after the loop: rebuilding it per charge was quadratic, and
### when no charge matched, the previous section's results_df was exported under this name
results_df = pd.DataFrame(results, columns = result_columns)

### removing rows with no corresponding figures
results_df = results_df[results_df["MINORS_AVG_DAYS"] != 0].reset_index(drop = True)
results_df = results_df[results_df["ADULTS_AVG_DAYS"] != 0].reset_index(drop = True)

### computing difference of average sentences
results_df["AVG_DIFF_YEARS"] = results_df["MINORS_AVG_YEARS"] - results_df["ADULTS_AVG_YEARS"]
results_df = results_df.sort_values(by = "AVG_DIFF_YEARS", ascending = False).reset_index(drop = True)

### exports dataframe
results_df.to_csv("data/obts/results/charges_cap.csv", index = False)
results_df

Unnamed: 0,CHARGE,MINORS_AVG_DAYS,ADULTS_AVG_DAYS,MINORS_MEDIAN_DAYS,ADULTS_MEDIAN_DAYS,MINORS_AVG_YEARS,ADULTS_AVG_YEARS,MINORS_MEDIAN_YEARS,ADULTS_MEDIAN_YEARS,MINORS_INSTANCES,ADULTS_INSTANCES,AVG_DIFF_YEARS
0,C-782-4.0-1a2,78659,46890,7300,12775,215.5,128.5,20.0,35.0,5,12,87.0
1,C-782-04-1,11558,8030,12775,7300,31.7,22.0,35.0,20.0,3,7,9.7
2,C-782-04-2,7300,5110,7300,4380,20.0,14.0,20.0,12.0,2,4,6.0
3,C-782-04-,34683,34683,34683,34683,95.0,95.0,95.0,95.0,2,7,0.0
4,C-782-4.0-1,9755,11645,9125,7300,26.7,31.9,25.0,20.0,11,7,-5.2
5,C-794-011-2,4380,10709,4380,3650,12.0,29.3,12.0,10.0,3,44,-17.3
6,C-782-04-1a1,27874,39312,16425,34683,76.4,107.7,45.0,95.0,31,361,-31.3
7,C-794-011-2a,7566,23142,5256,9125,20.7,63.4,14.4,25.0,13,441,-42.7
8,C-782-4.0-1a1,25254,44552,14600,34683,69.2,122.1,40.0,95.0,29,260,-52.9
9,C-782-04-1a,9977,30147,8030,34683,27.3,82.6,22.0,95.0,3,36,-55.3


#### Life felonies

In [16]:
### restricts both cohorts to charge degree "L"
minors_df = minors.loc[minors["CT_CHARGE_DEGREE"].eq("L")].reset_index(drop = True)
adults_df = adults.loc[adults["CT_CHARGE_DEGREE"].eq("L")].reset_index(drop = True)

### distinct charges seen among minors drive the comparison
minor_charges = minors_df["CHARGE"].unique()
charges = minor_charges.tolist()

In [17]:
### column layout of the per-charge summary table
result_columns = ["CHARGE", "MINORS_AVG_DAYS", "ADULTS_AVG_DAYS", "MINORS_MEDIAN_DAYS", "ADULTS_MEDIAN_DAYS",
                  "MINORS_AVG_YEARS", "ADULTS_AVG_YEARS", "MINORS_MEDIAN_YEARS", "ADULTS_MEDIAN_YEARS",
                  "MINORS_INSTANCES", "ADULTS_INSTANCES"]

### collects one summary row per charge that appears in both cohorts
results = []
for charge in charges:

    ### filters by charge for adults
    dfa = adults_df[adults_df["CHARGE"] == charge].reset_index(drop = True)

    ### skips charges that have no adult counterpart
    if dfa.empty:
        continue

    ### filters by charge for minors
    dfm = minors_df[minors_df["CHARGE"] == charge].reset_index(drop = True)

    ### computes average sentences (years are derived from the already-rounded day count)
    m_avg_days = round(dfm["ST_DAYS"].mean())
    m_avg_years = round(m_avg_days/365, 1)
    a_avg_days = round(dfa["ST_DAYS"].mean())
    a_avg_years = round(a_avg_days/365, 1)

    ### computes median sentences
    m_med_days = round(dfm["ST_DAYS"].median())
    m_med_years = round(m_med_days/365, 1)
    a_med_days = round(dfa["ST_DAYS"].median())
    a_med_years = round(a_med_days/365, 1)

    ### storing datapoints in the same order as result_columns
    results.append([charge, m_avg_days, a_avg_days, m_med_days, a_med_days,
                    m_avg_years, a_avg_years, m_med_years, a_med_years,
                    len(dfm), len(dfa)])

### creating dataframe once, after the loop: rebuilding it per charge was quadratic, and
### when no charge matched, the previous section's results_df was exported under this name
results_df = pd.DataFrame(results, columns = result_columns)

### removing rows with no corresponding figures
results_df = results_df[results_df["MINORS_AVG_DAYS"] != 0].reset_index(drop = True)
results_df = results_df[results_df["ADULTS_AVG_DAYS"] != 0].reset_index(drop = True)

### computing difference of average sentences
results_df["AVG_DIFF_YEARS"] = results_df["MINORS_AVG_YEARS"] - results_df["ADULTS_AVG_YEARS"]
results_df = results_df.sort_values(by = "AVG_DIFF_YEARS", ascending = False).reset_index(drop = True)

### exports dataframe
results_df.to_csv("data/obts/results/charges_life.csv", index = False)

results_df

Unnamed: 0,CHARGE,MINORS_AVG_DAYS,ADULTS_AVG_DAYS,MINORS_MEDIAN_DAYS,ADULTS_MEDIAN_DAYS,MINORS_AVG_YEARS,ADULTS_AVG_YEARS,MINORS_MEDIAN_YEARS,ADULTS_MEDIAN_YEARS,MINORS_INSTANCES,ADULTS_INSTANCES,AVG_DIFF_YEARS
0,L-787-011A-2,21900,14600,21900,14600,60.0,40.0,60.0,40.0,2,1,20.0
1,L-787-01-3a,14600,7300,14600,7300,40.0,20.0,40.0,20.0,2,1,20.0
2,L-812-133-2a,18250,11560,18250,7300,50.0,31.7,50.0,20.0,1,6,18.3
3,L-800-4-5b,10220,4198,10220,4198,28.0,11.5,28.0,11.5,1,2,16.5
4,L-794-011-2a,9170,5825,9125,5840,25.1,16.0,25.0,16.0,6,20,9.1
...,...,...,...,...,...,...,...,...,...,...,...,...
70,L-787-1.0-1a3,3924,17941,4562,3798,10.8,49.2,12.5,10.4,4,45,-38.4
71,L-812-13-,1824,17835,2190,2578,5.0,48.9,6.0,7.1,7,50,-43.9
72,L-782-04-3,5475,27924,5475,7300,15.0,76.5,15.0,20.0,1,17,-61.5
73,L-812-13-1,11862,56915,11862,5475,32.5,155.9,32.5,15.0,2,7,-123.4


#### Felonies punishable by life

In [19]:
### restricts both cohorts to charge degree "P"
minors_df = minors.loc[minors["CT_CHARGE_DEGREE"].eq("P")].reset_index(drop = True)
adults_df = adults.loc[adults["CT_CHARGE_DEGREE"].eq("P")].reset_index(drop = True)

### distinct charges seen among minors drive the comparison
minor_charges = minors_df["CHARGE"].unique()
charges = minor_charges.tolist()

In [20]:
### column layout of the per-charge summary table
result_columns = ["CHARGE", "MINORS_AVG_DAYS", "ADULTS_AVG_DAYS", "MINORS_MEDIAN_DAYS", "ADULTS_MEDIAN_DAYS",
                  "MINORS_AVG_YEARS", "ADULTS_AVG_YEARS", "MINORS_MEDIAN_YEARS", "ADULTS_MEDIAN_YEARS",
                  "MINORS_INSTANCES", "ADULTS_INSTANCES"]

### collects one summary row per charge that appears in both cohorts
results = []
for charge in charges:

    ### filters by charge for adults
    dfa = adults_df[adults_df["CHARGE"] == charge].reset_index(drop = True)

    ### skips charges that have no adult counterpart
    if dfa.empty:
        continue

    ### filters by charge for minors
    dfm = minors_df[minors_df["CHARGE"] == charge].reset_index(drop = True)

    ### computes average sentences (years are derived from the already-rounded day count)
    m_avg_days = round(dfm["ST_DAYS"].mean())
    m_avg_years = round(m_avg_days/365, 1)
    a_avg_days = round(dfa["ST_DAYS"].mean())
    a_avg_years = round(a_avg_days/365, 1)

    ### computes median sentences
    m_med_days = round(dfm["ST_DAYS"].median())
    m_med_years = round(m_med_days/365, 1)
    a_med_days = round(dfa["ST_DAYS"].median())
    a_med_years = round(a_med_days/365, 1)

    ### storing datapoints in the same order as result_columns
    results.append([charge, m_avg_days, a_avg_days, m_med_days, a_med_days,
                    m_avg_years, a_avg_years, m_med_years, a_med_years,
                    len(dfm), len(dfa)])

### creating dataframe once, after the loop: rebuilding it per charge was quadratic, and
### when no charge matched, the previous section's results_df was exported under this name
results_df = pd.DataFrame(results, columns = result_columns)

### removing rows with no corresponding figures
results_df = results_df[results_df["MINORS_AVG_DAYS"] != 0].reset_index(drop = True)
results_df = results_df[results_df["ADULTS_AVG_DAYS"] != 0].reset_index(drop = True)

### computing difference of average sentences
results_df["AVG_DIFF_YEARS"] = results_df["MINORS_AVG_YEARS"] - results_df["ADULTS_AVG_YEARS"]
results_df = results_df.sort_values(by = "AVG_DIFF_YEARS", ascending = False).reset_index(drop = True)

### exports dataframe
results_df.to_csv("data/obts/results/charges_punlife.csv", index = False)

results_df

Unnamed: 0,CHARGE,MINORS_AVG_DAYS,ADULTS_AVG_DAYS,MINORS_MEDIAN_DAYS,ADULTS_MEDIAN_DAYS,MINORS_AVG_YEARS,ADULTS_AVG_YEARS,MINORS_MEDIAN_YEARS,ADULTS_MEDIAN_YEARS,MINORS_INSTANCES,ADULTS_INSTANCES,AVG_DIFF_YEARS
0,P-810-02-1 &,61598,23594,1198,1825,168.8,64.6,3.3,5.0,6,17,104.2
1,P-810-02-2,12775,5786,12775,5824,35.0,15.9,35.0,16.0,1,4,19.1
2,P-787-01-1a3,12725,8054,12725,4928,34.9,22.1,34.9,13.5,2,60,12.8
3,P-787-02-1,3650,730,3650,730,10.0,2.0,10.0,2.0,1,1,8.0
4,P-787-01-1a1,5475,3560,5475,3240,15.0,9.8,15.0,8.9,1,13,5.2
...,...,...,...,...,...,...,...,...,...,...,...,...
76,P-812-13.0-2b,1273,13506,1273,2738,3.5,37.0,3.5,7.5,1,6,-33.5
77,P-787-01-1a,3650,21720,3650,8212,10.0,59.5,10.0,22.5,1,4,-49.5
78,P-787-2.0-1b,30,18893,30,19532,0.1,51.8,0.1,53.5,1,4,-51.7
79,P-782-051-2a,7300,27375,7300,27375,20.0,75.0,20.0,75.0,1,1,-55.0


#### Overall felonies (excluding life, punishable by life and capital)

In [22]:
### restricts both cohorts to charge degrees "F", "S" and "T"
minors_in_scope = minors["CT_CHARGE_DEGREE"].isin(["F", "S", "T"])
adults_in_scope = adults["CT_CHARGE_DEGREE"].isin(["F", "S", "T"])
minors_df = minors.loc[minors_in_scope].reset_index(drop = True)
adults_df = adults.loc[adults_in_scope].reset_index(drop = True)

### distinct charges seen among minors drive the comparison
minor_charges = minors_df["CHARGE"].unique()
charges = minor_charges.tolist()

In [23]:
### column layout of the per-charge summary table
result_columns = ["CHARGE", "MINORS_AVG_DAYS", "ADULTS_AVG_DAYS", "MINORS_MEDIAN_DAYS", "ADULTS_MEDIAN_DAYS",
                  "MINORS_AVG_YEARS", "ADULTS_AVG_YEARS", "MINORS_MEDIAN_YEARS", "ADULTS_MEDIAN_YEARS",
                  "MINORS_INSTANCES", "ADULTS_INSTANCES"]

### collects one summary row per charge that appears in both cohorts
results = []
for charge in charges:

    ### filters by charge for adults
    dfa = adults_df[adults_df["CHARGE"] == charge].reset_index(drop = True)

    ### skips charges that have no adult counterpart
    if dfa.empty:
        continue

    ### filters by charge for minors
    dfm = minors_df[minors_df["CHARGE"] == charge].reset_index(drop = True)

    ### computes average sentences (years are derived from the already-rounded day count)
    m_avg_days = round(dfm["ST_DAYS"].mean())
    m_avg_years = round(m_avg_days/365, 1)
    a_avg_days = round(dfa["ST_DAYS"].mean())
    a_avg_years = round(a_avg_days/365, 1)

    ### computes median sentences
    m_med_days = round(dfm["ST_DAYS"].median())
    m_med_years = round(m_med_days/365, 1)
    a_med_days = round(dfa["ST_DAYS"].median())
    a_med_years = round(a_med_days/365, 1)

    ### storing datapoints in the same order as result_columns
    results.append([charge, m_avg_days, a_avg_days, m_med_days, a_med_days,
                    m_avg_years, a_avg_years, m_med_years, a_med_years,
                    len(dfm), len(dfa)])

### creating dataframe once, after the loop: rebuilding it per charge was quadratic, and
### when no charge matched, the previous section's results_df was exported under this name
results_df = pd.DataFrame(results, columns = result_columns)

### removing rows with no corresponding figures
results_df = results_df[results_df["MINORS_AVG_DAYS"] != 0].reset_index(drop = True)
results_df = results_df[results_df["ADULTS_AVG_DAYS"] != 0].reset_index(drop = True)

### computing difference of average sentences
results_df["AVG_DIFF_YEARS"] = results_df["MINORS_AVG_YEARS"] - results_df["ADULTS_AVG_YEARS"]
results_df = results_df.sort_values(by = "AVG_DIFF_YEARS", ascending = False).reset_index(drop = True)

### exports dataframe
results_df.to_csv("data/obts/results/charges_fst.csv", index = False)

results_df

Unnamed: 0,CHARGE,MINORS_AVG_DAYS,ADULTS_AVG_DAYS,MINORS_MEDIAN_DAYS,ADULTS_MEDIAN_DAYS,MINORS_AVG_YEARS,ADULTS_AVG_YEARS,MINORS_MEDIAN_YEARS,ADULTS_MEDIAN_YEARS,MINORS_INSTANCES,ADULTS_INSTANCES,AVG_DIFF_YEARS
0,S-316-193-3c3a,51645,5077,1825,3770,141.5,13.9,5.0,10.3,7,614,127.6
1,F-812-135-2b,46924,21606,3650,2920,128.6,59.2,10.0,8.0,8,60,69.4
2,S-800-4.0-4a1,16005,3063,2920,2920,43.8,8.4,8.0,8.0,27,774,35.4
3,F-782-04-1a1,21102,8862,4745,5110,57.8,24.3,13.0,14.0,23,359,33.5
4,F-782-051-,12775,2190,12775,2190,35.0,6.0,35.0,6.0,1,1,29.0
...,...,...,...,...,...,...,...,...,...,...,...,...
1254,S-777-04-4c,1972,25591,1460,730,5.4,70.1,4.0,2.0,7,87,-64.7
1255,F-812-014-2a3b,1541,29573,1095,1825,4.2,81.0,3.0,5.0,11,245,-76.8
1256,F-794-011-2b,5950,36537,7300,7300,16.3,100.1,20.0,20.0,3,81,-83.8
1257,F-787-1.0-,4015,59588,3650,10038,11.0,163.3,10.0,27.5,5,14,-152.3


#### First-degree felonies

In [25]:
### restricts both cohorts to charge degree "F"
minors_df = minors.loc[minors["CT_CHARGE_DEGREE"].eq("F")].reset_index(drop = True)
adults_df = adults.loc[adults["CT_CHARGE_DEGREE"].eq("F")].reset_index(drop = True)

### distinct charges seen among minors drive the comparison
minor_charges = minors_df["CHARGE"].unique()
charges = minor_charges.tolist()

In [26]:
### column layout of the per-charge summary table
result_columns = ["CHARGE", "MINORS_AVG_DAYS", "ADULTS_AVG_DAYS", "MINORS_MEDIAN_DAYS", "ADULTS_MEDIAN_DAYS",
                  "MINORS_AVG_YEARS", "ADULTS_AVG_YEARS", "MINORS_MEDIAN_YEARS", "ADULTS_MEDIAN_YEARS",
                  "MINORS_INSTANCES", "ADULTS_INSTANCES"]

### collects one summary row per charge that appears in both cohorts
results = []
for charge in charges:

    ### filters by charge for adults
    dfa = adults_df[adults_df["CHARGE"] == charge].reset_index(drop = True)

    ### skips charges that have no adult counterpart
    if dfa.empty:
        continue

    ### filters by charge for minors
    dfm = minors_df[minors_df["CHARGE"] == charge].reset_index(drop = True)

    ### computes average sentences (years are derived from the already-rounded day count)
    m_avg_days = round(dfm["ST_DAYS"].mean())
    m_avg_years = round(m_avg_days/365, 1)
    a_avg_days = round(dfa["ST_DAYS"].mean())
    a_avg_years = round(a_avg_days/365, 1)

    ### computes median sentences
    m_med_days = round(dfm["ST_DAYS"].median())
    m_med_years = round(m_med_days/365, 1)
    a_med_days = round(dfa["ST_DAYS"].median())
    a_med_years = round(a_med_days/365, 1)

    ### storing datapoints in the same order as result_columns
    results.append([charge, m_avg_days, a_avg_days, m_med_days, a_med_days,
                    m_avg_years, a_avg_years, m_med_years, a_med_years,
                    len(dfm), len(dfa)])

### creating dataframe once, after the loop: rebuilding it per charge was quadratic, and
### when no charge matched, the previous section's results_df was exported under this name
results_df = pd.DataFrame(results, columns = result_columns)

### removing rows with no corresponding figures
results_df = results_df[results_df["MINORS_AVG_DAYS"] != 0].reset_index(drop = True)
results_df = results_df[results_df["ADULTS_AVG_DAYS"] != 0].reset_index(drop = True)

### computing difference of average sentences
results_df["AVG_DIFF_YEARS"] = results_df["MINORS_AVG_YEARS"] - results_df["ADULTS_AVG_YEARS"]
results_df = results_df.sort_values(by = "AVG_DIFF_YEARS", ascending = False).reset_index(drop = True)

### exports dataframe
results_df.to_csv("data/obts/results/charges_first.csv", index = False)

results_df

Unnamed: 0,CHARGE,MINORS_AVG_DAYS,ADULTS_AVG_DAYS,MINORS_MEDIAN_DAYS,ADULTS_MEDIAN_DAYS,MINORS_AVG_YEARS,ADULTS_AVG_YEARS,MINORS_MEDIAN_YEARS,ADULTS_MEDIAN_YEARS,MINORS_INSTANCES,ADULTS_INSTANCES,AVG_DIFF_YEARS
0,F-812-135-2b,46924,21606,3650,2920,128.6,59.2,10.0,8.0,8,60,69.4
1,F-782-04-1a1,21102,8862,4745,5110,57.8,24.3,13.0,14.0,23,359,33.5
2,F-782-051-,12775,2190,12775,2190,35.0,6.0,35.0,6.0,1,1,29.0
3,F-316-1935-4b,10227,2683,12775,1825,28.0,7.4,35.0,5.0,5,17,20.6
4,F-810-02-1,9125,1637,9125,1637,25.0,4.5,25.0,4.5,1,1,20.5
...,...,...,...,...,...,...,...,...,...,...,...,...
279,F-782-4.0-4,5840,22017,5475,3998,16.0,60.3,15.0,11.0,3,20,-44.3
280,F-812-014-2a3b,1541,29573,1095,1825,4.2,81.0,3.0,5.0,11,245,-76.8
281,F-794-011-2b,5950,36537,7300,7300,16.3,100.1,20.0,20.0,3,81,-83.8
282,F-787-1.0-,4015,59588,3650,10038,11.0,163.3,10.0,27.5,5,14,-152.3


#### Second-degree felonies

In [28]:
### restricts both cohorts to charge degree "S"
minors_df = minors.loc[minors["CT_CHARGE_DEGREE"].eq("S")].reset_index(drop = True)
adults_df = adults.loc[adults["CT_CHARGE_DEGREE"].eq("S")].reset_index(drop = True)

### distinct charges seen among minors drive the comparison
minor_charges = minors_df["CHARGE"].unique()
charges = minor_charges.tolist()

In [29]:
### column layout of the per-charge summary table
result_columns = ["CHARGE", "MINORS_AVG_DAYS", "ADULTS_AVG_DAYS", "MINORS_MEDIAN_DAYS", "ADULTS_MEDIAN_DAYS",
                  "MINORS_AVG_YEARS", "ADULTS_AVG_YEARS", "MINORS_MEDIAN_YEARS", "ADULTS_MEDIAN_YEARS",
                  "MINORS_INSTANCES", "ADULTS_INSTANCES"]

### collects one summary row per charge that appears in both cohorts
results = []
for charge in charges:

    ### filters by charge for adults
    dfa = adults_df[adults_df["CHARGE"] == charge].reset_index(drop = True)

    ### skips charges that have no adult counterpart
    if dfa.empty:
        continue

    ### filters by charge for minors
    dfm = minors_df[minors_df["CHARGE"] == charge].reset_index(drop = True)

    ### computes average sentences (years are derived from the already-rounded day count)
    m_avg_days = round(dfm["ST_DAYS"].mean())
    m_avg_years = round(m_avg_days/365, 1)
    a_avg_days = round(dfa["ST_DAYS"].mean())
    a_avg_years = round(a_avg_days/365, 1)

    ### computes median sentences
    m_med_days = round(dfm["ST_DAYS"].median())
    m_med_years = round(m_med_days/365, 1)
    a_med_days = round(dfa["ST_DAYS"].median())
    a_med_years = round(a_med_days/365, 1)

    ### storing datapoints in the same order as result_columns
    results.append([charge, m_avg_days, a_avg_days, m_med_days, a_med_days,
                    m_avg_years, a_avg_years, m_med_years, a_med_years,
                    len(dfm), len(dfa)])

### creating dataframe once, after the loop: rebuilding it per charge was quadratic, and
### when no charge matched, the previous section's results_df was exported under this name
results_df = pd.DataFrame(results, columns = result_columns)

### removing rows with no corresponding figures
results_df = results_df[results_df["MINORS_AVG_DAYS"] != 0].reset_index(drop = True)
results_df = results_df[results_df["ADULTS_AVG_DAYS"] != 0].reset_index(drop = True)

### computing difference of average sentences
results_df["AVG_DIFF_YEARS"] = results_df["MINORS_AVG_YEARS"] - results_df["ADULTS_AVG_YEARS"]
results_df = results_df.sort_values(by = "AVG_DIFF_YEARS", ascending = False).reset_index(drop = True)

### exports dataframe
results_df.to_csv("data/obts/results/charges_second.csv", index = False)

results_df

Unnamed: 0,CHARGE,MINORS_AVG_DAYS,ADULTS_AVG_DAYS,MINORS_MEDIAN_DAYS,ADULTS_MEDIAN_DAYS,MINORS_AVG_YEARS,ADULTS_AVG_YEARS,MINORS_MEDIAN_YEARS,ADULTS_MEDIAN_YEARS,MINORS_INSTANCES,ADULTS_INSTANCES,AVG_DIFF_YEARS
0,S-316-193-3c3a,51645,5077,1825,3770,141.5,13.9,5.0,10.3,7,614,127.6
1,S-800-4.0-4a1,16005,3063,2920,2920,43.8,8.4,8.0,8.0,27,774,35.4
2,S-784-0451-a2,7300,1146,7300,793,20.0,3.1,20.0,2.2,1,5,16.9
3,S-812-1312-a,5475,366,5475,366,15.0,1.0,15.0,1.0,1,1,14.0
4,S-784-8.0-2a,7300,2340,7300,2340,20.0,6.4,20.0,6.4,1,1,13.6
...,...,...,...,...,...,...,...,...,...,...,...,...
371,S-794-11.0-2b,3431,10950,3650,10950,9.4,30.0,10.0,30.0,5,1,-20.6
372,S-800-04-4a1,2555,11603,2555,2920,7.0,31.8,7.0,8.0,7,372,-24.8
373,S-827-71.0-3,3477,13209,3285,3650,9.5,36.2,9.0,10.0,14,667,-26.7
374,S-782-04-2,3320,18346,1460,5475,9.1,50.3,4.0,15.0,17,161,-41.2


#### Third-degree felonies

In [31]:
### restricts both cohorts to charge degree "T"
minors_df = minors.loc[minors["CT_CHARGE_DEGREE"].eq("T")].reset_index(drop = True)
adults_df = adults.loc[adults["CT_CHARGE_DEGREE"].eq("T")].reset_index(drop = True)

### distinct charges seen among minors drive the comparison
minor_charges = minors_df["CHARGE"].unique()
charges = minor_charges.tolist()

In [32]:
### column layout of the per-charge summary table
result_columns = ["CHARGE", "MINORS_AVG_DAYS", "ADULTS_AVG_DAYS", "MINORS_MEDIAN_DAYS", "ADULTS_MEDIAN_DAYS",
                  "MINORS_AVG_YEARS", "ADULTS_AVG_YEARS", "MINORS_MEDIAN_YEARS", "ADULTS_MEDIAN_YEARS",
                  "MINORS_INSTANCES", "ADULTS_INSTANCES"]

### collects one summary row per charge that appears in both cohorts
results = []
for charge in charges:

    ### filters by charge for adults
    dfa = adults_df[adults_df["CHARGE"] == charge].reset_index(drop = True)

    ### skips charges that have no adult counterpart
    if dfa.empty:
        continue

    ### filters by charge for minors
    dfm = minors_df[minors_df["CHARGE"] == charge].reset_index(drop = True)

    ### computes average sentences (years are derived from the already-rounded day count)
    m_avg_days = round(dfm["ST_DAYS"].mean())
    m_avg_years = round(m_avg_days/365, 1)
    a_avg_days = round(dfa["ST_DAYS"].mean())
    a_avg_years = round(a_avg_days/365, 1)

    ### computes median sentences
    m_med_days = round(dfm["ST_DAYS"].median())
    m_med_years = round(m_med_days/365, 1)
    a_med_days = round(dfa["ST_DAYS"].median())
    a_med_years = round(a_med_days/365, 1)

    ### storing datapoints in the same order as result_columns
    results.append([charge, m_avg_days, a_avg_days, m_med_days, a_med_days,
                    m_avg_years, a_avg_years, m_med_years, a_med_years,
                    len(dfm), len(dfa)])

### creating dataframe once, after the loop: rebuilding it per charge was quadratic, and
### when no charge matched, the previous section's results_df was exported under this name
results_df = pd.DataFrame(results, columns = result_columns)

### removing rows with no corresponding figures
results_df = results_df[results_df["MINORS_AVG_DAYS"] != 0].reset_index(drop = True)
results_df = results_df[results_df["ADULTS_AVG_DAYS"] != 0].reset_index(drop = True)

### computing difference of average sentences
results_df["AVG_DIFF_YEARS"] = results_df["MINORS_AVG_YEARS"] - results_df["ADULTS_AVG_YEARS"]
results_df = results_df.sort_values(by = "AVG_DIFF_YEARS", ascending = False).reset_index(drop = True)


### exports dataframe
results_df.to_csv("data/obts/results/charges_third.csv", index = False)

results_df

Unnamed: 0,CHARGE,MINORS_AVG_DAYS,ADULTS_AVG_DAYS,MINORS_MEDIAN_DAYS,ADULTS_MEDIAN_DAYS,MINORS_AVG_YEARS,ADULTS_AVG_YEARS,MINORS_MEDIAN_YEARS,ADULTS_MEDIAN_YEARS,MINORS_INSTANCES,ADULTS_INSTANCES,AVG_DIFF_YEARS
0,T-914-22.0-,5475,662,5475,395,15.0,1.8,15.0,1.1,1,199,13.2
1,T-847-135.0-3,5655,1396,7570,1140,15.5,3.8,20.7,3.1,3,193,11.7
2,T-847-138.0-2,5290,1346,7570,1410,14.5,3.7,20.7,3.9,3,639,10.8
3,T-817-61-,4220,958,730,390,11.6,2.6,2.0,1.1,107,7470,9.0
4,T-944-47.0-,3228,698,3228,545,8.8,1.9,8.8,1.5,2,118,6.9
...,...,...,...,...,...,...,...,...,...,...,...,...
594,T-827-03-2c,880,4274,712,509,2.4,11.7,2.0,1.4,8,495,-9.3
595,T-499-3.0-3,363,4999,362,242,1.0,13.7,1.0,0.7,4,78,-12.7
596,T-810-145.0-6b,90,6099,90,1095,0.2,16.7,0.2,3.0,1,217,-16.5
597,T-800-04-7c,112,6949,112,1278,0.3,19.0,0.3,3.5,2,6,-18.7
