In [10]:
import numpy as np
import pandas as pd


In [11]:
csv_path = "C:/Users/qaism/OneDrive - University of Virginia/Documents/GitHub/MSDS/ds6001databases/data1k.csv"
df = pd.read_csv(csv_path)

In [12]:
# Most frequent code sections
codes_freq = df['CodeSection'].value_counts().reset_index()
codes_freq.columns = ['CodeSection', 'count']
print(codes_freq.head(15))


   CodeSection  count
0   B.46.2-301    268
1   A.46.2-862    253
2     46.2-300    177
3   C.46.2-862    123
4   18.2-250.1     96
5      18.2-95     71
6   A.18.2-266     68
7      18.2-57     68
8   A.46.2-852     62
9     18.2-250     58
10     18.2-96     53
11  A.46.2-853     51
12  A.46.2-707     48
13    18.2-388     45
14    18.2-172     44


In [13]:
# Conviction rates by code section
df['convicted'] = df['DispositionCode'].isin(['Guilty', 'Guilty In Abstentia'])
conv_rates = df.groupby('CodeSection')['convicted'].agg(['mean', 'count']).reset_index()
conv_rates = conv_rates[conv_rates['count'] >= 30].sort_values('mean', ascending=False)
print(conv_rates)

    CodeSection      mean  count
303  A.18.2-266  0.852941     68
63     18.2-248  0.707317     41
154     18.2-91  0.684211     38
334  C.46.2-862  0.666667    123
325  B.46.2-301  0.634328    268
317  A.46.2-862  0.628458    253
313  A.46.2-853  0.607843     51
312  A.46.2-852  0.596774     62
157     18.2-95  0.577465     71
163    19.2-128  0.566667     30
69     18.2-250  0.551724     58
158     18.2-96  0.547170     53
70   18.2-250.1  0.500000     96
242    46.2-300  0.468927    177
48     18.2-172  0.409091     44
310  A.46.2-707  0.375000     48
114    18.2-388  0.355556     45
36     18.2-119  0.300000     30
136     18.2-57  0.294118     68
166    19.2-306  0.121951     41


In [14]:
# Racial disparities in convictions
replace_map = {
    'Black(Non-Hispanic)': 'Black',
    'Hispanic': 'Hispanic',
    'White Caucasian(Non-Hispanic)': 'White',
    'MISSING': 'Other',
    'Asian Or Pacific Islander': 'Asian',
    'Black (Non-Hispanic)': 'Black',
    'White Caucasian (Non-Hispanic)': 'White',
    'Other(Includes Not Applicable.. Unknown)': 'Other',
    'Other (Includes Not Applicable.. Unknown)': 'Other',
    'Black': 'Black',
    'White': 'White',
    'Unknown (Includes Not Applicable.. Unknown)': 'Other',
    'American Indian': 'Native',
    'Unknown': 'Other',
    'Asian or Pacific Islander': 'Asian',
    'American Indian Or Alaskan Native': 'Native'
}

df['Race'] = df['Race'].replace(replace_map)
print(df['Race'].value_counts())


Race
White       1452
Black       1185
Hispanic      88
Other         56
Asian         27
Native         3
Name: count, dtype: int64


In [15]:
# Convictions by race and code section
conv_race = df[df['convicted']].groupby(['CodeSection', 'Race']).size().unstack(fill_value=0).reset_index()
conv_race['total'] = conv_race.sum(axis=1, numeric_only=True)
conv_race = conv_race[conv_race['total'] > 50]


In [16]:
conv_race['black_pct'] = conv_race['Black'] / conv_race['total']
conv_race['white_pct'] = conv_race['White'] / conv_race['total']
conv_race = conv_race.sort_values('black_pct', ascending=False)
print(conv_race[['CodeSection', 'black_pct', 'white_pct']].head(15))


Race CodeSection  black_pct  white_pct
220   B.46.2-301   0.547059   0.394118
215   A.46.2-862   0.377358   0.490566
228   C.46.2-862   0.256098   0.658537
201   A.18.2-266   0.224138   0.741379
166     46.2-300   0.204819   0.566265


In [17]:
# Racial disparities by locality (fips)
locality_race = df[df['convicted']].groupby(['fips', 'Race']).size().unstack(fill_value=0).reset_index()
locality_race['total'] = locality_race.sum(axis=1)
locality_race = locality_race[locality_race['total'] > 50]

In [18]:
locality_race['black_pct'] = locality_race['Black'] / locality_race['total']
locality_race['white_pct'] = locality_race['White'] / locality_race['total']
locality_race = locality_race.sort_values('black_pct', ascending=False)
print(locality_race[['fips', 'black_pct', 'white_pct']].head(15))

Race  fips  black_pct  white_pct
13      31   0.418182   0.018182
37      87   0.276730   0.157233
17      41   0.256098   0.195122
24      59   0.201183   0.372781
22      53   0.181818   0.015152
47     111   0.165414   0.000000
25      61   0.129032   0.182796
36      85   0.094340   0.103774
72     179   0.080569   0.066351
35      83   0.077778   0.000000
34      81   0.070796   0.185841
19      47   0.058824   0.019608
62     153   0.055300   0.230415
103    730   0.040576   0.003927
49     117   0.039370   0.039370
