In [1]:
import numpy as np
import pandas as pd

In [2]:
cases = pd.read_csv("C:/Users/shrof/Downloads/data100k.csv")

In [3]:
cases.head(3).T

Unnamed: 0,0,1,2
person_id,102090000000110,343221000000125,343221000000125
HearingDate,2019-02-28,2009-12-07,2011-01-20
CodeSection,A.46.2-862,B.46.2-301,A.46.2-707
codesection,covered elsewhere,covered elsewhere,covered elsewhere
ChargeType,Misdemeanor,Misdemeanor,Misdemeanor
chargetype,Misdemeanor,Misdemeanor,Misdemeanor
Class,1,1,3
DispositionCode,Guilty,Guilty,Guilty
disposition,Conviction,Conviction,Conviction
Plea,,,


In [4]:
## What code sections are most frequent? 

In [5]:
codesections = cases['CodeSection'].value_counts().reset_index()
codesections.head(15)

Unnamed: 0,CodeSection,count
0,A.46.2-862,26379
1,B.46.2-301,25967
2,46.2-300,17934
3,C.46.2-862,11728
4,18.2-250.1,10573
5,A.18.2-266,8568
6,18.2-95,7561
7,18.2-250,6949
8,18.2-57,6699
9,A.46.2-852,6667


### Which codes lead to most convictions? 

In [6]:
cases['DispositionCode'].value_counts()
cases['conviction'] = [x in ['Guilty', 'Guilty In Abstentia'] for x in cases['DispositionCode']]

In [7]:
convict_rate = cases.groupby('CodeSection').agg({'conviction':['mean','count']}).reset_index()
convict_rate.columns = ['CodeSection', 'conviction_rate', 'count']
convict_rate = convict_rate.query("count >= 30")
convict_rate.sort_values('conviction_rate', ascending=False)

Unnamed: 0,CodeSection,conviction_rate,count
1633,21-336,0.960000,50
737,18.2-195(1)(A),0.926829,41
1538,21-1/46.2-301,0.913043,46
4111,G.18.2-266,0.906977,43
4012,B.46.2-357,0.904891,736
...,...,...,...
2690,41.1-2-2,0.015504,129
253,13-60,0.014286,70
1433,19.2-100,0.000000,238
140,11.1-2,0.000000,38


In [8]:
cases.query("CodeSection == '23-55'")['fips']

8585      810
17940     810
34148     810
34168     810
34169     810
46925     810
68396     810
120838    810
120844    810
120862    810
120864    810
121263    810
132073    810
153191    810
153206    810
153208    810
153210    810
153222    810
155161    810
157747    810
159548    810
163720    810
173408    810
192200    810
196647    810
200799    810
200828    810
200922    810
230213    810
240905    810
240909    810
240912    810
240914    810
240916    810
240917    810
240918    810
240919    810
240920    810
240922    810
240923    810
240926    810
240927    810
240945    810
240946    810
240948    810
249880    810
251381    810
257688    810
257697    810
257699    810
257739    810
257747    810
266159    810
291045    810
291048    810
Name: fips, dtype: int64

### Most racial disparities

In [9]:
# filter out ones with small counts
cases['Race'].unique()

array(['Black(Non-Hispanic)', 'Hispanic', 'White Caucasian(Non-Hispanic)',
       'MISSING', 'Asian Or Pacific Islander', 'Black (Non-Hispanic)',
       'White Caucasian (Non-Hispanic)',
       'Other(Includes Not Applicable.. Unknown)',
       'Other (Includes Not Applicable.. Unknown)', 'Black', 'White',
       'Unknown (Includes Not Applicable.. Unknown)', 'American Indian',
       'Unknown', 'Asian or Pacific Islander',
       'American Indian Or Alaskan Native'], dtype=object)

In [10]:
replace_map = {'Black(Non-Hispanic)':'Black',
'Hispanic':'Hispanic', 
'White Caucasian(Non-Hispanic)':'White',
'MISSING':'Missing/Other', 
'Asian Or Pacific Islander':'Asian or Pacific Islander',
'Black (Non-Hispanic)':'Black',
'White Caucasian (Non-Hispanic)':'White',
'Other(Includes Not Applicable.. Unknown)':'Missing/Other',
'Other (Includes Not Applicable.. Unknown)':'Missing/Other',
'Black':'Black', 
'White':'White',
'Unknown (Includes Not Applicable.. Unknown)':'Missing/Other',
'American Indian':'American Indian or Alaskan Native',
'Unknown':'', 
'Asian or Pacific Islander':'Asian or Pacific Islander',
'American Indian Or Alaskan Native':'American Indian or Alaskan Native'}

cases['Race'] = cases['Race'].replace(replace_map)
cases['Race'].value_counts()

Race
White                                159627
Black                                115627
Hispanic                               9319
Missing/Other                          5874
Asian or Pacific Islander              2794
American Indian or Alaskan Native       303
                                         54
Name: count, dtype: int64

In [11]:
# I choose to analyze only the convictions

In [90]:
cases_convict = cases.query("conviction == True")
cases_convict_race = cases_convict.groupby(['CodeSection', 'Race', 'fips']).size().reset_index()
cases_convict_race = cases_convict_race.rename({0:'count'}, axis=1)
cases_convict_race

Unnamed: 0,CodeSection,Race,fips,count
0,01-2007,White,51,1
1,1,Black,550,1
2,1,White,550,1
3,1-12,Black,650,24
4,1-12,White,650,5
...,...,...,...,...
25475,Z.18.2-91,White,840,2
25476,Z.18.2-91; 26,Black,700,1
25477,Z.18.2-95,Black,67,1
25478,Z.18.2-95,Black,83,1


In [91]:
cases_reshape = cases_convict_race.pivot_table(index = ['CodeSection', 'fips'], columns = 'Race', 
                                               values = 'count', fill_value=0).reset_index()
cases_reshape

Race,CodeSection,fips,Unnamed: 3,American Indian or Alaskan Native,Asian or Pacific Islander,Black,Hispanic,Missing/Other,White
0,01-2007,51,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,1,550,0.0,0.0,0.0,1.0,0.0,0.0,1.0
2,1-12,650,0.0,0.0,0.0,24.0,0.0,0.0,5.0
3,1-200,29,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,1-200,105,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...
17497,Z.18.2-91,840,0.0,0.0,0.0,0.0,0.0,0.0,2.0
17498,Z.18.2-91; 26,700,0.0,0.0,0.0,1.0,0.0,0.0,0.0
17499,Z.18.2-95,67,0.0,0.0,0.0,1.0,0.0,0.0,0.0
17500,Z.18.2-95,83,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [92]:
cases_reshape = cases_reshape.assign(total=cases_reshape['American Indian or Alaskan Native'] + 
                                              cases_reshape['Asian or Pacific Islander'] +
                                              cases_reshape['Black'] + 
                                              cases_reshape['Hispanic'] + 
                                              cases_reshape['Missing/Other'] + 
                                              cases_reshape['White'])

In [96]:
cases_reshape = cases_reshape.query("total > 25")

In [97]:
cases_reshape = cases_reshape.assign(black_percent = cases_reshape['Black']/cases_reshape['total'],
                                     white_percent = cases_reshape['White']/cases_reshape['total'])


In [98]:
cases_reshape.sort_values('black_percent', ascending=False)

Race,CodeSection,fips,Unnamed: 3,American Indian or Alaskan Native,Asian or Pacific Islander,Black,Hispanic,Missing/Other,White,total,black_percent,white_percent
2901,18.2-248,760,0.0,0.0,0.0,228.0,0.0,0.0,3.0,231.0,0.987013,0.012987
1144,18.2-119,740,0.0,0.0,0.0,131.0,0.0,0.0,5.0,136.0,0.963235,0.036765
9502,24-253,740,0.0,0.0,0.0,75.0,0.0,0.0,3.0,78.0,0.961538,0.038462
2847,18.2-248,117,0.0,0.0,0.0,55.0,0.0,0.0,3.0,58.0,0.948276,0.051724
9721,29-48,711,0.0,0.0,0.0,90.0,0.0,0.0,5.0,95.0,0.947368,0.052632
...,...,...,...,...,...,...,...,...,...,...,...,...
1788,18.2-172,15,0.0,0.0,0.0,0.0,0.0,0.0,52.0,52.0,0.000000,1.000000
1789,18.2-172,19,0.0,0.0,0.0,0.0,0.0,0.0,53.0,53.0,0.000000,1.000000
9107,19.2-306,169,0.0,0.0,0.0,0.0,0.0,0.0,60.0,60.0,0.000000,1.000000
9114,19.2-306,191,0.0,0.0,0.0,0.0,4.0,0.0,47.0,51.0,0.000000,0.921569


### In what localities (fips) are these disparities most severe? 

In [99]:
cases

Unnamed: 0,person_id,HearingDate,CodeSection,codesection,ChargeType,chargetype,Class,DispositionCode,disposition,Plea,...,within10,class1_2,class3_4,expungable,old_expungable,expungable_no_lifetimelimit,reason,sameday,lifetime,conviction
0,102090000000110,2019-02-28,A.46.2-862,covered elsewhere,Misdemeanor,Misdemeanor,1,Guilty,Conviction,,...,True,False,False,Automatic (pending),False,Automatic (pending),Conviction of misdemeanor charges listed in 19...,False,False,True
1,343221000000125,2009-12-07,B.46.2-301,covered elsewhere,Misdemeanor,Misdemeanor,1,Guilty,Conviction,,...,False,False,False,Not eligible,False,Not eligible,Conviction of misdemeanor charges that are not...,False,False,True
2,343221000000125,2011-01-20,A.46.2-707,covered elsewhere,Misdemeanor,Misdemeanor,3,Guilty,Conviction,,...,True,False,False,Not eligible,False,Not eligible,Conviction of misdemeanor charges that are not...,False,False,True
3,343221000000125,2011-07-01,B.46.2-301,covered elsewhere,Misdemeanor,Misdemeanor,1,Guilty In Absentia,Conviction,,...,True,False,False,Not eligible,False,Not eligible,Conviction of misdemeanor charges that are not...,False,False,False
4,343221000000125,2012-10-15,B.46.2-301,covered elsewhere,Misdemeanor,Misdemeanor,1,Guilty,Conviction,,...,True,False,False,Not eligible,False,Not eligible,Conviction of misdemeanor charges that are not...,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
293593,247061000000309,2019-10-04,14.2-81,covered elsewhere,Misdemeanor,Misdemeanor,,Guilty In Absentia,Conviction,Tried In Absentia,...,True,False,False,Not eligible,False,Not eligible,Conviction of misdemeanor charges that are not...,False,False,False
293594,247061000000309,2019-10-18,14.2-81,covered elsewhere,Misdemeanor,Misdemeanor,,Guilty In Absentia,Conviction,Tried In Absentia,...,True,False,False,Automatic (pending),False,Automatic (pending),Conviction of misdemeanor charges listed in 19...,False,False,False
293595,295161000000000,2016-10-04,A.46.2-862,covered elsewhere,Misdemeanor,Misdemeanor,,Guilty,Conviction,,...,True,False,False,Automatic (pending),False,Automatic (pending),Conviction of misdemeanor charges listed in 19...,False,False,True
293596,5120000001160,2017-10-04,A.46.2-862,covered elsewhere,Misdemeanor,Misdemeanor,1,Guilty In Absentia,Conviction,,...,True,False,False,Automatic (pending),False,Automatic (pending),Conviction of misdemeanor charges listed in 19...,False,False,False


### Lab 9

In [114]:
race_url = 'https://virginia.box.com/shared/static/i8i5onrkveks849pkky0gwgxlax8d8fe.xlsx'
hisp_url = 'https://virginia.box.com/shared/static/fegrn0p0igzl95snji3ku6edwu0hy3dj.xlsx'

In [115]:
race_pop = pd.read_excel(race_url, skiprows= [0,1,2,3,5,6, 7])
race_pop.head(10)

  warn(f"Print area cannot be set to Defined name: {defn.value}.")


Unnamed: 0,FIPS,Jurisdiction,Total Population,American Indian,Unnamed: 4,Asian,Unnamed: 6,Black,Unnamed: 8,Pacific Islander,Unnamed: 10,Two or more races,Unnamed: 12,White,Unnamed: 14
0,1,Accomack County,33246,441,0.013265,370,0.011129,9859,0.296547,79,0.002376,609,0.018318,23125,0.695572
1,3,Albemarle County,113535,1069,0.009416,7925,0.069802,12581,0.110812,242,0.002132,3210,0.028273,95210,0.838596
2,5,Alleghany County,14986,126,0.008408,92,0.006139,906,0.060456,17,0.001134,283,0.018884,14136,0.94328
3,7,Amelia County,13268,177,0.01334,138,0.010401,2759,0.207944,15,0.001131,259,0.019521,10445,0.787232
4,9,Amherst County,31273,495,0.015828,339,0.01084,6475,0.207048,55,0.001759,828,0.026477,24796,0.792888
5,11,Appomattox County,16353,130,0.00795,101,0.006176,3200,0.195683,18,0.001101,374,0.02287,13286,0.81245
6,13,Arlington County,232965,3689,0.015835,31145,0.13369,26879,0.115378,760,0.003262,8882,0.038126,180541,0.77497
7,15,Augusta County,77563,672,0.008664,804,0.010366,4618,0.059539,75,0.000967,1443,0.018604,72898,0.939855
8,17,Bath County,4114,27,0.006563,33,0.008021,205,0.04983,0,0.0,67,0.016286,3918,0.952358
9,19,Bedford County,80131,723,0.009023,1430,0.017846,6486,0.080942,77,0.000961,1439,0.017958,72916,0.90996


In [116]:
race_pop = race_pop[['FIPS', 'Jurisdiction', 'Total Population', 'American Indian', 'Asian ', 'Black', 'Pacific Islander', 'Two or more races','White ']]

In [117]:
race_pop

Unnamed: 0,FIPS,Jurisdiction,Total Population,American Indian,Asian,Black,Pacific Islander,Two or more races,White
0,1,Accomack County,33246,441,370,9859,79,609,23125
1,3,Albemarle County,113535,1069,7925,12581,242,3210,95210
2,5,Alleghany County,14986,126,92,906,17,283,14136
3,7,Amelia County,13268,177,138,2759,15,259,10445
4,9,Amherst County,31273,495,339,6475,55,828,24796
...,...,...,...,...,...,...,...,...,...
128,800,Suffolk city,96194,1052,3115,43068,232,2975,51977
129,810,Virginia Beach city,457672,6241,44491,104827,1846,20837,324018
130,820,Waynesboro city,22550,309,522,3665,35,792,18840
131,830,Williamsburg city,15590,202,1164,2754,37,591,12055


In [118]:
race_pop = race_pop.rename({'FIPS': 'fips', 
                            'Jurisdiction': 'jurisdiction', 
                            'Total Population': 'total_pop', 
                            'American Indian': 'amerind_pop', 
                            'Asian ': 'asian_pop',
                            'Black': 'black_pop', 
                            'Pacific Islander': 'pacificisland_pop', 
                            'Two or more races': 'twomore_pop', 
                            'White ': 'white_pop'}, axis=1)

In [119]:
race_pop

Unnamed: 0,fips,jurisdiction,total_pop,amerind_pop,asian_pop,black_pop,pacificisland_pop,twomore_pop,white_pop
0,1,Accomack County,33246,441,370,9859,79,609,23125
1,3,Albemarle County,113535,1069,7925,12581,242,3210,95210
2,5,Alleghany County,14986,126,92,906,17,283,14136
3,7,Amelia County,13268,177,138,2759,15,259,10445
4,9,Amherst County,31273,495,339,6475,55,828,24796
...,...,...,...,...,...,...,...,...,...
128,800,Suffolk city,96194,1052,3115,43068,232,2975,51977
129,810,Virginia Beach city,457672,6241,44491,104827,1846,20837,324018
130,820,Waynesboro city,22550,309,522,3665,35,792,18840
131,830,Williamsburg city,15590,202,1164,2754,37,591,12055


In [120]:
hisp_pop = pd.read_excel(hisp_url, skiprows = [0,1,2,3,5,6,7,8,9])
hisp_pop.head(10)

  warn(f"Print area cannot be set to Defined name: {defn.value}.")


Unnamed: 0,FIPS,Jurisdiction,"Decennial Census Count, April 1, 2010",Unnamed: 3,Unnamed: 4,"Population Estimate, July 1, 2021",Unnamed: 6,Unnamed: 7,"April 1, 2010 - July 1, 2021",Unnamed: 9
0,1,"Accomack County, Virginia",33164,2850,8.593656,33246,3170,0.09535,320,0.112281
1,3,"Albemarle County, Virginia",98970,5417,5.473376,113535,6750,0.059453,1333,0.246077
2,5,"Alleghany County, Virginia",16250,176,1.083077,14986,265,0.017683,89,0.505682
3,7,"Amelia County, Virginia",12690,290,2.285264,13268,507,0.038212,217,0.748276
4,9,"Amherst County, Virginia",32353,625,1.931815,31273,849,0.027148,224,0.3584
5,11,"Appomattox County, Virginia",14973,167,1.115341,16353,360,0.022014,193,1.155689
6,13,"Arlington County, Virginia",207627,31382,15.114605,232965,36284,0.155749,4902,0.156204
7,15,"Augusta County, Virginia",73750,1525,2.067797,77563,2849,0.036731,1324,0.868197
8,17,"Bath County, Virginia",4731,101,2.134855,4114,101,0.02455,0,0.0
9,19,"Bedford County, Virginia",68676,1090,1.587163,80131,2179,0.027193,1089,0.999083


In [121]:
hisp_pop = hisp_pop.rename({'FIPS': 'fips', 'Unnamed: 6': 'hisp_pop'}, axis = 1)

In [122]:
hisp_pop = hisp_pop[['fips', 'hisp_pop']]

# Things that can go wrong
# Rows that should match are unmatched, and either get missing data if a full/outer join, or get deleted in an inner join
# You think you are doing a one to one merge, but are actually doing a many to one merge, or many to many

# If you have small data set, it is recommend to do the merge twice, once with checks, once without


In [123]:
# what a many to many merge looks like, default merge is inner join

In [124]:
# data1 = [{'Country': 'USA', 'Value1': 5},
#          {'Country': 'France', 'Value1': 15},
#          {'Country': 'China', 'Value1': 50},
#          {'Country': 'China', 'Value1': 50},
#          {'Country': 'Spain', 'Value1': 25},
#          {'Country': 'UK', 'Value1': 0.5},
#          {'Country': 'Thailand', 'Value1': 500},
#          ]

# data2 = [{'Country': 'USA', 'Value2': 3},
#          {'Country': 'France', 'Value2': 13},
#          {'Country': 'China', 'Value2': 30},
#          {'Country': 'China', 'Value2': 30},
#          {'Country': 'Spain', 'Value2': 23},
#          {'Country': 'UK', 'Value2': 0.3},
#          {'Country': 'Thailand', 'Value2': 300},
#          ]

# data1 = pd.DataFrame.from_records(data1)
# data2 = pd.DataFrame.from_records(data2)

# data1

In [125]:
#pd.merge(data1, data2, on='Country')

In [126]:
# merge with checks
pop = pd.merge(race_pop, hisp_pop, on='fips', how = 'outer', validate='one_to_one', indicator = 'matched')

In [127]:
pop['matched'].value_counts()

matched
both          133
left_only       0
right_only      0
Name: count, dtype: int64

In [128]:
# Merge without checks

In [129]:
pop = pd.merge(race_pop, hisp_pop, on='fips', how = 'inner')

In [130]:
pop

Unnamed: 0,fips,jurisdiction,total_pop,amerind_pop,asian_pop,black_pop,pacificisland_pop,twomore_pop,white_pop,hisp_pop
0,1,Accomack County,33246,441,370,9859,79,609,23125,3170
1,3,Albemarle County,113535,1069,7925,12581,242,3210,95210,6750
2,5,Alleghany County,14986,126,92,906,17,283,14136,265
3,7,Amelia County,13268,177,138,2759,15,259,10445,507
4,9,Amherst County,31273,495,339,6475,55,828,24796,849
...,...,...,...,...,...,...,...,...,...,...
128,800,Suffolk city,96194,1052,3115,43068,232,2975,51977,4684
129,810,Virginia Beach city,457672,6241,44491,104827,1846,20837,324018,40525
130,820,Waynesboro city,22550,309,522,3665,35,792,18840,2244
131,830,Williamsburg city,15590,202,1164,2754,37,591,12055,1183


In [131]:
cases_pop = pd.merge(cases_reshape, pop, on='fips', how = 'outer', validate = 'many_to_one', indicator = 'matched')

In [132]:
cases_pop

Unnamed: 0,CodeSection,fips,Unnamed: 3,American Indian or Alaskan Native,Asian or Pacific Islander,Black,Hispanic,Missing/Other,White,total,...,jurisdiction,total_pop,amerind_pop,asian_pop,black_pop,pacificisland_pop,twomore_pop,white_pop,hisp_pop,matched
0,17-7,13,0.0,0.0,1.0,17.0,0.0,0.0,33.0,51.0,...,Arlington County,232965,3689,31145,26879,760,8882,180541,36284,both
1,18.2-111,13,0.0,0.0,0.0,50.0,1.0,0.0,22.0,73.0,...,Arlington County,232965,3689,31145,26879,760,8882,180541,36284,both
2,18.2-172,13,0.0,0.0,0.0,28.0,2.0,1.0,25.0,56.0,...,Arlington County,232965,3689,31145,26879,760,8882,180541,36284,both
3,18.2-250,13,0.0,0.0,4.0,44.0,6.0,1.0,36.0,91.0,...,Arlington County,232965,3689,31145,26879,760,8882,180541,36284,both
4,18.2-250.1,13,0.0,0.0,1.0,50.0,2.0,2.0,44.0,99.0,...,Arlington County,232965,3689,31145,26879,760,8882,180541,36284,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
574,,685,,,,,,,,,...,Manassas Park city,17002,488,2235,2990,78,651,11882,7311,right_only
575,,720,,,,,,,,,...,Norton city,3666,35,88,289,4,90,3341,144,right_only
576,,735,,,,,,,,,...,Poquoson city,12574,148,428,335,19,306,11953,461,right_only
577,,750,,,,,,,,,...,Radford city,16499,150,480,1831,34,447,14459,500,right_only


In [133]:
cases_pop.query("matched=='left_only'")['fips'].unique()

array([], dtype=int64)

In [134]:
replace_map = {701: 700, 711: 710, 761: 760, 762: 760, 764: 760, 712: 710, 702: 700}

cases_reshape['fips'] = cases_reshape['fips'].replace(replace_map)

In [135]:
cases_pop['matched'].value_counts()

matched
both          532
right_only     47
left_only       0
Name: count, dtype: int64

In [138]:
cases_pop.query("matched=='right_only'")['fips']

532      7
533     11
534     17
535     29
536     36
537     37
538     45
539     49
540     51
541     57
542     63
543     65
544     71
545     77
546     79
547     91
548     95
549     97
550     99
551    101
552    103
553    111
554    113
555    115
556    119
557    125
558    133
559    141
560    145
561    157
562    159
563    181
564    193
565    530
566    580
567    600
568    610
569    620
570    640
571    660
572    678
573    683
574    685
575    720
576    735
577    750
578    820
Name: fips, dtype: int64

In [139]:
# reasons why rows might fail to match: 
# 1. differences in coding or spelling (USA vs United States), recode values in one dataset
# 2. differences in coverage: nothing we can do, other than collect new data if feasible

In [141]:
cases_pop = pd.merge(cases_reshape, pop, on='fips', how='inner')

In [142]:
cases_pop

Unnamed: 0,CodeSection,fips,Unnamed: 3,American Indian or Alaskan Native,Asian or Pacific Islander,Black,Hispanic,Missing/Other,White,total,...,white_percent,jurisdiction,total_pop,amerind_pop,asian_pop,black_pop,pacificisland_pop,twomore_pop,white_pop,hisp_pop
0,17-7,13,0.0,0.0,1.0,17.0,0.0,0.0,33.0,51.0,...,0.647059,Arlington County,232965,3689,31145,26879,760,8882,180541,36284
1,18.2-111,13,0.0,0.0,0.0,50.0,1.0,0.0,22.0,73.0,...,0.301370,Arlington County,232965,3689,31145,26879,760,8882,180541,36284
2,18.2-172,13,0.0,0.0,0.0,28.0,2.0,1.0,25.0,56.0,...,0.446429,Arlington County,232965,3689,31145,26879,760,8882,180541,36284
3,18.2-250,13,0.0,0.0,4.0,44.0,6.0,1.0,36.0,91.0,...,0.395604,Arlington County,232965,3689,31145,26879,760,8882,180541,36284
4,18.2-250.1,13,0.0,0.0,1.0,50.0,2.0,2.0,44.0,99.0,...,0.444444,Arlington County,232965,3689,31145,26879,760,8882,180541,36284
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
527,B.46.2-301,83,0.0,0.0,0.0,48.0,0.0,0.0,36.0,84.0,...,0.428571,Halifax County,33738,279,313,12533,43,571,21158,830
528,B.46.2-301,135,0.0,0.0,0.0,37.0,3.0,1.0,15.0,56.0,...,0.267857,Nottoway County,15594,204,142,6228,31,275,9270,748
529,B.46.2-301,137,0.0,0.0,0.0,13.0,2.0,1.0,47.0,63.0,...,0.746032,Orange County,37188,508,741,5632,86,1162,31423,2361
530,C.46.2-862,137,0.0,0.0,1.0,8.0,3.0,4.0,41.0,57.0,...,0.719298,Orange County,37188,508,741,5632,86,1162,31423,2361


In [143]:
cases_pop['black_overrep_index'] = cases_pop['black_percent']/(cases_pop['black_pop']/cases_pop['total_pop'])

In [145]:
cases_pop.sort_values('black_overrep_index', ascending=False)[['CodeSection','jurisdiction','Black','total','black_pop','total_pop','black_overrep_index']]

Unnamed: 0,CodeSection,jurisdiction,Black,total,black_pop,total_pop,black_overrep_index
317,A.46.2-862,Carroll County,16.0,72.0,530,29048,12.179455
319,C.46.2-862,Carroll County,45.0,289.0,530,29048,8.534047
33,46.2-1172,Hanover County,65.0,74.0,11853,111603,8.270452
393,46.2-300,Scott County,6.0,53.0,301,21419,8.055789
380,18.2-250,Rockingham County,16.0,54.0,3186,84394,7.848597
...,...,...,...,...,...,...,...
392,19.2-306,Scott County,0.0,60.0,301,21419,0.000000
391,18.2-374.1:1,Scott County,0.0,137.0,301,21419,0.000000
323,18.2-172,Lee County,0.0,94.0,968,21983,0.000000
99,18.2-172,Washington County,0.0,58.0,1110,53635,0.000000
