In [2]:
import pandas as pd
import os

# os.getcwd()

In [3]:
# Read the raw Fatal Encounters records from disk
fatal_encounters_original = pd.read_csv('fatal_enc.csv')
# Working copy of the data: drop every row whose imputed race is 'Other/Unknown'
fatal_encounters = fatal_encounters_original.loc[
    fatal_encounters_original['race_imputed'].ne('Other/Unknown')
]

In [4]:
# Read the census-tract table and give the three race columns the labels
# used throughout the rest of the notebook
all_tracts = pd.read_csv('all_tracts.csv').rename(
    columns = {
        'NH_BlackE': 'Black',
        'NH_WhiteE': 'White',
        'Hisp_LatinoE': 'Hispanic/Latino',
    }
)

In [5]:
# Total population of each race 'r', summed over all tracts and expressed in millions
observed_population_mil_r = all_tracts.loc[:, ['Black', 'White', 'Hispanic/Latino']].sum().div(1e6)
# NOTE: observed_population_r is an alias of the millions-scaled totals, not raw head counts
observed_population_r = observed_population_mil_r

In [6]:
# Population (in millions) of each race 'r' living in each income quintile 'q':
# rows are income quintiles, columns are races
observed_population_rq = (
    all_tracts
    .groupby('income_quintile')[['Black', 'White', 'Hispanic/Latino']]
    .sum()
    .div(1e6)
)
observed_population_rq

Unnamed: 0_level_0,Black,White,Hispanic/Latino
income_quintile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,14.548721,23.167387,13.462541
2,8.391139,36.231006,13.302322
3,6.631959,41.665829,12.16811
4,5.744847,45.449264,11.578554
5,4.322901,49.11887,8.550668


In [7]:
# Share of each race's total population that lives in each income quintile
# (every column is divided by that race's total, so each column sums to 1)
observed_proportion_rq = observed_population_rq.div(observed_population_mil_r, axis = 'columns')
observed_proportion_rq

Unnamed: 0_level_0,Black,White,Hispanic/Latino
income_quintile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.367025,0.118423,0.227938
2,0.211686,0.185199,0.225226
3,0.167307,0.21298,0.206022
4,0.144927,0.23232,0.19604
5,0.109055,0.251077,0.144774


In [8]:
# The fatal-encounter counts cover six years of data; every count in this cell
# is divided by that span once to give deaths per year.
YEARS_OF_DATA = 6

# Deaths per year by race 'r'
annual_observed_deaths_r = fatal_encounters['race_imputed'].value_counts() / YEARS_OF_DATA

# Deaths per year by income quintile 'q'
annual_observed_deaths_q = fatal_encounters['income_quintile'].value_counts() / YEARS_OF_DATA

# Deaths per year by income quintile (rows) and race (columns).
# Columns are reordered to match the population totals so that later
# element-wise arithmetic lines up column for column.
annual_observed_deaths_rq = (
    pd.crosstab(fatal_encounters['income_quintile'], fatal_encounters['race_imputed'])
    .div(YEARS_OF_DATA)
    .reindex(columns = observed_population_mil_r.index)
)

In [9]:
# Within-race distribution of deaths across income quintiles.
# normalize='columns' makes each race column sum to 1, so despite the
# "percent" in the name these are fractions (multiply by 100 for percentages).
observed_deaths_col_percent_rq = pd.crosstab(
    index = fatal_encounters['income_quintile'],
    columns = fatal_encounters['race_imputed'],
    normalize = 'columns',
)

In [10]:
# Observed fatal-encounter rate by race: deaths per year per one million population.
# The two Series are aligned on their race labels before dividing.
observed_rate_r = annual_observed_deaths_r.div(observed_population_mil_r)
observed_rate_r

Black              7.437855
Hispanic/Latino    3.575327
White              2.937483
dtype: float64

In [11]:
# Annual fatal encounter rate (race 'r' by income quintile 'q') per one million population.
# annual_observed_deaths_rq is already a per-year figure (the six-year total was
# divided by 6 when it was built), so it must NOT be divided by 6 again here;
# doing so would understate every rate six-fold and make this table
# inconsistent with observed_rate_r.
observed_rate_rq = annual_observed_deaths_rq.div(observed_population_rq)

In [12]:
#######################################################
# Counterfactual death count based on all races being
# distributed across income quintiles the same way as
# the White population
#######################################################
# Per-quintile ratio (White share of population in q) / (race r share in q).
# rdiv with axis=0 aligns the White column on the quintile index instead of
# relying on row position; the White column of the result is all ones.
proportion_adjustment = observed_proportion_rq.rdiv(observed_proportion_rq['White'], axis = 0)

# Re-weight the observed annual deaths by that ratio. The product is already a
# DataFrame carrying the race labels as columns, so no re-wrapping or renaming
# is needed.
counterfactual_annual_deaths_rq = proportion_adjustment * annual_observed_deaths_rq

# Counterfactual deaths per year by race (summed over income quintiles)
annual_counterfactual_deaths_r = counterfactual_annual_deaths_rq.sum()
annual_counterfactual_deaths_r

Black              249.975423
White              574.666667
Hispanic/Latino    192.085420
dtype: float64

In [20]:
# Counterfactual fatal-encounter rate by race: deaths per year per one million
# population (observed_population_r holds the population totals in millions)
counterfactual_rate_r = annual_counterfactual_deaths_r.div(observed_population_r)
counterfactual_rate_r

Black              6.306210
White              2.937483
Hispanic/Latino    3.252257
dtype: float64

In [37]:
# Relative reduction in the fatal-encounter rate under the counterfactual
# (White) income distribution, for the two non-reference groups.
# The groups are selected by label rather than by position: the two rate
# Series list the races in different orders, so positional slicing only
# works by coincidence of how each index happens to be sorted.
(1 - counterfactual_rate_r / observed_rate_r).loc[['Black', 'Hispanic/Latino']].round(4)

Black              0.1521
Hispanic/Latino    0.0904
dtype: float64