In [1]:
from pathlib import Path

import pandas as pd
pd.set_option('mode.copy_on_write', True)

First we look at the [Washington Post police shootings database](https://www.washingtonpost.com/graphics/investigations/police-shootings-database).  See [their methodology](https://github.com/washingtonpost/data-police-shootings/tree/master/v2) for more detail.

It's just not clear how many shootings they are missing.

In [2]:
# To fetch the data file, don't forget to do:
# $ git submodule update --init
in_path = (Path('original') / 
           'data-police-shootings' / 'v2' /
           'fatal-police-shootings-data.csv')
df = pd.read_csv(in_path)
df['date'] = pd.to_datetime(df['date'])
df

Unnamed: 0,id,date,threat_type,flee_status,armed_with,city,county,state,latitude,longitude,location_precision,name,age,gender,race,race_source,was_mental_illness_related,body_camera,agency_ids
0,3,2015-01-02,point,not,gun,Shelton,Mason,WA,47.246826,-123.121592,not_available,Tim Elliot,53.0,male,A,not_available,True,False,73
1,4,2015-01-02,point,not,gun,Aloha,Washington,OR,45.487421,-122.891696,not_available,Lewis Lee Lembke,47.0,male,W,not_available,False,False,70
2,5,2015-01-03,move,not,unarmed,Wichita,Sedgwick,KS,37.694766,-97.280554,not_available,John Paul Quintero,23.0,male,H,not_available,False,False,238
3,8,2015-01-04,point,not,replica,San Francisco,San Francisco,CA,37.762910,-122.422001,not_available,Matthew Hoffman,32.0,male,W,not_available,True,False,196
4,9,2015-01-04,point,not,other,Evans,Weld,CO,40.383937,-104.692261,not_available,Michael Rodriguez,39.0,male,H,not_available,False,False,473
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8689,9417,2023-08-08,shoot,not,gun,Ransom,Ness,KS,38.636661,-99.931512,address,Jesse Nicholls,46.0,male,,,False,False,18758
8690,9411,2023-08-09,,not,undetermined,Provo,Utah,UT,40.239087,-111.679172,intersection,Craig Deleeuw Robertson,75.0,male,,,False,False,10637
8691,9413,2023-08-09,shoot,,gun,Columbus,Franklin,OH,39.882964,-83.002053,block,,,,,,False,False,90
8692,9434,2023-08-09,point,,gun,Osceola,Mississippi,AR,35.705677,-89.962523,address,Keivion Jones,33.0,male,B,public_record,True,False,20869


In [3]:
df['armed_with'].value_counts()

gun                          5039
knife                        1462
unarmed                       512
undetermined                  336
vehicle                       306
replica                       287
blunt_object                  213
unknown                       136
other                          88
gun;vehicle                    38
gun;knife                      35
vehicle;gun                    15
other;gun                       4
knife;vehicle                   3
blunt_object;knife              2
blunt_object;blunt_object       2
knife;blunt_object              2
replica;vehicle                 1
vehicle;knife;other             1
replica;knife                   1
other;blunt_object;knife        1
Name: armed_with, dtype: int64

In [4]:
df['armed_with'].isna().sum()

210

In [5]:
# Let's take the most liberal possible interpretation of unarmed.
unarmed = df[df['armed_with'].isin(['unarmed', 'undetermined', 'unknown']) |
             df['armed_with'].isna()]
unarmed

Unnamed: 0,id,date,threat_type,flee_status,armed_with,city,county,state,latitude,longitude,location_precision,name,age,gender,race,race_source,was_mental_illness_related,body_camera,agency_ids
2,5,2015-01-03,move,not,unarmed,Wichita,Sedgwick,KS,37.694766,-97.280554,not_available,John Paul Quintero,23.0,male,H,not_available,False,False,238
8,16,2015-01-06,accident,not,unarmed,Burlington,Des Moines,IA,40.809250,-91.118875,not_available,Autumn Steele,34.0,female,W,not_available,False,True,287
15,29,2015-01-08,undetermined,not,,Huntley,Yellowstone,MT,45.859390,-108.315018,not_available,Loren Simpson,28.0,male,W,not_available,False,False,254
17,36,2015-01-08,attack,not,unarmed,Strong,Union,AR,33.111333,-92.358981,not_available,Artago Damon Howard,36.0,male,B,not_available,False,False,249
26,49,2015-01-14,attack,not,,St. Paul,Ramsey,MN,44.957673,-93.094123,not_available,Marcus Golden,24.0,male,B,not_available,False,False,63
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8679,9419,2023-08-06,,car,undetermined,Mission,Johnson,KS,39.043995,-94.656110,block,Shannon Wayne Marshall,40.0,male,W,clip,False,False,200;10497;3109;17797
8685,9418,2023-08-07,,,undetermined,Los Angeles,Los Angeles,CA,33.875192,-118.248231,block,Arturo Cernas,34.0,male,H,public_record,False,False,20
8686,9425,2023-08-07,,,undetermined,Dallas,Dallas,TX,,,,Corey Wayne Thomas,,male,,,False,False,1173
8687,9410,2023-08-08,,,undetermined,West Hartford,Hartford,CT,41.730253,-72.725644,intersection,,,,,,False,False,27704


In [6]:
# Check that all rows have an ID (for the subsequent count)
assert not unarmed['id'].isna().any()

In [7]:
by_year_race = unarmed.groupby([unarmed['date'].dt.year, unarmed['race']])[['id']].count()
by_year_race

Unnamed: 0_level_0,Unnamed: 1_level_0,id
date,race,Unnamed: 2_level_1
2015,A,2
2015,B,60
2015,H,34
2015,N,2
2015,O,4
2015,W,62
2016,A,4
2016,B,41
2016,H,31
2016,N,2


In [8]:
# Select known complete years.
complete = by_year_race.loc[2016:2022]
complete

Unnamed: 0_level_0,Unnamed: 1_level_0,id
date,race,Unnamed: 2_level_1
2016,A,4
2016,B,41
2016,H,31
2016,N,2
2016,W,79
2017,A,3
2017,B,47
2017,H,35
2017,N,4
2017,O,1


In [9]:
black_labels = [c for c in complete.index if c[1] == 'B']
b_counts = complete.loc[black_labels][['id']]
b_counts

Unnamed: 0_level_0,Unnamed: 1_level_0,id
date,race,Unnamed: 2_level_1
2016,B,41
2017,B,47
2018,B,32
2019,B,17
2020,B,23
2021,B,31
2022,B,32


In [10]:
b_counts.mean()

id    31.857143
dtype: float64

In [11]:
# Mapping police violence
# From data at:
# https://mappingpoliceviolence.org
# Methodology:
# https://mappingpoliceviolence.org/methodology
in_path = Path('original') / 'Mapping Police Violence.csv'
pv = pd.read_csv(in_path)
pv['date'] = pd.to_datetime(pv['date'], dayfirst=True)
pv

Unnamed: 0,name,age,gender,race,victim_image,date,street_address,city,state,zip,...,congressperson_party,prosecutor_head,prosecutor_race,prosecutor_gender,prosecutor_party,prosecutor_term,prosecutor_in_court,prosecutor_special,independent_investigation,prosecutor_url
0,Jabarie Camron Bozeman,20.0,Male,Black,https://s3.amazonaws.com/CFSV2/obituaries/phot...,2023-07-13,Highway 14 near W. Bridge Street,Wetumpka,AL,36092.0,...,,,,,,,,,,
1,Daniel Sheehan,32.0,Male,Unknown race,,2023-07-13,1000 block of East 32nd Street,Tucson,AZ,85713.0,...,,,,,,,,,,
2,Andrew Harrington,31.0,Male,Unknown race,,2023-07-13,3700 block of Minnesota Avenue,St. Louis,MO,,...,,,,,,,,,,
3,Jamal Brown,21.0,Male,Black,https://pbs.twimg.com/media/Fz4c483WIAE87ac?fo...,2023-07-12,706 North Ella Creek Drive,Houston,TX,77067.0,...,,,,,,,,,,
4,Jeremiah Wise,33.0,Male,White,https://cache.legacy.net/legacy/images/cobrand...,2023-07-11,North 9th Street,Byesville,OH,43723.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11877,Andrew L. Closson,21.0,Male,White,http://www.superiortelegram.com/sites/default/...,2013-01-01,U.S. Highway 53,Gordon,WI,54838.0,...,Republican,,,,,,,,,
11878,Abel Gurrola,26.0,Male,Hispanic,http://www.bakersfieldnow.com/news/local/A-26-...,2013-01-01,720 Terrace Way,Bakersfield,CA,93304.0,...,Republican,,,,,,,,,
11879,Andrew Layton,26.0,Male,White,http://bloximages.chicago2.vip.townnews.com/ma...,2013-01-01,410 S Riverfront Drive,Mankato,MN,56001.0,...,Republican,,,,,,,,,
11880,Christopher Calhoun,38.0,Male,Unknown race,,2012-06-29,Oak Street SW and Lee Street SW,Atlanta,GA,30310.0,...,Democrat,,,,,,,,,


In [12]:
list(pv)

['name',
 'age',
 'gender',
 'race',
 'victim_image',
 'date',
 'street_address',
 'city',
 'state',
 'zip',
 'county',
 'agency_responsible',
 'ori',
 'cause_of_death',
 'circumstances',
 'disposition_official',
 'officer_charged',
 'news_urls',
 'signs_of_mental_illness',
 'allegedly_armed',
 'wapo_armed',
 'wapo_threat_level',
 'wapo_flee',
 'wapo_body_camera',
 'wapo_id',
 'off_duty_killing',
 'geography',
 'mpv_id',
 'fe_id',
 'encounter_type',
 'initial_reason',
 'officer_names',
 'officer_races',
 'officer_known_past_shootings',
 'call_for_service',
 'tract',
 'urban_rural_uspsai',
 'urban_rural_nchs',
 'hhincome_median_census_tract',
 'latitude',
 'longitude',
 'pop_total_census_tract',
 'pop_white_census_tract',
 'pop_black_census_tract',
 'pop_native_american_census_tract',
 'pop_asian_census_tract',
 'pop_pacific_islander_census_tract',
 'pop_other_multiple_census_tract',
 'pop_hispanic_census_tract',
 'congressional_district_113',
 'congressperson_lastname',
 'congressperso

In [13]:
pv['allegedly_armed'].value_counts()

Allegedly Armed                       8510
Unarmed/Did Not Have Actual Weapon    1687
Unclear                                965
Vehicle                                688
Name: allegedly_armed, dtype: int64

In [14]:
# Unarmed, most liberally interpreted.
pv_unarmed = pv[~pv['allegedly_armed'].isin(
    ['Unarmed/Did Not Have Actual Weapon', 'Unclear']) |
                pv['allegedly_armed'].isna()]
                
pv_unarmed

Unnamed: 0,name,age,gender,race,victim_image,date,street_address,city,state,zip,...,congressperson_party,prosecutor_head,prosecutor_race,prosecutor_gender,prosecutor_party,prosecutor_term,prosecutor_in_court,prosecutor_special,independent_investigation,prosecutor_url
0,Jabarie Camron Bozeman,20.0,Male,Black,https://s3.amazonaws.com/CFSV2/obituaries/phot...,2023-07-13,Highway 14 near W. Bridge Street,Wetumpka,AL,36092.0,...,,,,,,,,,,
1,Daniel Sheehan,32.0,Male,Unknown race,,2023-07-13,1000 block of East 32nd Street,Tucson,AZ,85713.0,...,,,,,,,,,,
3,Jamal Brown,21.0,Male,Black,https://pbs.twimg.com/media/Fz4c483WIAE87ac?fo...,2023-07-12,706 North Ella Creek Drive,Houston,TX,77067.0,...,,,,,,,,,,
4,Jeremiah Wise,33.0,Male,White,https://cache.legacy.net/legacy/images/cobrand...,2023-07-11,North 9th Street,Byesville,OH,43723.0,...,,,,,,,,,,
5,Sara Weideman-Ramos,40.0,Female,Unknown race,,2023-07-11,Autumn Run Lane off Loop 1604,Converse,TX,78109.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11874,Christopher Tavares,21.0,Male,Hispanic,http://www.krdo.com/image/view/-/17980228/medR...,2013-01-01,Highway 50 and North Elizabeth Street,Pueblo,CO,81008.0,...,Republican,,,,,,,,,
11876,Tyree Bell,31.0,Male,Black,http://content.omaha.com/media/maps/ps/2013/ja...,2013-01-01,3727 N. 42nd St.,Omaha,NE,68111.0,...,Republican,,,,,,,,,
11877,Andrew L. Closson,21.0,Male,White,http://www.superiortelegram.com/sites/default/...,2013-01-01,U.S. Highway 53,Gordon,WI,54838.0,...,Republican,,,,,,,,,
11878,Abel Gurrola,26.0,Male,Hispanic,http://www.bakersfieldnow.com/news/local/A-26-...,2013-01-01,720 Terrace Way,Bakersfield,CA,93304.0,...,Republican,,,,,,,,,


In [15]:
# We can use state for counts (no missing values).
assert not unarmed['state'].isna().any()

In [16]:
pv_year_race = pv_unarmed.groupby([unarmed['date'].dt.year, unarmed['race']])[['state']].count()
pv_year_race

Unnamed: 0_level_0,Unnamed: 1_level_0,state
date,race,Unnamed: 2_level_1
2015.0,A,2
2015.0,B,46
2015.0,H,30
2015.0,N,2
2015.0,O,4
2015.0,W,49
2016.0,A,4
2016.0,B,27
2016.0,H,29
2016.0,N,1


In [17]:
pv_complete = pv_year_race.loc[2016:2022]
pv_complete

Unnamed: 0_level_0,Unnamed: 1_level_0,state
date,race,Unnamed: 2_level_1
2016.0,A,4
2016.0,B,27
2016.0,H,29
2016.0,N,1
2016.0,W,62
2017.0,A,3
2017.0,B,35
2017.0,H,28
2017.0,N,3
2017.0,O,1


In [18]:
pv_black_labels = [c for c in pv_complete.index if c[1] == 'B']
pv_b_counts = pv_complete.loc[pv_black_labels][['state']]
pv_b_counts

Unnamed: 0_level_0,Unnamed: 1_level_0,state
date,race,Unnamed: 2_level_1
2016.0,B,27
2017.0,B,35
2018.0,B,30
2019.0,B,14
2020.0,B,14
2021.0,B,25
2022.0,B,23


In [19]:
# As a reminder
b_counts

Unnamed: 0_level_0,Unnamed: 1_level_0,id
date,race,Unnamed: 2_level_1
2016,B,41
2017,B,47
2018,B,32
2019,B,17
2020,B,23
2021,B,31
2022,B,32


We'll take the WaPo figures, to prefer the higher number.  We've included various ambiguous categories for "unarmed", so this will be higher than the actual number *of those cases found by the WaPo* (some may have been missed).

Let's guess then, that the actual figure is probably below 50 in general, and below 30 for 2019.  Please email me (Matthew Brett) if you have some data to call that guess into question.

## Some notes on the data

[This report](https://www.skeptic.com/research-center/reports/Research-Report-CUPES-007.pdf) states that both the Washington Post and the Mapping Police Violence sites use data from the [FBI Use-Of-Force](https://www.fbi.gov/how-we-can-help-you/more-fbi-services-and-information/ucr/use-of-force) project.

See the [UoF data page](https://cde.ucr.cjis.gov/LATEST/webapp/#/pages/le/uof) for an example.

These statistics are not complete.  Quoting from the page above (as of 16 August 2023):

>  In 2023, 9,340 out of 18,514 federal, state, local, and tribal law enforcement agencies throughout the nation participated and provided use-of-force data. The officers employed by these agencies represent 60% federal, state, local, and tribal sworn officers in the nation. 

From that site, by selecting years, you can find the following data:

In [20]:
pd.read_csv('original/uof_participation.csv')

Unnamed: 0,Year,# Reporting forces,# forces total,Officer%
0,2023,9340,18514,60
1,2022,9836,18514,70
2,2021,8687,18514,65
3,2020,6531,18514,56
4,2019,5411,18514,45


You'd have to guess that the non-reporting forces would tend to be smaller, and therefore, less likely to be in high-crime high-mortality areas.  A quick check on the "participating agencies" includes, for example, the LAPD, NYPD,  

Looking at the Mapping Police Violence methodology page, I can't see evidence they are using the UoF database - and it's not clear to me that the Washington Post is using that database either.

## Estimates of shootings and political orientation

The following data are from [this survey](https://www.skeptic.com/research-center/reports/Research-Report-CUPES-007.pdf).  The data table below comes from the [supplementary materials for the survey](https://www.skeptic.com/research-center/reports/Supplemental-CUPES-007.pdf).

The question asked was:

> If you had to guess, how many unarmed Black men were killed by police in 2019?

It's not clear to me what the sample of people was.  From the report "Participants" heading:

> 980 adults in the United States from the CUPES dataset
that reported a consistent political orientation (see
measures below).

In [22]:
estimates = pd.read_csv('original/cupes-007.csv')
estimates

Unnamed: 0,Guess,Very Liberal,Liberal,Moderate,Conservative,Very Conservative
0,About 10,22,37,125,77,63
1,About 100,43,64,151,67,46
2,About 1000,44,44,61,15,18
3,About 10000,20,11,22,5,4
4,More than 10000,11,9,13,2,6


In [40]:
# Just write estimates out to processed
estimates.to_csv('processed/cupes-007.csv', index=None)
pd.read_csv('processed/cupes-007.csv')

Unnamed: 0,Guess,Very Liberal,Liberal,Moderate,Conservative,Very Conservative,guess_value
0,About 10,22,37,125,77,63,10
1,About 100,43,64,151,67,46,100
2,About 1000,44,44,61,15,18,1000
3,About 10000,20,11,22,5,4,10000
4,More than 10000,11,9,13,2,6,10000
