In [1]:
# Import the Pandas package
import pandas as pd
import numpy as np
import re 
def clean_text(string):
    regex = '[^A-Za-z0-9]+'
    return re.sub(regex, '', string)

# <span style="color: red"> Problem Statement: Police Violence in the US </span>

In the US, the federal state and local governments possess a limited "state monoploly on violence." Through the democratic process, citizens authorize the government's sworn officers to excercise this monopoly by applying reasonable coercive force to enforce the law. This use of force is in turn restricted by the law, which authorizes only a minimum amount of force under specific circumstances (self-defense, for example). 

Our goal is to model aspects of police violence in the US, at some geographic level, in order to investigae trends over time and associations with demographic, health, economoic indicators. Particularly one thing we would like to see if the call to defund police would have as meaningful an impace as one would think.

- Is there evidene of discrimination in police officer's application of unreasonable force, or in the prosection of the officers involved?
- Are there longitudnal or geographic differences in these injustices?
- Do partterns of departmental misconduct and activity predict future violence?
- **What biases exist in the available data that prevent us from being able tell the full and honest story of these incidents?**

 # <span style="color: blue"> Citizen deaths, police deaths, and other outcomes </span>
- Police shootings
- Citizen fatalities involving police
- Police officer deaths suffered in the line of duty

In [11]:
### Fatal Encounters
deaths_arrests = "data/fatal_encounters/deaths_arrests.csv"
fatal_encounters = "data/fatal_encounters/fatal_encounters_dot_org.csv" 
police_deaths = "data/fatal_encounters/police_deaths_538.csv" 
police_killings = "data/fatal_encounters/police_killings_MPV.csv" 
shootings_wash_post = "data/fatal_encounters/shootings_wash_post.csv"

#### Death and Arrests

In [34]:
pdeaths_arrests = pd.read_csv(deaths_arrests)
pdeaths_arrests[pdeaths_arrests['State'].notnull()]

Unnamed: 0,State,City,PD,Black People Killed by Police (1/1/2013-12/31/2019),Hispanic People Killed by Police (1/1/2013-12/31/2019),Native American People Killed by Police (1/1/2013-12/31/2019),Asian People Killed by Police (1/1/2013-12/31/2019),Pacific Islanders Killed by Police (1/1/2013-12/31/2019),White People Killed by Police (1/1/2013-12/31/2019),Unknown Race People Killed by Police (1/1/2013-12/31/2019),...,Average Violent Crimes Reported (2013-17),Violent Crime Rate,2013 Total Arrests (UCR Data),2014 Total Arrests,2015 Total Arrests,2016 Total Arrests,2017 Total Arrests,2018 Total Arrests,Estimated Average Arrests per Year,Killings by Police per 10k Arrests
0,New Mexico,Albuquerque,Albuquerque Police Department,3.0,18.0,,,,11.0,,...,6040.3,11.1,27700.0,25447.0,22126.0,20341.0,21130.0,22254.0,22259.6,14.4
1,California,Anaheim,Anaheim Police Department,3.0,7.0,,,,3.0,,...,1192.7,3.5,7891.0,8137.0,8381.0,9415.0,8869.0,10019.0,8964.2,14.5
2,Alaska,Anchorage,Anchorage Police Department,1.0,1.0,2.0,,,5.0,4.0,...,3179.3,10.9,17601.0,14748.0,14387.0,13294.0,13871.0,13973.0,14054.6,9.2
3,Texas,Arlington,Arlington Police Department,7.0,1.0,,2.0,,3.0,,...,1921.8,5.3,17258.0,16222.0,13989.0,11024.0,8397.0,9566.0,11839.6,11.0
4,Georgia,Atlanta,Atlanta Police Department,13.0,,,,,1.0,1.0,...,4956.0,11.8,30505.0,26958.0,,22161.0,22581.0,,23900,6.3
5,Colorado,Aurora,Aurora Police Department,9.0,3.0,,1.0,,7.0,2.0,...,1906.3,5.9,21182.0,20469.0,19155.0,17455.0,15907.0,15662.0,17729.6,12.4
6,Texas,Austin,Austin Police Department,5.0,7.0,,1.0,,19.0,,...,3642.7,4.6,37024.0,36524.0,33101.0,30687.0,28485.0,24359.0,30631.2,10.4
7,California,Bakersfield,Bakersfield Police Department,3.0,15.0,,1.0,,5.0,,...,1814.7,5.2,16786.0,20912.0,21814.0,21629.0,16822.0,13048.0,18845,12.7
8,Maryland,Baltimore,Baltimore Police Department,27.0,,,,,3.0,1.0,...,10192.2,16.4,47560.0,46232.0,32939.0,29661.0,29044.0,25567.0,32688.6,9.5
9,Louisiana,Baton Rouge,Baton Rouge Police Department,10.0,,,,,,,...,2132.2,9.3,,,15244.0,13322.0,12829.0,10773.0,13042,7.7


In [45]:
pdeaths_arrests.isnull().sum()/pdeaths_arrests.shape[0]

State                                                            0.898608
City                                                             0.897614
PD                                                               0.898608
Black People Killed by Police (1/1/2013-12/31/2019)              0.907555
Hispanic People Killed by Police (1/1/2013-12/31/2019)           0.929423
Native American People Killed by Police (1/1/2013-12/31/2019)    0.988072
Asian People Killed by Police (1/1/2013-12/31/2019)              0.967197
Pacific Islanders Killed by Police (1/1/2013-12/31/2019)         0.991054
White People Killed by Police (1/1/2013-12/31/2019)              0.909543
Unknown Race People Killed by Police (1/1/2013-12/31/2019)       0.950298
All People Killed by Police (1/1/2013-12/31/2019)                0.898608
Total                                                            0.898608
Black                                                            0.898608
White                                 

In [56]:
dfatal_encounters = pd.read_csv(fatal_encounters) 
dfatal_encounters.count()
#dfatal_encounters.rename(columns = lambda col: clean_text(col),inplace=True)

Unique ID                                                         28621
Subject's name                                                    28622
Subject's age                                                     27608
Subject's gender                                                  28521
Subject's race                                                    28621
Subject's race with imputations                                   28448
Imputation probability                                            28439
URL of image of deceased                                          13130
Date of injury resulting in death (month/day/year)                28622
Location of injury (address)                                      28080
Location of death (city)                                          28586
Location of death (state)                                         28621
Location of death (zip code)                                      28432
Location of death (county)                                      

### Questions that arise from the fields
- What areas have the highest number of deaths?
- What races have the highest number of fatalities?
- What is the distribution of race within the dataset?
- What is the breakdown of gender?
- Are there intersections between gender/race/mental illness?
- What datasets would complement this dataset?
- Who is the source of the data? Forget all ideas of objectivity

In [85]:
# Imputed data means that some of the race/ethnicity was predicted rather recorded"
# For more information here
dfatal_encounters[["Subject's name","Subject's age","Subject's gender","Subject's race","Date of injury resulting in death (month/day/year)","Full Address"]]

Unnamed: 0,Subject's name,Subject's age,Subject's gender,Subject's race,Date of injury resulting in death (month/day/year),Full Address
0,Samuel H. Knapp,17,Male,European-American/White,01/01/2000,27898-27804 US-101 Willits CA 95490 Mendocino
1,Mark A. Horton,21,Male,African-American/Black,01/01/2000,Davison Freeway Detroit MI 48203 Wayne
2,Phillip A. Blurbridge,19,Male,African-American/Black,01/01/2000,Davison Freeway Detroit MI 48203 Wayne
3,Mark Ortiz,23,Male,Hispanic/Latino,01/01/2000,600 W Cherry Ln Carlsbad NM 88220 Eddy
4,LaTanya Janelle McCoy,24,Female,African-American/Black,01/02/2000,5700 block Mack Road Sacramento CA 95823 Sacra...
5,Lester Miller,53,Male,Race unspecified,01/02/2000,4850 Flakes Mill Road Ellenwood GA 30294 DeKalb
6,Billy Jones,30,Male,African-American/Black,01/02/2000,Pollock Street Kinston NC 28501 Lenoir
7,Terry Rouse,29,Male,African-American/Black,01/02/2000,Pollock Street Kinston NC 28501 Lenoir
8,James Martin Loy,55,Male,Race unspecified,01/03/2000,107 South Grant St. Uniontown PA 15401 Fayette
9,Doris Murphy,42,Female,Race unspecified,01/03/2000,391 Park Ave Baltimore MD 21201 Baltimore City


### Police Killings

In [80]:
dpolice_killings=pd.read_csv(police_killings)
dpolice_killings.isnull().sum()

Victim's name                                                                                                                                                                    0
Victim's age                                                                                                                                                                     0
Victim's gender                                                                                                                                                                 12
Victim's race                                                                                                                                                                    0
URL of image of victim                                                                                                                                                        3888
Date of Incident (month/day/year)                                                                        

### Washington Post Shooting Dataset
In 2015, The Washington Post began to log every fatal shooting by an on-duty police officer in the United States. In that time there have been more than 5,000 such shootings recorded by The Post.

A Post investigation found that the FBI undercounted fatal police shootings by more than half. This is because reporting by police departments is voluntary and many departments fail to do so. 

In [84]:
dshootings_wash_post=pd.read_csv(shootings_wash_post)
dshootings_wash_post.count()

id                         5552
name                       5552
date                       5552
manner_of_death            5552
armed                      5339
age                        5307
gender                     5551
race                       4966
city                       5552
state                      5552
signs_of_mental_illness    5552
threat_level               5552
flee                       5290
body_camera                5552
dtype: int64

In [86]:
dshootings_wash_post

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera
0,3,Tim Elliot,2015-01-02,shot,gun,53.0,M,A,Shelton,WA,True,attack,Not fleeing,False
1,4,Lewis Lee Lembke,2015-01-02,shot,gun,47.0,M,W,Aloha,OR,False,attack,Not fleeing,False
2,5,John Paul Quintero,2015-01-03,shot and Tasered,unarmed,23.0,M,H,Wichita,KS,False,other,Not fleeing,False
3,8,Matthew Hoffman,2015-01-04,shot,toy weapon,32.0,M,W,San Francisco,CA,True,attack,Not fleeing,False
4,9,Michael Rodriguez,2015-01-04,shot,nail gun,39.0,M,H,Evans,CO,False,attack,Not fleeing,False
5,11,Kenneth Joe Brown,2015-01-04,shot,gun,18.0,M,W,Guthrie,OK,False,attack,Not fleeing,False
6,13,Kenneth Arnold Buck,2015-01-05,shot,gun,22.0,M,H,Chandler,AZ,False,attack,Car,False
7,15,Brock Nichols,2015-01-06,shot,gun,35.0,M,W,Assaria,KS,False,attack,Not fleeing,False
8,16,Autumn Steele,2015-01-06,shot,unarmed,34.0,F,W,Burlington,IA,False,other,Not fleeing,True
9,17,Leslie Sapp III,2015-01-06,shot,toy weapon,47.0,M,B,Knoxville,PA,False,attack,Not fleeing,False


### Questions that Arise from this dataset?
- What is the most common manner of death?
- How many people were showing signs of mental illness?
- How many people were fleeing?
- How many cases did not have police body camera's involved?

#### Police Fatalities

The police deaths have no missing data

In [73]:
dpolice_deaths = pd.read_csv(police_deaths)
dpolice_deaths.head(50)

Unnamed: 0,person,dept,eow,cause,cause_short,date,year,canine,dept_name,state
0,Constable Darius Quimby,"Albany County Constable's Office, NY","EOW: Monday, January 3, 1791",Cause of Death: Gunfire,Gunfire,1791-01-03,1791,False,Albany County Constable's Office,NY
1,Sheriff Cornelius Hogeboom,"Columbia County Sheriff's Office, NY","EOW: Saturday, October 22, 1791",Cause of Death: Gunfire,Gunfire,1791-10-22,1791,False,Columbia County Sheriff's Office,NY
2,Deputy Sheriff Isaac Smith,"Westchester County Sheriff's Department, NY","EOW: Thursday, May 17, 1792",Cause of Death: Gunfire,Gunfire,1792-05-17,1792,False,Westchester County Sheriff's Department,NY
3,Marshal Robert Forsyth,United States Department of Justice - United S...,"EOW: Saturday, January 11, 1794",Cause of Death: Gunfire,Gunfire,1794-01-11,1794,False,United States Department of Justice - United S...,US
4,Sheriff Robert Maxwell,"Greenville County Sheriff's Office, SC","EOW: Sunday, November 12, 1797",Cause of Death: Gunfire,Gunfire,1797-11-12,1797,False,Greenville County Sheriff's Office,SC
5,High Sheriff John Caldwell Cook,"Mecklenburg County Sheriff's Office, NC","EOW: Tuesday, October 16, 1804",Cause of Death: Gunfire,Gunfire,1804-10-16,1804,False,Mecklenburg County Sheriff's Office,NC
6,Watchman Christian Luswanger,"New York City Watch, NY","EOW: Thursday, December 25, 1806",Cause of Death: Stabbed,Stabbed,1806-12-25,1806,False,New York City Watch,NY
7,Deputy Sheriff John A. Gooch,"Livingston County Sheriff's Department, KY","EOW: Saturday, March 7, 1807",Cause of Death: Gunfire,Gunfire,1807-03-07,1807,False,Livingston County Sheriff's Department,KY
8,Deputy Sheriff Ebenezer Parker,"Cumberland County Sheriff's Office, ME","EOW: Monday, January 18, 1808",Cause of Death: Assault,Assault,1808-01-18,1808,False,Cumberland County Sheriff's Office,ME
9,Night Watchman George Workner,"Baltimore City Police Department, MD","EOW: Tuesday, March 15, 1808",Cause of Death: Stabbed,Stabbed,1808-03-15,1808,False,Baltimore City Police Department,MD


**This dataset has data from 1808.**

# <span style="color: blue"> Demographics, crime stats, and other data </span>
- Social and economic data
- Political leanings of citizens
- Sales of DoD equipment to law enforcement agencies
- City budgets
- Police department headcounts
- Police department policies and contract provisions
- Juvenile arrests by type of crime and race
- Adult arrests by type of crime, age, gender and race
- Crimes and arrests for the prime city in the four largest metro areas.

In [2]:
budgets = "data/demographics/budgets.csv"
dod_equipment = "data/demographics/dod_equipment_purchases.csv" # probably wont use (KM)
dqs_spending = "data/demographics/dqs_table_88_3.csv" # probably won't use since its by state not city (KM)
education_census = "data/demographics/education_census_bureau.csv" #Hmmm, this seems to be the same as the dqs file..
housing = "data/demographics/housing.csv"
police_contracts = "data/demographics/police_contracts.csv"
police_employment = "data/demographics/police_employment_fbi.csv"
police_policies = "data/demographics/police_policies.csv"
politics = "data/demographics/politics_538.csv"
poverty_census = "data/demographics/poverty_census_bureau.csv"

In [17]:
dfhousing = pd.read_csv(housing)
dfhousing.head()

Unnamed: 0,id,Geographic Area Name,Estimate!!HOUSING OCCUPANCY!!Total housing units,Margin of Error!!HOUSING OCCUPANCY!!Total housing units,Percent!!HOUSING OCCUPANCY!!Total housing units,Percent Margin of Error!!HOUSING OCCUPANCY!!Total housing units,Estimate!!HOUSING OCCUPANCY!!Total housing units!!Occupied housing units,Margin of Error!!HOUSING OCCUPANCY!!Total housing units!!Occupied housing units,Percent!!HOUSING OCCUPANCY!!Total housing units!!Occupied housing units,Percent Margin of Error!!HOUSING OCCUPANCY!!Total housing units!!Occupied housing units,...,Percent!!GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME (GRAPI)!!Occupied units paying rent (excluding units where GRAPI cannot be computed)!!30.0 to 34.9 percent,Percent Margin of Error!!GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME (GRAPI)!!Occupied units paying rent (excluding units where GRAPI cannot be computed)!!30.0 to 34.9 percent,Estimate!!GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME (GRAPI)!!Occupied units paying rent (excluding units where GRAPI cannot be computed)!!35.0 percent or more,Margin of Error!!GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME (GRAPI)!!Occupied units paying rent (excluding units where GRAPI cannot be computed)!!35.0 percent or more,Percent!!GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME (GRAPI)!!Occupied units paying rent (excluding units where GRAPI cannot be computed)!!35.0 percent or more,Percent Margin of Error!!GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME (GRAPI)!!Occupied units paying rent (excluding units where GRAPI cannot be computed)!!35.0 percent or more,Estimate!!GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME (GRAPI)!!Occupied units paying rent (excluding units where GRAPI cannot be computed)!!Not computed,Margin of Error!!GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME (GRAPI)!!Occupied units paying rent (excluding units where GRAPI cannot be computed)!!Not computed,Percent!!GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME (GRAPI)!!Occupied units paying rent (excluding units where GRAPI cannot be computed)!!Not computed,Percent Margin of Error!!GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME (GRAPI)!!Occupied units paying rent (excluding units where GRAPI cannot be computed)!!Not computed
0,0400000US01,Alabama,2284922,264,2284922,(X),1897576,10370,83.0,0.5,...,8.2,0.7,200448,8100,38.9,1.4,77255,3844,(X),(X)
1,0400000US02,Alaska,319867,200,319867,(X),252199,3658,78.8,1.1,...,8.6,1.7,29456,2795,36.3,3.4,7710,1267,(X),(X)
2,0400000US04,Arizona,3076048,708,3076048,(X),2670441,12014,86.8,0.4,...,9.9,0.6,316994,9214,36.6,1.0,62700,5017,(X),(X)
3,0400000US05,Arkansas,1389159,337,1389159,(X),1163647,8530,83.8,0.6,...,9.3,0.9,122038,6368,34.3,1.5,46548,3201,(X),(X)
4,0400000US06,California,14367012,1286,14367012,(X),13157873,23844,91.6,0.2,...,9.7,0.3,2463771,26326,43.6,0.4,281867,9284,(X),(X)


In [16]:
dfeducation = pd.read_csv(education_census)
dfspending.head()

Unnamed: 0,State,Total,Inter-governmental,Total.1,Basic Education,Higher Education,Public Welfare,Health and Hospitals,Highways,Police,Other,Personal Income ($M)
0,Connecticut......................................,13.2,--,13.2,3.6,1.3,1.5,1.0,0.9,0.5,4.4,257714
1,Maine............................................,18.7,--,18.7,4.1,1.3,5.2,0.7,1.6,0.5,5.4,62060
2,Massachusetts....................................,16.7,0.0,16.7,3.5,1.2,4.8,0.7,0.8,0.6,5.2,463931
3,New Hampshire....................................,13.9,--,13.9,3.7,1.2,3.0,0.2,0.9,0.5,4.2,80122
4,Rhode Island.....................................,19.5,0.0,19.5,4.5,1.2,5.5,0.5,0.8,0.8,6.0,55934


**Education census csv**
This csv contains the same data thats in the dqs_spending csv.

In [14]:
dfspending = pd.read_csv(dqs_spending)
dfspending.head()

Unnamed: 0,State,Total,Inter-governmental,Total.1,Basic Education,Higher Education,Public Welfare,Health and Hospitals,Highways,Police,Other,Personal Income ($M)
0,Connecticut......................................,13.2,--,13.2,3.6,1.3,1.5,1.0,0.9,0.5,4.4,257714
1,Maine............................................,18.7,--,18.7,4.1,1.3,5.2,0.7,1.6,0.5,5.4,62060
2,Massachusetts....................................,16.7,0.0,16.7,3.5,1.2,4.8,0.7,0.8,0.6,5.2,463931
3,New Hampshire....................................,13.9,--,13.9,3.7,1.2,3.0,0.2,0.9,0.5,4.2,80122
4,Rhode Island.....................................,19.5,0.0,19.5,4.5,1.2,5.5,0.5,0.8,0.8,6.0,55934


In [15]:
dfspending.shape

(51, 12)

**DQS Spending CSV**
This file contains spending as a function of income for 50 stats + washington DC. If our data is going to look at a city level, this may not be helpful. The units are also a little weird.

In [11]:
dfDOD = pd.read_csv(dod_equipment)
dfDOD.head(10)

Unnamed: 0,State,Station Name (LEA),NSN,Item Name,Quantity,UI,Acquisition Value,DEMIL Code,DEMIL IC,Ship Date,state
0,AL,ABBEVILLE POLICE DEPT,6115-01-285-3012,"GENERATOR SET,DIESEL ENGINE",2,Each,9922.25,A,7.0,2018-02-12 00:00:00.000,Alabama
1,AL,ABBEVILLE POLICE DEPT,6230-01-439-3732,"FLOODLIGHT SET,ELECTRIC",1,Each,12000.0,A,1.0,2018-01-31 00:00:00.000,Alabama
2,AL,ABBEVILLE POLICE DEPT,7125-01-466-0952,"CABINET,STORAGE",4,Each,860.74,A,1.0,2018-01-24 00:00:00.000,Alabama
3,AL,ABBEVILLE POLICE DEPT,2530-01-558-2138,"WHEEL,PNEUMATIC TIRE",1,Assembly,2015.0,A,1.0,2018-01-24 00:00:00.000,Alabama
4,AL,ABBEVILLE POLICE DEPT,2320-01-447-3892,"TRUCK,WRECKER",1,Each,331680.0,Q,6.0,2017-11-14 00:00:00.000,Alabama
5,AL,ABBEVILLE POLICE DEPT,5180-01-628-2375,"TOOL KIT,AIRCRAFT MAINTENANCE",3,Kit,1298.0,A,1.0,2018-04-12 00:00:00.000,Alabama
6,AL,ABBEVILLE POLICE DEPT,1240-01-411-1265,"SIGHT,REFLEX",9,Each,335.0,D,1.0,2016-09-14 00:00:00.000,Alabama
7,AL,ABBEVILLE POLICE DEPT,4010-00-473-6166,"CHAIN ASSEMBLY,SINGLE LEG",10,Each,160.18,A,1.0,2018-01-23 00:00:00.000,Alabama
8,AL,ABBEVILLE POLICE DEPT,5855-DS-THR-MIMG,THERMAL IMAGINING EQUIPMENT,1,Each,0.0,D,,2016-06-02 00:00:00.000,Alabama
9,AL,ABBEVILLE POLICE DEPT,1005-01-587-7175,"MOUNT,RIFLE",10,Each,1647.0,D,1.0,2016-09-19 00:00:00.000,Alabama


In [13]:
dfDOD.shape

(154780, 11)

**DOD Spending csv**
Contains data on dpt of defense sells to police departments. Data is organized by state, but may be able to infer city by station name column. Overall likely not going to be used in this project.

In [4]:
dfbudgets = pd.read_csv(budgets)
dfbudgets.head(10)

Unnamed: 0,year,city_name,id_city,city_population,cpi,rev_total_city,rev_general_city,intergovt_rev_city,igr_federal_city,igr_state_city,...,cash_other_offsets,cash_other_bonds,cash_other_other,county_name,id_county,county_population,relationship_city_school,enrollment,districts_in_city,consolidated_govt
0,1977,AK: Anchorage,22002001.0,174500,4.044885,5342.24,4956.92,2148.77,279.32,1869.46,...,178.51,787.93,691.32,,,,4.0,36855.0,,1.0
1,1978,AK: Anchorage,22002001.0,177000,3.759509,5948.99,5490.05,2468.11,403.24,2064.86,...,187.53,1395.82,1158.01,,,,4.0,36804.0,,1.0
2,1979,AK: Anchorage,22002001.0,179600,3.376308,6158.68,5746.64,2573.34,496.97,2076.37,...,249.03,1812.04,1314.62,,,,4.0,36757.0,,1.0
3,1980,AK: Anchorage,22002001.0,178800,2.974757,5654.93,5210.77,2313.62,371.46,1942.16,...,986.73,1330.72,1572.66,,,,4.0,36008.0,,1.0
4,1981,AK: Anchorage,22002001.0,174431,2.69659,6192.83,5736.81,2771.43,338.76,2432.67,...,977.32,1549.51,1669.55,,,,4.0,34557.0,,1.0
5,1982,AK: Anchorage,22002001.0,180969,2.540104,6560.51,6046.89,3169.22,255.78,2913.44,...,904.52,1835.36,1961.75,,,,4.0,35260.0,,1.0
6,1983,AK: Anchorage,22002001.0,195216,2.461044,7382.29,6816.43,3840.59,257.52,3583.07,...,865.35,1142.88,2656.94,,,,4.0,37397.0,,1.0
7,1984,AK: Anchorage,22002001.0,211028,2.359191,7245.14,6668.08,3391.51,276.46,3115.05,...,307.55,998.72,4081.36,,,,4.0,39736.0,,1.0
8,1985,AK: Anchorage,22002001.0,220254,2.278067,8169.09,7471.86,4009.81,210.64,3799.17,...,832.07,377.82,3558.56,,,,4.0,40752.0,,1.0
9,1986,AK: Anchorage,22002001.0,226848,2.236496,7279.6,6330.66,2895.35,149.67,2745.68,...,925.74,1903.24,1942.21,,,,4.0,41229.0,,1.0


In [7]:
dfbudgets.shape

(6232, 662)

In [8]:
dfbudgets.isnull().sum()

year                           0
city_name                      0
id_city                       41
city_population                0
cpi                           82
rev_total_city                 0
rev_general_city               0
intergovt_rev_city             0
igr_federal_city               0
igr_state_city                 0
own_source_rev_city            0
taxes_city                     0
tax_property_city              0
tax_sales_grossrec_city        0
tax_sales_general_city         0
tax_sales_selectiv_city        0
tax_sales_motofuel_city        0
tax_sales_alcohol_city         0
tax_sales_tobacco_city         0
tax_sales_utility_city         0
tax_sales_other_city           0
tax_income_indiv_city          0
tax_income_corp_city           0
tax_motor_vehicle_city         0
tax_other_city                 0
charges_misc_rev_city          0
charges_city                   0
chg_educ_city                  0
chg_higher_ed_city             0
chg_school_lunch_city          0
          

In [10]:
dfbudgets.city_name.unique()

array(['AK: Anchorage', 'AK: Fairbanks', 'AL: Birmingham', 'AL: Mobile',
       'AL: Montgomery', 'AR: Ft. Smith', 'AR: Little Rock', 'AZ: Mesa',
       'AZ: Phoenix', 'AZ: Tucson', 'Average for Cities', 'CA: Anaheim',
       'CA: Bakersfield', 'CA: Fremont', 'CA: Fresno',
       'CA: Huntington Beach', 'CA: Long Beach', 'CA: Los Angeles',
       'CA: Modesto', 'CA: Oakland', 'CA: Riverside', 'CA: Sacramento',
       'CA: San Diego', 'CA: San Francisco', 'CA: San Jose',
       'CA: Santa Ana', 'CA: Stockton', 'CO: Aurora',
       'CO: Colorado Springs', 'CO: Denver', 'CT: Bridgeport',
       'CT: Hartford', 'CT: New Haven', 'DC: Washington', 'DE: Dover',
       'DE: Wilmington', 'FL: Ft. Lauderdale', 'FL: Hialeah',
       'FL: Jacksonville', 'FL: Miami', 'FL: Orlando',
       'FL: St. Petersburg', 'FL: Tallahassee', 'FL: Tampa', 'GA: Atlanta',
       'GA: Columbus', 'IA: Cedar Rapids', 'IA: Des Moines', 'ID: Boise',
       'ID: Nampa', 'IL: Aurora', 'IL: Chicago', 'IN: Ft. Wayne',
    

**Budgets.csv contains various income and expense data for select cities by year over serveral different categories**

# <span style="color: blue"> Crime Data </span>
- National Arrests
- Juvenile Arrests
- Crimes in Chicago, Dallas, LA, NY 

In [8]:
national_arrests = "data/crime_data/arrests_national_adults.csv"
chicago_crimes = "data/crime_data/Chicago Crimes_-_2001_to_Present.csv"
dallas_crimes = "data/crime_data/Dallas Police Arrests.csv"
juvenile_arrests = "data/crime_data/juvenile_arrests.csv"
la_crimes = "data/crime_data/LA Crime_Data_from_2010_to_2019.csv"
nypd_arrests_historic="data/crime_data/NYPD_Arrests_Data__Historic_.csv"

# <span style="color: blue">  Protest activity and violence among protesters, police, and others. </span>
- Protest activity
- Police response
- Press activity
- Video clips of incidents

In [9]:
global_protests = "data/protests/global_protests.json"
police_accountability_project = "data/protests/Police_Accountability_Project.csv"
press_incidents = "data/protests/press_incidents.csv"
protests = "data/protests/protests.csv"
protest_violence = "data/protests/protest_violence.txt"
twitter_police_videos = "data/protests/twitter_police_videos.csv"
acled_usa = "data/protests/USA_2020_Aug29_ACLED.csv"

The armed location and event data project ACLED