In [5]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import sklearn

## Cleaning the data

In [6]:
# Read the four pre-cleaned city-level tables in one pass; the targets on the
# left are bound in the same order as the paths listed below.
mm_hh_inc, hs_complete, percent_below_poverty, race_by_city = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/clean/med-hh-inc-clean.csv',
        './data/clean/hs-complete-clean.csv',
        './data/clean/percent-below-poverty-clean.csv',
        './data/clean/race-by-city-clean.csv',
    )
)

In [7]:
# Cast the measurement columns to float so later numeric work (means, fills)
# operates on numbers. astype raises if a value cannot be converted.
race_share_cols = 'share_white share_black share_native_american share_asian share_hispanic'.split()

hs_complete = hs_complete.astype({'percent_completed_hs': float})
percent_below_poverty = percent_below_poverty.astype({'poverty_rate': float})
race_by_city = race_by_city.astype({col: float for col in race_share_cols})

### Aggregated Demographics Data

In [8]:
# Place the income table and the selected columns of the other three tables
# side by side. concat(axis=1) aligns the pieces on their row index, so each
# output row combines the rows that share an index label across the inputs.
demographic_parts = [
    mm_hh_inc,
    hs_complete['percent_completed_hs'],
    percent_below_poverty['poverty_rate'],
    race_by_city['share_white share_black share_native_american share_asian share_hispanic'.split()],
]
# Order the combined table by state abbreviation; the original index labels are kept.
demographics = pd.concat(demographic_parts, axis=1, sort=False).sort_values(by=['Geographic Area'])




In [9]:
# Impute missing values in every numeric demographic column with that column's
# own mean (NaNs are skipped when the mean is computed).
#
# The filled columns are assigned back explicitly instead of calling
# `fillna(..., inplace=True)` on `demographics[col]`: an inplace call on a
# column selection is chained assignment, which pandas 2.x warns about and
# which leaves `demographics` unchanged once copy-on-write is active.
numeric_cols = [
    'Median Income',
    'percent_completed_hs',
    'poverty_rate',
    'share_white',
    'share_black',
    'share_native_american',
    'share_asian',
    'share_hispanic',
]
column_means = demographics[numeric_cols].mean()
# fillna with a Series fills each column using the value stored under its label.
demographics[numeric_cols] = demographics[numeric_cols].fillna(column_means)
demographics.head()

Unnamed: 0,Geographic Area,City,Median Income,percent_completed_hs,poverty_rate,share_white,share_black,share_native_american,share_asian,share_hispanic
717,AK,Hooper Bay city,35938.0,75.0,40.9,28.0,0.0,40.0,0.0,12.0
696,AK,Game Creek CDP,51335.9481,100.0,0.0,6.7,0.0,87.1,0.0,0.0
697,AK,Gateway CDP,92739.0,96.1,7.8,23.5,0.0,76.5,0.0,4.2
698,AK,Glacier View CDP,43000.0,90.2,9.2,91.4,0.0,2.7,1.1,1.6
699,AK,Glennallen CDP,51335.9481,94.4,0.0,81.1,0.6,11.2,0.5,1.7


# Census Data

In [11]:
# Load the state-level census race table.
# Source: https://factfinder.census.gov/faces/tableservices/jsf/pages/productview.xhtml?src=CF
# The CSV was cleaned by hand before being read here.
# NOTE(review): in the head() output shown below, AL is paired with POP 308745538.0,
# which looks like a nationwide total rather than one state's population — confirm
# the STATE labels are not shifted by one row relative to the data columns.
census_state_race = pd.read_csv('./data/race-by-state-census.csv')

In [12]:
# Preview the first five rows of the census table to sanity-check the load.
census_state_race.head()

Unnamed: 0,STATE,POP,W,B,N,A,O,H
0,AL,308745538.0,72.4,12.6,0.9,4.8,6.2,16.3
1,AK,4779736.0,68.5,26.2,0.6,1.1,2.0,3.9
2,AZ,710231.0,66.7,3.3,14.8,5.4,1.6,5.5
3,AR,6392017.0,73.0,4.1,4.6,2.8,11.9,29.6
4,CA,2915918.0,77.0,15.4,0.8,1.2,3.4,6.4


In [51]:
# Collapse the census table to one row per state by averaging every other column;
# STATE becomes the index of the result.
# Grouping by the 'STATE' label (instead of the column Series) keeps this cell
# re-runnable: after the first run STATE is an index level, and a string key
# passed to groupby resolves against index level names as well as columns.
census_state_race = census_state_race.groupby('STATE').mean()

# Cleaning Police Data

In [169]:
# Load the cleaned police-shootings table.
# (File name suggests Washington Post data — TODO confirm and record the source.)
wp_pol = pd.read_csv('./data/clean/wp-police.csv')

In [179]:
 # Rename the lower-case 'state' column to 'STATE', the key name used by the census table.
 wp_pol = wp_pol.rename({'state': 'STATE'}, axis='columns')

In [181]:
# Attach the matching state's census figures to every police record, keyed on STATE.
# This is an inner join (the merge default), so records whose STATE has no
# census row are dropped from the result.
wp_pol_dems = wp_pol.merge(census_state_race, how='inner', on=['STATE'])

In [182]:
# Pull the 'date' column out of the raw police table.
# NOTE(review): `wp_raw` is not defined in any cell visible here, so this line
# relies on state created elsewhere — confirm it survives Restart & Run All.
dates = wp_raw['date']

In [183]:
# Add a `dates` column holding the raw date values. `.values` discards the
# Series index, so values are paired with rows purely by position and the
# lengths must match exactly.
# NOTE(review): this presumes the merged frame has the same row count and row
# order as wp_raw — verify, since the inner merge above can drop rows.
wp_pol_dems = wp_pol_dems.assign(dates=dates.values)

In [187]:
# Move the state key, the census columns and the date into a MultiIndex,
# leaving only the per-incident fields as ordinary columns.
index_levels = ['STATE', 'POP', 'W', 'B', 'N', 'A', 'O', 'H', 'dates']
wp_pol_dems = wp_pol_dems.set_index(index_levels)

In [188]:
# Order the rows by their index values, level by level.
wp_pol_dems = wp_pol_dems.sort_index()

In [192]:
# Persist the merged, indexed table. to_csv writes the index by default, so the
# index levels are stored as the leading columns of the file.
wp_pol_dems.to_csv('./data/clean/main_wp.csv')

In [193]:
# Read the file written above back in. No index_col is given, so the former
# index levels come back as ordinary columns under a fresh default index.
maind = pd.read_csv('./data/clean/main_wp.csv')

In [195]:
# Preview the first five rows of the reloaded table.
# NOTE(review): the output below shows rows labelled STATE 'AK' with cities such
# as 'San Francisco' and 'Wichita', which suggests the state labels or the
# merged rows are misaligned — verify before using this table.
maind.head(5)

Unnamed: 0,STATE,POP,W,B,N,A,O,H,dates,id,...,manner_of_death,armed,age,gender,race,city,signs_of_mental_illness,threat_level,flee,body_camera
0,AK,4779736.0,68.5,26.2,0.6,1.1,2.0,3.9,02/01/15,3,...,shot,gun,53.0,M,A,Shelton,True,attack,Not fleeing,False
1,AK,4779736.0,68.5,26.2,0.6,1.1,2.0,3.9,02/01/15,4,...,shot,gun,47.0,M,W,Aloha,False,attack,Not fleeing,False
2,AK,4779736.0,68.5,26.2,0.6,1.1,2.0,3.9,03/01/15,5,...,shot and Tasered,unarmed,23.0,M,H,Wichita,False,other,Not fleeing,False
3,AK,4779736.0,68.5,26.2,0.6,1.1,2.0,3.9,04/01/15,8,...,shot,toy weapon,32.0,M,W,San Francisco,True,attack,Not fleeing,False
4,AK,4779736.0,68.5,26.2,0.6,1.1,2.0,3.9,04/01/15,9,...,shot,nail gun,39.0,M,H,Evans,False,attack,Not fleeing,False
