In [1]:
#import dependencies 
import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np 
import matplotlib.patches as mpatches


In [3]:
# Load data file
crime_data = 'Resources/Crime_data/fatal-police-shootings-data.csv'
#Read data file
crime_df = pd.read_csv(crime_data)
crime_df.head()

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera
0,3229,Kameron Prescott,12/21/2017,shot,unarmed,6.0,M,W,Schertz,TX,False,other,Not fleeing,False
1,16,Autumn Steele,1/6/2015,shot,unarmed,34.0,F,W,Burlington,IA,False,other,Not fleeing,True
2,1165,Ciara Meyer,1/11/2016,shot,unarmed,12.0,F,W,Penn Township,PA,False,other,Not fleeing,False
3,1883,Tyre King,9/14/2016,shot,toy weapon,13.0,M,B,Columbus,OH,False,other,Foot,False
4,27,Omarr Julian Maximillian Jackson,1/7/2015,shot,gun,37.0,M,B,New Orleans,LA,False,attack,Foot,True


In [4]:
#Collect year, age, armed, mental illness, gender, age, flee, body camera, states 
#Race had the most discrepancy --> 
crime_df_new = crime_df[['date', 'armed', 'age', 'gender', 'race', 'city','state', 'signs_of_mental_illness', 'flee', 'body_camera']]
crime_df_new
crime_df_rename = crime_df_new.rename(columns = {'date': 'Date of Incident', 
                                                'armed': 'Weapon Used',
                                                'age': 'Age',
                                                'gender': 'Gender',
                                                'race':'Race',
                                                 'city':'City', 
                                                'state': 'State',
                                                'signs_of_mental_illness': 'Mental Illness',
                                                'flee': 'Flee',
                                                'body_camera': 'Body Camera'})

#Assign Date of Incidient to a new variable 
crime_df_rename['Date of Incident']= pd.to_datetime(crime_df_rename['Date of Incident'])


#Filter incidents between 01-01-2015 to 12-31-2018 only 
mask = (crime_df_rename['Date of Incident'] > '2015-01-01') &(crime_df_rename['Date of Incident'] <='2018-12-31')


#Create new DF from the filtered dates 
clean_date = crime_df_rename.loc[mask]
clean_date.head()

Unnamed: 0,Date of Incident,Weapon Used,Age,Gender,Race,City,State,Mental Illness,Flee,Body Camera
0,2017-12-21,unarmed,6.0,M,W,Schertz,TX,False,Not fleeing,False
1,2015-01-06,unarmed,34.0,F,W,Burlington,IA,False,Not fleeing,True
2,2016-01-11,unarmed,12.0,F,W,Penn Township,PA,False,Not fleeing,False
3,2016-09-14,toy weapon,13.0,M,B,Columbus,OH,False,Foot,False
4,2015-01-07,gun,37.0,M,B,New Orleans,LA,False,Foot,True


In [5]:
#Count number of rows to account of NaN values 
clean_date.count()

Date of Incident    3934
Weapon Used         3689
Age                 3801
Gender              3931
Race                3656
City                3934
State               3934
Mental Illness      3934
Flee                3800
Body Camera         3934
dtype: int64

In [6]:
#Dropping rows with NaN values 
df = clean_date.dropna(how='any')
df.count()

Date of Incident    3272
Weapon Used         3272
Age                 3272
Gender              3272
Race                3272
City                3272
State               3272
Mental Illness      3272
Flee                3272
Body Camera         3272
dtype: int64

In [7]:
 # Create bins for age groups 
bins = [5, 17, 30, 50, 70, 90]

# Create labels for these bins
group_labels = ["5-17", "18-30", "31-50", "51-70", "71-90"]
       
age = pd.cut(df["Age"], bins, labels=group_labels).head()

df["Age Range"] = pd.cut(df["Age"], bins, labels=group_labels)
df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Unnamed: 0,Date of Incident,Weapon Used,Age,Gender,Race,City,State,Mental Illness,Flee,Body Camera,Age Range
0,2017-12-21,unarmed,6.0,M,W,Schertz,TX,False,Not fleeing,False,5-17
1,2015-01-06,unarmed,34.0,F,W,Burlington,IA,False,Not fleeing,True,31-50
2,2016-01-11,unarmed,12.0,F,W,Penn Township,PA,False,Not fleeing,False,5-17
3,2016-09-14,toy weapon,13.0,M,B,Columbus,OH,False,Foot,False,5-17
4,2015-01-07,gun,37.0,M,B,New Orleans,LA,False,Foot,True,31-50


In [10]:
#bin by year
cut_points = pd.to_datetime([np.datetime64('2015-01-01'), np.datetime64('2015-12-31'), np.datetime64('2016-12-31'), np.datetime64('2017-12-31'), np.datetime64('2018-12-31')])
group_names = ["2015", "2016", "2017", "2018"]
df["Year"] = pd.cut(df['Date of Incident'], cut_points, labels=group_names)
df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


Unnamed: 0,Date of Incident,Weapon Used,Age,Gender,Race,City,State,Mental Illness,Flee,Body Camera,Age Range,Year
0,2017-12-21,unarmed,6.0,M,W,Schertz,TX,False,Not fleeing,False,5-17,2017
1,2015-01-06,unarmed,34.0,F,W,Burlington,IA,False,Not fleeing,True,31-50,2015
2,2016-01-11,unarmed,12.0,F,W,Penn Township,PA,False,Not fleeing,False,5-17,2016
3,2016-09-14,toy weapon,13.0,M,B,Columbus,OH,False,Foot,False,5-17,2016
4,2015-01-07,gun,37.0,M,B,New Orleans,LA,False,Foot,True,31-50,2015
