In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from datetime import datetime

Read data from csv file 

In [2]:
stop_file = "static/data/Police_Stop_data.csv"
stop_df=pd.read_csv(stop_file)
stop_df.head()

Unnamed: 0,OBJECTID,masterIncidentNumber,responseDate,reason,problem,callDisposition,citationIssued,personSearch,vehicleSearch,preRace,race,gender,lat,long,x,y,policePrecinct,neighborhood,lastUpdateDate
0,1001,16-406479,2016-11-09T17:43:21.000Z,,Attempt Pick-Up (P),BKG-Booking,,YES,NO,Native American,Native American,Male,44.949552,-93.281346,-10384030.0,5613583.0,5.0,Whittier,2017-08-08T10:24:35.000Z
1,1002,16-406495,2016-11-09T17:58:28.000Z,,Suspicious Vehicle (P),ADV-Advised,,NO,NO,Unknown,Black,Male,45.014157,-93.308244,-10387030.0,5623750.0,4.0,Cleveland,2017-08-08T10:24:35.000Z
2,1003,16-406507,2016-11-09T18:04:48.000Z,,Suspicious Vehicle (P),GOA-Gone on Arrival,,,,,,,44.99521,-93.29942,-10386040.0,5620767.0,4.0,Near - North,2017-08-08T10:25:31.000Z
3,1004,16-406526,2016-11-09T18:20:12.000Z,,Traffic Law Enforcement (P),,,,,,,,44.98247,-93.244,-10379870.0,5618762.0,2.0,Marcy Holmes,2017-08-08T10:25:31.000Z
4,1005,16-406555,2016-11-09T18:40:01.000Z,,Suspicious Vehicle (P),AOK- All OK,,NO,NO,Unknown,Unknown,Unknown,44.911719,-93.216454,-10376810.0,5607634.0,3.0,Minnehaha,2017-08-08T10:25:03.000Z


# Reduce Columns
Reduce the columns to those needed and eliminate rows with null gender  
check count to confirm later operations  

In [20]:
      #select needed columns
select_df=stop_df.loc[:,["OBJECTID", "responseDate","citationIssued","lat","long","gender","neighborhood"]]

# remove null gender values
reduced_df=select_df.dropna(axis=0,subset=['gender'])
reduced_df.head()

Unnamed: 0,OBJECTID,responseDate,citationIssued,lat,long,gender,neighborhood
0,1001,2016-11-09T17:43:21.000Z,,44.949552,-93.281346,Male,Whittier
1,1002,2016-11-09T17:58:28.000Z,,45.014157,-93.308244,Male,Cleveland
4,1005,2016-11-09T18:40:01.000Z,,44.911719,-93.216454,Unknown,Minnehaha
5,1006,2016-11-09T18:40:44.000Z,,44.976087,-93.277436,Male,Downtown West
6,1007,2016-11-09T18:51:47.000Z,,44.95913,-93.28803,Male,Lowry Hill East


# Get a list of neighborhoods

In [29]:
#Get a listing and count of neighborhoods
neighbor_df=reduced_df["neighborhood"]
neighborhood_df=neighbor_df.drop_duplicates()
neighborhood.df=neighbor_df.dropna()
neighborhood_df.tail(50)

102                          Longfellow
103                           Lynnhurst
105                            Beltrami
109                         Elliot Park
110                            Harrison
111                       East Phillips
117                         Morris Park
118         Nicollet Island - East Bank
127                     Powderhorn Park
131                          Logan Park
132                            Northrop
137                             Holland
140                              Bryant
141                          Tangletown
155                            Sheridan
166                                 NaN
168     Prospect Park - East River Road
185                                Page
195                      Northeast Park
211                   Sumner - Glenwood
261                       Columbia Park
264                           Keewaydin
280                          East Isles
285                            Corcoran
306                            Hiawatha


In [27]:
neighborhood_df.to_csv("static/data/neighborhood_data.csv", index=False,header=True)

# modify Gender 
combine gender Unknown and Gender Non-Conforming into gender Other

In [None]:
pd.options.mode.chained_assignment = None  # default='warn'
#fill in Unknown vaue with Other
reduced_df.loc[reduced_df.gender=="Unknown","gender"] = "Other"
      # fill in Gender Non-Conforming values with other
reduced_df.loc[reduced_df.gender =="Gender Non-Conforming","gender"] = "Other"
reduced_df.head()

In [None]:
#what gender values are present ?
#verify count
gender=reduced_df.groupby(["gender"]).count()
gender

# Modify citationIssued 
change NaN to NO

In [None]:
# change null citationIssued to NO
reduced_df[["citationIssued"]]=reduced_df[["citationIssued"]].fillna(value='NO')
reduced_df.head()

In [None]:
#what gender values are present ?
# verify count
citation=reduced_df.groupby(["citationIssued"]).count()
citation

# Parse date
responseDow = day of the week  
responseDay = day of the month as zero padded decimal  
responseDonth = month as a zero padded decimal  
responseMonthName= month as abbreviated name  
responseYear  

In [None]:
#parse date
date_list= [datetime.strptime( entry,"%Y-%m-%dT%H:%M:%S.000Z") for entry in reduced_df["responseDate"]]
# determine response day of week
dow_list= [datetime.strftime(entry,"%w") for entry in date_list]

# split off response day, month and year
day_list = [datetime.strftime(entry,"%d") for entry in date_list]
month_list=[datetime.strftime(entry,"%m") for entry in date_list]
month_name_list=[datetime.strftime(entry,"%b") for entry in date_list]
year_list= [datetime.strftime(entry,"%Y") for entry in date_list]

#add column to reduced_df
reduced_df.loc[:,"responseDow"]=dow_list
reduced_df.loc[:,"responseDay"]=day_list
reduced_df.loc[:,"responseMonth"]=month_list
reduced_df.loc[:,"responseMonthName"]=month_name_list
reduced_df.loc[:,"responseYear"]=year_list
reduced_df.head()

# Deliver clean stop data
to file "static/data/clean_data.csv"  
or to database

In [None]:
reduced_df.to_csv("static/data/clean_stop_data.csv", index=False,header=True)

# Prep responseDow data
get count by neighborhood, responseDow and gender

In [None]:
# get count of genders by neighborhood and responseDow
group_df=reduced_df.groupby(["neighborhood","responseDow","gender"]).count()
group_df.head()

In [None]:
# remove indexing rename columns and reduce columns
new_group=group_df.reset_index()
next_group=new_group.rename(columns={"OBJECTID":"genderCount"})
dow_group=next_group[["neighborhood","responseDow","gender","genderCount"]]
dow_group.head()

# Deliver clean day of week data  
to file "static/data/dow_clean_data.csv" 
or database

In [None]:
# write ad csv file -- json file doesn't come out right
dow_group.to_csv("static/data/clean_dow_data.csv" ,index=False, header=True)