In [1]:
import pandas as pd
import numpy as np
import requests
import plotly.express as px
from datetime import date, time, datetime, timedelta
# Notebook-wide pandas configuration, passed as (option, value) pairs:
# show up to 1000 columns and 1000 rows when rendering frames, and route
# the pandas `.plot` accessor through the plotly backend.
pd.set_option(
    "display.max_columns", 1000,
    "display.max_rows", 1000,
    "plotting.backend", "plotly",
)

In [2]:
# Read the two raw inputs from the working directory.
stops = pd.read_csv("sqf-2019.csv")
complaints = pd.read_csv("CCRB_database_raw.csv")

# Keep only complaints that have an incident date whose text ends in "/2019".
complaints = complaints.loc[
    lambda frame: frame["Incident Date"].notnull()
    & frame["Incident Date"].str.endswith("/2019")
]

# Keep only rows whose Command ends in " PCT" and begins with a digit
# (presumably numbered precinct commands -- confirm against the data dictionary).
complaints = complaints.loc[
    lambda frame: frame["Command"].str.endswith(" PCT")
    & frame["Command"].str[0].str.isdigit()
]

In [3]:
# Rewrite each stop's precinct as a zero-padded, three-character string
# followed by " PCT" (e.g. 7 -> "007 PCT"), the same shape the complaint
# filter looks for in the Command column.
# NOTE: this cell is not idempotent -- re-running it appends " PCT" again.
stops["STOP_LOCATION_PRECINCT"] = stops["STOP_LOCATION_PRECINCT"].map(
    lambda code: str(code).zfill(3) + " PCT"
)

# Only the precinct label and the stop date are used from here on.
stops = stops.loc[:, ["STOP_LOCATION_PRECINCT", "STOP_FRISK_DATE"]]

In [4]:
# Count stops per (precinct, date) pair and give the columns short names.
stop_count = (
    stops
    .groupby(["STOP_LOCATION_PRECINCT", "STOP_FRISK_DATE"])
    .size()
    .reset_index(name="num_stops")
    .rename(columns={"STOP_LOCATION_PRECINCT": "precinct", "STOP_FRISK_DATE": "date"})
)

# Parse the date strings (month/day/two-digit year) into datetimes and
# store the counts as floats.
stop_count = stop_count.assign(
    date=pd.to_datetime(stop_count["date"], format="%m/%d/%y"),
    num_stops=stop_count["num_stops"].astype(float),
)

In [5]:
# Count complaint rows per (incident date, command) pair and give the
# columns the same short names used for the stop counts.
complaint_count = (
    complaints
    .groupby(["Incident Date", "Command"])
    .size()
    .reset_index(name="num_complaints")
    .rename(columns={"Command": "precinct", "Incident Date": "date"})
)

# Parse the date strings (month/day/four-digit year) into datetimes and
# store the counts as floats.
complaint_count = complaint_count.assign(
    date=pd.to_datetime(complaint_count["date"], format="%m/%d/%Y"),
    num_complaints=complaint_count["num_complaints"].astype(float),
)

In [6]:
# Collapse the per-precinct daily counts into one citywide total per date.
#
# Only the count column is summed. A bare `groupby('date').sum()` also
# aggregates the string `precinct` column on pandas >= 2.0 (where
# `numeric_only` defaults to False), concatenating every precinct label into
# one long string per day and carrying that column into later merges.
# Selecting the column explicitly gives a `date` + count frame on every
# pandas version.
stop_count = stop_count.groupby('date')['num_stops'].sum().reset_index()
complaint_count = complaint_count.groupby('date')['num_complaints'].sum().reset_index()

In [7]:
# One row per calendar day of 2019; left-joining onto it guarantees every
# day is present even when a dataset has no records for that day.
idx = pd.DataFrame(pd.date_range('2019-01-01', '2019-12-31'), columns=['date'])

# Days absent from the complaints data get a count of 0.0.
complaint_count = idx.merge(complaint_count, how='left', on='date')
complaint_count['num_complaints'] = complaint_count['num_complaints'].fillna(0.0)

# Days absent from the stops data get a count of 0.0.
stop_count = idx.merge(stop_count, how='left', on='date')
stop_count['num_stops'] = stop_count['num_stops'].fillna(0.0)

In [8]:
# Join the two citywide daily series on their shared 'date' column.
city = pd.merge(complaint_count, stop_count, on='date')

# Stops per complaint for each day.
# num_complaints is 0.0 on days with no recorded complaints (missing days are
# zero-filled earlier in the notebook), and dividing by it yields inf -- or
# NaN for 0/0 -- which distorts the chart's y-axis and any later statistics.
# Keep the quotient only where the denominator is positive; the ratio is
# left as NaN (undefined) on complaint-free days.
city['stops_to_complaints'] = np.where(
    city['num_complaints'] > 0,
    city['num_stops'] / city['num_complaints'],
    np.nan,
)

In [150]:
# Preview the first five rows of the merged citywide daily table.
city.head(n=5)

Unnamed: 0,date,num_complaints,num_stops,stops_to_complaints
0,2019-01-01,20.0,43.0,2.15
1,2019-01-02,18.0,32.0,1.777778
2,2019-01-03,34.0,28.0,0.823529
3,2019-01-04,37.0,54.0,1.459459
4,2019-01-05,29.0,18.0,0.62069


In [151]:
# Line chart of the daily citywide stops-per-complaint ratio.
# The column is passed as a plain string (not a one-element list) so plotly
# labels the y-axis from the column instead of the generic wide-form
# "value"/"variable" names, and the figure gets a title and readable axis
# labels so it can stand alone when the notebook is skimmed.
fig = px.line(
    city,
    x='date',
    y='stops_to_complaints',
    title='Daily stops per complaint, 2019',
    labels={'date': 'Date', 'stops_to_complaints': 'Stops per complaint'},
)
fig.show()