In [1]:
# imports
import sys
sys.path.append('../')

import folium
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Global matplotlib defaults for every figure in this notebook: font lists,
# large presentation-sized text, and the default legend placement.
# NOTE(review): `font.family` is not set in this cell, so the per-family font
# lists below presumably only matter if that family is selected elsewhere -- confirm.
plt.rcParams.update({
    'font.serif': 'Ubuntu',
    'font.monospace': 'Ubuntu Mono',
    'font.size': 32,
    'axes.labelsize': 32,
    'axes.titlesize': 32,
    'xtick.labelsize': 24,
    'ytick.labelsize': 24,
    'legend.fontsize': 12,
    'legend.title_fontsize': 24,
    'figure.titlesize': 40,
    'legend.loc': 'center right',
})

In [3]:
# Read the police-call slice from its CSV file (path is relative to the
# notebook's directory) into the working DataFrame `df`.
data_path = '../data/nashville_pd_slice.csv'
df = pd.read_csv(filepath_or_buffer=data_path)

In [4]:
# Preview the first five rows to see which columns the file provides.
df.head()

Unnamed: 0,Event Number,Call Received,Complaint Number,Tencode,Tencode Description,Tencode Suffix,Tencode Suffix Description,Disposition Code,Disposition Description,Block,Street Name,Unit Dispatched,Shift,Sector,Zone,RPA,Latitude,Longitude,Mapped Location
0,PD201600263871,2016-03-11 23:11:55,,43,WANT OFFICER FOR INVESTIGATION / ASSISTA,P,PROGRESS,4,ASSISTED CITIZEN,5100.0,HARDING PL,335C,C,,323W,0.0,36.09,-86.688,POINT (-86.688 36.09)
1,PD202100701476,2021-11-07 20:10:42,,83,SHOTS FIRED,P,PROGRESS,10,NO RESPONSE,,,820B,B,MT,835,8653.0,,,
2,PD201600937915,2016-09-13 13:35:25,,44,DISORDERLY PERSON,R,REPORT,4,ASSISTED CITIZEN,800.0,RICHARDS RD,321A,A,,325,8899.0,36.068,-86.672,POINT (-86.672 36.068)
3,PD201700252287,2017-03-10 15:15:43,,43,WANT OFFICER FOR INVESTIGATION / ASSISTA,PJ,,4,ASSISTED CITIZEN,100.0,WOODLAND ST,4F72,B,,436Y,0.0,36.169,-86.772,POINT (-86.772 36.169)
4,PD202000221259,2020-03-17 14:03:32,20200190000.0,43,WANT OFFICER FOR INVESTIGATION / ASSISTA,P,PROGRESS,1C,,5100.0,HARDING PL,322A,A,,323W,0.0,36.09,-86.688,POINT (-86.688 36.09)


In [5]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(10000, 19)

In [6]:
# Per-column dtype and non-null count, to see how sparse each column is.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Event Number                10000 non-null  object 
 1   Call Received               10000 non-null  object 
 2   Complaint Number            867 non-null    float64
 3   Tencode                     10000 non-null  int64  
 4   Tencode Description         9767 non-null   object 
 5   Tencode Suffix              5995 non-null   object 
 6   Tencode Suffix Description  5151 non-null   object 
 7   Disposition Code            9960 non-null   object 
 8   Disposition Description     7783 non-null   object 
 9   Block                       2311 non-null   float64
 10  Street Name                 2755 non-null   object 
 11  Unit Dispatched             9332 non-null   object 
 12  Shift                       10000 non-null  object 
 13  Sector                      7744

In [7]:
# Summary statistics for the numeric columns.
# Worth checking the extremes here: a minimum Latitude of 0.0 or a maximum
# Longitude of 0.0 points to placeholder coordinates rather than real locations.
df.describe()

Unnamed: 0,Complaint Number,Tencode,Block,RPA,Latitude,Longitude
count,867.0,10000.0,2311.0,8037.0,1347.0,1347.0
mean,20179570000.0,66.5469,2452.271744,6314.240015,35.886653,-86.099165
std,19084520.0,85.088623,2286.616962,10775.579208,3.105546,7.44952
min,20150020000.0,3.0,0.0,0.0,0.0,-86.962
25%,20161000000.0,43.0,500.0,1833.0,36.0895,-86.817
50%,20180430000.0,70.0,1800.0,4575.0,36.151,-86.73
75%,20200040000.0,93.0,4000.0,8527.0,36.2035,-86.675
max,20220010000.0,8000.0,15300.0,95101.0,36.36,0.0


In [8]:
# Remove redundant columns: per the original note, each description column
# repeats what its code column already encodes (e.g. `Tencode Description`
# vs. `Tencode`), and `Mapped Location` is a "POINT (lon lat)" string built
# from the `Longitude` / `Latitude` columns.
#
# The frame is reassigned instead of being mutated with `inplace=True`, and
# `errors='ignore'` skips labels that are already gone, so re-running this
# cell is a no-op rather than a KeyError.
redundant_columns = ['Tencode Description', 'Tencode Suffix Description',
                     'Disposition Description', 'Mapped Location']
df = df.drop(columns=redundant_columns, errors='ignore')

In [9]:
# Re-inspect dtypes and non-null counts after the redundant columns were dropped.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Event Number      10000 non-null  object 
 1   Call Received     10000 non-null  object 
 2   Complaint Number  867 non-null    float64
 3   Tencode           10000 non-null  int64  
 4   Tencode Suffix    5995 non-null   object 
 5   Disposition Code  9960 non-null   object 
 6   Block             2311 non-null   float64
 7   Street Name       2755 non-null   object 
 8   Unit Dispatched   9332 non-null   object 
 9   Shift             10000 non-null  object 
 10  Sector            7744 non-null   object 
 11  Zone              8741 non-null   object 
 12  RPA               8037 non-null   float64
 13  Latitude          1347 non-null   float64
 14  Longitude         1347 non-null   float64
dtypes: float64(5), int64(1), object(9)
memory usage: 1.1+ MB


In [60]:
# Re-check the frame's dimensions as (rows, columns).
df.shape

(10000, 15)

In [10]:
# Count rows whose `Event Number` repeats an earlier row. Only that column is
# compared, so a result of 0 means every event number is unique.
df.duplicated(subset='Event Number').sum()  # 0 -> no event number appears twice

0

In [15]:
# Build the frame used for mapping: calls with usable coordinates, reduced to
# the columns the map and the per-shift split need.
#
# * Both coordinates must be present (a NaN in either one cannot be plotted).
# * Rows with a 0.0 coordinate are excluded: the `describe()` summary shows a
#   minimum Latitude of 0.0 and a maximum Longitude of 0.0, which are
#   placeholder values far from every other point and would otherwise pull the
#   mean-based map centre away from the real data.
# * Selection and column drop are one non-mutating chain that yields a fresh
#   frame. The original dropped columns in place on a filtered frame, which is
#   presumably why the cell needed `%%capture` to hide a warning; that magic is
#   no longer required.
has_coords = df['Latitude'].notna() & df['Longitude'].notna()
is_placeholder = (df['Latitude'] == 0) | (df['Longitude'] == 0)

location_df = df.loc[has_coords & ~is_placeholder].drop(
    columns=['Event Number', 'Complaint Number', 'Block',
             'Street Name', 'Sector', 'Zone', 'RPA'])

In [16]:
# Map centre = mean latitude / longitude of the located calls; the raw
# coordinate columns are also exposed as NumPy arrays.
latitudes = location_df['Latitude']
longitudes = location_df['Longitude']

center_lat, center_long = latitudes.mean(), longitudes.mean()
lat, long = latitudes.to_numpy(), longitudes.to_numpy()

In [17]:
# Inspect the located-calls frame that the map is built from.
location_df

Unnamed: 0,Call Received,Tencode,Tencode Suffix,Disposition Code,Unit Dispatched,Shift,Latitude,Longitude
0,2016-03-11 23:11:55,43,P,4,335C,C,36.090,-86.688
2,2016-09-13 13:35:25,44,R,4,321A,A,36.068,-86.672
3,2017-03-10 15:15:43,43,PJ,4,4F72,B,36.169,-86.772
4,2020-03-17 14:03:32,43,P,1C,322A,A,36.090,-86.688
33,2022-01-01 22:22:21,70,A,11,,B,36.075,-86.921
...,...,...,...,...,...,...,...,...
9975,2019-11-22 13:08:41,45,P,10,830A,A,36.105,-86.742
9978,2020-12-09 01:30:12,40,PV,3,525C,C,36.282,-86.663
9986,2016-09-24 20:24:54,44,P,11,513B,B,36.188,-86.630
9987,2019-06-17 18:30:49,43,PJ,6,722B,B,36.268,-86.702


In [18]:
def _coords_for_shift(frame, shift):
    """Return (rows, latitudes, longitudes) for the rows of `frame` whose `Shift` equals `shift`.

    The latitudes and longitudes are NumPy arrays taken from the `Latitude`
    and `Longitude` columns of the selected rows.
    """
    rows = frame[frame['Shift'] == shift]
    return rows, rows['Latitude'].to_numpy(), rows['Longitude'].to_numpy()


# One (frame, latitudes, longitudes) triple per shift value: A, B and C.
location_df_a, lat_a, long_a = _coords_for_shift(location_df, 'A')
location_df_b, lat_b, long_b = _coords_for_shift(location_df, 'B')
location_df_c, lat_c, long_c = _coords_for_shift(location_df, 'C')

In [19]:
# Base map centred on the mean call location; markers are added by later cells.
map_center = [center_lat, center_long]
map_ = folium.Map(location=map_center, zoom_start=9)

In [20]:
# Shift A: one small red circle marker per located call.
shift_a_style = dict(radius=1, color='red', fill_color='red')
for lat_, long_ in zip(lat_a, long_a):
    marker = folium.CircleMarker(location=[lat_, long_], **shift_a_style)
    marker.add_to(map_)

In [21]:
# Shift B: one small black circle marker per located call.
shift_b_style = dict(radius=1, color='black', fill_color='black')
for lat_, long_ in zip(lat_b, long_b):
    marker = folium.CircleMarker(location=[lat_, long_], **shift_b_style)
    marker.add_to(map_)

In [22]:
# Shift C: one small green circle marker per located call.
shift_c_style = dict(radius=1, color='green', fill_color='green')
for lat_, long_ in zip(lat_c, long_c):
    marker = folium.CircleMarker(location=[lat_, long_], **shift_c_style)
    marker.add_to(map_)

In [23]:
# Render the map with the per-shift markers (A = red, B = black, C = green).
map_

In [32]:
# Narrow the frame to the call code, its disposition, and the call timestamp.
sub_df_columns = ['Tencode', 'Disposition Code', 'Call Received']
sub_df = df.loc[:, sub_df_columns]

In [47]:
# Number of calls for each (Tencode, Disposition Code) pair, most frequent first.
pair_counts = sub_df.groupby(['Tencode', 'Disposition Code']).size()
pair_counts.sort_values(ascending=False)

Tencode  Disposition Code
96       12                  1176
93       9T                  1155
43       4                    603
96       15                   520
15       15                   464
                             ... 
58       5S                     1
         6                      1
59       11                     1
         4                      1
8000     10                     1
Length: 402, dtype: int64

In [46]:
# Number of calls handled by each dispatched unit, busiest units first.
calls_per_unit = df.groupby('Unit Dispatched').size()
calls_per_unit.sort_values(ascending=False)

Unit Dispatched
125C     56
421C     53
411B     46
113A     46
121A     43
         ..
635B      1
635A9     1
3P67      1
634C      1
103       1
Length: 1236, dtype: int64