In [144]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [145]:
# Listing out the datasets to use
main_df = pd.read_csv('pd_calls_for_service_2023_datasd.csv')
beat_codes_df = pd.read_csv('pd_beat_codes_list_datasd.csv')
dispo_codes_df = pd.read_csv('pd_dispo_codes_datasd.csv')
call_types_df = pd.read_csv('pd_cfs_calltypes_datasd.csv')


In [146]:
# Dropping columns that are not needed from call_types_df
call_types_df = call_types_df.drop({'Unnamed: 2','Unnamed: 3','Unnamed: 4'}, axis=1)

In [147]:
# Merging the main, pd_df with the beat_codes so we can see descriptions of the beats
main_df = main_df.merge(beat_codes_df,
                           left_on=['beat'], right_on=['beat'])
main_df.rename(columns={'beat': 'beat_code', 
                        'neighborhood': 'beat_neighborhood'}, inplace=True)

In [148]:
# Merging the pd_df with the disposition codes so we can see descriptions of the dispositions
dispo_codes_df.rename(columns={'dispo_code': 'disposition', 'description': 'dispo_description'}, 
                      inplace=True)
main_df = main_df.merge(dispo_codes_df,
                           left_on=['disposition'], right_on=['disposition'])

In [149]:
# Merging the pd_df with the call type codes so we can see descriptions of the call types
main_df = main_df.merge(call_types_df,
                           left_on=['call_type'], right_on=['call_type'])
main_df.rename(columns={'description': 'call_type_description'}, 
                      inplace=True)

In [150]:
priority_short_description_dictionary = {2: 'Dispatch as quickly as possible', 
                                         3: 'Dispatch as quickly as possible', 
                                         1: 'Dispatch Immediately', 
                                         4: 'Dispatch when no higher priority calls are waiting',
                                         0: 'Dispatch Immediately',
                                         9: 'Calls that are formatted for the Telephone Report Unit (TRU)'}
priority_long_description_dictionary = {2: 'Priority Two calls involve complaints regarding less serious crimes in which there is no threat to life. Examples include: prowlers who have left, preserve the peace, crime reports for residents standing by at an inconvenient location, blocked driveway when the caller is waiting to leave, injured animals, loud parties with mitigating circumstances', 
                                         3: 'Priority Three calls involve minor crimes or requests for service which are not urgent. Examples include: investigating a cold crime, loud parties involving noise only.', 
                                         1: 'Priority One calls involve serious crimes in progress or a threat to life. Examples include: missing children, child abuse, domestic violence, disturbances involving weapons/violence and bomb threats', 
                                         4: ' Priority Four calls involve minor requests for police service. Examples include: found property, most parking violations, etc.',
                                         0: 'Priority E calls involve an imminent threat to life. Examples include: officer or person down, no detail accidents and attempted suicide',
                                         9: 'Priority Nine calls involve requests for police service, which have limited or no suspect information. Examples include: petty theft, vehicle break-in and false use of another’s identity.'}
main_df['priority_short'] = main_df['priority'].map(priority_short_description_dictionary)
main_df['priority_long'] = main_df['priority'].map(priority_long_description_dictionary)



In [151]:
# Converting object of date_time to an actual datetime
main_df['date_time'] = pd.to_datetime(main_df['date_time'])

In [158]:
main_df= main_df.loc[:,['incident_num', 
                        'date_time',
                        'day_of_week',
                        'address_number_primary',
                        'address_dir_primary',
                        'address_road_primary',
                        'address_sfx_primary',
                        'address_dir_intersecting',
                        'address_road_intersecting',
                        'address_sfx_intersecting',
                        'call_type',
                        'call_type_description',
                        'disposition',
                        'dispo_description',
                        'beat_code',
                        'beat_neighborhood',
                        'priority',
                        'priority_short',
                        'priority_long']]


In [159]:
main_df.head()

Unnamed: 0,incident_num,date_time,day_of_week,address_number_primary,address_dir_primary,address_road_primary,address_sfx_primary,address_dir_intersecting,address_road_intersecting,address_sfx_intersecting,call_type,call_type_description,disposition,dispo_description,beat_code,beat_neighborhood,priority,priority_short,priority_long
0,E23010000001,2023-01-01 00:00:05,1,0,,05TH,AVE,,G,,FD,FLAG DOWN/FIELD INITIATED,CAN,CANCEL,523,Gaslamp,2,Dispatch as quickly as possible,Priority Two calls involve complaints regardin...
1,E23010000116,2023-01-01 01:07:04,1,500,,F,ST,,,,FD,FLAG DOWN/FIELD INITIATED,CAN,CANCEL,523,Gaslamp,2,Dispatch as quickly as possible,Priority Two calls involve complaints regardin...
2,E23010000130,2023-01-01 01:27:25,1,0,,04TH,AVE,,MARKET,,FD,FLAG DOWN/FIELD INITIATED,CAN,CANCEL,523,Gaslamp,2,Dispatch as quickly as possible,Priority Two calls involve complaints regardin...
3,E23010000213,2023-01-01 02:49:54,1,500,,MARKET,ST,,,,FD,FLAG DOWN/FIELD INITIATED,CAN,CANCEL,523,Gaslamp,2,Dispatch as quickly as possible,Priority Two calls involve complaints regardin...
4,E23010008343,2023-01-07 00:49:56,7,800,,05TH,AVE,,,,FD,FLAG DOWN/FIELD INITIATED,CAN,CANCEL,523,Gaslamp,2,Dispatch as quickly as possible,Priority Two calls involve complaints regardin...


In [154]:
main_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 28308 entries, 0 to 28307
Data columns (total 19 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   incident_num               28308 non-null  object        
 1   date_time                  28308 non-null  datetime64[ns]
 2   day_of_week                28308 non-null  int64         
 3   address_number_primary     28308 non-null  int64         
 4   address_dir_primary        1352 non-null   object        
 5   address_road_primary       28307 non-null  object        
 6   address_sfx_primary        26077 non-null  object        
 7   address_dir_intersecting   0 non-null      float64       
 8   address_road_intersecting  4490 non-null   object        
 9   address_sfx_intersecting   0 non-null      float64       
 10  call_type                  28308 non-null  object        
 11  call_type_description      28308 non-null  object        
 12  disp

In [155]:
main_df.call_type.value_counts()

415        4000
SELENF     1676
T          1549
459A       1386
CW         1134
           ... 
ALERT3        1
272           1
INFOX         1
1181-CR       1
595           1
Name: call_type, Length: 203, dtype: int64

In [156]:
main_df.priority.value_counts()

2    13775
1     5520
3     5449
4     1697
0     1543
9      324
Name: priority, dtype: int64