In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys
sys.path.append("../code")

In [2]:
import loaders.cityprotect as cp

In [3]:
# parentIncidentType labels that this notebook treats as property crime.
prop_list = [
    "Breaking & Entering",
    "Property Crime",
    "Theft of Vehicle",
    "Theft from Vehicle",
    "Theft",
]
# parentIncidentType labels that this notebook treats as violent crime.
violent_list = [
    "Assault",
    "Robbery",
    "Sexual Offense",
    "Homicide",
    "Rape",
    "Other Violent Offense",
]

In [4]:
# Department to analyse; the load cell turns this name into the raw-data folder name.
dept = "San Pablo Police Department"

In [5]:
# The raw-data folder is named after the department, with spaces replaced by underscores.
dept_folder = dept.replace(" ", "_")
df = cp.cityprotect("../data/raw_data/{}".format(dept_folder))

Loading from 24 files.
Data file ../data/raw_data/San_Pablo_Police_Department/Jul_Sep_2022_San_Pablo_Police_Department_report.csv is empty.
Data file ../data/raw_data/San_Pablo_Police_Department/Oct_Dec_2022_San_Pablo_Police_Department_report.csv is empty.


In [6]:
# Target parent category -> lower-case keywords looked up (as substrings,
# case-insensitively) in incidentType by the reclassification cell.
# Insertion order is kept as-is because that cell iterates this dict in order.
reclassification_dir = {
    "Theft": ["theft", "burglary", "larceny"],
    "Property Crime": ["property crime", "vandalism"],
    "Assault": ["assault"],
    "Robbery": ["robbery"],
    "Homicide": ["homicide", "murder"],
    "Sexual Offense": ["sexual assault", "rape"],
    "Other Violent Offense": [
        "shooting",
        "shots fired",
        "car jacking",
        "carjacking",
        "kidnapping",
    ],
}

In [8]:
# Parent categories present in the data that are neither property nor violent labels.
unaccounted_parents = set(df["parentIncidentType"]).difference(prop_list, violent_list)

In [9]:
# Re-label rows whose parent category is unaccounted for, based on keywords
# found in incidentType.
#
# A row leaves the "unaccounted" pool as soon as one keyword relabels it, so the
# first matching keyword wins. Iterating in plain dict order therefore let the
# generic keyword "assault" claim "SEXUAL ASSAULT" rows before the more specific
# "sexual assault" keyword was ever tried. To avoid that shadowing, keywords are
# applied longest-first (most specific phrase wins); sorted() is stable, so
# keywords of equal length keep their reclassification_dir order.
keyword_rules = sorted(
    ((word, parent)
     for parent, words in reclassification_dir.items()
     for word in words),
    key=lambda rule: len(rule[0]),
    reverse=True,
)

for word, parent in keyword_rules:
    # Recomputed every pass: rows relabelled by an earlier keyword are no longer eligible.
    still_unaccounted = df["parentIncidentType"].isin(unaccounted_parents)
    # regex=False: keywords are literal text; na=False: missing incidentType never matches.
    has_keyword = df["incidentType"].str.contains(word, case=False, regex=False, na=False)
    df.loc[still_unaccounted & has_keyword, "parentIncidentType"] = parent

In [10]:
# How often each incidentType occurs among rows labelled with a property category.
df.loc[df["parentIncidentType"].isin(prop_list), "incidentType"].value_counts()

VC 10851 VEHICLE THEFT                                           1230
THEFT                                                            1110
PC 488 PETTY THEFT                                                849
PC 459A AUTO BURGLARY                                             665
PC 594 VANDALISM                                                  627
MOTOR VEHICLE THEFT                                               587
DAMAGE TO PROPERTY/VANDALISM                                      477
BURGLARY                                                          362
PC 602 TRESPASSING                                                354
PC 487 GRAND THEFT                                                225
PC 459R RESIDENTIAL BURGLARY                                      162
PC 459C COMMERCIAL BURGLARY                                       153
PC 530 5 IDENTITY THEFT                                           118
PC 470 FORGERY                                                     77
VC 10852 TAMPER WITH

In [11]:
# Rows whose parent category is still neither a property nor a violent label.
leftover_parents = set(df["parentIncidentType"]).difference(prop_list, violent_list)
df.loc[df["parentIncidentType"].isin(leftover_parents)]

Unnamed: 0,ccn,date,updateDate,city,state,postalCode,blocksizedAddress,incidentType,parentIncidentType,narrative
13177,2017-00001830,2017-01-01 07:00:00,2017-10-12 15:25:57,SAN PABLO,CA,,2400 Block MERRITT AVE,PC 166.4 COURT ORDER VIOLATION,Other,PC 166.4 COURT ORDER VIOLATION
13507,2017-00000024,2017-01-01 10:02:00,2017-10-12 15:25:56,SAN PABLO,CA,,1600 Block DOVER AVE,PC 647 F DRUNK IN PUBLIC,Quality of Life,PC 647 F DRUNK IN PUBLIC
13509,2017-00000041,2017-01-01 18:40:00,2017-10-12 15:25:56,SAN PABLO,CA,,EVERGREEN TER & SAN PABLO AVE,OSW MIS OUTSIDE WARRANT - MISD,Proactive Policing,OSW MIS OUTSIDE WARRANT - MISD
13510,2017-00000056,2017-01-01 22:33:00,2017-10-12 15:25:56,SAN PABLO,CA,,1900 Block 23RD ST,RECOVERED STOLEN VEHICLE RECOVERED STOLEN VEHICLE,Vehicle Recovery,RECOVERED STOLEN VEHICLE RECOVERED STOLEN VEHICLE
13526,2017-00000068,2017-01-02 02:09:00,2017-10-12 15:25:56,SAN PABLO,CA,,2200 Block RD20,HIT AND RUN,Traffic,HIT AND RUN
...,...,...,...,...,...,...,...,...,...,...
11726,2022-00008761,2022-04-07 15:18:46,2022-04-07 22:55:23,SAN PABLO,CA,.,13300 Block SAN PABLO AVE,TRAFFIC STOP,Other,TRAFFIC STOP
11727,2022-00008762,2022-04-07 15:25:56,2022-04-07 22:55:23,SAN PABLO,CA,.,VALE RD,TRAFFIC STOP,Other,TRAFFIC STOP
11721,2022-00008763,2022-04-07 16:47:56,2022-04-08 06:09:01,SAN PABLO,CA,.,2300 Block EL PORTAL DR,SECURITY CHECK,Other,SECURITY CHECK
11720,2022-00008764,2022-04-07 17:18:34,2022-04-08 06:09:01,SAN PABLO,CA,94806,1 Block ST,MISCELLANEOUS,Other,MISCELLANEOUS


In [12]:
# Distinct incidentType values among rows whose parent category is neither property nor violent.
other_parents = set(df["parentIncidentType"]).difference(prop_list, violent_list)
in_other_parent = df["parentIncidentType"].isin(other_parents)
other_incidents = set(df.loc[in_other_parent, "incidentType"])

In [13]:
# Display the distinct incident types that remain outside the property/violent categories.
other_incidents

{'459A USED FOR PC 459A ON COPLOGIC',
 '911 DISCONNECT',
 '911 DISCONNECT;PCAD - ENVIRONMENTAL',
 'ABANDONED AUTO',
 'ACCIDENT',
 'ACCIDENT / NON-INJURY ACCIDENT / NON-INJURY',
 'ALARM',
 'ALARM;CAD S106-Special Assignment',
 'ALARM;CAD S65-Foot Patrol',
 'ALARM;CFS DISORDERLY',
 'ANNOYING TELEPHONE CALLS',
 'ARSON',
 'BP VIOLATION BP VIOLATIONS - ALL OTHERS',
 'BW FEL BENCH WARRANT FELONY',
 'BW MIS BENCH WARRANT MISDEMEANOR',
 'CIVIL MATTERS',
 'COURT VIOLATION',
 'CRUELTY TO ANIMALS',
 'DEAD BODY',
 'DISTURBANCE',
 'DISTURBANCE;CAD S106-Special Assignment',
 'DISTURBANCE;CFS CKPREM PREMISE CHECK',
 'DISTURBANCE;TRAFFIC',
 'DOMESTIC',
 'DRUNK IN PUBLIC',
 'DUI',
 'ELDERLY / DEPENDENT ADULT ABUSE',
 'EMBEZZLEMENT',
 'EXTRA PATROL',
 'FIREWORKS',
 'FLAG DOWN',
 'FLAG DOWN;CFS CKPREM PREMISE CHECK',
 'FOLLOW UP',
 'FOLLOW UP;WARRANT ARREST',
 'FOUND PROPERTY',
 'FRAUD',
 'HIT AND RUN',
 'HS 11350 POSSESS CONTROLLED SUBSTANCE',
 'HS 11351 5 POSS/ETC BASE/ROCK COCAINE FOR SALE',
 'HS 1135

In [14]:
# Upper-case counterparts of the violent-category keywords in reclassification_dir.
# The cells below look for them as substrings of the remaining incident types.
# "SEXUAL ASSAULT" was previously misspelled "SEXUAL ASSUALT", which can never match.
violent_words = ["KIDNAPPING", "MURDER", "CAR JACKING", "CARJACKING",
                 "SHOOTING", "SHOTS FIRED",
                 "ASSAULT", "ROBBERY", "SEXUAL ASSAULT", "RAPE", "HOMICIDE"]

In [15]:
# True if at least one remaining incident type contains a violent keyword (case-sensitive).
any(
    any(keyword in incident for keyword in violent_words)
    for incident in other_incidents
)

False

In [16]:
# The remaining incident types that contain at least one violent keyword (case-sensitive).
{
    incident
    for incident in other_incidents
    if any(keyword in incident for keyword in violent_words)
}

set()

In [17]:
# Row counts per parent category after the keyword reclassification above.
df["parentIncidentType"].value_counts()

Other                               37424
Traffic                              5903
Theft                                3389
Property Crime                       1989
Proactive Policing                   1567
Vehicle Recovery                     1540
Quality of Life                      1424
Theft of Vehicle                     1231
Assault                              1158
Breaking & Entering                  1027
Other Violent Offense                 487
Robbery                               435
Sexual Offense                        150
Homicide                               20
Theft from Vehicle                      3
Other ;Proactive Policing               2
Property Crime ;Vehicle Recovery        2
Other ;Traffic                          1
Other ;Property Crime                   1
Name: parentIncidentType, dtype: int64

In [18]:
# Which incident types make up the rows labelled "Theft".
df.loc[df["parentIncidentType"].eq("Theft"), "incidentType"].value_counts()

THEFT                                                      1110
PC 488 PETTY THEFT                                          849
MOTOR VEHICLE THEFT                                         587
BURGLARY                                                    362
PC 487 GRAND THEFT                                          225
IDENTITY THEFT                                               50
LARCENY                                                      45
PC 484 PETTY THEFT                                           44
MAIL THEFT                                                   35
VEHICLE BURGLARY (PC459)                                     20
Other;THEFT                                                  14
Other;MOTOR VEHICLE THEFT                                    11
Other;BURGLARY                                                9
PC 537(A)(1) DEFRAUDING INNKEEPER (EC)                        6
459A USED FOR PC 459A ON COPLOGIC;PC 459A AUTO BURGLARY       4
PC 664/487 ATTEMPTED GRAND THEFT        

In [None]:
# Scratch cell (no execution count in the saved notebook): tag still-unaccounted
# rows whose incidentType mentions "theft" with a temporary "NewTheft" label.
theft_in_unaccounted = (
    df["parentIncidentType"].isin(unaccounted_parents)
    & df["incidentType"].str.contains("theft", case=False)
)
df.loc[theft_in_unaccounted, "parentIncidentType"] = "NewTheft"

In [None]:
# Show the rows currently carrying the temporary "NewTheft" label.
df.loc[df["parentIncidentType"].eq("NewTheft")]

In [None]:
# Rows whose incidentType mentions "shooting", ignoring case.
mentions_shooting = df["incidentType"].str.contains("shooting", case=False)
df[mentions_shooting]

In [None]:
# Scratch check of any() on a list of booleans: True because at least one element is True.
any([True, False, True, True])

In [None]:
# Per-row mask: does incidentType contain "shooting" (case-insensitive)?
df["incidentType"].str.contains("shooting", case=False)