# SafeNet Crime Data Analysis (San Francisco)
-Crime Trends Over Time - create series plots or histograms to visualize trends, seasonal patterns
-Crime by Geographic Location - explore how crimes are distributed across different polic areas
-Crime Types and Severity - 
-Demographic Analysis
-Time of Day Analysis
-Weapon Usage
-Crime Clearance Rates
-Crime Correlations - types of crime (ex: crm code)
-Create spatial analysis (Hotspot Analysis)
-Crime Trends During Special Events
-Comparative Analysis with SF Crime Data

In [1]:
# Dependencies and Setup
import pandas as pd
from pathlib import Path

In [2]:
# Create path to file
crimedata_path = Path("data/SF_Crime_Data_from_2018_to_Present.csv")

# Read .csv file
crimedata_df = pd.read_csv(crimedata_path)
crimedata_df.head(50)

Unnamed: 0,Incident Datetime,Incident Date,Incident Time,Incident Year,Incident Day of Week,Report Datetime,Row ID,Incident ID,Incident Number,CAD Number,...,Longitude,Point,Neighborhoods,ESNCAG - Boundary File,Central Market/Tenderloin Boundary Polygon - Updated,Civic Center Harm Reduction Project Boundary,HSOC Zones as of 2018-06-05,Invest In Neighborhoods (IIN) Areas,Current Supervisor Districts,Current Police Districts
0,2023/03/13 11:41:00 PM,2023/03/13,23:41,2023,Monday,2023/03/13 11:41:00 PM,125373607041,1253736,230167874,,...,,,,,,,,,,
1,2023/03/01 05:02:00 AM,2023/03/01,05:02,2023,Wednesday,2023/03/11 03:40:00 PM,125379506374,1253795,236046151,,...,,,,,,,,,,
2,2023/03/13 01:16:00 PM,2023/03/13,13:16,2023,Monday,2023/03/13 01:17:00 PM,125357107041,1253571,220343896,,...,,,,,,,,,,
3,2023/03/13 10:59:00 AM,2023/03/13,10:59,2023,Monday,2023/03/13 11:00:00 AM,125355107041,1253551,230174885,,...,,,,,,,,,,
4,2023/03/14 06:44:00 PM,2023/03/14,18:44,2023,Tuesday,2023/03/14 06:45:00 PM,125402407041,1254024,230176728,,...,,,,,,,,,,
5,2023/02/15 03:00:00 AM,2023/02/15,03:00,2023,Wednesday,2023/03/11 04:55:00 PM,125378606372,1253786,236046123,,...,,,,,,,,,,
6,2023/03/11 12:30:00 PM,2023/03/11,12:30,2023,Saturday,2023/03/12 04:15:00 PM,125381606244,1253816,236046004,,...,,,,,,,,,,
7,2023/03/13 11:26:00 AM,2023/03/13,11:26,2023,Monday,2023/03/13 01:37:00 PM,125419506244,1254195,236046850,,...,,,,,,,,,,
8,2023/03/11 03:00:00 PM,2023/03/11,15:00,2023,Saturday,2023/03/13 08:29:00 AM,125420606244,1254206,236045937,,...,,,,,,,,,,
9,2023/03/11 02:00:00 PM,2023/03/11,14:00,2023,Saturday,2023/03/15 11:21:00 AM,125431804134,1254318,230182844,230741133.0,...,-122.454285,POINT (-122.45428511766733 37.772895177200766),,,,,,,4.0,7.0


In [3]:
crimedata_headers = crimedata_df.columns.tolist()
crimedata_headers

['Incident Datetime',
 'Incident Date',
 'Incident Time',
 'Incident Year',
 'Incident Day of Week',
 'Report Datetime',
 'Row ID',
 'Incident ID',
 'Incident Number',
 'CAD Number',
 'Report Type Code',
 'Report Type Description',
 'Filed Online',
 'Incident Code',
 'Incident Category',
 'Incident Subcategory',
 'Incident Description',
 'Resolution',
 'Intersection',
 'CNN',
 'Police District',
 'Analysis Neighborhood',
 'Supervisor District',
 'Supervisor District 2012',
 'Latitude',
 'Longitude',
 'Point',
 'Neighborhoods',
 'ESNCAG - Boundary File',
 'Central Market/Tenderloin Boundary Polygon - Updated',
 'Civic Center Harm Reduction Project Boundary',
 'HSOC Zones as of 2018-06-05',
 'Invest In Neighborhoods (IIN) Areas',
 'Current Supervisor Districts',
 'Current Police Districts']

In [4]:
unique_crime_codes = crimedata_df["Incident Code"].unique()
unique_crime_description = crimedata_df["Incident Description"].unique()

#Create a list of tuples, each will contain crime code and its description
unique_crime_tp = [(code, desc) for code, desc in zip(unique_crime_codes, unique_crime_description)]

#Sort the list
unique_crime_tp_sorted = sorted(unique_crime_tp, key=lambda x: x[0])

#Unpack the sorted list
unique_crime_codes_sorted, unique_crime_descrioption_sorted = zip(*unique_crime_tp_sorted)

unique_codes_sorted = pd.DataFrame({
    "SF Incident Code": unique_crime_codes_sorted,
    "SF Incident Description": unique_crime_descrioption_sorted
 
})
print(len(unique_codes_sorted))
unique_codes_sorted.head(50)

840


Unnamed: 0,SF Incident Code,SF Incident Description
0,1000,Soliciting or Engaging In Lewd Conduct
1,1001,Loitering Without Lawful Business With Owner o...
2,1002,"Robbery, Att., Service Station, W/ Gun"
3,1003,"Hypodermic Needle or Syringe, Possession"
4,1004,Escapes
5,1005,"Suicide By Fire, Att."
6,1007,"Telephone or Telegraph Message, Sending False"
7,1062,"Theft, By Prostitute, $50-$200"
8,1133,Escape From Hospital With Force
9,1160,"Burglary, Hot Prowl, Att. Forcible Entry"


In [6]:
#Write the sorted result to .csv file for further analysis
unique_codes_sorted.to_csv("data/unique_codes_sorted.csv", index=False)  #Total of 841 records

#Create DataFrame from the unique_codes_sorted.csv for the purpose of grouping 
#the incident type into higher level category. (Broad category) 
#Suggested categories:
#1.Violent Crime = (Homicide, Rape, Assault, kidnapping)
#2.Property Crime = (Robbery, Burglary, Theft, larcery, Motor vehicle theft, Arson)
#3.White Collar Crime = (Fraud, Embezzlement, Identity Theft, Counterfeiting)
#4.Drug and Alchohol Crime = (Drug Trafficking, Drug Dealing, Drug Possession, DUI)
#5.Sex Crime = (Prostitution, Solicitation, Child Exploitation)
#6.Hate Crime: #based on race, religion, sexual orientation, other protected classification 
#7:Gang Related Crime (#TBD)
#8.Quality-of-life Crime = (disruption, vandalism, graffiti, trespassing)
#9.Domestic Violence = (battery, partner abuse)


In [20]:
#Grouping the keywords in the SF Incident Desciption as Violent Crime
violent_crime_keywords = ["suicide","assault","rape", "homicide", "shooting", "incest", "kidnapping", "death",\
                          "sexual assault", "manslaughter", "sodomy"]

mask_violent_description = unique_codes_sorted["SF Incident Description"].str.contains("|".join(violent_crime_keywords), case=False)
violent_crime_group = unique_codes_sorted[mask_violent_description]

#Collect the corresponding "SF Incident Code" and save to a variable
violent_crime_codes = violent_crime_group["SF Incident Code"].tolist()

#Display the results:
print(f"Total number of Violent Crime type: {len(violent_crime_codes)} of 840")
print("List of Violent Crime by SF Incident Code")
print(f"SF Incident Code: {violent_crime_codes}")


Total number of Violent Crime type: 91 of 840
List of Violent Crime by SF Incident Code
SF Incident Code: [1005, 2003, 3462, 4011, 4026, 4054, 4104, 5083, 5111, 5121, 5131, 5231, 6120, 6134, 6157, 6301, 6304, 6340, 6375, 6377, 6386, 6398, 7055, 7201, 7205, 9021, 9023, 9125, 9175, 9261, 9268, 9330, 10070, 10085, 10090, 10110, 10145, 11011, 11014, 11015, 12010, 12026, 12050, 12080, 12110, 12165, 12169, 13072, 13111, 14020, 14072, 15160, 16060, 16410, 16430, 16711, 17070, 19058, 19070, 19072, 19075, 19080, 26120, 26135, 26170, 26177, 27067, 27140, 27165, 27173, 27196, 28040, 28103, 28166, 30080, 30140, 30170, 30192, 30205, 60070, 61010, 61030, 62010, 64001, 64011, 64072, 65016, 66010, 66050, 68055, 71019]


In [17]:
#Grouping the keywords in the SF Incident Description
property_crime_keywords = ["robbery", "burglary","theft","larcery", "stolen","counterfeiting", \
                           "access card","carjacking", "extortion","contraband","arson"]
mask_property_description = unique_codes_sorted["SF Incident Description"].str.contains("|".join(property_crime_keywords), case=False)
property_crime_group = unique_codes_sorted[mask_property_description]

#Collect the corresponding "SF Incident Code" and save to a variable
property_crime_codes = property_crime_group["SF Incident Code"].tolist()

#Display the results:
print(f"Total number of Property Crime type: {len(property_crime_codes)}")
print("List of Property Crime by SF Incident Code")
print(f"SF Incident Code: {property_crime_codes}")


Total number of Property Crime type: 279
List of Property Crime by SF Incident Code
SF Incident Code: [1002, 1062, 1160, 2002, 2005, 2020, 2101, 2103, 2104, 2204, 3012, 3014, 3021, 3022, 3023, 3024, 3031, 3034, 3051, 3052, 3053, 3054, 3063, 3064, 3071, 3074, 3081, 3082, 3083, 3091, 3412, 3413, 3414, 3421, 3423, 3433, 3434, 3443, 3444, 3452, 3463, 3464, 3474, 3483, 3484, 3492, 4012, 4013, 4022, 4023, 4024, 4053, 4061, 4073, 4074, 4076, 4114, 4124, 4138, 4145, 4146, 4154, 4164, 5011, 5013, 5014, 5021, 5041, 5043, 5051, 5053, 5072, 5112, 5133, 5141, 5142, 5151, 5152, 5162, 5172, 5251, 5261, 5371, 6112, 6113, 6121, 6131, 6132, 6133, 6141, 6143, 6150, 6151, 6153, 6154, 6221, 6222, 6224, 6230, 6233, 6234, 6240, 6241, 6243, 6244, 6303, 6310, 6313, 6314, 6353, 6364, 6370, 6372, 6374, 6378, 6381, 6394, 6395, 6396, 6399, 7021, 7022, 7025, 7044, 7051, 7053, 7054, 7056, 7060, 9024, 9030, 9032, 9034, 9035, 9040, 9060, 9161, 9162, 9164, 9165, 9215, 9250, 9262, 9263, 9264, 9266, 9269, 9270, 9320, 100