In [302]:
# OAKLAND CRIME DATA
# This notebook cleans crime data for the city of Oakland, CA (known for its high crime rates) and visualizes it as heatmaps.
# The data comes directly from the City of Oakland's website and covers crime from the past 90 days.
# As of today, Jun 15, 2025, the data therefore goes back to March 2025.

In [303]:
from pathlib import Path

# NOTE(review): this binds `plt` to the top-level matplotlib package, not to
# matplotlib.pyplot (the conventional `plt`) — confirm which one later cells expect.
import matplotlib as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [304]:
# Importing the data: the CrimeWatch "Past 90-Days" CSV export (file dated 2025-06-15).
# The path is built from the current user's home directory rather than a hardcoded
# /Users/<name>/ prefix, so the notebook also runs on another machine or account
# as long as the file sits in that user's Downloads folder.
DATA_PATH = Path.home() / 'Downloads' / 'CrimeWatch_Maps_Past_90-Days_20250615.csv'
df = pd.read_csv(DATA_PATH)
df

Unnamed: 0,CRIMETYPE,DATETIME,CASENUMBER,DESCRIPTION,POLICEBEAT,ADDRESS,CITY,STATE,Location
0,VANDALISM,03/17/2025 02:10:00 PM,25-012120,VANDALISM-$400+,77X,8TH AVE AND CLINTO LN,Oakland,CA,
1,PETTY THEFT,03/17/2025 07:00:00 AM,25-012218,THEFT,30X,2427 68TH AV,Oakland,CA,POINT (-122.18566 37.76508)
2,PETTY THEFT,04/07/2025 12:00:00 AM,25-012374,THEFT,77X,UNKNOWN,Oakland,CA,
3,PETTY THEFT,03/19/2025 11:30:00 PM,25-012505,THEFT,26X,789 54TH AV,Oakland,CA,POINT (-122.2106 37.76353)
4,PETTY THEFT,03/18/2025 12:00:00 PM,25-012784,THEFT,20X,3020 E 18TH ST,Oakland,CA,POINT (-122.22443 37.78225)
...,...,...,...,...,...,...,...,...,...
11212,ROBBERY,03/26/2025 07:52:00 PM,25-013538,"ROBBERY - STRONG ARM (HANDS, FISTS, FEET, ETC.)",23X,4108 INTERNATIONAL BLVD,Oakland,CA,POINT (-122.21636 37.77357)
11213,DOMESTIC VIOLENCE,05/31/2025 12:00:00 PM,25-023804,BATTERY:SPOUSE/EX SPOUSE/DATE/ETC,11X,5700 CARBERRY AV,Oakland,CA,POINT (-122.26244 37.84323)
11214,STOLEN VEHICLE,05/30/2025 12:00:00 AM,25-023687,VEHICLE THEFT - AUTO,08X,2800 VALDEZ ST,Oakland,CA,POINT (-122.26213 37.81694)
11215,STOLEN VEHICLE,04/12/2025 02:11:00 PM,25-016263,VEHICLE THEFT - AUTO,31Y,7825 SAN LEANDRO ST,Oakland,CA,POINT (-122.19408 37.75015)


In [305]:
# Remove the columns this project does not use: 'CASENUMBER', 'CITY', 'STATE',
# 'POLICEBEAT', 'ADDRESS', and 'DESCRIPTION'.
# Street addresses are not needed because the coordinates in 'Location' are more precise.
todrop = [
    'CASENUMBER',
    'CITY',
    'STATE',
    'POLICEBEAT',
    'ADDRESS',
    'DESCRIPTION',
]
df = df.drop(columns=todrop)
df

Unnamed: 0,CRIMETYPE,DATETIME,Location
0,VANDALISM,03/17/2025 02:10:00 PM,
1,PETTY THEFT,03/17/2025 07:00:00 AM,POINT (-122.18566 37.76508)
2,PETTY THEFT,04/07/2025 12:00:00 AM,
3,PETTY THEFT,03/19/2025 11:30:00 PM,POINT (-122.2106 37.76353)
4,PETTY THEFT,03/18/2025 12:00:00 PM,POINT (-122.22443 37.78225)
...,...,...,...
11212,ROBBERY,03/26/2025 07:52:00 PM,POINT (-122.21636 37.77357)
11213,DOMESTIC VIOLENCE,05/31/2025 12:00:00 PM,POINT (-122.26244 37.84323)
11214,STOLEN VEHICLE,05/30/2025 12:00:00 AM,POINT (-122.26213 37.81694)
11215,STOLEN VEHICLE,04/12/2025 02:11:00 PM,POINT (-122.19408 37.75015)


In [306]:
# Check the size of the data as a (rows, columns) tuple
df.shape

(11217, 3)

In [307]:
# Make 'CRIMETYPE' the row index; it is then shown as the leading column
# when the frame is displayed, which is easier to read.
df = df.set_index(keys='CRIMETYPE')
df

Unnamed: 0_level_0,DATETIME,Location
CRIMETYPE,Unnamed: 1_level_1,Unnamed: 2_level_1
VANDALISM,03/17/2025 02:10:00 PM,
PETTY THEFT,03/17/2025 07:00:00 AM,POINT (-122.18566 37.76508)
PETTY THEFT,04/07/2025 12:00:00 AM,
PETTY THEFT,03/19/2025 11:30:00 PM,POINT (-122.2106 37.76353)
PETTY THEFT,03/18/2025 12:00:00 PM,POINT (-122.22443 37.78225)
...,...,...
ROBBERY,03/26/2025 07:52:00 PM,POINT (-122.21636 37.77357)
DOMESTIC VIOLENCE,05/31/2025 12:00:00 PM,POINT (-122.26244 37.84323)
STOLEN VEHICLE,05/30/2025 12:00:00 AM,POINT (-122.26213 37.81694)
STOLEN VEHICLE,04/12/2025 02:11:00 PM,POINT (-122.19408 37.75015)


In [308]:
# List the column labels of df (the index itself is not part of .columns)
df.columns

Index(['DATETIME', 'Location'], dtype='object')

In [309]:
# Collect the distinct labels of the index to get the types of crimes.
# set() is used to get the full list of labels WITHOUT repetition.
# len(crimetypes) would give the total number of distinct labels [comes out to 40], but it is not really needed.
# NOTE(review): the displayed set includes a NaN entry and an 'INCIDENT TYPE' label,
# so the 40 presumably overcounts the real crime categories — TODO confirm.
crimetypes = set(df.index)
crimetypes

{'ARSON',
 'BRANDISHING',
 'BURG - AUTO',
 'BURG - COMMERCIAL',
 'BURG - OTHER',
 'BURG - RESIDENTIAL',
 'CHILD ABUSE',
 'CURFEW & LOITERING',
 'DISORDERLY CONDUCT',
 'DOMESTIC VIOLENCE',
 'DUI',
 'EMBEZZLEMENT',
 'FELONY ASSAULT',
 'FELONY WARRANT',
 'FORCIBLE RAPE',
 'FORGERY & COUNTERFEITING',
 'FRAUD',
 'GRAND THEFT',
 'HOMICIDE',
 'INCIDENT TYPE',
 'KIDNAPPING',
 'MISCELLANEOUS TRAFFIC CRIME',
 'MISDEMEANOR ASSAULT',
 'MISDEMEANOR WARRANT',
 'MISSING',
 'NARCOTICS',
 'OTHER',
 'OTHER SEX OFFENSES',
 'PETTY THEFT',
 'POSSESSION - STOLEN PROPERTY',
 'PROSTITUTION',
 'RECOVERED O/S STOLEN',
 'RECOVERED VEHICLE - OAKLAND STOLEN',
 'ROBBERY',
 'STOLEN AND RECOVERED VEHICLE',
 'STOLEN VEHICLE',
 'THREATS',
 'VANDALISM',
 'WEAPONS',
 nan}

In [310]:
# Filter once more to focus solely on the types of crimes relevant to this project: those to which most immigrant workers are usually exposed.
# The crimes I'm considering are those that pose immediate physical danger, economic harm, or mental distress, because many of these low-paying jobs tend to be minimally staffed or are sometimes even a one-person show.
# I am not claiming the dropped crimes are irrelevant or not urgent.
# They are, however, less relevant to the main goal (unless the project expands) and/or are difficult to model without ethical/legal considerations.

In [311]:
# Index labels (crime types) to exclude from the analysis
crimedrop = [
    'DOMESTIC VIOLENCE',
    'CHILD ABUSE',
    'PROSTITUTION',
    'EMBEZZLEMENT',
    'FORGERY & COUNTERFEITING',
    'CURFEW & LOITERING',
    'MISDEMEANOR WARRANT',
    'FELONY WARRANT',
    'MISCELLANEOUS TRAFFIC CRIME',
    'OTHER SEX OFFENSES',
    'FORCIBLE RAPE',
    'OTHER',
]
# Dropping by index label removes every row carrying one of the labels above
df = df.drop(index=crimedrop)

In [312]:
# Re-check the (rows, columns) size now that the crimedrop labels have been removed
df.shape

(10045, 2)

In [320]:
# Count the distinct index labels that remain after the filter.
# NOTE(review): this counts labels, not verified crime types. The earlier set(df.index) output
# listed a NaN entry and an 'INCIDENT TYPE' label, and neither is in crimedrop, so both are
# presumably still included in this count — TODO confirm before quoting the number.
newcrimeset = set(df.index)
len(newcrimeset)
# we have narrowed down to 28 labels covering the crimes that immigrants are most likely to be a victim of

28