In this notebook we download the raw LAPD offense and victim data, merge the two datasets on the case number, and store the result locally as a CSV file.

NIBRS (abbreviated `nibr` in the column names) stands for the National Incident-Based Reporting System, a U.S. system used by law enforcement agencies to report detailed information on crimes to the Federal Bureau of Investigation (FBI).

In [31]:
import warnings

import pandas as pd
from sodapy import Socrata

# Socrata client
DOMAIN = "data.lacity.org"
client = Socrata(DOMAIN, None)  # None = anonymous access

# Dataset IDs: https://data.lacity.org/
OFFENSES_ID = "y8y3-fqfu"
VICTIMS_ID = "gqf2-vm2j"

# Maximum number of rows requested per dataset in a single call.
FETCH_LIMIT = 500000


def _fetch_records(dataset_id, limit=FETCH_LIMIT):
    """Fetch up to ``limit`` records of ``dataset_id`` through ``client``.

    Parameters
    ----------
    dataset_id : str
        Socrata dataset identifier.
    limit : int
        Row cap passed to ``client.get``.

    Returns
    -------
    The records returned by ``client.get`` (unchanged).

    Warns
    -----
    UserWarning
        When the number of returned records reaches ``limit``: the dataset
        may contain more rows than were downloaded, so the result could be
        silently incomplete.
    """
    records = client.get(dataset_id, limit=limit)
    if len(records) >= limit:
        warnings.warn(
            f"Dataset {dataset_id} returned {len(records)} rows, which reaches "
            f"the fetch limit of {limit}; the download may be truncated. "
            "Raise FETCH_LIMIT or page through the dataset."
        )
    return records


# ---- Fetch offenses ----
offenses = _fetch_records(OFFENSES_ID)
df_off = pd.DataFrame.from_records(offenses)
print("Offenses:", df_off.shape)

# ---- Fetch victims ----
victims = _fetch_records(VICTIMS_ID)
df_vic = pd.DataFrame.from_records(victims)
print("Victims:", df_vic.shape)

# ---- Merge ----
# Both frames must expose the join key before merging.
if "caseno" not in df_off.columns or "caseno" not in df_vic.columns:
    raise ValueError("caseno not found in one of the datasets")

# Left join on the case number. A case can have several offense rows and
# several victim rows, so every offense row is paired with every victim row
# of the same case (the merged frame has more rows than either input).
# Columns present in both frames get pandas' default "_x" / "_y" suffixes.
df = df_off.merge(df_vic, on="caseno", how="left")

print("Merged:", df.shape)

# Save locally
df.to_csv("lapd_offenses_victims_merged.csv", index=False)



Offenses: (212391, 28)
Victims: (197422, 16)
Merged: (243639, 43)


In [32]:
# Show the column labels of the offenses frame.
df_off.columns

Index(['caseno', 'uniquenibrno', 'date_rptd', 'date_occ', 'time_occ', 'area',
       'area_name', 'rpt_dist_no', 'totaloffensecount', 'group', 'nibr_code',
       'nibr_description', 'crime_against', 'premis_cd', 'premis_desc',
       'weapon_used_cd', 'weapon_desc', 'status', 'status_desc',
       'totalvictimcount', 'victim_shot', 'domestic_violence_crime',
       'hate_crime', 'gang_related_crime', 'transit_related_crime',
       'homeless_victim_crime', 'homeless_suspect_crime',
       'homeless_arrestee_crime'],
      dtype='object')

In [33]:
# Show the column labels of the victims frame.
df_vic.columns

Index(['caseno', 'uniquevictimno', 'date_rptd', 'date_occ', 'time_occ', 'area',
       'area_name', 'rpt_dist_no', 'totalvictimcount', 'victim_type',
       'victim_shot', 'status', 'status_desc', 'vict_age', 'vict_sex',
       'vict_descent'],
      dtype='object')

In [None]:
# Victim rows whose case reports more than one victim, restricted to
# case numbers that also occur in the offenses frame.
df_vic.loc[
    df_vic["totalvictimcount"].astype(float).gt(1)
    & df_vic["caseno"].isin(df_off["caseno"].unique())
]

Unnamed: 0,caseno,uniquevictimno,date_rptd,date_occ,time_occ,area,area_name,rpt_dist_no,totalvictimcount,victim_type,victim_shot,status,status_desc,vict_age,vict_sex,vict_descent
6,25174642,25174642_1,2025-09-17T00:00:00.000,2025-09-17T00:00:00.000,530,10,West Valley,1004,2,Person,No,10,Cleared by Arrest,42,M,Hispanic
27,25169629,25169629_0,2025-09-09T00:00:00.000,2025-09-09T00:00:00.000,0,20,Olympic,2025,2,Person,No,40,Investigation Continued,32,M,Hispanic
57,25036161,25036161_1,2025-03-02T00:00:00.000,2025-03-02T00:00:00.000,1530,18,Southeast,1863,2,Person,No,40,Investigation Continued,47,F,Hispanic
59,25029792,25029792_0,2025-02-21T00:00:00.000,2025-02-21T00:00:00.000,736,3,Southwest,397,2,Person,No,40,Investigation Continued,43,F,Black/African American
70,25094706,25094706_1,2025-05-24T00:00:00.000,2025-05-24T00:00:00.000,1355,7,Wilshire,765,2,Person,No,40,Investigation Continued,22,F,Hispanic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
197393,C259035710,C259035710_2,2025-09-06T00:00:00.000,2025-09-04T00:00:00.000,2315,6,Hollywood,646,3,Person,No,40,Investigation Continued,20,F,White/Caucasian
197405,25088614,25088614_0,2025-05-16T00:00:00.000,2025-05-16T00:00:00.000,930,14,Pacific,1415,2,Person,No,40,Investigation Continued,56,F,Hispanic
197410,25166115,25166115_0,2025-09-04T00:00:00.000,2025-09-04T00:00:00.000,1630,19,Mission,1964,2,Business,No,40,Investigation Continued,,,
197417,25159707,25159707_0,2025-08-26T00:00:00.000,2025-08-26T00:00:00.000,2056,15,N Hollywood,1548,4,Person,No,10,Cleared by Arrest,35,F,Other


In [36]:
# Offense rows recorded for case 25174642.
df_off.loc[df_off["caseno"].eq("25174642")]

Unnamed: 0,caseno,uniquenibrno,date_rptd,date_occ,time_occ,area,area_name,rpt_dist_no,totaloffensecount,group,...,status_desc,totalvictimcount,victim_shot,domestic_violence_crime,hate_crime,gang_related_crime,transit_related_crime,homeless_victim_crime,homeless_suspect_crime,homeless_arrestee_crime
197892,25174642,25174642_13A_1,2025-09-17T00:00:00.000,2025-09-17T00:00:00.000,530,10,West Valley,1004,2,A,...,Cleared by Arrest,2,No,No,No,No,No,No,No,No
200841,25174642,25174642_13C_0,2025-09-17T00:00:00.000,2025-09-17T00:00:00.000,530,10,West Valley,1004,2,A,...,Cleared by Arrest,2,No,No,No,No,No,No,No,No


In [37]:
# Victim rows recorded for case 25174642.
df_vic.loc[df_vic["caseno"].eq("25174642")]

Unnamed: 0,caseno,uniquevictimno,date_rptd,date_occ,time_occ,area,area_name,rpt_dist_no,totalvictimcount,victim_type,victim_shot,status,status_desc,vict_age,vict_sex,vict_descent
6,25174642,25174642_1,2025-09-17T00:00:00.000,2025-09-17T00:00:00.000,530,10,West Valley,1004,2,Person,No,10,Cleared by Arrest,42,M,Hispanic
1619,25174642,25174642_0,2025-09-17T00:00:00.000,2025-09-17T00:00:00.000,530,10,West Valley,1004,2,Person,No,10,Cleared by Arrest,37,F,Hispanic


In [38]:
# Merged rows for case 25174642.
df.loc[df["caseno"].eq("25174642")]

Unnamed: 0,caseno,uniquenibrno,date_rptd_x,date_occ_x,time_occ_x,area_x,area_name_x,rpt_dist_no_x,totaloffensecount,group,...,area_name_y,rpt_dist_no_y,totalvictimcount_y,victim_type,victim_shot_y,status_y,status_desc_y,vict_age,vict_sex,vict_descent
226745,25174642,25174642_13A_1,2025-09-17T00:00:00.000,2025-09-17T00:00:00.000,530,10,West Valley,1004,2,A,...,West Valley,1004,2,Person,No,10,Cleared by Arrest,42,M,Hispanic
226746,25174642,25174642_13A_1,2025-09-17T00:00:00.000,2025-09-17T00:00:00.000,530,10,West Valley,1004,2,A,...,West Valley,1004,2,Person,No,10,Cleared by Arrest,37,F,Hispanic
230063,25174642,25174642_13C_0,2025-09-17T00:00:00.000,2025-09-17T00:00:00.000,530,10,West Valley,1004,2,A,...,West Valley,1004,2,Person,No,10,Cleared by Arrest,42,M,Hispanic
230064,25174642,25174642_13C_0,2025-09-17T00:00:00.000,2025-09-17T00:00:00.000,530,10,West Valley,1004,2,A,...,West Valley,1004,2,Person,No,10,Cleared by Arrest,37,F,Hispanic
