In [1]:
from datetime import date

import polars as pl

from util.general import list_to_parsed_set

In [2]:
# Import incidents and format columns
df = pl.read_csv(
    "./data/incident_dump.csv",
).with_columns(
    pl.col("reported")
    .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%z")
    .dt.convert_time_zone("America/Chicago"),
    pl.col("reported_date").str.to_date("%Y-%m-%d"),
    pl.col("validated_location").str.split(",").cast(pl.List(pl.Float64)),
)

In [3]:
df.groupby(["incident"]).agg(pl.count()).sort("count", descending=True)

incident,count
str,u32
"""Theft""",2981
"""Information""",1383
"""Found Property…",930
"""Lost Property""",843
"""Traffic Violat…",534
"""Information / …",519
"""Liquor Law Vio…",447
"""Medical Call""",438
"""Battery""",425
"""Information / …",358


In [4]:
print(list_to_parsed_set(df["incident"].to_list()))



In [5]:
excluded_list = [
    "Aggravated Criminal Sexual Assault",
    "Aggravated Domestic Assault",
    "Aggravated Domestic Battery",
    "Attempted Sexual Assault",
    "Criminal Sexual Abuse",
    "Criminal Sexual Assault",
    "Dating Violence",
    "Domestic Aggravated Assault",
    "Domestic Aggravated Battery",
    "Domestic Assault",
    "Domestic Battery",
    "Domestic Dispute",
    "Domestic Disturbance",
    "Domestic Issue",
    "Domestic",
    "Eavesdropping",
    "Fondling",
    "Harassing Messages",
    "Harassment by Electronic Means",
    "Indecent Exposure",
    "Luring a Minor",
    "Medical Call",
    "Medical Transport",
    "Mental Health",
    "Sex Crime",
    "Sex Offender",
    "Sex Offense",
    "Sex Related",
    "Sexual Abuse",
    "Sexual Assault",
    "Stalking",
    "Suspicious Mail",
    "Threatening Phone Call",
    "Violation of Order of Protection",
    "Warrant",
    "Well-Being",
]
# df = df.filter(~pl.col("incident").str.contains("|".join(excluded_list)))
df.groupby(["incident"]).agg(pl.count()).sort("count", descending=True)

incident,count
str,u32
"""Theft""",2981
"""Information""",1383
"""Found Property…",930
"""Lost Property""",843
"""Traffic Violat…",534
"""Information / …",519
"""Liquor Law Vio…",447
"""Medical Call""",438
"""Battery""",425
"""Information / …",358


In [6]:
print(list_to_parsed_set(df["incident"].to_list()))



In [7]:
len(list_to_parsed_set(df["incident"].to_list()))

387

In [8]:
df.head()

comments,disposition,incident,location,occurred,predicted_incident,reported,reported_date,season,ucpd_id,validated_address,validated_location
str,str,str,str,str,str,"datetime[μs, America/Chicago]",date,str,str,str,list[f64]
"""Unknown subjec…","""Referred""","""Information / …","""4950 S. Chicag…","""2/26/24 2:15 A…",,2024-02-26 16:40:00 CST,2024-02-26,"""Winter""","""2024-007641""","""4950 South Chi…","[41.804531, -87.584494]"
"""2019 Kia Optim…","""Referred""","""Information / …","""5341 S. Maryla…","""2/26/24 5:00 P…",,2024-02-26 20:16:00 CST,2024-02-26,"""Winter""","""2024-007672""","""5341 S MARYLAN…","[41.797757, -87.605034]"
"""Two unknown su…","""CPD""","""Information / …","""1526 E. 53rd S…","""2/26/24 10:30 …",,2024-02-26 22:40:00 CST,2024-02-26,"""Winter""","""2024-007690""","""1526 E 53RD ST…","[41.799611, -87.588343]"
"""Unknown suspec…","""Open""","""Burglary""","""6300 S. Univer…","""2/23/24 to 2/2…",,2024-02-26 06:12:00 CST,2024-02-26,"""Winter""","""24-00192""","""6300 S UNIVERS…","[41.780454, -87.597322]"
"""A person kicke…","""Arrest""","""Criminal Damag…","""5700 S. Maryla…","""2/26/24 9:16 A…",,2024-02-26 09:18:00 CST,2024-02-26,"""Winter""","""24-00193""","""5700 S MARYLAN…","[41.790399, -87.605017]"


In [9]:
df_filtered = df.filter(pl.col("reported_date") > date(2019, 12, 31)).filter(
    pl.col("reported_date").dt.month() <= 2
)
df_filtered = df_filtered.with_columns(
    (df_filtered["reported_date"].dt.year()).alias("year")
)
df_filtered.head()

comments,disposition,incident,location,occurred,predicted_incident,reported,reported_date,season,ucpd_id,validated_address,validated_location,year
str,str,str,str,str,str,"datetime[μs, America/Chicago]",date,str,str,str,list[f64],i32
"""Unknown subjec…","""Referred""","""Information / …","""4950 S. Chicag…","""2/26/24 2:15 A…",,2024-02-26 16:40:00 CST,2024-02-26,"""Winter""","""2024-007641""","""4950 South Chi…","[41.804531, -87.584494]",2024
"""2019 Kia Optim…","""Referred""","""Information / …","""5341 S. Maryla…","""2/26/24 5:00 P…",,2024-02-26 20:16:00 CST,2024-02-26,"""Winter""","""2024-007672""","""5341 S MARYLAN…","[41.797757, -87.605034]",2024
"""Two unknown su…","""CPD""","""Information / …","""1526 E. 53rd S…","""2/26/24 10:30 …",,2024-02-26 22:40:00 CST,2024-02-26,"""Winter""","""2024-007690""","""1526 E 53RD ST…","[41.799611, -87.588343]",2024
"""Unknown suspec…","""Open""","""Burglary""","""6300 S. Univer…","""2/23/24 to 2/2…",,2024-02-26 06:12:00 CST,2024-02-26,"""Winter""","""24-00192""","""6300 S UNIVERS…","[41.780454, -87.597322]",2024
"""A person kicke…","""Arrest""","""Criminal Damag…","""5700 S. Maryla…","""2/26/24 9:16 A…",,2024-02-26 09:18:00 CST,2024-02-26,"""Winter""","""24-00193""","""5700 S MARYLAN…","[41.790399, -87.605017]",2024


In [10]:
df_filtered.groupby("year").count().sort(by="year")

year,count
i32,u32
2020,191
2021,180
2022,224
2023,285
2024,290


In [11]:
VIOLENT_INCIDENT_TYPES = [
    "aggravated assault of police officer",
    "aggravated assault",
    "aggravated battery of a police officer",
    "aggravated battery of police officer",
    "aggravated battery to police officer",
    "aggravated criminal sexual assault",
    "aggravated discharge of a firearm",
    "aggravated domestic battery",
    "aggravated robbery",
    "aggravated vehicular hijacking",
    "armed robbery",
    "arson",
    "assault and harassment by electronic means",
    "assault",
    "attempted aggravated robbery",
    "attempted armed robbery",
    "attempted sexual assault",
    "bomb threat",
    "dating violence",
    "domestic aggravated battery",
    "homicide",
    "murder",
    "sex offense",
    "sexual assault",
]


df_filtered = df_filtered.filter(
    pl.col("incident").str.contains("|".join(VIOLENT_INCIDENT_TYPES))
)
df_filtered

comments,disposition,incident,location,occurred,predicted_incident,reported,reported_date,season,ucpd_id,validated_address,validated_location,year
str,str,str,str,str,str,"datetime[μs, America/Chicago]",date,str,str,str,list[f64],i32


In [12]:
df_filtered.groupby("year").count().sort(by="year")

year,count
i32,u32
