In [5]:
import polars as pl

In [6]:
# Load the incident export, parse its date/location columns, and normalise the
# free-text incident labels so that equivalent categories group together.
df = pl.read_csv(
    "./data/incident_dump.csv",
).with_columns(
    # Parse the offset-aware timestamp string, then express it in Chicago time.
    pl.col("reported")
    .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%z")
    .dt.convert_time_zone("America/Chicago"),
    pl.col("reported_date").str.to_date("%Y-%m-%d"),
    # "a,b" string -> list of floats (presumably a lat/lon pair; verify against the data).
    pl.col("validated_location").str.split(",").cast(pl.List(pl.Float64)),
    # Label clean-up. `str.replace` only substitutes the FIRST match in each
    # string, so every normalisation that should apply throughout the label
    # uses `str.replace_all`. Regex patterns are raw strings so that `\(` and
    # `\)` are not treated as (invalid) Python escape sequences.
    pl.col("incident")
    # Drop the "Information" qualifier when it is attached to another category.
    .str.replace_all("Information / |/ Information ", "")
    # Collapse any run of two or more spaces into a single space.
    .str.replace_all(r" {2,}", " ")
    # "Foo (Bar)" -> "Foo / Bar": a space-prefixed "(" becomes a separator,
    # any remaining parentheses are removed.
    .str.replace_all(r" \(", " / ")
    .str.replace_all(r"\(", "")
    # Remove one leading space (anchored, so a single match is all there can be).
    .str.replace(r"^ ", "")
    .str.replace_all(r"\)", "")
    # Fix known misspellings and unify the "Hit & Run" spelling.
    .str.replace_all("Inforation", "Information")
    .str.replace_all("Infformation", "Information")
    .str.replace_all("Hit & Run", "Hit and Run"),
)

In [7]:
# Tally rows per incident label and show the most frequent labels first.
incident_counts = df.groupby("incident").agg(pl.count())
incident_counts.sort(by="count", descending=True)

incident,count
str,u32
"""Theft""",232
"""Found Property…",93
"""Criminal Damag…",79
"""Information""",74
"""Battery""",66
"""Motor Vehicle …",54
"""Armed Robbery""",51
"""Medical Call""",39
"""Theft from Per…",36
"""Lost Property""",27


In [9]:
# Incident labels to drop from the analysis. The entries are joined into one
# regex alternation and matched with `str.contains`, so each entry excludes any
# label that CONTAINS it (e.g. "Sex" also covers "Sexual Assault").
# Entries must not contain regex metacharacters, since they are not escaped.
excluded_list = [
    "Medical Call",
    "Lost Property",
    "Stalking",
    "Sexual Assault",
    "Dating",
    "Domestic",
    "Sex",
    "Found Property",
    "Mental Health",
    "Harassment by Electronic Means",
]
# NOTE(review): rows with a null incident presumably fall out here as well,
# because the negated predicate evaluates to null for them — confirm.
df = df.filter(~pl.col("incident").str.contains("|".join(excluded_list)))
# Re-tally the remaining incident labels, most frequent first.
df.groupby(["incident"]).agg(pl.count()).sort("count", descending=True)

incident,count
str,u32
"""Theft""",232
"""Criminal Damag…",79
"""Information""",74
"""Battery""",66
"""Motor Vehicle …",54
"""Armed Robbery""",51
"""Theft from Per…",36
"""Criminal Damag…",25
"""Theft from Mot…",22
"""DUI""",20
