In [1]:
import polars as pl

In [2]:
# Load the raw incident export, then normalise the column types in one pass.
df = pl.read_csv("./data/incident_dump.csv")
df = df.with_columns(
    [
        # Title-case the incident label.
        pl.col("incident").str.to_titlecase(),
        # Comma-separated text -> list of floats.
        pl.col("validated_location").str.split(",").cast(pl.List(pl.Float64)),
        # "YYYY-MM-DD" text -> Date.
        pl.col("reported_date").str.to_date("%Y-%m-%d"),
        # Timestamp text with a UTC offset -> datetime converted to Chicago time.
        pl.col("reported")
        .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%z")
        .dt.convert_time_zone("America/Chicago"),
    ]
)

In [3]:
# Number of rows per incident label, most frequent first.
(
    df.groupby("incident")
    .agg(pl.count())
    .sort("count", descending=True)
)

incident,count
str,u32
"""Theft""",934
"""Information""",618
"""Found Property…",570
"""Information / …",383
"""Medical Call""",287
"""Battery""",274
"""Lost Property""",258
"""Information / …",231
"""Mental Health …",221
"""Liquor Law Vio…",199


In [4]:
# Fragments of incident labels to drop from the analysis.
# The entries are joined with "|" and passed to `str.contains`, which
# interprets the result as a regular expression: every entry therefore matches
# anywhere inside the label (e.g. "Sex" also covers "Sexual Assault").
# Keep entries free of regex metacharacters unless a pattern is intended.
excluded_list = [
    "Fondling",
    "Medical Call",
    "Luring a Minor",
    "Lost Property",
    "Stalking",
    "Sexual Assault",
    "Dating",
    "Domestic",
    "Sex",
    "Found Property",
    "Mental Health",
    "Harassment by Electronic Means",
    "Well-Being Check",
    "Threatening Phone Call",
]
# Keep only the rows whose incident label matches none of the fragments.
df = df.filter(~pl.col("incident").str.contains("|".join(excluded_list)))
# Re-count the remaining incident labels, most frequent first.
df.groupby(["incident"]).agg(pl.count()).sort("count", descending=True)

incident,count
str,u32
"""Theft""",934
"""Information""",618
"""Information / …",383
"""Battery""",274
"""Information / …",231
"""Liquor Law Vio…",199
"""Found Narcotic…",155
"""Information / …",150
"""Information / …",120
"""Criminal Damag…",116
