In [127]:
from datetime import date

import polars as pl

In [128]:
def list_to_parsed_set(unparsed_list: "list[str | None]") -> "set[str]":
    """Collect the distinct, normalised labels found in a list of raw strings.

    Every entry is split on "/"; each resulting piece is stripped of
    surrounding whitespace and lower-cased. Pieces that are empty after
    stripping are discarded.

    Args:
        unparsed_list: Raw label strings, possibly compound ("a / b").
            ``None`` entries are skipped. The list is not modified.

    Returns:
        The set of unique normalised labels.
    """
    parsed_set = set()
    for element in unparsed_list:
        if element is None:
            # Null cells carry no label; skipping them avoids an AttributeError.
            continue
        # split() on a string without "/" yields the string itself, so one
        # loop covers both simple and compound entries.
        for piece in element.split("/"):
            label = piece.strip().lower()
            if label:
                parsed_set.add(label)
    return parsed_set

In [129]:
# Read the raw incident export and normalise column types in one pass:
#   reported           -> datetime parsed with its UTC offset, shown in America/Chicago
#   reported_date      -> date
#   validated_location -> comma-separated text split into a list of floats
#   incident           -> lower-cased so later label matching ignores case
df = pl.read_csv("./data/incident_dump.csv").with_columns(
    [
        pl.col("reported")
        .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%z")
        .dt.convert_time_zone("America/Chicago"),
        pl.col("reported_date").str.to_date(format="%Y-%m-%d"),
        pl.col("validated_location").str.split(",").cast(pl.List(pl.Float64)),
        pl.col("incident").str.to_lowercase(),
    ]
)

In [130]:
# Rank the raw (lower-cased) incident strings by number of rows, most frequent first.
# NOTE(review): newer Polars releases spell these `group_by` / `pl.len()` — confirm
# against the pinned Polars version before upgrading.
df.groupby(["incident"]).agg(pl.count()).sort("count", descending=True)

incident,count
str,u32
"""theft""",2980
"""information""",1383
"""found property…",929
"""lost property""",843
"""traffic violat…",534
"""information / …",519
"""liquor law vio…",447
"""medical call""",437
"""battery""",425
"""information / …",358


In [131]:
# Print every distinct label once compound "a / b" incident strings are split apart.
print(list_to_parsed_set(df["incident"].to_list()))



In [132]:
# Incident labels that were candidates for removal from the data set.
# NOTE(review): this list is currently unused — the filter further down is
# commented out, so `df` leaves this cell unchanged.
# NOTE(review): "Stalking" is listed twice.
# NOTE(review): entries are Title Case while `incident` is lower-cased at load
# time, so the disabled filter would need lower-cased patterns to match —
# confirm before re-enabling.
# NOTE(review): several entries (e.g. "Sexual Assault", "Dating Violence") also
# appear in VIOLENT_INCIDENT_TYPES later in the notebook; enabling the filter
# would drop rows that the later analysis selects.
excluded_list = [
    "Fondling",
    "Medical Call",
    "Luring a Minor",
    "Lost Property",
    "Stalking",
    "Sexual Assault",
    "Dating",
    "Stalking",
    "Domestic",
    "Sex",
    "Found Property",
    "Mental Health",
    "Harassment by Electronic Means",
    "Well-Being",
    "Threatening Phone Call",
    "Medical Transport",
    "Warrant",
    "Lost Wallet",
    "Fire Alarm",
    "Chemical Spill",
    "Suspicious Mail",
    "Eavesdropping",
    "Sex Offense",
    "Sex Offender",
    "Sex Crime",
    "Domestic Aggravated Battery",
    "Dating Violence",
    "Harassing Messages",
]
# df = df.filter(~pl.col("incident").str.contains("|".join(excluded_list)))
# Incident frequencies again; with the filter disabled this repeats the earlier ranking.
df.groupby(["incident"]).agg(pl.count()).sort("count", descending=True)

incident,count
str,u32
"""theft""",2980
"""information""",1383
"""found property…",929
"""lost property""",843
"""traffic violat…",534
"""information / …",519
"""liquor law vio…",447
"""medical call""",437
"""battery""",425
"""information / …",358


In [133]:
# Print the distinct normalised labels still present in `df`.
# NOTE(review): repeats an earlier cell verbatim; `df` has not changed in between
# because the exclusion filter is commented out.
print(list_to_parsed_set(df["incident"].to_list()))



In [134]:
# Number of distinct normalised incident labels in `df`.
len(list_to_parsed_set(df["incident"].to_list()))

391

In [135]:
# Preview the first rows of `df` to check the parsed column dtypes.
df.head()

comments,disposition,incident,location,occurred,predicted_incident,reported,reported_date,season,ucpd_id,validated_address,validated_location
str,str,str,str,str,str,"datetime[μs, America/Chicago]",date,str,str,str,list[f64]
"""Unknown person…","""CPD""","""information / …","""S. Blackstone …","""2/25/24 4:15 A…",,2024-02-25 07:47:00 CST,2024-02-25,"""Winter""","""2024-007468""","""5420 South Bla…","[41.797329, -87.590774]"
"""An underage in…","""Referred""","""liquor law vio…","""5630 S. Univer…","""2/25/24 2:27 A…",,2024-02-25 02:27:00 CST,2024-02-25,"""Winter""","""24-00188""","""5630 S UNIVERS…","[41.792667, -87.598182]"
"""An underage in…","""Referred""","""liquor law vio…","""1009 E. 57th S…","""2/25/24 3:24 A…",,2024-02-25 03:24:00 CST,2024-02-25,"""Winter""","""24-00189""","""1009 E 57TH ST…","[41.791339, -87.601044]"
"""Two tool boxes…","""Closed""","""found property…","""850 E. 61st St…","""2/25/24 1:34 P…",,2024-02-25 13:34:00 CST,2024-02-25,"""Winter""","""24-00190""","""850 E 61ST ST,…","[41.784124, -87.605663]"
"""A visitor was …","""Arrest""","""unlawful posse…","""5700 S. Maryla…","""2/25/24 2:11 P…",,2024-02-25 14:11:00 CST,2024-02-25,"""Winter""","""24-00191""","""5700 S MARYLAN…","[41.790399, -87.605017]"


In [136]:
# Restrict to reports dated after 2019-12-31 that fall in January or February,
# then tag each row with its calendar year so the same two-month window can be
# compared across years.
df_filtered = df.filter(
    (pl.col("reported_date") > date(2019, 12, 31))
    & (pl.col("reported_date").dt.month() <= 2)
).with_columns(pl.col("reported_date").dt.year().alias("year"))
df_filtered.head()

comments,disposition,incident,location,occurred,predicted_incident,reported,reported_date,season,ucpd_id,validated_address,validated_location,year
str,str,str,str,str,str,"datetime[μs, America/Chicago]",date,str,str,str,list[f64],i32
"""Unknown person…","""CPD""","""information / …","""S. Blackstone …","""2/25/24 4:15 A…",,2024-02-25 07:47:00 CST,2024-02-25,"""Winter""","""2024-007468""","""5420 South Bla…","[41.797329, -87.590774]",2024
"""An underage in…","""Referred""","""liquor law vio…","""5630 S. Univer…","""2/25/24 2:27 A…",,2024-02-25 02:27:00 CST,2024-02-25,"""Winter""","""24-00188""","""5630 S UNIVERS…","[41.792667, -87.598182]",2024
"""An underage in…","""Referred""","""liquor law vio…","""1009 E. 57th S…","""2/25/24 3:24 A…",,2024-02-25 03:24:00 CST,2024-02-25,"""Winter""","""24-00189""","""1009 E 57TH ST…","[41.791339, -87.601044]",2024
"""Two tool boxes…","""Closed""","""found property…","""850 E. 61st St…","""2/25/24 1:34 P…",,2024-02-25 13:34:00 CST,2024-02-25,"""Winter""","""24-00190""","""850 E 61ST ST,…","[41.784124, -87.605663]",2024
"""A visitor was …","""Arrest""","""unlawful posse…","""5700 S. Maryla…","""2/25/24 2:11 P…",,2024-02-25 14:11:00 CST,2024-02-25,"""Winter""","""24-00191""","""5700 S MARYLAN…","[41.790399, -87.605017]",2024


In [137]:
# Rows per calendar year within the January–February window (all incident types).
df_filtered.groupby("year").count().sort(by="year")

year,count
i32,u32
2020,191
2021,180
2022,224
2023,285
2024,280


In [138]:
# Incident labels treated as violent for this analysis, one entry per label.
# All entries are lower-case to match the lower-cased `incident` column.
VIOLENT_INCIDENT_TYPES = [
    "aggravated assault of police officer",
    "aggravated assault",
    "aggravated battery of a police officer",
    "aggravated battery of police officer",
    "aggravated battery to police officer",
    "aggravated criminal sexual assault",
    "aggravated discharge of a firearm",
    "aggravated domestic battery",
    "aggravated robbery",
    "aggravated vehicular hijacking",
    "armed robbery",
    "arson",
    "assault and harassment by electronic means",
    "assault",
    "attempted aggravated robbery",
    "attempted armed robbery",
    "attempted sexual assault",
    "bomb threat",
    "dating violence",
    "domestic aggravated battery",
    "homicide",
    "murder",
    "sex offense",
    "sexual assault",
]


# The labels are joined into a single regex alternation, so a row is kept when
# ANY label occurs anywhere inside its incident string. This is what lets
# compound entries such as "information / armed robbery" match. It also means
# short labels ("assault") already cover the longer ones that contain them.
# NOTE: labels are used as regex source; escape any future label that contains
# regex metacharacters.
df_filtered = df_filtered.filter(
    pl.col("incident").str.contains("|".join(VIOLENT_INCIDENT_TYPES))
)
df_filtered

comments,disposition,incident,location,occurred,predicted_incident,reported,reported_date,season,ucpd_id,validated_address,validated_location,year
str,str,str,str,str,str,"datetime[μs, America/Chicago]",date,str,str,str,list[f64],i32
"""DSS was notifi…","""No Investigati…","""sexual assault…","""5700 S. Maryla…","""2/13/24 time n…",,2024-02-20 12:49:00 CST,2024-02-20,"""Winter""","""CSA Report …","""5700 S MARYLAN…","[41.790399, -87.605017]",2024
"""Three unknown …","""CPD""","""information / …","""5601 S. Kenwoo…","""2/19/24 10:55 …",,2024-02-19 23:02:00 CST,2024-02-19,"""Winter""","""2024-006689""","""5601 S KENWOOD…","[41.793254, -87.593181]",2024
"""A person makin…","""Ex. Cleared""","""assault""","""1100 E. 57th S…","""2/19/24 1:20 P…",,2024-02-19 13:21:00 CST,2024-02-19,"""Winter""","""24-00164""","""1100 E 57TH ST…","[41.791485, -87.598153]",2024
"""Subject assaul…","""Ex. Cleared""","""assault""","""5800 S. Univer…","""2/16/24 1:00 P…",,2024-02-16 13:00:00 CST,2024-02-16,"""Winter""","""24-00154""","""5800 S UNIVERS…","[41.790511, -87.598131]",2024
"""Person found s…","""CPD""","""information / …","""1101 E. 47th S…","""2/13/24 12:40 …",,2024-02-13 00:42:00 CST,2024-02-13,"""Winter""","""2024-005783""","""1101 E 47TH ST…","[41.809561, -87.59919]",2024
"""Victim reporte…","""CPD""","""information / …","""S. East End Av…","""10/20/23 3:00 …",,2024-02-07 17:23:00 CST,2024-02-07,"""Winter""","""24-00130""","""5020 South Eas…","[41.803201, -87.585309]",2024
"""A person walki…","""CPD""","""information / …","""S. Dorchester …","""2/2/24 10:45 P…",,2024-02-02 23:13:00 CST,2024-02-02,"""Winter""","""2024-004492""","""5120 South Dor…","[41.801658, -87.592142]",2024
"""Victim walking…","""Referred""","""information / …","""5550 S. Dorche…","""1/24/24 5:15 P…",,2024-01-24 17:17:00 CST,2024-01-24,"""Winter""","""2024-003266""","""5550 S DORCHES…","[41.793893, -87.591731]",2024
"""A person walki…","""Open""","""attempted arme…","""1425 E. Midway…","""1/24/24 5:20 P…",,2024-01-24 17:25:00 CST,2024-01-24,"""Winter""","""24-00089""","""1425 East Midw…","[41.787184, -87.599254]",2024
"""A subject mena…","""Arrest""","""assault""","""1055 E. 55th S…","""1/22/24 10:19 …",,2024-01-22 22:19:00 CST,2024-01-22,"""Winter""","""24-00077""","""1055 E 55TH ST…","[41.794964, -87.600471]",2024


In [139]:
# Rows per calendar year after `df_filtered` was narrowed to violent incident types.
df_filtered.groupby("year").count().sort(by="year")

year,count
i32,u32
2020,15
2021,20
2022,27
2023,34
2024,18
