In [None]:


import pandas as pd
import re

# Load the current and historical call-center exports and stack them into one frame.
current_calls = pd.read_csv("data/callcenterdatacurrent.csv")
historical_calls = pd.read_csv("data/callcenterdatahistorical.csv")
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it the
# row labels of both files are kept and repeat, which makes label-based lookups
# and index alignment on `calls` ambiguous.
calls = pd.concat([current_calls, historical_calls], ignore_index=True)


# Classes: info, forestry, sewage, garbage/sanitation, buildings, roads,
# animals/roadkill/pests, other.
# Keyword patterns are tried in this order and the first one that matches wins.
_CLASS_PATTERNS = (
    ("forestry",
     re.compile(r"\b(trees?|brush|forestry|weeds|grass|branch(?:es)?|leaf)\b")),
    ("sewage",
     re.compile(r"\b(drains?|sewage|backwater|flood(?:ing|s)?|water|sewers?|plumbing)\b")),
    # `(?:e|ing)` is a non-capturing group so both "recycle" and "recycling" match.
    ("garbage/sanitation",
     re.compile(r"\b(bins?|garbage|trash|bulky pickup|recycl(?:e|ing)|sanitation|litter|collection)\b")),
    ("buildings",
     re.compile(
         r"\b(buildings?|property|graffiti|debris|disrepair|abandoned|vacant|electric(?:ity|al)?|houses?|garages?|construction)\b")),
    ("roads",
     re.compile(r"\b(roads?|streets?|lamps?|dark|potholes?|signs?|sidewalks?|zoning|lights?|barricade|guardrail)\b")),
    ("animals/roadkill/pests",
     re.compile(r"\b(roadkill|animals?|dogs?|cats?|rats?|mice|roach(?:es)?|vermin)\b")),
)


def title_to_annotation(title):
    """Map a call title to a coarse service class.

    The title is lower-cased and checked for the phrase "info only" first;
    otherwise the whole-word keyword patterns in ``_CLASS_PATTERNS`` are
    tried in order and the label of the first match is returned.

    Args:
        title: The call title. Non-string values (for example NaN or None
            for a missing title) are classified as "other".

    Returns:
        One of "info", "forestry", "sewage", "garbage/sanitation",
        "buildings", "roads", "animals/roadkill/pests" or "other".
    """
    # Missing titles are not strings and cannot be lower-cased.
    if not isinstance(title, str):
        return "other"

    title = title.lower()
    if "info only" in title:
        return "info"
    for label, pattern in _CLASS_PATTERNS:
        if pattern.search(title):
            return label
    return "other"


# Tag each call with the service class inferred from its TITLE text.
calls["CLASS"] = calls["TITLE"].map(title_to_annotation)





In [None]:

# Extract the ZIP code from the end of an address string.
def get_zip_from_address(address):
    """Return the 5-digit ZIP code that ends ``address``, or None.

    The value is converted with ``str()`` and stripped of surrounding
    whitespace. A trailing ZIP+4 (five digits, one separator character,
    four digits) is recognised first; if that is absent, a trailing
    stand-alone 5-digit ZIP is accepted as well.

    Args:
        address: The address value; any type is accepted because it is
            passed through ``str()``.

    Returns:
        The five ZIP digits as a string, or None when the text does not
        end in a ZIP code.
    """
    text = str(address).strip()

    # ZIP+4 form; the single "." accepts whatever separator the data uses.
    match = re.search(r"(\d{5}).(\d{4})$", text)
    if match is None:
        # Plain 5-digit ZIP. The lookbehind rejects the tail of a longer digit run.
        match = re.search(r"(?<!\d)(\d{5})$", text)

    return match.group(1) if match else None


# Derive a ZIP column from the free-text OBJECTDESC field, then display it.
calls["ZIP"] = calls["OBJECTDESC"].map(get_zip_from_address)

calls["ZIP"]



In [None]:
# Ten ZIP codes with the most forestry calls.
trees_calls = calls[calls["CLASS"] == "forestry"]

zip_freq = (
    trees_calls
    .groupby(["ZIP"])
    .size()
    .reset_index(name="FREQUENCY")
    .sort_values("FREQUENCY", ascending=False)
    .head(10)
)
# Resulting columns: ZIP, FREQUENCY
zip_freq

In [None]:
from matplotlib import pyplot as plt


def plot_by_class(class_, top_n=10):
    """Draw a bar chart of the ZIP codes with the most calls of one class.

    Filters the module-level ``calls`` frame to rows whose CLASS equals
    ``class_``, counts rows per ZIP, keeps the ``top_n`` largest counts and
    plots them on a newly created figure.

    Args:
        class_: Class label to select from ``calls["CLASS"]``.
        top_n: How many ZIP codes to show, highest count first (default 10).

    Returns:
        None. The figure is displayed with ``plt.show(block=False)``.
    """
    class_calls = calls.loc[calls["CLASS"] == class_]

    zip_freq = (
        class_calls.groupby(["ZIP"])
        .size()
        .reset_index(name="FREQUENCY")
        .sort_values("FREQUENCY", ascending=False)
        .head(top_n)
    )

    # Create the figure before drawing so the bars, labels and white background
    # all belong to the same figure and repeated calls never share a canvas.
    fig, ax = plt.subplots()
    fig.patch.set_facecolor("white")

    ax.bar(zip_freq["ZIP"], zip_freq["FREQUENCY"], width=0.8, color="blue")
    ax.tick_params(axis="x", labelrotation=90)
    ax.set_xlabel("Zipcodes")
    ax.set_ylabel("Number of Calls")
    ax.set_title(f"Number of {class_} Calls per Zipcode")
    plt.show(block=False)


In [None]:
# One top-ZIP bar chart per listed class.
for class_ in (
    "forestry",
    "sewage",
    "garbage/sanitation",
    "buildings",
    "roads",
    "animals/roadkill/pests",
):
    plot_by_class(class_)
    
