In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt

import plotly.express as px
import plotly.graph_objects as go

from datetime import datetime, timedelta
import json

In [None]:
# Load the 2019 census-by-community GeoJSON into a GeoDataFrame.
census_geojson_path = "data/census_by_community_2019.geojson"
cen_df = gpd.read_file(census_geojson_path)

In [None]:
# Reduce the census layer to one boundary geometry per community, indexed by name.
# Rows whose name starts with a digit are excluded.
# NOTE(review): presumably these are numbered residual areas rather than named
# communities - confirm against the census data dictionary.
# The pattern is a raw string so that "\d" reaches the regex engine unchanged
# (a plain "\d" is an invalid string escape and warns on modern Python).
starts_with_digit = cen_df["name"].str.contains(r"^\d")
communities_df = (
    cen_df
    .loc[~starts_with_digit, ["name", "geometry"]]
    .set_index("name")
)
communities_df.shape

In [None]:
# Load the per-community crime records and label each row with its calendar quarter
# (rendered as a string by pandas' quarterly Period).
crime_df = pd.read_csv("data/community_crime_statistics.csv", parse_dates=["Date"])
crime_df["QUARTER"] = crime_df["Date"].dt.to_period("Q").astype("str")

# Build both row filters against the same (unfiltered) frame and combine them once,
# instead of chaining .loc calls that rely on index alignment of a stale mask.
# Raw string: "\d" must reach the regex engine, not the string-literal parser.
starts_with_digit = crime_df["Community Name"].str.contains(r"^\d")
has_location = crime_df["lat"].notna()

# Total crime count per (community, quarter). The column is selected *before*
# summing; the previous `.sum(["Crime Count"])` passed the list as the positional
# `numeric_only` flag and only worked because a non-empty list is truthy.
# The right join keeps every community in communities_df, so communities with no
# matching crime rows appear once with missing QUARTER / Crime Count.
crime_df = (
    crime_df
    .loc[~starts_with_digit & has_location]
    .groupby(["Community Name", "QUARTER"])["Crime Count"]
    .sum()
    .reset_index()
    .set_index("Community Name")
    .join(communities_df, how="right")
)

crime_df

In [None]:
# Promote the joined crime table to a GeoDataFrame using its "geometry" column,
# then reproject to EPSG:4326 (longitude/latitude) for the map traces.
crime_gdf = gpd.GeoDataFrame(crime_df, geometry=crime_df["geometry"]).to_crs(epsg=4326)

# Show the quarterly counts in ascending order to see their range.
crime_gdf["Crime Count"].sort_values()


In [None]:
shelter_df = pd.read_csv(
    "data/2013-2022-emergency-shelter-occupancy-machine-readable.csv",
    parse_dates=["Date"],
)

# Manually fill in the missing Capacity value for Brendas House on 2022-04-01.
is_brendas_house_gap = (shelter_df["Date"] == "2022-04-01") & (
    shelter_df["ShelterName"] == "Brendas House"
)
shelter_df.loc[is_brendas_house_gap, "Capacity"] = 60
shelter_df.head(3)

In [None]:
# Keep only Calgary rows and the columns used below; the counts are cast to
# integers (this raises if either column still contains missing values).
shelter_columns = ["City", "Date", "ShelterName", "Capacity", "Overnight", "ShelterType", "YEAR", "MONTH"]
shelter_df = (
    shelter_df
    .loc[shelter_df["City"] == "Calgary", shelter_columns]
    .astype({"Capacity": "int32", "Overnight": "int32"})
)

# Label each record with its calendar quarter (string form of a quarterly Period).
shelter_df["QUARTER"] = shelter_df["Date"].dt.to_period("Q").astype("str")

# Both masks are built from the same frame and combined once, rather than chaining
# .loc calls whose second mask refers to the pre-filter frame.
from_2017_onward = shelter_df["Date"].dt.year >= 2017
has_capacity = shelter_df["Capacity"] > 0

# Sum capacity and overnight stays per (quarter, shelter). The two columns are
# selected explicitly: the previous `.sum(["Capacity", "Overnight"])` passed the list
# as the positional `numeric_only` flag, which also summed YEAR and MONTH into
# meaningless totals. Those two columns are therefore no longer in the result.
shelter_df = (
    shelter_df
    .loc[from_2017_onward & has_capacity]
    .groupby(["QUARTER", "ShelterName"])[["Capacity", "Overnight"]]
    .sum()
    .reset_index()
    .set_index("ShelterName")
)

In [None]:
# Quarterly occupancy: summed overnight stays divided by summed capacity.
shelter_df["occupancy_rate"] = shelter_df["Overnight"].div(shelter_df["Capacity"])

In [None]:
import re
import urllib.request

from bs4 import BeautifulSoup, NavigableString

In [None]:
# Download the Government of Alberta "find shelters" page and parse it.
# A browser-like User-Agent header is sent with the request.
user_agent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7"
url = "https://www.alberta.ca/find-shelters.aspx"
headers = {
    "User-Agent": user_agent,
}

request = urllib.request.Request(url, None, headers)
# The context manager closes the HTTP response even if reading fails; the timeout
# stops a stalled connection from hanging the notebook indefinitely.
with urllib.request.urlopen(request, timeout=30) as response:
    data = response.read()

# Name the parser explicitly so the parse tree does not depend on which optional
# parsers (lxml, html5lib) happen to be installed; "html.parser" ships with Python.
shelter_soup = BeautifulSoup(data, "html.parser")

In [None]:
def _h3_with_text(text):
    """Return a BeautifulSoup tag filter matching an <h3> whose string equals ``text``."""
    return lambda tag: tag.name == "h3" and tag.string == text


# Both headings are searched for inside the element with id "goa-grid11591".
shelter_grid = shelter_soup.find(id="goa-grid11591")

# The "Calgary" heading marks where the Calgary entries start; the "Edmonton"
# heading is used as the stop marker when walking the siblings afterwards.
calgary_shelters_start = shelter_grid.find(_h3_with_text("Calgary"))
calgary_shelters_end = shelter_grid.find(_h3_with_text("Edmonton"))

In [None]:
# Collect every sibling node after the Calgary heading up to (not including) the
# Edmonton heading, ignoring bare newline text nodes.
shelters = []

for node in calgary_shelters_start.next_siblings:
    if node == calgary_shelters_end:
        break
    if node != "\n":
        shelters.append(node)

In [None]:
# Shelter name: the text of the first link in each entry that has target="_blank".
shelter_names = [shelter.find("a", target="_blank").string for shelter in shelters]

# Address line: the first text node that starts with a newline followed either by
# digits or by two words, a comma and digits.
# Written as a raw string so "\d", "\w" and "\s" reach the regex engine unchanged
# (they are invalid escapes in a plain string literal); the regex engine itself
# interprets "\n" as a newline, so the pattern matches exactly what it did before.
address_pattern = re.compile(r"^\n[\d]+|^\n[\w]+\s[\w]+,\s[\d]+")

# Strip surrounding whitespace and replace non-breaking spaces with plain spaces.
# Note: an entry with no matching text node yields the literal string "None".
shelter_addresses = [
    str(shelter.find(string=address_pattern)).strip().replace("\xa0", " ")
    for shelter in shelters
]

In [None]:
# Map each scraped shelter name to the part of its address after the last comma.
address_tails = [address.rsplit(",", 1)[-1] for address in shelter_addresses]
shelters_dict = dict(zip(shelter_names, address_tails))

In [None]:
# Replacement shelter names, paired by position with the scraped `shelter_names`
# when the address lookup is rebuilt.
# NOTE(review): presumably these are the spellings used in the occupancy dataset's
# ShelterName column - confirm. The order must match the scraped page order.
adjusted_names = [
    "Alpha House",
    "Brendas House",
    "CDIC - Days Inn Overflow",
    "Inn From the Cold - 3rd Floor",
    "Calgary YWCA",
    "Mustard Seed",
    "Centre of Hope",
    "daytime",  # placeholder entry; removed again after the names are paired up
]

In [None]:
# Re-key the scraped addresses under the adjusted names; the two name lists are
# paired purely by position, and zip stops at the shorter one.
adjusted_shelter_addresses = {
    new_name: shelters_dict[old_name]
    for new_name, old_name in zip(adjusted_names, shelter_names)
}

# Discard the "daytime" placeholder entry if it was paired with anything.
adjusted_shelter_addresses.pop("daytime", None)
adjusted_shelter_addresses

In [None]:
from geopy.geocoders import Nominatim

In [None]:
geolocator = Nominatim(user_agent="DATA_601_project")


def _geocode_lon_lat(address):
    """Geocode ``address`` once and return its ``(longitude, latitude)`` pair.

    Raises:
        ValueError: if the geocoder returns no result for the address. Without this
            check the failure would surface as an AttributeError on ``None``.
    """
    location = geolocator.geocode(address)
    if location is None:
        raise ValueError(f"No geocoding result for shelter address: {address!r}")
    return location.longitude, location.latitude


# One row per shelter: adjusted name plus the scraped address fragment, with
# " Calgary" appended before the address is sent to the geocoder.
shelter_location_df = pd.DataFrame({
    "ShelterName": list(adjusted_shelter_addresses.keys()),
    "ShelterAddress": list(adjusted_shelter_addresses.values()),
})
shelter_location_df["ShelterAddress"] = shelter_location_df["ShelterAddress"] + " Calgary"

# Each address is geocoded exactly once and both coordinates are taken from that
# single result. The previous version issued a separate request for longitude and
# for latitude, doubling the number of calls to the Nominatim service.
shelter_coordinates = [
    _geocode_lon_lat(address) for address in shelter_location_df["ShelterAddress"]
]
shelter_location_df["ShelterLongitude"] = [lon for lon, _ in shelter_coordinates]
shelter_location_df["ShelterLatitude"] = [lat for _, lat in shelter_coordinates]

shelter_location_df = shelter_location_df.set_index("ShelterName")
shelter_location_df

In [None]:
# Attach the address and coordinates to the quarterly shelter rows by matching on
# the ShelterName index of both frames. The inner join keeps only shelters that
# appear in both. Re-running this cell raises, because the joined columns would
# then already exist in shelter_df.
shelter_df = shelter_df.join(shelter_location_df, how="inner")

In [None]:
# Inspect the column dtypes of the shelter frame.
shelter_df.dtypes

In [None]:
# Turn the geocoded longitude/latitude pairs into point geometries in EPSG:4326,
# ordered from the lowest to the highest occupancy rate.
shelter_points = gpd.points_from_xy(
    shelter_df["ShelterLongitude"], shelter_df["ShelterLatitude"]
)
shelter_gdf = gpd.GeoDataFrame(
    shelter_df, geometry=shelter_points, crs="EPSG:4326"
).sort_values("occupancy_rate")

# Grey level for the map markers: 255 at an occupancy rate of 0, falling to 0 at a
# rate of 1; rates above 1 are clipped to 0.
grey_level = 255 - shelter_gdf["occupancy_rate"] * 255
shelter_gdf["color"] = grey_level.astype("int64").clip(0, None)

In [None]:
# Display the shelter GeoDataFrame (sorted by occupancy_rate, with the derived "color" column).
shelter_gdf



In [None]:
# fig = px.choropleth_mapbox(crime_gdf, geojson=crime_gdf,
#                            locations=crime_gdf.index, 
#                            color="Crime Count",
#                            color_continuous_scale = 'Oranges',
#                            center={"lat": 51.0486, "lon": -114.0708}, # Calgary
#                            mapbox_style="carto-positron",
#                            opacity=0.75,
#                            zoom=9, 
#                            title = 'YYC Crime Rate (2019)')

# for i in range(b.shape[0]):
#     fig.add_trace(go.Scattermapbox(
#         lat=[b.geometry.iloc[i].y],
#         lon=[b.geometry.iloc[i].x],
#         mode = 'markers',
#         marker = go.scattermapbox.Marker(
#             size = 20,
#             color = f'rgb({b["color"][i]}, {b["color"][i]}, {b["color"][i]})',
#             opacity = 0.9,
            
#         ),
#         line=dict(color="black", width=10),
#         hoverinfo = "lon+lat+name",
#         name=b.index[i] 
#     ))

# fig.update_layout(
#     margin={"r":50,"t":50,"l":50,"b":50}, 
#     autosize=True,
#     height=600,
#     legend={
#         "yanchor": "top",
#         "y": 0.99,
#         "xanchor": "left",
#         "x": 0.01
#     }
# )
# fig.show()

In [None]:
# Build a single figure that holds, for every quarter, one crime choropleth trace
# followed by one marker trace per shelter. All traces start hidden; a slider
# switches on the traces belonging to the selected quarter.
fig = go.Figure()

# Slider position (index into `quarters`) shown when the figure first renders.
INITIAL_QUARTER_INDEX = 10
# Fixed upper bound of the colour scale, shared by all quarters so that colours are
# comparable between slider steps.
# NOTE(review): presumably the largest quarterly count in crime_gdf - confirm.
CRIME_COUNT_ZMAX = 651

# Number of traces added for each quarter (1 choropleth + one per shelter); used
# below to locate each quarter's traces inside fig.data.
slice_lengths = []
# Only quarters present in the shelter data get a slider step.
quarters = sorted(shelter_gdf["QUARTER"].unique())

for quarter in quarters:
    crime_slice = crime_gdf.loc[crime_gdf["QUARTER"] == quarter]
    shelter_slice = shelter_gdf.loc[shelter_gdf["QUARTER"] == quarter]
    slice_lengths.append(1 + shelter_slice.shape[0])

    fig.add_trace(
        go.Choroplethmapbox(
            visible=False,
            geojson=json.loads(crime_slice.to_json()),
            locations=crime_slice.index,
            z=crime_slice["Crime Count"],
            zmin=0,
            zmax=CRIME_COUNT_ZMAX,
            colorscale="Oranges",
            marker_opacity=0.75,
            marker_line_width=0.5,
            name=quarter,
        )
    )

    # Iterate over the rows positionally. The previous `series[i]` lookups used an
    # integer key on a string-indexed Series, which relies on a deprecated
    # positional fallback in pandas.
    shelter_rows = zip(
        shelter_slice.index,
        shelter_slice.geometry,
        shelter_slice["color"],
        shelter_slice["occupancy_rate"],
    )
    for shelter_name, point, grey, rate in shelter_rows:
        fig.add_trace(
            go.Scattermapbox(
                visible=False,
                lat=[point.y],
                lon=[point.x],
                mode="markers",
                marker=go.scattermapbox.Marker(
                    size=15,
                    color=f"rgb({grey}, {grey}, {grey})",
                    opacity=0.9,
                ),
                line=dict(color="black", width=10),
                hoverinfo="all",
                name=f"{shelter_name} ({rate:.3f})",
            )
        )

# One slider step per quarter: show exactly that quarter's contiguous run of traces.
steps = []
for i, n_traces in enumerate(slice_lengths):
    begin = sum(slice_lengths[:i])
    visible = [False] * len(fig.data)
    visible[begin:begin + n_traces] = [True] * n_traces
    steps.append(
        dict(
            method="update",
            args=[
                {"visible": visible},
                {"title": "YYC Crime Rate and Shelter Occupancy: " + quarters[i]},  # layout attribute
            ],
            label=quarters[i],
        )
    )

# Clamp the initial position so a shorter list of quarters cannot put the slider
# out of range.
initial_index = max(0, min(INITIAL_QUARTER_INDEX, len(quarters) - 1))

sliders = [dict(
    active=initial_index,
    currentvalue={"prefix": "Quarter: "},
    pad={"t": 50},
    steps=steps
)]

fig.update_layout(
    margin={"r": 50, "t": 50, "l": 50, "b": 50},
    autosize=True,
    height=600,
    legend={
        "yanchor": "top",
        "y": 0.99,
        "xanchor": "left",
        "x": 0.01
    },
    mapbox_style="carto-positron",
    mapbox_center={"lat": 51.0486, "lon": -114.0708},  # Calgary
    mapbox_zoom=9,
    sliders=sliders,
    title="YYC Crime Rate and Shelter Occupancy"
)

# Make the initially selected quarter visible. The trace range is derived from
# slice_lengths instead of the former hard-coded range(70, 77), which was only
# correct when the ten preceding quarters contributed exactly 70 traces and the
# selected quarter exactly 7.
if quarters:
    initial_begin = sum(slice_lengths[:initial_index])
    initial_end = initial_begin + slice_lengths[initial_index]
    for trace in fig.data[initial_begin:initial_end]:
        trace.visible = True

fig.show()