### L.A. County places timeseries

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import jenkspy
import matplotlib.pyplot as plt

In [3]:
%matplotlib inline
import json
import numpy as np
import altair as alt
import altair_latimes as lat

In [4]:
import requests
from bs4 import BeautifulSoup
import re
import unicodedata
from datetime import datetime, date
from slugify import slugify

In [5]:
# Notebook-wide display setup: L.A. Times chart theme plus wider pandas previews.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)
# Lift Altair's row cap so full timeseries frames can be charted.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

---

In [6]:
# Inclusive bounds of the surge window, as ISO date strings
# (compared directly against the string `date` columns below).
surge_begin, surge_end = "2020-11-15", "2021-01-15"

### Counties

In [7]:
# County-level timeseries from the coronavirus-tracker repo checkout.
counties_csv = "../../coronavirus-tracker/_notebooks/data/processed/agency-survey/counties-timeseries.csv"
counties = pd.read_csv(counties_csv, low_memory=False)

In [8]:
# Los Angeles County rows that fall inside the surge window (both ends inclusive).
la = counties.loc[
    counties["county"].eq("Los Angeles")
    & counties["date"].between(surge_begin, surge_end)
]

In [9]:
# Preview the first rows of the L.A. County slice.
la.head(n=5)

Unnamed: 0,date,county,fips,population,confirmed_cases,deaths,recoveries,confirmed_cases_per_100k,deaths_per_100k,new_confirmed_cases,new_deaths,new_recoveries,agencies_count,agencies_updated,updated,in_progress
7152,2020-11-15,Los Angeles,37,10098052,339570.0,7269.0,13116.0,3362.727782,71.984181,2899.0,2.0,0.0,3.0,2.0,True,False
7153,2020-11-16,Los Angeles,37,10098052,342489.0,7275.0,13187.0,3391.634347,72.043598,2919.0,6.0,71.0,3.0,3.0,True,False
7154,2020-11-17,Los Angeles,37,10098052,344741.0,7300.0,13244.0,3413.935678,72.291171,2252.0,25.0,57.0,3.0,3.0,True,False
7155,2020-11-18,Los Angeles,37,10098052,348536.0,7337.0,13348.0,3451.517184,72.657578,3795.0,37.0,104.0,3.0,3.0,True,False
7156,2020-11-19,Los Angeles,37,10098052,353479.0,7365.0,13435.0,3500.467219,72.934859,4943.0,28.0,87.0,3.0,3.0,True,False


### Regions

In [10]:
# Region-level case timeseries from the coronavirus-tracker repo checkout.
regions_csv = "../../coronavirus-tracker/_notebooks/data/processed/agency-survey/regions-cases-timeseries.csv"
regions = pd.read_csv(regions_csv, low_memory=False)

In [11]:
# Southern California region rows inside the surge window (both ends inclusive).
socal = regions.loc[
    regions["region"].eq("Southern California")
    & regions["date"].between(surge_begin, surge_end)
]

In [12]:
# Date(s) on which the region's seven-day rate per 100k hit its maximum.
socal.loc[
    socal["new_confirmed_cases_seven_day_per_100k"]
    == socal["new_confirmed_cases_seven_day_per_100k"].max(),
    ["date", "new_confirmed_cases_seven_day_per_100k"],
]

Unnamed: 0,date,new_confirmed_cases_seven_day_per_100k
1874,2021-01-10,1001.84761


In [13]:
# Line chart of the Southern California seven-day rate across the surge window.
socal_date_axis = alt.Axis(
    tickCount=2, format="%b. %-d", grid=False, tickColor="#ffffff"
)
socal_rate_axis = alt.Axis(tickColor="#ffffff")
socal_line = (
    alt.Chart(socal)
    .mark_line()
    .encode(
        x=alt.X("date:T", axis=socal_date_axis),
        y=alt.Y(
            "new_confirmed_cases_seven_day_per_100k",
            title="",
            axis=socal_rate_axis,
        ),
    )
)
# The trailing expression is what the cell renders.
socal_line.properties(width=500, height=300).configure_axis()

---

In [14]:
# Place-level timeseries from the coronavirus-tracker repo checkout.
places_csv = "../../coronavirus-tracker/_notebooks/data/processed/places/timeseries.csv"
cases = pd.read_csv(places_csv, low_memory=False)

### Get places in Los Angeles County

In [15]:
# Keep Los Angeles County rows that carry no ZCTA id
# (presumably the named places rather than ZIP-code areas; confirm against the source data).
places = cases.loc[cases["zcta_id"].isna() & cases["county"].eq("Los Angeles")]

In [16]:
# Snapshot of every place on the most recent date present in the data.
places_latest = places.loc[
    places["date"] == places["date"].max(),
    [
        "name",
        "date",
        "new_confirmed_cases_seven_day_per_100k",
        "new_confirmed_cases_seven_day_total",
        "population",
    ],
]

In [17]:
# Number of rows in the latest-date snapshot.
places_latest.shape[0]

334

---

### Case rates by place during the surge

In [18]:
# Per-place seven-day rates for every date inside the surge window (inclusive).
surge_places = places.loc[
    places["date"].between(surge_begin, surge_end),
    ["name", "date", "new_confirmed_cases_seven_day_per_100k"],
]

In [19]:
# First comparison pair: any place whose name contains either phrase.
is_west_la = surge_places["name"].str.contains("West Los Angeles")
is_east_la = surge_places["name"].str.contains("East Los Angeles")
two_cities = surge_places[is_west_la | is_east_la]

In [20]:
# Second comparison pair: any place whose name contains either phrase.
is_playa_del_rey = surge_places["name"].str.contains("Playa Del Rey")
is_leimert_park = surge_places["name"].str.contains("Leimert Park")
two_cities2 = surge_places[is_playa_del_rey | is_leimert_park]

### Chart two divergent neighborhoods

In [21]:
# Side-by-side line charts (one facet per place name) of the seven-day rate
# for the first comparison pair.
two_cities_x = alt.X(
    "date:T",
    axis=alt.Axis(
        tickCount=4, format="%b. %-d", grid=False, tickColor="#ffffff", title=""
    ),
)
two_cities_y = alt.Y(
    "new_confirmed_cases_seven_day_per_100k",
    title="",
    axis=alt.Axis(tickColor="#ffffff"),
)
two_cities_facet = alt.Facet("name:N", columns=2, title="")

twocities_chart = (
    alt.Chart(two_cities)
    .mark_line()
    .encode(x=two_cities_x, y=two_cities_y, facet=two_cities_facet)
    .properties(width=500, height=300)
    .configure_axis()
)

In [22]:
# Side-by-side line charts (one facet per place name) of the seven-day rate
# for the second comparison pair.
two_cities2_x = alt.X(
    "date:T",
    axis=alt.Axis(
        tickCount=4, format="%b. %-d", grid=False, tickColor="#ffffff", title=""
    ),
)
two_cities2_y = alt.Y(
    "new_confirmed_cases_seven_day_per_100k",
    title="",
    axis=alt.Axis(tickColor="#ffffff"),
)
two_cities2_facet = alt.Facet("name:N", columns=2, title="")

twocities_chart2 = (
    alt.Chart(two_cities2)
    .mark_line()
    .encode(x=two_cities2_x, y=two_cities2_y, facet=two_cities2_facet)
    .properties(width=500, height=300)
    .configure_axis()
)

In [23]:
# Render the first comparison chart as the cell output.
twocities_chart

In [24]:
# Render the second comparison chart as the cell output.
twocities_chart2

### Change from Nov. 15 to Jan. 15?

In [25]:
# Only the two endpoint dates of the surge window, one row per place per date.
surge_change = places.loc[
    places["date"].isin([surge_begin, surge_end]),
    ["name", "date", "new_confirmed_cases_seven_day_per_100k"],
]

### Reshape the change dataframe to wide format

In [26]:
# Wide table: one row per place, one column per endpoint date.
# Zero-rate rows are dropped before pivoting; a place missing on one date
# then gets 0 from `fill_value`, so the `dropna` that follows has nothing to remove.
nonzero_endpoint_rates = surge_change.loc[
    surge_change["new_confirmed_cases_seven_day_per_100k"] > 0
]
surge_change_pivot = (
    nonzero_endpoint_rates.pivot_table(
        values="new_confirmed_cases_seven_day_per_100k",
        index=["name"],
        columns=["date"],
        fill_value=0,
    )
    .dropna()
    .reset_index()
)

### Clean up column headers, round figures and remove places with no cases

In [27]:
# Give the two endpoint-date columns readable names.
# The keys come from the surge-window constants rather than repeated literals:
# `rename` silently ignores missing keys, so a stale literal would leave the
# columns unrenamed and only fail later with a KeyError on "mid_november".
surge_change_pivot = surge_change_pivot.rename(
    columns={surge_begin: "mid_november", surge_end: "mid_january"}
)

In [28]:
# Round both endpoint rates to two decimals.
endpoint_cols = ["mid_november", "mid_january"]
surge_change_pivot[endpoint_cols] = surge_change_pivot[endpoint_cols].round(2)

In [29]:
# Drop places with a zero mid-November rate (the percent-change denominator).
# `.copy()` makes the result an independent frame, so the column assignments
# in later cells do not raise SettingWithCopyWarning on a filtered slice.
surge_change_pivot = surge_change_pivot[surge_change_pivot["mid_november"] > 0].copy()

### Calculate the change

In [30]:
# Percent change from mid-November to mid-January, rounded to two decimals.
# NOTE(review): both inputs were already rounded to two decimals in an earlier
# cell, so small rates carry rounding error into this figure.
surge_change_pivot["pct_change"] = (
    surge_change_pivot["mid_january"]
    .sub(surge_change_pivot["mid_november"])
    .div(surge_change_pivot["mid_november"])
    .mul(100)
    .round(2)
)

### Which places had the least increase in coronavirus rates? 

In [31]:
# Ten places with the smallest percent change.
surge_change_pivot.sort_values(by="pct_change").head(n=10)

date,name,mid_november,mid_january,pct_change
206,San Pasqual,98.28,49.14,-50.0
181,Playa Del Rey,250.31,187.73,-25.0
230,Sunrise Village,540.12,540.12,0.0
164,North Lancaster,751.25,834.72,11.11
238,Toluca Woods,322.93,376.75,16.67
171,Palisades Highlands,104.0,130.01,25.01
279,West Hollywood,211.09,308.52,46.16
251,Unincorporated - Pomona,103.2,154.8,50.0
3,Agua Dulce,240.5,408.85,70.0
59,Del Sur,331.26,579.71,75.0


### And the greatest increase? 

In [32]:
# Ten places with the largest percent change.
surge_change_pivot.sort_values(by="pct_change", ascending=False).iloc[:10]

date,name,mid_november,mid_january,pct_change
250,Unincorporated - Monrovia,25.77,747.23,2799.61
282,West Rancho Dominguez,73.58,1986.75,2600.12
205,San Marino,15.06,316.34,2000.53
73,El Camino Village,45.5,898.65,1875.05
121,La Rambla,48.19,819.28,1600.1
273,Wellington Square,81.38,1281.79,1475.07
268,View Heights,54.14,757.99,1300.06
132,Leimert Park,59.07,774.53,1211.21
237,Toluca Terrace,76.57,995.41,1200.0
114,Kagel/Lopez Canyons,70.82,849.86,1100.03


### Rank them

In [33]:
# Rank places by percent change (1 = smallest); ties share the highest rank in their group.
surge_change_pivot = surge_change_pivot.assign(
    rank=surge_change_pivot["pct_change"].rank(method="max")
)

### Greatest/least change?

In [34]:
# Stack the 20 largest and the 20 smallest percent changes into one frame.
ranked_by_change = surge_change_pivot.sort_values("pct_change", ascending=False)
surge_change_pivot_min_max = pd.concat(
    [ranked_by_change.head(20), ranked_by_change.tail(20)]
)

In [35]:
# Put the endpoint dates back as column labels so they become `date` values
# when the frame is melted. The labels come from the surge-window constants
# so they always match the dates the rates were actually taken from.
surge_change_pivot_min_max = surge_change_pivot_min_max.rename(
    columns={"mid_november": surge_begin, "mid_january": surge_end}
)

In [36]:
# Long format: one row per (place, column label); the former column labels
# land in `date` and their values in the default `value` column.
surge_change_pivot_min_max_melt = pd.melt(
    surge_change_pivot_min_max,
    id_vars=["name"],
    var_name="date",
)

In [37]:
# Name the melted value column for what it holds.
surge_change_pivot_min_max_melt = surge_change_pivot_min_max_melt.rename(
    columns={"value": "rate"}
)

In [38]:
# Keep only the rows that came from the two date columns.
# The melt also unpivots the derived `pct_change` and `rank` columns; the
# original filter removed only `pct_change`, leaving rows whose `date` is the
# literal string "rank" to reach the `date:T` chart encoding.
# `.copy()` detaches the result so later column assignments do not warn.
surge_change_pivot_min_max_melt = surge_change_pivot_min_max_melt[
    ~surge_change_pivot_min_max_melt["date"].isin(["pct_change", "rank"])
].copy()

In [39]:
# The original statement assigned the column to itself, which did nothing.
# Parse the date labels into real datetimes so the chart's `date:T` encoding
# receives timestamps rather than strings. `errors="coerce"` turns any label
# that is not a date into NaT instead of raising.
surge_change_pivot_min_max_melt = surge_change_pivot_min_max_melt.assign(
    date=pd.to_datetime(surge_change_pivot_min_max_melt["date"], errors="coerce")
)

In [40]:
# Preview the long-format frame that feeds the small-multiples chart.
surge_change_pivot_min_max_melt.head(n=5)

Unnamed: 0,name,date,rate
0,Unincorporated - Monrovia,2020-11-15,25.77
1,West Rancho Dominguez,2020-11-15,73.58
2,San Marino,2020-11-15,15.06
3,El Camino Village,2020-11-15,45.5
4,La Rambla,2020-11-15,48.19


---

In [41]:
# Small multiples: one red line per place joining its two endpoint rates,
# laid out eight facets per row.
minimultiples_x = alt.X(
    "date:T", title="", axis=alt.Axis(tickCount=3, format="%b. %d")
)
minimultiples_y = alt.Y("rate", title="")
minimultiples_facet = alt.Facet("name", columns=8)

min_max_minimultiples = (
    alt.Chart(surge_change_pivot_min_max_melt)
    .mark_line(size=5, color="red")
    .encode(x=minimultiples_x, y=minimultiples_y, facet=minimultiples_facet)
    .properties(
        height=120,
        width=120,
        title="Places with greatest, least rate increases",
    )
)
# The trailing expression is what the cell renders.
min_max_minimultiples

---

### Import geography

In [42]:
# Place boundaries; missing attribute values become empty strings.
places_geojson = "input/cities-neighborhoods-unincorporated-la-county.geojson"
places_map = gpd.read_file(places_geojson).fillna("")

In [43]:
# Build the join key: rows whose LCITY is "Los Angeles" or "Unincorporated"
# take their COMMUNITY value; every other row takes its LCITY value.
uses_community_name = places_map["LCITY"].isin(["Los Angeles", "Unincorporated"])
places_map["NAME"] = np.where(
    uses_community_name,
    places_map["COMMUNITY"],
    places_map["LCITY"],
)

In [44]:
# Attach the surge-change figures to the geometries; the inner join keeps
# only places whose names appear in both frames.
places_map_geo = places_map.merge(
    surge_change_pivot,
    how="inner",
    left_on="NAME",
    right_on="name",
)

In [45]:
# Number of geometries that matched a place in the change table.
places_map_geo.shape[0]

301

In [46]:
# Remove the source layer's attribute columns that the export does not need.
unused_map_columns = [
    "LCITY",
    "LABEL",
    "COMMUNITY",
    "CONFIRMED",
    "SUSPECTED",
    "RECOVERED",
    "DEATHS",
    "Shape__Area",
    "Shape__Length",
    "OBJECTID",
]
places_map_geo = places_map_geo.drop(columns=unused_map_columns)

---

### Export

In [47]:
# Write the merged geometries and change figures to disk as GeoJSON.
places_map_geo.to_file("output/places_map_geo.geojson", driver="GeoJSON")

In [48]:
# Save the small-multiples chart as a PNG.
min_max_minimultiples.save("visuals/min_max_minimultiples.png")

In [49]:
# Save the first comparison chart as a PNG.
twocities_chart.save("visuals/twocities_chart.png")

In [50]:
# Save the second comparison chart as a PNG.
twocities_chart2.save("visuals/twocities_chart2.png")

In [51]:
# Export the full change table, largest percent change first, without the index.
change_by_community = surge_change_pivot.sort_values(
    by="pct_change", ascending=False
)
change_by_community.to_csv("output/hayley/change_by_community.csv", index=False)