# L.A. County places timeseries

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import jenkspy
import matplotlib.pyplot as plt

In [3]:
%matplotlib inline
import json
import numpy as np
import altair as alt
import altair_latimes as lat

In [4]:
import requests
from bs4 import BeautifulSoup
import re
import unicodedata
from datetime import datetime, date
from slugify import slugify

In [5]:
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

---

In [6]:
surge_begin = "2020-11-15"
surge_end = "2021-01-15"

### Counties

In [7]:
counties = pd.read_csv(
    "../../coronavirus-tracker/_notebooks/data/processed/agency-survey/counties-timeseries.csv",
    low_memory=False,
)

In [8]:
la = counties[
    (counties["county"] == "Los Angeles")
    & ((counties["date"] >= surge_begin) & (counties["date"] <= surge_end))
]

In [9]:
la.head()

Unnamed: 0,date,county,fips,population,confirmed_cases,deaths,recoveries,confirmed_cases_per_100k,deaths_per_100k,new_confirmed_cases,new_deaths,new_recoveries,agencies_count,agencies_updated,updated,in_progress
7152,2020-11-15,Los Angeles,37,10098052,339570.0,7269.0,13116.0,3362.727782,71.984181,2899.0,2.0,0.0,3.0,2.0,True,False
7153,2020-11-16,Los Angeles,37,10098052,342489.0,7275.0,13187.0,3391.634347,72.043598,2919.0,6.0,71.0,3.0,3.0,True,False
7154,2020-11-17,Los Angeles,37,10098052,344741.0,7300.0,13244.0,3413.935678,72.291171,2252.0,25.0,57.0,3.0,3.0,True,False
7155,2020-11-18,Los Angeles,37,10098052,348536.0,7337.0,13348.0,3451.517184,72.657578,3795.0,37.0,104.0,3.0,3.0,True,False
7156,2020-11-19,Los Angeles,37,10098052,353479.0,7365.0,13435.0,3500.467219,72.934859,4943.0,28.0,87.0,3.0,3.0,True,False


### Regions

In [10]:
regions = pd.read_csv(
    "../../coronavirus-tracker/_notebooks/data/processed/agency-survey/regions-cases-timeseries.csv",
    low_memory=False,
)

In [11]:
socal = regions[
    (regions["region"] == "Southern California")
    & ((regions["date"] >= surge_begin) & (regions["date"] <= surge_end))
]

In [12]:
socal[
    socal["new_confirmed_cases_seven_day_per_100k"]
    == socal["new_confirmed_cases_seven_day_per_100k"].max()
][["date", "new_confirmed_cases_seven_day_per_100k"]]

Unnamed: 0,date,new_confirmed_cases_seven_day_per_100k
1874,2021-01-10,1001.84761


In [13]:
alt.Chart(socal).mark_line().encode(
    x=alt.X(
        "date:T",
        axis=alt.Axis(tickCount=2, format=("%b. %-d"), grid=False, tickColor="#ffffff"),
    ),
    y=alt.Y(
        "new_confirmed_cases_seven_day_per_100k",
        title="",
        axis=alt.Axis(tickColor="#ffffff"),
    ),
).properties(width=500, height=300).configure_axis()

---

In [14]:
cases = pd.read_csv(
    "../../coronavirus-tracker/_notebooks/data/processed/places/timeseries.csv",
    low_memory=False,
)

### Get places in Los Angeles County

In [15]:
places = cases[(cases["zcta_id"].isna()) & (cases["county"] == "Los Angeles")]

In [16]:
places_latest = places[places["date"] == places["date"].max()][
    [
        "name",
        "date",
        "new_confirmed_cases_seven_day_per_100k",
        "new_confirmed_cases_seven_day_total",
        "population",
    ]
]

In [17]:
len(places_latest)

334

In [18]:
places_latest.head()

Unnamed: 0,name,date,new_confirmed_cases_seven_day_per_100k,new_confirmed_cases_seven_day_total,population
66956,Acton,2021-02-10,112.909296,9.0,7971.0
67288,Adams-Normandie,2021-02-10,329.188003,27.0,8202.0
67620,Agoura Hills,2021-02-10,114.926016,24.0,20883.0
67952,Agua Dulce,2021-02-10,264.550265,11.0,4158.0
68284,Alhambra,2021-02-10,167.197085,145.0,86724.0


### What's the distribution of population by place? 

In [19]:
places_latest.population.median()

16464.5

In [20]:
places_latest.population.std()

34448.38930503192

In [21]:
pop_histogram = (
    alt.Chart(places_latest)
    .mark_bar()
    .encode(
        alt.X("population:Q", bin=alt.Bin(maxbins=25), title="Population ranges"),
        alt.Y("count()", title="Places in population range"),
    )
)

In [22]:
pop_histogram

### Limit the dataframe to places with > 5000 population

In [23]:
places_lrg = places[places["population"] >= 5000]

---

### Cases during the surge (in large places)? 

In [24]:
surge_places = places_lrg[
    (places_lrg["date"] >= surge_begin) & (places_lrg["date"] <= surge_end)
][["name", "date", "new_confirmed_cases_seven_day_per_100k"]]

In [25]:
two_cities = surge_places[
    (surge_places["name"].str.contains("West Los Angeles"))
    | surge_places["name"].str.contains("East Los Angeles")
]

In [26]:
two_cities2 = surge_places[
    (surge_places["name"].str.contains("West Hollywood"))
    | surge_places["name"].str.contains("Leimert Park")
]

### Chart two divergent neighborhoods

In [27]:
twocities_chart = (
    alt.Chart(two_cities)
    .mark_line()
    .encode(
        x=alt.X(
            "date:T",
            axis=alt.Axis(
                tickCount=4,
                format=("%b. %-d"),
                grid=False,
                tickColor="#ffffff",
                title="",
            ),
        ),
        y=alt.Y(
            "new_confirmed_cases_seven_day_per_100k",
            title="",
            axis=alt.Axis(tickColor="#ffffff"),
        ),
        facet=alt.Facet("name:N", columns=2, title=""),
    )
    .properties(width=500, height=300)
    .configure_axis()
)

In [28]:
twocities_chart2 = (
    alt.Chart(two_cities2)
    .mark_line()
    .encode(
        x=alt.X(
            "date:T",
            axis=alt.Axis(
                tickCount=4,
                format=("%b. %-d"),
                grid=False,
                tickColor="#ffffff",
                title="",
            ),
        ),
        y=alt.Y(
            "new_confirmed_cases_seven_day_per_100k",
            title="",
            axis=alt.Axis(tickColor="#ffffff"),
        ),
        facet=alt.Facet("name:N", columns=2, title=""),
    )
    .properties(width=500, height=300)
    .configure_axis()
)

In [29]:
twocities_chart

In [30]:
twocities_chart2

### Change from Nov. 15 to Jan. 15?

In [31]:
surge_change = places_lrg[
    (places_lrg["date"] == surge_begin) | (places_lrg["date"] == surge_end)
][["name", "date", "new_confirmed_cases_seven_day_per_100k"]]

### Make the change dataframe wide 

In [32]:
surge_change_pivot = (
    pd.pivot_table(
        surge_change[surge_change["new_confirmed_cases_seven_day_per_100k"] > 0],
        values="new_confirmed_cases_seven_day_per_100k",
        index=["name"],
        columns=["date"],
        fill_value=0,
    )
    .dropna()
    .reset_index()
)

### Clean up column headers, round figures and remove places with no cases

In [33]:
surge_change_pivot.rename(
    columns={"2020-11-15": "mid_november", "2021-01-15": "mid_january"}, inplace=True
)

In [34]:
surge_change_pivot[["mid_november", "mid_january"]] = round(
    surge_change_pivot[["mid_november", "mid_january"]], 2
)

In [35]:
surge_change_pivot = surge_change_pivot[surge_change_pivot["mid_november"] > 0]

### Calculate the change

In [36]:
surge_change_pivot["pct_change"] = round(
    (
        (surge_change_pivot["mid_january"] - surge_change_pivot["mid_november"])
        / surge_change_pivot["mid_november"]
    )
    * 100,
    2,
)

### Which places had the least increase in coronavirus rates? 

In [37]:
surge_change_pivot.sort_values("pct_change", ascending=True).head(10)

date,name,mid_november,mid_january,pct_change
218,West Hollywood,211.09,308.52,46.16
120,Malibu,100.3,208.32,107.7
147,Park La Brea,117.82,265.1,125.0
60,East Pasadena,46.85,124.94,166.68
70,Encino,172.67,464.89,169.24
93,Hollywood Hills,139.29,397.5,185.38
155,Rancho Park,106.71,304.88,185.71
204,Venice,103.29,309.87,200.0
24,Brentwood,125.99,377.97,200.0
169,Santa Monica,128.72,390.5,203.37


### And the greatest increase? 

In [38]:
surge_change_pivot.sort_values("pct_change", ascending=False).head(10)

date,name,mid_november,mid_january,pct_change
165,San Marino,15.06,316.34,2000.53
64,El Camino Village,45.5,898.65,1875.05
112,Leimert Park,59.07,774.53,1211.21
78,Gramercy Place,92.9,1096.25,1080.03
206,Vermont Square,156.72,1828.39,1066.66
180,South San Gabriel,90.42,1039.78,1049.94
95,Hyde Park,101.61,1152.73,1034.47
168,Santa Fe Springs,119.8,1236.11,931.81
22,Beverlywood,53.14,538.98,914.26
11,Avocado Heights,118.08,1195.57,912.51


### Rank them

In [39]:
surge_change_pivot["rank_lower_is_better"] = surge_change_pivot["pct_change"].rank(
    method="max"
)

In [40]:
surge_change_pivot.sort_values("rank_lower_is_better", ascending=True).head()

date,name,mid_november,mid_january,pct_change,rank_lower_is_better
218,West Hollywood,211.09,308.52,46.16,1.0
120,Malibu,100.3,208.32,107.7,2.0
147,Park La Brea,117.82,265.1,125.0,3.0
60,East Pasadena,46.85,124.94,166.68,4.0
70,Encino,172.67,464.89,169.24,5.0


### Greatest/least change?

In [41]:
surge_change_pivot_min_max = pd.concat(
    [
        surge_change_pivot.sort_values("pct_change", ascending=False).head(20),
        surge_change_pivot.sort_values("pct_change", ascending=False).tail(20),
    ]
)

In [42]:
surge_change_pivot_min_max.rename(
    columns={"mid_november": "2020-11-15", "mid_january": "2021-01-15"}, inplace=True
)

In [43]:
surge_change_pivot_min_max_melt = surge_change_pivot_min_max.melt(
    id_vars=["name"], var_name="date"
)

In [44]:
surge_change_pivot_min_max_melt.rename(columns={"value": "rate"}, inplace=True)

In [45]:
surge_change_pivot_min_max_melt = surge_change_pivot_min_max_melt[
    surge_change_pivot_min_max_melt["date"] != "pct_change"
]

In [46]:
surge_change_pivot_min_max_melt["date"] = surge_change_pivot_min_max_melt["date"]

In [47]:
surge_change_pivot_min_max_melt.head()

Unnamed: 0,name,date,rate
0,San Marino,2020-11-15,15.06
1,El Camino Village,2020-11-15,45.5
2,Leimert Park,2020-11-15,59.07
3,Gramercy Place,2020-11-15,92.9
4,Vermont Square,2020-11-15,156.72


---

In [48]:
min_max_minimultiples = (
    alt.Chart(surge_change_pivot_min_max_melt)
    .mark_line(size=5, color="red")
    .encode(
        x=alt.X("date:T", title="", axis=alt.Axis(tickCount=3, format="%b. %d")),
        y=alt.Y("rate", title=""),
        facet=alt.Facet("name", columns=8),
    )
    .properties(
        height=120,
        width=120,
        title="Places with greatest, least rate increases",
    )
)
min_max_minimultiples

---

### Import geography

In [49]:
places_map = gpd.read_file(
    "input/cities-neighborhoods-unincorporated-la-county.geojson"
).fillna("")

In [50]:
places_map["NAME"] = np.where(
    (places_map["LCITY"] == "Los Angeles") | (places_map["LCITY"] == "Unincorporated"),
    places_map["COMMUNITY"],
    places_map["LCITY"],
)

In [51]:
places_map_geo = places_map.merge(
    surge_change_pivot, left_on="NAME", right_on="name", how="inner"
)

In [52]:
len(places_map_geo)

251

In [53]:
places_map_geo.drop(
    [
        "LCITY",
        "LABEL",
        "COMMUNITY",
        "CONFIRMED",
        "SUSPECTED",
        "RECOVERED",
        "DEATHS",
        "Shape__Area",
        "Shape__Length",
        "OBJECTID",
    ],
    axis=1,
    inplace=True,
)

---

### Mapping

In [54]:
places_map_geo.head()

Unnamed: 0,geometry,NAME,name,mid_november,mid_january,pct_change,rank_lower_is_better
0,"POLYGON ((-118.72865 34.16793, -118.73655 34.1...",Agoura Hills,Agoura Hills,71.83,397.45,453.32,109.0
1,"POLYGON ((-118.10976 33.90982, -118.10984 33.9...",Bellflower,Bellflower,144.08,1130.76,684.81,200.0
2,"POLYGON ((-118.39160 34.11243, -118.39230 34.1...",Beverly Hills,Beverly Hills,98.49,492.47,400.02,87.0
3,"POLYGON ((-118.30781 34.16116, -118.30451 34.1...",Burbank,Burbank,146.48,600.86,310.2,45.0
4,"POLYGON ((-118.69385 34.16856, -118.69454 34.1...",Calabasas,Calabasas,57.56,386.47,571.42,163.0


---

### Export

In [55]:
places_map_geo.to_file("output/places_map_geo.geojson", driver="GeoJSON")

In [56]:
min_max_minimultiples.save("visuals/min_max_minimultiples.png")

In [57]:
twocities_chart.save("visuals/twocities_chart.png")

In [None]:
twocities_chart2.save("visuals/twocities_chart2.png")

In [None]:
pop_histogram.save("visuals/pop_histogram.png")

In [None]:
surge_change_pivot.rename(
    columns={
        "mid_november": "case_rate_per_100k_mid-november",
        "mid_january": "case_rate_per_100k_mid-january",
    },
    inplace=True,
)

In [61]:
surge_change_pivot.sort_values("rank_lower_is_worse", ascending=True).to_csv(
    "output/hayley/change_by_community.csv", index=False
)

KeyError: 'rank_lower_is_worse'