### Local Covid deaths

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import jenkspy
import matplotlib.pyplot as plt

In [3]:
%matplotlib inline
import json
import numpy as np
import altair as alt
import altair_latimes as lat

In [4]:
import requests
from bs4 import BeautifulSoup
import re
import unicodedata
from datetime import datetime, date
from slugify import slugify

In [5]:
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## Download

Retrieve the page

In [6]:
url = "http://publichealth.lacounty.gov/media/Coronavirus/locations.htm"

In [7]:
page = requests.get(url)

## Parse

In [8]:
soup = BeautifulSoup(page.content, "html.parser")

Get content well

In [9]:
content = soup.find("div", {"id": "content"})

Get table

In [10]:
for tag in content.find_all(text=re.compile("CITY/COMMUNITY")):
    table = tag.findParent("table")

In [11]:
tbody = soup.tbody

In [12]:
row_list = tbody.find_all("tr")

In [13]:
dict_list = []

In [14]:
def safetxt(element):
    v = element.text.strip()
    v = v.replace("\u200b", "")
    return v

In [15]:
def safenumber(element):
    v = safetxt(element)
    v = v.replace(",", "")
    v = v.replace(" ", "")
    return v

In [16]:
for row in row_list:
    cell_content = row.find_all("td")
    d = dict(
        county="Los Angeles",
        area=safetxt(cell_content[0]),
        confirmed_cases=safenumber(cell_content[1]),
        cases_rate=safenumber(cell_content[2]),
        confirmed_deaths=safenumber(cell_content[3]),
        deaths_rate=safenumber(cell_content[4]),
    )
    dict_list.append(d)

In [17]:
df = pd.DataFrame(dict_list)

In [18]:
df.head()

Unnamed: 0,county,area,confirmed_cases,cases_rate,confirmed_deaths,deaths_rate
0,Los Angeles,City of Agoura Hills,926,4434,13,62
1,Los Angeles,City of Alhambra,6505,7501,163,188
2,Los Angeles,City of Arcadia,2516,4356,98,170
3,Los Angeles,City of Artesia,1925,11462,50,298
4,Los Angeles,City of Avalon,14,362,0,0


Get timestamp

In [19]:
date_url = "http://publichealth.lacounty.gov/media/Coronavirus/js/casecounter.js"

In [20]:
date_url

'http://publichealth.lacounty.gov/media/Coronavirus/js/casecounter.js'

In [21]:
response = requests.get(date_url)
date_page = response.text

In [22]:
date_text = re.search(r"([0-9][0-9]/[0-9][0-9])", date_page).group(1)
date_text = date_text + "/" + str(date.today().year)

In [23]:
latest_date = pd.to_datetime(date_text).date()

In [24]:
df["county_date"] = latest_date

In [25]:
df.tail(1)

Unnamed: 0,county,area,confirmed_cases,cases_rate,confirmed_deaths,deaths_rate,county_date
341,Los Angeles,- Under Investigation,20754,,66,,2021-02-13


In [26]:
df.loc[df.area == "-  Under Investigation", "area"] = "Under Investigation"

In [27]:
df.loc[df.area == "- Under Investigation", "area"] = "Under Investigation"

## Vet

In [28]:
len(df)

342

In [29]:
try:
    assert not len(df) > 342
except AssertionError:
    raise AssertionError("L.A. County's scraper has extra rows")

In [30]:
try:
    assert not len(df) < 342
except AssertionError:
    raise AssertionError("L.A. County's scraper is missing rows")

## Export

Set the date

In [31]:
now = pd.Timestamp.today()

In [32]:
latest = df["county_date"].max()

In [33]:
df.to_csv(f"output/deaths/los-angeles-{latest}.csv", index=False)

In [34]:
len(df)

342

---

In [35]:
places = df.copy()

In [36]:
places["area"] = places["area"].str.replace("*", "", regex=False)

In [37]:
places[places["area"].str.contains("Azusa")]

Unnamed: 0,county,area,confirmed_cases,cases_rate,confirmed_deaths,deaths_rate,county_date
5,Los Angeles,City of Azusa,5968,11926,92,184,2021-02-13
234,Los Angeles,Unincorporated - Azusa,2240,14068,30,188,2021-02-13


---

## Geography

### Get neighborhoods

In [38]:
hoods = gpd.read_file("input/cities-neighborhoods-unincorporated-la-county.geojson")

In [39]:
hoods = hoods.rename(columns=str.lower)

In [40]:
hoods.drop(
    [
        "shape__area",
        "shape__length",
        "confirmed",
        "suspected",
        "recovered",
        "deaths",
        "objectid",
    ],
    axis=1,
    inplace=True,
)

KeyError: "['shape__area' 'shape__length' 'confirmed' 'suspected' 'recovered'\n 'deaths' 'objectid'] not found in axis"

---

## Merge 

### Join deaths dataframe to geography

In [None]:
deaths_hoods = hoods.merge(places, left_on="label", right_on="area")

In [None]:
len(deaths_hoods)

In [None]:
deaths_hoods.plot()

In [None]:
deaths_hoods[
    ["confirmed_deaths", "confirmed_cases", "cases_rate", "deaths_rate"]
] = deaths_hoods[
    ["confirmed_deaths", "confirmed_cases", "cases_rate", "deaths_rate"]
].astype(
    int
)

In [None]:
def normalize(column):
    upper = column.max()
    lower = column.min()
    y = (column - lower) / (upper - lower)
    return y

In [None]:
deaths_hoods["normalized_deaths_rate"] = normalize(deaths_hoods.deaths_rate)

In [None]:
deaths_hoods["county_date"] = pd.to_datetime(deaths_hoods["county_date"])

In [None]:
deaths_hoods.to_file("output/deaths_hoods.geojson", driver="GeoJSON")

In [None]:
deaths_hoods.head()

---

### Deaths over time

In [None]:
deaths_hoods.county_date.value_counts()

---

In [None]:
url = 'https://lacdph.shinyapps.io/covid19_surveillance_dashboard/_w_3342d5d9/session/309691838c75e017ae9701a56f370694/download/download7?w=3342d5d9'

In [None]:
deaths_df = pd.read_csv(url, low-memory=False)