# Data from HTML tables: L.A. County COVID cases

In [22]:
%load_ext lab_black
import pandas as pd

# Raise pandas' display limits so wide tables and long cell text are not truncated.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


#### L.A. County COVID cases, deaths by community

In [23]:
# L.A. County public health "locations" page; its HTML tables are read with pd.read_html.
url = "http://publichealth.lacounty.gov/media/Coronavirus/locations.htm"

#### Read the "CITY/COMMUNITY" cases table into a dataframe

In [26]:
# pd.read_html returns one DataFrame per <table> found on the page;
# the CITY/COMMUNITY table is the second one (index 1).
page_tables = pd.read_html(url)
covid = page_tables[1]
covid.head()

Unnamed: 0,CITY/COMMUNITY**,Cases,Case Rate1,Deaths,Death Rate2
0,City of Agoura Hills,4094,19604.0,22,105.0
1,City of Alhambra,16498,19024.0,249,287.0
2,City of Arcadia,7869,13625.0,163,282.0
3,City of Artesia,4109,24466.0,88,524.0
4,City of Avalon,62,1602.0,0,0.0


#### Define cleaner column names

In [46]:
# Normalise the scraped headers into snake_case identifiers, e.g.
#   "CITY/COMMUNITY**" -> "city_community", "Case Rate1" -> "case_rate".
# Footnote markers (asterisks and digits) are stripped only from the END of a
# header, so any footnote number is handled and characters elsewhere in a
# name are left untouched. Re-running the cell on already-clean names is a no-op.
covid.columns = (
    covid.columns.str.lower()
    .str.replace(r"[\d*]+$", "", regex=True)  # trailing footnote markers
    .str.strip()  # whitespace left behind once a marker is removed
    .str.replace(r"[/ ]", "_", regex=True)  # separators -> underscore
)
covid.head()

Unnamed: 0,city_community,cases,case_rate,deaths,death_rate
0,City of Agoura Hills,4094,19604.0,22,105.0
1,City of Alhambra,16498,19024.0,249,287.0
2,City of Arcadia,7869,13625.0,163,282.0
3,City of Artesia,4109,24466.0,88,524.0
4,City of Avalon,62,1602.0,0,0.0


---

#### How many deaths in Los Angeles - Del Rey? Encino?

In [47]:
# Show the row(s) whose community name contains "Los Angeles - Del Rey".
is_del_rey = covid["city_community"].str.contains("Los Angeles - Del Rey")
covid.loc[is_del_rey]

Unnamed: 0,city_community,cases,case_rate,deaths,death_rate
112,Los Angeles - Del Rey,5857,19565.0,42,140.0


In [48]:
# Show the row(s) whose community name contains "Los Angeles - Encino".
is_encino = covid["city_community"].str.contains("Los Angeles - Encino")
covid.loc[is_encino]

Unnamed: 0,city_community,cases,case_rate,deaths,death_rate
120,Los Angeles - Encino,10075,22304.0,87,193.0


#### How many cases in places outside Los Angeles? [Hint](https://www.google.com/search?q=Python+pandas+string+does+not+contain&oq=Python+pandas+string+does+not+contain)

In [66]:
# Communities whose name does not mention "Los Angeles", and their total cases.
# `~` negates the boolean mask (rather than comparing it with False), and
# na=False makes a missing name count as "not Los Angeles", so such a row is
# kept in this group instead of being silently dropped.
# NOTE(review): this is a substring match, so any community whose name merely
# contains "Los Angeles" is excluded here as well — confirm that is intended.
outside_la = ~covid["city_community"].str.contains("Los Angeles", na=False)
nola = covid[outside_la]
nolasum = nola["cases"].sum()

#### Which cities/neighborhoods with more than 10K cases have the highest case rates?

In [67]:
# Keep only communities with more than 10,000 cases, then show the five
# with the highest case rate.
over_10k_cases = covid["cases"] > 10000
covid.loc[over_10k_cases].sort_values(by="case_rate", ascending=False).head()

Unnamed: 0,city_community,cases,case_rate,deaths,death_rate,LA
220,Los Angeles - Wholesale District*,16161,44731.0,134,371.0,True
66,City of San Fernando,10368,42126.0,80,325.0,False
113,Los Angeles - Downtown*,11540,41953.0,65,236.0,True
166,Los Angeles - Pacoima,31883,41418.0,279,362.0,True
208,Los Angeles - Vernon Central,21536,41417.0,194,373.0,True


---

#### Bonus: Create a true/false column for Los Angeles

In [68]:
# Flag each row by whether its community name contains "Los Angeles".
# NOTE(review): substring match — any name that merely contains "Los Angeles"
# is flagged too; confirm that is the intended definition of the city.
mentions_los_angeles = covid["city_community"].str.contains("Los Angeles")
covid["LA"] = mentions_los_angeles
covid.head()

Unnamed: 0,city_community,cases,case_rate,deaths,death_rate,LA
0,City of Agoura Hills,4094,19604.0,22,105.0,False
1,City of Alhambra,16498,19024.0,249,287.0,False
2,City of Arcadia,7869,13625.0,163,282.0,False
3,City of Artesia,4109,24466.0,88,524.0,False
4,City of Avalon,62,1602.0,0,0.0,False


#### Bonus: Were there more cases in the city of Los Angeles or in the rest of the county?

In [73]:
# Total cases for rows whose name mentions "Los Angeles", reported next to
# `nolasum` (the total for all other communities, computed in an earlier cell).
# na=False treats a missing name as "not Los Angeles" so the mask is purely boolean.
in_la = covid["city_community"].str.contains("Los Angeles", na=False)
la = covid[in_la]
lasum = la["cases"].sum()
# `nolasum` covers only communities outside Los Angeles, so it is labelled as
# the rest of the county rather than the county as a whole.
print(
    f"The number of cases in the city of Los Angeles is {lasum}, while the number of cases in the rest of the county is {nolasum}."
)

The number of cases in LA are 2357279 while the number of cases in the county are 1454948.


#### Discussion: How would you determine whether there were disproportionately more cases in the county? 