# Data from HTML tables: L.A. County covid cases

In [52]:
import pandas as pd

#### L.A. County COVID cases, deaths by community

In [53]:
url = 'http://publichealth.lacounty.gov/media/Coronavirus/locations.htm'

#### Read the "CITY/COMMUNITY" cases table into a dataframe

In [54]:
cases = pd.read_html(url)[1]
cases

Unnamed: 0,CITY/COMMUNITY**,Cases,Case Rate1,Deaths,Death Rate2
0,City of Agoura Hills,4094,19604.0,22,105.0
1,City of Alhambra,16498,19024.0,249,287.0
2,City of Arcadia,7869,13625.0,163,282.0
3,City of Artesia,4109,24466.0,88,524.0
4,City of Avalon,62,1602.0,0,0.0
...,...,...,...,...,...
337,Unincorporated - Whittier,724,19133.0,10,264.0
338,Unincorporated - Whittier Narrows,64,533333.0,1,8333.0
339,Unincorporated - Willowbrook,13618,39006.0,128,367.0
340,Unincorporated - Wiseburn,1398,23196.0,14,232.0


#### Define cleaner column names

In [55]:
cases.columns = ["CITY_COMMUNITY", "CASES",	"CASE_RATE",	"DEATHS",	"DEATH_RATE"]

In [56]:
cases.head(0)

Unnamed: 0,CITY_COMMUNITY,CASES,CASE_RATE,DEATHS,DEATH_RATE


---

#### How many deaths in Los Angeles - Del Rey? Encino?

In [57]:
cases[cases["CITY_COMMUNITY"] == "Los Angeles - Del Rey"]["DEATHS"]

112    42
Name: DEATHS, dtype: int64

In [58]:
cases[cases["CITY_COMMUNITY"] == "Los Angeles - Encino"]["DEATHS"]

120    87
Name: DEATHS, dtype: int64

#### How many cases in places outside Los Angeles? [Hint](https://www.google.com/search?q=Python+pandas+string+does+not+contain&oq=Python+pandas+string+does+not+contain)

In [59]:
cases[~cases["CITY_COMMUNITY"].str.startswith("Los Angeles")]["DEATHS"].sum()

17286

#### Which large cities/neighborhoods have the highest rates? 

In [60]:
# What areas have more than 10k cases?
cases[cases["CASES"] > 10000].sort_values(['CASE_RATE'], ascending=False).head(5)

Unnamed: 0,CITY_COMMUNITY,CASES,CASE_RATE,DEATHS,DEATH_RATE
220,Los Angeles - Wholesale District*,16161,44731.0,134,371.0
66,City of San Fernando,10368,42126.0,80,325.0
113,Los Angeles - Downtown*,11540,41953.0,65,236.0
166,Los Angeles - Pacoima,31883,41418.0,279,362.0
208,Los Angeles - Vernon Central,21536,41417.0,194,373.0


---

#### Bonus: Create a true/false column for Los Angeles

In [61]:
cases["IS_LA"] = cases["CITY_COMMUNITY"].str.startswith("Los Angeles")
cases.sample(10)

Unnamed: 0,CITY_COMMUNITY,CASES,CASE_RATE,DEATHS,DEATH_RATE,IS_LA
143,Los Angeles - Lafayette Square,1222,26804.0,10,219.0,True
237,Unincorporated - Bradbury,67,62037.0,26,24074.0,False
17,City of Commerce*,4478,34264.0,46,352.0,False
244,Unincorporated - Covina (Charter Oak),3560,27085.0,26,198.0,False
294,Unincorporated - Pellissier Village,296,47819.0,1,162.0,False
303,Unincorporated - Rowland Heights,8622,16899.0,101,198.0,False
155,Los Angeles - Mar Vista,7069,16644.0,41,97.0,True
89,Los Angeles - Angelino Heights,617,24660.0,8,320.0,True
31,City of Hawthorne,22325,25144.0,272,306.0,False
185,Los Angeles - South Park,14738,38824.0,125,329.0,True


#### Bonus: Were there more cases in the county vs. the city of Los Angeles? 

In [62]:
# How many in LA City?
cases[cases["IS_LA"] == True]["CASES"].sum()

2306227

In [63]:
# How many NOT LA City?
cases[cases["IS_LA"] == False]["CASES"].sum()

1506000

#### Discussion: How would you determine whether there were disproportionately more cases in the county? 

In [64]:
cases[cases["IS_LA"] == True]["CASE_RATE"].mean()

26413.585714285713

In [65]:
cases[cases["IS_LA"] == False]["CASE_RATE"].mean()

30348.915422885573