# Data from HTML tables: L.A. County covid cases

In [2]:
import pandas as pd

#### L.A. County COVID cases, deaths by community

In [3]:
url = 'http://publichealth.lacounty.gov/media/Coronavirus/locations.htm'

#### Read the "CITY/COMMUNITY" cases table into a dataframe

In [30]:
cases_df = pd.read_html(url)[1]

#### Define cleaner column names

In [31]:
cases_df.head()

Unnamed: 0,CITY/COMMUNITY**,Cases,Case Rate1,Deaths,Death Rate2
0,City of Agoura Hills,4094,19604.0,22,105.0
1,City of Alhambra,16498,19024.0,249,287.0
2,City of Arcadia,7869,13625.0,163,282.0
3,City of Artesia,4109,24466.0,88,524.0
4,City of Avalon,62,1602.0,0,0.0


In [32]:
cases_df.columns = ["place", "cases", "case_rates", "deaths", "death_rates"]

In [33]:
cases_df.head()

Unnamed: 0,place,cases,case_rates,deaths,death_rates
0,City of Agoura Hills,4094,19604.0,22,105.0
1,City of Alhambra,16498,19024.0,249,287.0
2,City of Arcadia,7869,13625.0,163,282.0
3,City of Artesia,4109,24466.0,88,524.0
4,City of Avalon,62,1602.0,0,0.0


---

#### How many deaths in Los Angeles - Del Rey? Encino?

In [34]:
cases_df [cases_df["place"].str.contains('Del Rey')]

Unnamed: 0,place,cases,case_rates,deaths,death_rates
112,Los Angeles - Del Rey,5857,19565.0,42,140.0
172,Los Angeles - Playa Del Rey,399,12484.0,3,94.0
246,Unincorporated - Del Rey,87,27358.0,1,314.0


In [35]:
cases_df[cases_df['place'].str.contains("Encino")]

Unnamed: 0,place,cases,case_rates,deaths,death_rates
120,Los Angeles - Encino,10075,22304.0,87,193.0


#### How many cases in places outside Los Angeles? [Hint](https://www.google.com/search?q=Python+pandas+string+does+not+contain&oq=Python+pandas+string+does+not+contain)

In [46]:
new_cases_df= cases_df[~cases_df["place"].str.contains("Los Angeles")]

In [47]:
new_cases_df

Unnamed: 0,place,cases,case_rates,deaths,death_rates
0,City of Agoura Hills,4094,19604.0,22,105.0
1,City of Alhambra,16498,19024.0,249,287.0
2,City of Arcadia,7869,13625.0,163,282.0
3,City of Artesia,4109,24466.0,88,524.0
4,City of Avalon,62,1602.0,0,0.0
...,...,...,...,...,...
337,Unincorporated - Whittier,724,19133.0,10,264.0
338,Unincorporated - Whittier Narrows,64,533333.0,1,8333.0
339,Unincorporated - Willowbrook,13618,39006.0,128,367.0
340,Unincorporated - Wiseburn,1398,23196.0,14,232.0


#### Which large cities/neighborhoods have the highest rates? 

In [48]:
cases_df[cases_df["cases"]>10000].sort_values('death_rates', ascending=False).head()

Unnamed: 0,place,cases,case_rates,deaths,death_rates
171,Los Angeles - Pico-Union,13599,32501.0,278,664.0
218,Los Angeles - Westlake,16861,28407.0,351,591.0
58,City of Pico Rivera,20778,32322.0,331,515.0
6,City of Baldwin Park,22686,29551.0,383,499.0
177,Los Angeles - Reseda,24395,31840.0,356,465.0


In [49]:
cases_df[cases_df["cases"]>10000].sort_values('case_rates', ascending=False).head()

Unnamed: 0,place,cases,case_rates,deaths,death_rates
220,Los Angeles - Wholesale District*,16161,44731.0,134,371.0
66,City of San Fernando,10368,42126.0,80,325.0
113,Los Angeles - Downtown*,11540,41953.0,65,236.0
166,Los Angeles - Pacoima,31883,41418.0,279,362.0
208,Los Angeles - Vernon Central,21536,41417.0,194,373.0


---

#### Bonus: Create a true/false column for Los Angeles

In [None]:
df['is_la'] = df['place'].str.contains('Los Angeles')

In [54]:
cases.sample(10)

Unnamed: 0,place,cases,case_rates,deaths,death_rates
151,Los Angeles - Longwood,1300,30204.0,14,325.0
184,Los Angeles - South Carthay,2244,21180.0,8,76.0
334,Unincorporated - Westfield/Academy Hills,160,12308.0,0,0.0
210,Los Angeles - View Heights,737,19951.0,9,244.0
137,Los Angeles - Historic Filipinotown,4210,30353.0,87,627.0
52,City of Montebello,19046,29586.0,277,430.0
278,Unincorporated - Leona Valley,257,14677.0,5,286.0
266,Unincorporated - Hi Vista,132,12022.0,2,182.0
95,Los Angeles - Beverlywood,2967,22523.0,4,30.0
43,City of Lakewood,18740,23319.0,164,204.0


#### Bonus: Were there more cases in the county vs. the city of Los Angeles? 

In [None]:
df[df['is_la'] == True]['cases'].sum()

#### Discussion: How would you determine whether there were disproportionately more cases in the county? 

In [None]:
df[df['is_la'] == False]['cases'].sum()