# Data from HTML tables: L.A. County COVID cases

In [1]:
import pandas as pd

#### L.A. County COVID cases, deaths by community

In [2]:
# L.A. County page whose HTML tables are read further down.
url = (
    'http://publichealth.lacounty.gov'
    '/media/Coronavirus/locations.htm'
)

#### Read the "CITY/COMMUNITY" cases table into a dataframe

In [11]:
# pd.read_html returns a list with one DataFrame per <table> found on the page;
# position 1 is the "CITY/COMMUNITY" table this notebook works with.
df = pd.read_html(url)[1]

# Fail loudly if the page layout changes and position 1 is no longer that table,
# instead of silently analysing the wrong data.
assert any('CITY/COMMUNITY' in str(col) for col in df.columns), (
    f'Unexpected table at position 1; columns were {list(df.columns)}'
)

#### Define cleaner column names

In [12]:
# Show only the first row, to inspect the column headers exactly as scraped.
df.iloc[:1]

Unnamed: 0,CITY/COMMUNITY**,Cases,Case Rate1,Deaths,Death Rate2
0,City of Agoura Hills,4094,19604.0,22,105.0


In [13]:
# Replace the scraped headers with short snake_case names.
# Names are assigned by position, so the order must match the table's columns.
df.columns = [
    'place',
    'cases',
    'case_rate',
    'deaths',
    'death_rate',
]

In [15]:
# Display the first five rows to confirm the new column names.
df.head(n=5)

Unnamed: 0,place,cases,case_rate,deaths,death_rate
0,City of Agoura Hills,4094,19604.0,22,105.0
1,City of Alhambra,16498,19024.0,249,287.0
2,City of Arcadia,7869,13625.0,163,282.0
3,City of Artesia,4109,24466.0,88,524.0
4,City of Avalon,62,1602.0,0,0.0


---

#### How many deaths in Los Angeles - Del Rey? Encino?

In [18]:
# Rows whose place name contains "Del Rey". The match is case-sensitive,
# so names spelled "del Rey" (lowercase d) are not returned.
df.loc[df['place'].str.contains('Del Rey')]

Unnamed: 0,place,cases,case_rate,deaths,death_rate
112,Los Angeles - Del Rey,5857,19565.0,42,140.0
172,Los Angeles - Playa Del Rey,399,12484.0,3,94.0
246,Unincorporated - Del Rey,87,27358.0,1,314.0


In [17]:
# Rows whose place name contains "Encino".
df.loc[df['place'].str.contains('Encino')]

Unnamed: 0,place,cases,case_rate,deaths,death_rate
120,Los Angeles - Encino,10075,22304.0,87,193.0


#### How many cases in places outside Los Angeles? [Hint](https://www.google.com/search?q=Python+pandas+string+does+not+contain&oq=Python+pandas+string+does+not+contain)

In [20]:
# Keep every row whose place name does NOT contain "Los Angeles" (~ negates the mask).
# NOTE(review): this is a substring test, so any place outside the city whose name
# merely includes "Los Angeles" would be dropped as well — check the place names.
not_la = df[~df['place'].str.contains('Los Angeles')]

# Answer the question in the heading: total cases across those places.
not_la['cases'].sum()

#### Which cities/neighborhoods with more than 10k cases have the highest case rates?

In [25]:
# Among places with more than 10,000 cases, rank by case rate (highest first)
# and show the top five.
(
    df.loc[df['cases'].gt(10_000)]
      .sort_values(by='case_rate', ascending=False)
      .head()
)

Unnamed: 0,place,cases,case_rate,deaths,death_rate
220,Los Angeles - Wholesale District*,16161,44731.0,134,371.0
66,City of San Fernando,10368,42126.0,80,325.0
113,Los Angeles - Downtown*,11540,41953.0,65,236.0
166,Los Angeles - Pacoima,31883,41418.0,279,362.0
208,Los Angeles - Vernon Central,21536,41417.0,194,373.0


---

#### Bonus: Create a true/false column for Los Angeles

In [27]:
# Flag each row True/False depending on whether its place name contains "Los Angeles".
# NOTE(review): this is a substring test, not a prefix test — any place whose name
# merely includes "Los Angeles" is flagged True too. Confirm that is intended.
df['is_la'] = df.place.str.contains('Los Angeles')

In [34]:
# Spot-check five random rows to eyeball the new is_la column.
# A fixed random_state makes the same rows appear on every run.
df.sample(5, random_state=42)

Unnamed: 0,place,cases,case_rate,deaths,death_rate,is_la
41,City of La Puente,12588,30931.0,148,364.0,False
10,City of Beverly Hills,8157,23630.0,42,122.0,False
303,Unincorporated - Rowland Heights,8622,16899.0,101,198.0,False
335,Unincorporated - Westhills,127,15137.0,1,119.0,False
283,Unincorporated - Marina del Rey,1621,17225.0,4,43.0,False


In [40]:
# Total cases across rows flagged as Los Angeles.
# is_la is already boolean, so it is used directly as the mask; a single .loc
# selects rows and column in one step instead of chained indexing.
df.loc[df['is_la'], 'cases'].sum()

2357279

In [39]:
# Total cases across rows NOT flagged as Los Angeles.
# ~ inverts the boolean mask; a single .loc selects rows and column in one step
# instead of chained indexing.
df.loc[~df['is_la'], 'cases'].sum()

1454948

#### Discussion: How would you determine whether there were disproportionately more cases in Los Angeles than in the rest of the county?