In [5]:
import pandas as pd
DATA_URL = ('https://data.sfgov.org/resource/tpyr-dvnc.csv?$$app_token=yh5qaeaJSvJrdOSv77ZnroO2u')
DATE_COLUMN = 'last_updated_at'

In [18]:
def load_data(nrows):
    # Caches the data
    # Cleans up the dates and ZIP codes
    data = pd.read_csv(DATA_URL, nrows=nrows)
    data[DATE_COLUMN] = pd.to_datetime(data[DATE_COLUMN])
    data = data[data['area_type']=='ZCTA']
    data = data.drop(['area_type', 'multipolygon'], axis=1).rename(columns={'id': 'zip_code'})
    #data = data.reset_index()
    return data

data = load_data(264)
data.head()

Unnamed: 0,zip_code,count,rate,deaths,acs_population,last_updated_at
6,94118,46.0,11.092891,,41468,2020-06-10 17:44:26.930
7,94111,,,0.0,3620,2020-06-10 17:44:26.930
17,94122,72.0,11.588418,,62131,2020-06-10 17:44:26.930
22,94123,37.0,14.263136,0.0,25941,2020-06-10 17:44:26.930
26,94127,28.0,13.070065,0.0,21423,2020-06-10 17:44:26.930


In [42]:
CA_DATA_URL = 'https://data.chhs.ca.gov/dataset/6882c390-b2d7-4b9a-aefa-2068cee63e47/resource/6cd8d424-dfaa-4bdd-9410-a3d656e1176e/download/covid19data.csv'
CA_DATE_COL = 'Most Recent Date'

def load_ca_data():
    ca_data = pd.read_csv(CA_DATA_URL)
    #lowercase = lambda x: str(x).lower()
    #data.rename(lowercase, axis='columns', inplace=True)
    ca_data.loc[:, CA_DATE_COL] = pd.to_datetime(ca_data.loc[:, CA_DATE_COL], format='%m/%d/%Y', infer_datetime_format=True)
    return ca_data

ca_data = load_ca_data()
ca_data.head()

Unnamed: 0,County Name,Most Recent Date,Total Count Confirmed,Total Count Deaths,COVID-19 Positive Patients,Suspected COVID-19 Positive Patients,ICU COVID-19 Positive Patients,ICU COVID-19 Suspected Patients
0,Los Angeles,2020-04-01,3502.0,66.0,739.0,1332.0,335.0,220.0
1,San Bernardino,2020-04-01,245.0,5.0,95.0,196.0,39.0,52.0
2,Orange,2020-04-01,579.0,11.0,117.0,221.0,50.0,48.0
3,Riverside,2020-04-01,306.0,11.0,85.0,182.0,29.0,47.0
4,Sacramento,2020-04-01,299.0,8.0,53.0,138.0,20.0,33.0


In [33]:
ca_data.dtypes

County Name                                     object
Most Recent Date                                object
Total Count Confirmed                          float64
Total Count Deaths                             float64
COVID-19 Positive Patients                     float64
Suspected COVID-19 Positive Patients           float64
ICU COVID-19 Positive Patients                 float64
ICU COVID-19 Suspected Patients                float64
last_updated_at                         datetime64[ns]
dtype: object

In [8]:
cap_url = 'https://data.sfgov.org/resource/rh24-ebzg.csv?&&$$app_token=yh5qaeaJSvJrdOSv77ZnroO2u'
hospital_cap = pd.read_csv(cap_url)
hospital_cap['date'] = pd.to_datetime(hospital_cap['date'])
hospital_cap.head()

Unnamed: 0,hospital,date,bed_type,status,count
0,All SF Acute Hospitals,2020-04-03,Intensive Care Surge,Available,312
1,All SF Acute Hospitals,2020-04-03,Acute Care,Available,918
2,All SF Acute Hospitals,2020-04-03,Acute Care Surge,Available,375
3,All SF Acute Hospitals,2020-04-03,Intensive Care,Available,147
4,All SF Acute Hospitals,2020-04-03,Intensive Care,COVID-19 (Confirmed & Suspected),41


In [9]:
hospital_cap.groupby(['bed_type', 'status']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,count
bed_type,status,Unnamed: 2_level_1
Acute Care,Available,57991
Acute Care,COVID-19 (Confirmed & Suspected),4811
Acute Care,Other Patients,46566
Acute Care Surge,Available,31500
Intensive Care,Available,9248
Intensive Care,COVID-19 (Confirmed & Suspected),2402
Intensive Care,Other Patients,11786
Intensive Care Surge,Available,26208


In [10]:
hospitalizations_url = 'https://data.sfgov.org/resource/nxjg-bhem.csv?$$app_token=yh5qaeaJSvJrdOSv77ZnroO2u'
hospitalizations = pd.read_csv(hospitalizations_url)
hospitalizations['reportdate'] = pd.to_datetime(hospitalizations['reportdate'])
hospitalizations.head()

Unnamed: 0,reportdate,hospital,dphcategory,covidstatus,patientcount
0,2020-03-23,All SF Hospitals,Med/Surg,COVID+,12
1,2020-03-24,All SF Hospitals,Med/Surg,COVID+,20
2,2020-03-25,All SF Hospitals,Med/Surg,COVID+,22
3,2020-03-26,All SF Hospitals,Med/Surg,COVID+,19
4,2020-03-27,All SF Hospitals,Med/Surg,COVID+,28


In [11]:
import altair as alt

alt.Chart(hospitalizations).mark_circle().encode(
    x='reportdate',
    y='patientcount',
    color='dphcategory',
    tooltip=['reportdate', 'patientcount']
).interactive()

<VegaLite 3 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/troubleshooting.html


In [55]:
hospitalizations['covidstatus'].unique()

array(['COVID+', 'PUI'], dtype=object)

In [71]:
age_gender_url = 'https://data.sfgov.org/resource/sunc-2t3k.csv?$$app_token=yh5qaeaJSvJrdOSv77ZnroO2u'
age_gender = pd.read_csv(age_gender_url)
age_gender = age_gender.drop('last_updated_at', axis=1)
age_gender = age_gender.replace('under 18', '18 under')
age_gender.head()

Unnamed: 0,age_group,gender,confirmed_cases
0,61-70,Male,157
1,61-70,Female,103
2,71-80,Female,72
3,31-40,Male,408
4,31-40,Female,245


In [91]:
alt.Chart(age_gender).mark_bar().encode(
    alt.X('gender:N', title='Gender', axis=None),
    alt.Y('confirmed_cases', axis=alt.Axis(title='Confirmed Cases')),
    column=alt.Column('age_group:O', title='Age Group'),
    color='gender',
    tooltip=['gender', 'confirmed_cases']
).interactive()

<VegaLite 3 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/troubleshooting.html


In [2]:
import pandas as pd
CA_DATA_URL = 'https://data.ca.gov/dataset/590188d5-8545-4c93-a9a0-e230f0db7290/resource/926fd08f-cc91-4828-af38-bd45de97f8c3/download/statewide_cases.csv'

ca_data = pd.read_csv(CA_DATA_URL)

In [4]:
ca_data.head()

Unnamed: 0,COUNTY,TOTALCOUNTCONFIRMED,TOTALCOUNTDEATHS,NEWCOUNTCONFIRMED,NEWCOUNTDEATHS,DATE
0,Santa Clara,151.0,6.0,151,6,2020-03-18
1,Santa Clara,183.0,8.0,32,2,2020-03-19
2,Santa Clara,246.0,8.0,63,0,2020-03-20
3,Santa Clara,269.0,10.0,23,2,2020-03-21
4,Santa Clara,284.0,13.0,15,3,2020-03-22


In [6]:
ca_data.COUNTY.unique()

array(['Santa Clara', 'San Mateo', 'Santa Barbara', 'Tuolumne', 'Sierra',
       'Placer', 'San Luis Obispo', 'Solano', 'Monterey', 'Yuba',
       'San Francisco', 'Orange', 'Mono', 'Calaveras', 'Alpine', 'Nevada',
       'Butte', 'Santa Cruz', 'Mendocino', 'Fresno', 'Inyo',
       'San Joaquin', 'Ventura', 'Trinity', 'Mariposa', 'El Dorado',
       'Sonoma', 'Shasta', 'Lassen', 'Colusa', 'Lake', 'Modoc', 'Tulare',
       'San Benito', 'Alameda', 'Marin', 'San Diego', 'Out Of Country',
       'Glenn', 'Siskiyou', 'Madera', 'Tehama', 'Sutter', 'Napa',
       'Los Angeles', 'Sacramento', 'Merced', 'Kings', 'Kern',
       'Riverside', 'Unassigned', 'Contra Costa', 'Del Norte',
       'San Bernardino', 'Stanislaus', 'Plumas', 'Humboldt', 'Amador',
       'Imperial', 'Yolo'], dtype=object)

In [19]:
alt.Chart(ca_data[ca_data['COUNTY']=='Santa Clara']).mark_line().encode(
    alt.Y('TOTALCOUNTCONFIRMED', axis=alt.Axis(title='Confirmed Cases')),
    x='DATE',
    color='COUNTY'
)

<VegaLite 3 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/troubleshooting.html
