# Package installation

In [1]:
import pandas as pd
import plotly as py
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import matplotlib.ticker as ticker
%matplotlib inline 
import numpy as np 
init_notebook_mode(connected=True) 
import os
import chart_studio
import chart_studio.plotly as py
import chart_studio.tools as tls

# Cumulative COVID-19 Cases by Country and Week of Year (source:  Our Word in Data)

In [2]:
df = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv', parse_dates=['date'])

In [3]:
df.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,...,aged_70_older,gdp_per_capita,extreme_poverty,cvd_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy
0,AFG,Asia,Afghanistan,2019-12-31,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
1,AFG,Asia,Afghanistan,2020-01-01,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
2,AFG,Asia,Afghanistan,2020-01-02,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
3,AFG,Asia,Afghanistan,2020-01-03,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
4,AFG,Asia,Afghanistan,2020-01-04,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83


In [4]:
print("Shape of Data Set is: ", df.shape)
print("Min and Max dates are: ", df.date.min(), df.date.max())

Shape of Data Set is:  (26329, 34)
Min and Max dates are:  2019-12-31 00:00:00 2020-06-26 00:00:00


In [5]:
df['new_date'] = pd.to_datetime(df['date'])
df['Year-Week'] = df['new_date'].dt.strftime('%Y-%U')
df['Year-Week'].head()

0    2019-52
1    2020-00
2    2020-00
3    2020-00
4    2020-00
Name: Year-Week, dtype: object

In [6]:
df_usa=df[df.location=='United States']
df_usa.to_csv('american.csv')

In [7]:
df['text']= df.location+'<br>'+'Date:'+df.date.astype(str)+'<br>'+'Cases:'+df.total_cases.astype(str)

In [8]:
fig = px.choropleth(df, locations="iso_code",
                    color="total_cases",
                    hover_name="text", # column to add to hover information
                    hover_data={'Year-Week':False,
                                      'iso_code':False,
                                      'total_cases': False},
                    animation_frame="Year-Week",
                    title = "Cumulative COVID-19 Cases by Country and Week of Year (source:Our Word in Data)",
                    color_continuous_scale=px.colors.sequential.OrRd)
# fig.customdata= custom_data

# fig.update_traces(hovertemplate='%{custom_data}')
# fig["layout"].pop("updatemenus")
fig.show()

In [9]:
pio.write_html(fig, file='Cumulative COVID-19 Cases by Country and Week of Year.html', auto_open=False)

# Cumulative COVID-19 Cases per 100,000 People by Week of Year: Select Countries (source:Our Word in Data)

In [10]:
countries = ['Canada', 'Germany', 'United Kingdom', 'United States', 'France', 'China', 'Chile', 'Peru', 'Spain', 'Brazil', 'Russia', 'Italy', 'Iran','World']
covid = df[df['location'].isin(countries)]
covid

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,...,cvd_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,new_date,Year-Week,text
3394,BRA,South America,Brazil,2019-12-31,0.0,0.0,0.0,0.0,0.000,0.000,...,177.961,8.11,10.100,17.900,,2.200,75.88,2019-12-31,2019-52,Brazil<br>Date:2019-12-31<br>Cases:0.0
3395,BRA,South America,Brazil,2020-01-01,0.0,0.0,0.0,0.0,0.000,0.000,...,177.961,8.11,10.100,17.900,,2.200,75.88,2020-01-01,2020-00,Brazil<br>Date:2020-01-01<br>Cases:0.0
3396,BRA,South America,Brazil,2020-01-02,0.0,0.0,0.0,0.0,0.000,0.000,...,177.961,8.11,10.100,17.900,,2.200,75.88,2020-01-02,2020-00,Brazil<br>Date:2020-01-02<br>Cases:0.0
3397,BRA,South America,Brazil,2020-01-03,0.0,0.0,0.0,0.0,0.000,0.000,...,177.961,8.11,10.100,17.900,,2.200,75.88,2020-01-03,2020-00,Brazil<br>Date:2020-01-03<br>Cases:0.0
3398,BRA,South America,Brazil,2020-01-04,0.0,0.0,0.0,0.0,0.000,0.000,...,177.961,8.11,10.100,17.900,,2.200,75.88,2020-01-04,2020-00,Brazil<br>Date:2020-01-04<br>Cases:0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26260,OWID_WRL,,World,2020-06-22,8926614.0,129779.0,468257.0,3965.0,1145.201,16.649,...,233.070,8.51,6.434,34.635,60.13,2.705,72.58,2020-06-22,2020-25,World<br>Date:2020-06-22<br>Cases:8926614.0
26261,OWID_WRL,,World,2020-06-23,9063414.0,136800.0,471681.0,3424.0,1162.752,17.550,...,233.070,8.51,6.434,34.635,60.13,2.705,72.58,2020-06-23,2020-25,World<br>Date:2020-06-23<br>Cases:9063414.0
26262,OWID_WRL,,World,2020-06-24,9229383.0,165969.0,477271.0,5590.0,1184.044,21.292,...,233.070,8.51,6.434,34.635,60.13,2.705,72.58,2020-06-24,2020-25,World<br>Date:2020-06-24<br>Cases:9229383.0
26263,OWID_WRL,,World,2020-06-25,9400695.0,171312.0,482471.0,5200.0,1206.022,21.978,...,233.070,8.51,6.434,34.635,60.13,2.705,72.58,2020-06-25,2020-25,World<br>Date:2020-06-25<br>Cases:9400695.0


In [11]:
covid['country']=covid['location']
print(covid)

       iso_code      continent location       date  total_cases  new_cases  \
3394        BRA  South America   Brazil 2019-12-31          0.0        0.0   
3395        BRA  South America   Brazil 2020-01-01          0.0        0.0   
3396        BRA  South America   Brazil 2020-01-02          0.0        0.0   
3397        BRA  South America   Brazil 2020-01-03          0.0        0.0   
3398        BRA  South America   Brazil 2020-01-04          0.0        0.0   
...         ...            ...      ...        ...          ...        ...   
26260  OWID_WRL            NaN    World 2020-06-22    8926614.0   129779.0   
26261  OWID_WRL            NaN    World 2020-06-23    9063414.0   136800.0   
26262  OWID_WRL            NaN    World 2020-06-24    9229383.0   165969.0   
26263  OWID_WRL            NaN    World 2020-06-25    9400695.0   171312.0   
26264  OWID_WRL            NaN    World 2020-06-26    9581803.0   181108.0   

       total_deaths  new_deaths  total_cases_per_million  \
339



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [12]:
covid_case = covid.pivot(index='date', columns='country', values='total_cases')
countries = list(covid_case.columns)
print(covid_case)

# # covid=df.copy()
# # covid = df.reset_index('Date')
# # covid.set_index(['Date'], inplace=True)
# covid.columns = countries


       iso_code      continent location       date  total_cases  new_cases  \
3394        BRA  South America   Brazil 2019-12-31          0.0        0.0   
3395        BRA  South America   Brazil 2020-01-01          0.0        0.0   
3396        BRA  South America   Brazil 2020-01-02          0.0        0.0   
3397        BRA  South America   Brazil 2020-01-03          0.0        0.0   
3398        BRA  South America   Brazil 2020-01-04          0.0        0.0   
...         ...            ...      ...        ...          ...        ...   
26260  OWID_WRL            NaN    World 2020-06-22    8926614.0   129779.0   
26261  OWID_WRL            NaN    World 2020-06-23    9063414.0   136800.0   
26262  OWID_WRL            NaN    World 2020-06-24    9229383.0   165969.0   
26263  OWID_WRL            NaN    World 2020-06-25    9400695.0   171312.0   
26264  OWID_WRL            NaN    World 2020-06-26    9581803.0   181108.0   

       total_deaths  new_deaths  total_cases_per_million  \
339

In [13]:
covid_case.World

date
2019-12-31         27.0
2020-01-01         27.0
2020-01-02         27.0
2020-01-03         44.0
2020-01-04         44.0
                ...    
2020-06-22    8926614.0
2020-06-23    9063414.0
2020-06-24    9229383.0
2020-06-25    9400695.0
2020-06-26    9581803.0
Name: World, Length: 179, dtype: float64

In [19]:
populations = {'Canada':37664517, 'Germany': 83721496 , 'United Kingdom': 67802690 , 'United States': 330548815, 'France': 65239883, 'China':1438027228,\
               'Chile':19116201, 'Peru':32971854,'Spain':46754778,'Brazil':212559417,'Russia':145934462,'Italy':60461826,\
              'Iran':83992949,'World':7792507702}
percapita = covid_case.copy()
for country in list(percapita.columns):
    percapita[country] = round(percapita[country]/populations[country]*100000)
print(percapita)

country         Brazil      Canada        Chile     China      France  \
date                                                                    
2019-12-31    0.000000    0.000000          NaN  0.001878    0.000000   
2020-01-01    0.000000    0.000000          NaN  0.001878    0.000000   
2020-01-02    0.000000    0.000000          NaN  0.001878    0.000000   
2020-01-03    0.000000    0.000000          NaN  0.003060    0.000000   
2020-01-04    0.000000    0.000000          NaN  0.003060    0.000000   
...                ...         ...          ...       ...         ...   
2020-06-22  510.463387  269.022433  1267.798973  5.881113  245.826621   
2020-06-23  520.546215  269.848144  1291.904181  5.884729  246.398357   
2020-06-24  539.099145  270.713680  1311.803533  5.886745  247.190817   
2020-06-25  559.199407  271.454430  1330.892053  5.888136  247.314974   
2020-06-26  577.774449  272.434132  1355.206508  5.890083  247.314974   

country        Germany        Iran       Italy    

In [26]:
covid_death = covid.pivot(index='date', columns='country', values='total_deaths')
percapita_death = covid_death.copy()
for country in list(percapita_death.columns):
    percapita_death[country] = percapita_death[country]/populations[country]*100000
print(percapita_death)

country        Brazil     Canada      Chile     China     France    Germany  \
date                                                                          
2019-12-31   0.000000   0.000000        NaN  0.000000   0.000000   0.000000   
2020-01-01   0.000000   0.000000        NaN  0.000000   0.000000   0.000000   
2020-01-02   0.000000   0.000000        NaN  0.000000   0.000000   0.000000   
2020-01-03   0.000000   0.000000        NaN  0.000000   0.000000   0.000000   
2020-01-04   0.000000   0.000000        NaN  0.000000   0.000000   0.000000   
...               ...        ...        ...       ...        ...        ...   
2020-06-22  23.813106  22.381808  23.430388  0.322595  45.432332  10.612567   
2020-06-23  24.120785  22.397738  23.550704  0.322595  45.467586  10.624512   
2020-06-24  24.767193  22.445529  23.566398  0.322664  45.554956  10.647206   
2020-06-25  25.324684  22.525179  24.748641  0.322664  45.571817  10.662733   
2020-06-26  25.861475  22.578280  25.648402  0.32273

In [55]:
fig = go.Figure()
for country in list(percapita_death.columns):
    fig = fig.add_trace(go.Scatter(x=percapita_death.index,y=percapita_death[country],name= country))

fig.update_layout(title = 'Cumulative COVID-19 Deaths per 100,000 People by Week of Year: Select Countries <br>(source:Our Word in Data)',)
fig.update_traces(hovertemplate='%{x} <br>Cases: %{y}') # 
fig.add_annotation(
            x = percapita_death.index[30], 
            y = int(percapita_death.min().min())-2,
            showarrow=False,
            ax=0,
            ay=-40,
            text="Notes: Spain adjusts the death data on May 25",
            font=dict(family="sans serif",size=10,color="crimson")
)
# fig.update_traces(texttemplate='Source: https://github.com/datasets/covid-19/blob/master/data/countries-aggregated.csv', textposition='bottom center')
fig.show()

In [18]:
fig = go.Figure()
for country in list(percapita.columns):
    fig = fig.add_trace(go.Scatter(x=percapita.index,y=percapita[country],name= country))

fig.update_layout(title = 'Cumulative COVID-19 Cases per 100,000 People by Week of Year: Select Countries (source:Our Word in Data)',)
fig.update_traces(hovertemplate='%{x} <br>Cases: %{y}') # 
fig.show()

In [16]:
pio.write_html(fig, file='Cumulative COVID-19 Cases per 100,000 People by Week of Year.html', auto_open=False)

# Cumulative COVID-19  Cases by State and Week of Year (source: New York Times)

In [17]:
# !git clone https://github.com/nytimes/covid-19-data.git
df_us = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv',parse_dates=['date'])
df_us.head()

Unnamed: 0,date,state,fips,cases,deaths
0,2020-01-21,Washington,53,1,0
1,2020-01-22,Washington,53,1,0
2,2020-01-23,Washington,53,1,0
3,2020-01-24,Illinois,17,1,0
4,2020-01-24,Washington,53,1,0


In [18]:
df_us['new_date'] = pd.to_datetime(df_us['date'])
df_us['Year-Week'] = df_us['new_date'].dt.strftime('%Y-%U')
df_us.head()

Unnamed: 0,date,state,fips,cases,deaths,new_date,Year-Week
0,2020-01-21,Washington,53,1,0,2020-01-21,2020-03
1,2020-01-22,Washington,53,1,0,2020-01-22,2020-03
2,2020-01-23,Washington,53,1,0,2020-01-23,2020-03
3,2020-01-24,Illinois,17,1,0,2020-01-24,2020-03
4,2020-01-24,Washington,53,1,0,2020-01-24,2020-03


In [19]:
df_us.shape
df_us = df_us.sort_values(by=['state', 'new_date'])
df_us_week = df_us.groupby(['state', 'fips', 'Year-Week']).first().reset_index()
df_us_week.head(100)

Unnamed: 0,state,fips,Year-Week,date,cases,deaths,new_date
0,Alabama,1,2020-10,2020-03-13,6,0,2020-03-13
1,Alabama,1,2020-11,2020-03-15,23,0,2020-03-15
2,Alabama,1,2020-12,2020-03-22,157,0,2020-03-22
3,Alabama,1,2020-13,2020-03-29,830,5,2020-03-29
4,Alabama,1,2020-14,2020-04-05,1840,45,2020-04-05
...,...,...,...,...,...,...,...
95,Colorado,8,2020-16,2020-04-19,9730,421,2020-04-19
96,Colorado,8,2020-17,2020-04-26,13440,678,2020-04-26
97,Colorado,8,2020-18,2020-05-03,16527,840,2020-05-03
98,Colorado,8,2020-19,2020-05-10,19595,969,2020-05-10


In [20]:
df_us_week['cases'].max(), df_us_week['cases'].min()

(388096, 1)

In [21]:
from urllib.request import urlopen
import json
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)
counties["features"][100]

{'type': 'Feature',
 'properties': {'GEO_ID': '0500000US16067',
  'STATE': '16',
  'COUNTY': '067',
  'NAME': 'Minidoka',
  'LSAD': 'County',
  'CENSUSAREA': 757.591},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-113.931799, 42.535275],
    [-113.932904, 42.765032],
    [-113.763862, 42.764508],
    [-113.713928, 42.849733],
    [-113.714701, 43.20003],
    [-113.413693, 43.199785],
    [-113.413026, 42.84925],
    [-113.472155, 42.849218],
    [-113.472177, 42.669251],
    [-113.557609, 42.656416],
    [-113.655338, 42.535663],
    [-113.779811, 42.55687],
    [-113.931799, 42.535275]]]},
 'id': '16067'}

In [22]:
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}

In [23]:
df_us_week['state_code'] = df_us_week['state'].map(us_state_abbrev)
df_us_week.head()

Unnamed: 0,state,fips,Year-Week,date,cases,deaths,new_date,state_code
0,Alabama,1,2020-10,2020-03-13,6,0,2020-03-13,AL
1,Alabama,1,2020-11,2020-03-15,23,0,2020-03-15,AL
2,Alabama,1,2020-12,2020-03-22,157,0,2020-03-22,AL
3,Alabama,1,2020-13,2020-03-29,830,5,2020-03-29,AL
4,Alabama,1,2020-14,2020-04-05,1840,45,2020-04-05,AL


In [24]:
df_us_week = df_us_week.sort_values(by=['Year-Week'])
df_us_week['text']= df_us_week.state+'<br>'+'Date:'+df_us_week.date.astype(str)+'<br>'+'Cases:'+df_us_week.cases.astype(str)
fig = px.choropleth(df_us_week[df_us_week['date']>='2020-02-15'], locations='state_code', color='cases',
                           color_continuous_scale=px.colors.sequential.OrRd,
                           hover_name = 'text',
                    hover_data={'Year-Week':False,
                                      'state_code':False,
                                      'cases': False},
                           locationmode = 'USA-states',
                           animation_frame="Year-Week",
                          )
fig.update_layout(
    title_text = 'Cumulative COVID-19 Cases by State and Week of Year (source:New York Times)', # Create a Title
    geo_scope='usa',  # Plot only the USA instead of globe
)
fig.show()  # Output the plot to the screen

In [25]:
pio.write_html(fig, file='Cumulative COVID-19 Cases by State and Week of Year.html', auto_open=False)

In [26]:
df_us_week['text_death']= df_us_week.state+'<br>'+'Date:'+df_us_week.date.astype(str)+'<br>'+'Deaths:'+df_us_week.deaths.astype(str)

fig = px.choropleth(df_us_week[df_us_week['date']>='2020-03-15'], locations='state_code', color='deaths',
                           color_continuous_scale=px.colors.sequential.OrRd,
                           hover_name = 'text_death',
                           locationmode = 'USA-states',
                           animation_frame="Year-Week",
                    hover_data={'Year-Week':False,
                                      'state_code':False,
                                      'deaths': False},
                          )
fig.update_layout(
    title_text = 'Cumulative COVID-19 Deaths by State and Week of Year (source:New York Times)', # Create a Title
    geo_scope='usa',  # Plot only the USA instead of globe
)
fig.show()  # Output the plot to the screen

In [27]:
pio.write_html(fig, file='Cumulative COVID-19 Deaths by State and Week of Year.html', auto_open=False)

# Cumulative COVID-Cases by Race Ethnicity

# 2 California
https://www.cdph.ca.gov/Programs/CID/DCDC/Pages/COVID-19/Race-Ethnicity.aspx
## 2.1 LA county
Top ten cities: Los Angeles, San Diego, Riverside, Orange, San Bernardino, Santa Clara, Alameda, San Francisco, Kern, Tulare
http://publichealth.lacounty.gov/media/Coronavirus/locations.htm
Racial, Ethnic & Socioeconomic Data & Strategies Report
http://publichealth.lacounty.gov/media/Coronavirus/locations.htm
## 2.2 San Diego
https://www.sandiegocounty.gov/content/sdc/hhsa/programs/phs/community_epidemiology/dc/2019-nCoV/status.html
Summary
https://www.sandiegocounty.gov/content/dam/sdc/hhsa/programs/phs/Epidemiology/COVID-19%20Race%20and%20Ethnicity%20Summary.pdf
## 2.3 Riverside (no race data)
## 2.4 Orange
https://occovid19.ochealthinfo.com/coronavirus-in-oc
## 2.5 San Bernardino
http://sbcovid19.com/
Dashboard: https://sbcph.maps.arcgis.com/apps/opsdashboard/index.html#/44bb35c804c44c8281da6d82ee602dff
## 2.6 Santa Clara
https://www.sccgov.org/sites/covid19/Pages/dashboard.aspx#cases
## 2.7 Alameda
http://www.acphd.org/2019-ncov.aspx
Race data dashboard:
https://ac-hcsa.maps.arcgis.com/apps/opsdashboard/index.html#/332a092bbc3641bd9ec8373e7c7b5b3d
## 2.8 San Francisco
https://data.sfgov.org/stories/s/fjki-2fab
Race data dashboard:
https://data.sfgov.org/stories/s/w6za-6st8
## 2.9 Kern (no race data)
## 2.10 Tulare (no race data)
## 2.11 Sacramento
https://sac-epidemiology.maps.arcgis.com/apps/opsdashboard/index.html#/a33a1c52e7fe4252bc1f81e44d725fd1
## 2.12 Contra Costa
https://www.coronavirus.cchealth.org/dashboard
## 2.13 Fresno
https://cofgisonline.maps.arcgis.com/apps/MapSeries/index.html?appid=1f82e8eb24c0403c90e774202c5dafea
## 2.14 Ventura (no race data)
## 2.15 San Joaquin
https://sjcphs.maps.arcgis.com/apps/opsdashboard/index.html#/1234d2c9d6e043709a85f6b519cc8757
## 2.16 Imperial
https://www.arcgis.com/apps/opsdashboard/index.html#/684c52c01a0c4dbda380d7b905ef0b46
## 2.17 Stanislaus
https://experience.arcgis.com/experience/c29aa0c6a84844ceab6601da4b124c0b/
## 2.18 Solano (no race data)
## 2.19 Sonoma
https://sonomacounty.maps.arcgis.com/apps/MapSeries/index.html?appid=21a1653b79ba42039ff22bcb85fa5b19
## 2.20 Kings (no race data)

In [28]:
df = pd.read_csv("Race data/California_County_Racedata.csv")
df.head()

Unnamed: 0,County,Race,Count
0,California,American Indian/Alaska Native,92.0
1,California,Asian,5339.0
2,California,Black or African American,2937.0
3,California,Hispanic/Latino,25582.0
4,California,Native Hawaiian/Pacific Islander,488.0


In [29]:
df_california = df[df['County'] == 'California']
df_california

Unnamed: 0,County,Race,Count
0,California,American Indian/Alaska Native,92.0
1,California,Asian,5339.0
2,California,Black or African American,2937.0
3,California,Hispanic/Latino,25582.0
4,California,Native Hawaiian/Pacific Islander,488.0
5,California,White,11625.0
6,California,Other,3151.0
7,California,Under Investigation,0.0


In [30]:
fig = px.pie(df_california, values = 'Count',names='Race', height=600)
fig.update_traces(textposition='inside', textinfo='percent+label')

fig.update_layout(
    title_x = 0.5,
    geo=dict(
        showframe = False,
        showcoastlines = False,
    ),title_text="California Cumulative COVID-19 Cases by Race (source:California Department of Public Health, as of May 15)")

fig.show()

In [31]:
pio.write_html(fig, file='California Cumulative COVID-19 Cases by Race.html', auto_open=False)

In [32]:
labels = df_california['Race']

# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=3, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}],[{'type':'domain'}, {'type':'domain'}],[{'type':'domain'}, {'type':'domain'}]], 
                    subplot_titles=['Los Angeles','San Diego','Orange','San Bernardino','Alameda','San Francisco'])

fig.add_trace(go.Pie(labels=labels, values=df[df['County'] == 'Los Angeles']['Count'], name="Los Angeles"),
              1, 1)
fig.add_trace(go.Pie(labels=labels, values=df[df['County'] == 'San Diego']['Count'], name="San Diego"),
              1, 2)
fig.add_trace(go.Pie(labels=labels, values=df[df['County'] == 'Orange']['Count'], name="Orange"),
              2, 1)
fig.add_trace(go.Pie(labels=labels, values=df[df['County'] == 'San Bernardino']['Count'], name="San Bernardino"),
              2, 2)
fig.add_trace(go.Pie(labels=labels, values=df[df['County'] == 'Alameda']['Count'], name="Alameda"),
              3, 1)
fig.add_trace(go.Pie(labels=labels, values=df[df['County'] == 'San Francisco']['Count'], name="San Francisco"),
              3, 2)
# Use `hole` to create a donut-like pie chart
# fig.update_traces(hole=.4, hoverinfo="label+percent+name")
fig.update_traces(hoverinfo="label+value+name")


fig.update_layout(height=1000, width=1000,
    title_text='Cumulative COVID-19 Cases by Race, Select California Counties <br>(source: Department of Public Health for each county, as of May 15)')
fig.show()

In [33]:
pio.write_html(fig, file='Cumulative COVID-19 Cases by Race, Select California Counties.html', auto_open=False)

# % of Cumulative COVID-19 Cases in Long-term Care Facilities (source: Kaiser Family Foundation, as of DATE)

In [34]:
df_us_LTC= pd.read_csv("Map-Data_Share-of-Cases.csv")

In [35]:
df_us_LTC

Unnamed: 0,State,Map category,Description
0,Alabama,Reporting,12% of cases are in LTC facilities\nUnknown sh...
1,Alaska,Not Reporting,Not Reporting
2,Arizona,Reporting,Unknown share of cases in LTC facilities\nUnkn...
3,Arkansas,Reporting,7% of cases are in LTC facilities\nUnknown sha...
4,California,Reporting,8% of cases are in LTC facilities\nUnknown sha...
5,Colorado,Reporting,17% of cases are in LTC facilities\n50% of dea...
6,Connecticut,Reporting,8% of cases are in LTC facilities\n24% of deat...
7,Delaware,Reporting,7% of cases are in LTC facilities\n58% of deat...
8,District of Columbia,Reporting,6% of cases are in LTC facilities\n11% of case...
9,Florida,Reporting,8% of cases are in LTC facilities\n27% of deat...


In [36]:
df_us_LTC['state_code'] = df_us_LTC['State'].map(us_state_abbrev)
df_us_LTC.head()

Unnamed: 0,State,Map category,Description,state_code
0,Alabama,Reporting,12% of cases are in LTC facilities\nUnknown sh...,AL
1,Alaska,Not Reporting,Not Reporting,AK
2,Arizona,Reporting,Unknown share of cases in LTC facilities\nUnkn...,AZ
3,Arkansas,Reporting,7% of cases are in LTC facilities\nUnknown sha...,AR
4,California,Reporting,8% of cases are in LTC facilities\nUnknown sha...,CA


In [37]:
fig = px.choropleth(df_us_LTC, locations='state_code', color='Map category',
                           color_continuous_scale=px.colors.sequential.OrRd,
                           hover_name = 'State',

                           hover_data={'Description':True,
                                      'state_code':False,
                                      'Map category': False},
                           locationmode = 'USA-states'
                          )
fig.update_layout(
    title_text = '% of Cumulative COVID-19 Cases in Long-term Care Facilities <br> (source: Kaiser Family Foundation, as of June 20)', # Create a Title
    geo_scope='usa',  # Plot only the USA instead of globe
)
# fig.update_traces(hovertemplate="%{locations}<br> Description: %{hover_data.Description}") # 
fig.show()  # Output the plot to the screen

In [38]:
pio.write_html(fig, file='of Cumulative COVID-19 Cases in Long-term Care Facilities.html', auto_open=False)