# Package installation

In [2]:
import pandas as pd
import plotly as py
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import matplotlib.ticker as ticker
%matplotlib inline 
import numpy as np 
init_notebook_mode(connected=True) 
import os
import chart_studio
import chart_studio.plotly as py
import chart_studio.tools as tls

# Cumulative COVID-19 Cases by Country and Week of Year (source:  Our Word in Data)

In [46]:
df = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv', parse_dates=['date'])

In [47]:
df.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,...,aged_70_older,gdp_per_capita,extreme_poverty,cvd_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy
0,AFG,Asia,Afghanistan,2019-12-31,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
1,AFG,Asia,Afghanistan,2020-01-01,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
2,AFG,Asia,Afghanistan,2020-01-02,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
3,AFG,Asia,Afghanistan,2020-01-03,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83
4,AFG,Asia,Afghanistan,2020-01-04,0.0,0.0,0.0,0.0,0.0,0.0,...,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83


In [48]:
print("Shape of Data Set is: ", df.shape)
print("Min and Max dates are: ", df.date.min(), df.date.max())

Shape of Data Set is:  (24535, 34)
Min and Max dates are:  2019-12-31 00:00:00 2020-06-19 00:00:00


In [49]:
df['new_date'] = pd.to_datetime(df['date'])
df['Year-Week'] = df['new_date'].dt.strftime('%Y-%U')
df['Year-Week'].head()

0    2019-52
1    2020-00
2    2020-00
3    2020-00
4    2020-00
Name: Year-Week, dtype: object

In [51]:
df[df.location=='Germany']

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,...,extreme_poverty,cvd_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,new_date,Year-Week
8571,DEU,Europe,Germany,2019-12-31,0.0,0.0,0.0,0.0,0.000,0.000,...,,156.139,8.31,28.2,33.1,,8.0,81.33,2019-12-31,2019-52
8572,DEU,Europe,Germany,2020-01-01,0.0,0.0,0.0,0.0,0.000,0.000,...,,156.139,8.31,28.2,33.1,,8.0,81.33,2020-01-01,2020-00
8573,DEU,Europe,Germany,2020-01-02,0.0,0.0,0.0,0.0,0.000,0.000,...,,156.139,8.31,28.2,33.1,,8.0,81.33,2020-01-02,2020-00
8574,DEU,Europe,Germany,2020-01-03,0.0,0.0,0.0,0.0,0.000,0.000,...,,156.139,8.31,28.2,33.1,,8.0,81.33,2020-01-03,2020-00
8575,DEU,Europe,Germany,2020-01-04,0.0,0.0,0.0,0.0,0.000,0.000,...,,156.139,8.31,28.2,33.1,,8.0,81.33,2020-01-04,2020-00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8738,DEU,Europe,Germany,2020-06-15,186461.0,192.0,8791.0,4.0,2225.498,2.292,...,,156.139,8.31,28.2,33.1,,8.0,81.33,2020-06-15,2020-24
8739,DEU,Europe,Germany,2020-06-16,186839.0,378.0,8800.0,9.0,2230.010,4.512,...,,156.139,8.31,28.2,33.1,,8.0,81.33,2020-06-16,2020-24
8740,DEU,Europe,Germany,2020-06-17,187184.0,345.0,8830.0,30.0,2234.127,4.118,...,,156.139,8.31,28.2,33.1,,8.0,81.33,2020-06-17,2020-24
8741,DEU,Europe,Germany,2020-06-18,187764.0,580.0,8856.0,26.0,2241.050,6.923,...,,156.139,8.31,28.2,33.1,,8.0,81.33,2020-06-18,2020-24


In [96]:
fig = px.choropleth(df, locations="iso_code",
                    color="total_cases",
                    hover_name="location", # column to add to hover information
                    animation_frame="Year-Week",
                    title = "Cumulative COVID-19 Cases by Country and Week of Year (source:Our Word in Data)",
                    color_continuous_scale=px.colors.sequential.OrRd)

# fig["layout"].pop("updatemenus")
fig.show()

In [53]:
pio.write_html(fig, file='Cumulative COVID-19 Cases by Country and Week of Year.html', auto_open=True)

# Cumulative COVID-19 Cases per 100,000 People by Week of Year: Select Countries (source:              )

In [76]:
countries = ['Canada', 'Germany', 'United Kingdom', 'United States', 'France', 'China', 'Chile', 'Peru', 'Spain', 'Brazil', 'Russia', 'Italy', 'Iran','World']
covid = df[df['location'].isin(countries)]
covid

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,...,extreme_poverty,cvd_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,new_date,Year-Week
3205,BRA,South America,Brazil,2019-12-31,0.0,0.0,0.0,0.0,0.000,0.000,...,3.4,177.961,8.11,10.100,17.900,,2.200,75.88,2019-12-31,2019-52
3206,BRA,South America,Brazil,2020-01-01,0.0,0.0,0.0,0.0,0.000,0.000,...,3.4,177.961,8.11,10.100,17.900,,2.200,75.88,2020-01-01,2020-00
3207,BRA,South America,Brazil,2020-01-02,0.0,0.0,0.0,0.0,0.000,0.000,...,3.4,177.961,8.11,10.100,17.900,,2.200,75.88,2020-01-02,2020-00
3208,BRA,South America,Brazil,2020-01-03,0.0,0.0,0.0,0.0,0.000,0.000,...,3.4,177.961,8.11,10.100,17.900,,2.200,75.88,2020-01-03,2020-00
3209,BRA,South America,Brazil,2020-01-04,0.0,0.0,0.0,0.0,0.000,0.000,...,3.4,177.961,8.11,10.100,17.900,,2.200,75.88,2020-01-04,2020-00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24530,OWID_WRL,,World,2020-06-15,7881616.0,122105.0,435154.0,3129.0,1011.138,15.665,...,10.0,233.070,8.51,6.434,34.635,60.13,2.705,72.58,2020-06-15,2020-24
24531,OWID_WRL,,World,2020-06-16,8000417.0,118801.0,438529.0,3375.0,1026.379,15.241,...,10.0,233.070,8.51,6.434,34.635,60.13,2.705,72.58,2020-06-16,2020-24
24532,OWID_WRL,,World,2020-06-17,8141722.0,141305.0,445380.0,6851.0,1044.507,18.128,...,10.0,233.070,8.51,6.434,34.635,60.13,2.705,72.58,2020-06-17,2020-24
24533,OWID_WRL,,World,2020-06-18,8318148.0,176426.0,450630.0,5250.0,1067.141,22.634,...,10.0,233.070,8.51,6.434,34.635,60.13,2.705,72.58,2020-06-18,2020-24


In [77]:
covid['country']=covid['location']
print(covid)

       iso_code      continent location       date  total_cases  new_cases  \
3205        BRA  South America   Brazil 2019-12-31          0.0        0.0   
3206        BRA  South America   Brazil 2020-01-01          0.0        0.0   
3207        BRA  South America   Brazil 2020-01-02          0.0        0.0   
3208        BRA  South America   Brazil 2020-01-03          0.0        0.0   
3209        BRA  South America   Brazil 2020-01-04          0.0        0.0   
...         ...            ...      ...        ...          ...        ...   
24530  OWID_WRL            NaN    World 2020-06-15    7881616.0   122105.0   
24531  OWID_WRL            NaN    World 2020-06-16    8000417.0   118801.0   
24532  OWID_WRL            NaN    World 2020-06-17    8141722.0   141305.0   
24533  OWID_WRL            NaN    World 2020-06-18    8318148.0   176426.0   
24534  OWID_WRL            NaN    World 2020-06-19    8455738.0   137590.0   

       total_deaths  new_deaths  total_cases_per_million  \
320



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [78]:
covid = covid.pivot(index='date', columns='country', values='total_cases')
countries = list(covid.columns)
print(covid)
# # covid=df.copy()
# # covid = df.reset_index('Date')
# # covid.set_index(['Date'], inplace=True)
# covid.columns = countries


country       Brazil    Canada     Chile    China    France   Germany  \
date                                                                    
2019-12-31       0.0       0.0       NaN     27.0       0.0       0.0   
2020-01-01       0.0       0.0       NaN     27.0       0.0       0.0   
2020-01-02       0.0       0.0       NaN     27.0       0.0       0.0   
2020-01-03       0.0       0.0       NaN     44.0       0.0       0.0   
2020-01-04       0.0       0.0       NaN     44.0       0.0       0.0   
...              ...       ...       ...      ...       ...       ...   
2020-06-15  867624.0   98776.0  174293.0  84335.0  157220.0  186461.0   
2020-06-16  888271.0   99136.0  179436.0  84378.0  157372.0  186839.0   
2020-06-17  923189.0   99456.0  184449.0  84422.0  157716.0  187184.0   
2020-06-18  955377.0   99842.0  220628.0  84458.0  158174.0  187764.0   
2020-06-19  978142.0  100209.0  225103.0  84494.0  158641.0  187764.0   

country         Iran     Italy      Peru    Russia

In [79]:
covid.World

date
2019-12-31         27.0
2020-01-01         27.0
2020-01-02         27.0
2020-01-03         44.0
2020-01-04         44.0
                ...    
2020-06-15    7881616.0
2020-06-16    8000417.0
2020-06-17    8141722.0
2020-06-18    8318148.0
2020-06-19    8455738.0
Name: World, Length: 172, dtype: float64

In [82]:
populations = {'Canada':37664517, 'Germany': 83721496 , 'United Kingdom': 67802690 , 'United States': 330548815, 'France': 65239883, 'China':1438027228,\
               'Chile':19116201, 'Peru':32971854,'Spain':46754778,'Brazil':212559417,'Russia':145934462,'Italy':60461826,\
              'Iran':83992949,'World':7792507702}
percapita = covid.copy()
for country in list(percapita.columns):
    percapita[country] = round(percapita[country]/populations[country]*100000)
print(percapita)

country     Brazil  Canada   Chile  China  France  Germany   Iran  Italy  \
date                                                                       
2019-12-31     0.0     0.0     NaN    0.0     0.0      0.0    0.0    0.0   
2020-01-01     0.0     0.0     NaN    0.0     0.0      0.0    0.0    0.0   
2020-01-02     0.0     0.0     NaN    0.0     0.0      0.0    0.0    0.0   
2020-01-03     0.0     0.0     NaN    0.0     0.0      0.0    0.0    0.0   
2020-01-04     0.0     0.0     NaN    0.0     0.0      0.0    0.0    0.0   
...            ...     ...     ...    ...     ...      ...    ...    ...   
2020-06-15   408.0   262.0   912.0    6.0   241.0    223.0  223.0  392.0   
2020-06-16   418.0   263.0   939.0    6.0   241.0    223.0  226.0  392.0   
2020-06-17   434.0   264.0   965.0    6.0   242.0    224.0  229.0  393.0   
2020-06-18   449.0   265.0  1154.0    6.0   242.0    224.0  232.0  393.0   
2020-06-19   460.0   266.0  1178.0    6.0   243.0    224.0  235.0  394.0   

country    

In [83]:
fig = go.Figure()
for country in list(percapita.columns):
    fig = fig.add_trace(go.Scatter(x=percapita.index,y=percapita[country],name= country))

fig.update_layout(title = 'Per Capita COVID-19 Cases by Country',)

fig.show()

# Cumulative COVID-19  Cases by State and Week of Year (source:          )

In [84]:
# !git clone https://github.com/nytimes/covid-19-data.git
df_us = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv',parse_dates=['date'])
df_us.head()

Unnamed: 0,date,state,fips,cases,deaths
0,2020-01-21,Washington,53,1,0
1,2020-01-22,Washington,53,1,0
2,2020-01-23,Washington,53,1,0
3,2020-01-24,Illinois,17,1,0
4,2020-01-24,Washington,53,1,0


In [85]:
df_us['new_date'] = pd.to_datetime(df_us['date'])
df_us['Year-Week'] = df_us['new_date'].dt.strftime('%Y-%U')
df_us.head()

Unnamed: 0,date,state,fips,cases,deaths,new_date,Year-Week
0,2020-01-21,Washington,53,1,0,2020-01-21,2020-03
1,2020-01-22,Washington,53,1,0,2020-01-22,2020-03
2,2020-01-23,Washington,53,1,0,2020-01-23,2020-03
3,2020-01-24,Illinois,17,1,0,2020-01-24,2020-03
4,2020-01-24,Washington,53,1,0,2020-01-24,2020-03


In [86]:
df_us.shape
df_us = df_us.sort_values(by=['state', 'new_date'])
df_us_week = df_us.groupby(['state', 'fips', 'Year-Week']).first().reset_index()
df_us_week.head(100)

Unnamed: 0,state,fips,Year-Week,date,cases,deaths,new_date
0,Alabama,1,2020-10,2020-03-13,6,0,2020-03-13
1,Alabama,1,2020-11,2020-03-15,23,0,2020-03-15
2,Alabama,1,2020-12,2020-03-22,157,0,2020-03-22
3,Alabama,1,2020-13,2020-03-29,830,5,2020-03-29
4,Alabama,1,2020-14,2020-04-05,1840,45,2020-04-05
...,...,...,...,...,...,...,...
95,Colorado,8,2020-16,2020-04-19,9730,421,2020-04-19
96,Colorado,8,2020-17,2020-04-26,13440,678,2020-04-26
97,Colorado,8,2020-18,2020-05-03,16527,840,2020-05-03
98,Colorado,8,2020-19,2020-05-10,19595,969,2020-05-10


In [87]:
df_us_week['cases'].max(), df_us_week['cases'].min()

(388096, 1)

In [88]:
from urllib.request import urlopen
import json
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)
counties["features"][100]

{'type': 'Feature',
 'properties': {'GEO_ID': '0500000US16067',
  'STATE': '16',
  'COUNTY': '067',
  'NAME': 'Minidoka',
  'LSAD': 'County',
  'CENSUSAREA': 757.591},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-113.931799, 42.535275],
    [-113.932904, 42.765032],
    [-113.763862, 42.764508],
    [-113.713928, 42.849733],
    [-113.714701, 43.20003],
    [-113.413693, 43.199785],
    [-113.413026, 42.84925],
    [-113.472155, 42.849218],
    [-113.472177, 42.669251],
    [-113.557609, 42.656416],
    [-113.655338, 42.535663],
    [-113.779811, 42.55687],
    [-113.931799, 42.535275]]]},
 'id': '16067'}

In [89]:
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}

In [90]:
df_us_week['state_code'] = df_us_week['state'].map(us_state_abbrev)
df_us_week.head()

Unnamed: 0,state,fips,Year-Week,date,cases,deaths,new_date,state_code
0,Alabama,1,2020-10,2020-03-13,6,0,2020-03-13,AL
1,Alabama,1,2020-11,2020-03-15,23,0,2020-03-15,AL
2,Alabama,1,2020-12,2020-03-22,157,0,2020-03-22,AL
3,Alabama,1,2020-13,2020-03-29,830,5,2020-03-29,AL
4,Alabama,1,2020-14,2020-04-05,1840,45,2020-04-05,AL


In [104]:
df_us_week = df_us_week.sort_values(by=['Year-Week'])
fig = px.choropleth(df_us_week[df_us_week['date']>='2020-02-15'], locations='state_code', color='cases',
                           color_continuous_scale=px.colors.sequential.OrRd,
                           hover_name = 'state_code',
                           locationmode = 'USA-states',
                           animation_frame="Year-Week",
                          )
fig.update_layout(
    title_text = 'Cumulative COVID-19  Cases by State and Week of Year (source:New York Times)', # Create a Title
    geo_scope='usa',  # Plot only the USA instead of globe
)
fig.show()  # Output the plot to the screen

In [92]:
fig = px.choropleth(df_us_week[df_us_week['date']>='2020-03-15'], locations='state_code', color='deaths',
                           color_continuous_scale=px.colors.sequential.OrRd,
                           hover_name = 'state_code',
                           locationmode = 'USA-states',
                           animation_frame="Year-Week",
                          )
fig.update_layout(
    title_text = 'Cumulative COVID-19  Cases by State and Week of Year (source:New York Times)', # Create a Title
    geo_scope='usa',  # Plot only the USA instead of globe
)
fig.show()  # Output the plot to the screen