In [6]:
import pandas as pd
import numpy as np
import plotly as py
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [9]:
df = pd.read_csv(r"data/covid_19_data.csv")
df.columns

Index(['SNo', 'ObservationDate', 'Province/State', 'Country/Region',
       'Last Update', 'Confirmed', 'Deaths', 'Recovered'],
      dtype='object')

In [10]:
df = df.rename(columns = {'Country/Region':'Country'})
df = df.rename(columns = {'ObservationDate': 'Date'})
df.head(2)

Unnamed: 0,SNo,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0


In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20574 entries, 0 to 20573
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   SNo             20574 non-null  int64  
 1   Date            20574 non-null  object 
 2   Province/State  10026 non-null  object 
 3   Country         20574 non-null  object 
 4   Last Update     20574 non-null  object 
 5   Confirmed       20574 non-null  float64
 6   Deaths          20574 non-null  float64
 7   Recovered       20574 non-null  float64
dtypes: float64(3), int64(1), object(4)
memory usage: 1.3+ MB


In [18]:
df_countries = df.groupby(['Country', 'Date']).sum().reset_index().sort_values('Date', ascending=False)


In [20]:
df_countries.Country.value_counts()

US                     101
Thailand               101
Mainland China         101
South Korea            101
Macau                  101
                      ... 
Channel Islands          1
Cape Verde               1
 Azerbaijan              1
Republic of Ireland      1
('St. Martin',)          1
Name: Country, Length: 222, dtype: int64

In [21]:
df_countries = df_countries.drop_duplicates(subset=['Country'])
df_countries = df_countries[df_countries['Confirmed']>0]
df_countries

Unnamed: 0,Country,Date,SNo,Confirmed,Deaths,Recovered
2491,Cyprus,05/01/2020,20293,857.0,15.0,296.0
4179,Haiti,05/01/2020,20322,85.0,8.0,10.0
9702,Sudan,05/01/2020,20408,533.0,36.0,46.0
4085,Guinea-Bissau,05/01/2020,20320,257.0,1.0,19.0
7273,Netherlands,05/01/2020,102262,39989.0,4909.0,138.0
...,...,...,...,...,...,...
7806,Palestine,03/09/2020,4322,22.0,0.0,0.0
10961,Vatican City,03/09/2020,4507,1.0,0.0,0.0
8298,Republic of Ireland,03/08/2020,4067,21.0,0.0,0.0
7489,North Ireland,02/28/2020,2685,1.0,0.0,0.0


In [22]:
df_countries.Country.value_counts()

San Marino                1
Uganda                    1
Martinique                1
Bosnia and Herzegovina    1
Egypt                     1
                         ..
Saint Kitts and Nevis     1
Burkina Faso              1
Macau                     1
Guatemala                 1
Ukraine                   1
Name: Country, Length: 213, dtype: int64

In [25]:
fig = px.choropleth(df_countries, 
                    locations='Country', locationmode="country names",
                    color = "Confirmed", hover_name="Country", 
                    animation_frame="Date")
fig.update_layout(title_text = 'Global Spread of Coronovirus',
                  title_x = 0.5,
                  geo = dict(showframe = False, showcoastlines = False))
fig.show()