# COVID-19 Animated Bar Chart

In [18]:
pip install plotly==4.5.2



In [0]:
import pandas as pd
import numpy as np
import plotly.express as px

In [20]:
# Confirm which plotly release the kernel actually loaded (the install cell pins 4.5.2)
import plotly
plotly.__version__

'4.5.2'

## 1. Read CSV

In [0]:
# Parse the CSV file with pandas.
# ObservationDate is left as plain strings (no parse_dates); the later cells
# group on those strings and use them as animation frame labels.
# `date_parser` expects a callable and only applies together with `parse_dates`,
# so it is not passed here.
covid_19 = pd.read_csv("covid_19_data.csv")

In [22]:
# Preview the first 20 rows of the dataset
covid_19.head(20)

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0
5,6,01/22/2020,Guangdong,Mainland China,1/22/2020 17:00,26.0,0.0,0.0
6,7,01/22/2020,Guangxi,Mainland China,1/22/2020 17:00,2.0,0.0,0.0
7,8,01/22/2020,Guizhou,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
8,9,01/22/2020,Hainan,Mainland China,1/22/2020 17:00,4.0,0.0,0.0
9,10,01/22/2020,Hebei,Mainland China,1/22/2020 17:00,1.0,0.0,0.0


In [23]:
# Per-country totals of the numeric columns, largest "Confirmed" total first.
# numeric_only=True keeps text columns (province, timestamps) out of the sum,
# which newer pandas releases would otherwise try to aggregate as well.
# NOTE(review): rows are summed across every ObservationDate; if Confirmed is a
# running total, this ranks by accumulated daily values rather than the latest
# count per country - confirm that this is the intended ranking.
country = (
    covid_19.groupby("Country/Region", as_index=False)
    .sum(numeric_only=True)
    .sort_values("Confirmed", ascending=False)
)
country

Unnamed: 0,Country/Region,SNo,Confirmed,Deaths,Recovered
122,Mainland China,10461980,4829567.0,172245.0,2997441.0
205,US,17625603,3657903.0,92199.0,143637.0
100,Italy,332079,2217867.0,240848.0,317538.0
186,Spain,333600,1762565.0,152262.0,387132.0
75,Germany,332973,1363130.0,16381.0,300324.0
...,...,...,...,...,...
41,Channel Islands,4686,1.0,0.0,0.0
37,Cape Verde,7585,1.0,0.0,0.0
57,East Timor,7589,1.0,0.0,0.0
198,The Gambia,35095,1.0,0.0,0.0


In [24]:
# Names of the 20 countries at the top of the Confirmed ranking
# (`country` is already sorted in descending order of Confirmed)
top20 = country.loc[:, "Country/Region"].iloc[:20]
top20

122    Mainland China
205                US
100             Italy
186             Spain
75            Germany
69             France
96               Iran
204                UK
184       South Korea
192       Switzerland
203            Turkey
141       Netherlands
20            Belgium
12            Austria
36             Canada
159          Portugal
27             Brazil
99             Israel
191            Sweden
148            Norway
Name: Country/Region, dtype: object

In [25]:
# The same country names as a plain array (this is what arrangeData receives below)
top20.values

array(['Mainland China', 'US', 'Italy', 'Spain', 'Germany', 'France',
       'Iran', 'UK', 'South Korea', 'Switzerland', 'Turkey',
       'Netherlands', 'Belgium', 'Austria', 'Canada', 'Portugal',
       'Brazil', 'Israel', 'Sweden', 'Norway'], dtype=object)

In [26]:
# Reference date index used for padding, because not every country has a row
# for every date. It is taken from the "Mainland China" rows; groupby returns
# the distinct ObservationDate values as a sorted index.
# numeric_only=True avoids aggregating text columns just to obtain that index.
china_rows = covid_19[covid_19["Country/Region"] == "Mainland China"]
pad_index = china_rows.groupby("ObservationDate").sum(numeric_only=True).index
pad_index

Index(['01/22/2020', '01/23/2020', '01/24/2020', '01/25/2020', '01/26/2020',
       '01/27/2020', '01/28/2020', '01/29/2020', '01/30/2020', '01/31/2020',
       '02/01/2020', '02/02/2020', '02/03/2020', '02/04/2020', '02/05/2020',
       '02/06/2020', '02/07/2020', '02/08/2020', '02/09/2020', '02/10/2020',
       '02/11/2020', '02/12/2020', '02/13/2020', '02/14/2020', '02/15/2020',
       '02/16/2020', '02/17/2020', '02/18/2020', '02/19/2020', '02/20/2020',
       '02/21/2020', '02/22/2020', '02/23/2020', '02/24/2020', '02/25/2020',
       '02/26/2020', '02/27/2020', '02/28/2020', '02/29/2020', '03/01/2020',
       '03/02/2020', '03/03/2020', '03/04/2020', '03/05/2020', '03/06/2020',
       '03/07/2020', '03/08/2020', '03/09/2020', '03/10/2020', '03/11/2020',
       '03/12/2020', '03/13/2020', '03/14/2020', '03/15/2020', '03/16/2020',
       '03/17/2020', '03/18/2020', '03/19/2020', '03/20/2020', '03/21/2020',
       '03/22/2020', '03/23/2020', '03/24/2020', '03/25/2020', '03/26/2020',

In [0]:
#
# For each country: select its rows,
# pad them to the common date index,
# then stack the per-country frames

Declare a function to arrange the data frame.
What it does:
1. Copies the data frame so the original is not overwritten.
2. For every country, selects that country's rows and sums them per observation date.
3. Pads those rows to the dates in the previously built `pad_index`.
4. Adds two more columns containing the country name and the date.
5. Fills the NaN (not a number) values with 0, because the padded rows contain NaN.



In [28]:
def arrangeData(df, countries, pad_index):
    """Build one long frame with a row for every (country, date) pair.

    For each name in ``countries`` the matching rows of ``df`` are summed per
    ``ObservationDate`` (numeric columns only), re-indexed onto ``pad_index``
    so every country covers the same dates, and labelled with ``Country`` and
    ``Dates`` columns. The per-country frames are then stacked vertically.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``"Country/Region"`` and ``"ObservationDate"``.
        It is only read, never modified.
    countries : iterable of str
        Country names to extract, in the order they should appear in the result.
    pad_index : pandas.Index or list
        The dates every country is padded to. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by date, holding the summed numeric columns plus ``Country``
        and ``Dates``. Dates with no data for a country hold 0.

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when ``countries`` is empty.
    """
    frames = []

    for country in countries:
        # numeric_only=True keeps text columns (province, timestamps, country)
        # out of the aggregation instead of relying on pandas dropping them.
        per_date = (
            df[df["Country/Region"] == country]
            .groupby("ObservationDate")
            .sum(numeric_only=True)
            .reindex(pad_index)
            # Padded dates come back as NaN; fill them before the label
            # columns exist so only the numeric values are touched.
            # NOTE(review): if the counts are running totals, gaps inside a
            # country's series might be better forward-filled - confirm.
            .fillna(0)
        )
        per_date["Country"] = country
        per_date["Dates"] = per_date.index
        frames.append(per_date)

    return pd.concat(frames, axis=0)

# Build the padded, stacked frame for the top-20 countries and display it
covid_aranged = arrangeData(
    df=covid_19,
    countries=top20.values,
    pad_index=pad_index,
)
covid_aranged

Unnamed: 0_level_0,SNo,Confirmed,Deaths,Recovered,Country,Dates
ObservationDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
01/22/2020,535.0,547.0,17.0,28.0,Mainland China,01/22/2020
01/23/2020,1713.0,639.0,18.0,30.0,Mainland China,01/23/2020
01/24/2020,3004.0,916.0,26.0,36.0,Mainland China,01/24/2020
01/25/2020,4224.0,1399.0,42.0,39.0,Mainland China,01/25/2020
01/26/2020,5548.0,2062.0,56.0,49.0,Mainland China,01/26/2020
...,...,...,...,...,...,...
04/04/2020,11737.0,5550.0,62.0,32.0,Norway,04/04/2020
04/05/2020,12053.0,5687.0,71.0,32.0,Norway,04/05/2020
04/06/2020,12372.0,5865.0,76.0,32.0,Norway,04/06/2020
04/07/2020,12692.0,6086.0,89.0,32.0,Norway,04/07/2020


Visualization with the plotly library.
`px.bar` takes the data frame and the names of its columns as arguments.

In [30]:
# Horizontal animated bar chart: one bar per country, one frame per date.
# The x-axis range is fixed across frames so bar lengths stay comparable, and
# it is sized from the data (plus 5% headroom) so bars are not clipped when the
# dataset grows past a hard-coded limit.
x_max = float(covid_aranged["Confirmed"].max()) * 1.05
fig = px.bar(
    covid_aranged,
    x="Confirmed",
    y="Country",
    orientation="h",
    color="Country",
    animation_frame="Dates",
    animation_group="Country",
    range_x=[0, x_max],
    title="COVID-19 confirmed cases by country",
    labels={"Confirmed": "Confirmed cases"},
)
fig.show()