# Covid_19 Analysis - Part 2 (Interactive Visualisation)

To create an interactive visual comparison of CovId19 infections between different countries

In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
import datetime

In [3]:
from IPython.display import display
import cufflinks as cf
import plotly.offline as pyo
import plotly.express as px
import plotly.graph_objects as go

from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
init_notebook_mode(connected=True)
cf.go_offline()
from plotly.subplots import make_subplots
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [4]:
import warnings
warnings.filterwarnings(action='once')

In [5]:
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 50)

In [6]:
path = "D:/datasets/covid"

### 1. Load Processed Data

In [7]:
df1 = pd.read_csv(os.path.join(path, "covid_processed.csv"))

In [8]:
df1.columns

Index(['Unnamed: 0', 'ISO_Code', 'Country', 'continent', 'Date', 'Confirmed', 'Deaths', 'Recovered', 'ActiveCases', 'NewConfirmed', 'NewDeaths', 'NewRecovered', 'NewConfirmedPer1MilPop', 'NewDeathPer1MilPop', 'CaseFatalityRate', 'CaseRecoveryRate', 'DeathVsRecoveryRatio', 'ConfirmedPer1MilPop', 'DeathPer1MilPop', 'ActiveCasesPer1MilPop', 'DailyConfirmedGrowthRate', 'DailyDeathsGrowthRate', 'DailyRecoveredGrowthRate', 'DaysSince1stDeath', 'DaysSince10thDeath', 'DaysSince50thDeath', 'DaysSince1stCase', 'DaysSince100thCase', 'DaysSince500thCase', 'Population', 'Population_Density', 'Median_Age', 'Aged_65+', 'Aged_70+', 'Gdp_per_Capita', 'Extreme_Poverty', 'Cardiovasc_Death_Rate', 'Diabetes_Prevalence', 'Female_Smokers', 'Male_Smokers', 'Handwashing_Facilities', 'Hospital_Beds_per_Thousand', 'Life_Expectancy', 'Human_Development_Index', '1stConfirmed', '100thConfirmed', '500thConfirmed', '1stDeath', '10thDeath', '50thDeath', 'Lat', 'Long', 'Region_Code', 'Subregion_Code',
       'Intermeid

In [9]:
df1.shape

(63720, 55)

In [10]:
min(df1.Date), max(df1.Date)

('2020-01-22', '2020-11-11')

In [11]:
df1.tail()

Unnamed: 0.1,Unnamed: 0,ISO_Code,Country,continent,Date,Confirmed,Deaths,Recovered,ActiveCases,NewConfirmed,NewDeaths,NewRecovered,NewConfirmedPer1MilPop,NewDeathPer1MilPop,CaseFatalityRate,CaseRecoveryRate,DeathVsRecoveryRatio,ConfirmedPer1MilPop,DeathPer1MilPop,ActiveCasesPer1MilPop,DailyConfirmedGrowthRate,DailyDeathsGrowthRate,DailyRecoveredGrowthRate,DaysSince1stDeath,DaysSince10thDeath,DaysSince50thDeath,DaysSince1stCase,DaysSince100thCase,DaysSince500thCase,Population,Population_Density,Median_Age,Aged_65+,Aged_70+,Gdp_per_Capita,Extreme_Poverty,Cardiovasc_Death_Rate,Diabetes_Prevalence,Female_Smokers,Male_Smokers,Handwashing_Facilities,Hospital_Beds_per_Thousand,Life_Expectancy,Human_Development_Index,1stConfirmed,100thConfirmed,500thConfirmed,1stDeath,10thDeath,50thDeath,Lat,Long,Region_Code,Subregion_Code,Intermeidate_Region_Code
63715,63715,ZWE,Zimbabwe,Africa,2020-11-07,8498,251,7995,252,27.0,1.0,12.0,1.817,0.067,0.029536,0.94081,0.030439,571.758,16.888,16.955,0.003187,0.004,0.001503,229.0,120.0,100.0,232.0,164.0,138.0,14862927.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.535,True,True,True,True,True,True,-19.015438,29.154857,2,202,14.0
63716,63716,ZWE,Zimbabwe,Africa,2020-11-08,8531,253,8005,273,33.0,2.0,10.0,2.22,0.135,0.029657,0.938343,0.030637,573.978,17.022,18.368,0.003883,0.007968,0.001251,230.0,121.0,101.0,233.0,165.0,139.0,14862927.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.535,True,True,True,True,True,True,-19.015438,29.154857,2,202,14.0
63717,63717,ZWE,Zimbabwe,Africa,2020-11-09,8561,254,8023,284,30.0,1.0,18.0,2.018,0.067,0.029669,0.937157,0.030687,575.997,17.09,19.108,0.003517,0.003953,0.002249,231.0,122.0,102.0,234.0,166.0,140.0,14862927.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.535,True,True,True,True,True,True,-19.015438,29.154857,2,202,14.0
63718,63718,ZWE,Zimbabwe,Africa,2020-11-10,8610,255,8040,315,49.0,1.0,17.0,3.297,0.067,0.029617,0.933798,0.030741,579.294,17.157,21.194,0.005724,0.003937,0.002119,232.0,123.0,103.0,235.0,167.0,141.0,14862927.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.535,True,True,True,True,True,True,-19.015438,29.154857,2,202,14.0
63719,63719,ZWE,Zimbabwe,Africa,2020-11-11,8667,255,8046,366,57.0,0.0,6.0,3.835,0.0,0.029422,0.928349,0.030719,583.129,17.157,24.625,0.00662,0.0,0.000746,233.0,124.0,104.0,236.0,168.0,142.0,14862927.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.535,True,True,True,True,True,True,-19.015438,29.154857,2,202,14.0


### separate columns into categories

In [12]:
measured_stats_cols1= [ 
 'NewConfirmed',
 'NewDeaths',
 'NewRecovered',
 'ActiveCases',                       
 'Confirmed',
 'Deaths',
 'Recovered']

measured_stats_cols2= [
 'CaseFatalityRate',
 'CaseRecoveryRate',
 'DeathVsRecoveryRatio',
 'NewConfirmedPer1MilPop',
 'NewDeathPer1MilPop',
 'ConfirmedPer1MilPop',
 'DeathPer1MilPop',
 'ActiveCasesPer1MilPop',
 'DailyConfirmedGrowthRate',
 'DailyDeathsGrowthRate',
 'DailyRecoveredGrowthRate']

In [13]:
date_stats_cols = [  
 'Date',
 'DaysSince1stCase',                   
 'DaysSince100thCase',
 'DaysSince500thCase',                    
 'DaysSince1stDeath',
 'DaysSince10thDeath',
 'DaysSince50thDeath'                   
]

In [14]:
country_demo_cols1 = [
 'Population',
 'Population_Density',
 'Median_Age'
]
country_demo_cols2 = [ 'Aged_65+',
 'Aged_70+',
 'Gdp_per_Capita',
 'Extreme_Poverty',
 'Cardiovasc_Death_Rate',
 'Diabetes_Prevalence',
 'Female_Smokers',
 'Male_Smokers',
 'Handwashing_Facilities',
 'Hospital_Beds_per_Thousand',
 'Life_Expectancy',
 'Human_Development_Index'
]    

In [15]:
df1.index

RangeIndex(start=0, stop=63720, step=1)

In [16]:
idx = df1.index.to_series().groupby(level=0).agg(['first', 'last']).stack()
df1.loc[idx]

Unnamed: 0.1,Unnamed: 0,ISO_Code,Country,continent,Date,Confirmed,Deaths,Recovered,ActiveCases,NewConfirmed,NewDeaths,NewRecovered,NewConfirmedPer1MilPop,NewDeathPer1MilPop,CaseFatalityRate,CaseRecoveryRate,DeathVsRecoveryRatio,ConfirmedPer1MilPop,DeathPer1MilPop,ActiveCasesPer1MilPop,DailyConfirmedGrowthRate,DailyDeathsGrowthRate,DailyRecoveredGrowthRate,DaysSince1stDeath,DaysSince10thDeath,DaysSince50thDeath,DaysSince1stCase,DaysSince100thCase,DaysSince500thCase,Population,Population_Density,Median_Age,Aged_65+,Aged_70+,Gdp_per_Capita,Extreme_Poverty,Cardiovasc_Death_Rate,Diabetes_Prevalence,Female_Smokers,Male_Smokers,Handwashing_Facilities,Hospital_Beds_per_Thousand,Life_Expectancy,Human_Development_Index,1stConfirmed,100thConfirmed,500thConfirmed,1stDeath,10thDeath,50thDeath,Lat,Long,Region_Code,Subregion_Code,Intermeidate_Region_Code
0,0,AFG,Afghanistan,Asia,2020-01-22,0,0,0,0,,,,,,,,,0.000,0.000,0.000,,,,,,,,,,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.498,False,False,False,False,False,False,33.939110,67.709953,142,34,
0,0,AFG,Afghanistan,Asia,2020-01-22,0,0,0,0,,,,,,,,,0.000,0.000,0.000,,,,,,,,,,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.498,False,False,False,False,False,False,33.939110,67.709953,142,34,
1,1,AFG,Afghanistan,Asia,2020-01-23,0,0,0,0,0.0,0.0,0.0,0.000,0.000,,,,0.000,0.000,0.000,,,,,,,,,,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.498,False,False,False,False,False,False,33.939110,67.709953,142,34,
1,1,AFG,Afghanistan,Asia,2020-01-23,0,0,0,0,0.0,0.0,0.0,0.000,0.000,,,,0.000,0.000,0.000,,,,,,,,,,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.498,False,False,False,False,False,False,33.939110,67.709953,142,34,
2,2,AFG,Afghanistan,Asia,2020-01-24,0,0,0,0,0.0,0.0,0.0,0.000,0.000,,,,0.000,0.000,0.000,,,,,,,,,,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.498,False,False,False,False,False,False,33.939110,67.709953,142,34,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63717,63717,ZWE,Zimbabwe,Africa,2020-11-09,8561,254,8023,284,30.0,1.0,18.0,2.018,0.067,0.029669,0.937157,0.030687,575.997,17.090,19.108,0.003517,0.003953,0.002249,231.0,122.0,102.0,234.0,166.0,140.0,14862927.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.535,True,True,True,True,True,True,-19.015438,29.154857,2,202,14.0
63718,63718,ZWE,Zimbabwe,Africa,2020-11-10,8610,255,8040,315,49.0,1.0,17.0,3.297,0.067,0.029617,0.933798,0.030741,579.294,17.157,21.194,0.005724,0.003937,0.002119,232.0,123.0,103.0,235.0,167.0,141.0,14862927.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.535,True,True,True,True,True,True,-19.015438,29.154857,2,202,14.0
63718,63718,ZWE,Zimbabwe,Africa,2020-11-10,8610,255,8040,315,49.0,1.0,17.0,3.297,0.067,0.029617,0.933798,0.030741,579.294,17.157,21.194,0.005724,0.003937,0.002119,232.0,123.0,103.0,235.0,167.0,141.0,14862927.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.535,True,True,True,True,True,True,-19.015438,29.154857,2,202,14.0
63719,63719,ZWE,Zimbabwe,Africa,2020-11-11,8667,255,8046,366,57.0,0.0,6.0,3.835,0.000,0.029422,0.928349,0.030719,583.129,17.157,24.625,0.006620,0.000000,0.000746,233.0,124.0,104.0,236.0,168.0,142.0,14862927.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.535,True,True,True,True,True,True,-19.015438,29.154857,2,202,14.0


## Part 1

### A. Get data from the most recent date

In [17]:
df_last = df1.iloc[df1.groupby(["Country"])["Unnamed: 0"].idxmax(), :]

In [18]:
df_last

Unnamed: 0.1,Unnamed: 0,ISO_Code,Country,continent,Date,Confirmed,Deaths,Recovered,ActiveCases,NewConfirmed,NewDeaths,NewRecovered,NewConfirmedPer1MilPop,NewDeathPer1MilPop,CaseFatalityRate,CaseRecoveryRate,DeathVsRecoveryRatio,ConfirmedPer1MilPop,DeathPer1MilPop,ActiveCasesPer1MilPop,DailyConfirmedGrowthRate,DailyDeathsGrowthRate,DailyRecoveredGrowthRate,DaysSince1stDeath,DaysSince10thDeath,DaysSince50thDeath,DaysSince1stCase,DaysSince100thCase,DaysSince500thCase,Population,Population_Density,Median_Age,Aged_65+,Aged_70+,Gdp_per_Capita,Extreme_Poverty,Cardiovasc_Death_Rate,Diabetes_Prevalence,Female_Smokers,Male_Smokers,Handwashing_Facilities,Hospital_Beds_per_Thousand,Life_Expectancy,Human_Development_Index,1stConfirmed,100thConfirmed,500thConfirmed,1stDeath,10thDeath,50thDeath,Lat,Long,Region_Code,Subregion_Code,Intermeidate_Region_Code
294,294,AFG,Afghanistan,Asia,2020-11-11,42609,1581,34967,6061,146.0,4.0,13.0,3.750,0.103,0.037105,0.820648,0.043258,1094.550,40.613,155.696,0.003438,0.002536,0.000372,234.0,219.0,199.0,261.0,229.0,215.0,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.50,64.83,0.498,True,True,True,True,True,True,33.939110,67.709953,142,34,
589,589,ALB,Albania,Europe,2020-11-11,25801,590,12493,12718,507.0,11.0,140.0,176.176,3.822,0.022867,0.484206,0.045097,8965.529,205.018,4419.348,0.020044,0.018998,0.011333,245.0,228.0,138.0,247.0,233.0,209.0,2877800.0,104.871,38.0,13.188,8.643,11803.431,1.1,304.195,10.08,7.1,51.2,,2.89,78.57,0.785,True,True,True,True,True,True,41.153300,20.168300,150,39,
884,884,DZA,Algeria,Africa,2020-11-11,64257,2093,42980,19184,811.0,16.0,354.0,18.494,0.365,0.032572,0.668877,0.046436,1465.347,47.730,437.481,0.012783,0.007703,0.008305,244.0,236.0,224.0,260.0,235.0,227.0,43851043.0,17.348,29.1,6.211,3.857,13913.839,0.5,278.364,6.73,0.7,30.4,83.741,1.90,76.88,0.754,True,True,True,True,True,True,28.033900,1.659600,2,15,
1179,1179,AND,Andorra,Europe,2020-11-11,5567,75,4488,1004,90.0,0.0,83.0,1164.822,0.000,0.013472,0.806179,0.016437,72050.734,970.685,12994.241,0.016432,0.000000,0.018842,234.0,225.0,179.0,254.0,234.0,220.0,77265.0,163.755,,,,,,109.135,7.97,29.0,37.8,,,83.73,0.858,True,True,True,True,True,True,42.506300,1.521800,150,39,
1474,1474,AGO,Angola,Africa,2020-11-11,12953,312,6125,6516,137.0,4.0,89.0,4.168,0.122,0.024087,0.472863,0.048470,394.112,9.493,198.258,0.010690,0.012987,0.014745,227.0,142.0,104.0,236.0,154.0,122.0,32866268.0,23.890,16.8,2.405,1.362,5819.495,,276.045,3.94,,,26.664,,61.15,0.581,True,True,True,True,True,True,-11.202700,17.873900,2,202,17.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62539,62539,PSE,West Bank and Gaza,Asia,2020-11-11,60065,538,52226,7301,643.0,5.0,801.0,126.043,0.980,0.008957,0.869491,0.010196,11774.182,105.461,1431.171,0.010821,0.009381,0.015576,230.0,131.0,118.0,251.0,227.0,149.0,5101416.0,778.202,20.4,3.043,1.726,4449.898,1.0,265.910,10.59,,,,,74.05,0.686,True,True,True,True,True,True,31.952200,35.233200,142,145,
62834,62834,ESH,Western Sahara,Africa,2020-11-11,10,1,8,1,0.0,0.0,0.0,0.000,0.000,0.100000,0.800000,0.111111,16.741,1.674,1.674,0.000000,0.000000,0.000000,169.0,,,220.0,,,597330.0,,28.4,,1.380,,,,,,,,,70.26,,True,False,False,True,False,False,24.215500,-12.885800,2,15,
63129,63129,YEM,Yemen,Asia,2020-11-11,2071,605,1394,72,0.0,0.0,0.0,0.000,0.000,0.292129,0.673105,0.302651,69.436,20.284,2.414,0.000000,0.000000,0.000000,195.0,183.0,168.0,215.0,180.0,155.0,29825968.0,53.508,20.3,2.922,1.583,1479.147,18.8,495.003,5.35,7.6,29.2,49.542,0.70,66.12,0.452,True,True,True,True,True,True,15.552727,48.516388,142,145,
63424,63424,ZMB,Zambia,Africa,2020-11-11,17036,350,16070,616,39.0,0.0,51.0,2.121,0.000,0.020545,0.943297,0.021315,926.678,19.038,33.507,0.002295,0.000000,0.003184,223.0,156.0,117.0,238.0,195.0,181.0,18383956.0,22.995,17.7,2.480,1.542,3689.251,57.5,234.499,3.94,3.1,24.7,13.938,2.00,63.89,0.588,True,True,True,True,True,True,-13.133897,27.849332,2,202,14.0


In [19]:
cols_to_show = ["Country"] + country_demo_cols1 + measured_stats_cols1 + measured_stats_cols2

### 1. see which countries have the highest date rate, taking population into account

In [20]:
df_last.sort_values("DeathPer1MilPop", ascending=False).loc[df_last["Population"]>=1000000,cols_to_show].head(10)

Unnamed: 0,Country,Population,Population_Density,Median_Age,NewConfirmed,NewDeaths,NewRecovered,ActiveCases,Confirmed,Deaths,Recovered,CaseFatalityRate,CaseRecoveryRate,DeathVsRecoveryRatio,NewConfirmedPer1MilPop,NewDeathPer1MilPop,ConfirmedPer1MilPop,DeathPer1MilPop,ActiveCasesPer1MilPop,DailyConfirmedGrowthRate,DailyDeathsGrowthRate,DailyRecoveredGrowthRate
5604,Belgium,11589616.0,375.564,41.8,7916.0,197.0,626.0,470503,515391,13758,31130,0.026694,0.060401,0.306496,683.025,16.998,44470.067,1187.097,40596.945,0.015599,0.014527,0.020522
45724,Peru,32971846.0,25.129,29.1,1904.0,49.0,0.0,39769,925431,34992,850670,0.037812,0.919215,0.039509,57.746,1.486,28067.309,1061.269,1206.15,0.002062,0.001402,0.0
54279,Spain,46754783.0,93.105,45.5,36491.0,760.0,0.0,1227228,1417709,40105,150376,0.028289,0.10607,0.210546,780.476,16.255,30322.224,857.773,26248.181,0.026419,0.019316,0.0
8259,Brazil,212559409.0,25.04,33.5,48655.0,566.0,38967.0,361355,5747660,163368,5222937,0.028423,0.908707,0.03033,228.901,2.663,27040.252,768.576,1700.019,0.008537,0.003477,0.007517
12389,Chile,19116209.0,24.282,35.4,897.0,22.0,1284.0,8745,524804,14633,501426,0.027883,0.955454,0.028355,46.924,1.151,27453.351,765.476,457.465,0.001712,0.001506,0.002567
2359,Argentina,45195777.0,16.177,31.9,10880.0,348.0,0.0,156928,1273356,34531,1081897,0.027118,0.849642,0.03093,240.73,7.7,28174.225,764.032,3472.183,0.008618,0.01018,0.0
7079,Bolivia,11673029.0,10.202,25.4,112.0,10.0,519.0,18240,142776,8818,115718,0.061761,0.810486,0.070807,9.595,0.857,12231.273,755.417,1562.576,0.000785,0.001135,0.004505
38939,Mexico,128932753.0,66.444,29.3,7646.0,588.0,0.0,65392,986177,96430,824355,0.097782,0.83591,0.104726,59.302,4.561,7648.77,747.909,507.179,0.007814,0.006135,0.0
60769,United Kingdom,67886004.0,272.898,40.8,22950.0,595.0,0.0,1206360,1256725,50365,0,0.040076,0.0,1.0,338.067,8.765,18512.284,741.906,17770.379,0.018601,0.011955,
17404,Ecuador,17643060.0,66.939,28.1,919.0,71.0,0.0,8754,176630,12920,154956,0.073147,0.877292,0.076962,52.088,4.024,10011.302,732.299,496.172,0.00523,0.005526,0.0


### 2. see which countries have the highest case count, taking population into account

In [21]:
df_last.sort_values("ConfirmedPer1MilPop", ascending=False).loc[df_last["Population"]>=1000000,cols_to_show].head(10)

Unnamed: 0,Country,Population,Population_Density,Median_Age,NewConfirmed,NewDeaths,NewRecovered,ActiveCases,Confirmed,Deaths,Recovered,CaseFatalityRate,CaseRecoveryRate,DeathVsRecoveryRatio,NewConfirmedPer1MilPop,NewDeathPer1MilPop,ConfirmedPer1MilPop,DeathPer1MilPop,ActiveCasesPer1MilPop,DailyConfirmedGrowthRate,DailyDeathsGrowthRate,DailyRecoveredGrowthRate
4424,Bahrain,1701583.0,1935.907,32.4,231.0,1.0,243.0,2052,84042,332,81658,0.00395,0.971633,0.004049,135.756,0.588,49390.479,195.112,1205.936,0.002756,0.003021,0.002985
46904,Qatar,2881060.0,227.322,31.9,224.0,0.0,193.0,2728,134887,233,131926,0.001727,0.978048,0.001763,77.749,0.0,46818.532,80.873,946.874,0.001663,0.0,0.001465
5604,Belgium,11589616.0,375.564,41.8,7916.0,197.0,626.0,470503,515391,13758,31130,0.026694,0.060401,0.306496,683.025,16.998,44470.067,1187.097,40596.945,0.015599,0.014527,0.020522
15929,Czechia,10708982.0,137.176,43.3,8925.0,247.0,7768.0,147994,438805,5570,285241,0.012694,0.65004,0.019153,833.413,23.065,40975.417,520.124,13819.614,0.020762,0.046402,0.027996
2654,Armenia,2963234.0,102.931,35.7,1861.0,27.0,1501.0,40576,110548,1636,68336,0.014799,0.618157,0.023381,628.03,9.112,37306.537,552.099,13693.147,0.017123,0.016781,0.022458
29499,Israel,8655541.0,402.606,30.6,665.0,16.0,765.0,8021,321326,2700,310605,0.008403,0.966635,0.008618,76.829,1.849,37123.734,311.939,926.69,0.002074,0.005961,0.002469
44839,Panama,4314768.0,55.133,29.7,1163.0,6.0,1116.0,18819,142465,2823,120823,0.019815,0.848089,0.022831,269.539,1.391,33017.998,654.265,4361.532,0.008231,0.00213,0.009323
59589,US,331002647.0,35.608,38.3,143231.0,1435.0,35302.0,6160810,10399325,241340,3997175,0.023207,0.384369,0.05694,432.719,4.335,31417.649,729.118,18612.57,0.013965,0.005982,0.00891
32154,Kuwait,4270563.0,232.128,33.7,778.0,5.0,749.0,8428,134159,826,124905,0.006157,0.931022,0.00657,182.177,1.171,31414.828,193.417,1973.51,0.005833,0.00609,0.006033
54279,Spain,46754783.0,93.105,45.5,36491.0,760.0,0.0,1227228,1417709,40105,150376,0.028289,0.10607,0.210546,780.476,16.255,30322.224,857.773,26248.181,0.026419,0.019316,0.0


### 3. Define function for interactive plot

In [22]:
def plot_mult_stats(df, cols, numShow=10, threshold=1000000):
    num_plots=len(cols)
    nrows = (num_plots+ (3-1)) // 3
    fig = make_subplots(rows=nrows, cols=3, 
                    subplot_titles=(cols))
    annot = []
    
    for i in range(num_plots):
        r = i //3 + 1
        c = (i % 3) + 1
        df_plot = df.sort_values(cols[i], ascending=False).loc[df_last["Population"]>=threshold,cols_to_show].head(numShow)   #cols_to_show     
        fig.add_trace(go.Bar(y=df_plot['Country'], x=df_plot[cols[i]], orientation='h'), row=r, col=c)

        if(max(df_plot[cols[i]])) > 1000:
            offset = max(df_plot[cols[i]]) * 0.05
            annot_content = [dict(x=value+offset, y=country, xref="x"+str(i+1), yref="y"+str(i+1), text=f'{value:,.0f}',  font=dict(family='Arial', size=10, color='rgb(0,0,0)'), showarrow=False) 
                for country, value in zip(df_plot.Country, df_plot[cols[i]])]
        elif (max(df_plot[cols[i]])) > 1:
            offset = int(max(df_plot[cols[i]])* 0.15)
            annot_content = [dict(x=value+offset, y=country, xref="x"+str(i+1), yref="y"+str(i+1), text=f'{value:,.2f}',  font=dict(family='Arial', size=10, color='rgb(0,0,0)'), showarrow=False) 
                for country, value in zip(df_plot.Country, df_plot[cols[i]])]
        else:
            offset = max(df_plot[cols[i]]) * 0.15
            annot_content = [dict(x=value-offset, y=country, xref="x"+str(i+1), yref="y"+str(i+1), text=f'{value:,.2f}',  font=dict(family='Arial', size=10, color='rgb(250,250,250)'), showarrow=False) 
                for country, value in zip(df_plot.Country, df_plot[cols[i]])]
       
        annot.append(annot_content[:])
            

    if num_plots < 4:
        h = 400
    elif num_plots < 7:
        h = 600
    elif num_plots < 10:
        h = 900
    elif num_plots < 13:
        h = 1200
    else:
        h = 1500
    
    fig.update_layout(width=1500, height=h,
        title={"text":'Countries (over 1mil in Population) with Highest Counts on '+list(df_last.Date.unique())[0], 'y':0.95, 'x':0.45, 'xanchor': 'center', 'yanchor': 'top'})#, yaxis=[{'categoryorder':"total ascending"}, {'categoryorder':"total ascending"}])
    
    annot = [item for sublist in annot for item in sublist]

    fig["layout"].update(annotations=annot)
    
    fig.update_yaxes(categoryorder="total ascending")
        
    fig.show()

In [23]:
list(df_last.Date.unique())[0]

'2020-11-11'

In [24]:
measured_stats_cols1

['NewConfirmed',
 'NewDeaths',
 'NewRecovered',
 'ActiveCases',
 'Confirmed',
 'Deaths',
 'Recovered']

In [25]:
y_widget1 = widgets.SelectMultiple(options = measured_stats_cols1, value=[ 'NewConfirmed', 'NewDeaths', 'ActiveCases'])

interact(lambda stats, num_countries, min_pop_size: plot_mult_stats(df_last, stats, num_countries, min_pop_size), 
         stats = y_widget1, num_countries = 10, min_pop_size=1000000)

interactive(children=(SelectMultiple(description='stats', index=(0, 1, 3), options=('NewConfirmed', 'NewDeaths…

<function __main__.<lambda>(stats, num_countries, min_pop_size)>

In [26]:
y_widget2 = widgets.SelectMultiple(options = measured_stats_cols2, value=[ 'NewConfirmedPer1MilPop',
 'NewDeathPer1MilPop',
 'ConfirmedPer1MilPop',
 'DeathPer1MilPop',
 'ActiveCasesPer1MilPop',
 'CaseFatalityRate',
 'DailyConfirmedGrowthRate',
 'DailyDeathsGrowthRate'
])

interact(lambda stats, num_countries, min_pop_size: plot_mult_stats(df_last, stats, num_countries, min_pop_size), 
         stats = y_widget2, num_countries = 10, min_pop_size=1000000)

interactive(children=(SelectMultiple(description='stats', index=(3, 4, 5, 6, 7, 0, 8, 9), options=('CaseFatali…

<function __main__.<lambda>(stats, num_countries, min_pop_size)>

In [27]:
def create_annotation_country1(df, cols):
    print(cols)
    df_anon1 = df.groupby("Country").tail(1)
    annon1 = [dict(x=xdate, y=value, xref="x", yref="y", text=f'{country}<br> {odate}<br> {value:,.3f}') 
          for country, xdate, odate, value in zip(df_anon1.Country, df_anon1.DaysSince100thCase, df_anon1.ObservationDate, df_anon1.value)]
    return annon1

In [28]:
def plot_country_info3(df, country, x, y, show_counts):
    cols = list(y)
#    print(cols)
    country_info = df[df["Country"].isin(country)]
    country_info_filtered = country_info.loc[:, cols+ [x, "Date", "Country"]]
    df_melt = country_info_filtered.melt(id_vars=[x, "Country", "Date"], value_vars=cols)

    if show_counts:
        fig = px.scatter(df_melt, x=x, y="value", text="value", color="Country").update_traces(mode='lines+markers+text', textposition = "top right")
    else:
        fig = px.scatter(df_melt, x=x, y="value", text='Date', color="Country").update_traces(mode='lines+markers')
    
    fig.update_layout(title={"text":'Count of {} '.format(', '.join(cols)), 'y':0.95, 'x':0.45, 'xanchor': 'center', 'yanchor': 'top'}, 
                     annotations=create_annotation(df_melt, cols))        
    fig.show(config={"editable":True})


#### 2. Simple Interactive Plot

In [29]:
x_widget = widgets.Dropdown(options =date_stats_cols, value = 'DaysSince100thCase')
y_widget1 = widgets.SelectMultiple(options = measured_stats_cols1+measured_stats_cols2, value=[ 'NewConfirmed', 'NewDeaths'])

country_widget1 = widgets.SelectMultiple(
    options=sorted(df1["Country"].unique()),
    value=["Portugal"]
)

In [30]:
def show_infection_info3(df, country, x, y):
    cols = list(y)
    print(cols)
    country_info = df[df["Country"].isin(country)]
    country_info_filtered = country_info.loc[:, cols+ [x, "Date", "Country"]]
    df_melt = country_info_filtered.melt(id_vars=[x, "Country", "Date"], value_vars=cols)
    return df_melt

In [31]:
interact(lambda country, x, y: show_infection_info3(df1, country, x, y), 
        country = country_widget1, x = x_widget, y = y_widget1)

interactive(children=(SelectMultiple(description='country', index=(157,), options=('Afghanistan', 'Albania', '…

<function __main__.<lambda>(country, x, y)>

In [32]:
def plot_infection_info3(df, country, x, y, show_counts, use_log):
    cols = list(y)
#    print(cols)
    country_info = df[df["Country"].isin(country)]
    country_info_filtered = country_info.loc[:, cols+ [x, "ObservationDate", "Country"]]
    df_melt = country_info_filtered.melt(id_vars=[x, "Country", "ObservationDate"], value_vars=cols)
    #return df_melt
#    print(df_melt)
    if show_counts:
        fig = px.scatter(df_melt, x=x, y="value", text="value", color="Country").update_traces(mode='lines+text', textposition = "top right")
    else:
        fig = px.scatter(df_melt, x=x, y="value", text='ObservationDate', color="Country").update_traces(mode='lines')
    
    fig.update_layout(title={"text":'Count of {} '.format(', '.join(cols)), 'y':0.95, 'x':0.45, 'xanchor': 'center', 'yanchor': 'top'}, 
                     annotations=create_annotation(df_melt, cols, x))        
    
    if use_log:
        fig.update_layout(yaxis_type="log")
    fig.show(config={"editable":True})


In [33]:
def create_annotation(df, cols, measure):
    print(cols)
    df_anon1 = df.groupby("Country").tail(1)
    annon1 = [dict(x=xdate, y=value, xref="x", yref="y", text=f'{country}<br> {odate}<br> {value:,.3f}') 
          for country, xdate, odate, value in zip(df_anon1.Country, df_anon1[measure], df_anon1.ObservationDate, df_anon1.value)]
    return annon1

In [34]:
x_widget2 = widgets.Dropdown(options =date_stats_cols, value = 'DaysSince100thCase')
y_widget2 = widgets.SelectMultiple(options = measured_stats_cols1+measured_stats_cols2, value=['NewConfirmedPer1MilPop'])

country_widget2 = widgets.SelectMultiple(
    options=sorted(df1["Country"].unique()),
    value=["Portugal", "US", "Italy"]
)

In [35]:
interact(lambda country, x, y, show_counts, use_log: plot_infection_info3(df1, country, x, y, show_counts, use_log), 
        country = country_widget2, x = x_widget2, y = y_widget2, show_counts=False, use_log=False)

interactive(children=(SelectMultiple(description='country', index=(157, 201, 100), options=('Afghanistan', 'Al…

<function __main__.<lambda>(country, x, y, show_counts, use_log)>

In [36]:
x_widget3 = widgets.Dropdown(options =date_stats_cols, value = 'DaysSince10thDeath')
y_widget3 = widgets.SelectMultiple(options = measured_stats_cols1+measured_stats_cols2, value=['NewDeathPer1MilPop'])

country_widget3 = widgets.SelectMultiple(
    options=sorted(df1["Country"].unique()),
    value=["Portugal", "US", "Italy"]
)

In [37]:
interact(lambda country, x, y, show_counts, use_log: plot_infection_info3(df1, country, x, y, show_counts, use_log), 
        country = country_widget3, x = x_widget3, y = y_widget3, show_counts=False, use_log=False)

interactive(children=(SelectMultiple(description='country', index=(157, 201, 100), options=('Afghanistan', 'Al…

<function __main__.<lambda>(country, x, y, show_counts, use_log)>

In [38]:
x_widget4 = widgets.Dropdown(options =date_stats_cols, value = 'DaysSince100thCase')
y_widget4 = widgets.SelectMultiple(options = measured_stats_cols1+measured_stats_cols2, value=['ActiveCases'])

country_widget4 = widgets.SelectMultiple(
    options=sorted(df1["Country"].unique()),
    value=["Portugal", "US", "Italy"]
)

In [39]:
interact(lambda country, x, y, show_counts, use_log: plot_infection_info3(df1, country, x, y, show_counts, use_log), 
        country = country_widget3, x = x_widget3, y = y_widget3, show_counts=False, use_log=False)

interactive(children=(SelectMultiple(description='country', index=(157, 201, 100), options=('Afghanistan', 'Al…

<function __main__.<lambda>(country, x, y, show_counts, use_log)>