## Plotting Animated Bubble Charts in Python

In [40]:
#importing libraries

import pandas as pd
import numpy as np
from pandas.tseries.offsets import *
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
import datetime 
import warnings
warnings.filterwarnings('ignore')

In [41]:
# Load the COVID data set from the CSV file next to the notebook.

df = pd.read_csv("owid-covid-data_upd.csv")

# Keep an independent snapshot of the frame as loaded. A plain `df_copy = df`
# only binds a second name to the same object, so any in-place edit of `df`
# would also change the "copy".
df_copy = df.copy()

In [42]:
# Sanity check: frame dimensions first, then the leading five rows.
print(df.shape)
df.head()

(100445, 8)


Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths
0,AFG,Asia,Afghanistan,2/24/2020,1.0,1.0,,
1,AFG,Asia,Afghanistan,2/25/2020,1.0,0.0,,
2,AFG,Asia,Afghanistan,2/26/2020,1.0,0.0,,
3,AFG,Asia,Afghanistan,2/27/2020,1.0,0.0,,
4,AFG,Asia,Afghanistan,2/28/2020,1.0,0.0,,


In [43]:
# Count the missing values in every column.

df.isna().sum()

iso_code            0
continent        4705
location            0
date                0
total_cases      3660
new_cases        3663
total_deaths    13820
new_deaths      13664
dtype: int64

In [44]:
#As my target is to color the continents separately, I am focusing more on null continent values

In [45]:
# Remove the rows whose "location" is not a country but an aggregate
# (continent-wide, European Union, international or world totals).
# A single `isin` mask replaces nine successive filters, each of which
# rebuilt the whole frame.

df = df[~df['location'].isin([
    'World',
    'International',
    'Asia',
    'Africa',
    'European Union',
    'North America',
    'Europe',
    'Oceania',
    'South America',
])]

In [46]:
# Rows whose continent is missing.
df2 = df.loc[df['continent'].isna()]

In [47]:
# Non-null counts per location among the rows that have no continent.
df2.groupby('location').count()

Unnamed: 0_level_0,iso_code,continent,date,total_cases,new_cases,total_deaths,new_deaths
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1


In [48]:
#df['continent'].fillna(df['location'], inplace = True)

In [49]:
# Distinct continent labels remaining in the data.
pd.unique(df['continent'])

array(['Asia', 'Europe', 'Africa', 'North America', 'South America',
       'Oceania'], dtype=object)

In [50]:
# Keep only the columns needed for the chart.
# `.copy()` makes df_plot an independent frame: the following cells add and
# overwrite columns on it, which otherwise raises SettingWithCopyWarning
# (currently hidden by the global warnings filter).
df_plot = df[['date', 'continent', 'location', 'total_cases', 'total_deaths']].copy()

In [51]:
# Parse the date strings into datetime values so that date arithmetic and
# comparisons can be applied to the column.
df_plot = df_plot.assign(date=pd.to_datetime(df_plot['date']))

In [52]:
# Label every row with a Friday "week" date (weekday=4 is Friday).
# Adding the offset moves a date forward to the next Friday; a date that is
# already a Friday moves to the Friday one week later.
# NOTE(review): if a Friday should stay in its own week, this needs adjusting - confirm intent.
df_plot = df_plot.assign(week=df_plot['date'] + pd.offsets.Week(weekday=4))

In [53]:
# Preview the first two rows to check the new week column.
df_plot.iloc[:2]

Unnamed: 0,date,continent,location,total_cases,total_deaths,week
0,2020-02-24,Asia,Afghanistan,1.0,,2020-02-28
1,2020-02-25,Asia,Afghanistan,1.0,,2020-02-28


In [54]:
# Keep only the rows dated after 5 March 2020, then re-inspect the frame.
df_plot = df_plot.loc[df_plot['date'] > '2020-03-05']
print(df_plot.shape)
df_plot.head()

(93784, 6)


Unnamed: 0,date,continent,location,total_cases,total_deaths,week
11,2020-03-06,Asia,Afghanistan,4.0,,2020-03-13
12,2020-03-07,Asia,Afghanistan,4.0,,2020-03-13
13,2020-03-08,Asia,Afghanistan,5.0,,2020-03-13
14,2020-03-09,Asia,Afghanistan,7.0,,2020-03-13
15,2020-03-10,Asia,Afghanistan,8.0,,2020-03-13


In [55]:
# Collapse the daily rows to one row per (week, continent, location), keeping
# the largest total_cases and total_deaths seen within each group.
df_fin = (
    df_plot
    .groupby(['week', 'continent', 'location'], as_index=False)
    [['total_cases', 'total_deaths']]
    .max()
)

In [56]:
# Store the week as a 'YYYY-MM-DD' string.
# Wrapping the column in pd.to_datetime keeps the cell safe to re-run: the
# bare `.dt` accessor raises once the column already holds strings.
df_fin['week'] = pd.to_datetime(df_fin['week']).dt.strftime('%Y-%m-%d')

In [57]:
# Load the per-location population table and preview its first five rows.
df_pop = pd.read_csv('population.csv')
df_pop.head()

Unnamed: 0,location,population
0,Afghanistan,38928340.0
1,Africa,1340598000.0
2,Albania,2877800.0
3,Algeria,43851040.0
4,Andorra,77265.0


In [58]:
# Attach the population to every weekly row. A left join keeps rows whose
# location has no match in the population table (population stays NaN).
df_viz = df_fin.merge(df_pop, how='left', on='location')

In [59]:
# Count the missing values per column after the merge.
df_viz.isna().sum()

week               0
continent          0
location           0
total_cases      478
total_deaths    1700
population        20
dtype: int64

In [60]:
# Replace missing total_cases, total_deaths and population values with 0.
# The fill is applied to the frame and reassigned instead of calling
# `df_viz[col].fillna(0, inplace=True)`: that form mutates a column selected
# from the frame, which pandas with Copy-on-Write no longer writes back to
# df_viz.

df_viz = df_viz.fillna({'total_cases': 0, 'total_deaths': 0, 'population': 0})

In [61]:
# Keep only the rows with a positive case count and a positive death count.
# `gt(0)` is False for NaN as well as for 0, so rows with missing totals are
# dropped here too, whereas `!= 0` would let NaN through. The chart uses
# logarithmic axes, which cannot display zero or missing values.
df_viz = df_viz[df_viz['total_cases'].gt(0) & df_viz['total_deaths'].gt(0)]

In [62]:
# Call the location column 'country' from here on, then summarise dtypes and
# non-null counts.
df_viz = df_viz.rename(columns={'location': 'country'})
df_viz.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11844 entries, 0 to 13543
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   week          11844 non-null  object 
 1   continent     11844 non-null  object 
 2   country       11844 non-null  object 
 3   total_cases   11844 non-null  float64
 4   total_deaths  11844 non-null  float64
 5   population    11844 non-null  float64
dtypes: float64(3), object(3)
memory usage: 647.7+ KB


In [63]:
# Range check on total_cases: largest value, then smallest.
print(df_viz['total_cases'].max(), df_viz['total_cases'].min(), sep='\n')

33723095.0
1.0


In [64]:
# Range check on total_deaths: largest value, then smallest.
print(df_viz['total_deaths'].max(), df_viz['total_deaths'].min(), sep='\n')

605567.0
1.0


In [65]:
# Range check on population: largest value, then smallest.
print(df_viz['population'].max(), df_viz['population'].min(), sep='\n')

1439323774.0
33938.0


In [66]:
# Optional export, left disabled:
#df_viz.to_csv('df_viz_final.csv')

# Preview the first five rows of the frame that feeds the chart.
df_viz.head()

Unnamed: 0,week,continent,country,total_cases,total_deaths,population
0,2020-03-13,Africa,Algeria,24.0,1.0,43851040.0
5,2020-03-13,Africa,Egypt,67.0,1.0,102334400.0
8,2020-03-13,Africa,Morocco,6.0,1.0,36910560.0
23,2020-03-13,Asia,China,80803.0,3169.0,1439324000.0
25,2020-03-13,Asia,Hong Kong,129.0,3.0,7496988.0


In [67]:
# Animated bubble chart: total cases (x) against total deaths (y) on log axes,
# one animation frame per week, one bubble per country sized by population
# and coloured by continent.
fig = px.scatter(
    df_viz,
    x='total_cases',
    y='total_deaths',
    size='population',
    size_max=45,
    color='continent',
    hover_name='country',
    animation_frame='week',
    animation_group='country',
    log_x=True,
    log_y=True,
    range_x=[1, 68723095],
    range_y=[1, 2000567],
)


In [68]:
# Set the frame duration stored in the arguments of the first button of the
# first update menu to 300, then render the figure.
# NOTE(review): presumably this button is the animation's Play control and the
# duration is in milliseconds - confirm against the plotly documentation.
fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 300
fig.show()

In [69]:
# Rebuild the animated bubble chart, this time with a title, axis labels and
# a white background.
fig = px.scatter(
    df_viz,
    x='total_cases',
    y='total_deaths',
    size='population',
    size_max=45,
    color='continent',
    hover_name='country',
    animation_frame='week',
    animation_group='country',
    log_x=True,
    log_y=True,
    range_x=[1, 68723095],
    range_y=[1, 2000567],
)

# Draw markers only; the size value is mapped to the marker area.
fig.update_traces(mode='markers', marker={'sizemode': 'area'})

# Title, axis titles and plain white paper/plot backgrounds.
fig.update_layout(
    title='Global Covid Cases Vs Deaths, from 13th March, 2020',
    xaxis={
        'title': 'Total Covid Cases',
        'gridcolor': None,
        'type': 'log',
        'gridwidth': None,
    },
    yaxis={
        'title': 'Total Covid Deaths',
        'gridcolor': None,
        'gridwidth': None,
    },
    paper_bgcolor='rgb(255, 255, 255)',
    plot_bgcolor='rgb(255, 255, 255)',
)

# Set the frame duration on the first update-menu button to 300, then render.
fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 300
fig.show()