In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Location of the country-level vaccination CSV, relative to the notebook.
VACCINATIONS_CSV = '../input/covid-world-vaccination-progress/country_vaccinations.csv'
df = pd.read_csv(VACCINATIONS_CSV)

In [None]:
# Preview the first rows to check that the CSV loaded as expected.
df.head()

In [None]:
# Column dtypes and non-null counts of the raw frame, before any cleaning.
df.info()

In [None]:
# Heatmap of the null mask: every cell of `df` is drawn as missing / present,
# which makes the sparsely populated columns easy to spot.
missing_fig, missing_ax = plt.subplots(figsize=(15, 5))
sns.heatmap(df.isna(), ax=missing_ax)

* # All the missing data will be filled with 0 rather than null 
* # This will allow us to have the datatypes as integers
* # Some of the ISO codes are missing; this will be dealt with separately

In [None]:
# Which countries have rows without an ISO code, and how many rows each.
iso_missing = df['iso_code'].isna()
df.loc[iso_missing, 'country'].value_counts()

* It seems that the ISO codes for these 4 countries are missing.
* All of these countries are part of the United Kingdom.
* Using the weblink, I found the ISO code for the UK, which is GBR.

In [None]:
# Fill every missing ISO code with the United Kingdom's code.
# Assigning the result back (instead of calling fillna(..., inplace=True) on the
# column selection) guarantees that `df` itself is updated; the chained in-place
# form warns in recent pandas and does not modify `df` under Copy-on-Write.
# NOTE(review): this assumes all rows lacking an ISO code belong to UK nations,
# as observed in the value_counts() cell above - confirm if the data changes.
df['iso_code'] = df['iso_code'].fillna('GBR')

In [None]:
# Replace every remaining missing value in the frame with 0.
df = df.fillna(value=0)

In [None]:
# Parse the date column into datetimes so rows can be ordered chronologically.
df = df.assign(date=pd.to_datetime(df['date']))

In [None]:
# Oldest rows first (ascending is the default ordering).
df = df.sort_values(by='date')

In [None]:
# Turn the datetimes back into month-day-year strings; the plots below use
# the 'date' column as their animation frame.
DATE_LABEL_FORMAT = '%m-%d-%Y'
df['date'] = df['date'].dt.strftime(DATE_LABEL_FORMAT)

In [None]:
# Re-check dtypes and non-null counts now that missing values have been filled
# and the date column has been re-formatted.
df.info()

# Data is now ready!

In [None]:
pip install plotly

# Daily total vaccinations by country

In [None]:
import plotly.express as px

# Animated world map: one frame per 'date' value, each country (matched by its
# ISO code) shaded by that row's total_vaccinations.
world_map_options = dict(
    locations="iso_code",            # country code column
    color="total_vaccinations",      # value that drives the colour
    hover_name="country",            # label shown on hover
    animation_frame="date",          # one animation step per date
    projection="natural earth",
    color_continuous_scale='RdBu',
    range_color=[0, 5000000],        # fixed colour range across all frames
)
fig = px.choropleth(df, **world_map_options)
fig.show()
# Also save the interactive figure as a standalone HTML file.
fig.write_html("example_map.html")

In [None]:
# Take an explicit copy of the USA rows so the new column is added to an
# independent frame rather than to a filtered slice of `df` (assigning onto the
# slice triggers SettingWithCopyWarning and has ambiguous view/copy behaviour).
USA = df[df['iso_code'] == 'USA'].copy()
# Running sum of total_vaccinations in the frame's current row order.
# NOTE(review): if total_vaccinations is already a running total in the source
# data, this accumulates it a second time - confirm the intended metric.
USA['cummulative'] = USA['total_vaccinations'].cumsum()

In [None]:
# Display the USA subset, including the newly added 'cummulative' column.
USA

# Cumulative vaccinations in the USA since 22nd Dec

In [None]:
import plotly.express as px
# Same animated map as before, restricted to the USA rows and coloured by the
# 'cummulative' column instead of the raw totals.
usa_map_options = dict(
    locations="iso_code",            # country code column
    color="cummulative",             # value that drives the colour
    hover_name="country",            # label shown on hover
    animation_frame="date",          # one animation step per date
    projection="robinson",
    color_continuous_scale='magma',
    range_color=[0, 150000000],      # fixed colour range across all frames
)
fig = px.choropleth(USA, **usa_map_options)
fig.show()
# Written to the same file name as the world map, so that file is replaced.
fig.write_html("example_map.html")

In [None]:
# The ten ISO codes with the most rows in the dataset.
df['iso_code'].value_counts().head(10)

In [None]:
# ISO codes of the countries compared in the rest of the notebook, in the order
# their rows are stacked into `final`.
TOP_ISO_CODES = ['USA', 'GBR', 'CAN', 'CHN', 'ISR', 'RUS', 'MEX']


def _running_total(frame, iso_code):
    """Return the running sum of total_vaccinations for one ISO code.

    The rows of `frame` whose 'iso_code' equals `iso_code` are accumulated in
    their current order. The result has two columns: 'index' (the original row
    labels of `frame`) and 'total_vaccinations' (the running sum).
    """
    rows = frame.loc[frame['iso_code'] == iso_code, ['total_vaccinations']]
    return rows.cumsum().reset_index()


# One frame per country, stacked top to bottom. pd.concat replaces the chain of
# DataFrame.append calls, which were removed in pandas 2.0.
final = pd.concat([_running_total(df, code) for code in TOP_ISO_CODES])

In [None]:
# Expose the row labels as an 'index' column so `df` can be matched against the
# 'index' column of `final`. Guarded so that re-running this cell is a no-op:
# an unguarded second reset_index() would add a 'level_0' column and a third
# would raise because 'level_0' already exists.
if 'index' not in df.columns:
    df = df.reset_index()
# Copy of the running total under the column name used by the later plots.
final['TOTAL VACC'] = final['total_vaccinations'] * 1

In [None]:
# Keep only the rows of `df` that have a running total in `final`, matched on
# the shared 'index' column.
df_5 = pd.merge(df, final, on='index', how='inner')

In [None]:
# Inspect the columns, dtypes and row count of the merged frame.
df_5.info()

In [None]:
# Animated scatter: daily vaccinations against the running total, one labelled
# marker per ISO code, stepping through the 'date' values.
scatter_options = dict(
    x="daily_vaccinations",
    y="TOTAL VACC",
    animation_frame="date",
    animation_group="iso_code",
    hover_name="iso_code",
    text='iso_code',
    range_x=[0, 1500000],
    range_y=[0, 175000000],
)
fig = px.scatter(df_5, **scatter_options)

# Large, uniformly coloured markers so the ISO-code text fits inside them.
fig.update_traces(marker={'size': 40, 'color': 'DarkSlateGrey'})

fig.show()


In [None]:

# Animated map limited to the merged rows, coloured by the 'TOTAL VACC' column.
top7_map_options = dict(
    locations="iso_code",            # country code column
    color="TOTAL VACC",              # value that drives the colour
    hover_name="country",            # label shown on hover
    animation_frame="date",          # one animation step per date
    projection="natural earth",
    color_continuous_scale='RdBu',
    range_color=[0, 40000000],       # fixed colour range across all frames
    title='Top 7 Countries vaccine progress!',
)
fig = px.choropleth(df_5, **top7_map_options)
fig.show()
# Written to the same file name as the earlier maps, so that file is replaced.
fig.write_html("example_map.html")

In [None]:
# Save the merged frame next to the notebook, without the row index.
df_5.to_csv(path_or_buf='dataframe.csv', index=False)

In [None]:
# Wide table: one row per date, one column per ISO code, holding 'TOTAL VACC'
# (pivot_table's default aggregation is applied where a date/ISO pair repeats).
df_f = pd.pivot_table(df_5, values='TOTAL VACC', index=['date'], columns='iso_code')
df_f.head(20)

In [None]:
# Move 'date' out of the index and back into an ordinary column.
df_f = df_f.reset_index()

In [None]:
# List the column labels of the pivoted frame.
df_f.columns

In [None]:
# Columns of the pivoted frame that hold per-country values.
country_cols = ['CAN', 'CHN', 'GBR', 'ISR', 'MEX', 'RUS', 'USA']

# Round-trip the dates through datetime so the rows are ordered chronologically,
# then store them again as month/day/year strings.
parsed_dates = pd.to_datetime(df_f['date'])
df_f['date'] = parsed_dates
df_f = df_f.sort_values(by='date')
df_f['date'] = df_f['date'].dt.strftime('%m/%d/%Y')

# Dates on which a country has no value count as 0 before accumulating.
df_f = df_f.fillna(0)
# Running sum down each country column (over 'TOTAL VACC' values, which were
# themselves produced by a cumsum earlier in the notebook).
df_f[country_cols] = df_f[country_cols].cumsum()
df_f

In [None]:
def _to_long(wide, iso):
    """Extract one country's column from the wide frame in long form.

    Returns a new frame with the columns 'date', 'Value' (the values of the
    `iso` column of `wide`) and 'iso' (the constant `iso` string). Building the
    frame with rename/assign avoids adding a column onto a column-subset slice,
    which raised SettingWithCopyWarning in the copy-pasted version.
    """
    return wide[['date', iso]].rename(columns={iso: 'Value'}).assign(iso=iso)


# One long-form frame per country; the variable names are kept because the
# following cell stacks them by name.
can = _to_long(df_f, 'CAN')
chn = _to_long(df_f, 'CHN')
gbr = _to_long(df_f, 'GBR')
isr = _to_long(df_f, 'ISR')
mex = _to_long(df_f, 'MEX')
rus = _to_long(df_f, 'RUS')
usa = _to_long(df_f, 'USA')

In [None]:
# Stack the per-country frames into one long table. pd.concat replaces the
# chained DataFrame.append calls, which were removed in pandas 2.0.
final_df = pd.concat([can, chn, gbr, isr, mex, rus, usa])
# Round-trip through datetime so rows sort chronologically rather than as text,
# then store the dates again as month/day/year strings.
final_df['date'] = pd.to_datetime(final_df['date'])
final_df = final_df.sort_values('date', ascending=True)
final_df['date'] = final_df['date'].dt.strftime('%m/%d/%Y')
final_df

In [None]:
# Rescale the values by a factor of 10,000 for plotting.
# Note: running this cell again divides the already-scaled values once more.
final_df['Value'] = final_df['Value'].div(10000)

In [None]:
# Animated horizontal bar chart: one bar per ISO code, its length given by
# 'Value', stepping through the 'date' values.
bar_options = dict(
    y="iso",
    x="Value",
    animation_frame="date",
    animation_group="iso",
    hover_name="iso",
    range_x=[0, 3500000],
    color='iso',
)
fig = px.bar(final_df, **bar_options)

fig.show()

In [None]:
# Save the long-form table next to the notebook; the header row is omitted
# (the row index is still written, as index is left at its default).
final_df.to_csv(path_or_buf='final_df.csv', header=False)

# Due to data inconsistency, only Canada, Mexico and the USA were recorded properly