In this notebook, I have done Data Analysis and Visualization of the COVID-19 World Vaccination Progress dataset on Kaggle. 

Please feel free to reach out to me on LinkedIn if you have any questions! 
https://www.linkedin.com/in/vidurgupta1999/

In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected = False)
import cufflinks as cf
cf.go_offline(True)

In [None]:
# Read the two input CSV files: country-level vaccination records and the
# aggregated per-country COVID-19 figures.
VACCINATIONS_CSV = '../input/covid-world-vaccination-progress/country_vaccinations.csv'
CASES_CSV = '../input/covid-19/data/countries-aggregated.csv'

data = pd.read_csv(VACCINATIONS_CSV)
data2 = pd.read_csv(CASES_CSV)

In [None]:
# Peek at the first five rows of the vaccination data.
data.head(n=5)

In [None]:
# Peek at the last five rows of the vaccination data.
data.tail(n=5)

In [None]:
# Descriptive statistics of `data`; with default arguments pandas summarises
# only the numeric columns of a mixed-dtype frame.
data.describe()

In [None]:
# Print column names, non-null counts and dtypes of `data`.
data.info()

In [None]:
# Number of missing values in each column.
data.isna().sum()

In [None]:
# Pairwise correlation of the numeric columns.
# The frame also holds string columns (country, date, vaccines, ...); pandas >= 2.0
# no longer drops those silently in DataFrame.corr() and raises instead, so the
# numeric columns are selected explicitly. This works on older pandas as well.
data.select_dtypes(include='number').corr()

In [None]:
# Heatmap of the pairwise correlations between the numeric columns.
# Non-numeric columns are excluded explicitly because pandas >= 2.0 raises on
# string columns in DataFrame.corr() instead of ignoring them.
plt.figure(figsize=(12,10))
sns.heatmap(data.select_dtypes(include='number').corr())

In [None]:
# Correlation of every numeric column with total_vaccinations_per_hundred, ascending.
# Numeric columns are selected explicitly: pandas >= 2.0 raises on the frame's
# string columns in DataFrame.corr() instead of dropping them.
numeric_corr = data.select_dtypes(include='number').corr()
numeric_corr['total_vaccinations_per_hundred'].sort_values().plot(
    kind='bar',
    title='Correlation with total_vaccinations_per_hundred',
)

In [None]:
# Correlation of every numeric column with people_vaccinated_per_hundred, ascending.
# Numeric columns are selected explicitly: pandas >= 2.0 raises on the frame's
# string columns in DataFrame.corr() instead of dropping them.
numeric_corr = data.select_dtypes(include='number').corr()
numeric_corr['people_vaccinated_per_hundred'].sort_values().plot(
    kind='bar',
    title='Correlation with people_vaccinated_per_hundred',
)

In [None]:
# Correlation of every numeric column with daily_vaccinations_per_million, ascending.
# Numeric columns are selected explicitly: pandas >= 2.0 raises on the frame's
# string columns in DataFrame.corr() instead of dropping them.
numeric_corr = data.select_dtypes(include='number').corr()
numeric_corr['daily_vaccinations_per_million'].sort_values().plot(
    kind='bar',
    title='Correlation with daily_vaccinations_per_million',
)

In [None]:
# Animated world map of daily vaccinations per million, one frame per date.
# Plotly Express creates animation frames in order of first appearance, so the
# rows are sorted by date to make the slider run chronologically
# (assumes ISO "YYYY-MM-DD" date strings, which sort correctly as text).
daily_by_date = data.sort_values('date')
fig = px.choropleth(
    data_frame=daily_by_date,
    locations='country',
    locationmode='country names',
    color='daily_vaccinations_per_million',
    animation_frame='date',
    # animation_group identifies the same object across frames; that is the
    # country, not the vaccine list (which many countries share).
    animation_group='country',
)
fig.update_layout(title='Daily Vaccinations Per Million')

fig.show()

In [None]:
# Bubble map of daily vaccination counts per country, animated over time.

# Rows without a daily count cannot be sized as markers. The drop is done in
# place, so every later cell works on the filtered frame as well.
data.dropna(subset=['daily_vaccinations'],inplace=True)

# Split the "YYYY-MM-DD" date string into its parts. Note that the `Date`
# column holds only the day-of-month.
date_parts = data['date'].str.split('-', expand=True)
data['Year'] = date_parts[0]
data['Month'] = date_parts[1]
data['Date'] = date_parts[2]

# Animate on the full date, not on the day-of-month column: using `Date` would
# merge e.g. 5 January and 5 February into a single frame. Sorting first makes
# the frames chronological, because frames follow order of first appearance.
fig1 = px.scatter_geo(
    data.sort_values('date'),
    color='vaccines',
    locationmode="ISO-3",
    locations="iso_code",
    opacity=0.6,
    hover_name="iso_code",
    size="daily_vaccinations",
    projection='conic equal area',
    animation_group="iso_code",
    color_continuous_scale='blackbody',
    animation_frame="date",
    scope='world',
    symbol='vaccines',
    template="plotly_dark",
    title='Vaccination Count Over The World Over Time',
)
# Frame duration (ms) used by the first button of the animation menu.
fig1.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 400

fig1.update_geos(
    landcolor="green",
    oceancolor="blue",
    showocean=True,
    lakecolor="LightBlue"
)

fig1.update_traces(
    marker_coloraxis=None
)
fig1.show()

# Which vaccine is distributed in the world the most?

In [None]:
# Number of rows (non-null `country` entries) recorded for each value of
# `vaccines`, drawn with the cufflinks `iplot` accessor.
rows_per_vaccine = data.groupby('vaccines')['country'].count()
rows_per_vaccine.iplot(title='Covid vaccines Count')

We see that Pfizer, AstraZeneca, and Moderna are the main sources of vaccination.

In [None]:
# Animated world map of total vaccinations per hundred people, one frame per date.
# Plotly Express creates animation frames in order of first appearance, so the
# rows are sorted by date to make the slider run chronologically
# (assumes ISO "YYYY-MM-DD" date strings, which sort correctly as text).
total_by_date = data.sort_values('date')
fig = px.choropleth(
    data_frame=total_by_date,
    locations='country',
    locationmode='country names',
    color='total_vaccinations_per_hundred',
    animation_frame='date',
    # animation_group identifies the same object across frames; that is the
    # country, not the vaccine list (which many countries share).
    animation_group='country',
)
fig.update_layout(title='Total Vaccinations per hundred')
fig.show()

Visualization of the impact of each vaccine

In [None]:
# Count in how many countries each individual vaccine is used.
# One row per iso_code is kept, and its comma-separated `vaccines` string is
# split into a list of names. Missing entries are skipped.
s = data.drop_duplicates(subset=['iso_code'])['vaccines'].dropna().str.split(',')

dic = {}
for vaccine_list in s:
    for raw_name in vaccine_list:
        # strip() removes whitespace on both sides; the previous manual check
        # removed at most one space from one side and failed on empty tokens.
        vaccine = raw_name.strip()
        if not vaccine:
            continue
        dic[vaccine] = dic.get(vaccine, 0) + 1

px.bar(x=list(dic.keys()),y=list(dic.values()),color=list(dic.keys()),template='plotly_dark',labels={'x':'Vaccine Name','y':'Total Count'})

In [None]:
# NOTE(review): this cell plots plotly's bundled gapminder sample rather than
# the vaccination data, and it rebinds `fig` — confirm it is still wanted.
df = px.data.gapminder()

# Bubble map of population per country, coloured by continent, animated by year.
geo_options = dict(
    locations="iso_alpha",
    color="continent",
    hover_name="country",
    size="pop",
    animation_frame="year",
    projection="natural earth",
)
fig = px.scatter_geo(df, **geo_options)
fig.show()

Most Popular Vaccine

In [None]:
# Top 20 countries by mean total vaccinations per hundred and by mean daily
# vaccinations per million.
# The previous `[40:]` slice only equals "top 20" when there are exactly 60
# countries, and countries whose mean is NaN sort last and so ended up in the
# "top" slice. NaN means are dropped and the 20 largest taken explicitly
# (ascending order is kept, as before).
TOP_N = 20

mean_total_per_hundred = data.groupby('country')['total_vaccinations_per_hundred'].mean()
mean_total_per_hundred.dropna().sort_values().tail(TOP_N).iplot(title='Top 20 countries based on Total Vaccinations Per Hundred')

mean_daily_per_million = data.groupby('country')['daily_vaccinations_per_million'].mean()
mean_daily_per_million.dropna().sort_values().tail(TOP_N).iplot(title='Top 20 countries based on Daily Vaccinations Per Million ')

How did the vaccine affect the change in death rate:
Amount of confirmed cases, recoveries, and deaths.

In [None]:
# Flag every row of `data2` with 1 when the vaccination data has a record for
# the same (country, date) pair, else 0.
# The flag is computed per row of `data2`. The previous version built the flags
# in sorted group-key order and assigned them positionally, which only lines up
# when `data2` is already sorted by (Country, Date) with unique pairs.
# Dates are parsed on both sides so the comparison does not depend on whether
# `data2['Date']` has already been converted to datetime.
vaccination_keys = pd.MultiIndex.from_arrays(
    [data['country'], pd.to_datetime(data['date'])]
)
case_keys = pd.MultiIndex.from_arrays(
    [data2['Country'], pd.to_datetime(data2['Date'])]
)
data2['Vaccine_is_there'] = case_keys.isin(vaccination_keys).astype(int)
data2.head()

In [None]:
# Keep only the countries that have at least one row flagged with vaccination data.
# numeric_only=True stops the sum from touching the `Date` column: pandas >= 2.0
# would otherwise concatenate it when it is a string and raise when it is datetime.
t = data2.groupby('Country').sum(numeric_only=True)
# Countries whose flag sums to zero never appear in the vaccination data.
cons = t.index[t['Vaccine_is_there'] == 0]
dan = data2[~data2['Country'].isin(cons)]

Corresponding Outcomes of COVID-19 Vaccines

In [None]:
# Deaths over time for each country, split into the rows without and with
# vaccination data, with a dropdown to switch between countries.
data2['Date'] = pd.to_datetime(data2['Date'])
dfs = list(data2.groupby("Country"))
first_title = dfs[0][0]

# Hard-coded x position of the vertical marker line — presumably the start of
# the vaccination roll-out; TODO confirm.
MARKER_DATE = '2020-12-29'
# Each country contributes two traces: before and after vaccination data exists.
TRACES_PER_COUNTRY = 2

traces = []
buttons = []
for i, (name, country_df) in enumerate(dfs):
    before = country_df[country_df['Vaccine_is_there'] == 0]
    after = country_df[country_df['Vaccine_is_there'] == 1]

    # Only the first country is shown initially. Both traces are kept; the
    # previous version kept only the first one and never plotted the
    # post-vaccination series.
    shown = (i == 0)
    traces.append(go.Scatter(x=before['Date'], y=before['Deaths'].values,
                             name='Before vaccination', visible=shown))
    traces.append(go.Scatter(x=after['Date'], y=after['Deaths'].values,
                             name='After vaccination', visible=shown))

    # Visibility mask over all traces: switch on this country's pair only.
    visible = [False] * (TRACES_PER_COUNTRY * len(dfs))
    first = TRACES_PER_COUNTRY * i
    visible[first:first + TRACES_PER_COUNTRY] = [True] * TRACES_PER_COUNTRY

    buttons.append(dict(label=name,
                        method="update",
                        args=[{"visible": visible},
                              # title.text changes only the text and keeps the
                              # centred title position set below.
                              {"title.text": f"{name}"}]))

updatemenus = [{'active': 0, "buttons": buttons}]
# Vertical line at MARKER_DATE. yref='paper' makes it span the full plot height
# instead of stopping at a fixed y value of 3000.
shapes = [{'type': 'line',
           'xref': 'x',
           'yref': 'paper',
           'x0': MARKER_DATE,
           'y0': 0,
           'x1': MARKER_DATE,
           'y1': 1}]
fig = go.Figure(data=traces,
                layout=dict(updatemenus=updatemenus, shapes=shapes, template='plotly_dark'))
fig.update_layout(title=first_title, title_x=0.5)
fig.show()