## Importing Libraries

In [None]:
import plotly as py
from plotly import graph_objects as go
from plotly import express as px
from matplotlib import pyplot as plt
py.offline.init_notebook_mode(connected=False)
import seaborn as sns
import pandas as pd
import numpy as np

## Loading Dataset

In [None]:
# Vaccination progress records.
covid_df = pd.read_csv(
    filepath_or_buffer="../input/covid-world-vaccination-progress/country_vaccinations.csv"
)
# Country reference table, used for the geographical analysis.
geo_data = pd.read_csv(filepath_or_buffer="../input/geo-data/geo_data.csv")

### Labeling overseas territories with their respective country

In [None]:
# Distinct country labels in the vaccination data.
countries = pd.Series(covid_df["country"].unique())
# Flag the labels that have no matching country_name in the reference table.
known_names = geo_data["country_name"].unique()
countries_missing = ~countries.isin(known_names)
countries[countries_missing]

In [None]:
# Entries the vaccination data lists separately whose rows are relabelled
# as "United Kingdom".
UK = ["Anguilla","Bermuda", "Cayman Islands","England","Saint Helena","Scotland",
      "Wales","Montserrat","Falkland Islands","Gibraltar","Turks and Caicos Islands","Guernsey",
      "Northern Ireland","Isle of Man","Jersey"]
# Remaining relabels. Saint Lucia is an independent state, not a UK territory,
# so it is renamed instead of being folded into the United Kingdom.
# NOTE(review): "St. Lucia" assumes geo_data spells the name that way —
# confirm against geo_data.country_name.
others = {"Faeroe Islands":"Denmark","Greenland":"Denmark",
          "Hong Kong":"Hong Kong, China","Northern Cyprus":"Cyprus",
          "Saint Lucia":"St. Lucia"}
# Raw country label -> label used to join with geo_data.
val_to_replace = {region:"United Kingdom" for region in UK}
val_to_replace.update(others)

In [None]:
def value_replace(country):
    """Return the replacement label for ``country`` from ``val_to_replace``.

    Labels that have no entry in the mapping are returned unchanged.
    """
    return val_to_replace.get(country, country)

In [None]:
# Relabelled country used as the join key with geo_data.
covid_df["country_name"] = covid_df["country"].map(value_replace)

### Merging both datasets

In [None]:
# Inner join on the relabelled country name; rows without a match in
# geo_data are not kept.
vac_df = pd.merge(left=covid_df, right=geo_data, how="inner", on="country_name")
vac_df.head(4)

In [None]:
# Percentage of missing values in each column.
missing_counts = vac_df.isna().sum()
missing_counts / len(vac_df) * 100

# Top countries by Vaccines Procurement

In [None]:
# Highest total_vaccinations value recorded per country, largest first.
peak_totals = vac_df.groupby("country_name")["total_vaccinations"].max()
vacn_prcur = peak_totals.sort_values(ascending=False)
px.bar(vacn_prcur, title="Total Vaccines Procured by country")

In [None]:
# Upper-case the geo codes before they are used as map locations.
vac_df["geo"] = vac_df["geo"].str.upper()
# Peak total_vaccinations per country, keeping the geo code alongside the name.
peak_by_code = vac_df.groupby(["country_name", "geo"])["total_vaccinations"].max()
vacn_prcur = peak_by_code.reset_index()

In [None]:
# World map coloured by each country's peak total_vaccinations.
col_scale = px.colors.colorbrewer.Paired_r
fig = px.choropleth(
    vacn_prcur,
    locations="geo",
    hover_name="country_name",
    color="total_vaccinations",
    color_continuous_scale=col_scale,
    projection='natural earth',
)
text = "Total Vaccines Procured by country"
# Centred title near the top of the figure.
title = {
    "text": text,
    "x": 0.5,
    "y": 0.90,
    "xanchor": "center",
    "yanchor": "top",
    "font": {"size": 18, "color": "black"},
}
fig.update_layout(title=title)
fig.update_geos(fitbounds="locations", visible=True)
fig.show()

# How many people have been vaccinated at country level? 

In [None]:
# Peak people_vaccinated / people_fully_vaccinated per country, ordered by
# people_vaccinated (largest first).
people_cols = ["people_vaccinated", "people_fully_vaccinated"]
peak_people = vac_df.groupby("country_name")[people_cols].max()
peop_vacn = peak_people.sort_values(by="people_vaccinated", ascending=False).reset_index()
color_map = {"people_vaccinated": "#6a777d", "people_fully_vaccinated": "#3936ff"}
title_1 = "People vaccinated (first dose) vs People fully vaccinated"
# Only the first 30 rows of the sorted table are plotted.
px.bar(
    peop_vacn.head(30),
    x="country_name",
    y=people_cols,
    color_discrete_map=color_map,
    title=title_1,
)

In [None]:
# IPython help lookup for px.bar; the trailing `?` is IPython syntax, not plain Python.
px.bar?

# Vaccination scorecard Region wise

In [None]:
# Peak people-vaccinated figures per country, labelled with its four_regions value.
group_by = ["four_regions", "country_name"]
four_reg_measures = ["people_vaccinated", "people_fully_vaccinated"]
four_reg = (
    vac_df.groupby(group_by)[four_reg_measures]
    .max()
    .sort_values(by="people_vaccinated", ascending=False)
    .reset_index()
)
title_2 = "People vaccinated (first dose) vs People fully vaccinated region wise"
px.bar(
    four_reg,
    x="four_regions",
    y=four_reg_measures,
    hover_name="country_name",
    title=title_2,
    width=650,
    height=400,
)

In [None]:
# Same breakdown as above, using the eight_regions column.
group_by = ["eight_regions", "country_name"]
eight_reg_measures = ["people_vaccinated", "people_fully_vaccinated"]
eight_reg = (
    vac_df.groupby(group_by)[eight_reg_measures]
    .max()
    .sort_values(by="people_vaccinated", ascending=False)
    .reset_index()
)
# title_2 is reused from the four-region cell.
px.bar(
    eight_reg,
    x="eight_regions",
    y=eight_reg_measures,
    hover_name="country_name",
    title=title_2,
    width=800,
    height=500,
    color_continuous_scale=px.colors.colorbrewer.Paired_r,
)

# By Income Level

In [None]:
# Peak people-vaccinated figures per country, labelled with its income group.
group_by = ["country_name", "income_groups"]
income_measures = ["people_vaccinated", "people_fully_vaccinated"]
income_wise = (
    vac_df.groupby(group_by)[income_measures]
    .max()
    .sort_values(by="people_vaccinated", ascending=False)
    .reset_index()
)
title_3 = "People vaccinated (first dose) vs People fully vaccinated income groups wise"
px.bar(
    income_wise,
    x="income_groups",
    y=income_measures,
    hover_name="country_name",
    title=title_3,
    width=700,
    height=400,
)

# Mapping the Vaccination Scorecard

In [None]:
# Peak total_vaccinations_per_hundred for every reported entry, kept together
# with the merged country name and its map code.
per_entry = vac_df.groupby(["country","country_name","geo"])[
    "total_vaccinations_per_hundred"].max().reset_index()
# After the territory relabelling several reported entries can share one geo
# code (e.g. England, Scotland and Wales all join to the United Kingdom row of
# geo_data). Per-hundred rates cannot be combined across them, so keep a single
# row per code, preferring the entry reported under the merged country's own name.
per_entry["own_record"] = per_entry["country"] == per_entry["country_name"]
vac_100 = per_entry.sort_values("own_record",kind="stable").drop_duplicates(
    subset="geo",keep="last").drop(columns="own_record").reset_index(drop=True)
col_scale = px.colors.colorbrewer.Paired_r
fig = px.choropleth(vac_100,locations="geo",color="total_vaccinations_per_hundred",
                    color_continuous_scale=col_scale,projection='natural earth',hover_name="country",
                    range_color=[0,50],color_continuous_midpoint=20)
# The plotted column is total_vaccinations_per_hundred, not a count of people,
# so the title names that quantity.
text="Total vaccinations per hundred people"
title = dict(text=text,xanchor="center",yanchor="top",y=0.90,x=0.45,font=dict(size=18,color="black"))
fig.update_layout(title=title)
fig.update_geos(fitbounds="locations", visible=True)
fig.show()

# Breaking down analysis by Vaccines

In [None]:
# One row per distinct (country, vaccines) pair, then the number of rows
# for each vaccines value, smallest first.
scheme_pairs = vac_df[["country", "vaccines"]].drop_duplicates()
vaccines = scheme_pairs["vaccines"].value_counts().sort_values(ascending=True)

In [None]:
# Horizontal bar chart of the scheme counts computed in the previous cell.
title_4 = "Number of Countries using a particular Vaccination Scheme"
label = {"value": "Number of Countries", "index": "Vaccine Scheme"}
fig = px.bar(
    vaccines,
    orientation="h",
    labels=label,
    title=title_4,
    width=1000,
    height=700,
)
# Title placement and font only; no text key is given here.
title = {
    "x": 0.45,
    "y": 0.90,
    "xanchor": "center",
    "yanchor": "top",
    "font": {"size": 18, "color": "black"},
}
fig.update_layout(title=title)

# Segregating Analysis by Individual Vaccine

In [None]:
# Country attributes plus the comma-separated list of vaccines in use.
columns = ["country_name","world_bank_regions","income_groups","geo","vaccines"]
vaccines = vac_df[columns].drop_duplicates().reset_index(drop=True)
# Split each list into one column per vaccine, keeping the country attributes
# in the index, then reshape to one row per (country, vaccine).
vac_2 = vaccines.set_index(columns[:4])["vaccines"].str.split(",",expand=True)
vac_2 = vac_2.melt(ignore_index=False,value_name="Vaccines").dropna()["Vaccines"].reset_index()
vac_2["Vaccines"] = vac_2["Vaccines"].str.strip()
# The same country_name can carry several differently written vaccine lists
# (territories relabelled to one country, or lists differing only in order or
# spacing). Drop the repeats so each country counts once per vaccine.
vac_2 = vac_2.drop_duplicates().reset_index(drop=True)

# Vaccines Available

In [None]:
# Number of distinct vaccine names after splitting the lists.
distinct_vaccines = vac_2["Vaccines"].nunique()
print(f"There are {distinct_vaccines} Vaccines available")

In [None]:
def vaccines_count(df,groupby,sort_col="count",sort_order=False,colname="count"):
    """Count the rows of ``df`` in each ``groupby`` group and sort the result.

    Parameters
    ----------
    df : DataFrame to count rows in.
    groupby : column name or list of column names to group on.
    sort_col : column of the result to sort by.
    sort_order : passed to ``sort_values`` as ``ascending``.
    colname : name given to the column holding the group sizes.

    Returns
    -------
    DataFrame with the grouping columns and ``colname``, sorted by ``sort_col``.
    """
    group_sizes = df.groupby(groupby).size()
    counted = group_sizes.reset_index(name=colname)
    return counted.sort_values(by=sort_col, ascending=sort_order)

In [None]:
# Rows per vaccine (ascending, for the horizontal bar chart).
country_per_vac = vaccines_count(vac_2, "Vaccines", sort_order=True)
# Rows per country.
vac_count_by_country = vaccines_count(vac_2, "country_name")
# Rows per vaccine within each World Bank region / income group.
regions_wise = vaccines_count(vac_2, ["Vaccines", "world_bank_regions"])
inc_wise = vaccines_count(vac_2, ["Vaccines", "income_groups"])

# Number of Countries per Vaccine

In [None]:
# Horizontal bar chart: one bar per vaccine, length = value of the "count" column.
label="No of Countries"
title = "Countries count by Vaccines"
# `labels` is keyed by column name; the plotted column is "count", so that is
# the key that renames the x-axis.
# NOTE(review): no `color` argument is passed, so color_discrete_map presumably
# never matches anything — confirm whether red bars were intended.
px.bar(country_per_vac,y="Vaccines",x="count",orientation="h",
       color_discrete_map={0:"red"},labels={"count":label},title=title)

In [None]:
# Stacked bars: per-region counts, one colour per vaccine.
plot_title = "Countries Count by region"
px.bar(
    regions_wise,
    x="world_bank_regions",
    y="count",
    color="Vaccines",
    barmode="relative",
    title=plot_title,
    height=450,
)

In [None]:
# Stacked bars: per-income-group counts, one colour per vaccine.
plot_title = "Countries Count by Income groups"
px.bar(
    inc_wise,
    x="income_groups",
    y="count",
    color="Vaccines",
    barmode="relative",
    title=plot_title,
    width=700,
    height=600,
)