In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

import os


# Preparing data

In [None]:
# Load the vaccination records and index them by country name, so that later
# cells can group on / look up rows by the "country" index level.
vaccination_data = pd.read_csv("/kaggle/input/covid-world-vaccination-progress/country_vaccinations.csv")
vaccination_data = vaccination_data.set_index("country")

# Entries removed from the analysis.
# NOTE(review): presumably sub-national or non-sovereign entries that would be
# double counted or have no match in the population table - confirm.
excluded_entries = ["England", "Gibraltar", "Wales", "Northern Cyprus", "Northern Ireland", "Scotland"]

# errors="ignore": a snapshot of the CSV that lacks one of these labels should
# not abort the notebook with a KeyError; labels that are present are still dropped.
vaccination_data = vaccination_data.drop(excluded_entries, axis=0, errors="ignore")


In [None]:
# Population lookup table: keep only the country name and its 2020 population,
# indexed by country name.
world_pop = (
    pd.read_csv("/kaggle/input/population-by-country-2020/population_by_country_2020.csv")
    .loc[:, ["Country (or dependency)", "Population (2020)"]]
    .set_index("Country (or dependency)")
)

# Spot-check a single row of the lookup table.
world_pop.loc["Algeria", :]

In [None]:
# Display the (rows, columns) size of the vaccination table.
vaccination_data.shape

In [None]:
# Preview the first rows of the vaccination table.
vaccination_data.head()

## Missing Values

In [None]:
# Count the missing values in each column.
vaccination_data.isna().sum()

# Vaccines used in the world.

Here we look at the total number of vaccine doses used all over the world and at the distribution of the ten most used vaccines. (This visualization groups doses by the combination of vaccines reported for each country, so it does not give a good estimate of the exact number of doses of each individual vaccine, for example Oxford/AstraZeneca and Pfizer/BioNTech.)


In [None]:

# Extract the distinct values of the "vaccines" column.
vaccine_index = vaccination_data.vaccines.unique()

In [None]:
# Build a table with the total number of doses administered for each distinct
# value of the "vaccines" column.

# One group per distinct "vaccines" value; this groupby object is reused by later cells.
vaccines = vaccination_data.groupby("vaccines")

# Not filled in this cell; kept so that the name stays defined for any other cell that uses it.
vaccine_use = pd.DataFrame()

# Sum the daily doses of every group in a single grouped aggregation instead of
# growing an empty DataFrame one row at a time, then sort from the most used to
# the least used. The index is left unnamed so that plots built from it do not
# gain an axis label.
# Caveat: a "vaccines" value can list several vaccines, so these totals are per
# combination, not per individual vaccine.
total_vaccine = (
    vaccines["daily_vaccinations"]
    .sum()
    .rename("total_vaccination")
    .to_frame()
    .rename_axis(None)
    .sort_values(by=["total_vaccination"], ascending=False)
)
total_vaccine

In [None]:
# Bar chart of the ten rows at the top of total_vaccine (sorted in the previous cell).
top10_vaccines = total_vaccine.iloc[:10, :]

plt.rcParams["figure.figsize"] = [7, 5]
plt.title("Total vaccines used per type")
sns.barplot(
    x=top10_vaccines.index,
    y=top10_vaccines["total_vaccination"],
)
# The vaccine labels are long, so draw them vertically.
plt.xticks(rotation="vertical")
plt.draw()



This figure shows the 10 most used vaccines in the world. We can see that Moderna and Pfizer/BioNTech are the most used in the world.

In [None]:

# World map coloured by the "vaccines" value of each country, restricted to the
# ten most used values (the first ten rows of total_vaccine).
top10_mask = vaccination_data["vaccines"].isin(total_vaccine.iloc[:10, :].index)

# vaccination_data holds many rows per country, but the map only needs each
# distinct (country, iso_code, vaccines) combination once. De-duplicating keeps
# the figure small without changing what is drawn. reset_index() exposes the
# "country" index as a regular column so it can be used as the hover name.
top10_countries = (
    vaccination_data.loc[top10_mask, ["iso_code", "vaccines"]]
    .reset_index()
    .drop_duplicates()
)

fig = px.choropleth(
    top10_countries,
    locations="iso_code",
    color='vaccines',
    hover_name="country",
    projection="natural earth",
    title="Distribution of the most 10 used vaccines all over the word",
)
fig.show()

This figure shows the distribution of the most used vaccines all over the world.

# Country vaccination:

In this section we will study the progress of vaccination in countries.

In [None]:
# Group the vaccination records by country (the index level named "country").
country_data = vaccination_data.groupby("country")

# Summary statistics for the per-country groups created above. The original cell
# described the unrelated `vaccines` groupby here, which looks like a copy-paste slip.
country_data.describe()

In [None]:
# Build three per-country summary tables, each sorted in descending order:
#   total_vaccination_per_country  - total_vaccinations, country, iso_code
#   total_people_vaccinated        - people_vaccinated
#   total_people_fully_vaccinated  - people_fully_vaccinated, iso_code,
#                                    percent_people_vaccination, country
# The column order is kept exactly as listed because the plotting cell selects
# columns by position.

# iso code of countries for mapviz (not populated in this cell; kept so the name stays defined)
iso = []

# Per-country maximum of each counter, computed in one grouped aggregation
# instead of growing empty DataFrames row by row.
# NOTE(review): presumably these columns are cumulative, so the maximum is the
# latest known total - confirm against the dataset description.
country_max = country_data[["total_vaccinations", "people_vaccinated", "people_fully_vaccinated"]].max()

# First available iso_code of every country. The previous
# `group.loc[ctr, "iso_code"][0]` returned only the first *character* of the code
# when a country had a single row, and relied on the deprecated positional
# fallback of Series[int] on a label index.
country_iso = country_data["iso_code"].first()

# Population lookup aligned on country name; countries missing from world_pop
# get NaN and therefore a NaN percentage. Assumes world_pop has unique country labels.
population = world_pop["Population (2020)"]

total_vaccination_per_country = (
    country_max[["total_vaccinations"]]
    .assign(country=lambda frame: frame.index, iso_code=country_iso)
    .rename_axis(None)  # unnamed index, so plots built from it gain no axis label
    .sort_values(by="total_vaccinations", ascending=False)
)

# Countries that never reported a value count as 0 people vaccinated.
total_people_vaccinated = (
    country_max[["people_vaccinated"]]
    .fillna(0)
    .rename_axis(None)
    .sort_values(by="people_vaccinated", ascending=False)
)

# Same zero-fill for fully vaccinated people, plus the share of the 2020
# population that is fully vaccinated (in percent).
total_people_fully_vaccinated = (
    country_max[["people_fully_vaccinated"]]
    .fillna(0)
    .assign(iso_code=country_iso)
    .assign(
        percent_people_vaccination=lambda frame: (
            frame["people_fully_vaccinated"] / population.reindex(frame.index) * 100
        ),
        country=lambda frame: frame.index,
    )
    .rename_axis(None)
    .sort_values(by="people_fully_vaccinated", ascending=False)
)



In [None]:

# 2x2 grid of bar charts, each showing the first 20 rows of an already-sorted
# per-country table. Columns are selected by name rather than by position, so a
# change in column order cannot silently plot the wrong quantity.
plt.rcParams["figure.figsize"] = [14, 7]

# (table, column to plot, panel title), in subplot order.
panels = [
    (total_vaccination_per_country, "total_vaccinations", "Top20 Total Vaccination per Country"),
    (total_people_vaccinated, "people_vaccinated", "Top20 Country People Vaccinated"),
    (total_people_fully_vaccinated, "people_fully_vaccinated", "Top20 Country People fully Vaccinated"),
    # Same 20 countries as the previous panel (the table is sorted by
    # people_fully_vaccinated), shown as a percentage of population.
    (total_people_fully_vaccinated, "percent_people_vaccination", "percent of People fully Vaccinated"),
]

for position, (table, column, panel_title) in enumerate(panels, start=1):
    top20 = table.iloc[:20, :]
    plt.subplot(2, 2, position)
    sns.barplot(x=top20.index, y=top20[column])
    plt.xticks(rotation="vertical")
    plt.title(panel_title)

plt.tight_layout()

plt.draw()

The bar plots in the upper row show the top 20 countries by total number of doses used and by number of people vaccinated. The two plots below show the 20 countries that have fully vaccinated the most people and the percentage of the population fully vaccinated in those countries.

In [None]:
# World map coloured by the total_vaccinations value of each country.
fig = px.choropleth(
    total_vaccination_per_country,
    locations="iso_code",
    hover_name="country",
    color='total_vaccinations',
    color_continuous_scale="YlGn",
    projection="natural earth",
    title="Total Vaccinaton by country",
)
fig.show()

In [None]:
# World map coloured by the percent_people_vaccination value of each country.
fig = px.choropleth(
    total_people_fully_vaccinated,
    locations="iso_code",
    hover_name="country",
    color='percent_people_vaccination',
    color_continuous_scale="Redor",
    projection="natural earth",
    title="Prcent of people fully vaccinated in world",
)
fig.show()

In [None]:
# Build a long table with one row per (country, individual vaccine) pair by
# splitting every "vaccines" value on ", ". iso_codes is filled in parallel with
# the iso code of the country on the same row.
country_vaccine_pairs = []
iso_codes = []

for vac, group in vaccines:
    vaccine_names = vac.split(", ")
    for ctr in group.index.unique():
        # iso code of *this* country. The previous `group["iso_code"][0]` took
        # the first row of the whole vaccine group, i.e. another country's code,
        # and relied on the deprecated positional fallback of Series[int].
        ctr_iso = group.loc[[ctr], "iso_code"].iloc[0]
        for vac_list_iter in vaccine_names:
            country_vaccine_pairs.append((ctr, vac_list_iter))
            iso_codes.append(ctr_iso)

# Create the frame once from the collected pairs instead of appending with
# .loc on every iteration, which re-allocates the frame each time.
country_use_vaccine = pd.DataFrame(country_vaccine_pairs, columns=["country", "vaccines"])



### *Which country uses which vaccine?*

In [None]:
# Preview the first (country, vaccine) rows.
country_use_vaccine.head()

In [None]:
# Country x vaccine matrix: one marker for every (country, vaccine) row of
# country_use_vaccine, coloured by vaccine.
plt.rcParams["figure.figsize"] = [10, 16]

# `marker="P"` (filled plus) is forwarded to matplotlib and sets the glyph of
# every point. The previous `markers='P'` only maps glyphs to the levels of a
# `style` variable, and none is used here, so it had no effect.
sns.scatterplot(
    data=country_use_vaccine,
    x="vaccines",
    y="country",
    hue="vaccines",
    marker="P",
)
plt.xticks(rotation=45)
plt.draw()

