In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt 
import plotly.express as px 
import pycountry
from datetime import datetime

## The data set contains the following information 

* Country- this is the country for which the vaccination information is provided;


* Country ISO Code - ISO code for the country;


* Date - date for the data entry; for some of the dates we have only the daily vaccinations, for others, only the (cumulative) total;


* Total number of vaccinations - this is the absolute number of total immunizations in the country;


* Total number of people vaccinated - a person, depending on the immunization scheme, will receive one or more (typically 2) vaccines; at a certain moment, the number of vaccination might be larger than the number of people;


* Total number of people fully vaccinated - this is the number of people that received the entire set of immunization according to the immunization scheme (typically 2); at a certain moment in time, there might be a certain number of people that received one vaccine and another number (smaller) of people that received all vaccines in the scheme;


* Daily vaccinations (raw) - for a certain data entry, the number of vaccination for that date/country;


* Daily vaccinations - for a certain data entry, the number of vaccination for that date/country;


* Total vaccinations per hundred - ratio (in percent) between vaccination number and total population up to the date in the country;


* Total number of people vaccinated per hundred - ratio (in percent) between population immunized and total population up to the date in the country;


* Total number of people fully vaccinated per hundred - ratio (in percent) between population fully immunized and total population up to the date in the country;


* Number of vaccinations per day - number of daily vaccination for that day and country;


* Daily vaccinations per million - ratio (in ppm) between vaccination number and total population for the current date in the country;


* Vaccines used in the country - names of the vaccines used in the country (up to date);


* Source name - source of the information (national authority, international organization, local organization etc.);


* Source website - website of the source of information;

In [None]:
# Load the vaccination records and discard the 'source_website' column
df = pd.read_csv(
    '/kaggle/input/covid-world-vaccination-progress/country_vaccinations.csv'
).drop(columns='source_website')

In [None]:
# Remove the aggregate 'United Kingdom' rows, because its constituent
# countries are already present as separate entries.
uk_rows = df.index[df['country'] == 'United Kingdom']
df = df.drop(index=uk_rows)

In [None]:
# Strip any blank spaces out of the column names
df.columns = [name.replace(' ', '') for name in df.columns]

In [None]:
# Convert the 'date' column from "YYYY-MM-DD" strings to datetime values.
# pd.to_datetime is vectorised and, unlike a per-row datetime.strptime call,
# also accepts values that are already datetimes, so re-running this cell is safe.
df['date'] = pd.to_datetime(df['date'], format="%Y-%m-%d")

In [None]:
# Print each column's dtype and non-null count to see where values are missing
df.info()

Many values are missing for people vaccinated, people fully vaccinated and daily vaccinations.

In [None]:
# Per-country summary: group by country and keep the maximum of each column.
summary_columns = ['total_vaccinations', 'total_vaccinations_per_hundred',
                   'people_vaccinated_per_hundred', 'people_fully_vaccinated_per_hundred',
                   'people_vaccinated', 'people_fully_vaccinated', 'vaccines']
# Several columns must be selected from a groupby with a list; selecting with a
# tuple was deprecated and raises in pandas 2.0.
df_group = df.groupby('country')[summary_columns].max().reset_index()

# Look up each country's ISO code from its own rows. Pasting
# df['iso_code'].unique() next to the sorted group keys only lines up when the
# file order equals the sorted country order and every country has exactly one code.
iso_by_country = df.groupby('country')['iso_code'].first()
df_group['iso_code'] = df_group['country'].map(iso_by_country)

# Sorted by total vaccinations, largest first
df_total_vaccinations = df_group.sort_values(by='total_vaccinations', ascending=False)
# Sorted by people vaccinated per hundred: highest first, then lowest first
df_people_vaccinated_per_hundred = df_group.sort_values(by='people_vaccinated_per_hundred', ascending=False)
df_people_vaccinated_per_hundred_low = df_group.sort_values(by='people_vaccinated_per_hundred', ascending=True)

In [None]:
# Substitute 0 for every missing value in the three sorted summaries
(df_total_vaccinations,
 df_people_vaccinated_per_hundred,
 df_people_vaccinated_per_hundred_low) = [
    frame.fillna(0)
    for frame in (df_total_vaccinations,
                  df_people_vaccinated_per_hundred,
                  df_people_vaccinated_per_hundred_low)
]

In [None]:
# Check the non-null counts after the missing values were replaced by 0
df_people_vaccinated_per_hundred.info()

After replacing the missing values with 0, no missing values remain, so we can begin the analysis.

In [None]:
# First rows of the summary sorted by total_vaccinations (largest first)
df_total_vaccinations.head()

From the table above, the countries with the highest total number of vaccinations are:
- United States of America
- China
- India
- England
- Brazil

These correspond to the most populated countries in the world, with the exception of England.

In [None]:
# World map coloured by each country's total vaccinations.
# hover_name is the country so the tooltip identifies the place; the value
# itself is already shown through the colour dimension.
fig = px.choropleth(
    df_total_vaccinations,
    locations='iso_code',
    color='total_vaccinations',
    hover_name='country',
    title='Total vaccinations by country',
)
fig.show()

In [None]:
# Horizontal bar chart of the 20 countries with the largest total_vaccinations
top_total = df_total_vaccinations.head(20)

plt.rcParams.update({'font.size': 18})
fig, ax = plt.subplots(figsize=(35, 20))
ax.barh(top_total['country'], top_total['total_vaccinations'], label='total_vaccinations')
# barh draws the first row at the bottom; flip the axis so the largest is on top
ax.invert_yaxis()
# The plotted column is total_vaccinations, not a count of people
ax.set_xlabel("Total vaccinations")
ax.set_ylabel("Country")
ax.set_title('Top 20 countries by total vaccinations')
ax.legend()
plt.show()

In [None]:
def plot_vaccinated_panel(ax, frame, partial_col, full_col, xlabel, ylabel=None, top_n=20):
    """Overlay two horizontal bar series for the leading rows of ``frame``.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    frame : DataFrame with a 'country' column, already sorted in the order to show.
    partial_col, full_col : names of the columns drawn as the first and second
        bar series; the second is drawn over the first.
    xlabel : label for the x axis.
    ylabel : optional label for the y axis.
    top_n : number of leading rows to plot.
    """
    rows = frame.head(top_n)
    # Each series is labelled with its own column name so the legend matches the data
    ax.barh(rows['country'], rows[partial_col], label=partial_col)
    ax.barh(rows['country'], rows[full_col], label=full_col)
    ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    ax.legend()


plt.rcParams.update({'font.size': 14})
# Only four panels are drawn, so use a 2x2 grid (a 3x2 grid leaves the last row empty)
fig, axes = plt.subplots(2, 2, figsize=(35, 27))

# Top row: the 20 countries with the highest people_vaccinated_per_hundred
plot_vaccinated_panel(axes[0, 0], df_people_vaccinated_per_hundred,
                      'people_vaccinated_per_hundred', 'people_fully_vaccinated_per_hundred',
                      "Percentage of vaccinated", ylabel='Countries')
axes[0, 0].set_xticks([0, 20, 40, 60, 80, 100])
plot_vaccinated_panel(axes[0, 1], df_people_vaccinated_per_hundred,
                      'people_vaccinated', 'people_fully_vaccinated',
                      "Amount of People")

# Bottom row: the 20 countries with the lowest people_vaccinated_per_hundred
plot_vaccinated_panel(axes[1, 0], df_people_vaccinated_per_hundred_low,
                      'people_vaccinated_per_hundred', 'people_fully_vaccinated_per_hundred',
                      "Percentage of vaccinated", ylabel='Countries')
plot_vaccinated_panel(axes[1, 1], df_people_vaccinated_per_hundred_low,
                      'people_vaccinated', 'people_fully_vaccinated',
                      "Amount of People")

fig.suptitle('Percentage of vaccinated by country')

plt.show()

It is observed that the countries with the highest percentage of vaccinated people are countries with a small total population that have managed to obtain sufficient doses of vaccines and generally use more than one vaccine in their vaccination campaigns. According to World Bank data (**https://data.worldbank.org/indicator/NY.GDP.PCAP.CD?locations=ZJ-Z7-XU**), these are countries with a high GDP per capita. Countries with a low Gross Domestic Product, in contrast, have not yet vaccinated even 1% of their population; they are vulnerable countries that cannot protect their population.

## Vaccine used by country 

In [None]:
# One row per country holding the distinct entries of its 'vaccines' column
df_vac = df.groupby('country')['vaccines'].unique().reset_index()
# Look up each country's ISO code from its own rows; assigning
# df['iso_code'].unique() positionally only lines up when the file order equals
# the sorted country order and every country has exactly one code.
df_vac['iso_code'] = df_vac['country'].map(df.groupby('country')['iso_code'].first())
# Everything is converted to text so the vaccines entry can be searched by name
df_vac = df_vac.astype('str')


In [None]:
# Print the columns, dtypes and non-null counts of the per-country vaccine table
df_vac.info()

In [None]:
# Vaccine names to look for in each country's 'vaccines' text
vacunas = [
    'Pfizer/BioNTech',
    'Sputnik V',
    'Oxford/AstraZeneca',
    'Moderna',
    'Sinovac',
    'Covaxin',
    'Sinopharm/Beijing',
    'Sinopharm/Wuhan',
    'EpiVacCorona',
    'Johnson&Johnson',
]

In [None]:
# Add one 0/1 indicator column per vaccine: 1 when the vaccine's name occurs in
# the country's 'vaccines' text, 0 otherwise.
for vacuna in vacunas:
    # regex=False matches the name literally, like a plain substring test
    df_vac[vacuna] = df_vac['vaccines'].str.contains(vacuna, regex=False).astype(int)

In [None]:
# First rows of the vaccine table, including the per-vaccine indicator columns
df_vac.head()

In [None]:
# Number of countries using each vaccine, most used first.
# Computed once and selected by name, so it does not depend on column order.
vaccine_counts = df_vac[vacunas].sum().sort_values(ascending=False)

plt.figure(figsize=(20, 8))
plt.bar(vaccine_counts.index, vaccine_counts.values)
plt.title("Vaccine used in the world ")
plt.xlabel("Vaccine")
plt.xticks(rotation=45)
plt.ylabel("Number of countries that use the vaccine ")
plt.show()

It is observed that the most widely used vaccines worldwide are Oxford/AstraZeneca and Pfizer/BioNTech.

In [None]:
# One world map per vaccine, coloured by its 0/1 indicator column.
# hover_name is the country so the tooltip identifies the place rather than
# repeating the indicator value.
for vacuna in vacunas:
    fig = px.choropleth(df_vac, locations='iso_code', color=vacuna,
                        hover_name='country', color_continuous_scale="Blues")
    fig.update_layout(title_text=vacuna)
    fig.show()

Some questions to ask are:

How long does the immunity acquired through the vaccine last, and is that long enough for all countries to reach a good number of vaccinated people?

What happens to the countries that cannot acquire the vaccine in time? Will the virus continue to mutate, and could it form a strain that is immune to the vaccines currently in use?

And finally, what will happen when the countries that depend on tourism reopen their borders, if they receive visitors from countries where vaccination was not successful? 