In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import math

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(folder, file_name)
        print(full_path)
        
import plotly.offline as pyo
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from plotly.validators.scatter.marker import SymbolValidator

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Overview of the data**

In [None]:
# Both CSV files are read from the same Kaggle input folder.
DATA_DIR = "/kaggle/input/covid-world-vaccination-progress"

# Vaccinations per location/date/vaccine.
vacc_manufact = pd.read_csv(f"{DATA_DIR}/country_vaccinations_by_manufacturer.csv")
# Vaccinations per country/date.
vacc = pd.read_csv(f"{DATA_DIR}/country_vaccinations.csv")

The table below holds country vaccinations broken down by manufacturer, with the following columns:

* Location - country;
* Date - date;
* Vaccine - vaccine type;
* Total number of vaccinations - total number of vaccinations up to the given date for the given vaccine type.

In [None]:
# Preview the first five rows of the per-manufacturer table.
vacc_manufact.iloc[:5]

In [None]:
# Distinct vaccine names, in order of first appearance.
pd.unique(vacc_manufact["vaccine"])

In [None]:
# Missing-value count for each column of the per-manufacturer table.
vacc_manufact.isna().sum()

In [None]:
# Number of rows in the per-manufacturer table.
vacc_manufact.shape[0]

In [None]:
# Preview the first five rows of the per-country table.
vacc.head(n=5)

The data (country vaccinations) contains the following information:

* Country- this is the country for which the vaccination information is provided;
* Country ISO Code - ISO code for the country;
* Date - date for the data entry; for some of the dates we have only the daily vaccinations, for others, only the (cumulative) total;
* Total number of vaccinations - this is the absolute number of total immunizations in the country;
* Total number of people vaccinated - a person, depending on the immunization scheme, will receive one or more (typically 2) vaccines; at a certain moment, the number of vaccinations might be larger than the number of people;
* Total number of people fully vaccinated - this is the number of people that received the entire set of immunization according to the immunization scheme (typically 2); at a certain moment in time, there might be a certain number of people that received one vaccine and another number (smaller) of people that received all vaccines in the scheme;
* Daily vaccinations (raw) - for a certain data entry, the number of vaccination for that date/country;
* Daily vaccinations - for a certain data entry, the number of vaccination for that date/country;
* Total vaccinations per hundred - ratio (in percent) between vaccination number and total population up to the date in the country;
* Total number of people vaccinated per hundred - ratio (in percent) between population immunized and total population up to the date in the country;
* Total number of people fully vaccinated per hundred - ratio (in percent) between population fully immunized and total population up to the date in the country;
* Number of vaccinations per day - number of daily vaccination for that day and country;
* Daily vaccinations per million - ratio (in ppm) between vaccination number and total population for the current date in the country;
* Vaccines used in the country - names of the vaccines used in the country (up to date);
* Source name - source of the information (national authority, international organization, local organization etc.);
* Source website - website of the source of information;

In [None]:
# Missing-value count per column before any cleaning.
vacc.isna().sum()

In [None]:
# Rows that report a positive total number of vaccinations.
vacc.loc[vacc["total_vaccinations"].gt(0)]

In [None]:
# Rows that report a positive number of fully vaccinated people.
has_fully_vaccinated = vacc["people_fully_vaccinated"] > 0
vacc[has_fully_vaccinated]

In [None]:
# Row count of the per-country table before cleaning.
vacc.shape[0]

In [None]:
# Number of records available for each country, most frequent first.
vacc.loc[:, "country"].value_counts()

In [None]:
# Remove columns that the rest of the notebook does not use.
# Reassigning (instead of inplace=True) together with errors="ignore" keeps
# this cell re-runnable: a second execution no longer raises KeyError for
# columns that were already removed.
vacc = vacc.drop(
    columns=[
        "total_vaccinations_per_hundred",
        "people_fully_vaccinated_per_hundred",
        "source_name",
        "source_website",
    ],
    errors="ignore",
)

In [None]:
# Missing-value count per column after dropping the unused columns.
vacc.isna().sum(axis=0)

In [None]:
# Keep only the rows in which every one of these metrics is present.
required_metrics = [
    "total_vaccinations",
    "people_vaccinated",
    "people_fully_vaccinated",
    "daily_vaccinations_raw",
    "daily_vaccinations_per_million",
]
vacc = vacc.dropna(subset=required_metrics)

In [None]:
# Missing-value count per column after removing incomplete rows.
vacc.isna().sum()

In [None]:
# Row count of the per-country table after cleaning.
len(vacc.index)

In [None]:
# Parse the date strings into datetimes.
# assign() builds a new frame rather than writing a column into the frame
# returned by the earlier dropna(), which pandas can flag with
# SettingWithCopyWarning. The column keeps its original position.
vacc = vacc.assign(date=pd.to_datetime(vacc["date"]))

**Countries with the largest vaccinated populations**

In [None]:
# Order all records from the largest to the smallest people_vaccinated value.
ranked_by_people = (
    vacc[["country", "people_vaccinated", "date"]]
    .sort_values(by="people_vaccinated", ascending=False, ignore_index=True)
)

# unique() keeps first-appearance order, so countries come out ranked by
# their largest people_vaccinated value.
country = ranked_by_people["country"].unique()
top_10_countries = country[:10]
top_3_countries = country[:3]

# Both names refer to the same country-indexed frame; later cells filter on
# vacc_high_country.index, so it has to carry the country index as well.
vacc_high_country = ranked_by_people.set_index("country")
index_vacc_high_country = vacc_high_country

In [None]:
# First people_vaccinated value for each top country. The frame was sorted in
# descending order before being indexed, so the first row of a country holds
# its largest value.
# .loc[[name], column] always returns a Series (even when a country has only
# one row) and .iloc[0] is explicitly positional, which avoids the deprecated
# integer-as-position lookup on a label-indexed Series. Iterating over
# top_10_countries also copes with fewer than ten countries.
num_ppl_vaccinated = [
    index_vacc_high_country.loc[[name], "people_vaccinated"].iloc[0]
    for name in top_10_countries
]

num_ppl_vaccinated

In [None]:
# Bar chart: one bar per top-10 country, labelled with its vaccinated head-count.
fig = px.bar(
    vacc_high_country,
    x=top_10_countries,
    y=num_ppl_vaccinated,
    color=top_10_countries,
    text=num_ppl_vaccinated,
    title="Top 10 countries with vaccinated population",
)
fig.update_traces(textposition="outside")
fig.show()

In [None]:
# All records belonging to the top three countries, with the country moved
# back from the index into a regular column for plotting.
# isin() replaces the three hard-coded index comparisons, so the cell no
# longer raises IndexError when fewer than three countries are available.
is_top_3 = vacc_high_country.index.isin(top_3_countries)
vacc_high_num_top_3_country = vacc_high_country[is_top_3].reset_index()
vacc_high_num_top_3_country

In [None]:
# One facet per country: people vaccinated over time with an OLS trend line.
fig = px.scatter(
    vacc_high_num_top_3_country,
    x="date",
    y="people_vaccinated",
    color="country",
    facet_col="country",
    trendline="ols",
    title="Top 3 countries with the highest number of vaccinated population trend line",
)
fig.show()

**Countries with the highest vaccination rates**

In [None]:
# Order all records from the highest to the lowest people_vaccinated_per_hundred.
ranked_by_rate = (
    vacc[["country", "people_vaccinated_per_hundred", "date"]]
    .sort_values(by="people_vaccinated_per_hundred", ascending=False, ignore_index=True)
)

# unique() keeps first-appearance order, so countries come out ranked by
# their highest rate.
country = ranked_by_rate["country"].unique()
top_10_countries = country[:10]

# Both names refer to the same country-indexed frame; later cells filter on
# vacc_high_rate_country.index.
vacc_high_rate_country = ranked_by_rate.set_index("country")
index_vacc_high_rate_country = vacc_high_rate_country

In [None]:
# First people_vaccinated_per_hundred value for each top country. The frame
# was sorted in descending order before being indexed, so the first row of a
# country holds its highest rate.
# .loc[[name], column] always returns a Series (even when a country has only
# one row) and .iloc[0] is explicitly positional, which avoids the deprecated
# integer-as-position lookup on a label-indexed Series. Iterating over
# top_10_countries also copes with fewer than ten countries.
rate_ppl_vaccinated = [
    index_vacc_high_rate_country.loc[[name], "people_vaccinated_per_hundred"].iloc[0]
    for name in top_10_countries
]

rate_ppl_vaccinated

In [None]:
# Bar chart: one bar per top-10 country, labelled with its vaccination rate.
fig = px.bar(
    vacc_high_rate_country,
    x=top_10_countries,
    y=rate_ppl_vaccinated,
    color=top_10_countries,
    text=rate_ppl_vaccinated,
    title="Top 10 countries with vaccination rates",
)
fig.update_traces(textposition="outside")
fig.show()

In [None]:
# Pair each top-10 country with its rate, then keep the names of the first three.
vacc_high_rate_country_trend = pd.DataFrame(
    {
        "country": top_10_countries,
        "vaccination rate": rate_ppl_vaccinated,
    }
)
vacc_high_rate_country_top_3 = vacc_high_rate_country_trend["country"].head(3)

In [None]:
# All records belonging to the three countries with the highest rate, with
# the country moved back from the index into a regular column for plotting.
# isin() replaces the three hard-coded label lookups ([0], [1], [2]), so the
# cell no longer raises KeyError when fewer than three countries are available.
is_top_3_rate = vacc_high_rate_country.index.isin(vacc_high_rate_country_top_3)
vacc_high_rate_country_trend_top_3 = vacc_high_rate_country[is_top_3_rate].reset_index()

In [None]:
# One facet per country: vaccination rate over time with an OLS trend line.
fig = px.scatter(
    vacc_high_rate_country_trend_top_3,
    x="date",
    y="people_vaccinated_per_hundred",
    color="country",
    facet_col="country",
    trendline="ols",
    title="Top 3 vaccination rate country trend line",
)
fig.show()

**Worldwide status**

In [None]:
# vacc holds one row per country per date. Passing every row hands the map
# many entries for the same location, leaving the colour of a country to
# however plotly resolves the duplicates. Keep only the most recent record of
# each country so the map shows one well-defined value per location.
vacc_latest_per_country = (
    vacc
    .sort_values("date")
    .drop_duplicates(subset="iso_code", keep="last")
)

fig = px.choropleth(
    vacc_latest_per_country,
    locations="iso_code",
    color="people_vaccinated_per_hundred",
    hover_name="country",
    color_continuous_scale=px.colors.sequential.Plasma,
    title="People_vaccinated_per_hundred",
)

fig.show()

In [None]:
# Show the vaccines column (the vaccine names recorded on each row).
vacc.loc[:, "vaccines"]

We can see that the vaccines most widely used worldwide are *Oxford/AstraZeneca, Pfizer/BioNTech, Sinovac, Moderna*. Based on this information, we can examine the distribution of each vaccine with plotly maps.

**AstraZeneca vaccine distribution around the world**

In [None]:
# Rows whose vaccines entry mentions Oxford/AstraZeneca.
# regex=False treats the name as literal text rather than a regular
# expression; na=False drops rows with a missing vaccines entry instead of
# producing an NA mask that cannot be used for boolean indexing.
uses_astra_zeneca = vacc["vaccines"].str.contains("Oxford/AstraZeneca", regex=False, na=False)
vacc_astra_zeneca = vacc[uses_astra_zeneca]

In [None]:
# World map of the countries in the Oxford/AstraZeneca subset.
# NOTE(review): "vaccines" is filtered with .str elsewhere, so it is a text
# column; the continuous colour scale presumably has no effect here - confirm.
fig = px.choropleth(
    vacc_astra_zeneca,
    locations="iso_code",
    color="vaccines",
    hover_name="country",  # column to add to hover information
    color_continuous_scale=px.colors.sequential.Plasma,
    title="Vaccines",
)
# Hide the legend and remove all figure margins in a single layout update.
fig.update_layout(showlegend=False, margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()

**Pfizer/BioNTech vaccine distribution around the world**

In [None]:
# Rows whose vaccines entry mentions Pfizer/BioNTech.
# regex=False treats the name as literal text rather than a regular
# expression; na=False drops rows with a missing vaccines entry instead of
# producing an NA mask that cannot be used for boolean indexing.
uses_pfizer = vacc["vaccines"].str.contains("Pfizer/BioNTech", regex=False, na=False)
vacc_pfizer = vacc[uses_pfizer]

In [None]:
# World map of the countries in the Pfizer/BioNTech subset.
# NOTE(review): "vaccines" is filtered with .str elsewhere, so it is a text
# column; the continuous colour scale presumably has no effect here - confirm.
fig = px.choropleth(
    vacc_pfizer,
    locations="iso_code",
    color="vaccines",
    hover_name="country",  # column to add to hover information
    color_continuous_scale=px.colors.sequential.Plasma,
    title="Vaccines",
)
# Hide the legend and remove all figure margins in a single layout update.
fig.update_layout(showlegend=False, margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()

**Sinovac vaccine distribution around the world**

In [None]:
# Rows whose vaccines entry mentions Sinovac.
# regex=False treats the name as literal text rather than a regular
# expression; na=False drops rows with a missing vaccines entry instead of
# producing an NA mask that cannot be used for boolean indexing.
uses_sinovac = vacc["vaccines"].str.contains("Sinovac", regex=False, na=False)
vacc_sinovac = vacc[uses_sinovac]

In [None]:
# World map of the countries in the Sinovac subset.
# NOTE(review): "vaccines" is filtered with .str elsewhere, so it is a text
# column; the continuous colour scale presumably has no effect here - confirm.
fig = px.choropleth(
    vacc_sinovac,
    locations="iso_code",
    color="vaccines",
    hover_name="country",  # column to add to hover information
    color_continuous_scale=px.colors.sequential.Plasma,
    title="Vaccines",
)
# Hide the legend and remove all figure margins in a single layout update.
fig.update_layout(showlegend=False, margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()

**Moderna vaccine distribution around the world**

In [None]:
# Rows whose vaccines entry mentions Moderna.
# regex=False treats the name as literal text rather than a regular
# expression; na=False drops rows with a missing vaccines entry instead of
# producing an NA mask that cannot be used for boolean indexing.
uses_moderna = vacc["vaccines"].str.contains("Moderna", regex=False, na=False)
vacc_moderna = vacc[uses_moderna]

In [None]:
# World map of the countries in the Moderna subset.
# NOTE(review): "vaccines" is filtered with .str elsewhere, so it is a text
# column; the continuous colour scale presumably has no effect here - confirm.
fig = px.choropleth(
    vacc_moderna,
    locations="iso_code",
    color="vaccines",
    hover_name="country",  # column to add to hover information
    color_continuous_scale=px.colors.sequential.Plasma,
    title="Vaccines",
)
# Hide the legend and remove all figure margins in a single layout update.
fig.update_layout(showlegend=False, margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()

**Johnson&Johnson vaccine distribution around the world**

In [None]:
# Rows whose vaccines entry mentions Johnson&Johnson.
# regex=False treats the name as literal text rather than a regular
# expression; na=False drops rows with a missing vaccines entry instead of
# producing an NA mask that cannot be used for boolean indexing.
uses_johnson = vacc["vaccines"].str.contains("Johnson&Johnson", regex=False, na=False)
vacc_johnson = vacc[uses_johnson]

In [None]:
# World map of the countries in the Johnson&Johnson subset.
# NOTE(review): "vaccines" is filtered with .str elsewhere, so it is a text
# column; the continuous colour scale presumably has no effect here - confirm.
fig = px.choropleth(
    vacc_johnson,
    locations="iso_code",
    color="vaccines",
    hover_name="country",  # column to add to hover information
    color_continuous_scale=px.colors.sequential.Plasma,
    title="Vaccines",
)
# Hide the legend and remove all figure margins in a single layout update.
fig.update_layout(showlegend=False, margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()

**Vaccines manufacturing data**

In [None]:
# Distinct locations covered by the per-manufacturer table.
pd.unique(vacc_manufact["location"])

In [None]:
# Build the vaccine -> total doses table used by the pie chart.
#
# total_vaccinations is a running total per location, date and vaccine, so
# adding up every dated row would count the same doses once per reporting
# date. Instead, take the most recent row of each (location, vaccine) pair and
# sum those figures per vaccine.
vacc_manufact_subset = vacc_manufact[["vaccine", "total_vaccinations"]]
vaccine_type = vacc_manufact["vaccine"].unique()

latest_per_location_vaccine = (
    vacc_manufact
    .assign(_parsed_date=pd.to_datetime(vacc_manufact["date"]))  # sort chronologically, not as text
    .sort_values("_parsed_date")
    .drop_duplicates(subset=["location", "vaccine"], keep="last")
)
totals_by_vaccine = (
    latest_per_location_vaccine
    .groupby("vaccine", sort=False)["total_vaccinations"]
    .sum()
)

# Same intermediate names as before, so later cells that read them keep working.
record_dict = totals_by_vaccine.to_dict()
vaccine_col = list(record_dict.keys())
total_col = list(record_dict.values())

vaccine_pie = pd.DataFrame({"vaccine": vaccine_col, "total vaccination records": total_col})

In [None]:
# Donut chart of each vaccine's share of the recorded totals.
fig = px.pie(
    vaccine_pie,
    names='vaccine',
    values='total vaccination records',
    title='Percentage of vaccines that are used in Europe and Latin America according to manufacturer (Chile, Czechia, France, Germany, Iceland, Italy, Latvia, Lithuania, Romania, United States)',
)
# Label each slice inside with its percentage and name; hole=0.3 makes it a donut.
fig.update_traces(
    hole=0.3,
    textinfo='percent+label',
    textposition='inside',
)
fig.show()

> We can see that Pfizer/BioNTech (more than 50%) is widely used in these countries : Chile, Czechia, France, Germany, Iceland, Italy, Latvia, Lithuania, Romania, United States. Pfizer/BioNTech & Moderna are the first vaccines to be approved by the US Food and Drug Administration (FDA).