# Libraries

In [None]:
import numpy as np
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
from matplotlib import rcParams

In [None]:
# Show which files are available in the dataset's input directory.
os.listdir("../input/covid-world-vaccination-progress")

In [None]:
# Load the per-country vaccination records; every later cell derives its
# tables from this frame.
root_df = pd.read_csv("../input/covid-world-vaccination-progress/country_vaccinations.csv")

In [None]:
# Preview the first rows of the raw data.
root_df.head()

### The data:

- Country: this is the country for which the vaccination information is provided;
- Country ISO Code: ISO code for the country;
- Date: date for the data entry; for some of the dates we have only the daily vaccinations, for others, only the (cumulative) total;
- Total number of vaccinations: this is the absolute number of total immunizations in the country;
- Total number of people vaccinated: a person, depending on the immunization scheme, will receive one or more (typically 2) vaccines; at a certain moment, the number of vaccinations might be larger than the number of people;
- Total number of people fully vaccinated: this is the number of people that received the entire set of immunization according to the immunization scheme (typically 2); at a certain moment in time, there might be a certain number of people that received one vaccine and another number (smaller) of people that received all vaccines in the scheme;
- Daily vaccinations (raw): for a certain data entry, the number of vaccinations for that date/country;
- Daily vaccinations: for a certain data entry, the number of vaccinations for that date/country;
- Total vaccinations per hundred: ratio (in percent) between vaccination number and total population up to the date in the country;
- Total number of people vaccinated per hundred: ratio (in percent) between population immunized and total population up to the date in the country;
- Total number of people fully vaccinated per hundred: ratio (in percent) between population fully immunized and total population up to the date in the country;
- Number of vaccinations per day: number of daily vaccinations for that day and country;
- Daily vaccinations per million: ratio (in ppm) between vaccination number and total population for the current date in the country;
- Vaccines used in the country: the vaccine(s) used in the country (up to date);
- Source name: source of the information (national authority, international organization, local organization etc.);
- Source website: website of the source of information;


In [None]:
# Summary statistics for the columns of the raw data.
root_df.describe()

In [None]:
# Column dtypes and non-null counts, to see where values are missing.
root_df.info()

In [None]:
# Report how many distinct values the `country` column holds.
print(f'Number of countries within dataframe: {len(root_df["country"].unique())}')

In [None]:
# Gather every distinct entry of the `vaccines` column into a plain list,
# then print the list followed by its length.
vaccine_list = [*root_df["vaccines"].unique()]

print(f'Vaccine List: {vaccine_list}')
print(f'Total Count: {len(vaccine_list)}')

In [None]:
# One row per (country, iso_code, vaccines) group. max() is applied column by
# column, so each row holds the per-column maxima of its group rather than a
# single source row.
# The columns are selected with a list (double brackets): indexing a GroupBy
# with a bare tuple of names is deprecated and raises on pandas >= 2.0.
by_country_df = (
    root_df.groupby(["country", "iso_code", "vaccines"])[
        [
            "date",
            "total_vaccinations",
            "people_vaccinated",
            "people_fully_vaccinated",
            "daily_vaccinations",
            "total_vaccinations_per_hundred",
            "people_vaccinated_per_hundred",
            "people_fully_vaccinated_per_hundred",
            "daily_vaccinations_per_million",
        ]
    ]
    .max()
    .reset_index()
)
# China is left out of this table; the bar charts built from it are titled
# "Excluding China".
by_country_df = by_country_df[by_country_df["country"] != "China"]

In [None]:
import plotly.subplots
from plotly import tools
import plotly.offline as py
py.init_notebook_mode(connected = True)
import plotly.graph_objs as go

In [None]:
# Three stacked bar charts (one per row, shared x axis) comparing countries on
# absolute vaccination figures taken from by_country_df.
fig = plotly.subplots.make_subplots(
    rows=3,
    cols=1,
    vertical_spacing=0.10,
    subplot_titles=[
        "Total Vaccinations per Country - Excluding China",
        "People Vaccinated per Country - Excluding China",
        "Daily Vaccinations by Country - Excluding China",
    ],
    shared_xaxes=True,
)

# Row 1: countries sorted by total vaccinations, largest first.
plot_srs = by_country_df.sort_values(by="total_vaccinations", ascending=False)
trace_1 = go.Bar(x=plot_srs.country, y=plot_srs.total_vaccinations)

# Row 2: countries sorted by people vaccinated, largest first.
plot_srs = by_country_df.sort_values(by="people_vaccinated", ascending=False)
trace_2 = go.Bar(x=plot_srs.country, y=plot_srs.people_vaccinated)

# Row 3: countries sorted by daily vaccinations, largest first.
plot_srs = by_country_df.sort_values(by="daily_vaccinations", ascending=False)
trace_3 = go.Bar(x=plot_srs.country, y=plot_srs.daily_vaccinations)

# add_trace(row=..., col=...) is the supported replacement for the deprecated
# append_trace.
fig.add_trace(trace_1, row=1, col=1)
fig.add_trace(trace_2, row=2, col=1)
fig.add_trace(trace_3, row=3, col=1)

# Transparent paper background, no legend, fixed canvas size.
fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', showlegend=False, height=700, width=1000)
py.iplot(fig, filename='vaccine-plots')

In [None]:
# Three stacked bar charts (one per row, shared x axis) comparing countries on
# population-relative vaccination figures taken from by_country_df.
fig = plotly.subplots.make_subplots(
    rows=3,
    cols=1,
    vertical_spacing=0.10,
    subplot_titles=[
        "Total Vaccinations per Hundred by Country - Excluding China",
        "People Vaccinated per Hundred by Country - Excluding China",
        "Daily Vaccinations per Million by Country - Excluding China",
    ],
    shared_xaxes=True,
)

# The table is sorted once, by total_vaccinations_per_hundred (largest first),
# and that same ordering is reused for all three traces below.
plot_srs = by_country_df.sort_values(by="total_vaccinations_per_hundred", ascending=False)

# Row 1: total vaccinations per hundred.
trace_1 = go.Bar(x=plot_srs.country, y=plot_srs.total_vaccinations_per_hundred)

# Row 2: people vaccinated per hundred.
trace_2 = go.Bar(x=plot_srs.country, y=plot_srs.people_vaccinated_per_hundred)

# Row 3: daily vaccinations per million.
trace_3 = go.Bar(x=plot_srs.country, y=plot_srs.daily_vaccinations_per_million)

# add_trace(row=..., col=...) is the supported replacement for the deprecated
# append_trace.
fig.add_trace(trace_1, row=1, col=1)
fig.add_trace(trace_2, row=2, col=1)
fig.add_trace(trace_3, row=3, col=1)

# Transparent paper background, no legend, fixed canvas size.
fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', showlegend=False, height=700, width=1000)
py.iplot(fig, filename='vaccinepercentage-plots')

In [None]:
# For each distinct `vaccines` entry, take the maximum of the two
# total-vaccination columns.
# The columns are selected with a list (double brackets): indexing a GroupBy
# with a bare tuple of names is deprecated and raises on pandas >= 2.0.
vaccine = (
    root_df.groupby(["vaccines"])[["total_vaccinations", "total_vaccinations_per_hundred"]]
    .max()
    .reset_index()
)
vaccine.head()

In [None]:
# Bar chart of the per-vaccine maximum of total_vaccinations, largest first.
data = vaccine.sort_values(by="total_vaccinations", ascending=False)
trace = go.Bar(x=data.vaccines, y=data.total_vaccinations, marker=dict(color="#7bdbce"))

layout = dict(height=500, width=850, title="Most to Least Common Vaccine", paper_bgcolor="rgba(0,0,0,0)")

# A figure's `data` entry is a list of traces; wrap the single trace
# explicitly, as the other figure dicts in this notebook do.
fig = dict(data=[trace], layout=layout)
py.iplot(fig, filename="vaccine")

In [None]:
# One row per (country, iso_code, vaccines) group, holding the per-column
# maxima of that group. No country is filtered out of this table.
# The columns are selected with a list (double brackets): indexing a GroupBy
# with a bare tuple of names is deprecated and raises on pandas >= 2.0.
by_country_df_2 = (
    root_df.groupby(["country", "iso_code", "vaccines"])[
        [
            "date",
            "total_vaccinations",
            "people_vaccinated",
            "people_fully_vaccinated",
            "daily_vaccinations",
            "total_vaccinations_per_hundred",
            "people_vaccinated_per_hundred",
            "people_fully_vaccinated_per_hundred",
            "daily_vaccinations_per_million",
        ]
    ]
    .max()
    .reset_index()
)

In [None]:
# Distinct entries of the `vaccines` column in the per-country table.
vaccines = by_country_df_2.vaccines.unique()

In [None]:
# For every distinct vaccine entry, print the countries whose row carries it.
for vaccine_scheme in vaccines:
    uses_scheme = by_country_df_2["vaccines"] == vaccine_scheme
    countries = [*by_country_df_2.loc[uses_scheme, "country"].values]
    print(f'Vaccines: {vaccine_scheme}: Countries: {countries}')

In [None]:
import plotly.express

In [None]:
# World map that colours each country by its entry in the `vaccines` column.
fig = plotly.express.choropleth(
    locations=by_country_df_2["country"],
    locationmode="country names",
    color=by_country_df_2["vaccines"],
    title="Vaccine used for each Country",
)

# Vertical legend with its own title, then an orthographic map projection.
fig.update_layout(legend_orientation="v", legend_title="Vaccine Brands")
fig.update_geos(projection_type="orthographic")
fig.show()

In [None]:
# Choropleth of total_vaccinations per country, with the colour scale reversed.
trace = go.Choropleth(
    locations=by_country_df_2["country"],
    locationmode="country names",
    text=by_country_df_2["country"],
    z=by_country_df_2["total_vaccinations"],
    reversescale=True,
)
data = [trace]

# The orthographic projection draws the map as a globe instead of a rectangle.
layout = go.Layout(
    title="Total Vaccinations by Country Comparison",
    geo={"projection": {"type": "orthographic"}},
)

fig = {"data": data, "layout": layout}
py.iplot(fig)

In [None]:
# Choropleth of daily_vaccinations per country, with the colour scale reversed.
trace = go.Choropleth(
    locations=by_country_df_2["country"],
    locationmode="country names",
    text=by_country_df_2["country"],
    z=by_country_df_2["daily_vaccinations"],
    reversescale=True,
)
data = [trace]

# Drawn with the orthographic projection.
layout = go.Layout(
    title="Daily Vaccinations by Country Comparison",
    geo={"projection": {"type": "orthographic"}},
)

fig = {"data": data, "layout": layout}
py.iplot(fig)

In [None]:
# Choropleth of people_vaccinated_per_hundred per country, with the colour
# scale reversed.
trace = go.Choropleth(
    locations=by_country_df_2["country"],
    locationmode="country names",
    text=by_country_df_2["country"],
    z=by_country_df_2["people_vaccinated_per_hundred"],
    reversescale=True,
)
data = [trace]

# Drawn with the "natural earth" projection.
layout = go.Layout(
    title="People vaccinated percentage by Country Comparison",
    geo={"projection": {"type": "natural earth"}},
)

fig = {"data": data, "layout": layout}
py.iplot(fig)

## Vaccination Progress

In [None]:
# Keep only the columns used by the timeline plots and discard every row
# that has a missing value in any of them.
timeline_df = root_df.loc[
    :,
    [
        "country",
        "vaccines",
        "date",
        "total_vaccinations",
        "people_vaccinated",
        "total_vaccinations_per_hundred",
        "people_vaccinated_per_hundred",
    ],
].dropna()

In [None]:
# List the countries that still have rows after the NaN filter.
list(timeline_df.country.unique())

Manually select the countries to plot, since some entries are repeated: e.g. Scotland, Northern Ireland, and Wales are already included in the United Kingdom.

In [None]:
# Hand-picked countries to draw in the timeline plots.
manual_selected_list = [
    "Austria",
    "Belgium",
    "Chile",
    "Costa Rica",
    "Croatia",
    "Cyprus",
    "Czechia",
    "Denmark",
    "Estonia",
    "Finland",
    "Germany",
    "Greece",
    "Hungary",
    "Iceland",
    "Indonesia",
    "Ireland",
    "Israel",
    "Italy",
    "Lithuania",
    "Luxembourg",
    "Malta",
    "Mexico",
    "Norway",
    "Oman",
    "Poland",
    "Portugal",
    "Romania",
    "Singapore",
    "Slovakia",
    "Spain",
    "Sweden",
    "United Arab Emirates",
    "United Kingdom",
    "United States",
]

In [None]:
def timeline_plotter(dataframe, feature, title, countries):
    """Draw one line per country showing ``feature`` against ``date``.

    Args:
        dataframe: table with ``country`` and ``date`` columns plus the
            column named by ``feature``.
        feature: name of the column plotted on the y axis; also used as
            the y-axis title.
        title: title of the figure.
        countries: iterable of country names; each one yields a trace,
            in the order given.

    The figure is rendered with ``py.iplot``; nothing is returned.
    """
    def country_trace(name):
        # Rows belonging to this country only.
        rows = dataframe.loc[dataframe.country == name]
        return go.Scatter(
            x=rows.date,
            y=rows[feature],
            name=name,
            mode="lines",
            text=rows.country,
        )

    traces = [country_trace(name) for name in countries]

    # Logarithmic y axis: keeps very small and very large values readable on
    # the same plot instead of squeezing the small ones together.
    y_axis = dict(title=feature, type="log")
    figure_layout = dict(
        title=title,
        xaxis=dict(title="Date"),
        yaxis=y_axis,
        hovermode="x",
        height=1000,
    )

    py.iplot(dict(data=traces, layout=figure_layout), filename="timeline_plots")

In [None]:
# Plot the total_vaccinations timeline for the manually selected countries.
timeline_plotter(timeline_df, "total_vaccinations", "Total Vaccination by Country", manual_selected_list)