# COVID-19 World Vaccination Progress

In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Allow up to 200 rows to be rendered when a DataFrame is displayed.
pd.set_option("display.max_rows", 200)

In [None]:
# Per-manufacturer vaccination data; the "date" column is used as the index
# and parsed as dates.
df = pd.read_csv(
    "../input/covid-world-vaccination-progress/country_vaccinations_by_manufacturer.csv",
    index_col="date",
    parse_dates=True,
)

In [None]:
# Number of missing values in each column.
df.isnull().sum()

### Total Vaccinations in Austria

In [None]:
# Austria only: one row per date, one column per vaccine.
austria = (
    df.loc[df["location"] == "Austria"]
    .pivot_table(values="total_vaccinations", index="date", columns="vaccine")
)

In [None]:
# One line per vaccine column of the pivoted Austria table.
fig, ax = plt.subplots(figsize=(16, 6))
sns.lineplot(data=austria, ax=ax)
ax.set_title("Austria total vaccinations")
ax.set_ylabel("Total vaccinations");

## Which country is using what vaccine?

### Most used vaccine in Austria

In [None]:
# Highest cumulative total reached by each vaccine in Austria.
austria = (
    df[df["location"] == "Austria"]
    .drop(columns=["location"])
    .groupby("vaccine")
    .max()
)
# Pick the peak by column name instead of by position (`.max().values[0]`),
# so the selection stays correct even if other columns are present.
austria[austria["total_vaccinations"] == austria["total_vaccinations"].max()]

### Most used vaccines in Austria and Belgium

In [None]:
# Peak cumulative total for every (country, vaccine) pair in the two
# countries, with both group keys turned back into ordinary columns.
tmp = (
    df.loc[df["location"].isin(["Austria", "Belgium"])]
    .groupby(["location", "vaccine"])
    .max()
    .reset_index()
)
tmp

In [None]:
# Grouped bars: one group per country, one bar per vaccine.
sns.catplot(
    data=tmp,
    x="location",
    y="total_vaccinations",
    hue="vaccine",
    kind="bar",
    height=8,
);

### Vaccines per Country

In [None]:
# Number of countries reporting each vaccine.
#
# Grouping by location alone and taking max() reduces the string column
# "vaccine" to the alphabetically last name per country, so each country would
# be counted for a single vaccine only. Grouping on (location, vaccine) keeps
# every pair, and count() then yields one tally per vaccine.
# The result keeps the same columns as before: "vaccine" and
# "total_vaccinations" (which holds the count).
tmp = (
    df.groupby(["location", "vaccine"])
    .max()
    .groupby(level="vaccine")
    .count()
    .reset_index()
)
tmp

In [None]:
# `tmp` is the result of a count() aggregation, so the "total_vaccinations"
# column plotted here is a count, not a number of doses.
sns.catplot(
    data=tmp,
    x="vaccine",
    y="total_vaccinations",
    kind="bar",
    height=7,
);

### Most Used Vaccine per Country

In [None]:
# Peak cumulative total for every (country, vaccine) pair.
tmp = df.groupby(["location", "vaccine"]).max()
# Convert to each vaccine's percentage share within its country.
# transform("sum") broadcasts the per-country total back onto the original
# (location, vaccine) index. Unlike groupby.apply it never prepends an extra
# group level, and it avoids calling float() on a one-element Series.
country_totals = tmp.groupby(level="location").transform("sum")
tmp = 100 * tmp / country_totals
# Keep "location" as the index (used to join with the map) and expose
# "vaccine" as a regular column.
tmp = tmp.reset_index(level="vaccine")

In [None]:
# World shapefile, indexed by its COUNTRY column.
map_df = gpd.read_file("../input/world-map/World_Countries.shp").set_index("COUNTRY")

In [None]:
def plot_map(map_df, title, column):
    """Draw a "BuGn" choropleth of ``column`` with a matching colour bar.

    Parameters
    ----------
    map_df
        Frame whose ``.plot`` method draws geometries coloured by a column
        (a geopandas GeoDataFrame in this notebook).
    title : str
        Title placed above the map.
    column : str
        Name of the numeric column that drives the colouring; its minimum and
        maximum define the colour-bar range.

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    fig, ax = plt.subplots(1, figsize=(18, 8))

    ax.axis("off")
    ax.set_title(title)

    # Stand-alone mappable used only to build the colour bar; its range is
    # taken from the plotted column.
    value_range = plt.Normalize(vmin=map_df[column].min(), vmax=map_df[column].max())
    sm = plt.cm.ScalarMappable(cmap="BuGn", norm=value_range)
    # Public API instead of assigning the private `_A` attribute.
    sm.set_array([])
    # The mappable is not attached to any Axes, so the target Axes must be
    # given explicitly; recent Matplotlib versions refuse to guess it.
    fig.colorbar(sm, ax=ax)

    map_df.plot(column=column, cmap="BuGn", linewidth=0.2, ax=ax, edgecolor="0")

In [None]:
vaccines = tmp["vaccine"].unique()
# Only the first vaccine is mapped: the loop is limited to a single iteration.
for v in vaccines[:1]:
    tmp_df = tmp.loc[tmp["vaccine"] == v].drop(columns=["vaccine"])
    # Countries with no row for this vaccine get 0 after the join.
    x = map_df.join(tmp_df).fillna(0)
    plot_map(x, f"{v} percentages", "total_vaccinations")

### Heatmap

In [None]:
# Peak cumulative total for every (country, vaccine) pair.
tmp = df.groupby(["location", "vaccine"]).max()
# Percentage share of each vaccine within its country. transform("sum") keeps
# the (location, vaccine) index intact, whereas groupby.apply may prepend a
# duplicate "location" level and the old float(x.sum()) call is deprecated.
tmp = 100 * tmp / tmp.groupby(level="location").transform("sum")
# Countries as rows, vaccines as columns; unused combinations become 0.
tmp = tmp.pivot_table(
    values="total_vaccinations", index="location", columns="vaccine"
).fillna(0)
tmp.style.background_gradient(cmap="BuGn").set_properties(**{'font-size': '17px'})

## In which country is the vaccination programme most advanced?

In [None]:
# Per-country vaccination data. This rebinds `df`, replacing the
# per-manufacturer table loaded earlier.
df = pd.read_csv(
    "../input/covid-world-vaccination-progress/country_vaccinations.csv",
    index_col="date",
    parse_dates=True,
)

In [None]:
# Number of missing values in each column.
df.isnull().sum()

### Vaccinations per Hundred

In [None]:
# Highest value reached by each per-hundred metric in every country.
per_hundred_cols = [
    "total_vaccinations_per_hundred",
    "people_vaccinated_per_hundred",
    "people_fully_vaccinated_per_hundred",
]
tmp = df[["country"] + per_hundred_cols].groupby("country").max()
# Metrics that are still missing for some countries after aggregation.
tmp.isna().sum()

In [None]:
# Read the world shapefile again and index it by its COUNTRY column.
world = gpd.read_file("../input/world-map/World_Countries.shp")
map_df = world.set_index("COUNTRY")

In [None]:
# Map rows without a matching country in `tmp` are filled with 0.
# Note: the map doesn't contain Gibraltar.
plot_map(
    map_df.join(tmp).fillna(0),
    "Total vaccinations per hundred",
    "total_vaccinations_per_hundred",
)

In [None]:
# One bar chart per metric showing the 20 countries with the highest value;
# missing values are treated as 0.
for col in (
    "total_vaccinations_per_hundred",
    "people_vaccinated_per_hundred",
    "people_fully_vaccinated_per_hundred",
):
    sorted_df = tmp.fillna(0).sort_values(col).tail(20)
    g = sns.catplot(
        data=sorted_df,
        kind="bar",
        x=sorted_df.index,
        y=col,
        height=10,
    )
    plt.xticks(rotation=45);

## Where are the most people vaccinated per day, in absolute numbers and relative to the population?

### Daily Vaccinations

In [None]:
# Only "daily_vaccinations" is used here; the meaning of
# "daily_vaccinations_raw" is unclear, so that column is left out.
tmp = df.loc[:, ["country", "daily_vaccinations"]].copy()

In [None]:
# Number of missing values in each column.
tmp.isnull().sum()

In [None]:
# Where are the NaN values? Show the six countries with the most rows that
# contain one.
rows_with_nan = tmp[tmp.isna().any(axis=1)]
(
    rows_with_nan
    .fillna(1)  # make the NaN cells countable
    .groupby("country")
    .count()
    .sort_values("daily_vaccinations", ascending=False)
    .head(6)
)

In [None]:
# One option is to replace each NaN value with the country's mean, but that is not the best approach.
# In the end the NaN values are simply ignored (dropped), so the code below is left disabled.
#for c in tmp["country"].unique():
#    mean = tmp[tmp["country"] == c].mean().values[0]
#    tmp.loc[(tmp["country"] == c) & (tmp["daily_vaccinations"].isna()), "daily_vaccinations"] = mean

In [None]:
# Missing values per column, checked again before dropping them.
tmp.isnull().sum()

In [None]:
# Inspect the rows for Chad; these rows are to be removed.
tmp.loc[tmp["country"] == "Chad"]

In [None]:
# Discard every row that still contains a missing value.
tmp = tmp.dropna(axis=0)

In [None]:
# Most recent daily figure for every country.
# groupby().last() yields the same country-indexed frame as the former
# agg(["last"]) / stack / reset_index / drop("level_1") chain, without relying
# on an auto-generated column name. Sorting by the date index first makes
# "last" mean the latest date regardless of the row order in the file.
last_values = tmp.sort_index().groupby("country").last()
# Keep the 20 countries with the highest latest value.
last_values = last_values.sort_values("daily_vaccinations").tail(20)

sns.catplot(data=last_values, kind="bar", x=last_values.index, y="daily_vaccinations", height=12)
plt.xticks(rotation=45);

### Daily Vaccinations per Million

In [None]:
# Daily vaccinations per million, keeping only rows without missing values.
tmp = df.loc[:, ["country", "daily_vaccinations_per_million"]].dropna()

In [None]:
# Most recent per-million figure for every country.
# groupby().last() replaces the agg(["last"]) / stack / reset_index /
# drop("level_1") chain, which depended on an auto-generated column name.
# Sorting by the date index first makes "last" mean the latest date
# regardless of the row order in the file.
last_values = tmp.sort_index().groupby("country").last()
# Keep the 20 countries with the highest latest value.
last_values = last_values.sort_values("daily_vaccinations_per_million").tail(20)

sns.catplot(data=last_values, kind="bar", x=last_values.index, y="daily_vaccinations_per_million", height=12)
plt.xticks(rotation=45);

### Taking into account the last month only

In [None]:
# Restrict to rows dated 2021-06-01 or later.
tmp = tmp.loc[tmp.index >= "2021-06-01"]

# Average daily rate per country over that period; keep the 20 highest.
last_values = (
    tmp.groupby("country")
    .mean()
    .sort_values("daily_vaccinations_per_million")
    .tail(20)
)

sns.catplot(
    data=last_values,
    kind="bar",
    x=last_values.index,
    y="daily_vaccinations_per_million",
    height=12,
)
plt.xticks(rotation=45);