## COVID-19 World Vaccination Progress 

* The dataset was copied from its original author, and the aim was to monitor the progress of coronavirus vaccinations in various countries across the different sections of the continents.
* The analysis included creating a single dataset from multiple datasets with all the relevant information, and then defining a common function for plotting the graphs for each section of the continents.
* The aim was to gain familiarity with working with several different datasets, and with calling and applying functions on them to get the desired results.



In [None]:
from matplotlib.offsetbox import TextArea, DrawingArea, OffsetImage, AnnotationBbox
from matplotlib.collections import PatchCollection
from matplotlib.patches import PathPatch
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Polygon
from geopy.geocoders import Nominatim
import matplotlib.pyplot as plt
from matplotlib import image
import seaborn as sns
import pandas as pd
import numpy as np
import warnings
# Suppress every warning issued through the `warnings` module from here on.
warnings.filterwarnings('ignore')


    
# Use the "Krungthep" font family for all matplotlib text.
# NOTE(review): presumably this font is not installed in every environment —
# confirm it is available where the notebook runs.
plt.rcParams['font.family'] = "Krungthep"

In [None]:
# Load the three source tables: the case summary, the country -> continent /
# sub-region mapping, and the vaccination records.
cases = pd.read_csv("../input/covid19-global-dataset/worldometer_coronavirus_summary_data.csv")
continents = pd.read_csv("../input/country-mapping-iso-continent-region/continents2.csv")
data = pd.read_csv("../input/covid-world-vaccination-progress/country_vaccinations.csv")

# Give the mapping table the same key column names as the case summary so the
# two can be merged on (country, continent).
rename_cont = continents.rename(columns={'name': 'country', 'region': 'continent'})

trim_cases_df2 = cases[[
    "country",
    "continent",
    "total_confirmed",
    "total_recovered",
    "total_cases_per_1m_population",
    "total_tests_per_1m_population",
]]

# Outer join: rows present in only one of the two tables are kept.
merge_subregion_df = rename_cont.merge(trim_cases_df2, how="outer", on=['country', 'continent'])
trim_data_df = merge_subregion_df[[
    "country",
    "continent",
    "sub-region",
    "total_confirmed",
    "total_recovered",
    "total_cases_per_1m_population",
    "total_tests_per_1m_population",
]]

# Per-country mean of total_vaccinations_per_hundred over all rows of `data`.
# NOTE(review): if this column is cumulative, the mean over rows may not be the
# intended summary — confirm against the dataset description.
data_mean_df = data.groupby(["country"])['total_vaccinations_per_hundred'].mean().reset_index()

# Number of rows for every (country, vaccines) pair, flattened into ordinary
# columns with the tally stored under 'count'.
data_vaccine_df = data.groupby('country')["vaccines"].value_counts()
vaccine_df = pd.DataFrame(data_vaccine_df).rename(columns={'vaccines': 'count'}).reset_index()

# Vaccination summary per country, then attached to the case / sub-region
# table. The inner join keeps only countries present in both tables.
data_sum_df = data_mean_df.merge(vaccine_df, on='country')
merge_df = trim_data_df.merge(data_sum_df, how="inner", on='country')


def _top5_by_cases(frame):
    # Five rows of `frame` with the highest cases per million.
    return frame.nlargest(5, "total_cases_per_1m_population")


# Top five countries by cases per million within every sub-region.
grp = merge_df.groupby('sub-region').apply(_top5_by_cases).reset_index(drop=True)
grp["sub-region"].unique()






# Top 10 Vaccines around the World

In [None]:
# Bar chart of the ten most frequent values of data['vaccines'], measured as
# the proportion of rows in `data` carrying each value.
plt.figure(figsize=(15, 5))

# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*"; use
# whichever spelling this installation provides (and skip styling if neither).
for style_name in ("seaborn-v0_8-dark", "seaborn-dark"):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

# Name both columns explicitly: the default names produced by
# value_counts().reset_index() differ between pandas versions
# ('index'/'vaccines' before 2.0, 'vaccines'/'proportion' from 2.0 on).
vacc1 = (
    data['vaccines']
    .value_counts(normalize=True)
    .rename_axis('index')
    .reset_index(name='vaccines')
)
vacc1 = vacc1.sort_values(by=['vaccines'], ascending=False)
top10 = vacc1.head(10)

# x / y are passed by keyword; recent seaborn releases no longer accept them
# positionally.
sns.barplot(data=top10, x='index', y='vaccines', palette='coolwarm')
plt.xticks(rotation=-90)
sns.despine()
plt.title('Top 10 Vaccines around the World')
plt.xlabel('Vaccines')
# The values are normalized (normalize=True), so they are proportions, not counts.
plt.ylabel('Proportion of records')



In [None]:
# One-column table ("Region") of the distinct sub-region names found in grp,
# in sorted order.
subregion = sorted(grp["sub-region"].unique())
subregions = pd.DataFrame(subregion).rename(columns={0: "Region"})
subregions


# Eastern Asia and South Eastern Asia Region

In [None]:
# Recovery rate = recovered / confirmed. It is stored on grp itself because
# plot() also reads the 'Recovery_Rate' column from grp.
grp["Recovery_Rate"] = grp["total_recovered"] / grp["total_confirmed"]

# Eastern and South-eastern Asia are shown together in one figure.
df_ewa = grp[grp['sub-region'].isin(['Eastern Asia', 'South-eastern Asia'])]

fig, axis = plt.subplots(2, 2, figsize=(25, 10))
fig.suptitle('South-Eastern & Eastern Asia')

# Each bar panel is ordered ascending by the metric it displays.
sns.barplot(ax=axis[0, 0], data=df_ewa, x='country', y='total_vaccinations_per_hundred', palette='coolwarm',
            order=df_ewa.sort_values(by='total_vaccinations_per_hundred')['country']).set(xlabel=None)
# Fix: this panel is titled "Cases per million" and ordered by cases per
# million, so it must plot total_cases_per_1m_population (it previously
# re-plotted total_vaccinations_per_hundred).
sns.barplot(ax=axis[0, 1], data=df_ewa, x='country', y='total_cases_per_1m_population', palette='coolwarm',
            order=df_ewa.sort_values(by='total_cases_per_1m_population')['country']).set(xlabel=None)
sns.barplot(ax=axis[1, 0], data=df_ewa, x='country', y='Recovery_Rate', palette='coolwarm',
            order=df_ewa.sort_values(by='Recovery_Rate')['country']).set(xlabel=None)
sns.scatterplot(ax=axis[1, 1], data=df_ewa, x='vaccines', y='country', palette='coolwarm').set(xlabel=None)
plt.xticks(rotation=-90)

axis[0, 0].set_title("Vaccination per hundred")
axis[0, 1].set_title("Cases per million")
axis[1, 0].set_title("Recovery Rate")
axis[1, 1].set_title("Type of Vaccine Used")




# Defining a function

In [None]:
# One plotting function shared by every sub-region.

def plot(region):
    """Draw a 2x2 summary figure for one sub-region of ``grp``.

    Panels: vaccinations per hundred, cases per million and recovery rate as
    bar charts (each ordered ascending by its own metric), plus a scatter of
    the vaccines used per country. Reads the module-level ``grp`` frame, which
    must already contain the 'Recovery_Rate' column.

    Nothing is drawn and nothing is reported when ``grp`` has no rows whose
    'sub-region' equals ``region``.
    """
    df_reg = grp[grp['sub-region'] == region]
    if df_reg.empty:
        return

    fig, axis = plt.subplots(2, 2, figsize=(25, 10))
    fig.suptitle(region)

    # (target axes, metric column) for the three bar panels, in drawing order.
    bar_panels = (
        (axis[0, 0], 'total_vaccinations_per_hundred'),
        (axis[0, 1], 'total_cases_per_1m_population'),
        (axis[1, 0], 'Recovery_Rate'),
    )
    for panel, metric in bar_panels:
        ranked_countries = df_reg.sort_values(by=metric)['country']
        bars = sns.barplot(ax=panel, data=df_reg, x='country', y=metric,
                           palette='coolwarm', order=ranked_countries)
        bars.set(xlabel=None)

    dots = sns.scatterplot(ax=axis[1, 1], data=df_reg, x='vaccines', y='country', palette='coolwarm')
    dots.set(xlabel=None)
    plt.xticks(rotation=-90)

    panel_titles = (
        (axis[0, 0], "Vaccination per hundred"),
        (axis[0, 1], "Cases per million"),
        (axis[1, 0], "Recovery Rate"),
        (axis[1, 1], "Type of Vaccine Used"),
    )
    for panel, heading in panel_titles:
        panel.set_title(heading)
    plt.show()


# Central Asia

In [None]:
# 2x2 summary figure for 'Central Asia' (nothing is drawn if grp has no rows for it).
plot('Central Asia')

# WESTERN ASIA

In [None]:
# 2x2 summary figure for 'Western Asia' (nothing is drawn if grp has no rows for it).
plot('Western Asia')



# Southern Asia

In [None]:
# 2x2 summary figure for 'Southern Asia' (nothing is drawn if grp has no rows for it).
plot('Southern Asia')

# Southern Europe

In [None]:
# 2x2 summary figure for 'Southern Europe' (nothing is drawn if grp has no rows for it).
plot('Southern Europe')

# Northern Europe

In [None]:
# 2x2 summary figure for 'Northern Europe' (nothing is drawn if grp has no rows for it).
plot('Northern Europe')

# Western Europe

In [None]:
# 2x2 summary figure for 'Western Europe' (nothing is drawn if grp has no rows for it).
plot('Western Europe')

# Eastern Europe

In [None]:
# 2x2 summary figure for 'Eastern Europe' (nothing is drawn if grp has no rows for it).
plot('Eastern Europe')

### Northern Africa

In [None]:
# 2x2 summary figure for 'Northern Africa' (nothing is drawn if grp has no rows for it).
plot('Northern Africa')

### Sub-Saharan Africa

In [None]:

# 2x2 summary figure for 'Sub-Saharan Africa' (nothing is drawn if grp has no rows for it).
plot('Sub-Saharan Africa')

### Northern America

In [None]:
# Top five countries per continent by cases per million, taken straight from
# the case summary, with their recovery rate (recovered / confirmed).
df_NA = cases.groupby('continent').apply(lambda x: x.nlargest(5, "total_cases_per_1m_population")).reset_index(drop=True)
df_NA["Recovery_Rate"] = df_NA["total_recovered"] / df_NA["total_confirmed"]

df_NA_sub = df_NA[df_NA['continent'] == 'North America']

## CANADA AND AMERICA
# Case figures for the two countries. .copy() makes this an independent frame
# so that adding a column does not assign into a slice of `cases`.
trim_1_cases = cases[cases['country'].isin(['Canada', 'USA'])].copy()
trim_1_cases["Recovery_Rate"] = trim_1_cases["total_recovered"] / trim_1_cases["total_confirmed"]

# Vaccination rows for the same two countries. This definition used to be
# commented out although the code below depends on it (NameError). The filter
# uses 'United States' here, whereas the case summary above uses 'USA'.
trim_2_data = data[data['country'].isin(['United States', 'Canada'])]

# Per-country mean of total_vaccinations_per_hundred.
vaccine_trim = trim_2_data.groupby('country')['total_vaccinations_per_hundred'].mean().reset_index()
vaccine_trim1 = pd.DataFrame(vaccine_trim)

# Row count of every (country, vaccines) pair as columns country / vaccines /
# count; naming the value column explicitly works across pandas versions.
vaccine_type = trim_2_data.groupby('country')['vaccines'].value_counts()
pdf = vaccine_type.reset_index(name='count')

# Same four panels as plot(), fed from the three frames built above.
fig, axis = plt.subplots(2, 2, figsize=(25, 10))
fig.suptitle('North America Region')
sns.barplot(ax=axis[0, 0], data=vaccine_trim1, x='country', y='total_vaccinations_per_hundred', palette='coolwarm',
            order=vaccine_trim1.sort_values(by='total_vaccinations_per_hundred')['country']).set(xlabel=None)
sns.barplot(ax=axis[0, 1], data=trim_1_cases, x='country', y='total_cases_per_1m_population', palette='coolwarm',
            order=trim_1_cases.sort_values(by='total_cases_per_1m_population')['country']).set(xlabel=None)
sns.barplot(ax=axis[1, 0], data=trim_1_cases, x='country', y='Recovery_Rate', palette='coolwarm',
            order=trim_1_cases.sort_values(by='Recovery_Rate')['country']).set(xlabel=None)
sns.scatterplot(ax=axis[1, 1], data=pdf, x='vaccines', y='country', palette='coolwarm').set(xlabel=None)
plt.xticks(rotation=-90)
axis[0, 0].set_title("Vaccination per hundred")
axis[0, 1].set_title("Cases per million")
axis[1, 0].set_title("Recovery Rate")
axis[1, 1].set_title("Type of Vaccine Used")

        


Lessons Learnt:
* My aim was to gather data from different datasets and combine them into a single dataset with only the relevant information required, instead of referring to and calling different datasets each time.
* To a certain extent I was able to merge the different datasets into a single dataset, but I observed that when combining datasets using merge, one needs to be clear about which columns to merge on and which type of join to use.
* I joined the datasets worldometer_coronavirus_summary_data and continents2 with an outer join to include the different sections of each continent, and the result was further merged with the country vaccinations summary. It was observed that the data differed between the datasets, and therefore a few countries were lost in the (inner) merge while creating the new dataset.
* An inner join was chosen as the best option because any other type of merge would have produced lots of unspecified (missing) data, and the purpose of our analysis would not have been satisfied.
* However, if we know a particular country or group of countries, we can use the method used for the North American countries and analyse the situation for that set of countries.

Suggestions are welcome, and I would appreciate comments/suggestions for improvements.