**Purpose:** This notebook is my starting point for learning Python from my fellow Kaggle members.

**Code Objective:**

1. Download CSV file
2. Learn how to code group by
3. Work with sub-data frame
4. Plot bar graphs

**Exploratory Data Analysis**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the country-level vaccination CSV (dataset imported on Feb 14 2021).
df_covid_vaccination = pd.read_csv(
    filepath_or_buffer="/kaggle/input/covid-world-vaccination-progress/country_vaccinations.csv",
)

In [None]:
#Data Cleaning

# Many numeric columns are Null/NaN; treat a missing count as zero.
# Only numeric columns are filled, so text columns are not polluted with an
# integer 0 (which would turn them into mixed str/int columns).
numeric_columns = df_covid_vaccination.select_dtypes(include="number").columns

# Wales, Scotland, Northern Ireland and England are reported under
# "United Kingdom". The replacement is limited to the 'country' column so no
# other column is rewritten by accident.
# NOTE(review): if the file also carries its own "United Kingdom" rows, this
# produces several rows per date for that country. The later cells only take
# max(), which tolerates that; a sum() would double count -- confirm before
# adding sum-based aggregations.
uk_nations = ["Wales", "Scotland", "Northern Ireland", "England"]

df_covid_vaccination = (
    df_covid_vaccination
    .fillna({column: 0 for column in numeric_columns})
    .assign(country=lambda frame: frame["country"].replace(uk_nations, "United Kingdom"))
)



In [None]:
# Explore data: first ten rows, last ten rows, column dtypes, then the
# summary statistics, each printed on its own line block.
print(
    df_covid_vaccination.head(10),
    df_covid_vaccination.tail(10),
    df_covid_vaccination.dtypes,
    df_covid_vaccination.describe(),
    sep="\n",
)

In [None]:
#Compare top 10 countries


def top_countries_by(frame, metric, n=10):
    """Rank countries by their highest reported value of ``metric``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with a 'country' column and a ``metric`` column.
    metric : str
        Name of the column to rank by.
    n : int, default 10
        Number of countries to keep.

    Returns
    -------
    pandas.DataFrame
        Columns 'country' and ``metric``, sorted from largest to smallest,
        with a fresh 0..n-1 index.
    """
    # The per-country maximum is used as that country's figure for the metric.
    peak_per_country = frame.groupby(['country'])[metric].max().reset_index()
    # drop=True discards the old positional index instead of adding an
    # 'index' column that would have to be deleted afterwards.
    return peak_per_country.nlargest(n, [metric]).reset_index(drop=True)


# Top 10 countries by total_vaccinations, people_vaccinated and
# people_fully_vaccinated, printed in that order. Both names are rebound on
# every pass, so after the loop they hold the people_fully_vaccinated ranking.
for metric in ("total_vaccinations", "people_vaccinated", "people_fully_vaccinated"):
    df_covid_vaccination_agg = top_countries_by(df_covid_vaccination, metric)
    top10_countries = df_covid_vaccination_agg['country']
    print(df_covid_vaccination_agg)

In [None]:
#Compare top 5 countries based on total vaccinations: US, Israel, Russia, Germany and UK

variables = ['country','total_vaccinations','people_vaccinated','people_fully_vaccinated']


def country_maxima(country):
    """Return the column-wise maximum of ``variables`` over the rows of ``country``."""
    return df_covid_vaccination.loc[(df_covid_vaccination['country'] == country), variables].max()


def report_percentages(label, maxima, population, fully_vaccinated, vaccinated):
    """Print one country's maxima and its vaccination counts as a share of population.

    Parameters
    ----------
    label : str
        Country name used at the start of the printed sentences.
    maxima : pandas.Series
        Result of ``country_maxima`` for the country; printed for reference.
    population : float
        Population used as the denominator.
    fully_vaccinated, vaccinated : float
        Counts of fully vaccinated people and of vaccinated people.

    Returns
    -------
    tuple of float
        (percentage fully vaccinated, percentage vaccinated).
    """
    print(maxima.head())
    print(len(maxima))
    fully_pct = float(fully_vaccinated / population * 100)
    print(label, "percentage of people fully vaccinated:", fully_pct, "%")
    vaccinated_pct = float(vaccinated / population * 100)
    print(label, "percentage of people partially vaccinated:", vaccinated_pct, "%")
    return fully_pct, vaccinated_pct


# NOTE(review): the counts passed below are hard-coded literals, not values read
# from the *data Series. They look like figures transcribed from the printed
# maxima of the Feb 14 2021 snapshot -- confirm before re-running on newer data.

#US
USdata = country_maxima('United States')
US_population = 330084974
Percentage_fully_vaccinated, Percentage_vaccinated = report_percentages(
    "United State", USdata, US_population, 1.30822e+07, 3.70561e+07)

#Israel
Israeldata = country_maxima('Israel')
Israel_population = 8789774
Percentage_fully_vaccinated, Percentage_vaccinated = report_percentages(
    "Israel", Israeldata, Israel_population, 2.45743e+06, 3.82448e+06)

#UK
UKdata = country_maxima('United Kingdom')
UK_population = 68110181
Percentage_fully_vaccinated, Percentage_vaccinated = report_percentages(
    "United Kingdom", UKdata, UK_population, 534869, 1.45568e+07)

#Russia
Russiadata = country_maxima('Russia')
Russia_population = 144.37e+06
Percentage_fully_vaccinated, Percentage_vaccinated = report_percentages(
    "Russia", Russiadata, Russia_population, 1.7e+06, 2.2e+06)

#Germany
Germanydata = country_maxima('Germany')
Germany_population = 83.13e+06
# Both percentages use Germany's own population as the denominator.
Percentage_fully_vaccinated, Percentage_vaccinated = report_percentages(
    "Germany", Germanydata, Germany_population, 1.33157e+06, 2.63567e+06)


In [None]:
#Bar graph population
labels = ['United States', 'Israel', 'United Kingdom', 'Russia', 'Germany']
population = [330084974, 8789774, 68110181, 144.37e+06, 83.13e+06 ]

x = np.arange(len(labels))  # the label locations
width = 0.35  # the width of the bars

fig, ax = plt.subplots()
# A single series is drawn, so each bar is centred on its tick; shifting by
# width/2 is only needed when two series share a tick.
rects1 = ax.bar(x, population, width, label='population')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Count')
ax.set_title('Country Population 2021')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()


def autolabel(rects):
    """Attach a text label above each bar in *rects*, displaying its height.

    Annotations are drawn on the module-level ``ax`` current at call time.
    """
    for rect in rects:
        height = rect.get_height()
        ax.annotate('{}'.format(height),
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom')


autolabel(rects1)

fig.tight_layout()

plt.show()


Bar graph 1 shows the current population of each country. The United States is roughly 40 times larger than Israel. These population values are used to normalize the data that follows so the countries can be compared.

In [None]:
#Bar graph Vaccination#
labels = ['United States', 'Israel', 'United Kingdom', 'Russia', 'Germany']
population = [330084974, 8789774, 68110181, 144.37e+06, 83.13e+06]
vaccinated = [3.70e+07, 3.82448e+06, 1.45568e+07, 2.2e+06, 2.63567e+06]
fully_vaccinated = [1.30822e+07, 2.45743e+06, 534869, 1.7e+06, 1.33157e+06]

# One tick per country; the two series sit side by side around each tick.
x = np.arange(len(labels))
width = 0.35

fig, ax = plt.subplots()


def autolabel(rects):
    """Write each bar's height just above it, on the module-level ``ax``."""
    for bar in rects:
        top = bar.get_height()
        centre = bar.get_x() + bar.get_width() / 2
        ax.annotate(
            f'{top}',
            xy=(centre, top),
            xytext=(0, 3),  # nudge the text 3 points upward
            textcoords="offset points",
            ha='center',
            va='bottom',
        )


# Left bar of each pair: vaccinated; right bar: fully vaccinated.
rects1 = ax.bar(x - width/2, vaccinated, width, label='vaccinated')
rects2 = ax.bar(x + width/2, fully_vaccinated, width, label='fully_vaccinated')

# Axis text, title, tick labels and legend.
ax.set_ylabel('Count')
ax.set_title('Vaccination')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

autolabel(rects1)
autolabel(rects2)

fig.tight_layout()

plt.show()



Bar graph 2 shows the number of people vaccinated, assuming they received a first dose of the Pfizer or Moderna vaccine. The people counted as fully vaccinated have received their second dose and completed their vaccination. At first glance, the United States appears to be doing a great job with its vaccination program compared to the other countries.

In [None]:
#Bar graph Vaccination %

labels = ['United States', 'Israel', 'United Kingdom', 'Russia', 'Germany']
# Populations and counts are listed in the same order as `labels`.
population = [330084974, 8789774, 68110181, 144.37e+06, 83.13e+06]
vaccinated_counts = [3.70561e+07, 3.82448e+06, 1.45568e+07, 2.2e+06, 2.63567e+06]
fully_vaccinated_counts = [1.30822e+07, 2.45743e+06, 534869, 1.7e+06, 1.33157e+06]

# Percentages are derived here rather than typed in by hand, so every country
# is divided by its own population and rounded the same way (one decimal).
partially_vaccinated = [
    round(count / people * 100, 1)
    for count, people in zip(vaccinated_counts, population)
]
fully_vaccinated = [
    round(count / people * 100, 1)
    for count, people in zip(fully_vaccinated_counts, population)
]


x = np.arange(len(labels))  # the label locations
width = 0.35  # the width of the bars

fig, ax = plt.subplots()
rects1 = ax.bar(x - width/2, partially_vaccinated, width, label='partially_vaccinated')
rects2 = ax.bar(x + width/2, fully_vaccinated, width, label='fully_vaccinated')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Percentage')
ax.set_title('Percentage Vaccination of Partially vs. Fully')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()


def autolabel(rects):
    """Attach a text label above each bar in *rects*, displaying its height.

    Annotations are drawn on the module-level ``ax`` current at call time.
    """
    for rect in rects:
        height = rect.get_height()
        ax.annotate('{}'.format(height),
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom')


autolabel(rects1)
autolabel(rects2)

fig.tight_layout()

plt.show()

Bar graph 3 shows how each country's vaccination program is doing as a percentage of its population. The counts were normalized by population so the countries can be compared fairly.
1. Israel is approaching 50% of its population vaccinated with a first dose, and roughly 25% of the population is already fully vaccinated. It is definitely a successful case study in how to control a pandemic effectively.
2. The United Kingdom has roughly 25% of its population vaccinated with a first dose. However, less than 1% of the population is fully vaccinated, possibly due to a vaccine shortage, slow data reporting, or the rush to give as many people as possible a first dose because of the new UK variant.
3. The United States has only 11% of its population vaccinated with a first dose and 4% fully vaccinated. The rollout has been slow due to misinformation, missed opportunities, and mismanagement. This will be a case study for generations to learn how to avoid such missteps.
