In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Collect the full path of every file under the Kaggle input directory, then print one per line
input_file_paths = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
]
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### Using pandas plotting, with minimal use of any other library, we'll try to visualize the vaccination status of different countries


In [None]:
#importing the required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the per-country vaccination records and preview the first rows
DATA_PATH = "/kaggle/input/covid-world-vaccination-progress/country_vaccinations.csv"
data = pd.read_csv(DATA_PATH)
data.head()

### Understanding the data
The data contains the following information:

- Country- this is the country for which the vaccination information is provided;
- Country ISO Code - ISO code for the country;
- Date - date for the data entry; for some of the dates we have only the daily vaccinations, for others, only the (cumulative) total;
- Total number of vaccinations - this is the absolute number of total immunizations in the country;
- Total number of people vaccinated - a person, depending on the immunization scheme, will receive one or more (typically 2) doses; at any given moment, the number of vaccinations might therefore be larger than the number of people vaccinated;
- Total number of people fully vaccinated - this is the number of people that received the entire set of immunization according to the immunization scheme (typically 2); at a certain moment in time, there might be a certain number of people that received one vaccine and another number (smaller) of people that received all vaccines in the scheme;
- Daily vaccinations (raw) - for a certain data entry, the number of vaccination for that date/country;
- Daily vaccinations - for a certain data entry, the number of vaccination for that date/country;
- Total vaccinations per hundred - ratio (in percent) between vaccination number and total population up to the date in the country
- Total number of people vaccinated per hundred - ratio (in percent) between population immunized and total population up to the date in the country;
- Total number of people fully vaccinated per hundred - ratio (in percent) between population fully immunized and total population up to the date in the country;
- Number of vaccinations per day - number of daily vaccination for that day and country;
- Daily vaccinations per million - ratio (in ppm) between vaccination number and total population for the current date in the country;
- Vaccines used in the country - the vaccine, or combination of vaccines, used in the country (up to date);
- Source name - source of the information (national authority, international organization, local organization etc.);
- Source website - website of the source of information;

In [None]:
# Print a summary of the frame: column names, dtypes and non-null counts
data.info()

In [None]:
# Share of missing values in each column, as a percentage of all rows
row_count = len(data)
missing_pct = data.isna().sum().div(row_count).mul(100)
missing_pct

Here we can see that a lot of values in the data are missing. The `people_fully_vaccinated` column has the highest share of missing values. For further analysis of the data we will replace the missing values with 0.

In [None]:
# Fill the gaps: a missing ISO code becomes the label "Unknown",
# and every other missing value in the frame becomes 0
iso_codes_filled = data["iso_code"].replace(np.nan, "Unknown")
data["iso_code"] = iso_codes_filled
data = data.replace(np.nan, 0)
data.head()

### Finding out which country has highest number of vaccinated citizens

In [None]:
# Countries with the largest cumulative vaccination counts.
# `total_vaccinations` is a cumulative total, so each country's maximum is its latest figure.
CRORE = 1e7  # 1 crore = 10 million
MIN_TOTAL_VACCINATIONS = 1_300_000  # only chart countries above this total so the bars stay readable

sorted_vaccination_data = (
    data.groupby("country")["total_vaccinations"].max().sort_values(ascending=False)
)
leading_countries = sorted_vaccination_data[sorted_vaccination_data > MIN_TOTAL_VACCINATIONS]

fig, ax = plt.subplots(figsize=(10, 5))
# Scale explicitly so the "(in crores)" label is correct whatever offset matplotlib picks for the axis
(leading_countries / CRORE).plot(kind="bar", ax=ax)
# The column counts vaccinations (doses) administered, which can exceed the number of people vaccinated
ax.set_ylabel("Total vaccinations administered (in crores)")
ax.set_title("Country Leading the vaccination race");

### It is clear from the graph that the U.S. has administered the highest number of vaccinations

### Finding out which country has the highest number of fully vaccinated citizens (i.e. people who have received both doses of a COVID vaccine)


In [None]:
# Countries with the most fully vaccinated people.
# The column is cumulative, so each country's maximum is its latest figure.
CRORE = 1e7  # 1 crore = 10 million
MIN_FULLY_VACCINATED = 150_000  # only chart countries above this count so the bars stay readable

sorted_fully_vaccination_data = (
    data.groupby("country")["people_fully_vaccinated"].max().sort_values(ascending=False)
)
leading_fully_vaccinated = sorted_fully_vaccination_data[
    sorted_fully_vaccination_data > MIN_FULLY_VACCINATED
]

fig, ax = plt.subplots(figsize=(10, 5))
# Scale explicitly so the "(in crores)" label is correct whatever offset matplotlib picks for the axis
(leading_fully_vaccinated / CRORE).plot(kind="bar", ax=ax)
ax.set_ylabel("Total no of Fully vaccinated people(in crores)")
ax.set_title("Country leading in complete vaccination(i.e people administrated with two vaccines)");


### United States leads the race in this too

### Finding out the most popular vaccines/combination of vaccines that is used by different countries

In [None]:
# Most widely used vaccines / vaccine combinations, by vaccinations administered.
CRORE = 1e7  # 1 crore = 10 million

# `total_vaccinations` is cumulative per country, so first take each country's latest (max) total,
# then add the countries together per vaccine combination. A plain max over the whole vaccine
# group would only report the single largest country using that combination.
country_totals = data.groupby(["vaccines", "country"])["total_vaccinations"].max()
pop_vaccines = country_totals.groupby(level="vaccines").sum()

# Scale explicitly so the "(in crores)" label is correct whatever offset matplotlib picks for the axis
(pop_vaccines.sort_values(ascending=False) / CRORE).plot.bar(
    color="purple",
    xlabel="Vaccines",
    ylabel="Total vaccinations administered (in crores)",
    title="Most popular vaccines",
    figsize=(10, 5),
);

### The combination of Pfizer and Moderna is used the most

### Finding daily vaccination progress of four major countries

In [None]:
# Daily vaccination progress of four major countries.
MILLION = 1e6
MAJOR_COUNTRIES = ["United States", "United Kingdom", "India", "Israel"]

# One row per date, one column per country
daily_by_country = data.pivot_table(index="date", columns="country", values="daily_vaccinations")

daily_major = (
    daily_by_country[MAJOR_COUNTRIES]
    .replace(0, np.nan)  # missing values were filled with 0 earlier in the notebook; treat them as gaps again
    .bfill()             # back-fill gaps with the next reported value (replaces the deprecated fillna(method="bfill"))
    .div(MILLION)        # scale explicitly so the "(in millions)" label is correct
)

ax = daily_major.plot(
    xlabel="Date",
    ylabel="No of people vaccinated daily(in millions)",
    title="Progress of daily vaccination in major countries",
    figsize=(10, 7),
)
# Place the legend outside the plotting area so it does not cover the lines
ax.legend(loc="upper right", bbox_to_anchor=(1.4, 1));

### Finding out which vaccines/combinations of vaccines are preferred by most countries

In [None]:
# Share of countries using each vaccine / vaccine combination
countries_per_vaccine = (
    data.groupby("vaccines")["country"]
    .nunique()
    .sort_values(ascending=False)
)
countries_per_vaccine.plot.pie(
    startangle=30,
    fontsize=10,
    figsize=(8, 10),
    autopct="%.1f",
)


### Here we can see that Pfizer is preferred by 26% of the countries, followed by the combination of Moderna, Pfizer and AstraZeneca

### finding out which country has vaccinated most of its population

In [None]:
# Countries with the most vaccinations relative to their population.
# `total_vaccinations_per_hundred` counts vaccinations (doses) per 100 inhabitants, so it is
# not strictly the share of the population that has been vaccinated.
MIN_PER_HUNDRED = 20  # only chart countries above this level so the bars stay readable

vaccinations_per_hundred = (
    data.groupby("country")["total_vaccinations_per_hundred"].max().reset_index()
)
top_per_hundred = vaccinations_per_hundred[
    vaccinations_per_hundred["total_vaccinations_per_hundred"] > MIN_PER_HUNDRED
].sort_values(by="total_vaccinations_per_hundred", ascending=False)

top_per_hundred.plot.bar(
    x="country",
    y="total_vaccinations_per_hundred",
    xlabel=f"Countries (more than {MIN_PER_HUNDRED} vaccinations per hundred people)",
    ylabel="Total vaccinations per hundred people",
    title="Countries with the most vaccinations relative to population",
    color="g",
    figsize=(10, 5),
);

### Gibraltar has vaccinated almost 87% of its population, followed by Israel

### Pandas plotting is limited, and not much can be done with its basic plots, so other visualization libraries such as seaborn, matplotlib and plotly can be used instead. Since many notebooks for this dataset use more complex visualization libraries, I wanted to demonstrate that the same can be done with the basic plotting built into pandas. Although plotting in pandas is limited, it is much simpler and can be easily understood by anyone.