In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the country-level vaccination CSV shipped with the Kaggle dataset.
df = pd.read_csv(
    filepath_or_buffer="../input/covid-world-vaccination-progress/country_vaccinations.csv"
)
# Bare last expression so the notebook renders the loaded frame.
df

# **DATA CLEANING**

In [None]:
# Print the frame summary (columns, non-null counts, dtypes) to see where values are missing.
df.info()

In [None]:
# Distinct country names present in the data.
df['country'].unique()

In [None]:
# Which countries have at least one row without an iso_code?
iso_is_missing = df['iso_code'].isna()
df.loc[iso_is_missing, 'country'].unique()

In [None]:
# Fill the gaps: one fixed ISO code for rows that lack it, zero for every numeric metric.
# NOTE(review): every missing iso_code becomes 'GBR' — confirm that the only countries
# without a code (see the previous cell's output) really belong to the United Kingdom.
zero_fill_columns = [
    'total_vaccinations',
    'people_vaccinated',
    'people_fully_vaccinated',
    'daily_vaccinations',
    'daily_vaccinations_raw',
    'total_vaccinations_per_hundred',
    'people_vaccinated_per_hundred',
    'people_fully_vaccinated_per_hundred',
    'daily_vaccinations_per_million',
]
fill_values = {'iso_code': 'GBR', **dict.fromkeys(zero_fill_columns, 0)}
df = df.fillna(value=fill_values)

# Inspect the last 50 rows to check the result.
df.tail(50)

In [None]:
# Drop the unnecessary source columns.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this cell
# idempotent: running it again after the columns are already gone no longer raises
# a KeyError.
df = df.drop(columns=['source_name', 'source_website'], errors='ignore')

In [None]:
# Convert the 'date' column from plain strings to datetime values,
# then re-print the summary to confirm the new dtype.
df['date'] = df['date'].pipe(pd.to_datetime)
df.info()

In [None]:
# Build one row per (country, iso_code, vaccines) combination holding the maximum
# value seen for each selected metric.
# The metrics are selected with a *list*: selecting several columns after groupby
# with a bare tuple of names (the previous form) is deprecated and raises on pandas 2.x.
metric_columns = [
    'total_vaccinations',
    'total_vaccinations_per_hundred',
    'daily_vaccinations',
    'daily_vaccinations_per_million',
    'people_vaccinated',
    'people_vaccinated_per_hundred',
]
new_df = (
    df.groupby(["country", "iso_code", "vaccines"])[metric_columns]
    .max()
    .reset_index()
)

new_df



In [None]:
# List of countries using each specific vaccine combination.
# The table is built in one go from a list of records: DataFrame.append (used before)
# was removed in pandas 2.0, and growing a frame row by row is quadratic anyway.
# sort=False keeps the vaccine combinations in order of first appearance in new_df.
vaccine_records = [
    {'Vaccines': vaccine, 'Countries': rows['country'].values}
    for vaccine, rows in new_df.groupby('vaccines', sort=False)
]
let = pd.DataFrame(vaccine_records, columns=['Vaccines', 'Countries'])

# Number of countries per vaccine combination.
let['count'] = let['Countries'].map(len)

# Most widely used combinations first; a stable sort keeps ties in a deterministic order.
let = let.sort_values('count', ascending=False, kind='stable').reset_index(drop=True)
let

#  **DATA VISUALIZATION**

In [None]:
# World map: each country is coloured by the vaccine combination it uses.
fig = px.choropleth(
    locations=new_df['country'],
    locationmode="country names",
    color=new_df['vaccines'],
    title="Countries using each vaccine ",
)
# Horizontal legend with an explicit title, applied in a single layout update.
fig.update_layout({
    'legend_orientation': 'h',
    'legend_title': 'Vaccine scheme',
})
fig.show()

In [None]:
# Treemap: vaccine combination -> country, with tiles sized by total vaccinations.
fig = px.treemap(
    new_df,
    path=['vaccines', 'country'],
    values='total_vaccinations',
    title="total vaccination in each country",
)
fig.show()

In [None]:
# Top vaccines around the world: for each vaccine combination take the largest
# 'people_vaccinated' value found in any of its rows, then order largest first.
top_vacc = (
    df.groupby(['vaccines'])[['people_vaccinated']]
    .max()
    .reset_index()
    .sort_values('people_vaccinated', ascending=False)
)

plt.figure(figsize=(20, 11))
sns.barplot(data=top_vacc, x='people_vaccinated', y='vaccines')
plt.title('Top vaccines')  # title previously misspelled as 'vaccaines'
plt.show()

In [None]:
# Highest total_vaccinations value per (country, vaccines) pair, largest first.
df_vaccines = (
    df.groupby(['country', 'vaccines'])['total_vaccinations']
    .max()
    .reset_index()
    .sort_values('total_vaccinations', ascending=False)
)

# Horizontal bar chart of the first 20 rows of that ranking.
plt.figure(figsize=(20, 9))
sns.barplot(data=df_vaccines.head(20), x='total_vaccinations', y='country')
plt.title('Top 20 countries consuming vaccines')
plt.show()


In [None]:
# Bubble map of daily vaccinations: one marker per row of new_df, located by ISO code
# and sized by the row's daily_vaccinations value.
# NOTE(review): color is the categorical 'country' column, so color_continuous_scale
# is presumably ignored by plotly — confirm, and drop the argument if so.
fig = px.scatter_geo(
    new_df,
    locations="iso_code",
    size="daily_vaccinations",
    color='country',
    hover_name="country",
    color_continuous_scale=px.colors.sequential.Tealgrn,
    title="daily vaccination in each Country",
)

# Map background colour and projection.
fig.update_layout(geo={'bgcolor': '#61efff'})
fig.update_geos(projection_type="natural earth")

fig.show()


In [None]:
# Total vaccination progress in India.
# df holds rows for every country, so it is restricted to India first; plotting the
# whole frame would draw a cross-country aggregate under an India-only title.
india_df = df[df['country'] == 'India']  # assumes the dataset spells the country 'India' — TODO confirm
plt.figure(figsize=(20, 7))
sns.lineplot(data=india_df, x='date', y='total_vaccinations')
plt.title('Total vaccination in INDIA')
plt.show()

In [None]:
# Total vaccinations per hundred people in India.
# df holds rows for every country, so it is restricted to India first; plotting the
# whole frame would draw a cross-country aggregate under an India-only title.
india_df = df[df['country'] == 'India']  # assumes the dataset spells the country 'India' — TODO confirm
plt.figure(figsize=(20, 7))
sns.lineplot(data=india_df, x='date', y='total_vaccinations_per_hundred')
plt.title('Total vaccination  per hundred in INDIA')
plt.show()

In [None]:
# Daily vaccinations in India.
# df holds rows for every country, so it is restricted to India first; plotting the
# whole frame would draw a cross-country aggregate under an India-only title.
india_df = df[df['country'] == 'India']  # assumes the dataset spells the country 'India' — TODO confirm
plt.figure(figsize=(20, 7))
sns.lineplot(data=india_df, x='date', y='daily_vaccinations')
plt.title('Daily vaccination in INDIA')
plt.show()

In [None]:
# Daily vaccinations per million people in India.
# df holds rows for every country, so it is restricted to India first; plotting the
# whole frame would draw a cross-country aggregate under an India-only title.
india_df = df[df['country'] == 'India']  # assumes the dataset spells the country 'India' — TODO confirm
plt.figure(figsize=(20, 7))
sns.lineplot(data=india_df, x='date', y='daily_vaccinations_per_million')
plt.title('Daily Vaccinations per million in INDIA')
plt.show()

In [None]:
# People vaccinated in India.
# df holds rows for every country, so it is restricted to India first; plotting the
# whole frame would draw a cross-country aggregate under an India-only title.
india_df = df[df['country'] == 'India']  # assumes the dataset spells the country 'India' — TODO confirm
plt.figure(figsize=(20, 7))
sns.lineplot(data=india_df, x='date', y='people_vaccinated')
plt.title('People Vaccinated in INDIA')
plt.show()

In [None]:
# People vaccinated per hundred in India.
# df holds rows for every country, so it is restricted to India first; plotting the
# whole frame would draw a cross-country aggregate under an India-only title.
india_df = df[df['country'] == 'India']  # assumes the dataset spells the country 'India' — TODO confirm
plt.figure(figsize=(20, 7))
sns.lineplot(data=india_df, x='date', y='people_vaccinated_per_hundred')
plt.title('People Vaccinated per hundred in INDIA')
plt.show()