In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import plotly.express as px
from fbprophet import Prophet
from fbprophet.plot import plot_plotly
# Render every float shown by pandas with exactly two decimal places.
pd.set_option('display.float_format', '{:.2f}'.format)

# Silence all warnings for the rest of the session.
import warnings
warnings.filterwarnings(action='ignore')

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Lazily build the full path of every file found under the input directory,
# then print them one per line (nothing is printed for empty directories).
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Location of the vaccination-progress CSV; read it into the raw working frame.
vaccinations_csv_path = '/kaggle/input/covid-world-vaccination-progress/country_vaccinations.csv'
df = pd.read_csv(vaccinations_csv_path)

In [None]:
# Summarise the raw frame: column names, non-null counts and dtypes.
df.info()

In [None]:
# Print the (rows, columns) size of the raw frame.
print(df.shape)

In [None]:
# Preview the first 30 rows of the raw frame.
df.head(30)

In [None]:
# Preview the last 30 rows of the raw frame.
df.tail(30)

In [None]:
# Count missing values per column and list the columns with the most gaps first.
missing_per_column = df.isna().sum()
missing_per_column.sort_values(ascending=False)

In [None]:
# Descriptive statistics (count, mean, std, quartiles, min/max) of the numeric columns.
df.describe()

In [None]:
# Show the dtype pandas inferred for each column.
df.dtypes

In [None]:
# List the available column names before selecting the ones to keep.
df.columns

In [None]:
# Keep only the columns used by the rest of the analysis.
# The explicit .copy() makes the result an independent frame, so later
# `df.loc[...] = ...` assignments write to it unambiguously instead of
# triggering pandas' SettingWithCopyWarning (hidden here by the global
# warnings filter).
df = df[[
    'date',
    'country',
    'iso_code',
    'daily_vaccinations',
    'people_vaccinated_per_hundred',
    'vaccines',
]].copy()

In [None]:
# Count missing values in the text (object-dtype) columns only.
text_columns = df.select_dtypes(include='object')
text_columns.isna().sum()

In [None]:
# For rows that lack an iso_code, count how many belong to each country.
iso_code_missing = df['iso_code'].isna()
df.loc[iso_code_missing, 'country'].value_counts()

In [None]:
# Look up the iso_code value(s) recorded for the United Kingdom.
# This cell only displays them; it does not modify df.
df.loc[df.country == 'United Kingdom', 'iso_code'].unique()

In [None]:
# Give the four UK constituent nations the 'GBR' iso_code.
uk_nation_rows = df['country'].isin(['England', 'Scotland', 'Wales', 'Northern Ireland'])
df.loc[uk_nation_rows, 'iso_code'] = 'GBR'

In [None]:
# Load the ISO-code -> continent/region lookup table and preview its first rows.
continents_csv_path = '../input/country-mapping-iso-continent-region/continents2.csv'
continents = pd.read_csv(continents_csv_path)
continents.head()

In [None]:
# Left-join the region / sub-region labels onto the vaccination rows by ISO
# alpha-3 code, then drop the redundant join key coming from the lookup table.
region_lookup = continents[['alpha-3', 'region', 'sub-region']]
data = pd.merge(
    df,
    region_lookup,
    how='left',
    left_on='iso_code',
    right_on='alpha-3',
)
data = data.drop(columns=['alpha-3'])
data.head()

In [None]:
# Rename the lookup columns: 'region' becomes 'continent' and 'sub-region'
# becomes 'region'. An explicit mapping is used instead of overwriting
# data.columns positionally, so a change in column order or count upstream
# cannot silently mislabel columns.
# The guard keeps the cell safe to re-run: once 'sub-region' is gone the
# rename has already happened, and repeating it would create a second
# 'continent' column.
if 'sub-region' in data.columns:
    data = data.rename(columns={'region': 'continent', 'sub-region': 'region'})

In [None]:
# Number of rows per continent label (rows with a missing continent are not counted).
data.continent.value_counts()

In [None]:
# Override the continent label for rows belonging to the two American regions.
continent_by_region = {
    'Northern America': 'North America',
    'Latin America and the Caribbean': 'South America',
}
for region_name, continent_name in continent_by_region.items():
    data.loc[data['region'] == region_name, 'continent'] = continent_name

In [None]:
# Relabel the 'Oceania' continent as 'Australia and Oceania'.
is_oceania = data['continent'].eq('Oceania')
data.loc[is_oceania, 'continent'] = 'Australia and Oceania'

In [None]:
# Missing-value count per column of the merged frame.
data.isnull().sum()

In [None]:
# Which iso_codes have no continent label, and how many rows each accounts for.
continent_missing = data['continent'].isna()
data.loc[continent_missing, 'iso_code'].value_counts()

In [None]:
# Share of rows per region whose people_vaccinated_per_hundred is missing.
rows_missing_metric = data[data['people_vaccinated_per_hundred'].isnull()]
missing_rows_by_region = rows_missing_metric['region'].value_counts()
total_rows_by_region = data['region'].value_counts()
missing_share_pct = (missing_rows_by_region / total_rows_by_region).mul(100)

ax = missing_share_pct.plot.barh(color="teal", figsize=(15,5))
ax.set_title('Missing people_vaccinated_per_hundred')
ax.set_xlabel('percentage (%)')
plt.show()

In [None]:
# Order rows chronologically so the directional fills below follow time.
data = data.sort_values(by='date')
all_countries = list(data.country.unique())

# Fill gaps in people_vaccinated_per_hundred within each country: take the
# next known value first (bfill), then carry the last known value forward
# for any trailing gaps (ffill).
# A single groupby/transform replaces the per-country loop, which scanned
# and re-assigned the whole frame once per country, and .bfill()/.ffill()
# replace fillna(method=...), which newer pandas versions deprecate.
filled_per_country = (
    data.groupby('country')['people_vaccinated_per_hundred']
        .transform(lambda country_values: country_values.bfill().ffill())
)
# Only missing entries are written, so existing values (and any rows with a
# missing country, which groupby skips) are left exactly as they were.
data['people_vaccinated_per_hundred'] = (
    data['people_vaccinated_per_hundred'].fillna(filled_per_country)
)

In [None]:
# Re-compute the per-region share of rows still missing people_vaccinated_per_hundred.
still_missing = data['people_vaccinated_per_hundred'].isnull()
missing_counts = data[still_missing]['region'].value_counts()
region_counts = data['region'].value_counts()
pct_missing = (missing_counts / region_counts).mul(100)

ax = pct_missing.plot.barh(color="teal", figsize=(15,5))
ax.set_title('Missing people_vaccinated_per_hundred')
ax.set_xlabel('percentage (%)')
plt.show()

In [None]:
# Map each country to its region.
# `data` holds one row per country per day, so passing it directly sends the
# same (iso_code, region) pair to plotly once per daily row. Deduplicating
# first draws the same map from one row per distinct pair.
region_by_country = data[['iso_code', 'region']].drop_duplicates()

fig = px.choropleth(locations = region_by_country['iso_code'],
                    color = region_by_country['region'],
                    color_continuous_scale = 'rainbow',
                    title = 'Regions with available data',
                   )
fig.show()

In [None]:
# Number of rows per distinct value of the vaccines column.
data.vaccines.value_counts()

In [None]:
# One row per distinct (iso_code, vaccines) pair; note this rebinds `df`.
pair_columns = ['iso_code', 'vaccines']
df = data.drop_duplicates(subset=pair_columns)[pair_columns]
df.shape

In [None]:
# Number of distinct iso_code values, for comparison with the row count of the deduplicated frame.
data.iso_code.unique().shape

In [None]:
# Build one row per (vaccine, iso_code) pair by splitting the comma-separated
# `vaccines` string of every country.
# str.split + explode replaces the row-by-row iterrows/concat of small Series:
# it is vectorised, and rows whose `vaccines` value is missing are dropped up
# front instead of raising AttributeError on `.split`.
map_plot_df = (
    df.dropna(subset=['vaccines'])
      .assign(vaccine=lambda frame: frame['vaccines'].str.split(','))
      .explode('vaccine')
      .loc[:, ['vaccine', 'iso_code']]
      .reset_index(drop=True)
)

# strip the spaces left around each name by the comma split
map_plot_df['vaccine'] = map_plot_df['vaccine'].str.strip(' ')

map_plot_df.vaccine.value_counts()

In [None]:
# Colour each country on a world map by the vaccine(s) listed for it.
vaccine_map_options = dict(
    locations = map_plot_df['iso_code'],
    color = map_plot_df['vaccine'],
    color_continuous_scale = 'rainbow',
    title = 'Where is each vaccine used?',
)
fig = px.choropleth(**vaccine_map_options)
fig.show()

In [None]:
# Top 10 countries by mean daily vaccinations.
# England, Scotland, Wales and Northern Ireland appear as separate `country`
# entries although they carry the United Kingdom's iso_code ('GBR'); they are
# excluded so the ranking does not list them alongside 'United Kingdom'.
uk_nations = ['England', 'Scotland', 'Wales', 'Northern Ireland']

top_daily_vaccinations = (
    data[~data.country.isin(uk_nations)]
    .groupby(['country'])
    .agg({'daily_vaccinations': 'mean'})
    .sort_values(by='daily_vaccinations', ascending=False)
    .head(10)
)

# legend=False suppresses the single-column legend directly, replacing the
# plt.legend('') workaround.
ax = top_daily_vaccinations.plot.barh(color="teal", figsize=(15,5), legend=False)
ax.set_title('Average daily vaccinations by country - Top 10')
ax.set_xlabel('average daily vaccinations')
plt.show()

In [None]:
# Entries left out of the country-level ranking below.
uk_countries = ['England', 'Scotland', 'Wales', 'Northern Ireland', 'Jersey', 'Isle of Man', 'Bermuda', 'Gibraltar']

# Highest people_vaccinated_per_hundred reached by each remaining country, top 10.
ranked_countries = data.loc[~data['country'].isin(uk_countries)]
top_vaccinated_share = (
    ranked_countries
    .groupby(['country'])
    .agg({'people_vaccinated_per_hundred': 'max'})
    .sort_values(by='people_vaccinated_per_hundred', ascending=False)
    .iloc[:10]
)

top_vaccinated_share.plot.barh(color="teal", figsize=(15,5))
plt.title('people_vaccinated_per_hundred- Top 10')
plt.legend('')
plt.xlabel('percentage of people vaccinated (%)')
plt.show()

In [None]:
# Forecast people_vaccinated_per_hundred for the United States 55 days ahead.
# NOTE(review): fbprophet_predict_and_plot is not defined in the visible part
# of the notebook - confirm it is defined before this cell runs.
us_forecast_options = {
    'data': data,
    'region_column': 'country',
    'region': 'United States',
    'vaccination_metric': 'people_vaccinated_per_hundred',
    'future_days': 55,
}
fbprophet_predict_and_plot(**us_forecast_options)

In [None]:
# Countries in Europe or North America, minus the entries listed in uk_countries.
in_europe_or_north_america = data['continent'].isin(['Europe', 'North America'])
all_countries_EU_US = [
    name
    for name in data.loc[in_europe_or_north_america, 'country'].unique()
    if name not in uk_countries
]

In [None]:
# Date whose forecast value is reported for every country.
june_target_date = '2021-06-01'

print('Predicted percentage of people vaccinated until June: \n======================================================')

for country in all_countries_EU_US:

    # Select the country's rows once instead of rebuilding the mask per use.
    country_rows = data[data['country'] == country]

    # Skip countries with two or fewer non-null people_vaccinated_per_hundred values.
    if country_rows['people_vaccinated_per_hundred'].notnull().sum() <= 2:
        continue

    forecast = fbprophet_predict_and_plot(data = data,
                                          region_column = 'country',
                                          region = country,
                                          vaccination_metric = 'people_vaccinated_per_hundred',
                                          min_date = country_rows.date.min(),
                                          future_days = 150,
                                          plot = False,
                                         )

    # Take the prediction for the target date. Calling float() on the filtered
    # Series raised when the date was absent from the forecast and is
    # deprecated for single-element Series, so the scalar is taken explicitly.
    june_prediction = forecast.loc[forecast.ds == june_target_date, 'yhat']
    if june_prediction.empty:
        print('\t', country, '\t', 'no forecast available for', june_target_date)
        continue

    #print predicted percentage of vaccinated people until June
    print('\t', country, '\t', round(float(june_prediction.iloc[0]), 2), '%')