# 

# Content
The data (country vaccinations) contains the following information:

* **Country**- this is the country for which the vaccination information is provided;
* **Country ISO Code** - ISO code for the country;
* **Date** - date for the data entry; for some of the dates we have only the daily vaccinations, for others, only the (cumulative) total;
* **Total number of vaccinations** - this is the absolute number of total immunizations in the country;
* **Total number of people vaccinated** - a person, depending on the immunization scheme, will receive one or more (typically 2) vaccines; at a certain moment, the number of vaccinations might be larger than the number of people;
* **Total number of people fully vaccinated** - this is the number of people that received the entire set of immunization according to the immunization scheme (typically 2); at a certain moment in time, there might be a certain number of people that received one vaccine and another number (smaller) of people that received all vaccines in the scheme;
* **Daily vaccinations (raw)** - for a certain data entry, the number of vaccination for that date/country;
* **Daily vaccinations** - for a certain data entry, the number of vaccination for that date/country;
* **Total vaccinations per hundred** - ratio (in percent) between vaccination number and total population up to the date in the country;
* **Total number of people vaccinated per hundred** - ratio (in percent) between population immunized and total population up to the date in the country;
* **Total number of people fully vaccinated per hundred** - ratio (in percent) between population fully immunized and total population up to the date in the country;
* **Number of vaccinations per day** - number of daily vaccination for that day and country;
* **Daily vaccinations per million** - ratio (in ppm) between vaccination number and total population for the current date in the country;
* **Vaccines used in the country** - the vaccines used in the country (up to date), given as a comma-separated list of vaccine names;
* **Source name** - source of the information (national authority, international organization, local organization etc.);
* **Source website** - website of the source of information;



There is a second file added recently (country vaccinations by manufacturer), with the following columns:

* **Location** - country;
* **Date** - date;
* **Vaccine** - vaccine type;
* **Total number of vaccinations** - total number of vaccinations / current time and vaccine type.

# Import necessary libraries.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.image as mpimg
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter, MonthLocator
from matplotlib.ticker import NullFormatter
import seaborn as sns



import os
from collections import OrderedDict
from termcolor import colored
from collections import Counter, defaultdict
import plotly.graph_objs as go
import plotly.graph_objects as go
import plotly.express as px
import squarify 
import re 
import datetime as dt
from datetime import datetime, timedelta

import warnings
warnings.filterwarnings("ignore")


# Print the full path of every file found below the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)
        


In [None]:
# Country-level vaccination progress; 'date' is parsed to datetime.
df = pd.read_csv(
    '/kaggle/input/covid-world-vaccination-progress/country_vaccinations.csv'
).assign(date=lambda frame: pd.to_datetime(frame['date']))

# Worldometer daily COVID-19 figures; 'date' is parsed to datetime.
df_daily = pd.read_csv(
    '/kaggle/input/covid19-global-dataset/worldometer_coronavirus_daily_data.csv'
).assign(date=lambda frame: pd.to_datetime(frame['date']))

# Vaccinations broken down by manufacturer; 'date' is parsed to datetime.
df_mu = pd.read_csv(
    '/kaggle/input/covid-world-vaccination-progress/country_vaccinations_by_manufacturer.csv'
).assign(date=lambda frame: pd.to_datetime(frame['date']))

# Population per country (2020), loaded as-is.
df_pop = pd.read_csv(
    '/kaggle/input/population-by-country-2020/population_by_country_2020.csv'
)

# 1. List of COVID-19 Vaccines:

In [None]:
# Every entry of df['vaccines'] is a comma-separated list of vaccine names.
# Missing entries are dropped so the string handling below cannot fail on NaN.
vaccines = df['vaccines'].dropna().to_list()

# Distinct vaccine combinations, in order of first appearance.
vaccine_list = list(dict.fromkeys(vaccines))

# Split each combination on commas only. Splitting on whitespace (as before)
# tore multi-word names such as "Sputnik V" into separate tokens and needed a
# manual remove()/append() patch that raised ValueError whenever one of the
# tokens was absent. The name is now reported as it appears in the data.
vacc_list = list(dict.fromkeys(
    name.strip()
    for combo in vaccine_list
    for name in combo.split(',')
    if name.strip()
))

print('')
print('Right now, there are vaccines: ', len(vacc_list), 'types in world.')
print('')

# Numbered list of the individual vaccines (1-based).
for cnt, name in enumerate(vacc_list, start=1):
    print(cnt, name)


# 2. What vaccines are used in each country?
**ANSWER:**

In [None]:
# Report how many distinct countries and distinct 'vaccines' values the data holds.
n_countries = len(dict.fromkeys(df['country']))
n_groups = len(dict.fromkeys(df['vaccines']))

print('')
print('There are', n_countries, 'countries with', n_groups, 'vaccines type groups')
print('')

In [None]:
# Distinct values of the 'vaccines' column, kept in order of first appearance.
vaccine_list = list(dict.fromkeys(df['vaccines']))

# Convert a list to a single string.
def covert_list_to_str(org_list, seperator= ' ' ):
    """Join the items of ``org_list`` into one string.

    Args:
        org_list: Iterable of items to join. Each item is converted with
            ``str`` first, so non-string values (numbers, NaN) no longer
            raise ``TypeError``.
        seperator: String placed between consecutive items (default: one
            space).

    Returns:
        The joined string; an empty iterable gives ``''``.
    """
    return seperator.join(str(item) for item in org_list)

# For every vaccine group, print the group (bold blue) followed by the
# comma-separated countries whose 'vaccines' value equals that group.
for group in vaccine_list:
    members = df.loc[df['vaccines'] == group, 'country']
    distinct_members = list(dict.fromkeys(members))
    print('')
    print(colored(group, 'blue', attrs=['bold']))
    print(covert_list_to_str(distinct_members, ', '))
  

In [None]:
# Distinct values of the 'vaccines' column, kept in order of first appearance.
vaccine_list = list(dict.fromkeys(df['vaccines']))

# Convert a list to a single string.
def covert_list_to_str(org_list, seperator= ' ' ):
    """Join the items of ``org_list`` into one string.

    Args:
        org_list: Iterable of items to join. Each item is converted with
            ``str`` first, so non-string values (numbers, NaN) no longer
            raise ``TypeError``.
        seperator: String placed between consecutive items (default: one
            space).

    Returns:
        The joined string; an empty iterable gives ``''``.
    """
    return seperator.join(str(item) for item in org_list)

# Total vaccinations per vaccine group: take the maximum 'total_vaccinations'
# of every country, then add those maxima over the countries that share the
# same 'vaccines' value.
country_peak = df.groupby(['vaccines', 'country'])['total_vaccinations'].max()

# Series.sum() skips NaN, so a country without any reported total no longer
# turns the sum of its whole group into NaN (the builtin sum() did).
vaccine_total = (
    country_peak.groupby(level='vaccines').sum()
    .rename_axis('vaccines group')
    .reset_index(name='total vaccinations')
)

# Largest group first; this is the frame that gets plotted.
total_vaccination = (
    vaccine_total.sort_values('total vaccinations', ascending=False)
    .reset_index(drop=True)
)

f, ax = plt.subplots(1, 1, figsize=(15, 40))

sns.barplot(y='vaccines group', x='total vaccinations', data=total_vaccination, ax=ax)

# Write the value just past the end of every bar.
for i, v in enumerate(total_vaccination['total vaccinations']):
    ax.text(v+1, i+.25, str(v), fontsize=8, color='blue')

plt.title('Top total COVID-19 vaccination group', weight='bold', size=15)
plt.ylabel('')
plt.xlabel('total_vaccinations')
plt.grid(axis='x')
plt.show()



# 3. Top Countries

In [None]:
# Largest reported 'total_vaccinations' per country, biggest first. Countries
# without any reported total are left out: they would only add an empty bar
# and a 'nan' label placed at a non-finite position.
vaccine_type = (
    df.groupby(['country'])['total_vaccinations'].max()
    .dropna()
    .sort_values(ascending=False)
    .reset_index()
)

# Plot all countries ranked by total vaccinations.
f, ax = plt.subplots(1, 1, figsize=(15, 40))

# Number of plotted rows, kept as a module-level name.
cnt = len(vaccine_type)
sns.barplot(y='country', x='total_vaccinations', data=vaccine_type.head(cnt), ax=ax)

# Write the value just past the end of every bar.
for i, v in enumerate(vaccine_type['total_vaccinations'].head(cnt)):
    ax.text(v+1, i+.25, str(v), fontsize=8, color='blue')

plt.title('Top Countries by total_vaccinations', weight='bold', size=15)
plt.ylabel('')
plt.xlabel('total_vaccinations')
plt.grid(axis='x')
plt.show()


In [None]:
# Highest 'people_vaccinated_per_hundred' reached by each country, largest first.
top_percent_people_vacc = (
    df.groupby(['country'])['people_vaccinated_per_hundred']
    .max()
    .sort_values(ascending=False)
    .reset_index()
)

In [None]:
# Plot all countries ranked by % of people vaccinated.

# Number of rows to chart, taken from the data: the former hard-coded 211 cut
# the chart off whenever more than 211 countries are present. Kept as a
# module-level name in case other cells read it.
cnt = len(top_percent_people_vacc)

# Countries without any reported value would only produce 'nan' labels.
ranked = top_percent_people_vacc.dropna(subset=['people_vaccinated_per_hundred']).head(cnt)

f, ax = plt.subplots(1, 1, figsize=(15, 40))
sns.barplot(y='country', x='people_vaccinated_per_hundred', data=ranked, ax=ax)

# Write the value just past the end of every bar.
for i, v in enumerate(ranked['people_vaccinated_per_hundred']):
    ax.text(v+1, i+.25, str(v), fontsize=8, color='blue')

plt.title('Top Countries by % People vaccinated', weight='bold', size=15)
plt.ylabel('')
plt.xlabel('%People vaccinated')
plt.grid(axis='x')
plt.show()


# 4. Top Countries by %people fully vaccinated

In [None]:
# Highest 'people_fully_vaccinated_per_hundred' reached by each country, largest first.
top_percent_people_fvacc = (
    df.groupby(['country'])['people_fully_vaccinated_per_hundred']
    .max()
    .sort_values(ascending=False)
    .reset_index()
)

In [None]:
# Plot all countries ranked by % of people fully vaccinated.

# Rows to chart come from this cell's own frame; the cell no longer depends on
# a `cnt` value left behind by a different cell. Countries without any
# reported value are skipped because they would only produce 'nan' labels.
ranked_full = top_percent_people_fvacc.dropna(subset=['people_fully_vaccinated_per_hundred'])

f, ax = plt.subplots(1, 1, figsize=(15, 40))
sns.barplot(y='country', x='people_fully_vaccinated_per_hundred', data=ranked_full, ax=ax)

# Write the value just past the end of every bar.
for i, v in enumerate(ranked_full['people_fully_vaccinated_per_hundred']):
    ax.text(v+0.5, i+.25, str(v), fontsize=8, color='blue')

plt.title('Top Countries by % People Fully vaccinated', weight='bold', size=18)
plt.ylabel('')
plt.xlabel('%People Fully vaccinated')
plt.grid(axis='x')
plt.show()


# 5. Total Vaccination Vs Population

In [None]:

# Largest reported 'total_vaccinations' per country, biggest first.
vaccine_type = df.groupby(['country'])['total_vaccinations'].max().sort_values(ascending=False).reset_index()

# Distinct (country, population) pairs from the population table.
population = df_pop[['Country (or dependency)', 'Population (2020)']].drop_duplicates()

# Inner join on the country name. One merge replaces the two nested Python
# loops that built parallel lists and relied on them ending up equally long.
# The order of vaccine_type (largest total first) is kept.
# NOTE(review): countries spelled differently in the two files do not match
# and are dropped, exactly as before - confirm whether a name mapping is wanted.
merged = vaccine_type.merge(
    population,
    left_on='country',
    right_on='Country (or dependency)',
    how='inner',
).rename(columns={'country': 'countries'})

# Country + population only (same shape as the former intermediate frame).
data = merged[['countries', 'Population (2020)']].copy()

# Country, population and total vaccinations; consumed by the plot cell.
data2 = merged[['countries', 'Population (2020)', 'total_vaccinations']].copy()

In [None]:
# Overlaid bars per country: 2020 population (blue) and total vaccinations (pink).
fig, ax = plt.subplots(1, 1, figsize=(30, 10))
labels = data2['countries']
positions = np.arange(len(labels))
ax.bar(positions - 0.0, data2['Population (2020)'], color='b', width=0.3)
ax.bar(positions + 0.0, data2['total_vaccinations'], color='pink', width=0.40)

# Figure-level captions used in place of a title and a legend.
caption_font = dict(fontweight='bold', fontfamily='serif')
fig.text(0.1, 0.9, 'Number total_vaccinations and Number of Population of each Countries', fontsize=23, color='black', **caption_font)
fig.text(0.2, 0.75, 'Number Population (2020)', fontsize=20, color='blue', **caption_font)
fig.text(0.2, 0.7, 'Number total_vaccinations', fontsize=20, color='pink', **caption_font)
ax.tick_params(length=0)

# Country names as vertical tick labels, grid lines on both axes.
ax.set_xticks(positions)
ax.set_xticklabels(labels, rotation=90)
ax.grid(axis='x')
ax.grid(axis='y')

ax.set_ylabel("x 1,000 Million or 1 Billion ")
# Restrict the visible x range to -2..50.
ax.set_xlim([-2, 50])

# 6. People of Fully vaccinated Vs Population.

In [None]:
# Largest reported 'people_fully_vaccinated' per country, biggest first.
people_vacc = df.groupby(['country'])['people_fully_vaccinated'].max().sort_values(ascending=False).reset_index()

# Distinct (country, population) pairs from the population table.
population = df_pop[['Country (or dependency)', 'Population (2020)']].drop_duplicates()

# Inner join on the country name. One merge replaces the two nested Python
# loops that built parallel lists and relied on them ending up equally long.
# The order of people_vacc (largest value first) is kept.
# NOTE(review): countries spelled differently in the two files do not match
# and are dropped, exactly as before - confirm whether a name mapping is wanted.
merged = people_vacc.merge(
    population,
    left_on='country',
    right_on='Country (or dependency)',
    how='inner',
).rename(columns={
    'country': 'countries',
    'people_fully_vaccinated': 'fully People vaccinated',
})

# Country + population only (same shape as the former intermediate frame).
data = merged[['countries', 'Population (2020)']].copy()

# Country, population and fully vaccinated people; consumed by the plot cell.
data2 = merged[['countries', 'Population (2020)', 'fully People vaccinated']].copy()

In [None]:
# Overlaid bars per country: 2020 population (blue) and fully vaccinated people (green).
fig, ax = plt.subplots(1, 1, figsize=(30, 10))
labels = data2['countries']
positions = np.arange(len(labels))
ax.bar(positions - 0.0, data2['Population (2020)'], color='b', width=0.3)
ax.bar(positions + 0.0, data2['fully People vaccinated'], color='g', width=0.40)

# Figure-level captions used in place of a title and a legend.
caption_font = dict(fontweight='bold', fontfamily='serif')
fig.text(0.1, 0.9, 'Number people of Fully vaccinated and Number of Population of each Countries', fontsize=23, color='black', **caption_font)
fig.text(0.2, 0.75, 'Number Population (2020)', fontsize=20, color='blue', **caption_font)
fig.text(0.2, 0.7, 'Number Fully people vaccinated', fontsize=20, color='green', **caption_font)
ax.tick_params(length=0)

# Country names as vertical tick labels, grid lines on both axes.
ax.set_xticks(positions)
ax.set_xticklabels(labels, rotation=90)
ax.grid(axis='x')
ax.grid(axis='y')

ax.set_ylabel("x 1,000 Million or 1 Billion ")
# Restrict the visible x range to -2..50.
ax.set_xlim([-2, 50])

# 7. people vaccinated Vs Population of each Countries


In [None]:
# Largest reported 'people_vaccinated' per country, biggest first.
people_vacc = df.groupby(['country'])['people_vaccinated'].max().sort_values(ascending=False).reset_index()

# Distinct (country, population) pairs from the population table.
population = df_pop[['Country (or dependency)', 'Population (2020)']].drop_duplicates()

# Inner join on the country name. One merge replaces the two nested Python
# loops that built parallel lists and relied on them ending up equally long.
# The order of people_vacc (largest value first) is kept.
# NOTE(review): countries spelled differently in the two files do not match
# and are dropped, exactly as before - confirm whether a name mapping is wanted.
merged = people_vacc.merge(
    population,
    left_on='country',
    right_on='Country (or dependency)',
    how='inner',
).rename(columns={
    'country': 'countries',
    'people_vaccinated': 'People vaccinated',
})

# Country + population only (same shape as the former intermediate frame).
data = merged[['countries', 'Population (2020)']].copy()

# Country, population and people vaccinated; consumed by the plot cell.
data2 = merged[['countries', 'Population (2020)', 'People vaccinated']].copy()

In [None]:
# Overlaid bars per country: 2020 population (blue) and people vaccinated (red).
fig,ax = plt.subplots(1,1,figsize=(30,10))
X = data2['countries']
X_axis = np.arange(len(X))
plt.bar(X_axis -0.0, data2['Population (2020)'], color = 'b', width=0.3)
plt.bar(X_axis +0.0, data2['People vaccinated'], color ='r', width=0.40 )

# Figure-level captions used in place of a title and a legend. The headline
# used to be copied from the fully-vaccinated chart; it now names the series
# that is actually plotted here.
fig.text(0.1,0.9, 'Number of people vaccinated and Number of Population of each Countries', fontsize=23, fontweight='bold', fontfamily='serif',color='black')
fig.text(0.2,0.75, 'Number Population (2020)', fontsize=20, fontweight='bold', fontfamily='serif',color='blue')
fig.text(0.2,0.7, 'Number people vaccinated', fontsize=20, fontweight='bold', fontfamily='serif',color='red')
ax.tick_params(length=0)

# Country names as vertical tick labels, grid lines on both axes.
plt.xticks(X_axis, X, rotation = 90)
plt.grid(axis='x')
plt.grid(axis='y')

plt.ylabel("x 1,000 Million or 1 Billion " )
# Restrict the visible x range to -2..50.
plt.xlim([-2,50])


# 8. Predict the Success of COVID-19 vaccination in each country of interest.

> # - United States

In [None]:
# Rows of the vaccination table that belong to the United States.
usa_df = df.loc[df['country'] == 'United States']

vaccine_type = usa_df['vaccines']

# Distinct 'vaccines' values for this country, in order of first appearance.
vacc = list(dict.fromkeys(vaccine_type))

for combo in vacc:
    print('United States choose vaccines: ', combo)

> > # - Current

In [None]:
# Work on an independent copy: assigning a column on a slice of usa_df is a
# chained assignment whose warning was only hidden by the global filter.
data = usa_df[['country','date','people_vaccinated_per_hundred','people_fully_vaccinated_per_hundred']].copy()
# Dates as 'YYYY-MM-DD' strings, so every date becomes one categorical bar.
data['date']  = pd.to_datetime(data['date']).dt.strftime('%Y-%m-%d')

fig,ax = plt.subplots(1,1,figsize=(25,8))
sns.barplot(x='date', y='people_vaccinated_per_hundred', data = data, color= 'red', ax=ax)
sns.barplot(x='date', y='people_fully_vaccinated_per_hundred', data = data, color= 'green', ax=ax)

# Figure-level captions used in place of a title and a legend.
fig.text(0.1,0.9, 'COVID-19 Vaccination of USA', fontsize=23, fontweight='bold', fontfamily='serif',color='black')
fig.text(0.2,0.75, '%People vaccined', fontsize=20, fontweight='bold', fontfamily='serif',color='red')
fig.text(0.2,0.7, '%People fully vaccinated', fontsize=20, fontweight='bold', fontfamily='serif',color='green')

plt.grid(axis='y')

# One labelled tick every 7 bars. Positions and labels are set together so a
# tick always shows the date of the bar it sits under, whichever formatter
# the plotting library installed.
date_labels = pd.unique(data['date'])
tick_pos = np.arange(0, len(date_labels), 7)
ax.set_xticks(tick_pos)
ax.set_xticklabels(date_labels[tick_pos], rotation=30)

plt.ylabel("Percentage " )
plt.show()


> > # - Prediction 

In [None]:
# Preview the first three rows of the frame prepared for plotting.
data.iloc[:3]

In [None]:
# Straight-line projection y = m*x + c, where x counts days since t1.
# (The model is linear; there is no x**2 term.)
m = 1/2.7    # people vaccinated: percentage points gained per day
c = -7       # people vaccinated: value of the line at x = 0
m1 = 1/2.6   # people fully vaccinated: percentage points gained per day
c1 = -20     # people fully vaccinated: value of the line at x = 0

t1 = dt.date(2020,12,20) #USA start 'people_vaccinated_per_hundred'

# Horizon: up to and including the first day on which the fully-vaccinated
# line reaches 100 %.
n_days = 1
while m1 * (n_days - 1) + c1 < 100:
    n_days += 1

x = np.arange(n_days)
y_est = m * x + c
y_est1 = m1 * x + c1

# Observed values aligned to the horizon: days without data are filled with 0
# and rows beyond the horizon are cut, so all columns always have n_days rows.
# NOTE(review): assumes `data` has one row per consecutive day starting at t1 - confirm.
observed = (
    data[['people_vaccinated_per_hundred', 'people_fully_vaccinated_per_hundred']]
    .reset_index(drop=True)
    .reindex(range(n_days), fill_value=0)
)

data_predict = pd.DataFrame({
    'date': pd.date_range(t1, periods=n_days).strftime('%Y-%m-%d'),
    'y_est': y_est,
    'y_est1': y_est1,
    'y_act': observed['people_vaccinated_per_hundred'].to_numpy(),
    'y_act1': observed['people_fully_vaccinated_per_hundred'].to_numpy(),
})

fig,ax = plt.subplots(1,1,figsize=(25,8))
sns.barplot(x='date', y='y_act', data = data_predict, color= 'red', ax=ax)
sns.barplot(x='date', y='y_act1', data = data_predict, color= 'green', ax=ax)
sns.lineplot(x='date', y='y_est', data = data_predict, color= 'red', ax=ax)
sns.lineplot(x='date', y='y_est1', data = data_predict, color= 'green', ax=ax)

# One labelled tick every 7 days; positions and labels are set together so
# each tick shows the date at its own position.
tick_pos = np.arange(0, n_days, 7)
ax.set_xticks(tick_pos)
ax.set_xticklabels(data_predict['date'].iloc[tick_pos], rotation=30)
plt.yticks(np.arange(0,110,10))
plt.ylim(0)
plt.grid(axis='y')
plt.ylabel("Percentage " )

# Dashed markers on the day each line reaches 100 %, derived from the lines
# themselves so they stay correct when m, c, m1 or c1 are changed.
reached = np.flatnonzero(y_est >= 100)
if reached.size:
    plt.axvline(x=reached[0], color='r', linestyle ='--')
plt.axvline(x=n_days - 1, color='g', linestyle ='--')

fig.text(0.1,0.9, 'COVID-19 Vaccination of USA', fontsize=23, fontweight='bold', fontfamily='serif',color='black')
fig.text(0.2,0.75, '%People vaccined', fontsize=20, fontweight='bold', fontfamily='serif',color='red')
fig.text(0.4,0.75, '-- Est.100% vaccined', fontsize=20, fontweight='bold', fontfamily='serif',color='red')
fig.text(0.2,0.7, '%People fully vaccinated', fontsize=20, fontweight='bold', fontfamily='serif',color='green')
fig.text(0.4,0.7, '-- Est.100% fully vaccinated', fontsize=20, fontweight='bold', fontfamily='serif',color='green')
plt.show()
#print(data_predict.tail(3))

> # - India

In [None]:
# Rows of the vaccination table that belong to India.
india_df = df.loc[df['country'] == 'India']

vaccine_type = india_df['vaccines']

# Distinct 'vaccines' values for this country, in order of first appearance.
vacc = list(dict.fromkeys(vaccine_type))

for combo in vacc:
    print('India choose vaccines: ', combo)

> > # - Current

In [None]:
# Work on an independent copy: assigning a column on a slice of india_df is a
# chained assignment whose warning was only hidden by the global filter.
data = india_df[['country','date','people_vaccinated_per_hundred','people_fully_vaccinated_per_hundred']].copy()
# Dates as 'YYYY-MM-DD' strings, so every date becomes one categorical bar.
data['date']  = pd.to_datetime(data['date']).dt.strftime('%Y-%m-%d')

fig,ax = plt.subplots(1,1,figsize=(25,8))
sns.barplot(x='date', y='people_vaccinated_per_hundred', data = data, color= 'red', ax=ax)
sns.barplot(x='date', y='people_fully_vaccinated_per_hundred', data = data, color= 'green', ax=ax)

# Figure-level captions used in place of a title and a legend.
fig.text(0.1,0.9, 'COVID-19 Vaccination of India', fontsize=23, fontweight='bold', fontfamily='serif',color='black')
fig.text(0.2,0.75, '%People vaccined', fontsize=20, fontweight='bold', fontfamily='serif',color='red')
fig.text(0.2,0.7, '%People fully vaccinated', fontsize=20, fontweight='bold', fontfamily='serif',color='green')

plt.grid(axis='y')

# One labelled tick every 7 bars. Positions and labels are set together so a
# tick always shows the date of the bar it sits under, whichever formatter
# the plotting library installed.
date_labels = pd.unique(data['date'])
tick_pos = np.arange(0, len(date_labels), 7)
ax.set_xticks(tick_pos)
ax.set_xticklabels(date_labels[tick_pos], rotation=30)

plt.ylabel("Percentage " )
plt.show()


> > # - Prediction 

In [None]:
# Preview the first three rows of the frame prepared for plotting.
data.iloc[:3]

In [None]:
# Straight-line projection y = m*x + c, where x counts days since t1.
# (The model is linear; there is no x**2 term.)
m = 1/6.8    # people vaccinated: percentage points gained per day
c = -7       # people vaccinated: value of the line at x = 0
m1 = 1/15    # people fully vaccinated: percentage points gained per day
c1 = -5      # people fully vaccinated: value of the line at x = 0

t1 = dt.date(2021,1,15) #India start 'people_vaccinated_per_hundred'

# Horizon: up to and including the first day on which the fully-vaccinated
# line reaches 100 %.
n_days = 1
while m1 * (n_days - 1) + c1 < 100:
    n_days += 1

x = np.arange(n_days)
y_est = m * x + c
y_est1 = m1 * x + c1

# Observed values aligned to the horizon: days without data are filled with 0
# and rows beyond the horizon are cut, so all columns always have n_days rows.
# NOTE(review): assumes `data` has one row per consecutive day starting at t1 - confirm.
observed = (
    data[['people_vaccinated_per_hundred', 'people_fully_vaccinated_per_hundred']]
    .reset_index(drop=True)
    .reindex(range(n_days), fill_value=0)
)

data_predict = pd.DataFrame({
    'date': pd.date_range(t1, periods=n_days).strftime('%Y-%m-%d'),
    'y_est': y_est,
    'y_est1': y_est1,
    'y_act': observed['people_vaccinated_per_hundred'].to_numpy(),
    'y_act1': observed['people_fully_vaccinated_per_hundred'].to_numpy(),
})

fig,ax = plt.subplots(1,1,figsize=(25,8))
sns.barplot(x='date', y='y_act', data = data_predict, color= 'red', ax=ax)
sns.barplot(x='date', y='y_act1', data = data_predict, color= 'green', ax=ax)
sns.lineplot(x='date', y='y_est', data = data_predict, color= 'red', ax=ax)
sns.lineplot(x='date', y='y_est1', data = data_predict, color= 'green', ax=ax)

# One labelled tick every 30 days; positions and labels are set together so
# each tick shows the date at its own position.
tick_pos = np.arange(0, n_days, 30)
ax.set_xticks(tick_pos)
ax.set_xticklabels(data_predict['date'].iloc[tick_pos], rotation=30)
plt.yticks(np.arange(0,110,10))
plt.ylim(0,100)
plt.grid(axis='y')
plt.ylabel("Percentage " )

# Dashed markers on the day each line reaches 100 %, derived from the lines
# themselves (the former hard-coded 725 did not match m and c).
reached = np.flatnonzero(y_est >= 100)
if reached.size:
    plt.axvline(x=reached[0], color='r', linestyle ='--')
plt.axvline(x=n_days - 1, color='g', linestyle ='--')

fig.text(0.1,0.9, 'COVID-19 Vaccination of India', fontsize=23, fontweight='bold', fontfamily='serif',color='black')
fig.text(0.2,0.75, '%People vaccined', fontsize=20, fontweight='bold', fontfamily='serif',color='red')
fig.text(0.2,0.7, '-- Est.100% vaccined', fontsize=20, fontweight='bold', fontfamily='serif',color='red')
fig.text(0.2,0.65, '%People fully vaccinated', fontsize=20, fontweight='bold', fontfamily='serif',color='green')
fig.text(0.2,0.6, '-- Est.100% fully vaccinated', fontsize=20, fontweight='bold', fontfamily='serif',color='green')
plt.show()
#print(data_predict.tail(3))


> # - Thailand

In [None]:
# Rows of the vaccination table that belong to Thailand.
th_df = df.loc[df['country'] == 'Thailand']

vaccine_type = th_df['vaccines']

# Distinct 'vaccines' values for this country, in order of first appearance.
vacc = list(dict.fromkeys(vaccine_type))

for combo in vacc:
    print('Thailand choose vaccines: ', combo)

> > # - Current

In [None]:
# Work on an independent copy: assigning a column on a slice of th_df is a
# chained assignment whose warning was only hidden by the global filter.
data = th_df[['country','date','people_vaccinated_per_hundred','people_fully_vaccinated_per_hundred']].copy()
# Dates as 'YYYY-MM-DD' strings, so every date becomes one categorical bar.
data['date']  = pd.to_datetime(data['date']).dt.strftime('%Y-%m-%d')

fig,ax = plt.subplots(1,1,figsize=(25,8))
sns.barplot(x='date', y='people_vaccinated_per_hundred', data = data, color= 'red', ax=ax)
sns.barplot(x='date', y='people_fully_vaccinated_per_hundred', data = data, color= 'green', ax=ax)

# Figure-level captions used in place of a title and a legend.
fig.text(0.1,0.9, 'COVID-19 Vaccination of Thailand', fontsize=23, fontweight='bold', fontfamily='serif',color='black')
fig.text(0.2,0.75, '%People vaccined', fontsize=20, fontweight='bold', fontfamily='serif',color='red')
fig.text(0.2,0.7, '%People fully vaccinated', fontsize=20, fontweight='bold', fontfamily='serif',color='green')

plt.grid(axis='y')

# One labelled tick every 7 bars. Positions and labels are set together so a
# tick always shows the date of the bar it sits under, whichever formatter
# the plotting library installed.
date_labels = pd.unique(data['date'])
tick_pos = np.arange(0, len(date_labels), 7)
ax.set_xticks(tick_pos)
ax.set_xticklabels(date_labels[tick_pos], rotation=30)

plt.ylabel("Percentage " )
plt.show()


In [None]:
# Preview the first three rows of the frame prepared for plotting.
data.iloc[:3]

> > # - Prediction 

In [None]:
# Straight-line projection y = m*x + c, where x counts days since t1.
# (The model is linear; there is no x**2 term.)
m = 1/21     # people vaccinated: percentage points gained per day
c = -1.5     # people vaccinated: value of the line at x = 0
m1 = 1/25    # people fully vaccinated: percentage points gained per day
c1 = -2      # people fully vaccinated: value of the line at x = 0

t1 = dt.date(2021,2,27) #Thailand start 'people_vaccinated_per_hundred'

# Horizon: up to and including the first day on which the fully-vaccinated
# line reaches 100 %.
n_days = 1
while m1 * (n_days - 1) + c1 < 100:
    n_days += 1

x = np.arange(n_days)
y_est = m * x + c
y_est1 = m1 * x + c1

# Observed values aligned to the horizon: days without data are filled with 0
# and rows beyond the horizon are cut, so all columns always have n_days rows.
# NOTE(review): assumes `data` has one row per consecutive day starting at t1 - confirm.
observed = (
    data[['people_vaccinated_per_hundred', 'people_fully_vaccinated_per_hundred']]
    .reset_index(drop=True)
    .reindex(range(n_days), fill_value=0)
)

data_predict = pd.DataFrame({
    'date': pd.date_range(t1, periods=n_days).strftime('%Y-%m-%d'),
    'y_est': y_est,
    'y_est1': y_est1,
    'y_act': observed['people_vaccinated_per_hundred'].to_numpy(),
    'y_act1': observed['people_fully_vaccinated_per_hundred'].to_numpy(),
})

fig,ax = plt.subplots(1,1,figsize=(25,8))
sns.barplot(x='date', y='y_act', data = data_predict, color= 'red', ax=ax)
sns.barplot(x='date', y='y_act1', data = data_predict, color= 'green', ax=ax)
sns.lineplot(x='date', y='y_est', data = data_predict, color= 'red', ax=ax)
sns.lineplot(x='date', y='y_est1', data = data_predict, color= 'green', ax=ax)

# One labelled tick every 60 days; positions and labels are set together so
# each tick shows the date at its own position.
tick_pos = np.arange(0, n_days, 60)
ax.set_xticks(tick_pos)
ax.set_xticklabels(data_predict['date'].iloc[tick_pos], rotation=30)
plt.yticks(np.arange(0,110,10))
plt.ylim(0,100)
plt.grid(axis='y')
plt.ylabel("Percentage " )

# Dashed markers on the day each line reaches 100 %, derived from the lines
# themselves (the former hard-coded 2145 did not match m and c).
reached = np.flatnonzero(y_est >= 100)
if reached.size:
    plt.axvline(x=reached[0], color='r', linestyle ='--')
plt.axvline(x=n_days - 1, color='g', linestyle ='--')

fig.text(0.1,0.9, 'COVID-19 Vaccination of Thailand', fontsize=23, fontweight='bold', fontfamily='serif',color='black')
fig.text(0.2,0.75, '%People vaccined', fontsize=20, fontweight='bold', fontfamily='serif',color='red')
fig.text(0.2,0.7, '-- Est.100% vaccined', fontsize=20, fontweight='bold', fontfamily='serif',color='red')
fig.text(0.2,0.65, '%People fully vaccinated', fontsize=20, fontweight='bold', fontfamily='serif',color='green')
fig.text(0.2,0.6, '-- Est.100% fully vaccinated', fontsize=20, fontweight='bold', fontfamily='serif',color='green')
plt.show()


> # - China

Reference: population of China (2020): 1,439,323,776

Right now, there is no data on the number of people vaccinated in China; only the total number of vaccinations is available.

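I therefore assume that each vaccinated person in China has received just 1 dose, so the total number of vaccinations stands in for the number of people vaccinated.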


In [None]:
# Rows of the vaccination table that belong to China.
cn_df = df.loc[df['country'] == 'China']

vaccine_type = cn_df['vaccines']

# Distinct 'vaccines' values for this country, in order of first appearance.
vacc = list(dict.fromkeys(vaccine_type))

for combo in vacc:
    print('China choose vaccines: ', combo)

In [None]:
# Population figure used as the denominator for China's percentage.
CHINA_POPULATION_2020 = 1439323776

# Independent copy of China's rows: columns are added below, and writing to a
# slice of df would be a chained assignment.
cn = df.loc[df['country'] == 'China', ['country', 'date', 'total_vaccinations']].copy()
# Dates as 'YYYY-MM-DD' strings.
cn['date'] = pd.to_datetime(cn['date']).dt.strftime('%Y-%m-%d')

# Total vaccinations as a percentage of the population.
cn['%total'] = cn['total_vaccinations'] / CHINA_POPULATION_2020 * 100

# Preview, placed last so the notebook actually displays it (in the middle of
# the cell its result was discarded).
cn.head(3)


In [None]:

# Parabolic projection y = m*x**2 + c, where x counts days since t1.
m = 1/430    # curvature of the parabola
c = -24      # value of the curve at x = 0

# First day of the projection.
# NOTE(review): presumably the first date reported for China - confirm.
t1 = dt.date(2020,12,15)

# Horizon: up to and including the first day on which the curve reaches 100 %.
n_days = 1
while m * ((n_days - 1) ** 2) + c < 100:
    n_days += 1

x = np.arange(n_days)
y_est = m * (x ** 2) + c

# Observed percentage aligned to the horizon: days without data are filled
# with 0 and rows beyond the horizon are cut, so all columns have n_days rows.
# NOTE(review): assumes `cn` has one row per consecutive day starting at t1 - confirm.
y_act = (
    cn['%total']
    .reset_index(drop=True)
    .reindex(range(n_days), fill_value=0)
    .to_numpy()
)

data_predict = pd.DataFrame({
    '%y_est': y_est,
    '%y_act': y_act,
    'date': pd.date_range(t1, periods=n_days).strftime('%Y-%m-%d'),
})

fig,ax = plt.subplots(1,1,figsize=(25,8))

sns.barplot(x='date', y='%y_act', data=data_predict, color='red', ax=ax)
sns.lineplot(x='date', y='%y_est', data=data_predict, color='black', ax=ax)

# One labelled tick every 7 days; positions and labels are set together so
# each tick shows the date at its own position.
tick_pos = np.arange(0, n_days, 7)
ax.set_xticks(tick_pos)
ax.set_xticklabels(data_predict['date'].iloc[tick_pos], rotation=30)
plt.yticks(np.arange(0,110,10))
plt.ylim(0,100)
plt.ylabel("Percentage " )
plt.grid(axis='y')

# Dashed marker on the day the curve reaches 100 %, derived from the curve
# (the last projected day) instead of a hard-coded position.
plt.axvline(x=n_days - 1, color='black', linestyle ='--')

fig.text(0.1,0.9, 'COVID-19 Vaccination of China', fontsize=23, fontweight='bold', fontfamily='serif',color='black')
fig.text(0.2,0.75, '%Total Vaccination', fontsize=20, fontweight='bold', fontfamily='serif',color='red')
fig.text(0.2,0.7, '-- Est.100% Total vaccination', fontsize=20, fontweight='bold', fontfamily='serif',color='black')

plt.show()

> # - Japan

In [None]:
# Rows of the vaccination table that belong to Japan.
jp_df = df.loc[df['country'] == 'Japan']

vaccine_type = jp_df['vaccines']

# Distinct 'vaccines' values for this country, in order of first appearance.
vacc = list(dict.fromkeys(vaccine_type))

for combo in vacc:
    print('Japan choose vaccines: ', combo)

In [None]:
# Work on an independent copy: assigning a column on a slice of jp_df is a
# chained assignment whose warning was only hidden by the global filter.
data = jp_df[['country','date','people_vaccinated_per_hundred','people_fully_vaccinated_per_hundred']].copy()
# Dates as 'YYYY-MM-DD' strings, so every date becomes one categorical bar.
data['date']  = pd.to_datetime(data['date']).dt.strftime('%Y-%m-%d')

fig,ax = plt.subplots(1,1,figsize=(25,8))
sns.barplot(x='date', y='people_vaccinated_per_hundred', data = data, color= 'red', ax=ax)
sns.barplot(x='date', y='people_fully_vaccinated_per_hundred', data = data, color= 'green', ax=ax)

# Figure-level captions used in place of a title and a legend.
fig.text(0.1,0.9, 'COVID-19 Vaccination of Japan', fontsize=23, fontweight='bold', fontfamily='serif',color='black')
fig.text(0.2,0.75, '%People vaccined', fontsize=20, fontweight='bold', fontfamily='serif',color='red')
fig.text(0.2,0.7, '%People fully vaccinated', fontsize=20, fontweight='bold', fontfamily='serif',color='green')

plt.grid(axis='y')

# One labelled tick every 7 bars. Positions and labels are set together so a
# tick always shows the date of the bar it sits under, whichever formatter
# the plotting library installed.
date_labels = pd.unique(data['date'])
tick_pos = np.arange(0, len(date_labels), 7)
ax.set_xticks(tick_pos)
ax.set_xticklabels(date_labels[tick_pos], rotation=30)

plt.ylabel("Percentage " )
plt.show()

> > # - Prediction 

In [None]:
# Preview the first three rows of the frame prepared for plotting.
data.iloc[:3]

In [None]:
# Straight-line projection y = m*x + c, where x counts days since t1.
# (The model is linear; there is no x**2 term.)
m = 1/14     # people vaccinated: percentage points gained per day
c = -3       # people vaccinated: value of the line at x = 0
m1 = 1/26    # people fully vaccinated: percentage points gained per day
c1 = -2      # people fully vaccinated: value of the line at x = 0

# First day of the projection.
# NOTE(review): presumably the first date reported for Japan - confirm.
t1 = dt.date(2021,2,17)

# Horizon: up to and including the first day on which the fully-vaccinated
# line reaches 100 %.
n_days = 1
while m1 * (n_days - 1) + c1 < 100:
    n_days += 1

x = np.arange(n_days)
y_est = m * x + c
y_est1 = m1 * x + c1

# Observed values aligned to the horizon: days without data are filled with 0
# and rows beyond the horizon are cut, so all columns always have n_days rows.
# NOTE(review): assumes `data` has one row per consecutive day starting at t1 - confirm.
observed = (
    data[['people_vaccinated_per_hundred', 'people_fully_vaccinated_per_hundred']]
    .reset_index(drop=True)
    .reindex(range(n_days), fill_value=0)
)

data_predict = pd.DataFrame({
    'date': pd.date_range(t1, periods=n_days).strftime('%Y-%m-%d'),
    'y_est': y_est,
    'y_est1': y_est1,
    'y_act': observed['people_vaccinated_per_hundred'].to_numpy(),
    'y_act1': observed['people_fully_vaccinated_per_hundred'].to_numpy(),
})

fig,ax = plt.subplots(1,1,figsize=(25,8))
sns.barplot(x='date', y='y_act', data = data_predict, color= 'red', ax=ax)
sns.barplot(x='date', y='y_act1', data = data_predict, color= 'green', ax=ax)
sns.lineplot(x='date', y='y_est', data = data_predict, color= 'red', ax=ax)
sns.lineplot(x='date', y='y_est1', data = data_predict, color= 'green', ax=ax)

# One labelled tick every 60 days; positions and labels are set together so
# each tick shows the date at its own position.
tick_pos = np.arange(0, n_days, 60)
ax.set_xticks(tick_pos)
ax.set_xticklabels(data_predict['date'].iloc[tick_pos], rotation=30)
plt.yticks(np.arange(0,110,10))
plt.ylim(0,110)
plt.grid(axis='y')
plt.ylabel("Percentage " )

# Dashed markers on the day each line reaches 100 %, derived from the lines
# themselves (the former hard-coded 1460 did not match m and c).
reached = np.flatnonzero(y_est >= 100)
if reached.size:
    plt.axvline(x=reached[0], color='r', linestyle ='--')
plt.axvline(x=n_days - 1, color='g', linestyle ='--')

fig.text(0.1,0.9, 'COVID-19 Vaccination of Japan', fontsize=23, fontweight='bold', fontfamily='serif',color='black')
fig.text(0.2,0.75, '%People vaccined', fontsize=20, fontweight='bold', fontfamily='serif',color='red')
fig.text(0.2,0.7, '-- Est.100% vaccined', fontsize=20, fontweight='bold', fontfamily='serif',color='red')
fig.text(0.2,0.65, '%People fully vaccinated', fontsize=20, fontweight='bold', fontfamily='serif',color='green')
fig.text(0.2,0.6, '-- Est.100% fully vaccinated', fontsize=20, fontweight='bold', fontfamily='serif',color='green')
plt.show()


# Vaccine Manufacturers

In [None]:
# Largest 'total_vaccinations' per location for the Sinovac rows; the result
# is computed but not stored.
sinovac_rows = df_mu.loc[df_mu['vaccine'] == 'Sinovac']
sinovac_rows.groupby('location')['total_vaccinations'].max()

# Last five rows of the manufacturer table.
df_mu.tail(5)

In [None]:
# Plot against real datetimes on a local copy. Overwriting df_mu['date'] with
# strings changed the shared frame for every later cell and turned the x axis
# into categories ordered by first appearance, which is not chronological
# once locations start reporting on different dates.
plot_df = df_mu.assign(date=pd.to_datetime(df_mu['date']))

f,ax = plt.subplots(1,1,figsize=(15,10))

# One line per location (colour) and vaccine (line style).
sns.lineplot(data=plot_df, y='total_vaccinations', x='date', hue='location', style='vaccine', lw=1, ax=ax)

plt.title('Vaccination by Manufacturer', weight='bold', size=18)
plt.ylabel('Count')
plt.xlabel('')
plt.grid(axis='x')
plt.xticks(rotation = 30)

# One tick every 7 days, labelled 'YYYY-MM-DD'.
ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))

plt.show()