In [None]:
#import common libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import plotly to make interactive plots
import plotly.express as px
import plotly.graph_objs as go

# Offline plotly helpers plus cufflinks; the DataFrame.iplot calls further down rely on cufflinks
from plotly.offline import iplot, init_notebook_mode
import cufflinks
# NOTE(review): presumably both calls are needed so plotly/cufflinks charts render inline in the notebook - confirm
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

import warnings
# Suppress every warning for the rest of the session; this also hides any pandas
# deprecation or chained-assignment warnings that later cells may trigger
warnings.filterwarnings("ignore")
import os
# Print the full path of every file found under the Kaggle input directory
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(folder, file_name)
        print(full_path)

In [None]:
# Read the two vaccination CSV files into DataFrames:
# `manu` comes from the by-manufacturer file, `country` from the per-country file
manufacturer_csv = '/kaggle/input/vaccine-covid-19-around-the-world/country_vaccinations_by_manufacturer.csv'
country_csv = '/kaggle/input/vaccine-covid-19-around-the-world/country_vaccinations.csv'
manu = pd.read_csv(manufacturer_csv)
country = pd.read_csv(country_csv)

In [None]:
# Convert the 'date' column of both frames to pandas datetimes
for frame in (manu, country):
    frame['date'] = pd.to_datetime(frame['date'])

In [None]:
# Display the by-manufacturer frame to inspect its columns and rows
manu

In [None]:
##FILE MANU
# Derive per-report doses ('new') from the cumulative 'total_vaccinations' column.
# A report's count is its cumulative total minus the previous reported total of the
# same (location, vaccine) series. The first report of a series has no predecessor,
# so shift() fills that gap with 0 and the whole total counts as new.
# Sorting a copy makes "previous row" mean "previous date" within each series; the
# result is assigned back by index label, so the row order of `manu` is unchanged.
series_keys=['location','vaccine']
manu_by_date=manu.sort_values(by=series_keys+['date'])
previous_total=manu_by_date.groupby(series_keys)['total_vaccinations'].shift(1,fill_value=0)
manu['new']=manu['total_vaccinations']-previous_total

#Count the number of vaccinations by vaccine
# 'sum' is passed by name: newer pandas deprecates handing the builtin sum to agg
manu_vaccine=(
    manu.groupby('vaccine',as_index=False)
    .agg({'new':'sum'})
    .rename(columns={'new':'Total vaccinations'})
)
fig=px.bar(manu_vaccine,x='vaccine',y='Total vaccinations',title='Total vaccinations by vaccine in Europe, US, Japan, Uruguay from January to July 2021')
fig.show()

In [None]:

#Select European countries to investigate
non_european = ['Chile', 'United States', 'Uruguay','Japan']
series_keys=['location','vaccine']
# Keep the newest-first ordering within each (location, vaccine) series and a
# clean 0..n-1 index, as later cells read this frame
manu_europe=(
    manu.query('location not in @non_european')
    .sort_values(by=series_keys+['date'], ascending=[True,True,False])
    .reset_index(drop=True)
)
#Process data to count new vaccinations by day from cumulative total vaccinations
# Rows are newest-first, so the next row of a series holds the earlier cumulative total.
# Grouping by BOTH location and vaccine stops totals from different countries being
# subtracted from each other. The oldest report of each series (including the very
# last row of the frame) has no earlier row, so shift() fills 0 there and its whole
# total counts as new.
earlier_total=manu_europe.groupby(series_keys)['total_vaccinations'].shift(-1,fill_value=0)
manu_europe['new']=manu_europe['total_vaccinations']-earlier_total


In [None]:
#Calculate cumulative vaccinations by vaccines in Europe countries
vaccines_new=manu_europe[['date','vaccine','new']]
# Total new doses per vaccine per date across all European locations.
# groupby sorts its keys, so dates are ascending within every vaccine.
daily_by_vaccine=vaccines_new.groupby(['vaccine','date'])['new'].sum()
# Running total per vaccine: everything administered up to and including each date
running_total=daily_by_vaccine.groupby(level='vaccine').cumsum()
# One row per date, one column per vaccine; combinations with no report stay NaN
df=running_total.unstack('vaccine')
#Plot number of vaccinations by vaccines in Europes countries
# iplot draws a plotly chart, so no matplotlib figure is opened for it
df.iplot(mode = 'lines', xTitle = 'Date', yTitle = 'Total vaccinations', title = 'Cumulative vaccinations in Europe by vaccine')

In [None]:
# Line chart of cumulative vaccinations per vaccine, United States rows only
is_usa=manu['location'].eq('United States')
manu_usa=manu.loc[is_usa]
fig=px.line(
    manu_usa,
    x='date',
    y='total_vaccinations',
    color='vaccine',
    title='Cumulative vaccinations by vaccines in USA',
)
fig.show()

In [None]:
# Line chart of cumulative vaccinations per vaccine, Germany rows only
is_germany=manu['location'].eq('Germany')
manu_germany=manu.loc[is_germany]
fig=px.line(
    manu_germany,
    x='date',
    y='total_vaccinations',
    color='vaccine',
    title='Cumulative vaccinations by vaccines in Germany',
)
fig.show()

In [None]:
# Line chart of cumulative vaccinations per vaccine, Netherlands rows only
is_netherlands=manu['location'].eq('Netherlands')
manu_netherlands=manu.loc[is_netherlands]
fig=px.line(
    manu_netherlands,
    x='date',
    y='total_vaccinations',
    color='vaccine',
    title='Cumulative vaccinations by vaccines in Netherlands',
)
fig.show()

In [None]:
##FILE COUNTRY
# Restrict the per-country frame to the countries analysed below
selected_countries=['United States','United Kingdom','New Zealand','Australia','India','Indonesia','Italy','Germany','China','Japan','Vietnam']
in_selection=country['country'].isin(selected_countries)
country_s = country[in_selection]
# For each selected country keep only its most recent row, to show the vaccines in use:
# sort newest-first within a country, then keep the first row per country
country_s_vaccine=(
    country_s[['country','date','vaccines']]
    .sort_values(by=['country','date'],ascending=[True,False])
    .drop_duplicates(subset='country',keep='first')
)

In [None]:
# Display the latest row per selected country, including its 'vaccines' value
country_s_vaccine

In [None]:
# Build one date x country table per metric: total vaccinations,
# total vaccinations per hundred, and daily vaccinations per million
by_date_and_country=dict(index='date',columns='country')
total_vaccinations=country_s.pivot(values='total_vaccinations',**by_date_and_country)
total_vaccinations_per_hundred=country_s.pivot(values='total_vaccinations_per_hundred',**by_date_and_country)
daily_vaccinations=country_s.pivot(values='daily_vaccinations_per_million',**by_date_and_country)

In [None]:
# Display the date x country table of cumulative total vaccinations
total_vaccinations

In [None]:
#Plot total vaccinations,total vaccinations per hundred, daily vaccinations per million in selected countries
# Each entry is (date x country table, y-axis label, chart title).
# iplot draws plotly charts, so no matplotlib figure is opened for them.
selected_country_charts=[
    (total_vaccinations,'Total vaccinations','Cumulative vaccinations in some countries up to date'),
    (total_vaccinations_per_hundred,'Total vaccinations per hundred','Cumulative total vaccinations per hundred in some countries up to date'),
    (daily_vaccinations,'Daily vaccinations per million','Daily vaccinations per million in some countries up to date'),
]
for metric_table,y_label,chart_title in selected_country_charts:
    metric_table.iplot(mode = 'lines', xTitle = 'Date', yTitle = y_label, title = chart_title)


In [None]:
# Slice each metric table down to a set of developed countries
developed_countries=['United States','United Kingdom','Germany','Australia','New Zealand','Italy']
total_vaccinations_developed=total_vaccinations[developed_countries]
total_vaccinations_per_hundred_developed=total_vaccinations_per_hundred[developed_countries]
daily_vaccinations_developed=daily_vaccinations[developed_countries]
# Draw one line chart per metric: (table, y-axis label, chart title)
developed_charts=[
    (total_vaccinations_developed,'Total vaccinations','Cumulative total vaccinations in some developed countries up to date'),
    (total_vaccinations_per_hundred_developed,'Total vaccinations per hundred','Cumulative total vaccinations per hundred in some developed countries up to date'),
    (daily_vaccinations_developed,'Daily vaccinations per million','Daily vaccinations per million in some  developed countries up to date'),
]
for metric_table,y_label,chart_title in developed_charts:
    metric_table.iplot(mode = 'lines', xTitle = 'Date', yTitle = y_label, title = chart_title)

In [None]:
#Extract Asian countries to plot
asian_countries=['Vietnam','India','China','Japan','Indonesia']
total_vaccinations_Asian=total_vaccinations[asian_countries]
total_vaccinations_per_hundred_Asian=total_vaccinations_per_hundred[asian_countries]
daily_vaccinations_Asian=daily_vaccinations[asian_countries]
#Plot metrics for Asian countries
# Each entry is (date x country table, y-axis label, chart title).
# iplot draws plotly charts, so no matplotlib figure is opened for them.
asian_charts=[
    (total_vaccinations_Asian,'Total vaccinations','Total vaccinations in some Asian countries up to date'),
    (total_vaccinations_per_hundred_Asian,'Total vaccinations per hundred','Total vaccinations per hundred in some Asian countries up to date'),
    (daily_vaccinations_Asian,'Daily vaccinations per million','Daily vaccinations per million in some Asian countries up to date'),
]
for metric_table,y_label,chart_title in asian_charts:
    metric_table.iplot(mode = 'lines', xTitle = 'Date', yTitle = y_label, title = chart_title)