In [1]:
#import libraries
import pandas as pd
import numpy as np
import geopandas
import pycountry
from geopy import Nominatim
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

### Data Loading and Preprocessing

In [2]:
# Read the vaccination records into `df`; later cells rely on its
# Country, Vaccine_Manufacturer, Date and Total_Vaccinations columns.
df = pd.read_csv(filepath_or_buffer='VaccineData.csv')

In [3]:
# Parse the Date column into pandas datetimes so it can be compared,
# sorted and used to build date ranges later on.
df = df.assign(Date=pd.to_datetime(df['Date']))

In [4]:
# Remove the 'European Union' rows, which this analysis does not use.
# A new frame is bound to `df` instead of mutating the old one in place.
df = df.drop(index=df.index[df['Country'] == 'European Union'])

In [5]:
# Discard six columns by position (per the original note: everything that is
# not needed for the alpha / delta / omicron analysis).
# NOTE: the positions refer to the *current* column layout, so running this
# cell a second time would drop different columns or raise an IndexError.
df = df.drop(columns=df.columns[[4, 5, 8, 9, 10, 11]])

In [6]:
# Read the efficacy table and discard six of its columns by position.
# The 'Vaccine_Manufacturer' column must survive: it is the merge key used
# in the next cell.
df_eff = pd.read_csv(filepath_or_buffer='Vaccine_Efficacy.csv')
df_eff = df_eff.drop(columns=df_eff.columns[[1, 2, 5, 6, 7, 8]])

In [7]:
# Attach the efficacy columns to every vaccination row. A left join keeps
# rows whose manufacturer has no entry in the efficacy table (their efficacy
# values are then NaN).
df = df.merge(df_eff, how='left', on='Vaccine_Manufacturer')

In [8]:
# For every (country, manufacturer) pair keep the single row that carries
# the pair's most recent date.
df_latest = df.loc[
    df.groupby(['Country', 'Vaccine_Manufacturer'])['Date'].idxmax()
]

In [9]:
#function to fill in missing data for time series by including all vaccine manuf for each country for all dates 
def expand(df_in):
    """Fill the gaps in every (country, manufacturer) time series with daily rows.

    For each country in ``df_in`` and each manufacturer reported for that
    country, a daily calendar is built from the manufacturer's first reported
    date up to the country's last reported date (taken over all of the
    country's manufacturers). The manufacturer's rows are left-joined onto
    that calendar and the missing days are forward-filled from the previous
    reported row.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Needs the columns 'Country', 'Vaccine_Manufacturer' and a datetime
        'Date'. The index may contain duplicate labels.

    Returns
    -------
    pandas.DataFrame
        The per-pair frames concatenated in order of first appearance, with
        'Date' as the first column. The index is NOT reset: every pair keeps
        its own 0..n-1 index, so labels repeat across pairs. An empty frame
        (same columns, 'Date' first) is returned when there is nothing to
        expand.
    """
    frames = []
    # dropna(): a missing country/manufacturer can never match an equality
    # filter, so it would only yield an empty (and crashing) slice.
    for country in df_in['Country'].dropna().unique():
        df_country = df_in[df_in['Country'] == country]
        # Every manufacturer's series is extended to the country's last date.
        # min()/max() on the column avoids label lookups, which return several
        # rows when the index has duplicate labels.
        last_date = df_country['Date'].max()
        for manuf in df_country['Vaccine_Manufacturer'].dropna().unique():
            df_sm = df_country[df_country['Vaccine_Manufacturer'] == manuf]
            calendar = pd.DataFrame(
                {'Date': pd.date_range(start=df_sm['Date'].min(), end=last_date)}
            )
            filled = calendar.merge(df_sm, how='left', on='Date').ffill()
            frames.append(filled)

    if not frames:
        # pd.concat([]) raises ValueError; return an empty frame instead.
        columns = ['Date'] + [col for col in df_in.columns if col != 'Date']
        return df_in.iloc[0:0].reindex(columns=columns)
    return pd.concat(frames)

In [10]:
# Gap-free daily series: one forward-filled row per date for every
# (country, manufacturer) pair.
df_expanded = expand(df)

# Doses summed over all manufacturers for each (country, date); used as the
# denominator of the per-manufacturer share. The column is selected *before*
# summing so that no other column (strings, efficacy values, ...) is
# aggregated -- summing everything is wasted work and can fail on columns
# that are not summable.
total_vacc = (
    df_expanded
    .groupby(['Country', 'Date'])[['Total_Vaccinations']]
    .sum()
    .rename(columns={'Total_Vaccinations': 'Total'})
)

# Attach the per-day total to every manufacturer row of that country/date.
df_expanded = pd.merge(df_expanded, total_vacc, how='left', on=['Country', 'Date'])

In [11]:
# Attach the share of people vaccinated (at least one dose) per 100 people.
# After dropping the two unused columns the remaining three are renamed by
# position, so the file is expected to have exactly five columns.
df_proportions = (
    pd.read_csv('share-people-vaccinated-covid.csv')
    .drop(columns=['Code', '145610-annotations'])
    .set_axis(['Country', 'Date', 'overall vacc per 100 ppl'], axis=1)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)
# Left join: country/date pairs missing from the file get NaN here.
df_expanded = df_expanded.merge(df_proportions, how='left', on=['Country', 'Date'])

### Quick Statistics

In [12]:
# How many distinct countries the merged data covers (missing values are not counted)
df['Country'].nunique(dropna=True)

43

In [13]:
# Total vaccinations: sum of Total_Vaccinations over the most recent row of
# every (country, manufacturer) pair. `df_latest` (built during preprocessing)
# already holds exactly those rows, so it is reused instead of repeating the
# groupby/idxmax selection here.
df_latest['Total_Vaccinations'].sum()

2482427113

### Percentage of Vaccinations Not Offering Protection and Breakthrough Infection Calculation

#### Proportions calculations

In [14]:
# Share (in %) of a country's doses on a given date that came from this manufacturer
df_expanded['perc of manuf vacc'] = (
    df_expanded['Total_Vaccinations'].div(df_expanded['Total']).mul(100)
)

# The country's overall vaccinated-per-100 figure scaled by that share
df_expanded['num manuf vacc per 100 ppl'] = (
    df_expanded['perc of manuf vacc'].div(100).mul(df_expanded['overall vacc per 100 ppl'])
)

#### Alpha Calculations

In [15]:
# Per 100 people: recipients of this manufacturer's vaccine multiplied by the
# unprotected fraction, 1 - efficacy (the efficacy column is treated as a percentage)
df_expanded['breakthrough alpha'] = df_expanded['num manuf vacc per 100 ppl'].mul(
    1 - df_expanded['Eff Infection Alpha'].div(100)
)

#### Delta Calculations

In [16]:
# Per 100 people: recipients of this manufacturer's vaccine multiplied by the
# unprotected fraction, 1 - efficacy (the efficacy column is treated as a percentage)
df_expanded['breakthrough delta'] = df_expanded['num manuf vacc per 100 ppl'].mul(
    1 - df_expanded['Eff Infection Delta'].div(100)
)

#### Omicron Calculations

In [17]:
# Per 100 people: recipients of this manufacturer's vaccine multiplied by the
# unprotected fraction, 1 - efficacy (the efficacy column is treated as a percentage)
df_expanded['breakthrough omicron'] = df_expanded['num manuf vacc per 100 ppl'].mul(
    1 - df_expanded['Eff Infection Omicron'].div(100)
)

#### Post Calculation Processing (collapse expanded dataframe back to dates of the original dataset)

In [18]:
#Function to collapse expanded df back to the dates of the original dataframe
def collapse(df,df_expanded):
    """Reduce the expanded frame to the dates present in the original data.

    For every country, only those rows of ``df_expanded`` are kept whose
    'Date' also occurs among that country's dates in ``df`` (for any
    manufacturer).

    Parameters
    ----------
    df : pandas.DataFrame
        Original data; needs 'Country' and 'Date' columns.
    df_expanded : pandas.DataFrame
        Expanded data; needs 'Country' and 'Date' columns.

    Returns
    -------
    pandas.DataFrame
        The surviving rows of ``df_expanded``, grouped by country in order of
        first appearance, with their original index labels. An empty frame
        with ``df_expanded``'s columns is returned when nothing survives
        because ``df_expanded`` has no usable country.
    """
    # Look up each country's original dates once instead of re-filtering
    # the whole of `df` for every country.
    dates_by_country = {country: dates for country, dates in df.groupby('Country')['Date']}

    kept = []
    # sort=False keeps countries in order of first appearance.
    for country, rows in df_expanded.groupby('Country', sort=False):
        original_dates = dates_by_country.get(country, [])
        kept.append(rows[rows['Date'].isin(original_dates)])

    if not kept:
        # pd.concat([]) raises ValueError; hand back an empty frame instead.
        return df_expanded.iloc[0:0]
    return pd.concat(kept)

In [19]:
# Keep only the expanded rows whose dates exist in the original data.
# Note that `df` is rebound here: from this cell on it holds the collapsed,
# fully computed frame rather than the merged raw data.
df = collapse(df=df, df_expanded=df_expanded)

In [20]:
# Drop incomplete rows (per the original note: caused by dates missing from
# share-people-vaccinated-covid.csv). dropna() removes a row when ANY column
# is NaN, so rows lacking other values are removed as well.
df = df.dropna(axis=0, how='any')

### Visualizations

In [23]:
# Argentina: Total_Vaccinations over time, one line per manufacturer
px.line(
    data_frame=df[df['Country'].eq('Argentina')],
    x='Date',
    y='Total_Vaccinations',
    color='Vaccine_Manufacturer',
)

In [24]:
# Total doses (all manufacturers) over time, one line per country.
# `df` holds one row per manufacturer, and 'Total' is identical for all rows
# sharing a (Country, Date), so duplicates are removed first; otherwise every
# point would be drawn once per manufacturer.
px.line(
    df.drop_duplicates(subset=['Country', 'Date']).sort_values('Date'),
    x='Date',
    y='Total',
    color='Country',
)

In [26]:
# Estonia: 'breakthrough alpha' summed over manufacturers for each date.
# Only the plotted column is aggregated; summing the whole frame would also
# "sum" the string columns (Country, Vaccine_Manufacturer).
px.line(
    df.loc[df['Country'] == 'Estonia']
      .groupby('Date', as_index=False)['breakthrough alpha']
      .sum(),
    x='Date',
    y='breakthrough alpha',
)

In [27]:
# 'breakthrough alpha' summed over manufacturers per (country, date),
# one line per country. Only the plotted column is aggregated; summing the
# whole frame would also "sum" the Vaccine_Manufacturer strings.
px.line(
    df.groupby(['Country', 'Date'], as_index=False)['breakthrough alpha']
      .sum()
      .sort_values('Date'),
    x='Date',
    y='breakthrough alpha',
    color='Country',
)