<a href="https://colab.research.google.com/github/nataliebakken/PortfolioProjects/blob/main/2021_COVID_19_Vaccination_Progress_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 2021 COVID-19 Vaccination Progress Analysis



In [48]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import warnings
warnings.filterwarnings("ignore")

# LOAD DATASET
# Asks for a CSV through Colab's upload dialog and reads the first uploaded file
# into `df`, parsing the 'date' column as datetimes and using it as the index.

from google.colab import files
import io
import pandas as pd

uploaded = files.upload()
if not uploaded:
    # An empty result (e.g. the dialog was dismissed) would otherwise surface
    # below as an opaque IndexError/StopIteration when picking the first key.
    raise ValueError("No file was uploaded; re-run this cell and choose the vaccinations CSV.")

# Only the first uploaded file is read; any additional uploads are ignored.
filename = next(iter(uploaded))

df = pd.read_csv(io.BytesIO(uploaded[filename]), parse_dates=['date'], index_col='date')

# Final expression of the cell, so the notebook renders the preview as a table.
df.head()


Collecting kaleido
  Downloading kaleido-1.1.0-py3-none-any.whl.metadata (5.6 kB)
Collecting choreographer>=1.0.10 (from kaleido)
  Downloading choreographer-1.1.1-py3-none-any.whl.metadata (6.8 kB)
Collecting logistro>=1.0.8 (from kaleido)
  Downloading logistro-1.1.0-py3-none-any.whl.metadata (2.6 kB)
Collecting pytest-timeout>=2.4.0 (from kaleido)
  Downloading pytest_timeout-2.4.0-py3-none-any.whl.metadata (20 kB)
Downloading kaleido-1.1.0-py3-none-any.whl (66 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.3/66.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading choreographer-1.1.1-py3-none-any.whl (52 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.3/52.3 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading logistro-1.1.0-py3-none-any.whl (7.9 kB)
Downloading pytest_timeout-2.4.0-py3-none-any.whl (14 kB)
Installing collected packages: logistro, pytest-timeout, choreographer, kaleido
Successfully installed choreogr

Saving country_vaccinations.csv to country_vaccinations (10).csv
                country iso_code  total_vaccinations  people_vaccinated  \
date                                                                      
2021-02-22  Afghanistan      AFG                 0.0                0.0   
2021-02-23  Afghanistan      AFG                 NaN                NaN   
2021-02-24  Afghanistan      AFG                 NaN                NaN   
2021-02-25  Afghanistan      AFG                 NaN                NaN   
2021-02-26  Afghanistan      AFG                 NaN                NaN   

            people_fully_vaccinated  daily_vaccinations_raw  \
date                                                          
2021-02-22                      NaN                     NaN   
2021-02-23                      NaN                     NaN   
2021-02-24                      NaN                     NaN   
2021-02-25                      NaN                     NaN   
2021-02-26                     

In [49]:
# RENAME COLUMNS
# Relabel the snake_case headers listed below with PascalCase names, in place.
# Columns that are not listed keep their existing names.

column_labels = dict(
    country='Country',
    total_vaccinations='TotalVaccinations',
    people_vaccinated='PeopleVaccinated',
    people_fully_vaccinated='PeopleFullyVaccinated',
    daily_vaccinations='DailyVaccinations',
    total_vaccinations_per_hundred='TotalPerHundred',
    people_vaccinated_per_hundred='PeoplePerHundred',
    people_fully_vaccinated_per_hundred='FullyPerHundred',
    daily_vaccinations_per_million='DailyPerMillion',
    vaccines='Vaccines',
)
df.rename(columns=column_labels, inplace=True)

In [50]:
# SUMMARY STATISTICS
# Descriptive statistics for the numeric columns. Left as the cell's final
# expression so the notebook renders a table instead of the wrapped,
# backslash-continued plain text that print() produces for wide frames.

df.describe()

       TotalVaccinations  PeopleVaccinated  PeopleFullyVaccinated  \
count       1.452200e+04      1.376200e+04           1.104100e+04   
mean        9.139252e+06      4.737802e+06           2.681353e+06   
std         4.592776e+07      1.797156e+07           1.093032e+07   
min         0.000000e+00      0.000000e+00           1.000000e+00   
25%         1.005620e+05      7.876700e+04           3.719700e+04   
50%         7.007560e+05      5.149065e+05           2.834740e+05   
75%         3.241681e+06      2.139073e+06           1.192415e+06   
max         1.029223e+09      6.220000e+08           2.232990e+08   

       daily_vaccinations_raw  DailyVaccinations  TotalPerHundred  \
count            1.200300e+04       2.559500e+04     14522.000000   
mean             2.012910e+05       1.020430e+05        24.162602   
std              1.122962e+06       7.304361e+05        30.690367   
min              0.000000e+00       0.000000e+00         0.000000   
25%              3.967500e+03    

In [52]:
# BAR PLOT OF TOTAL VACCINATIONS PER VACCINE SCHEME

import plotly.express as px

summary_columns = ['TotalVaccinations', 'TotalPerHundred', 'DailyVaccinations', 'DailyPerMillion']

# Largest value of each metric per (scheme, country) pair. For TotalVaccinations
# this is taken as the country's total — presumably a running cumulative count;
# verify against the dataset description.
country_peaks = df.groupby(['Vaccines', 'Country'])[summary_columns].max()

# Per-scheme maximum of every metric (the max of per-country maxima equals the
# plain per-scheme max, so these columns are unchanged from a direct groupby).
vaccine_summary = country_peaks.groupby(level='Vaccines').max()

# TotalVaccinations is the exception: add up the countries using each scheme so
# the bar really is the scheme's total, matching the chart title and the
# treemap, rather than just the single largest country in the scheme.
# min_count=1 keeps a scheme with no reported totals as NaN instead of 0.
vaccine_summary['TotalVaccinations'] = (
    country_peaks['TotalVaccinations'].groupby(level='Vaccines').sum(min_count=1)
)
vaccine_summary = vaccine_summary.reset_index()

fig = px.bar(
    vaccine_summary,
    x='Vaccines',
    y='TotalVaccinations',
    title='Total Vaccinations per Vaccine Scheme',
    text='TotalVaccinations',
    color='Vaccines'
)

# Adjust layout to make it readable
fig.update_layout(
    height=600,                   # taller figure
    xaxis_tickangle=-45,          # rotate x-axis labels
    xaxis_tickfont=dict(size=10), # smaller font for labels
    showlegend=False              # legend would only repeat the x-axis categories
)
fig.show()


In [31]:
# TREEMAP PER COUNTRY AND VACCINE SCHEME
# Outer tiles are vaccine schemes, inner tiles are countries; each country tile
# is sized by the largest TotalVaccinations value recorded for it.

treemap_metrics = ['TotalVaccinations', 'DailyVaccinations', 'PeopleVaccinated']
scheme_country_groups = df.groupby(['Vaccines', 'Country'])
country_vaccine = scheme_country_groups[treemap_metrics].max().reset_index()

fig = px.treemap(
    data_frame=country_vaccine,
    values='TotalVaccinations',
    path=['Vaccines', 'Country'],
    title="Total vaccinations per country grouped by vaccine scheme",
)
fig.show()



In [32]:
# MAP OF TOTAL VACCINATIONS PER COUNTRY

# One row per (Country, iso_code) pair holding the largest TotalVaccinations
# value recorded for it. Aggregated from df directly so this cell does not rely
# on a frame left behind by another cell, and so a country listed under several
# vaccine schemes is not passed to the map more than once.
country_totals = df.groupby(['Country', 'iso_code'])['TotalVaccinations'].max().reset_index()

# Match locations on the dataset's iso_code column instead of free-text country
# names, which depend on plotly recognising each spelling.
# Assumes iso_code holds ISO 3166-1 alpha-3 codes (e.g. 'AFG'); codes plotly does
# not recognise are simply left uncoloured — TODO confirm for non-standard entries.
fig = px.choropleth(country_totals, locations='iso_code', locationmode='ISO-3',
                    color='TotalVaccinations', hover_name='Country',
                    color_continuous_scale='Viridis', title='Total Vaccinations per Country')
fig.show()

In [33]:
# TIME-SERIES PLOT FOR SELECTED COUNTRIES
# Draws one line chart per listed country, with the frame's index on the x-axis
# and TotalVaccinations on the y-axis.

countries = [
    'United States',
    'China',
    'United Kingdom',
    'Germany',
    'France',
]
for country in countries:
    # Keep only the rows belonging to the current country.
    df_country = df.loc[df['Country'].eq(country)]
    fig = px.line(
        df_country,
        x=df_country.index,
        y='TotalVaccinations',
        title=f'Total Vaccinations in {country}',
    )
    fig.show()
