In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

[Show on gallery](https://public.tableau.com/views/covid2_16138717906360/Dashboard1?:language=zh-Hant&:display_count=y&publish=yes&:origin=viz_share_link)

%%html
<div class='tableauPlaceholder' id='viz1613892574569' style='position: relative'><noscript><a href='#'><img alt=' ' src='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;6X&#47;6XDFR2GM5&#47;1_rss.png' style='border: none' /></a></noscript><object class='tableauViz'  style='display:none;'><param name='host_url' value='https%3A%2F%2Fpublic.tableau.com%2F' /> <param name='embed_code_version' value='3' /> <param name='path' value='shared&#47;6XDFR2GM5' /> <param name='toolbar' value='yes' /><param name='static_image' value='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;6X&#47;6XDFR2GM5&#47;1.png' /> <param name='animate_transition' value='yes' /><param name='display_static_image' value='yes' /><param name='display_spinner' value='yes' /><param name='display_overlay' value='yes' /><param name='display_count' value='yes' /><param name='language' value='zh-Hant' /><param name='filter' value='publish=yes' /></object></div>                <script type='text/javascript'>                    var divElement = document.getElementById('viz1613892574569');                    var vizElement = divElement.getElementsByTagName('object')[0];                    if ( divElement.offsetWidth > 800 ) { vizElement.style.width='100%';vizElement.style.height=(divElement.offsetWidth*0.75)+'px';} else if ( divElement.offsetWidth > 500 ) { vizElement.style.width='100%';vizElement.style.height=(divElement.offsetWidth*0.75)+'px';} else { vizElement.style.width='100%';vizElement.style.height='1827px';}                     var scriptElement = document.createElement('script');                    scriptElement.src = 'https://public.tableau.com/javascripts/api/viz_v1.js';                    vizElement.parentNode.insertBefore(scriptElement, vizElement);                </script>

## How to use

Most of the information is in the tooltip of the map chart; the controls are explained below.

- __Select Metric:__ 
Choose one of the 4 metrics (People fully vaccinated / People vaccinated / Total Vaccinated / Last N days Avg.); the result is shown in the two charts on the left.

- __Unit:__ 
By raw number / By per hundred (only applies to first 3 metrics)

- __Vaccination Threshold__: 
When viewing metrics by per hundred, the rank chart may include regions with small populations; this can be avoided by increasing the vaccination threshold.

- __N days:__ 
How many days are used to calculate the average of daily vaccinations (only applies to Last N days Avg.)

## Data Clean 

I cleaned the data so that the cumulative chart and the last-N-days average can be computed: both need a value for every country on the latest date, but some countries do not update daily. For the dates on which a country did not update, I assume that the daily values are 0 and that the other values remain the same as in its last update.

For example, on 2021-02-21 the data for one of the countries looks like this; there is no data for 2/20 and 2/21.

> | Country       | Date       | Daily Vaccinations | People Fully Vaccinated |
|---------------|------------|--------------------|-------------------------|
| United States | 2021-02-17 | 100                | NaN                     |
| United States | 2021-02-18 | 200                | NaN                     |
| United States | 2021-02-19 | 300                | 200                     |

After imputation it should look like this: 0 is imputed into the daily-related columns, and the latest value is copied forward for status features like _People Fully Vaccinated_.

> | Country       | Date       | Daily Vaccinations | People Fully Vaccinated |
|---------------|------------|--------------------|-------------------------|
| United States | 2021-02-17 | 100                | NaN                     |
| United States | 2021-02-18 | 200                | NaN                     |
| United States | 2021-02-19 | 300                | 200                     |
| United States | 2021-02-20 | 0                  | 200                     |
| United States | 2021-02-21 | 0                  | 200                     |

In [None]:
# Load the per-country vaccination records from the Kaggle dataset.
df = pd.read_csv("/kaggle/input/covid-world-vaccination-progress/country_vaccinations.csv")

# Most recent report date found anywhere in the data.
max_date = df['date'].max()

# Distinct countries/regions present in the data.
country_lst = set(df['country'].tolist())

# Lookup table: column name -> positional index of that column.
col_inx = dict(zip(df.columns.values, range(len(df.columns.values))))

_Some countries have not updated for a while; for example, China's data stops at 2/9._

In [None]:
# Preview the last 5 reported rows for China (before imputation).
# Grouping by the scalar column name (not a one-element list) keeps the plain
# string key valid for get_group on newer pandas versions as well.
df.groupby('country').get_group('China').tail(5)

In [None]:
# Impute, country by country, the trailing dates that are missing up to the
# latest report date (`max_date`).
#
# For every country whose last report is older than `max_date`, one row per
# missing day is added: the country's most recent row is cloned, the "daily"
# columns are set to 0 (no update was reported on those days) and every other
# column keeps its last reported value.
#
# Reads `df` and `max_date` from the loading cell; produces `new_df`.
daily_cols = ['daily_vaccinations_raw', 'daily_vaccinations', 'daily_vaccinations_per_million']
latest_report = pd.to_datetime(max_date)

temp_df_lst = []
# Group once instead of re-running the groupby for every single country.
for country, temp_df in df.groupby('country', sort=False):
    temp_df_lst.append(temp_df)

    # latest date reported by this country
    # (assumes 'date' holds 'YYYY-MM-DD' strings, so string comparison is chronological)
    last_update = temp_df['date'].max()

    if last_update < max_date:
        # one date string per missing day, from the day after the last report up to max_date
        missing_dates = pd.date_range(
            start=pd.to_datetime(last_update) + pd.Timedelta(days=1),
            end=latest_report,
            freq='D',
        ).strftime("%Y-%m-%d").tolist()

        # Clone the most recent row once per missing day. Staying in DataFrame
        # land (instead of going through an object ndarray) preserves the column
        # dtypes; tail(1) guards against duplicated rows for the last date.
        last_row = temp_df[temp_df['date'] == last_update].tail(1)
        append_df = pd.concat([last_row] * len(missing_dates), ignore_index=True)

        append_df['date'] = missing_dates
        # no updates on these dates, so the daily figures are 0; other features remain the same
        append_df.loc[:, daily_cols] = 0

        temp_df_lst.append(append_df)

# concatenate everything into the new df
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
new_df = pd.concat(temp_df_lst, axis=0, ignore_index=True)


In [None]:
# Order the rows by country and then by date, and rebuild a fresh 0..n-1 index.
new_df = new_df.sort_values(by=['country', 'date']).reset_index(drop=True)


In [None]:
# Check the imputation: China's last 10 rows should now run up to the latest report date.
# Grouping by the scalar column name (not a one-element list) keeps the plain
# string key valid for get_group on newer pandas versions as well.
new_df.groupby('country').get_group('China').tail(10)


<div class='tableauPlaceholder' id='viz1613888325983' style='position: relative'><noscript><a href='#'><img alt=' ' src='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;co&#47;covid2_16138717906360&#47;Dashboard1&#47;1_rss.png' style='border: none' /></a></noscript><object class='tableauViz'  style='display:none;'><param name='host_url' value='https%3A%2F%2Fpublic.tableau.com%2F' /> <param name='embed_code_version' value='3' /> <param name='path' value='views&#47;covid2_16138717906360&#47;Dashboard1?:language=zh-Hant&amp;:embed=y&amp;:display_count=y&amp;publish=yes' /> <param name='toolbar' value='yes' /><param name='static_image' value='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;co&#47;covid2_16138717906360&#47;Dashboard1&#47;1.png' /> <param name='animate_transition' value='yes' /><param name='display_static_image' value='yes' /><param name='display_spinner' value='yes' /><param name='display_overlay' value='yes' /><param name='display_count' value='yes' /><param name='language' value='zh-Hant' /><param name='filter' value='publish=yes' /></object></div>                <script type='text/javascript'>                    var divElement = document.getElementById('viz1613888325983');                    var vizElement = divElement.getElementsByTagName('object')[0];                    if ( divElement.offsetWidth > 800 ) { vizElement.style.width='100%';vizElement.style.height=(divElement.offsetWidth*0.75)+'px';} else if ( divElement.offsetWidth > 500 ) { vizElement.style.width='100%';vizElement.style.height=(divElement.offsetWidth*0.75)+'px';} else { vizElement.style.width='100%';vizElement.style.height='1827px';}                     var scriptElement = document.createElement('script');                    scriptElement.src = 'https://public.tableau.com/javascripts/api/viz_v1.js';                    vizElement.parentNode.insertBefore(scriptElement, vizElement);                </script>