In [1]:
import matplotlib.pyplot as plt
import pandas as pd
from bokeh.io import output_notebook
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Div, Legend
from bokeh.palettes import Category20, Category20b
from bokeh.plotting import figure, show

In [2]:
# Route Bokeh output to the notebook so the later show() call renders inline.
output_notebook()

# COVID-19: The most selfish countries in Europe!

# Download datasets

## Country information from Geonames

[Web site](http://download.geonames.org/export/dump/)

This work is licensed under a Creative Commons Attribution 4.0 License,
see https://creativecommons.org/licenses/by/4.0/

In [3]:
# Column names assigned to the geonames countryInfo table. The first column
# (the country code, e.g. 'AD') is named 'ReportingCountry' so that it matches
# the join key used when merging with the vaccination data.
names = ['ReportingCountry', 'ISO3', 'ISO-Numeric', 'fips', 'CountryName', 'Capital', 'Area(in sq km)', 'CountryPopulation',
         'Continent', 'tld', 'CurrencyCode', 'CurrencyName', 'Phone', 'Postal Code Format',
         'Postal Code Regex', 'Languages', 'geonameid', 'neighbours', 'EquivalentFipsCode']

# pandas treats the literal string 'NA' as missing by default, which would
# turn Namibia's country code and the 'NA' (North America) continent code into
# NaN. Only empty fields are treated as missing here.
countries_info = pd.read_csv(
    'http://download.geonames.org/export/dump/countryInfo.txt',
    sep='\t',
    # The first 50 lines are skipped; presumably the '#'-prefixed preamble and
    # header row -- re-check this count if the upstream file layout changes.
    # (comment='#' is not an option: postal-code formats contain '#'.)
    skiprows=50,
    names=names,
    keep_default_na=False,
    na_values=[''],
)

In [4]:
# Preview only the three columns that the rest of the notebook relies on.
preview_columns = ['ReportingCountry', 'CountryName', 'CountryPopulation']
countries_info[preview_columns]

Unnamed: 0,ReportingCountry,CountryName,CountryPopulation
0,AD,Andorra,77006
1,AE,United Arab Emirates,9630959
2,AF,Afghanistan,37172386
3,AG,Antigua and Barbuda,96286
4,AI,Anguilla,13254
...,...,...,...
247,ZA,South Africa,57779622
248,ZM,Zambia,17351822
249,ZW,Zimbabwe,14439018
250,CS,Serbia and Montenegro,10829175



## Data on COVID-19 vaccination in the EU/EEA

[Web site](https://www.ecdc.europa.eu/en/publications-data/data-covid-19-vaccination-eu-eea)

This work is licensed under an Open Source License.

In [5]:
# Download the ECDC vaccine tracker as a single CSV file.
VACCINE_TRACKER_URL = 'https://opendata.ecdc.europa.eu/covid19/vaccine_tracker/csv/data.csv'
vaccine_df = pd.read_csv(VACCINE_TRACKER_URL)

In [6]:
# 'YearWeekISO' looks like '2021-W46': split it into a numeric ISO year and
# a numeric ISO week (dropping the leading 'W').
iso_parts = vaccine_df['YearWeekISO'].str.split('-', expand=True)
vaccine_df['Year'] = pd.to_numeric(iso_parts[0])
vaccine_df['Week'] = pd.to_numeric(iso_parts[1].str[1:])

# Low-cardinality label columns are stored as categoricals.
for label_column in ('ReportingCountry', 'Region', 'Vaccine', 'TargetGroup'):
    vaccine_df[label_column] = vaccine_df[label_column].astype('category')

# Date of each ISO week: appending '-1' selects weekday 1 (Monday) of the week.
vaccine_df['Date'] = pd.to_datetime(vaccine_df['YearWeekISO'] + '-1', format='%G-W%V-%u')

In [7]:
# Number of rows in the vaccination table.
vaccine_df.shape[0]

193571

In [8]:
# Show ten random rows; the fixed seed keeps the displayed sample identical
# across "Restart & Run All" executions.
vaccine_df.sample(10, random_state=0)

Unnamed: 0,YearWeekISO,ReportingCountry,Denominator,NumberDosesReceived,NumberDosesExported,FirstDose,FirstDoseRefused,SecondDose,DoseAdditional1,UnknownDose,Region,TargetGroup,Vaccine,Population,Year,Week,Date
94947,2021-W46,FI,,,,19,,19,142,0,FI1C5,Age60_69,COM,5525292,2021,46,2021-11-15
6167,2021-W31,BG,15000.0,,,3,,0,0,0,BG,LTCF,JANSS,6951482,2021,31,2021-08-02
146805,2021-W09,PL,45572.0,,,0,,0,0,0,PL42,Age15_17,MOD,37958138,2021,9,2021-03-01
15536,2021-W17,EL,1854378.0,90000.0,0.0,0,,0,0,0,EL,Age<18,MOD,10718565,2021,17,2021-04-26
43742,2021-W13,FI,,,,0,,0,0,0,FI200,Age80+,JANSS,5525292,2021,13,2021-03-29
24892,2021-W01,FI,,,,0,,0,0,0,FI195,Age70_79,JANSS,5525292,2021,1,2021-01-04
61099,2021-W24,FI,,,,0,,0,0,0,FI1D2,AgeUNK,JANSS,5525292,2021,24,2021-06-14
184716,2021-W18,PT,,,,721,,6179,0,0,PTCSR02,ALL,COM,10295909,2021,18,2021-05-03
188618,2021-W05,SE,521829.0,,,0,,0,0,0,SE33,ALL,AZ,10327589,2021,5,2021-02-01
133664,2021-W48,LV,359457.0,0.0,0.0,0,,0,0,0,LV,Age<18,AZ,1907675,2021,48,2021-11-29


In [9]:
# Print column dtypes and non-null counts to check the conversions above.
vaccine_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193571 entries, 0 to 193570
Data columns (total 17 columns):
 #   Column               Non-Null Count   Dtype         
---  ------               --------------   -----         
 0   YearWeekISO          193571 non-null  object        
 1   ReportingCountry     193571 non-null  category      
 2   Denominator          111049 non-null  float64       
 3   NumberDosesReceived  28139 non-null   float64       
 4   NumberDosesExported  18822 non-null   float64       
 5   FirstDose            193571 non-null  int64         
 6   FirstDoseRefused     1429 non-null    float64       
 7   SecondDose           193571 non-null  int64         
 8   DoseAdditional1      193571 non-null  int64         
 9   UnknownDose          193571 non-null  int64         
 10  Region               193571 non-null  category      
 11  TargetGroup          193571 non-null  category      
 12  Vaccine              193571 non-null  category      
 13  Population    

In [10]:
# The vaccination data reports Greece under the code 'EL', whereas the
# geonames table uses 'GR'. Translate the lookup so Greek rows are matched;
# otherwise they get a missing CountryName and are silently dropped by the
# groupby below.
country_lookup = countries_info[['ReportingCountry', 'CountryPopulation', 'CountryName']].replace(
    {'ReportingCountry': {'GR': 'EL'}})
df = pd.merge(vaccine_df, country_lookup, on="ReportingCountry", how="left")

# Missing counts are treated as zero so the arithmetic below never yields NaN.
numeric_fields = ['NumberDosesReceived', 'NumberDosesExported', 'FirstDose', 'SecondDose', 'DoseAdditional1',
                  'UnknownDose']
df[numeric_fields] = df[numeric_fields].fillna(0)

# Doses that left the stock: administered (any dose type) plus exported.
df['UsedDoses'] = df['FirstDose'] + df['SecondDose'] + df['DoseAdditional1'] + df['UnknownDose'] + df['NumberDosesExported']
# Read the received doses from df (NaN already replaced by 0), not from the
# unfilled vaccine_df: otherwise every row without a received figure becomes
# NaN and its used doses are ignored by the sum below.
df['UnusedDoses'] = df['NumberDosesReceived'] - df['UsedDoses']

# NOTE(review): rows for every Region and TargetGroup are summed together
# here -- confirm against the ECDC data dictionary that this does not count
# the same doses more than once.
aux = df[['Date', 'CountryName', 'UnusedDoses']].groupby(
    ['Date', 'CountryName']).sum().reset_index()
# One column per country, one row per week; cumsum turns weekly balances into
# a running stock of unused doses.
r = pd.pivot(aux, values='UnusedDoses', index=['Date'],
                    columns=['CountryName']).fillna(0).cumsum()

In [11]:
# Country name -> population lookup built from the geonames table.
population = countries_info[['CountryName', 'CountryPopulation']].set_index('CountryName')

# Convert the running stock of unused doses into doses per inhabitant in a
# single aligned division: the population Series is indexed by the same
# country names as r's columns, so each column is divided by its own
# population. A country missing from the lookup raises KeyError.
# Note: this rebinds r, so re-running the cell divides a second time --
# re-run the cell that builds r first.
r = r / population.loc[r.columns, 'CountryPopulation']

In [12]:
# Countries included in the chart, named as they appear in r's columns.
countries = [
    'Liechtenstein', 'Italy', 'Belgium', 'Finland', 'Germany', 'France',
    'Romania', 'Sweden', 'Croatia', 'Norway', 'Netherlands', 'Czechia',
    'Spain', 'Bulgaria', 'Portugal', 'Slovenia', 'Poland', 'Hungary',
]

# Rank the selected countries by their value in the 2021-12-13 row of r,
# highest first.
ranking_snapshot = r.loc['2021-12-13', countries]
sorted_countries = ranking_snapshot.sort_values(ascending=False).index.tolist()

In [13]:
# Page header (title plus an illustrative image) rendered above the chart.
div = Div(text="""<h1>COVID-19: The most selfish countries in Europe!</h1>
<p>
<p style="text-align:center">
<img src="https://www.statnews.com/wp-content/uploads/2020/12/GettyImages-1291379264-645x645.jpg" 
width="250" height="300">
</p>
""", height=300, width=800)

# Expose 'Date' as an ordinary column so the line glyphs can use it as x.
r_data = r.reset_index()
source = ColumnDataSource(data=r_data)

# width/height are used instead of plot_width/plot_height, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(
    width=900,
    height=600,
    x_axis_type="datetime",
    sizing_mode="stretch_width"
)

# One colour per ranked country. The palette slice is reversed, so the
# top-ranked country receives the last colour of the slice.
palette = Category20b[20][:len(sorted_countries)][::-1]
lines = [
    p.line(x='Date', y=country, source=source, color=color)
    for country, color in zip(sorted_countries, palette)
]

p.yaxis.axis_label = 'Unused Doses per Person'
p.xaxis.axis_label = 'Time'

# Legend entries are numbered by rank (1 = highest value in sorted_countries).
legend = Legend(items=[
    (f"{rank}. {country}", [line])
    for rank, (country, line) in enumerate(zip(sorted_countries, lines), start=1)
], location="center")

# Clicking a legend entry toggles the visibility of its line.
legend.click_policy = "hide"
p.add_layout(legend, 'right')

show(column(div, p))