# COVID-19 Deaths Per Capita
> Comparing death rates adjusted for population size; this version focuses on European countries.

- comments: false
- author: Petr Simecek
- hide: false

This is a small adaptation of a graph by Joao B. Duarte & Hamel Husain at [COVID-19 Dashboards](https://covid19dashboards.com/covid-compare-permillion). A similar graph (just not normalized per capita) is regularly updated and commented on in [The Financial Times](https://www.ft.com/coronavirus-latest).

I have created this version to include Czechia (my home country) and other European countries.

In [1]:
#hide
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt

# Size passed to the Altair chart's `width` / `height` properties.
chart_width, chart_height = 550, 400

## Deaths Per Million Of Inhabitants

Since reaching at least 1 death per million

> Tip: Click (Shift+ for multiple) on countries in the legend to filter the visualization.

In [2]:
#hide
# Load the JHU CSSE global deaths time series and reshape it to one row per
# (country, date) holding the cumulative death count.
JHU_DEATHS_URL = ("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
                  "csse_covid_19_data/csse_covid_19_time_series/"
                  "time_series_covid19_deaths_global.csv")
# `error_bad_lines=False` was removed in pandas 2.0; `on_bad_lines="skip"`
# (pandas >= 1.3) is the equivalent "drop malformed rows" setting.
data = pd.read_csv(JHU_DEATHS_URL, on_bad_lines="skip")
data = data.drop(columns=["Lat", "Long"])
# Wide -> long: the remaining (date) columns become `variable` / `value` rows.
data = data.melt(id_vars=["Province/State", "Country/Region"])
# Add up provinces to get country totals. Only `value` is summed explicitly:
# relying on pandas to silently drop the text column "Province/State" no longer
# works on recent pandas and would leave an extra column in the result.
data = data.groupby(["Country/Region", "variable"], as_index=False)["value"].sum()
# NOTE: the column is called `total_cases` for compatibility with the code
# below, but it holds cumulative deaths.
data = data.rename(columns={"Country/Region": "location", "variable": "date", "value": "total_cases"})
data["date"] = pd.to_datetime(data.date)
data = data.sort_values(by="date")
# Harmonise country names with the ones used in the `countries` list.
data.loc[data.location == "US", "location"] = "United States"
data.loc[data.location == "Korea, South", "location"] = "South Korea"

# Population by country, read from the PWT 9.1 Stata file (pwt91.dta).
data_pwt = pd.read_stata("https://www.rug.nl/ggdc/docs/pwt91.dta")

# Keep only the 2017 rows and the two columns needed for the per-capita step.
filter1 = data_pwt["year"] == 2017
# .copy() makes data_pop an independent frame, so the renaming below cannot
# trigger SettingWithCopyWarning or touch data_pwt.
data_pop = data_pwt.loc[filter1, ["country", "pop"]].copy()
# Map PWT country names onto the names used in the deaths data.
# (Add "Iran (Islamic Republic of)": "Iran" here if Iran is ever plotted.)
data_pop["country"] = data_pop["country"].replace({
    "Republic of Korea": "South Korea",
    "Czech Republic": "Czechia",
})

# Per-capita deaths: divide each listed country's cumulative deaths by its
# population and keep one time series per country.
data_pc = data.copy()
countries = ["Italy", "Spain", "France", "United Kingdom", "Germany",
             "Portugal", "United States", "Slovakia", "South Korea", "Japan",
             "Brazil", 'Netherlands', 'Belgium', 'Sweden',
             'Switzerland', 'Norway', 'Denmark', 'Austria', 'Slovenia', 'Greece',
             'Czechia', 'Romania', 'Ireland', 'Poland', 'Chile',
             'Ukraine', 'Hungary', 'Finland', 'Albania', 'Bulgaria']
data_countries = []  # unused; kept only so the name still exists for other cells

# Drop observations with fewer than MIN_DEATHS cumulative deaths.
MIN_DEATHS = 10
filter_min_dead = data_pc.total_cases < MIN_DEATHS
data_pc = data_pc[~filter_min_dead].copy()

# Look up one population figure per listed country, failing with a clear
# message instead of the opaque TypeError that float(<Series>) produces when a
# country is missing or duplicated in data_pop.
population = {}
for name in countries:
    pop_rows = data_pop.loc[data_pop.country == name, "pop"]
    if len(pop_rows) != 1:
        raise ValueError(
            f"expected exactly one population row for {name!r}, found {len(pop_rows)}"
        )
    population[name] = float(pop_rows.iloc[0])

# Make the column float up front: writing ratios into an integer column through
# .loc is a deprecated implicit upcast on recent pandas.
data_pc["total_cases"] = data_pc["total_cases"].astype(float)
# The notebook treats `pop` as millions of inhabitants, so the ratio is deaths
# per million. Rows of countries outside `countries` keep their raw counts.
listed = data_pc["location"].isin(countries)
data_pc.loc[listed, "total_cases"] = (
    data_pc.loc[listed, "total_cases"] / data_pc.loc[listed, "location"].map(population)
)

# One time series per country, starting once the rate exceeds 1 per million.
filter1 = data_pc["total_cases"] > 1
data_countries_pc = [
    data_pc[(data_pc["location"] == name) & filter1] for name in countries
]

In [3]:
#hide_input
# Stack data to get it to Altair dataframe format
# For every country: renumber rows from 0 so the row position becomes the number
# of days since the country crossed the threshold, and add the log of the rate.
data_countries_pc2 = []
for country_frame in data_countries_pc:
    stacked = country_frame.reset_index()
    stacked['n_days'] = stacked.index
    stacked['log_cases'] = np.log(stacked["total_cases"])
    data_countries_pc2.append(stacked)

# Concatenate once instead of growing the frame inside a loop (which re-copies
# all previously stacked rows on every iteration).
data_plot = pd.concat(data_countries_pc2, axis=0)

# Reference lines: log of a quantity that doubles every `days` days.
for days in (2, 4, 12):
    data_plot[f"trend_{days}days"] = np.log(2)/days*data_plot["n_days"]
# Matching captions (fixes the "evey" typo in the 4-day label).
for days in (2, 4, 12):
    data_plot[f"trend_{days}days_label"] = f"Doubles every {days} days"


# Plot it using Altair
source = data_plot

# Interactions: zoom/pan bound to the scales, and country filtering via the legend.
scales = alt.selection_interval(bind='scales')
selection = alt.selection_multi(fields=['location'], bind='legend')

country_color = alt.Color(
    'location:N',
    legend=alt.Legend(title="Country", labelFontSize=15, titleFontSize=17),
    scale=alt.Scale(scheme='tableau20'),
)

base = alt.Chart(source, title="COVID-19 Deaths Per Million of Inhabitants").encode(
    x=alt.X('n_days:Q', title="Days passed since reaching 1 death per million"),
    y=alt.Y("log_cases:Q", title="Log of deaths per million"),
    color=country_color,
    # Countries not selected in the legend are faded rather than removed.
    opacity=alt.condition(selection, alt.value(1), alt.value(0.1)),
)

lines = (
    base.mark_line()
    .add_selection(scales)
    .add_selection(selection)
    .properties(width=chart_width, height=chart_height)
)

# Upper bound of the y domain used by every reference line.
log_cases_max = max(data_plot["log_cases"])


def _trend_line(column):
    """Return a dashed grey line chart of `column` against n_days."""
    return alt.Chart(source).mark_line(color="grey", strokeDash=[3,3]).encode(
        x="n_days:Q",
        y=alt.Y(f"{column}:Q", scale=alt.Scale(domain=(0, log_cases_max))),
    )


trend_2d = _trend_line("trend_2days")
trend_4d = _trend_line("trend_4days")
trend_12d = _trend_line("trend_12days")

# Captions for the reference lines, placed at fixed data coordinates.
labels = pd.DataFrame({
    'label': ['Doubles every 2 days', 'Doubles every 4 days', 'Doubles every 12 days'],
    'x_coord': [6, 16, 25],
    'y_coord': [4, 3.5, 1.8],
})
trend_label = alt.Chart(labels).mark_text(
    align='left', dx=-55, dy=-15, fontSize=12, color="grey"
).encode(
    x='x_coord:Q',
    y='y_coord:Q',
    text='label:N',
)

# Layer the reference lines and captions underneath the country lines.
layered = trend_2d + trend_4d + trend_12d + trend_label + lines
plot1 = layered.configure_title(fontSize=20).configure_axis(labelFontSize=15, titleFontSize=18)
#plot1.save(("../images/covid-permillion-trajectories.png"))
plot1

Last Available Deaths per Million By Country:

In [3]:
#hide_input
# Latest per-million death rate (and its log) for each plotted country.
label = 'Deaths'
metric_name = f'{label} per Million'

# Select and rename columns by name rather than overwriting `.columns`
# positionally, so an extra or reordered column upstream cannot mislabel the
# data or raise a length-mismatch error.
temp = (
    pd.concat(data_countries_pc)
    .loc[lambda x: x.date >= '3/1/2020', ['location', 'date', 'total_cases']]
    .rename(columns={'location': 'Country', 'total_cases': metric_name})
)
# np.log is vectorised; no per-element apply needed.
temp[f'Log of {label} per Million'] = np.log(temp[metric_name])

# Rows are in date order, so the last row per country is the most recent one.
temp.groupby('Country').last()

Unnamed: 0_level_0,date,Deaths per Million,Log of Deaths per Million
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Albania,2020-05-24,10.920805,2.39067
Austria,2020-05-24,73.264664,4.294078
Belgium,2020-05-24,811.94571,6.699433
Brazil,2020-05-24,108.300377,4.684909
Bulgaria,2020-05-24,18.349735,2.909615
Chile,2020-05-24,39.767982,3.683062
Czechia,2020-05-24,29.665756,3.389993
Denmark,2020-05-24,98.019534,4.585167
Finland,2020-05-24,55.583407,4.017885
France,2020-05-24,422.007152,6.045022


## Appendix

> What Log of Deaths per Million means for Czechia (total population 10.65 million)

In [4]:
#hide_input
# Translate a grid of log(deaths per million) values into absolute death counts
# for Czechia, whose population the notebook takes as 10.65 million.
CZECHIA_POPULATION_MILLIONS = 10.65
log_deaths_per_milion = np.linspace(0, 7.0, 15)
counts_czechia = CZECHIA_POPULATION_MILLIONS * np.exp(log_deaths_per_milion)
pd.DataFrame({
    'Log of Deaths per Million': log_deaths_per_milion,
    'Deaths (Czechia)': counts_czechia.round().astype(int),
})

Unnamed: 0,Log of Deaths per Million,Deaths (Czechia)
0,0.0,11
1,0.5,18
2,1.0,29
3,1.5,48
4,2.0,79
5,2.5,130
6,3.0,214
7,3.5,353
8,4.0,581
9,4.5,959
