In [1]:
import pandas as pd
import plotly.graph_objs as go
import plotly.express as px

In [22]:
# Our World in Data COVID-19 dataset, fetched over HTTP on every run of this cell.
OWID_COVID_CSV_URL = 'https://covid.ourworldindata.org/data/owid-covid-data.csv'

# Keep the raw download untouched in `whole_df`; later cells derive their own frames from it.
whole_df = pd.read_csv(OWID_COVID_CSV_URL)

In [23]:
# Build `df`: per-country rolling means of every numeric column, restricted to
# the countries worth plotting. `whole_df` itself is never modified.

# Tunable thresholds for what counts as a country worth plotting.
MIN_POPULATION = 1_000_000
MIN_TOTAL_DEATHS = 1_000
# Number of consecutive rows averaged per country.
# NOTE(review): this equals 7 days only if each country has one row per day - confirm.
SMOOTHING_WINDOW = 7

group_keys = ['iso_code', 'continent', 'location']

# Prune down the data a bit to make plotting more legible: keep countries that
# have at least one raw row meeting both thresholds.
important_countries = set(
    whole_df.query(
        'population >= @MIN_POPULATION and total_deaths >= @MIN_TOTAL_DEATHS'
    ).iso_code
)

# Only numeric columns can be averaged. Older pandas silently dropped the string
# columns during `.mean()`; pandas >= 2.0 raises on them, so select explicitly.
numeric_cols = whole_df.select_dtypes('number').columns.tolist()

df = (
    whole_df
    # Filter first: the rolling mean is per country, so dropping the other
    # countries up front gives the same rows while doing far less work.
    .loc[whole_df.iso_code.isin(important_countries), group_keys + ['date'] + numeric_cols]
    # The window is a fixed number of rows, so the rows must be in date order
    # *before* rolling rather than relying on the order of the CSV.
    .sort_values(group_keys + ['date'])
    .groupby(group_keys)
    .rolling(SMOOTHING_WINDOW, on='date')
    .mean()
    # Restores the group keys as columns; the original row label becomes `level_3`.
    .reset_index()
    # Applied to the smoothed value, so each country starts at its first row
    # with a positive rolling-mean death count.
    .query('total_deaths > 0')
    .sort_values(group_keys + ['date'])
)

df

Unnamed: 0,iso_code,continent,location,level_3,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
337,AFG,Asia,Afghanistan,90,2020-03-30,77.571429,11.428571,7.632429,1.857143,0.571429,...,1803.987,,597.029,9.59,,,37.746,0.50,64.83,0.498
338,AFG,Asia,Afghanistan,91,2020-03-31,92.000000,14.428571,9.306000,2.285714,0.428571,...,1803.987,,597.029,9.59,,,37.746,0.50,64.83,0.498
339,AFG,Asia,Afghanistan,92,2020-04-01,109.714286,17.714286,11.428429,2.714286,0.428571,...,1803.987,,597.029,9.59,,,37.746,0.50,64.83,0.498
340,AFG,Asia,Afghanistan,93,2020-04-02,126.428571,16.714286,12.734571,3.142857,0.428571,...,1803.987,,597.029,9.59,,,37.746,0.50,64.83,0.498
341,AFG,Asia,Afghanistan,94,2020-04-03,149.285714,22.857143,14.918286,3.571429,0.428571,...,1803.987,,597.029,9.59,,,37.746,0.50,64.83,0.498
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56681,ZAF,Africa,South Africa,47719,2020-11-15,742758.571429,1896.571429,1694.142857,20007.285714,59.571429,...,12294.876,18.9,200.380,5.52,8.1,33.2,43.993,2.32,64.13,0.699
56682,ZAF,Africa,South Africa,47720,2020-11-16,744722.285714,1963.714286,1761.306143,20069.000000,61.714286,...,12294.876,18.9,200.380,5.52,8.1,33.2,43.993,2.32,64.13,0.699
56683,ZAF,Africa,South Africa,47721,2020-11-17,746685.714286,1963.428571,1818.734714,20136.000000,67.000000,...,12294.876,18.9,200.380,5.52,8.1,33.2,43.993,2.32,64.13,0.699
56684,ZAF,Africa,South Africa,47722,2020-11-18,748686.000000,2000.285714,1871.469429,20204.857143,68.857143,...,12294.876,18.9,200.380,5.52,8.1,33.2,43.993,2.32,64.13,0.699


In [24]:
# For each country, find the most recent date whose `new_deaths` value is
# positive, then keep only the rows of `df` that fall on that date.
latest_dates = (
    df.query('new_deaths > 0')
    .groupby('iso_code')
    .date
    .max()
    .reset_index()
)

# Inner join on (iso_code, date): one row per country that has such a date.
last_df = df.merge(latest_dates, on=['iso_code', 'date'])
last_df

Unnamed: 0,iso_code,continent,location,level_3,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
0,AFG,Asia,Afghanistan,324,2020-11-19,4.340243e+04,177.428571,160.734857,1623.571429,9.142857,...,1803.987,,597.029,9.59,,,37.746,0.50,64.83,0.498
1,ARG,South America,Argentina,2148,2020-11-19,1.311837e+06,9426.000000,9627.510286,35535.714286,259.428571,...,18933.907,0.6,191.032,5.50,16.2,27.7,,5.00,76.67,0.825
2,ARM,Asia,Armenia,2473,2020-11-19,1.181099e+05,1328.428571,1526.591857,1786.571429,28.857143,...,8787.580,1.8,341.010,7.11,1.5,52.1,94.043,4.20,75.09,0.755
3,AUT,Europe,Austria,3370,2020-11-19,2.024691e+05,7076.285714,7253.734571,1696.571429,65.000000,...,45436.686,0.7,145.183,6.35,28.4,30.9,,7.37,81.54,0.908
4,AZE,Asia,Azerbaijan,3695,2020-11-19,7.550729e+04,1829.000000,1543.142714,966.428571,20.714286,...,15847.419,,559.812,7.11,0.3,42.5,83.241,4.70,73.00,0.757
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,TUN,Africa,Tunisia,52677,2020-11-19,8.000214e+04,1321.428571,1379.612429,2337.000000,62.714286,...,10849.297,2.0,318.991,8.52,1.1,65.8,78.687,2.30,76.70,0.735
70,TUR,Asia,Turkey,52928,2020-11-19,4.146859e+05,3367.857143,2907.898143,11515.571429,96.428571,...,25129.341,0.2,171.285,12.13,14.1,41.1,,2.81,77.69,0.791
71,UKR,Europe,Ukraine,53667,2020-11-19,5.354356e+05,11477.857143,10777.673429,9605.428571,166.428571,...,7894.393,0.1,539.849,7.11,13.5,47.4,,8.80,72.06,0.751
72,USA,North America,United States,54642,2020-11-19,1.104735e+07,161239.285714,150630.122571,246433.285714,1248.142857,...,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.924


In [27]:
# Plot each country's trajectory (new vs. total deaths per million) as a line,
# with a labelled marker at the row selected for that country in `last_df`.
fig1 = px.line(df, x='total_deaths_per_million', y='new_deaths_per_million', line_group='iso_code', color='iso_code')
fig2 = px.scatter(last_df, x='total_deaths_per_million', y='new_deaths_per_million', color='iso_code', text='location')

# Both figures are coloured by iso_code, so each country would otherwise get two
# legend entries (one line, one marker). Keep the line entry only; the traces
# still share a legend group, so toggling a country hides its marker as well.
fig2.update_traces(showlegend=False)

fig = go.Figure(
    # Marker traces first, then the line traces, in a single combined figure.
    data=list(fig2.data) + list(fig1.data),
    layout=go.Layout(
        title='COVID-19: new vs. total deaths per million (7-row rolling mean)',
        # Log scale so countries with very different death rates stay readable.
        yaxis=go.layout.YAxis(type='log', title='New deaths per million'),
        xaxis=go.layout.XAxis(title='Total deaths per million')
    )
)
fig

In [10]:
# Export the combined figure as a complete HTML page; plotly.js is referenced
# from the CDN instead of being embedded in the file.
fig.write_html(
    'normalized_deaths.html',
    full_html=True,
    include_plotlyjs='cdn',
)
