# 10_02: Summarizing COVID-19 data

In [1]:
import math
import collections
import dataclasses
import datetime

import numpy as np
import pandas as pd

import plotly.express as px

In [2]:
# Load the compressed per-country daily table. 'date' is parsed into
# datetimes; the remaining columns use pyarrow-backed dtypes.
covid19 = pd.read_csv(
    'covid19.csv.gz',
    parse_dates=['date'],
    dtype_backend='pyarrow',
)

In [3]:
# Print a summary of the frame: index range, column names, non-null counts and dtypes.
covid19.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156710 entries, 0 to 156709
Data columns (total 15 columns):
 #   Column                      Non-Null Count   Dtype          
---  ------                      --------------   -----          
 0   country                     156710 non-null  string[pyarrow]
 1   date                        156710 non-null  datetime64[ns] 
 2   continent                   156710 non-null  string[pyarrow]
 3   population                  156710 non-null  int64[pyarrow] 
 4   life_expectancy             151672 non-null  double[pyarrow]
 5   gdp_per_capita              148190 non-null  int64[pyarrow] 
 6   population_density          156710 non-null  double[pyarrow]
 7   median_age                  156710 non-null  double[pyarrow]
 8   extreme_poverty             137966 non-null  double[pyarrow]
 9   human_development_index     156710 non-null  double[pyarrow]
 10  hospital_beds_per_thousand  122630 non-null  double[pyarrow]
 11  percent_fully_vaccinated  

In [4]:
# Collapse the daily time series into one summary row per country.
#
# GroupBy.last() works column by column and returns the last *non-null* entry
# of each group, so a single output row may combine values reported on
# different dates (the most recent available figure for each column).
#
# "Last" is positional, so the rows must be in chronological order within each
# country. Sort explicitly rather than relying on the order of the CSV; the
# stable sort leaves an already-ordered file untouched.
final = (
    covid19
    .sort_values('date', kind='stable')
    .groupby('country')
    .last()
)
final

Unnamed: 0_level_0,date,continent,population,life_expectancy,gdp_per_capita,population_density,median_age,extreme_poverty,human_development_index,hospital_beds_per_thousand,percent_fully_vaccinated,total_cases,total_deaths,total_excess
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Afghanistan,2024-09-08,Asia,40578846,61.9824,1516,62.215546,16.752,,0.462,0.39,45.270844,235213,7998,
Algeria,2024-09-08,Africa,45477391,61.6609,11198,19.09418,27.983,0.455468,0.745,1.9,14.251447,272156,6881,126117
Angola,2024-09-08,Africa,35635028,76.4626,5906,28.583483,16.39,31.122005,0.591,,26.965265,107481,1937,
Argentina,2024-09-08,South America,45407904,76.3767,22461,16.592274,31.739,,0.849,4.99,76.860214,10102033,130671,186306
Australia,2024-09-08,Oceania,26200987,72.5398,51090,3.406256,37.512,0.497094,0.946,3.84,82.62102,11861160,25236,36645
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Venezuela,2024-09-08,South America,28213016,80.1106,,31.985733,28.81,,0.699,0.87,53.048817,552694,5856,
Vietnam,2024-09-08,Asia,99680656,72.3542,11396,318.03265,31.938,0.963795,0.726,2.6,86.23695,11623999,43206,
Yemen,2024-09-08,Asia,38222880,74.9362,,72.395935,18.118,19.802757,0.424,0.71,2.11145,11945,2159,
Zambia,2024-09-08,Africa,20152934,80.535,3365,27.109505,17.288,64.349754,0.569,,45.719406,349868,4078,


In [5]:
# Ten countries with the highest cumulative case counts.
final['total_cases'].nlargest(n=10)

country
United States     103436827
China              99379132
India              45042835
France             38997489
Germany            38437755
Brazil             37511921
South Korea        34571873
Japan              33803571
Italy              26826486
United Kingdom     24983908
Name: total_cases, dtype: int64[pyarrow]

In [6]:
# Same top ten, promoted to a one-column DataFrame so it renders as a table.
final['total_cases'].nlargest(n=10).to_frame()

Unnamed: 0_level_0,total_cases
country,Unnamed: 1_level_1
United States,103436827
China,99379132
India,45042835
France,38997489
Germany,38437755
Brazil,37511921
South Korea,34571873
Japan,33803571
Italy,26826486
United Kingdom,24983908


In [7]:
# Render the top-ten table with thousands separators via the Styler.
final['total_cases'].nlargest(n=10).to_frame().style.format('{:,d}')

Unnamed: 0_level_0,total_cases
country,Unnamed: 1_level_1
United States,103436827
China,99379132
India,45042835
France,38997489
Germany,38437755
Brazil,37511921
South Korea,34571873
Japan,33803571
Italy,26826486
United Kingdom,24983908


In [8]:
pd.Series.nice = lambda series, format='{:,.0f}': pd.DataFrame(series).style.format(format)
pd.DataFrame.nice = lambda frame, format='{:,.0f}': frame.style.format(format)

In [9]:
# Top ten by cumulative cases, rendered through the .nice() helper.
final['total_cases'].nlargest(n=10).nice()

Unnamed: 0_level_0,total_cases
country,Unnamed: 1_level_1
United States,103436827
China,99379132
India,45042835
France,38997489
Germany,38437755
Brazil,37511921
South Korea,34571873
Japan,33803571
Italy,26826486
United Kingdom,24983908


In [10]:
# Ten countries with the most cumulative deaths, shown next to their case counts.
(
    final[['total_cases', 'total_deaths']]
    .nlargest(10, columns='total_deaths')
    .nice()
)

Unnamed: 0_level_0,total_cases,total_deaths
country,Unnamed: 1_level_1,Unnamed: 2_level_1
United States,103436827,1127911
Brazil,37511921,702116
India,45042835,533636
Russia,24334446,403292
Mexico,7621651,334714
United Kingdom,24983908,232112
Peru,4526976,220975
Italy,26826486,197542
Germany,38437755,174979
France,38997489,167985


In [11]:
# Normalize cumulative counts by population, expressed in millions of people.
final = final.assign(
    cases_per_million=final['total_cases'] / (final['population'] / 1.0e6),
    deaths_per_million=final['total_deaths'] / (final['population'] / 1.0e6),
)

In [12]:
# Ten countries with the highest deaths per million inhabitants.
(
    final[['cases_per_million', 'deaths_per_million']]
    .nlargest(10, columns='deaths_per_million')
    .nice()
)

Unnamed: 0_level_0,cases_per_million,deaths_per_million
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Peru,135233,6601
Czechia,446657,4079
Greece,546852,3789
Romania,185760,3594
United Kingdom,366444,3404
Brazil,178368,3339
Italy,449965,3313
United States,302859,3302
Chile,276258,3209
Poland,174392,3147


In [13]:
# Same top ten, plus deaths as a percentage of recorded cases.
# The callable given to .assign() receives the whole intermediate frame
# (not a single row), so the division is vectorized over all ten countries.
(
    final[['cases_per_million', 'deaths_per_million']]
    .nlargest(10, columns='deaths_per_million')
    .assign(death_rate=lambda top: 100 * top['deaths_per_million'] / top['cases_per_million'])
    .nice('{:,.1f}')
)

Unnamed: 0_level_0,cases_per_million,deaths_per_million,death_rate
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Peru,135232.8,6601.1,4.9
Czechia,446656.9,4078.5,0.9
Greece,546851.8,3788.8,0.7
Romania,185759.8,3594.4,1.9
United Kingdom,366444.1,3404.4,0.9
Brazil,178367.9,3338.5,1.9
Italy,449964.6,3313.4,0.7
United States,302859.5,3302.5,1.1
Chile,276258.1,3208.9,1.2
Poland,174391.9,3146.8,1.8


In [14]:
# Excess deaths normalized by population, expressed in millions of people.
final = final.assign(
    excess_per_million=final['total_excess'] / (final['population'] / 1.0e6),
)

In [15]:
# Ten countries with the highest excess deaths per million, compared with
# their recorded cases and deaths per million.
(
    final[['cases_per_million', 'deaths_per_million', 'excess_per_million']]
    .nlargest(10, columns='excess_per_million')
    .nice()
)

Unnamed: 0_level_0,cases_per_million,deaths_per_million,excess_per_million
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Russia,167155,2770,9272
Peru,135233,6601,6003
Bolivia,100367,1854,5288
Cuba,100694,771,5125
Mexico,59260,2602,5044
South Africa,65292,1645,4960
Romania,185760,3594,4822
Ecuador,60594,2023,4807
Ukraine,134893,2678,4384
Italy,449965,3313,4366
