# COVID-19 - Fatality rates
> Evolution of fatality rates for different countries

- comments: true
- author: Caglar Cakan
- categories: [covid19]
- permalink: /covid-fatality-rates/
- image: images/covid-fatality-rates.png
- license: Creative Commons

In [1]:
#hide_input
import datetime
print("This page was last built on", datetime.datetime.now().strftime("%d.%m.%y %H:%M:%S"))

This page was last built on 13.06.20 01:21:33


In [2]:
#hide

# altair code borrowed from Pratap Vardhan, https://github.com/machine-learning-apps/covid19-dashboard/blob/master/_notebooks/2020-03-17-covid19-compare-country-death-trajectories.ipynb
# load_timeseries code borrowed from Thomas Wiecki & Hamel Husain, https://github.com/machine-learning-apps/covid19-dashboard/blob/master/_notebooks/2020-03-16-covid19_growth_bayes.ipynb
# Thanks!

import numpy as np

import pandas as pd

import requests
import io
import altair as alt

In [3]:
#hide
def load_timeseries(name, subname):
    dataState = pd.read_csv("https://api.covid19india.org/csv/latest/state_wise_daily.csv")
    statemap = pd.read_csv("https://api.covid19india.org/csv/latest/state_wise.csv")
    statemap = statemap[['State_code',"State"]]
    dataState.Date = pd.to_datetime(dataState.Date, errors='coerce')
    deathData = dataState[dataState.Status==name]
    for item in deathData.columns[2:]:
          deathData[item] = deathData[item].cumsum()
    
    melted_data = pd.melt(deathData, id_vars =['Date', 'Status'])
    melted_data = pd.merge(melted_data, statemap, how="left", left_on="variable", right_on="State_code")
    melted_data["State"] =  melted_data["State"].apply(lambda x: "India" if x=="Total" else x)
    melted_data.rename(columns={'Date': 'date', 'Status': 'type', 'State':'state', 'value':'cases'}, inplace=True)
    melted_data = melted_data[["date", "state", "type", "cases"]]
    melted_data=melted_data.set_index("date")
    melted_data.type=subname
    return melted_data

In [4]:
#hide

def get_countries_with_min_cases_for_df(df_cases, by='cases', min_cases = 10):
    MIN_CASES = min_cases
    countries = df_cases[df_cases[by].ge(MIN_CASES)
           ].sort_values(by=by, ascending=False)
    countries = countries['state'].values
    return countries

def get_countries_with_min_cases(df_cases, by='cases', min_cases = 10):
    MIN_CASES = min_cases
    countries = df_cases[df_cases[by].ge(MIN_CASES)].sort_values(by=by, ascending=False)
    countries = countries['state'].unique()
    return countries

def filter_cases(df_cases, by='cases', min_cases = 10, since_cases=None):
    countries = get_countries_with_min_cases_for_df(df_cases, by, min_cases)
    if since_cases is None:
        SINCE_CASES_NUM = 100
    else:
        SINCE_CASES_NUM = since_cases
        
    COL_X = f'Days since case {SINCE_CASES_NUM}'
    dff2 = df_cases[df_cases['state'].isin(countries)].copy()
    dff2['date'] = dff2.index
    days_since = (dff2.assign(F=dff2[by].ge(SINCE_CASES_NUM))
                  .set_index('date')
                  .groupby('state')['F'].transform('idxmax'))
    dff2[COL_X] = (dff2['date'] - days_since.values).dt.days.values
    dff2 = dff2[dff2[COL_X].ge(0)]
    return dff2

In [10]:
#hide
def get_country_colors(x):
    mapping = {
        'Maharashtra': 'black',
        'Gujarat': '#A1BA59',
        'Delhi': '#E45756',
        'Tamil Nadu': '#F58518',
        'Rajasthan': '#ab134e',
        'Madhya Pradesh': 'green',
        'Uttar Pradesh': '#2495D3',
        'Andhra Pradesh': '#9D755D',
        'Punjab': '#3250a8',
        'United Kingdom': '#3250a8',
        'West Bengal': 'red',
        'Telangana': '#C1B7AD',
        'Jammu and Kashmir': '#C1B7AD',
        'Karnataka': '#C1B7AD',
        'Haryana': '#C1B7AD',
        'Bihar': '#9467bd'}
    return mapping.get(x, '#C1B7AD')

In [11]:
#hide

df_deaths = load_timeseries("Deceased","deaths")
df_confirmed = load_timeseries("Confirmed","confirmed")

# concatenate both timeseries
df_cases = pd.concat((df_confirmed, df_deaths), axis=1)
df_cases.columns = ['state', 'type', 'confirmed', 'state', 'type', 'deaths']
df_cases = df_cases.loc[:,~df_cases.columns.duplicated()]

# dirty ratio
df_cases['ratio'] = df_cases.confirmed / (df_cases.deaths + 1)
df_cases['ratio_in'] = 1 / df_cases['ratio'] * 100

# create diffs
df_cases['new_deaths'] = df_cases.deaths.diff()
# flatten artifacts from one country to the next
#df_cases[df_cases['new_deaths']<0] = 0
df_cases.loc[df_cases['new_deaths']<0, 'new_deaths'] = 0 

df_cases['new_confirmed'] = df_cases.confirmed.diff()
#df_cases[df_cases['new_confirmed']<0] = 0
df_cases.loc[df_cases['new_confirmed']<0, 'new_confirmed'] = 0 

#hide
df_cases['fatality'] = (df_cases['deaths'] / df_cases['confirmed'].round(1))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  deathData[item] = deathData[item].cumsum()


In [12]:
#hide
MIN_CASES = 1000
SINCE_CASES = 50
dff2 = filter_cases(df_cases, 'confirmed', MIN_CASES, SINCE_CASES)

In [13]:
#hide
def make_since_chart(dff2, highlight_countries=[], baseline_countries=[]):
    
    max_date = dff2['date'].max()
    color_domain = list(dff2['state'].unique())
    color_range = list(map(get_country_colors, color_domain))


    selection = alt.selection_multi(fields=['state'], bind='legend', 
                                    init=[{'state': x} for x in highlight_countries + baseline_countries])
    
    COL_X = f'Days since case {SINCE_CASES}'
    
    base = alt.Chart(dff2, width=550).encode(
        x=f'{COL_X}:Q',
        y=alt.Y('ratio_in:Q', axis=alt.Axis(title='Fatality rate (%)')),
        color=alt.Color('state:N', scale=alt.Scale(domain=color_domain, range=color_range)),
        tooltip=list(dff2),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.1))
    )    
    
    # 33% growth reference -----------
    #print(dff2)
    max_day = dff2[COL_X].max()
    ref = pd.DataFrame([[x, SINCE_CASES*1.33**x] for x in range(max_day+1)], columns=[COL_X, 'Confirmed Cases'])
    base_ref = alt.Chart(ref).encode(x=f'{COL_X}:Q', y='Confirmed Cases:Q')
    
    return (
        base.mark_line(point=True).add_selection(selection) + 
        base.transform_filter(
            alt.datum['date'] >= int(max_date.timestamp() * 1000)
        ).mark_text(dy=-8, align='right', fontWeight='bold').encode(text='States:N') #+  
    ).properties(
        title=f"Fatality rates per state"
    )

# Cummulative confirmed cases per deaths

The fatality rate is calculated as the cumulative number of confirmed cases divided by the cumulative number of deaths.

`click` the country label that you want to highlight. `Shift` + `click` to compare different countries.

In [15]:
#hide_input
chart = make_since_chart(dff2[dff2.state != "China"], ["Rajasthan"],  ['Maharashtra', 'Gujarat', 'Delhi', 'Tamil Nadu', "Utter Pradesh", "West Bengal"])
chart