# Deaths by Covid-19 in Brazil - 2020

Data provided by the civil registry offices in Brazil

In [None]:
import datetime
import warnings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Silence every warning for the rest of the notebook.
warnings.simplefilter('ignore')

# Default figure size for all plots.
sns.set(rc={'figure.figsize': (15, 6)})

# White background; keep only the left and bottom axis spines.
sns.set_style(
    'white',
    rc={
        f'axes.spines.{side}': visible
        for side, visible in (
            ('left', True),
            ('bottom', True),
            ('right', False),
            ('top', False),
        )
    },
)

In [None]:
# Colour per year for the "all causes" series: grey for 2019, blue for 2020.
_greys = sns.color_palette('Greys')
_deep = sns.color_palette('deep')

palette = {2019: _greys[1], 2020: _deep[0]}

# Colour per year for the "without Covid-19" series drawn on top:
# same grey for 2019, a darker grey for 2020.
palette_covid = {2019: _greys[1], 2020: _greys[3]}

In [None]:
# Death records; the 'date' column is parsed into datetimes on load.
df = pd.read_csv(
    '/kaggle/input/cause-of-death-in-brazil-20192020/death_cause_brazil.csv',
    parse_dates=['date'],
)

In [None]:
# Per-state reference table (the cells below read its 'UF', 'Region' and 'Population' columns).
df_states = pd.read_csv(
    '/kaggle/input/brazilianstates/states.csv',
)

In [None]:
# Preview the first five rows of the death records.
df.head(n=5)

## Compare total deaths by day from 2019 and 2020 in Brazil

In [None]:
# Daily death totals, with 2019 and 2020 overlaid on a single calendar-year axis.
df_filtered = df[df['date'] < '2020-09-01']

# Aggregate only the death counts instead of summing every column of the frame.
df_group = df_filtered.groupby('date', as_index=False)['total'].sum()

df_group['year'] = df_group['date'].dt.year
# Re-date every observation into 2020 so both years share the same x positions
# (2020 is a leap year, so 29 February is always a valid date).
df_group['date_year'] = df_group['date'].apply(lambda d: datetime.date(2020, d.month, d.day))

ax = sns.lineplot(x='date_year', y='total', hue='year', data=df_group, palette=palette)

# Pin one tick on the first day of each month *before* labelling; relabelling
# the automatically chosen ticks would attach month names to the wrong dates.
month_starts = [datetime.date(2020, month, 1) for month in range(1, 13)]
ax.set_xticks(month_starts)
ax.set_xticklabels(pd.date_range(start='2020-01-01', freq='MS', periods=12).month_name())

plt.xlim(datetime.date(2020, 1, 1), datetime.date(2020, 12, 31))

plt.xlabel('')
plt.ylabel('Deaths')

# Build the legend once, from the last two handles (the two year lines).
# A second, argument-less legend call would discard these custom labels.
handles, _ = ax.get_legend_handles_labels()
ax.legend(handles[-2:], ['2019', '2020'], frameon=False)

plt.title('Deaths by day in Brazil between 2019 and 2020')

plt.show()

## Deaths in 2019 and 2020 highlighting Covid-19

In [None]:
# Monthly deaths. All causes are drawn first and the non-Covid deaths are drawn
# on top, so the part of a bar left uncovered corresponds to Covid-19.

# Complete data
df_filter = df[df['date'] < '2020-09-01']

# Resample straight to month starts and sum only the death counts.
df_group = df_filter.resample('MS', on='date')['total'].sum().reset_index()

df_group['year'] = df_group['date'].dt.year
# Re-date into 2020 so the same month of both years shares one x category.
df_group['date_year'] = df_group['date'].apply(lambda d: datetime.date(2020, d.month, d.day))

ax = sns.barplot(x='date_year', y='total', hue='year', data=df_group, palette=palette)

# Data without covid-19 (rows with a missing cause are kept as non-Covid)
df_filter = df_filter[~df_filter['cause'].str.startswith('Covid', na=False)]

df_group = df_filter.resample('MS', on='date')['total'].sum().reset_index()

df_group['year'] = df_group['date'].dt.year
df_group['date_year'] = df_group['date'].apply(lambda d: datetime.date(2020, d.month, d.day))

ax = sns.barplot(x='date_year', y='total', hue='year', data=df_group, palette=palette_covid)

# The last two handles are the overlaid (non-Covid) bars for 2019 and 2020;
# handle 1 is the all-causes 2020 bar, whose visible part is the Covid-19 share.
handles, _ = ax.get_legend_handles_labels()
ax.legend(handles[-2:] + [handles[1]], ['2019', '2020', 'Covid-19'], frameon=False)

# One label per month category on the x axis.
ax.set_xticklabels(pd.date_range(start='2020-01-01', freq='MS', periods=12).month_name())

plt.xlabel('')
plt.ylabel('Deaths')

plt.title('Deaths in Brazil in 2019 and 2020')

plt.show()

## Deaths in 2019 and 2020 by state/region highlighting Covid-19

Data from:
* deaths between January and August of 2019 and 2020
* population from 2019

In [None]:
# Map each region name to the sorted list of state codes (UF) it contains.
regions = np.unique(df_states['Region'])

map_region_state = {
    region: sorted(np.unique(df_states.loc[df_states['Region'] == region, 'UF']))
    for region in regions
}

In [None]:
def _deaths_per_100k(deaths):
    """Return per-state, per-year deaths per 100,000 inhabitants.

    Sums 'total' by ('state', 'year'), joins the result to ``df_states`` on the
    state code ('state' == 'UF') and adds a 'per_hab' column computed from the
    'Population' column of ``df_states``.
    """
    totals = deaths.groupby(['state', 'year'], as_index=False)['total'].sum()
    merged = pd.merge(left=totals, right=df_states, left_on='state', right_on='UF')
    merged['per_hab'] = merged['total'] / merged['Population'] * 100000
    return merged


# One panel per region; each panel's width follows its number of states so
# that bars have the same width across panels.
region_sizes = [len(states) for states in map_region_state.values()]
fig, axes = plt.subplots(1, len(map_region_state), sharey=True, gridspec_kw={'width_ratios': region_sizes})

# January-August of 2019 and of 2020. This does not depend on the region, so
# it is computed once rather than on every loop iteration.
df_period = df.assign(year=df['date'].dt.year)
df_period = df_period[df_period['year'].isin([2019, 2020]) & (df_period['date'].dt.month < 9)]

for axis, (region, states) in zip(axes, map_region_state.items()):

    # Complete data
    df_filter = df_period[df_period['state'].isin(states)]
    df_merge = _deaths_per_100k(df_filter)

    sns.barplot(ax=axis, x='state', y='per_hab', hue='year', data=df_merge, palette=palette)

    # Data without covid-19 (rows with a missing cause are kept as non-Covid)
    df_filter = df_filter[~df_filter['cause'].str.startswith('Covid', na=False)]
    df_merge = _deaths_per_100k(df_filter)

    sns.barplot(ax=axis, x='state', y='per_hab', hue='year', data=df_merge, palette=palette_covid)

    axis.set(xlabel=region, ylabel='')
    axis.spines['left'].set_visible(False)
    # Empty legend on every panel; the first panel gets the real one below.
    axis.legend([], frameon=False)


fig.suptitle('Deaths in Brazil in 2019 and 2020 by state per 100.000 inhabitants')

# Take every handle from this figure's first panel. The last two are the
# overlaid (non-Covid) bars; handle 1 is the all-causes 2020 bar.
handles, _ = axes[0].get_legend_handles_labels()
axes[0].legend(handles[-2:] + [handles[1]], ['2019', '2020', 'Covid-19'], frameon=False)
axes[0].set_ylabel('Deaths per 100.000 inhabitants')
axes[0].spines['left'].set_visible(True)

plt.subplots_adjust(wspace=.05)
plt.show()

## Deaths in 2019 and 2020 by gender highlighting Covid-19

Considering deaths between January and August from 2019 and 2020.

In [None]:
# Deaths by gender for January-August of 2019 and 2020. All causes are drawn
# first and non-Covid deaths on top, so the uncovered part of a bar is Covid-19.

# Complete data
# assign() builds a new frame, so no column is written onto a filtered slice.
df_filter = df.assign(year=df['date'].dt.year)
df_filter = df_filter[df_filter['year'].isin([2019, 2020]) & (df_filter['date'].dt.month < 9)]

# Sum only the death counts rather than every column.
df_group = df_filter.groupby(['gender', 'year'], as_index=False)['total'].sum()

ax = sns.barplot(x='gender', y='total', hue='year', data=df_group, palette=palette)

# Data without covid-19 (rows with a missing cause are kept as non-Covid)
df_filter = df_filter[~df_filter['cause'].str.startswith('Covid', na=False)]

df_group = df_filter.groupby(['gender', 'year'], as_index=False)['total'].sum()

ax = sns.barplot(x='gender', y='total', hue='year', data=df_group, palette=palette_covid)

# The last two handles are the overlaid (non-Covid) bars for 2019 and 2020;
# handle 1 is the all-causes 2020 bar, whose visible part is the Covid-19 share.
handles, _ = ax.get_legend_handles_labels()
ax.legend(handles[-2:] + [handles[1]], ['2019', '2020', 'Covid-19'], frameon=False)

plt.xlabel('')
plt.ylabel('Deaths')

plt.title('Deaths in Brazil in 2019 and 2020 by gender')

plt.show()

## Deaths in 2019 and 2020 by age highlighting Covid-19

Considering deaths between January and August from 2019 and 2020.

In [None]:
# Display order of the age brackets on the x axis, youngest first;
# 'N/I' is placed last.
age_order = [
    '< 9',
    '10 - 19',
    '20 - 29',
    '30 - 39',
    '40 - 49',
    '50 - 59',
    '60 - 69',
    '70 - 79',
    '80 - 89',
    '90 - 99',
    '> 100',
    'N/I',
]

In [None]:
# Deaths by age bracket for January-August of 2019 and 2020. All causes are
# drawn first and non-Covid deaths on top, so the uncovered part of a bar is
# Covid-19.

# Complete data
# assign() builds a new frame, so no column is written onto a filtered slice.
df_filter = df.assign(year=df['date'].dt.year)
df_filter = df_filter[df_filter['year'].isin([2019, 2020]) & (df_filter['date'].dt.month < 9)]

# Sum only the death counts rather than every column.
df_group = df_filter.groupby(['age', 'year'], as_index=False)['total'].sum()

ax = sns.barplot(x='age', y='total', hue='year', data=df_group, palette=palette, order=age_order)

# Data without covid-19 (rows with a missing cause are kept as non-Covid)
df_filter = df_filter[~df_filter['cause'].str.startswith('Covid', na=False)]

df_group = df_filter.groupby(['age', 'year'], as_index=False)['total'].sum()

ax = sns.barplot(x='age', y='total', hue='year', data=df_group, palette=palette_covid, order=age_order)

# The last two handles are the overlaid (non-Covid) bars for 2019 and 2020;
# handle 1 is the all-causes 2020 bar, whose visible part is the Covid-19 share.
handles, _ = ax.get_legend_handles_labels()
ax.legend(handles[-2:] + [handles[1]], ['2019', '2020', 'Covid-19'], frameon=False)

plt.xlabel('')
plt.ylabel('Deaths')

plt.title('Deaths in Brazil in 2019 and 2020 by age')

plt.show()

more to come...