In [1]:
import pandas as pd
import pycountry_convert as pc
import plotly.express as px


numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject



## Data cleaning / shaping

In [17]:
# Read one time-series CSV per metric (path is relative to the working
# directory) and keep the resulting frames in a dict keyed by metric name.
series = {}
for name in ['Confirmed', 'Recovered', 'Deaths']:
    series[name] = pd.read_csv(
        f'COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-{name}.csv'
    )

In [50]:
def get_continent(country):
    """Return the continent name for a country label, with manual overrides.

    The label is resolved through ``pycountry_convert`` (country name ->
    alpha-2 code -> continent code -> continent name). If any step of that
    lookup fails, the label is looked up in a small table of overrides.
    Labels found in neither place are returned unchanged, so they show up
    as their own group instead of being dropped.

    Parameters
    ----------
    country : str
        Country label as it appears in the ``Country/Region`` column.

    Returns
    -------
    The continent name, or ``country`` itself when it cannot be resolved.
    """
    # Labels that the pycountry_convert lookup does not resolve on its own.
    overrides = {
        'Reunion': 'Africa', 'Congo (Kinshasa)': 'Africa', 'Taiwan*': 'Asia', 'US': 'North America',
        "Cote d'Ivoire": 'Africa', 'Holy See': 'Europe', 'Korea, South': 'Asia'
    }
    try:
        country_code = pc.country_name_to_country_alpha2(country, cn_name_format="default")
        continent_code = pc.country_alpha2_to_continent_code(country_code)
        return pc.convert_continent_code_to_continent_name(continent_code)
    except Exception:
        # Deliberate best-effort fallback. Unlike a bare ``except:`` this lets
        # KeyboardInterrupt / SystemExit propagate, so the cell can still be
        # interrupted while it is being applied to a whole column.
        return overrides.get(country, country)


In [51]:
# Collapse all rows that share a Country/Region into a single row per country:
# keep only the date columns (headers matching digits/digits/digits) and sum
# them within each country, then tag every country with its continent.
grouped = {}
for name, df in series.items():
    date_columns = df.filter(regex=r'\d+/\d+/\d+', axis=1)
    # Built-in groupby aggregation instead of groupby().apply(lambda ...):
    # same result, vectorised, and the grouping column is never handed to
    # the aggregation.
    df_grouped = date_columns.groupby(df['Country/Region']).sum().reset_index()
    df_grouped['continent'] = df_grouped['Country/Region'].apply(get_continent)
    grouped[name] = df_grouped

In [92]:
# Reshape every per-country table from wide (one column per date) to long
# (one row per country and date) with columns "date" and "count".
melted = {
    metric: (
        wide.melt(id_vars=['Country/Region', 'continent'])
            .rename(columns={'variable': 'date', 'value': 'count'})
    )
    for metric, wide in grouped.items()
}

In [57]:
def get_over_k(df, k=500):
    """Return the countries whose largest value in ``df`` is at least ``k``.

    ``df`` must have ``continent`` and ``Country/Region`` columns; every other
    column is treated as a value column. The result is an array of
    ``Country/Region`` labels, ordered by the (continent, country) group keys.
    """
    per_country = df.groupby(['continent', 'Country/Region']).max()
    # Largest value across all remaining columns for each country.
    peak = per_country.max(axis=1)
    qualifying = peak[peak >= k].reset_index()
    return qualifying['Country/Region'].values

## Plotting

Set the minimum number of confirmed cases `k` that a country must reach to be included in the plots.

In [70]:
# Countries are plotted only if their confirmed count reaches this threshold.
k = 1000

In [71]:
# Confirmed cases over time, restricted to countries whose confirmed count
# reaches k. One line per country, dash style per continent, log-scaled y axis.
confirmed_long = melted['Confirmed']
selected_countries = get_over_k(grouped['Confirmed'], k=k)
to_plot = confirmed_long[confirmed_long['Country/Region'].isin(selected_countries)]
fig = px.line(
    to_plot,
    x="date",
    y="count",
    color='Country/Region',
    line_dash="continent",
    log_y=True,
    line_shape="spline",
    render_mode="svg",
    title=f"Evolution of confirmed cases for countries that have n_cases >= {k}",
)
fig.show()

In [72]:
# Deaths over time. Countries are still selected by their *confirmed* count
# reaching k, which is what the title states.
deaths_long = melted['Deaths']
selected_countries = get_over_k(grouped['Confirmed'], k=k)
to_plot = deaths_long[deaths_long['Country/Region'].isin(selected_countries)]
fig = px.line(
    to_plot,
    x="date",
    y="count",
    color='Country/Region',
    line_dash="continent",
    log_y=True,
    line_shape="spline",
    render_mode="svg",
    title=f"Evolution of deaths for countries that have n_cases >= {k}",
)
fig.show()

## Plotting rates

In [114]:
from functools import reduce
import numpy as np

In [115]:
def get_rate(numerator, denominator, min_cases=30):
    """Element-wise ``numerator / denominator``, blanked where it is not meaningful.

    Parameters
    ----------
    numerator, denominator
        Aligned numeric sequences; the notebook passes pandas Series.
    min_cases
        Entries whose numerator is below this threshold are set to NaN.

    Returns
    -------
    A new object holding the ratios. Entries are NaN where the numerator is
    below ``min_cases`` or where the denominator is zero.
    """
    rate = numerator / denominator
    # A zero denominator with a numerator at or above the threshold would
    # otherwise produce +/-inf, which is meaningless as a rate and distorts
    # the plot axis, so it is blanked together with the below-threshold entries.
    undefined = (numerator < min_cases) | (denominator == 0)
    rate[undefined] = np.nan
    return rate

In [128]:
join_on = ['Country/Region', 'continent', 'date']
min_cases = 15

# Rename each metric's "count" column to the metric name, then join the
# tables pairwise (default inner join) on country / continent / date.
per_metric = [frame.rename(columns={'count': metric}) for metric, frame in melted.items()]
joined_df = reduce(lambda left, right: pd.merge(left, right, on=join_on), per_metric)

# Rates relative to confirmed cases; get_rate blanks entries whose numerator
# is below min_cases.
joined_df['death_rate'] = get_rate(joined_df['Deaths'], joined_df['Confirmed'], min_cases=min_cases)
joined_df['recovery_rate'] = get_rate(joined_df['Recovered'], joined_df['Confirmed'], min_cases=min_cases)

# Long-format version: one row per country, date and variable (metrics and rates).
joined = joined_df.melt(id_vars=join_on)

In [129]:
# Death rate over time for countries whose confirmed count reaches k.
# Linear y axis, since the plotted value is a ratio.
selected_countries = get_over_k(grouped['Confirmed'], k=k)
to_plot = joined_df[joined_df['Country/Region'].isin(selected_countries)]
fig = px.line(
    to_plot,
    x="date",
    y="death_rate",
    line_dash='continent',
    color="Country/Region",
    log_y=False,
    line_shape="spline",
    render_mode="svg",
    title=f"Evolution of death rate for countries that have n_cases >= {k} (starting when n_deaths >= {min_cases})",
)
fig.show()

In [130]:
# Recovery rate over time for countries whose confirmed count reaches k.
# Linear y axis, since the plotted value is a ratio.
selected_countries = get_over_k(grouped['Confirmed'], k=k)
to_plot = joined_df[joined_df['Country/Region'].isin(selected_countries)]
fig = px.line(
    to_plot,
    x="date",
    y="recovery_rate",
    line_dash='continent',
    color="Country/Region",
    log_y=False,
    line_shape="spline",
    render_mode="svg",
    title=f"Evolution of recovery rate for countries that have n_cases >= {k} (starting when n_recovered >= {min_cases})",
)
fig.show()

## Comparing Italy and France

In [107]:
# Countries shown side by side in the comparison plot.
countries = ["Italy", "France"]

In [108]:
# Compare the raw counts (confirmed, deaths, recovered) of the selected
# countries: one colour per country, one dash style per metric, log-scaled y axis.
is_selected_country = joined['Country/Region'].isin(countries)
is_count_metric = joined['variable'].isin(['Confirmed', 'Deaths', 'Recovered'])
to_plot = joined[is_selected_country & is_count_metric]

# Build the title from `countries` so it stays correct when the selection
# changes; for ['Italy', 'France'] this yields "Italy and France".
if len(countries) > 1:
    country_label = f"{', '.join(countries[:-1])} and {countries[-1]}"
else:
    country_label = "".join(countries)

fig = px.line(
    to_plot,
    x="date",
    y="value",
    line_dash='variable',
    color="Country/Region",
    log_y=True,
    line_shape="spline",
    render_mode="svg",
    title=f"Coevolution of confirmed cases, deaths and recoveries in {country_label}",
)
fig.show()