In [1]:
import datetime
import re
import pandas as pd
import numpy as np

import altair as alt

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

import folium

In [2]:
def to_epiweek(cal_date: datetime.date):
    delta = cal_date - datetime.date(cal_date.year, 1, 1)
    days = delta.days
    epiweek = (days // 7) + 1
    epiweek = epiweek if epiweek < 53 else 52
    return (cal_date.year, epiweek)

assert to_epiweek(datetime.date(2020, 1, 1)) == (2020, 1)
assert to_epiweek(datetime.date(2020, 12, 31)) == (2020, 52)
assert to_epiweek(datetime.date(2021, 1, 1)) == (2021, 1)

In [3]:
country_codes = pd.read_csv('data/iso_codes.csv')

In [4]:
covid19_df = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv')

In [5]:
covid19_df = pd.merge(covid19_df, country_codes, how='left', left_on='iso_code', right_on='Alpha3')
covid19_df = covid19_df.set_index('iso_code')
covid19_df

Unnamed: 0_level_0,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,...,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality,Country,Alpha2,Alpha3,Numeric,Latitude,Longitude
iso_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AFG,Asia,Afghanistan,2020-02-24,1.0,1.0,,,,,0.026,...,0.5,64.83,0.511,,Afghanistan,AF,AFG,4.0,33.0,65.0
AFG,Asia,Afghanistan,2020-02-25,1.0,0.0,,,,,0.026,...,0.5,64.83,0.511,,Afghanistan,AF,AFG,4.0,33.0,65.0
AFG,Asia,Afghanistan,2020-02-26,1.0,0.0,,,,,0.026,...,0.5,64.83,0.511,,Afghanistan,AF,AFG,4.0,33.0,65.0
AFG,Asia,Afghanistan,2020-02-27,1.0,0.0,,,,,0.026,...,0.5,64.83,0.511,,Afghanistan,AF,AFG,4.0,33.0,65.0
AFG,Asia,Afghanistan,2020-02-28,1.0,0.0,,,,,0.026,...,0.5,64.83,0.511,,Afghanistan,AF,AFG,4.0,33.0,65.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZWE,Africa,Zimbabwe,2021-08-19,121902.0,404.0,449.714,4198.0,17.0,21.571,8201.749,...,1.7,61.49,0.571,,Zimbabwe,ZW,ZWE,716.0,-20.0,30.0
ZWE,Africa,Zimbabwe,2021-08-20,121902.0,0.0,342.000,4198.0,0.0,17.857,8201.749,...,1.7,61.49,0.571,,Zimbabwe,ZW,ZWE,716.0,-20.0,30.0
ZWE,Africa,Zimbabwe,2021-08-21,122487.0,585.0,376.286,4236.0,38.0,20.857,8241.109,...,1.7,61.49,0.571,,Zimbabwe,ZW,ZWE,716.0,-20.0,30.0
ZWE,Africa,Zimbabwe,2021-08-22,122652.0,165.0,366.286,4249.0,13.0,20.000,8252.210,...,1.7,61.49,0.571,,Zimbabwe,ZW,ZWE,716.0,-20.0,30.0


In [6]:
# Collect some filterable values

columns = covid19_df.columns.tolist()
continents = covid19_df.continent.unique()
locations = covid19_df.location.unique()

# Set globals

selected_continent = 'Africa'
selected_country = locations[0]
first_metric = 'new_cases_smoothed'
second_metric = 'new_deaths_smoothed'
time_period = 600

In [7]:
def filter_countries(continent):
    global selected_continent
    selected_continent = continent
    
    countries = covid19_df.loc[covid19_df.continent == continent]
    countries = countries.location.unique()
    
    global selected_country
    selected_country = countries[0]
    
    return (
        interact(select_country, country = widgets.Dropdown(
            options=countries,
            value=selected_country,
            description='Country',
            disabled=False,
        ))
    )

In [8]:
def select_country(country):
    global selected_country
    selected_country = country

In [9]:
def set_first_metric(metric):
    global first_metric
    first_metric = metric

In [10]:
def set_second_metric(metric):
    global second_metric
    second_metric = metric

In [11]:
def set_period(period):
    global time_period
    time_period = period

In [12]:
def draw_chart():
    global selected_country
    global first_metric
    global second_metric
    global time_period
      
    country_df = covid19_df.loc[covid19_df.location == selected_country]
    
    country_df = country_df.tail(time_period)

    base = alt.Chart(country_df, width=1000).encode(
        alt.X('yearmonthdate(date):T', title='epiweek:O', axis=alt.Axis(tickCount=14, labelAngle=90))
    )

    mark_bar = base.mark_bar().encode(
        y = alt.Y(first_metric)
#         color = alt.Color('positive_rate', scale = alt.Scale(range=['blue', 'green', 'yellow', 'red']), title='Case Positivity')
    )

    mark_line = base.mark_line(color='red').encode(
        y = alt.Y(second_metric)
    )

    return alt.layer(mark_bar, mark_line).resolve_scale(y = 'independent')

In [13]:
# compute when a range is monotonically increasing
def is_increasing(arr):
    for i in range(1, len(arr)):
        if arr[i-1] >= arr[i]:
            return False
    else:
        return True

In [14]:
def cases_increasing(country_df,increase_window):
    country_df.insert(0, 'increasing', country_df.new_cases_smoothed.rolling(increase_window).apply(is_increasing))
    country_df.insert(0, 'increasing_avg', country_df.increasing.rolling(increase_window).mean())

In [15]:
def draw_trend_chart():
    global selected_country
    global first_metric
    global second_metric
    global time_period
    
    country_df = covid19_df.loc[covid19_df.location == selected_country]
    
    country_df = country_df.tail(time_period)
    
    increase_window = 7
    
    country_df.insert(0, 'increasing', country_df.new_cases_smoothed.rolling(increase_window).apply(is_increasing))
    country_df.insert(0, 'increasing_avg', country_df.increasing.rolling(increase_window).mean())
    
    pd.set_option('display.max_rows', None)
    display(country_df)
    
    base = alt.Chart(country_df, width=1000).encode(
        alt.X('yearmonthdate(date):T', title='epiweek:O', axis=alt.Axis(tickCount=14, labelAngle=90))
    )

    mark_bar = base.mark_bar().encode(
        x = alt.X('yearmonthdate(date):T', title='epiweek:O', axis=alt.Axis(tickCount=14, labelAngle=90)),
        y = alt.Y('new_cases_smoothed', title='Daily Cases'),
        color = alt.Color('increasing_avg', title=f'{increase_window} Day Increase', scale=alt.Scale(range=['#85C1E9 ', '#F1948A']))
    )

    mark_first_line = base.mark_line(color='#16A085 ').encode(
        y = alt.Y(first_metric)
    )
    
    mark_second_line = base.mark_line(color='#34495E').encode(
        y = alt.Y(second_metric)
    )

    return alt.layer(mark_bar, mark_first_line).resolve_scale(y = 'independent') 


In [16]:
def choropleth():
    global selected_country
    global first_metric
    global second_metric
    global time_period
    
    countries_df = covid19_df.loc[covid19_df.continent == selected_continent]
    
    increase_window = 5
    
    countries_df.insert(0, 'increasing', countries_df.new_cases_smoothed.rolling(increase_window).apply(is_increasing))
    countries_df.insert(0, 'increasing_avg', countries_df.increasing.rolling(increase_window).mean())
    
    countries_df.to_csv('data.csv')
    
    
    
    countries_df = countries_df.sort_values('date').drop_duplicates('Alpha3',keep='last')
    
    
    
    m = folium.Map(location=[30, -10], zoom_start=3)
    
    countries_geo = f"data/world-countries.json"

    folium.Choropleth(
        geo_data=countries_geo,
        name="choropleth",
        data=countries_df,
        columns=["Alpha3","increasing"],
        key_on="feature.id",
        fill_color="YlGn",
        fill_opacity=0.5,
        line_opacity=.1,
        legend_name="",
    ).add_to(m)

    folium.LayerControl().add_to(m)

    return m

In [17]:
def mobility_data():
    global selected_country
    
    country_df = covid19_df.loc[covid19_df.location == selected_country]
    
    Alpha2 = country_df.iloc[0]['Alpha2']
    
    country_mobility_df = pd.read_csv('data/mobility-reports/2021_' + Alpha2 + '_Region_Mobility_Report.csv')
    
    country_mobility_df = country_mobility_df.drop(columns=['place_id','iso_3166_2_code','census_fips_code'])
    
    return country_mobility_df

In [99]:
# FILTER COUNTRIES BY CONTINENT

interact(filter_countries, continent = widgets.Dropdown(
    options=continents,
    value='Africa',
    description='Continent',
    disabled=False,
))

# SELECT METRICS

interact(set_first_metric, metric = widgets.Dropdown(
    options=columns,
    value='new_cases_smoothed',
    description='',
    disabled=False,
))

interact(set_second_metric, metric = widgets.Dropdown(
    options=columns,
    value='new_deaths_smoothed',
    description='',
    disabled=False,
))

# PERIOD

interact(set_period, period = widgets.IntText(
    value=100,
    description='Period:',
    disabled=False
))

interactive(children=(Dropdown(description='Continent', index=3, options=('Asia', nan, 'Europe', 'Africa', 'No…

interactive(children=(Dropdown(description='metric', index=5, options=('continent', 'location', 'date', 'total…

interactive(children=(Dropdown(description='metric', index=8, options=('continent', 'location', 'date', 'total…

interactive(children=(IntText(value=100, description='Period:'), Output()), _dom_classes=('widget-interact',))

<function __main__.set_period(period)>

In [112]:
display(draw_trend_chart()) # new_cases_smoothed increase/decrease compared to ...
display(draw_chart())
# display(mobility_data())
# display(choropleth())


Unnamed: 0_level_0,increasing_avg,increasing,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,...,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality,Country,Alpha2,Alpha3,Numeric,Latitude,Longitude
iso_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CIV,,,Africa,Cote d'Ivoire,2021-03-27,42861.0,393.0,421.143,232.0,3.0,...,,57.78,0.538,,Côte d'Ivoire,CI,CIV,384.0,8.0,-5.0
CIV,,,Africa,Cote d'Ivoire,2021-03-27,42861.0,393.0,421.143,232.0,3.0,...,,57.78,0.538,,Ivory Coast,CI,CIV,384.0,8.0,-5.0
CIV,,,Africa,Cote d'Ivoire,2021-03-28,43180.0,319.0,410.0,237.0,5.0,...,,57.78,0.538,,Côte d'Ivoire,CI,CIV,384.0,8.0,-5.0
CIV,,,Africa,Cote d'Ivoire,2021-03-28,43180.0,319.0,410.0,237.0,5.0,...,,57.78,0.538,,Ivory Coast,CI,CIV,384.0,8.0,-5.0
CIV,,,Africa,Cote d'Ivoire,2021-03-29,43422.0,242.0,403.0,239.0,2.0,...,,57.78,0.538,,Côte d'Ivoire,CI,CIV,384.0,8.0,-5.0
CIV,,,Africa,Cote d'Ivoire,2021-03-29,43422.0,242.0,403.0,239.0,2.0,...,,57.78,0.538,,Ivory Coast,CI,CIV,384.0,8.0,-5.0
CIV,,0.0,Africa,Cote d'Ivoire,2021-03-30,43542.0,120.0,382.0,242.0,3.0,...,,57.78,0.538,,Côte d'Ivoire,CI,CIV,384.0,8.0,-5.0
CIV,,0.0,Africa,Cote d'Ivoire,2021-03-30,43542.0,120.0,382.0,242.0,3.0,...,,57.78,0.538,,Ivory Coast,CI,CIV,384.0,8.0,-5.0
CIV,,0.0,Africa,Cote d'Ivoire,2021-03-31,43889.0,347.0,368.857,244.0,2.0,...,,57.78,0.538,,Côte d'Ivoire,CI,CIV,384.0,8.0,-5.0
CIV,,0.0,Africa,Cote d'Ivoire,2021-03-31,43889.0,347.0,368.857,244.0,2.0,...,,57.78,0.538,,Ivory Coast,CI,CIV,384.0,8.0,-5.0


In [128]:
# identify data gaps
# threshold is the number of runs of the same number to look for
# window_size is the window "around" the threshold to look at

window_size=14
def data_gaps(col):
    threshold=5
    return col.groupby((col != col.shift()).cumsum()).transform('count').gt(threshold).any() == 1.0
    
covid19_df.loc[covid19_df.location == "Congo", :].new_cases.rolling(window=window_size).apply(data_gaps)

iso_code
COG    NaN
COG    NaN
COG    NaN
COG    NaN
COG    NaN
COG    NaN
COG    NaN
COG    NaN
COG    NaN
COG    NaN
COG    NaN
COG    NaN
COG    NaN
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
COG    0.0
C