In [1]:
import os
from pprint import pprint
import warnings

warnings.filterwarnings("ignore")

import ipywidgets as widgets
from ipywidgets import interact

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import (DateFormatter, MO, WeekdayLocator)
import seaborn as sns

import plotly.express as px
import requests
import datetime
import re
import glob
import pycountry

In [2]:
%matplotlib inline
import matplotlib as mpl
mpl.rcParams['figure.dpi']= 150

## Load the Data

In [3]:
def update_data(DATA_PATH='../data/'):
    import zipfile
    # readme from data repo
    README_URL = 'https://raw.githubusercontent.com/IBM/wntrac/master/data/README.md'
    DATA_PATH = DATA_PATH.strip('/')
    # regex to find links in markdown
    mdlink_regex = r"\[(\S+)\]\((\S+)\)"
    # recover text from README.md, find all links
    readme_text = requests.get(README_URL).text
    matches = re.findall(mdlink_regex, readme_text)
    # discard non-date links
    dates = [match for match in matches if '-' in match[0]]
    # sort dates in descending order
    dates.sort(key = lambda date: datetime.datetime.strptime(date[0], '%Y-%m-%d'), reverse=True)
    # recover repo location of lastest dataset
    lastest_relurl = dates[0][1].split('..')[-1]
    REPO_URL = 'https://github.com/IBM/wntrac'
    DATA_URL = REPO_URL + lastest_relurl
    # Local file: base dir, data
    DATA_FILE = DATA_PATH + '/' + lastest_relurl.split('/')[-1]
    os.makedirs(DATA_PATH, exist_ok=True)
    # check if data already downloaded
    if os.path.exists(DATA_FILE):
        print(f'WNTRAC already up to date ({dates[0][0]}), nothing new downloaded')
    else:
        # get byte response and save to file
        r = requests.get(DATA_URL, stream=True)
        with open(DATA_FILE, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=128):
                fd.write(chunk)
        print(f'WNTRAC updated to {dates[0][0]}')
        # extract zip
        with zipfile.ZipFile(DATA_FILE, 'r') as zip_ref:
            zip_ref.extractall(DATA_PATH)
        print(f'WNTRAC extracted to {DATA_PATH}')

In [4]:
# Your current working directory
os.getcwd()

'C:\\Users\\marti\\Desktop\\Projects\\Pandemic-Prize\\covid-xprize\\visualization'

In [5]:
update_data()

WNTRAC already up to date (2020-11-30), nothing new downloaded


In [6]:
print('Available data:')
pprint(glob.glob('../data/*.csv', recursive=True))

Available data:
['../data\\ibm-wntrac-2020-11-30-events.csv',
 '../data\\ibm-wntrac-2020-11-30-evidences.csv',
 '../data\\OxCGRT_latest.csv']


In [7]:
csv_path = '../data/ibm-wntrac-2020-11-30-events.csv'

df = pd.read_csv(csv_path)

## Print Data Types

In [8]:
columns = list(df.columns)
print(f"{len(columns)} columns\n")

print(df.dtypes)

8 columns

type               object
country            object
state/province     object
epoch             float64
even_id            object
date               object
value              object
restriction         int64
dtype: object


In [9]:
new_df = df.drop(['state/province', 'epoch', 'even_id'], axis=1)

new_df.head()

Unnamed: 0,type,country,date,value,restriction
0,introduction of travel quarantine policies,COM,2020-04-01,na,1
1,school closure,ZAF,2020-03-22,Only schools (primary/secondary) closed,1
2,entertainment/cultural sector closure,NGA,2020-03-24,Churches,1
3,entertainment/cultural sector closure,COL,2020-07-05,"Bars, restaurants, night clubs",1
4,entertainment/cultural sector closure,CMR,2021-06-01,na,1


In [10]:
# Unique measures
new_df['type'].unique()

array(['introduction of travel quarantine policies', 'school closure',
       'entertainment/cultural sector closure', 'confinement',
       'mask wearing', 'international flight restrictions',
       'economic impact', 'freedom of movement (nationality dependent)',
       'mass gatherings', 'public services closure',
       'state of emergency (legal impact)', 'public transportation',
       'domestic flight restriction', 'work restrictions',
       'changes in prison-related policies', 'contact tracing', 'other'],
      dtype=object)

In [11]:
# Available countries
countryCodes = pd.unique(new_df["country"])
countries = [pycountry.countries.get(alpha_3=code).name if (pycountry.countries.get(alpha_3=code) is not None) else 'NAN' for code in countryCodes]

country_2_countryCode = {c:code for c,code in zip(countries,countryCodes)}

number_of_countries = len(countries)
print(f"{number_of_countries} different countries")
pprint(sorted(countries))

185 different countries
['Afghanistan',
 'Albania',
 'Algeria',
 'Andorra',
 'Angola',
 'Anguilla',
 'Antarctica',
 'Antigua and Barbuda',
 'Argentina',
 'Armenia',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bhutan',
 'Bolivia, Plurinational State of',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'Brunei Darussalam',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cabo Verde',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Chad',
 'Chile',
 'China',
 'Colombia',
 'Comoros',
 'Congo, The Democratic Republic of the',
 'Costa Rica',
 'Croatia',
 'Cuba',
 'Cyprus',
 'Czechia',
 "Côte d'Ivoire",
 'Denmark',
 'Djibouti',
 'Dominica',
 'Dominican Republic',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'Equatorial Guinea',
 'Eritrea',
 'Estonia',
 'Ethiopia',
 'Fiji',
 'Finland',
 'France',
 'French Guiana',
 'French Polynesia',
 'Gabon',
 'Gambia',
 'Georgia',
 'Germany',
 'Ghana',
 'Greece',
 'Grenada',
 'G

## Plot Restriction events

In [13]:
def plot_events(country):
    subdf = df.loc[df["country"] == country_2_countryCode[country]]
    
    x = subdf["date"].tolist()
    
#     dates = pd.date_range('2020-01-01', datetime.datetime.today()).tolist()
    
    width = 5
    height = width/1.618
    fig = plt.figure(figsize=(width,height))
    plt.title(country)
    
    dates = pd.to_datetime(pd.Series([str(xi) for xi in x]), format = '%Y-%m-%d').tolist()
    colors = np.array(['green' if subdf["restriction"].iloc[i] == 0 else 'red' for i in range(len(subdf["restriction"]))])
    
    plt.plot_date(dates, subdf["type"], ".", markersize=5, label=subdf["restriction"])
    
    loc = WeekdayLocator(byweekday=MO, interval=3)
    formatter = DateFormatter('%d/%m/%y')

    ax = plt.gca()
#     ax.xaxis.set_major_locator(loc)
#     ax.xaxis.set_major_formatter(formatter)
    ax.xaxis.set_tick_params(rotation=30, labelsize=10)

    plt.ylabel("Measure type")
    plt.xlabel("Dates")
#     plt.legend()
    plt.show()
    

w_countries = widgets.Dropdown(
    options=countries,
    value='France',

    description='Country:',
    disabled=False
)

interact(plot_events, country=w_countries)

interactive(children=(Dropdown(description='Country:', index=34, options=('Comoros', 'South Africa', 'Nigeria'…

<function __main__.plot_events(country)>