# Exploring COVID-19

The Johns Hopkins University Center for Systems Science and Engineering (JHU CSSE) maintain a [GitHub repository with daily update information about teh Corona Virus](https://github.com/CSSEGISandData/COVID-19). They also provide a [dashboard to interact with the data](https://www.arcgis.com/apps/opsdashboard/index.html#/bda7594740fd40299423467b48e9ecf6).

However, even their dashboard doesn't provide 

## Imports

I am using Pandas + cufflinks to plot our data using Plotly.

I am also using ipywidgets to interact with the plots. If you are using Jupyterlab, you may need to refer to the [installation documention](https://ipywidgets.readthedocs.io/en/latest/user_install.html#installing-the-jupyterlab-extension).

In [10]:
import pandas as pd
from ipywidgets import interact, fixed
import cufflinks as cf

In [6]:
path_confirmed = ("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
                  "csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv")
path_death = ("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
              "csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv") 
path_recovered = ("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
                  "csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv")

In [94]:
def set_index(df):
    """Set the index for the data frame using the date
    
    Args:
        df: Pandas data frame obtained from John Hopkins repo
    """
    # Set region, country, lat and long as index
    index = pd.MultiIndex.from_frame(df.iloc[:, :4])
    # Set the index and transpose
    df = df.iloc[:, 4:].set_index(index).T
    # Set date as index
    return df.set_index(pd.to_datetime(df.index, dayfirst=False))




df_conf = pd.read_csv(path_death)
df_conf = set_index(df_conf)

In [95]:
import matplotlib.dates as mdates
from ipywidgets.widgets import Dropdown, SelectionSlider, Checkbox
from datetime import timedelta
from datetime import datetime
import plotly.graph_objects as go


def my_widget(df_input, width=800, heigth=600):
    df_input[(None, 'Total', None, None)] = df_input.groupby(level=1, axis=1).sum().sum(axis=1)
        
    country_list = df_input.columns.get_level_values(1).unique().sort_values().tolist()
    country_list = ['No country', 'Total'] + [c for c in country_list if c != 'Total']
    
    date_list = df_input.index.strftime('%d/%m/%y')
    
    country_widget_1 = Dropdown(options=country_list, value='Total', 
                                description='Country 1')
    
    country_widget_2 = Dropdown(options=country_list, value='No country', 
                                description='Country 2')
    
    country_widget_3 = Dropdown(options=country_list, value='No country', 
                                description='Country 3')
    
    start_date_widget = SelectionSlider(
        options=date_list,
        value=date_list[0],
        description='Start date',
    )
    
    end_date_widget = SelectionSlider(
        options=date_list,
        value=date_list[-1],
        description='End date',
    )
    
    daily_cases_widget = Checkbox(
        value=False,
        description='Daily cases',
    )
    
    layout = cf.Layout(height=heigth,
                       width=width, 
                       autosize=False,
                       xaxis=dict(title='Date'),
                       margin=dict(l=60, r=60, b=40, 
                                   t=40, pad=4))

    @interact(country_1=country_widget_1, 
              country_2=country_widget_2, 
              country_3=country_widget_3, 
              start_date=start_date_widget, 
              end_date=end_date_widget,
              daily_var=daily_cases_widget)
    def plot_range(country_1, country_2, country_3, 
                   start_date, end_date, daily_var):       
        df = (
            df_input
            .groupby(level=1, axis=1).sum()
            .loc[lambda df_: (df_.index >= datetime.strptime(start_date, '%d/%m/%y')) &  
                             (df_.index <= datetime.strptime(end_date, '%d/%m/%y')),
                 lambda df_: (df_.columns.get_level_values('Country/Region')
                              .isin([country_1, country_2, country_3]))]
        )
                
        if daily_var:
            df = df - df.shift()
            title = 'Number of new cases / day'
            plot_type = 'bar'
        else:
            title = 'Number of cases (cumulative)'
            plot_type = 'line'
        
        layout.title = title
        layout.yaxis.title = title
        df.iplot(kind=plot_type, yTitle=title, layout=layout)

my_widget(df_conf.rename(columns={'Others': 'Diamond Princess'}))

interactive(children=(Dropdown(description='Country 1', index=1, options=('No country', 'Total', 'Afghanistan'…

In [96]:
import click



df = (
    pd.read_csv('/tmp/NL23INGB0795086296_01-03-2020_10-03-2020.csv')
    .iloc[:, [0,1,5,6]]
    .assign(Date=lambda df_: df_['Date'].apply(lambda s: datetime.strptime(str(s), '%Y%m%d')))
    .assign(Amount=lambda df_: df['Amount (EUR)'].str.replace(',', '.'))
    .assign(Amount=lambda df_: df_.apply(lambda row: -float(row['Amount']) 
                                         if row['Debit/credit'] == 'Debit' 
                                         else float(row['Amount']), axis=1))
    .drop(['Amount (EUR)', 'Debit/credit'], axis=1)
)

NameError: name 'df' is not defined

In [148]:
df.to_clipboard()