# Import and display options

In [52]:
import pandas as pd
import numpy as np
from pivottablejs import pivot_ui
# Pandas options
# Show up to 999 rows before pandas truncates a displayed frame.
pd.options.display.max_rows = 999
# Keep wide frames on one line instead of wrapping columns onto extra lines.
pd.set_option('expand_frame_repr', False)
# jupyter options
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every expression statement in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
%matplotlib inline

# Define functions
- `_string_to_date` turns a string column of a pandas DataFrame into datetimes
- `_read_and_stack` reads the data and stacks it for further manipulation

In [53]:
def _string_to_date(s, format):
    """
    This is an extremely fast approach to datetime parsing.
    For large data, the same dates are often repeated. Rather than
    re-parse these, we store all unique dates, parse them, and
    use a lookup to convert all dates.
    """
    dates = {date:pd.to_datetime(date, format = format) for date in s.unique()}
    return s.map(dates)

def _read_and_stack(file):
    """ Reads the data and format it in long format with .stack() with countries as index 

    Parameters
    ----------
    file: String
        the file to import, from the COVID-19 repository on github:
        https://github.com/CSSEGISandData/COVID-19.git
        
    Returns
    -------
    - df:
        pandas dataframe, stacked, with coutries as index
    
    See Also
    --------

    Examples
    --------
    confirmed = _read_and_stack("Confirmed")

    """
    
    df = (pd
          .read_csv(rf"csse_covid_19_data\csse_covid_19_time_series\time_series_covid19_{file}_global.csv")
          .reset_index()
          .drop(["index", "Lat", "Long"], axis=1)
          .set_index(['Province/State', 'Country/Region'])
          .stack()
          .reset_index()
         )
    df["Date"] = _string_to_date(df["level_2"], "%m/%d/%y")
    df = (df
          .set_index(['Province/State', 'Country/Region', 'Date'])
          .drop("level_2", axis=1)
         )
    df.columns = [file]
    
    return df[file]

# Quick check of the loader: read the confirmed-cases series and preview
# its first five rows.
df = _read_and_stack(file="Confirmed")
df.head(n=5)

Province/State  Country/Region  Date      
NaN             Afghanistan     2020-01-22    0
                                2020-01-23    0
                                2020-01-24    0
                                2020-01-25    0
                                2020-01-26    0
Name: Confirmed, dtype: int64

# Covid-19 progression dashboard
Per country, province and date: confirmed cases, deaths, mortality rate, new confirmed cases and new deaths

## Prepare the data

In [54]:
# DataFrame with one column per metric, in long format
files = ["Confirmed", "Deaths"]
df = pd.DataFrame()
for file in files:
    # The first assignment gives the empty frame its index; later series are
    # aligned to that index.
    df[file] = _read_and_stack(file)

# Cleaning up the data: rows without a province get the country name, so that
# the groupby below (which skips missing keys) keeps them.
df = df.reset_index()
df['Province/State'] = df['Province/State'].fillna(df['Country/Region'])

# Mortality rate. Zero denominators are masked first, so both 0/0 and n/0 end
# up as 0; a plain division followed by fillna(0) would leave n/0 as inf.
confirmed_nonzero = df["Confirmed"].where(df["Confirmed"] != 0)
df["rate"] = (df["Deaths"] / confirmed_nonzero).fillna(0)

# Row-to-row increments within each province; the first row of a province has
# no predecessor and gets 0.
# NOTE(review): this relies on rows being in date order within each province,
# which is the order produced by stacking the date columns -- confirm if the
# source layout changes.
daily_change = (df
                .groupby(['Country/Region', "Province/State"])[["Confirmed", "Deaths"]]
                .diff()
                .fillna(0)
               )
df["New Confirmed"] = daily_change["Confirmed"]
df["New Deaths"] = daily_change["Deaths"]

## Export the dashboard with pivottablejs.pivot_ui

In [55]:
# Export the interactive pivot table / chart built from the per-province
# frame `df`; the page is written to time_series_pivotchart.html (a path
# relative to the working directory).
pivot_ui(df,outfile_path="time_series_pivotchart.html")

# Rescaled dashboard to compare progression per country

## Rescale to log scale and start each country when its number of confirmed cases reaches 30

In [68]:
# Day 0 of a country is the first date on which its cumulative confirmed
# count reaches this threshold.
MIN_CONFIRMED = 30

# Country-level totals. Only the cumulative counts are summed: the rate is not
# additive across provinces and the province column is text.
df2 = df.groupby(["Country/Region", "Date"])[["Confirmed", "Deaths"]].sum()
# .copy() so the column assignments below act on an independent frame rather
# than on a slice of the grouped result.
df2 = df2.loc[df2["Confirmed"] >= MIN_CONFIRMED].copy()

for metric in ["Confirmed", "Deaths"]:
    counts = df2[metric]
    # Mask non-positive counts before taking the log so np.log never sees 0
    # (which yields -inf and a RuntimeWarning); masked entries are reported
    # as 0. Note that a count of 1 also has log 0.
    df2["log_" + metric] = np.log(counts.where(counts > 0)).fillna(0)

# Replace the calendar date with the number of days since the country crossed
# the threshold; the column keeps the name "Date".
df2 = df2.reset_index()
df2["Date"] = df2.groupby("Country/Region").cumcount()
df2 = (df2
       .set_index(["Country/Region", "Date"])
       .loc[:, ["Confirmed", "log_Confirmed", "Deaths", "log_Deaths"]]
      )
df2.loc["Turkey"]

  after removing the cwd from sys.path.


Unnamed: 0_level_0,Confirmed,log_Confirmed,Deaths,log_Deaths
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,47,3.850148,1,0.0
1,98,4.584967,1,0.0
2,192,5.257495,3,1.098612
3,359,5.883322,4,1.386294
4,670,6.507278,9,2.197225
5,1236,7.119636,30,3.401197
6,1529,7.332369,37,3.610918
7,1872,7.534763,44,3.78419
8,2433,7.79688,59,4.077537
9,3629,8.196712,75,4.317488


## Export the dashboard with pivottablejs.pivot_ui

In [69]:
# Export the interactive pivot table / chart built from the rescaled
# per-country frame `df2`; the page is written to
# country_comparison_rescaled.html (a path relative to the working directory).
pivot_ui(df2,outfile_path="country_comparison_rescaled.html")