In [1]:
from datetime import datetime
import numpy as np
import pandas as pd
import tensorflow as tf

import sys
sys.path.append('../')

from wavy import panel
from wavy import side
from wavy import block
from wavy import nn
from wavy import plot

# Load `csv` and generate `pkl` file

In [2]:
# Read the raw COVID case/death table. The path is relative, so it resolves
# against the directory the notebook is run from.
df = pd.read_csv('../data/covid.csv')

In [3]:
# Preview a random subset of rows. The fixed seed keeps the preview identical
# across "Restart & Run All", so the saved output stays in sync with the code.
df.sample(20, random_state=42)

Unnamed: 0,Date_reported,Country_code,Country,WHO_region,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
95591,2020-09-19,MH,Marshall Islands,WPRO,0,0,0,0
23131,2020-08-12,BG,Bulgaria,EURO,210,13722,12,471
112068,2021-04-26,NG,Nigeria,AFRO,35,164719,1,2062
94320,2021-04-14,ML,Mali,AFRO,177,12179,3,417
150936,2020-07-01,SD,Sudan,EMRO,128,9500,5,592
46892,2020-12-03,GQ,Equatorial Guinea,AFRO,0,5156,0,85
37480,2021-06-16,CU,Cuba,AMRO,1537,160594,8,1106
6454,2021-06-28,AR,Argentina,AMRO,18555,4393142,338,92317
32873,2020-12-25,KM,Comoros,AFRO,17,683,0,7
118306,2020-03-09,PW,Palau,WPRO,0,0,0,0


In [4]:
# Remove the two identifier columns that no later cell reads, then index the
# table by report date. `drop` returns a new frame, so nothing is popped (and
# its return value thrown away) in place.
df = (
    df
    .drop(columns=['Country_code', 'WHO_region'])
    .set_index('Date_reported')
)

In [5]:
# Check the result of the previous cell: `Date_reported` is now the index and
# the two identifier columns are gone.
df.head(20)

Unnamed: 0_level_0,Country,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
Date_reported,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-03,Afghanistan,0,0,0,0
2020-01-04,Afghanistan,0,0,0,0
2020-01-05,Afghanistan,0,0,0,0
2020-01-06,Afghanistan,0,0,0,0
2020-01-07,Afghanistan,0,0,0,0
2020-01-08,Afghanistan,0,0,0,0
2020-01-09,Afghanistan,0,0,0,0
2020-01-10,Afghanistan,0,0,0,0
2020-01-11,Afghanistan,0,0,0,0
2020-01-12,Afghanistan,0,0,0,0


In [6]:
# Reshape the long table (one row per date and country) into a wide table with
# one block of metric columns per country, aligned on the date index.
#
# A single `groupby` pass replaces one full boolean scan of `df` per country,
# and `drop` avoids popping a column from a filtered selection in place.
# `sort=False` keeps the countries in order of first appearance, which is the
# order `df.Country.unique()` would give. Rows with a missing `Country` are
# skipped by `groupby` (its default `dropna=True`).
countries = []
df_list = []
for country, country_df in df.groupby('Country', sort=False):
    countries.append(country)
    df_list.append(country_df.drop(columns='Country'))

# `keys` puts the country name on the outer level of the column MultiIndex.
new_df = pd.concat(df_list, axis=1, keys=countries)
new_df

Unnamed: 0_level_0,Afghanistan,Afghanistan,Afghanistan,Afghanistan,Albania,Albania,Albania,Albania,Algeria,Algeria,...,Yemen,Yemen,Zambia,Zambia,Zambia,Zambia,Zimbabwe,Zimbabwe,Zimbabwe,Zimbabwe
Unnamed: 0_level_1,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths,New_cases,Cumulative_cases,...,New_deaths,Cumulative_deaths,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
Date_reported,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2020-01-03,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-04,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-05,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-06,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-07,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-01-06,38,158245,1,7367,1236,213257,4,3228,462,220415,...,1,1985,8770,269991,19,3772,1379,219057,14,5092
2022-01-07,0,158245,0,7367,1648,214905,5,3233,410,220825,...,0,1985,4096,274087,10,3782,1121,220178,16,5108
2022-01-08,0,158245,0,7367,1781,216686,0,3233,491,221316,...,0,1985,4189,278276,8,3790,1104,221282,28,5136
2022-01-09,0,158245,0,7367,1801,218487,4,3237,426,221742,...,0,1985,3300,281576,8,3798,636,221918,12,5148


In [7]:
# Persist the wide per-country table; the next section reads this file back.
new_df.to_pickle("../data/covid.pkl")

# Load `pkl` file

In [8]:
# NOTE: `df` is rebound here. From this cell on it is the wide per-country
# table written to the pickle, not the long CSV table loaded at the top.
# Only unpickle files you created yourself: loading a pickle can execute
# arbitrary code.
df = pd.read_pickle('../data/covid.pkl')

In [9]:
# Build the panel from the wide table, restricted to two countries (assets)
# and two daily metrics (channels).
cpanel = panel.from_data(
    df,
    lookback=5,
    horizon=1,
    gap=0,
    assets=['Brazil', 'United States of America'],
    channels=['New_cases', 'New_deaths'],
)

In [10]:
# Visual check of the panel as built, before any transformation is applied.
cpanel.plot_slider()

In [11]:
# Convert both sides of the panel to percentage changes.
# NOTE: this cell is not idempotent. It reads `cpanel.x` / `cpanel.y` and
# writes the result back to the same attributes, so running it a second time
# transforms the already-transformed values again.
cpanel.x = cpanel.x.side_pct_change()
cpanel.y = cpanel.y.side_pct_change()

In [12]:
# Report which frames hold invalid values after the percentage-change step:
# first the ones with NaN, then the ones with +/-inf.
# NOTE(review): presumably these come from zero counts in the raw series - confirm.
na_indices = cpanel.findna()
print(na_indices)
inf_indices = cpanel.findinf()
print(inf_indices)

100%|██████████| 734/734 [00:00<00:00, 5389.41it/s]
100%|██████████| 734/734 [00:00<00:00, 5588.38it/s]


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80]


100%|██████████| 734/734 [00:00<00:00, 203949.59it/s]
100%|██████████| 734/734 [00:00<00:00, 175559.94it/s]

[70, 720, 73, 75, 76, 80, 81, 82, 83, 84, 85, 721, 722, 723, 724, 725]





In [13]:
# Remove invalid values.
# `cpanel` is rebound to whatever `dropinvalid()` returns, so the unfiltered
# panel is no longer reachable under this name after the cell runs.
cpanel = cpanel.dropinvalid()

100%|██████████| 734/734 [00:00<00:00, 182371.85it/s]
100%|██████████| 734/734 [00:00<00:00, 151031.16it/s]
100%|██████████| 734/734 [00:00<00:00, 187138.72it/s]
100%|██████████| 734/734 [00:00<00:00, 167470.99it/s]


In [14]:
# Visual check of the panel again, now after the percentage-change conversion
# and the removal of invalid values.
cpanel.plot_slider()

# Model

In [15]:
# Define model
# Seed NumPy and TensorFlow first so that weight initialisation (and any
# random shuffling the library may do) is repeatable across Restart & Run All.
np.random.seed(42)
tf.random.set_seed(42)
model = nn.LinearRegression(cpanel)

100%|██████████| 463/463 [00:00<00:00, 156572.02it/s]
100%|██████████| 115/115 [00:00<00:00, 160835.27it/s]
100%|██████████| 64/64 [00:00<00:00, 127462.23it/s]
100%|██████████| 463/463 [00:00<00:00, 150236.95it/s]
100%|██████████| 115/115 [00:00<00:00, 143001.77it/s]
100%|██████████| 64/64 [00:00<00:00, 123305.22it/s]
2022-01-10 23:59:58.443644: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [16]:
# Fit the model. No arguments are passed, so all training settings are the
# library's defaults.
model.fit()



In [17]:
# Keep the model's predictions in `predicted`; no cell shown in this notebook
# reads the variable afterwards.
predicted = model.predict()

In [18]:
# Evaluate the fitted model; the cell displays the returned list.
# NOTE(review): the two numbers are presumably [loss, metric] - confirm
# against the `nn` module before quoting them.
model.evaluate()



[4.133019924163818, 1.5731371641159058]