# Predicting COVID-19 in European Countries with Random Forest

In [1]:
import pandas as pd
import numpy as np
import matplotlib.dates as mdates
from datetime import datetime
from matplotlib import pyplot as plt
from matplotlib.dates import date2num
%matplotlib inline
from sklearn.ensemble import RandomForestRegressor
from category_encoders import TargetEncoder
from sklearn.metrics import mean_squared_error
from pmdarima.utils import diff_inv
import json
import math
plt.close("all")

euro_data = pd.read_csv("data/euro_countries_filled.csv", index_col=0)
with open("data/iso_country_codes.json", "r") as read_file:
    country_codes = json.load(read_file)

euro_data


Bad key savefig.frameon in file /home/alpakkan/.local/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle, line 421 ('savefig.frameon : True')
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.3.2/matplotlibrc.template
or from the matplotlib source distribution

Bad key verbose.level in file /home/alpakkan/.local/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle, line 472 ('verbose.level  : silent      # one of silent, helpful, debug, debug-annoying')
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.3.2/matplotlibrc.template
or from the matplotlib source distribution

Bad key verbose.fileo in file /home/alpakkan/.local/lib/python3.6/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle, line 473 ('verbose.fileo  : sys.stdout  # a log filename, sys.stdout or sys.stderr')
You probably need to get an upd

Unnamed: 0,iso_code,location,date,new_cases,new_cases_smoothed,new_cases_per_million,new_cases_smoothed_per_million,population,new_tests,new_tests_smoothed,stringency_index,latitude,longitude
0,ALB,Albania,2019-12-31,0.0,0.000,0.000,0.000,2877800.0,0.0,0.0,0.00,41.0,20.0
1,ALB,Albania,2020-01-01,0.0,0.000,0.000,0.000,2877800.0,0.0,0.0,0.00,41.0,20.0
2,ALB,Albania,2020-01-02,0.0,0.000,0.000,0.000,2877800.0,0.0,0.0,0.00,41.0,20.0
3,ALB,Albania,2020-01-03,0.0,0.000,0.000,0.000,2877800.0,0.0,0.0,0.00,41.0,20.0
4,ALB,Albania,2020-01-04,0.0,0.000,0.000,0.000,2877800.0,0.0,0.0,0.00,41.0,20.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10487,UKR,Ukraine,2020-11-18,11968.0,11208.571,273.656,256.291,43733759.0,42862.0,41017.0,61.57,49.0,32.0
10488,UKR,Ukraine,2020-11-19,12496.0,11477.857,285.729,262.448,43733759.0,40585.0,40110.0,61.57,49.0,32.0
10489,UKR,Ukraine,2020-11-20,13357.0,11806.429,305.416,269.961,43733759.0,,,61.57,49.0,32.0
10490,UKR,Ukraine,2020-11-21,29155.0,14287.571,666.647,326.694,43733759.0,,,61.57,49.0,32.0


## Preprocessing

In [2]:
# Remove rows before 2020-03-01 and after 2020-11-07
start_date, end_date = '2020-03-01', '2020-11-07'

df = euro_data
df['date'] = pd.to_datetime(df['date'])
df = df[~(df['date'] < start_date)]
df = df[~(df['date'] > end_date)]

dates = df['date']

# Drop columns that did not help in the predictions
df = df.drop(['location', 'population', 'new_tests', 'new_tests_smoothed', 'latitude', 'longitude'], axis = 1)

# Group by country
dfs = [group[1] for group in df.groupby('iso_code')]

dfs[0]

Unnamed: 0,iso_code,date,new_cases,new_cases_smoothed,new_cases_per_million,new_cases_smoothed_per_million,stringency_index
61,ALB,2020-03-01,0.0,0.000,0.000,0.000,0.00
62,ALB,2020-03-02,0.0,0.000,0.000,0.000,0.00
63,ALB,2020-03-03,0.0,0.000,0.000,0.000,0.00
64,ALB,2020-03-04,0.0,0.000,0.000,0.000,0.00
65,ALB,2020-03-05,0.0,0.000,0.000,0.000,0.00
...,...,...,...,...,...,...,...
308,ALB,2020-11-03,321.0,296.857,111.544,103.154,50.93
309,ALB,2020-11-04,381.0,310.714,132.393,107.969,50.93
310,ALB,2020-11-05,396.0,322.857,137.605,112.189,50.93
311,ALB,2020-11-06,421.0,343.714,146.292,119.436,50.93


## Feature Engineering

In [3]:
for i in range(len(dfs)):
    dfs[i]['day'] = df['date'].dt.day
    dfs[i]['month'] = df['date'].dt.month
    # Remove date column
    dfs[i] = dfs[i].drop('date', axis = 1)
    
dfs[0]

Unnamed: 0,iso_code,new_cases,new_cases_smoothed,new_cases_per_million,new_cases_smoothed_per_million,stringency_index,day,month
61,ALB,0.0,0.000,0.000,0.000,0.00,1,3
62,ALB,0.0,0.000,0.000,0.000,0.00,2,3
63,ALB,0.0,0.000,0.000,0.000,0.00,3,3
64,ALB,0.0,0.000,0.000,0.000,0.00,4,3
65,ALB,0.0,0.000,0.000,0.000,0.00,5,3
...,...,...,...,...,...,...,...,...
308,ALB,321.0,296.857,111.544,103.154,50.93,3,11
309,ALB,381.0,310.714,132.393,107.969,50.93,4,11
310,ALB,396.0,322.857,137.605,112.189,50.93,5,11
311,ALB,421.0,343.714,146.292,119.436,50.93,6,11


### Differencing

In [4]:
# Difference the target variable
for i in range(len(dfs)):
    diff = dfs[i]['new_cases_smoothed_per_million'].diff(periods=1)
    dfs[i].insert(loc=0, column='new_cases_diff', value=diff)
    
dfs[0]

Unnamed: 0,new_cases_diff,iso_code,new_cases,new_cases_smoothed,new_cases_per_million,new_cases_smoothed_per_million,stringency_index,day,month
61,,ALB,0.0,0.000,0.000,0.000,0.00,1,3
62,0.000,ALB,0.0,0.000,0.000,0.000,0.00,2,3
63,0.000,ALB,0.0,0.000,0.000,0.000,0.00,3,3
64,0.000,ALB,0.0,0.000,0.000,0.000,0.00,4,3
65,0.000,ALB,0.0,0.000,0.000,0.000,0.00,5,3
...,...,...,...,...,...,...,...,...,...
308,1.638,ALB,321.0,296.857,111.544,103.154,50.93,3,11
309,4.815,ALB,381.0,310.714,132.393,107.969,50.93,4,11
310,4.220,ALB,396.0,322.857,137.605,112.189,50.93,5,11
311,7.247,ALB,421.0,343.714,146.292,119.436,50.93,6,11


### Create lagged features

In [5]:
lag = 14

def build_lagged_features(s, lag=3, dropna=True):
    if type(s) is pd.DataFrame:
        new_dict={}
        for col_name in s:
            new_dict[col_name]=s[col_name]
            for l in range(1,lag+1):
                new_dict['%s_lag%d' %(col_name,l)]=s[col_name].shift(l)
        res=pd.DataFrame(new_dict,index=s.index)
    if dropna:
        return res.dropna()
    else:
        return res

def build_all(features):
    new_features = []
    for f in features:
        f = f.dropna(subset=['new_cases_diff'])
        lagged = build_lagged_features(f.drop(['day', 'month', 'iso_code'], axis = 1), lag=lag)
        lagged['day'] = f['day'][lag:]
        lagged['month'] = f['month'][lag:]
        lagged['iso_code'] = f['iso_code'][lag:]
        new_features.append(lagged)
    return new_features

# Create lags
features = build_all(dfs)
features = [x for x in features if len(x) > 7]

features[0]

Unnamed: 0,new_cases_diff,new_cases_diff_lag1,new_cases_diff_lag2,new_cases_diff_lag3,new_cases_diff_lag4,new_cases_diff_lag5,new_cases_diff_lag6,new_cases_diff_lag7,new_cases_diff_lag8,new_cases_diff_lag9,...,stringency_index_lag8,stringency_index_lag9,stringency_index_lag10,stringency_index_lag11,stringency_index_lag12,stringency_index_lag13,stringency_index_lag14,day,month,iso_code
76,0.100,1.886,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,16,3,ALB
77,0.248,0.100,1.886,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,36.11,0.00,0.00,0.00,0.00,0.00,0.00,17,3,ALB
78,0.000,0.248,0.100,1.886,0.000,0.000,0.000,0.000,0.000,0.000,...,41.67,36.11,0.00,0.00,0.00,0.00,0.00,18,3,ALB
79,0.149,0.000,0.248,0.100,1.886,0.000,0.000,0.000,0.000,0.000,...,51.85,41.67,36.11,0.00,0.00,0.00,0.00,19,3,ALB
80,-0.050,0.149,0.000,0.248,0.100,1.886,0.000,0.000,0.000,0.000,...,51.85,51.85,41.67,36.11,0.00,0.00,0.00,20,3,ALB
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
308,1.638,1.390,-3.028,0.645,-1.340,0.695,-0.844,-0.348,0.894,1.440,...,54.63,54.63,54.63,54.63,54.63,54.63,54.63,3,11,ALB
309,4.815,1.638,1.390,-3.028,0.645,-1.340,0.695,-0.844,-0.348,0.894,...,54.63,54.63,54.63,54.63,54.63,54.63,54.63,4,11,ALB
310,4.220,4.815,1.638,1.390,-3.028,0.645,-1.340,0.695,-0.844,-0.348,...,54.63,54.63,54.63,54.63,54.63,54.63,54.63,5,11,ALB
311,7.247,4.220,4.815,1.638,1.390,-3.028,0.645,-1.340,0.695,-0.844,...,54.63,54.63,54.63,54.63,54.63,54.63,54.63,6,11,ALB


In [6]:
# Splitting into labels and features, training, validation and test sets
labels = {}
labels_diff = []

train_features = {}
train_labels = {}
train_labels_final = {}
val_features = {}
val_labels = {}
test_features = {}
test_labels = {}
for i in range(len(features)):
    iso_code = features[i]['iso_code'].iloc[0]
    labels[iso_code] = features[i]['new_cases_smoothed_per_million']
    labels_diff.append(features[i]['new_cases_diff'])
    features[i] = features[i].drop(['new_cases', 'new_cases_smoothed', 'new_cases_per_million', 'new_cases_smoothed_per_million', 'new_cases_diff', 'stringency_index'], axis = 1)
    train_features[iso_code] = features[i][:-14]
    train_labels[iso_code] = labels_diff[i][:-14]
    train_labels_final[iso_code] = labels_diff[i][:-7]
    # Using last 7 days of October as validation set
    val_features[iso_code] = features[i][-14:-7]
    val_labels[iso_code] = labels[iso_code][-14:-7]
    # Using first 7 days of November as validation set
    test_features[iso_code] = features[i][-7:]
    test_labels[iso_code] = labels[iso_code][-7:]

feature_list = list(features[0].columns)

In [7]:
# Target encode ISO code
for iso_code, tf in train_features.items():
    target_encoder = TargetEncoder(cols=['iso_code'], smoothing=8, min_samples_leaf=5).fit(train_features[iso_code], train_labels[iso_code])
    train_features[iso_code] = target_encoder.transform(train_features[iso_code])
    val_features[iso_code] = target_encoder.transform(val_features[iso_code])
    test_features[iso_code] = target_encoder.transform(test_features[iso_code])

  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical

## Recursive forecasting

In [8]:
# Multi-step forecasts
def forecast_n_steps(model, test_features, predictions=[], horizon=7, n_steps=7):
    predictions.append(model.predict(np.array(test_features.head(1)))[0])
    test_features = test_features[1:]
    if n_steps == 1:
        return predictions
    else:
        day_no = len(predictions)
        for row in range(0, horizon-day_no):
            test_features.iloc[row, row] = predictions[-1]
        return forecast_n_steps(model, test_features, predictions, horizon=horizon, n_steps=n_steps-1)

## Evaluating different models

### Training one model on all series

In [9]:
# Convert to numpy array
np_train_features = np.array(pd.concat(train_features))
np_train_labels = np.array(pd.concat(train_labels))

# Train random forest model with 1000 decision trees
rf = RandomForestRegressor(n_estimators = 1000, random_state = 49)
rf.fit(np_train_features, np_train_labels)

RandomForestRegressor(n_estimators=1000, random_state=49)

#### Evaluation on validation set

In [10]:
# Get predictions of the difference
diff_predictions = {}
for iso_code, vf in val_features.items():
    diff_predictions[iso_code] = forecast_n_steps(rf, vf, predictions=[])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

In [11]:
predictions = {}
errors = {}
for iso_code, vl in val_labels.items():
    predictions[iso_code] = diff_inv(np.append(np.array(labels[iso_code].iloc[-8:-7]), diff_predictions[iso_code]), lag=1, differences=1)[-7:]
    # Get RMSE
    errors[iso_code] = predictions[iso_code] - np.array(vl)
    print('RMSE:', iso_code, round(math.sqrt(mean_squared_error(vl, predictions[iso_code])), 2))

RMSE: ALB 3.86
RMSE: AUT 143.87
RMSE: BEL 241.09
RMSE: BGR 131.39
RMSE: BIH 181.76
RMSE: BLR 18.41
RMSE: CHE 354.77
RMSE: CZE 254.08
RMSE: DEU 67.58
RMSE: DNK 51.32
RMSE: ESP 123.75
RMSE: EST 16.18
RMSE: FIN 4.25
RMSE: FRA 198.25
RMSE: GRC 40.36
RMSE: HRV 216.3
RMSE: HUN 88.74
RMSE: ITA 151.7
RMSE: LTU 106.61
RMSE: LVA 29.87
RMSE: MDA 12.27
RMSE: NLD 115.24
RMSE: NOR 26.95
RMSE: POL 161.82
RMSE: PRT 106.7
RMSE: ROU 66.25
RMSE: RUS 12.1
RMSE: SRB 73.07
RMSE: SVK 132.61
RMSE: SVN 339.67
RMSE: SWE 80.2
RMSE: UKR 26.02


In [12]:
flatten = lambda t: [item for sublist in t for item in sublist]

# Get RMSE for entire Europe
print('RMSE:', round(math.sqrt(mean_squared_error(flatten(list(predictions.values())), flatten(list(val_labels.values())))), 2))

RMSE: 145.26


In [13]:
# Get feature importances
importances = list(rf.feature_importances_)
feature_importances = [(feature, round(importance, 2)) for feature, importance in zip(feature_list, importances)]
# Sort feature importances descendingly by importance
feature_importances = sorted(feature_importances, key = lambda x: x[1], reverse = True)

[print('Variable: {:20} Importance: {}'.format(*pair)) for pair in feature_importances];

Variable: new_cases_smoothed_per_million_lag1 Importance: 0.21
Variable: new_cases_diff_lag1  Importance: 0.14
Variable: new_cases_diff_lag7  Importance: 0.14
Variable: new_cases_diff_lag2  Importance: 0.06
Variable: new_cases_diff_lag3  Importance: 0.05
Variable: new_cases_diff_lag6  Importance: 0.05
Variable: new_cases_diff_lag8  Importance: 0.03
Variable: new_cases_per_million_lag7 Importance: 0.03
Variable: new_cases_smoothed_per_million_lag2 Importance: 0.03
Variable: new_cases_diff_lag4  Importance: 0.02
Variable: new_cases_diff_lag5  Importance: 0.02
Variable: new_cases_diff_lag14 Importance: 0.02
Variable: new_cases_per_million_lag1 Importance: 0.02
Variable: new_cases_diff_lag9  Importance: 0.01
Variable: new_cases_diff_lag10 Importance: 0.01
Variable: new_cases_diff_lag11 Importance: 0.01
Variable: new_cases_diff_lag13 Importance: 0.01
Variable: new_cases_per_million_lag4 Importance: 0.01
Variable: new_cases_per_million_lag6 Importance: 0.01
Variable: new_cases_per_million_la

### Training separate models on individual series

In [14]:
# Train random forest model with 1000 decision trees
rfs = {}
for iso_code, val in train_features.items():
    rf = RandomForestRegressor(n_estimators = 1000, random_state = 49)
    rf.fit(np.array(train_features[iso_code].drop('iso_code', axis=1)), np.array(train_labels[iso_code]))
    rfs[iso_code] = rf

#### Evaluation on validation set

In [15]:
diff_predictions = {}
for iso_code, vf in val_features.items():
    diff_predictions[iso_code] = forecast_n_steps(rfs[iso_code], vf.drop('iso_code', axis=1), predictions=[])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

In [16]:
predictions = {}
errors = {}
for iso_code, vl in val_labels.items():
    predictions[iso_code] = diff_inv(np.append(np.array(labels[iso_code].iloc[-8:-7]), diff_predictions[iso_code]), lag=1, differences=1)[-7:]
    # Get RMSE
    errors[iso_code] = predictions[iso_code] - np.array(vl)
    print('RMSE:', iso_code, round(math.sqrt(mean_squared_error(vl, predictions[iso_code])), 2))

RMSE: ALB 9.45
RMSE: AUT 139.75
RMSE: BEL 272.86
RMSE: BGR 117.53
RMSE: BIH 143.51
RMSE: BLR 16.56
RMSE: CHE 336.25
RMSE: CZE 225.85
RMSE: DEU 50.48
RMSE: DNK 54.37
RMSE: ESP 51.95
RMSE: EST 15.18
RMSE: FIN 2.7
RMSE: FRA 161.32
RMSE: GRC 43.27
RMSE: HRV 184.34
RMSE: HUN 84.42
RMSE: ITA 148.41
RMSE: LTU 98.36
RMSE: LVA 27.27
RMSE: MDA 15.54
RMSE: NLD 83.65
RMSE: NOR 22.46
RMSE: POL 157.0
RMSE: PRT 87.13
RMSE: ROU 44.94
RMSE: RUS 12.19
RMSE: SRB 68.2
RMSE: SVK 105.34
RMSE: SVN 355.49
RMSE: SWE 70.66
RMSE: UKR 25.62


In [17]:
flatten = lambda t: [item for sublist in t for item in sublist]

# Get RMSE for entire Europe
print('RMSE:', round(math.sqrt(mean_squared_error(flatten(list(predictions.values())), flatten(list(val_labels.values())))), 2))

RMSE: 136.09


In [18]:
# Get feature importances
importances = list(rf[0].feature_importances_)
feature_importances = [(feature, round(importance, 2)) for feature, importance in zip(feature_list, importances)]
# Sort feature importances descendingly by importance
feature_importances = sorted(feature_importances, key = lambda x: x[1], reverse = True)

[print('Variable: {:20} Importance: {}'.format(*pair)) for pair in feature_importances];

Variable: new_cases_diff_lag8  Importance: 0.59
Variable: new_cases_diff_lag7  Importance: 0.19
Variable: new_cases_lag10      Importance: 0.07
Variable: new_cases_diff_lag9  Importance: 0.04
Variable: new_cases_diff_lag14 Importance: 0.03
Variable: new_cases_smoothed_per_million_lag14 Importance: 0.03
Variable: new_cases_diff_lag1  Importance: 0.01
Variable: new_cases_smoothed_per_million_lag9 Importance: 0.01
Variable: stringency_index_lag7 Importance: 0.01
Variable: new_cases_diff_lag2  Importance: 0.0
Variable: new_cases_diff_lag3  Importance: 0.0
Variable: new_cases_diff_lag4  Importance: 0.0
Variable: new_cases_diff_lag5  Importance: 0.0
Variable: new_cases_diff_lag6  Importance: 0.0
Variable: new_cases_diff_lag10 Importance: 0.0
Variable: new_cases_diff_lag11 Importance: 0.0
Variable: new_cases_diff_lag12 Importance: 0.0
Variable: new_cases_diff_lag13 Importance: 0.0
Variable: new_cases_lag1       Importance: 0.0
Variable: new_cases_lag2       Importance: 0.0
Variable: new_cases

## Training the final model

In [19]:
# Train random forest model with 1000 decision trees
rfs = {}
for iso_code, val in train_features.items():
    rf = RandomForestRegressor(n_estimators = 1000, random_state = 49)
    rf.fit(np.array(train_features[iso_code].append(val_features[iso_code]).drop('iso_code', axis=1)), np.array(train_labels_final[iso_code]))
    rfs[iso_code] = rf

In [20]:
diff_predictions = {}
for iso_code, tf in test_features.items():
    diff_predictions[iso_code] = forecast_n_steps(rfs[iso_code], tf.drop('iso_code', axis=1), predictions=[])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in th

In [21]:
predictions = {}
errors = {}
for iso_code, tl in test_labels.items():
    predictions[iso_code] = diff_inv(np.append(np.array(labels[iso_code].iloc[-8:-7]), diff_predictions[iso_code]), lag=1, differences=1)[-7:]
    # Get Mean Absolute Error (MAE)
    errors[iso_code] = abs(predictions[iso_code] - np.array(tl))
    #print('MAE:', iso_code, round(np.mean(errors[iso_code]), 2), 'new cases per million')
    print('RMSE:', iso_code, round(math.sqrt(mean_squared_error(tl, predictions[iso_code])), 2))

RMSE: ALB 6.69
RMSE: AUT 12.53
RMSE: BEL 277.84
RMSE: BGR 11.0
RMSE: BIH 96.95
RMSE: BLR 3.54
RMSE: CHE 173.81
RMSE: CZE 39.55
RMSE: DEU 20.82
RMSE: DNK 14.27
RMSE: ESP 35.15
RMSE: EST 29.84
RMSE: FIN 1.1
RMSE: FRA 78.29
RMSE: GRC 18.45
RMSE: HRV 138.35
RMSE: HUN 6.23
RMSE: ITA 33.64
RMSE: LTU 32.17
RMSE: LVA 10.16
RMSE: MDA 19.33
RMSE: NLD 91.98
RMSE: NOR 8.26
RMSE: POL 34.43
RMSE: PRT 42.48
RMSE: ROU 40.91
RMSE: RUS 4.82
RMSE: SRB 5.3
RMSE: SVK 88.86
RMSE: SVN 315.68
RMSE: SWE 10.86
RMSE: UKR 17.76


In [22]:
# Get average for entire Europe
print('MAE:', round(np.mean([*errors.values()]), 2))
print('RMSE:', round(math.sqrt(np.mean([e**2 for e in errors.values()])), 2))

MAE: 45.98
RMSE: 92.02


In [23]:
# Get feature importances
importances = list(rf[0].feature_importances_)
feature_importances = [(feature, round(importance, 2)) for feature, importance in zip(feature_list, importances)]
# Sort feature importances descendingly by importance
feature_importances = sorted(feature_importances, key = lambda x: x[1], reverse = True)

[print('Variable: {:20} Importance: {}'.format(*pair)) for pair in feature_importances];

Variable: new_cases_diff_lag7  Importance: 0.27
Variable: new_cases_lag7       Importance: 0.17
Variable: new_cases_diff_lag1  Importance: 0.16
Variable: new_cases_diff_lag8  Importance: 0.06
Variable: new_cases_diff_lag11 Importance: 0.06
Variable: new_cases_per_million_lag8 Importance: 0.06
Variable: new_cases_lag6       Importance: 0.05
Variable: new_cases_per_million_lag4 Importance: 0.05
Variable: new_cases_diff_lag14 Importance: 0.03
Variable: new_cases_lag13      Importance: 0.02
Variable: new_cases_smoothed_per_million_lag14 Importance: 0.02
Variable: new_cases_smoothed_lag4 Importance: 0.01
Variable: new_cases_per_million_lag12 Importance: 0.01
Variable: new_cases_smoothed_per_million_lag8 Importance: 0.01
Variable: new_cases_diff_lag2  Importance: 0.0
Variable: new_cases_diff_lag3  Importance: 0.0
Variable: new_cases_diff_lag4  Importance: 0.0
Variable: new_cases_diff_lag5  Importance: 0.0
Variable: new_cases_diff_lag6  Importance: 0.0
Variable: new_cases_diff_lag9  Importanc

## Export results

In [24]:
d = {'rf_NOR': predictions['NOR'], 'rf_SWE': predictions['SWE'], 'rf_FIN': predictions['FIN'], 'rf_BEL': predictions['BEL']}
results = pd.DataFrame(d, index=dates.iloc[-7:])
results

Unnamed: 0_level_0,rf_NOR,rf_SWE,rf_FIN,rf_BEL
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-11-01,65.698497,217.189669,36.840025,1311.409212
2020-11-02,68.654144,231.348005,38.39875,1295.8563
2020-11-03,72.281758,244.910669,38.72607,1275.847695
2020-11-04,75.106994,261.912368,38.833577,1255.677725
2020-11-05,77.994989,281.570073,38.631033,1233.427839
2020-11-06,80.58678,301.628768,39.871272,1211.529793
2020-11-07,83.251876,319.85855,38.60827,1189.14509


In [25]:
results.to_csv('predictions/rf_predictions.csv')