# Statistical models in R
This notebook is covering:
1. Comparing models using MAE and RMSE as well as performing Diebold-Mariano

In [87]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go

In [88]:
# rpy2 is a Python package that allows you to run R code from Python
%pip install rpy2

Note: you may need to restart the kernel to use updated packages.


In [89]:
# Load the rpy2 extension to use R in Jupyter
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


The magic function `%%R` is used for running R code in Jupyter

In [90]:
%%R

# Define the hub pairs and models
hub_pairs <- list(c("ttf", "the"),c("ttf", "nbp"), c("the", "nbp"))
models <- c("naive", "arima", "vecm", "vecm_short", "tvecm_t1", "tvecm_t2")
horizon <- 250
window_size <- 5

source("compare_models.r")

results_list <- compare_error_metrics(hub_pairs, models, horizon, window_size)



$ttf_the
$ttf_the$ttf
            mape  rmse
naive       6.1  2.96 
arima      6.11  2.97 
vecm        6.1  2.92 
vecm_short 6.08~ 2.91~
tvecm_t1   6.26  2.95 
tvecm_t2   6.79  4.12 

$ttf_the$the
            mape  rmse
naive      6.02~ 2.94 
arima      6.03  2.95 
vecm       6.03   2.9~
vecm_short 6.02~  2.9~
tvecm_t1   6.22  2.95 
tvecm_t2   6.67  4.02 


$ttf_nbp
$ttf_nbp$ttf
            mape  rmse
naive       6.1  2.96 
arima      6.11  2.97 
vecm       5.91  2.94 
vecm_short 5.88~ 2.93~
tvecm_t1   6.53  3.04 
tvecm_t2   6.17  2.97 

$ttf_nbp$nbp
            mape  rmse
naive       6.5  3.09 
arima      6.51  3.08 
vecm        6.2  3.03 
vecm_short 6.17~ 3.02~
tvecm_t1   6.51  3.06 
tvecm_t2    6.9   3.2 


$the_nbp
$the_nbp$the
            mape  rmse
naive      6.02  2.94 
arima      6.03  2.95 
vecm       5.86  2.93 
vecm_short 5.84~ 2.92~
tvecm_t1   6.46  3.04 
tvecm_t2   6.49  3.07 

$the_nbp$nbp
            mape  rmse
naive       6.5  3.09 
arima      6.51  3.08 
vecm       6.2

In [91]:
%%R
source("compare_models.r")
diebold_mariano(hub_pairs, models, horizon, window_size)

$ttf_the
$ttf_the$ttf
              naive    arima     vecm vecm_short tvecm_t1 tvecm_t2
naive          -    0.339    0.536      0.564    0.314    0.134   
arima      0.661        -    0.555      0.583    0.322    0.138   
vecm       0.464    0.445        -      0.973    0.174    0.111   
vecm_short 0.436    0.417    0.027*         -    0.153    0.104   
tvecm_t1   0.686    0.678    0.826      0.847        -    0.232   
tvecm_t2   0.866    0.862    0.889      0.896    0.768        -   

$ttf_the$the
              naive    arima     vecm vecm_short tvecm_t1 tvecm_t2
naive          -    0.414    0.521      0.543    0.285    0.142   
arima      0.586        -    0.532      0.554    0.289    0.144   
vecm       0.479    0.468        -      0.940    0.133    0.116   
vecm_short 0.457    0.446    0.060          -    0.117    0.109   
tvecm_t1   0.715    0.711    0.867      0.883        -    0.266   
tvecm_t2   0.858    0.856    0.884      0.891    0.734        -   


$ttf_nbp
$ttf_nbp$ttf
  

In [92]:
%%R

hub1_name <- "ttf"
hub2_name <- "the"
model1_name <- "tvecm_t2"
model2_name <- "naive"

model1_filename <- paste0("../predictions/test/predictions/", hub1_name, "_", hub2_name, "_h", horizon, "_w", window_size, "_", model1_name, "_predictions.csv")
model1_predictions <- read.csv(model1_filename)

model2_filename <- paste0("../predictions/test/predictions/", hub1_name, "_h", horizon, "_w", window_size, "_", model2_name, "_predictions.csv")
model2_predictions <- read.csv(model2_filename)

actuals_hub1_filename <- paste0("../data/interpolated/", hub1_name, "_close_interpolated.csv")
actuals_hub1 <- tail(read.csv(actuals_hub1_filename), horizon)
colnames(actuals_hub1) <- c("Date", hub1_name)

actuals_hub2_filename <- paste0("../data/interpolated/", hub2_name, "_close_interpolated.csv")
actuals_hub2 <- tail(read.csv(actuals_hub2_filename), horizon)
colnames(actuals_hub2) <- c("Date", hub2_name)

model1_mape_hub1 <- mape(actuals_hub1[[hub1_name]], model1_predictions[[hub1_name]])*100
model1_rmse_hub1 <- rmse(actuals_hub1[[hub1_name]], model1_predictions[[hub1_name]])

model2_mape_hub1 <- mape(actuals_hub1[[hub1_name]], model2_predictions[[hub1_name]])*100
model2_rmse_hub1 <- rmse(actuals_hub1[[hub1_name]], model2_predictions[[hub1_name]])

print(paste0("MAPE for ", hub1_name, ": ", model1_mape_hub1))
print(paste0("RMSE for ", hub1_name, ": ", model1_rmse_hub1))

print(paste0("MAPE for ", hub1_name, ": ", model2_mape_hub1))
print(paste0("RMSE for ", hub1_name, ": ", model2_rmse_hub1))

[1] "MAPE for ttf: 6.79094937667706"
[1] "RMSE for ttf: 4.12372146863511"
[1] "MAPE for ttf: 6.09629307101034"
[1] "RMSE for ttf: 2.96090183926752"


In [93]:
%%R
model2_filename <- paste0("../predictions/test/predictions/", hub2_name, "_h", horizon, "_w", window_size, "_", model2_name, "_predictions.csv")
model2_predictions <- read.csv(model2_filename)

model1_mape_hub2 <- mape(actuals_hub2[[hub2_name]], model1_predictions[[hub2_name]])*100
model1_rmse_hub2 <- rmse(actuals_hub2[[hub2_name]], model1_predictions[[hub2_name]])

model2_mape_hub2 <- mape(actuals_hub2[[hub2_name]], model2_predictions[[hub2_name]])*100
model2_rmse_hub2 <- rmse(actuals_hub2[[hub2_name]], model2_predictions[[hub2_name]])

print(paste0("MAPE for ", hub2_name, ": ", model1_mape_hub2))
print(paste0("RMSE for ", hub2_name, ": ", model1_rmse_hub2))

print(paste0("MAPE for ", hub2_name, ": ", model2_mape_hub2))
print(paste0("RMSE for ", hub2_name, ": ", model2_rmse_hub2))

[1] "MAPE for the: 6.67212386189849"
[1] "RMSE for the: 4.01953958437271"
[1] "MAPE for the: 6.02219443490839"
[1] "RMSE for the: 2.94050043911652"


In [94]:
%%R
model1_hub1_resids <- actuals_hub1[[hub1_name]] - model1_predictions[[hub1_name]]
model2_hub1_resids <- actuals_hub1[[hub1_name]] - model2_predictions[[hub1_name]]
hub1_dm <- dm.test(model1_hub1_resids, model2_hub1_resids, h = window_size, power = 1, alternative = "less")
hub1_dm

Error in ts(x) : 'ts' object must have one or more observations


RInterpreterError: Failed to parse and evaluate line 'model1_hub1_resids <- actuals_hub1[[hub1_name]] - model1_predictions[[hub1_name]]\nmodel2_hub1_resids <- actuals_hub1[[hub1_name]] - model2_predictions[[hub1_name]]\nhub1_dm <- dm.test(model1_hub1_resids, model2_hub1_resids, h = window_size, power = 1, alternative = "less")\nhub1_dm\n'.
R error message: "Error in ts(x) : 'ts' object must have one or more observations"