# Statistical models in R
This notebook covers:
1. Data preprocessing:
    1. Aligning all dataframes to a 5-day week from 2018-10-01 to 2024-08-30.
    2. Interpolating missing values.
2. ARMA-models.

In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go

In [3]:
# rpy2 is a Python package that allows you to run R code from Python.
# %pip (rather than !pip) installs into the environment of the running kernel.
# NOTE(review): the version is not pinned, so a re-run may pull a different
# rpy2 release than the one these outputs were produced with — consider pinning.
%pip install rpy2

Note: you may need to restart the kernel to use updated packages.


In [4]:
# Load the rpy2 IPython extension so that cells starting with `%%R`
# are executed as R code instead of Python.
%load_ext rpy2.ipython

The `%%R` cell magic runs the contents of a cell as R code in Jupyter.

In [5]:
%%R
# Install (when missing) and attach every R package used by this notebook.
#
# The attach order is deliberate and must be kept: later packages mask names
# from earlier ones (the cell output shows, for example, forecast overriding
# the fitted.Arima / plot.Arima S3 methods registered by TSA).
required_packages <- c(
  "dplyr", "zoo", "psych", "TSA", "forecast",
  "Metrics", "ggplot2", "vars", "svars", "tsDyn"
)

for (pkg in required_packages) {
  # require() attaches the package and returns FALSE (instead of raising an
  # error) when it is not installed; character.only = TRUE makes it treat
  # `pkg` as a string holding the package name rather than as a literal name.
  if (!require(pkg, character.only = TRUE)) {
    install.packages(pkg)
    library(pkg, character.only = TRUE)
  }
}


Loading required package: dplyr

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

Loading required package: zoo

Attaching package: ‘zoo’

The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric

Loading required package: psych
Loading required package: TSA

Attaching package: ‘TSA’

The following objects are masked from ‘package:stats’:

    acf, arima

The following object is masked from ‘package:utils’:

    tar

Loading required package: forecast
Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 
Registered S3 methods overwritten by 'forecast':
  method       from
  fitted.Arima TSA 
  plot.Arima   TSA 
This is forecast 8.23.0 
  Use suppressPackageStartupMessages() to eliminate package startup messages.
Loading required package: Metrics

Attaching package: ‘Metr

In [32]:
%%R
# Experiment configuration: the two hubs of the pair, the horizon / window
# size encoded in the file names, and the two models whose forecasts are
# compared in the cells below.
hub1_name <- "the"
hub2_name <- "nbp"
horizon <- 250
window_size <- 5
model1 <- "tvecm"
model2 <- "vecm"

# All three input files share the stem
#   ../predictions/<hub1>_<hub2>_h<horizon>_w<window_size>
# so it is built once and reused.
run_prefix <- paste0("../predictions/", hub1_name, "_", hub2_name, "_h", horizon, "_w", window_size)

model1_filename <- paste0(run_prefix, "_", model1, "_predictions", ".csv")
model2_filename <- paste0(run_prefix, "_", model2, "_predictions", ".csv")
actuals_filename <- paste0(run_prefix, "_actuals", ".csv")

model1_predictions <- read.csv(model1_filename)
model2_predictions <- read.csv(model2_filename)
actuals <- read.csv(actuals_filename)

# Fail early if the tables cannot be compared element-wise. Without these
# checks a missing hub column makes `[[` return NULL, and tables of different
# length are recycled by the subtraction in the following cells, so the
# Diebold-Mariano test would either fail obscurely or run on misaligned data.
loaded_tables <- list(
  model1_predictions = model1_predictions,
  model2_predictions = model2_predictions,
  actuals = actuals
)
hub_columns <- c(hub1_name, hub2_name)
for (table_name in names(loaded_tables)) {
  current_table <- loaded_tables[[table_name]]
  absent_columns <- hub_columns[!(hub_columns %in% names(current_table))]
  if (length(absent_columns) > 0) {
    stop(table_name, " is missing column(s): ", paste(absent_columns, collapse = ", "))
  }
  if (nrow(current_table) != nrow(actuals)) {
    stop(table_name, " has ", nrow(current_table), " rows but actuals has ", nrow(actuals))
  }
}



In [33]:
%%R
# Diebold-Mariano comparison of the two models' forecasts for the first hub.
# The "residuals" are forecast errors: actual value minus predicted value.
# The global names model1_hub1_resids / model2_hub1_resids are kept because
# dm.test echoes the argument expressions in the "data:" line of its output.
actual_hub1 <- actuals[[hub1_name]]
model1_hub1_resids <- actual_hub1 - model1_predictions[[hub1_name]]
model2_hub1_resids <- actual_hub1 - model2_predictions[[hub1_name]]

print(sprintf("Pair: %s | %s", hub1_name, hub2_name))
print(sprintf("%s: Diebold-Mariano Test:", hub1_name))

# power = 1 is reported in the output as "Loss function power = 1".
# NOTE(review): per the forecast::dm.test documentation, alternative = "greater"
# tests whether the second series (model2) is MORE accurate than the first
# (model1) — confirm this is the intended direction of the comparison.
dm.test(
  model1_hub1_resids,
  model2_hub1_resids,
  alternative = "greater",
  h = window_size,
  power = 1
)

[1] "Pair: the | nbp"
[1] "the: Diebold-Mariano Test:"

	Diebold-Mariano Test

data:  model1_hub1_residsmodel2_hub1_resids
DM = -0.22522, Forecast horizon = 5, Loss function power = 1, p-value =
0.589
alternative hypothesis: greater



In [34]:
%%R
# Diebold-Mariano comparison of the two models' forecasts for the second hub.
# The "residuals" are forecast errors: actual value minus predicted value.
# The global names model1_hub2_resids / model2_hub2_resids are kept because
# dm.test echoes the argument expressions in the "data:" line of its output.
actual_hub2 <- actuals[[hub2_name]]
model1_hub2_resids <- actual_hub2 - model1_predictions[[hub2_name]]
model2_hub2_resids <- actual_hub2 - model2_predictions[[hub2_name]]

print(sprintf("Pair: %s | %s", hub1_name, hub2_name))
print(sprintf("%s: Diebold-Mariano Test:", hub2_name))

# power = 1 is reported in the output as "Loss function power = 1".
# NOTE(review): per the forecast::dm.test documentation, alternative = "greater"
# tests whether the second series (model2) is MORE accurate than the first
# (model1) — confirm this is the intended direction of the comparison.
dm.test(
  model1_hub2_resids,
  model2_hub2_resids,
  alternative = "greater",
  h = window_size,
  power = 1
)

[1] "Pair: the | nbp"
[1] "nbp: Diebold-Mariano Test:"

	Diebold-Mariano Test

data:  model1_hub2_residsmodel2_hub2_resids
DM = -0.63618, Forecast horizon = 5, Loss function power = 1, p-value =
0.7374
alternative hypothesis: greater

