# Statistical models in R
This notebook covers:
1. Testing a custom predict method for TVECM/VECM models against the built-in `predict`

In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go

In [2]:
# rpy2 is a Python package that allows you to run R code from Python
%pip install rpy2

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Load the rpy2 extension to use R in Jupyter
%load_ext rpy2.ipython

The `%%R` cell magic is used to run R code in a Jupyter cell.

In [4]:
%%R
# Install required packages
# Each package is attached in the order listed; a package that cannot be
# attached is installed first and then attached with library().
invisible(lapply(
  c(
    "dplyr", "zoo", "psych", "TSA", "forecast", "Metrics",
    "ggplot2", "vars", "svars", "tsDyn", "devtools"
  ),
  function(pkg) {
    if (!require(pkg, character.only = TRUE)) {
      install.packages(pkg)
      library(pkg, character.only = TRUE)
    }
  }
))


Loading required package: dplyr

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

Loading required package: zoo

Attaching package: ‘zoo’

The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric

Loading required package: psych
Loading required package: TSA

Attaching package: ‘TSA’

The following objects are masked from ‘package:stats’:

    acf, arima

The following object is masked from ‘package:utils’:

    tar

Loading required package: forecast
Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 
Registered S3 methods overwritten by 'forecast':
  method       from
  fitted.Arima TSA 
  plot.Arima   TSA 
Loading required package: Metrics

Attaching package: ‘Metrics’

The following object is masked from ‘package:forecast’:

    accuracy

Loading required package: 

In [5]:
%%R
# Load data
# One interpolated close-price CSV per hub; the list elements are named by
# hub code so they can be looked up with hub_prices[[name]].
hub_prices <- lapply(
  c(
    nbp = "../../data/interpolated/nbp_close_interpolated.csv",
    peg = "../../data/interpolated/peg_close_interpolated.csv",
    the = "../../data/interpolated/the_close_interpolated.csv",
    ttf = "../../data/interpolated/ttf_close_interpolated.csv",
    ztp = "../../data/interpolated/ztp_close_interpolated.csv"
  ),
  read.csv
)

In [13]:
%%R

# Names of the two hubs whose close prices are modelled together.
hub1_name <- "the"
hub2_name <- "nbp"

# Look up the full price tables for the chosen pair.
hub1 <- hub_prices[[hub1_name]]
hub2 <- hub_prices[[hub2_name]]

# Two-column frame of close prices, combined row by row.
# NOTE(review): assumes both files cover the same dates in the same order --
# confirm against the interpolation step.
hubs <- data.frame(
  hub1 = hub1$CLOSE,
  hub2 = hub2$CLOSE
)

In [14]:
%%R
# Based on the predict.TVAR function from the tsDyn package, for reference only.
# Not directly used here: some small modifications are needed to make it work
# for VECM objects (see mypredict.VECM).
#
# Forecast a fitted TVECM by running tsDyn:::TVECM.gen with all innovations
# set to zero.
#
# Arguments:
#   object             fitted model; its lag, k, include, coeffmat, model and
#                      model.specific (nthresh, beta) components are read.
#   newdata            optional data.frame / matrix / zoo / ts with exactly
#                      lag + 1 rows, used as starting values instead of the
#                      last rows of the estimation sample.
#   n.ahead            number of steps to forecast.
#   newdataTrendStart  accepted for signature compatibility with predict.TVAR
#                      but currently ignored (trend is not supported yet).
#   ...                accepted but not forwarded to the generator.
#
# Returns the object produced by tsDyn:::TVECM.gen, with the column names of
# the estimation data and row names that continue the sample index.
predict.TVECM <- function(object, newdata, n.ahead = 5,
                          newdataTrendStart, ...) {

  ## extract parameters, coefs
  lag <- object$lag
  k <- object$k
  include <- object$include
  B <- object$coeffmat
  Thresh <- getTh(object)
  nthresh <- object$model.specific$nthresh
  beta <- object$model.specific$beta

  ## setup starting values (data in y), innovations (0)
  original.data <- object$model[, seq_len(k), drop = FALSE]
  # Same window length (lag + 1) as the one required of `newdata` below, so
  # the default and the user-supplied start are interchangeable.
  starting <- tsDyn:::myTail(original.data, lag + 1)
  innov <- matrix(0, nrow = n.ahead, ncol = k)

  if (!missing(newdata)) {
    if (!inherits(newdata, c("data.frame", "matrix", "zoo", "ts"))) {
      stop("Arg 'newdata' should be of class data.frame, matrix, zoo or ts")
    }
    if (nrow(newdata) != lag + 1) {
      stop(paste0("Please provide newdata with nrow=lag+1=", lag + 1))
    }
    starting <- as.matrix(newdata)
  }

  ## trend DOES NOT WORK YET
  # No trend start is handed to the generator, so `newdataTrendStart` has no
  # effect on the forecast.

  res <- tsDyn:::TVECM.gen(
    B = B, beta = beta, n = n.ahead, lag = lag,
    include = include,
    nthresh = nthresh,
    Thresh = Thresh,
    starting = starting, innov = innov
  )

  ## format results
  colnames(res) <- colnames(original.data)
  # Label rows so that the last one is (sample size + n.ahead). The first
  # label is derived from the number of rows actually returned, which keeps
  # the assignment valid whatever the generator hands back.
  end_rows <- nrow(original.data) + n.ahead
  start_rows <- end_rows - nrow(res) + 1
  rownames(res) <- start_rows:end_rows

  res
}

In [15]:
%%R
# Based on the predict.TVAR function from tsDyn.
#
# Forecast a fitted VECM by running tsDyn:::TVECM.gen with all innovations set
# to zero.
#
# Arguments:
#   object             fitted model; its lag, k, include, model and
#                      model.specific (nthresh, beta) components are read, and
#                      coef() / getTh() are called on it.
#   newdata            optional data.frame / matrix / zoo / ts with exactly
#                      lag + 1 rows, used as starting values instead of the
#                      last rows of the estimation sample.
#   n.ahead            number of steps to forecast.
#   newdataTrendStart  accepted for signature compatibility with predict.TVAR
#                      but currently ignored (no trend start is passed on).
#   ...                accepted but not forwarded to the generator.
#
# Returns the object produced by tsDyn:::TVECM.gen, with the column names of
# the estimation data and row names that continue the sample index.
mypredict.VECM <- function(object, newdata, n.ahead = 5,
                           newdataTrendStart, ...) {

  ## extract parameters, coefs
  lag <- object$lag
  k <- object$k
  include <- object$include
  B <- coef(object)
  Thresh <- getTh(object)
  nthresh <- object$model.specific$nthresh
  # Take the cointegrating coefficient from the model being forecast rather
  # than from a global `vecm`, so the function works for any fitted object.
  # NOTE(review): uses only the second element with its sign flipped, which
  # presumably matches a bivariate vector normalised as (1, -beta) -- confirm
  # against TVECM.gen before using this with more than two series.
  beta <- as.vector(-object$model.specific$beta[2])

  ## setup starting values (data in y), innovations (0)
  original.data <- object$model[, seq_len(k), drop = FALSE]
  # Same window length (lag + 1) as the one required of `newdata` below, so
  # the default and the user-supplied start are interchangeable.
  starting <- tsDyn:::myTail(original.data, lag + 1)
  innov <- matrix(0, nrow = n.ahead, ncol = k)

  if (!missing(newdata)) {
    if (!inherits(newdata, c("data.frame", "matrix", "zoo", "ts"))) {
      stop("Arg 'newdata' should be of class data.frame, matrix, zoo or ts")
    }
    if (nrow(newdata) != lag + 1) {
      stop(paste0("Please provide newdata with nrow=lag+1=", lag + 1))
    }
    starting <- as.matrix(newdata)
  }

  ## trend
  # No trend start is handed to the generator, so `newdataTrendStart` has no
  # effect on the forecast.

  res <- tsDyn:::TVECM.gen(
    B = B, beta = beta, n = n.ahead, lag = lag,
    include = include,
    nthresh = nthresh,
    Thresh = Thresh,
    starting = starting, innov = innov
  )

  ## format results
  colnames(res) <- colnames(original.data)
  # Label rows so that the last one is (sample size + n.ahead). The first
  # label is derived from the number of rows actually returned, which keeps
  # the assignment valid whatever the generator hands back.
  end_rows <- nrow(original.data) + n.ahead
  start_rows <- end_rows - nrow(res) + 1
  rownames(res) <- start_rows:end_rows

  res
}

In [17]:
%%R
# Fit a VECM on the two hub series: 2 lags, one cointegrating relation,
# a constant term, maximum-likelihood estimation.
vecm <- VECM(
  hubs,
  lag = 2,
  r = 1,
  include = "const",
  estim = "ML"
)

# Reference forecast from the package's own predict method, 15 steps ahead,
# with the columns relabelled by hub name.
hub_forecast <- predict(vecm, n.ahead = 15)
colnames(hub_forecast) <- c(hub1_name, hub2_name)
hub_forecast

          the      nbp
1546 41.66868 38.89345
1547 41.53780 38.72816
1548 41.48815 38.59855
1549 41.48462 38.53015
1550 41.47826 38.45398
1551 41.46575 38.37397
1552 41.45353 38.29642
1553 41.44293 38.22291
1554 41.43323 38.15236
1555 41.42409 38.08439
1556 41.41553 38.01898
1557 41.40760 37.95608
1558 41.40027 37.89562
1559 41.39352 37.83750
1560 41.38731 37.78163


In [18]:
%%R
# Forecast with the custom predictor, starting from the most recent
# observations. mypredict.VECM requires exactly lag + 1 rows of newdata, so
# the window length is taken from the fitted model instead of being
# hard-coded.
pred_data <- tail(hubs, vecm$lag + 1)
# mypredict.VECM has no `beta` argument (it computes the coefficient
# internally), so none is passed here.
hub_forecast <- mypredict.VECM(vecm, newdata = pred_data, n.ahead = 15)
colnames(hub_forecast) <- c(hub1_name, hub2_name)
hub_forecast

          the      nbp
1546 41.66868 38.89345
1547 41.53780 38.72816
1548 41.48815 38.59855
1549 41.48462 38.53015
1550 41.47826 38.45398
1551 41.46575 38.37397
1552 41.45353 38.29642
1553 41.44293 38.22291
1554 41.43323 38.15236
1555 41.42409 38.08439
1556 41.41553 38.01898
1557 41.40760 37.95608
1558 41.40027 37.89562
1559 41.39352 37.83750
1560 41.38731 37.78163
