# Statistical models in R
This notebook covers:
1. Naïve model

In [20]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go

In [21]:
# rpy2 is a Python package that allows you to run R code from Python
%pip install rpy2

Note: you may need to restart the kernel to use updated packages.


In [22]:
# Load the rpy2 extension to use R in Jupyter
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


The `%%R` cell magic runs the contents of a cell as R code in Jupyter.

In [23]:
%%R
# Install (if missing) and attach the packages used by this notebook.
# The order is kept as-is on purpose: later packages mask functions of
# earlier ones, so reordering could change which function a bare name hits.
required_packages <- c(
  "dplyr", "zoo", "psych", "TSA", "forecast",
  "Metrics", "ggplot2", "vars", "svars", "tsDyn"
)

for (pkg in required_packages) {
  # requireNamespace() only checks availability; attaching is left to
  # library(), which fails loudly if the installation did not succeed.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}


In [24]:
%%R
# Read the interpolated closing-price file of every hub into a named list,
# keyed by hub code.
price_files <- c(
  nbp = "../data/interpolated/nbp_close_interpolated.csv",
  peg = "../data/interpolated/peg_close_interpolated.csv",
  the = "../data/interpolated/the_close_interpolated.csv",
  ttf = "../data/interpolated/ttf_close_interpolated.csv",
  ztp = "../data/interpolated/ztp_close_interpolated.csv"
)
# lapply() keeps the names of the path vector, so the result is indexed
# by hub code exactly like a hand-written list(nbp = ..., peg = ..., ...).
hub_prices <- lapply(price_files, read.csv)

In [25]:
%%R

#' Naive (persistence) forecast for a single hub
#'
#' Predicts each of the last `horizon` closing prices with the closing price
#' observed `window_size` rows earlier. Optionally prints error metrics and
#' optionally writes the predictions to a CSV file.
#'
#' @param hub1_name Name of an element of the global `hub_prices` list. That
#'   element must provide the columns `CLOSE` and `Date`.
#' @param horizon Number of trailing rows to predict (>= 1).
#' @param window_size Lag in rows between a prediction and the observation
#'   it copies (>= 1). `horizon + window_size` must not exceed the number of
#'   rows, otherwise some predictions would have no source observation.
#' @param verbose If `TRUE`, print the MAE and RMSE of the predictions.
#' @param save If `TRUE`, write the predictions to
#'   `../predictions/test/predictions/<hub>_h<horizon>_w<window_size>_naive_predictions.csv`.
#' @return Invisibly, a data frame with a `Date` column and one column named
#'   after the hub that holds the predictions.
naive_predictions <- function(hub1_name, horizon, window_size, verbose = TRUE, save = TRUE) {
  hub1 <- hub_prices[[hub1_name]]
  if (is.null(hub1)) {
    stop("Unknown hub: ", hub1_name, call. = FALSE)
  }

  closes <- hub1$CLOSE
  n_obs <- length(closes)
  stopifnot(
    is.numeric(horizon), length(horizon) == 1, horizon >= 1,
    is.numeric(window_size), length(window_size) == 1, window_size >= 1,
    horizon + window_size <= n_obs
  )

  # Index arithmetic instead of a bare lag(): which `lag` a bare call hits
  # depends on package masking (stats::lag does not shift the values at all).
  target_rows <- seq.int(to = n_obs, length.out = horizon)
  predicted <- closes[target_rows - window_size]
  actual <- closes[target_rows]

  if (verbose) {
    hub1_naive_mae <- Metrics::mae(actual, predicted)
    hub1_naive_rmse <- Metrics::rmse(actual, predicted)
    print(paste0(hub1_name, ": Mean Absolute Error: ", hub1_naive_mae))
    # The value is the *root* mean squared error, so label it as such.
    print(paste0(hub1_name, ": Root Mean Squared Error: ", hub1_naive_rmse))
  }

  predictions <- data.frame(Date = hub1$Date[target_rows], predicted)
  colnames(predictions) <- c("Date", hub1_name)

  if (save) {
    write.csv(predictions, paste0("../predictions/test/predictions/",hub1_name, "_h", horizon, "_w", window_size, "_naive_predictions.csv"), row.names = FALSE)
  }

  invisible(predictions)
}

In [30]:
%%R
# Single configuration: hub "the", 250-row horizon, lag of 5 rows.
horizon <- 250
window_size <- 5
hub1_name <- "the"

naive_predictions(
  hub1_name = hub1_name,
  horizon = horizon,
  window_size = window_size,
  verbose = TRUE,
  save = TRUE
)

[1] "the: Mean Absolute Error: 2.13075466666667"


[1] "the: Mean Squared Error: 2.94050043911652"


In [31]:
%%R
# Batch run: save naive predictions for every hub / lag combination
# without printing metrics.
horizon <- 250
window_sizes <- c(1, 2, 5, 10)
hubs <- c("ttf", "the", "nbp")

for (hub in hubs) {
  for (window_size in window_sizes) {
    naive_predictions(
      hub1_name = hub,
      horizon = horizon,
      window_size = window_size,
      verbose = FALSE,
      save = TRUE
    )
  }
}

In [28]:
%%R


UsageError: %%R is a cell magic, but the cell body is empty. Did you mean the line magic %R (single %)?
