# Statistical models in R
This notebook covers:
1. Data preprocessing:
    1. Aligning all data frames to a 5-day (Monday–Friday) week from 2018-10-01 to 2024-08-30.
    2. Interpolating missing values.
2. ARMA models.

In [None]:
# rpy2 is a Python package that allows you to run R code from Python
%pip install rpy2

In [1]:
# Load the rpy2 extension to use R in Jupyter
%load_ext rpy2.ipython

The cell magic `%%R` runs the contents of a cell as R code in Jupyter.

In [19]:
# Load the R packages that we will use in this notebook
%%R
# Packages used in this notebook, attached in the order listed.
required_packages <- c("dplyr", "zoo", "psych")
for (pkg in required_packages) {
  # requireNamespace() only tests whether the package can be loaded; unlike
  # require() it does not attach it and, with quietly = TRUE, does not warn
  # when the package is missing.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  # library() stops with an error if the package is still unavailable, so a
  # failed installation is reported here instead of surfacing later.
  library(pkg, character.only = TRUE)
}

In [42]:
# Load the data
%%R
# CSV file for each hub, keyed by the hub's short code.
hub_files <- c(
  nbp = "../data/nbp_close.csv",
  peg = "../data/peg_close.csv",
  the = "../data/the_close.csv",
  ttf = "../data/ttf_close.csv",
  ztp = "../data/ztp_close.csv"
)
# lapply() carries the names over, so `hubs` is a named list with one
# data frame per hub.
hubs <- lapply(hub_files, read.csv)

In [43]:
# Create a date index with a 5 day week
%%R
# Build the Monday-Friday calendar that every hub series is aligned to.
start_date <- as.Date("2018-10-01")
end_date <- as.Date("2024-08-30")
date_seq <- seq.Date(start_date, end_date, by = "day")
# POSIXlt's `wday` is the numeric day of the week (0 = Sunday, 6 = Saturday).
# Unlike the day names returned by weekdays(), it does not depend on the
# session locale, so weekends are dropped on any system.
is_weekend <- as.POSIXlt(date_seq)$wday %in% c(0L, 6L)
date_index <- date_seq[!is_weekend]

In [44]:
# Merge the hub data with the created date index and fill in missing values
%%R
# Put every hub on the common `date_index` calendar and fill the gaps.
hubs <- lapply(hubs, function(hub) {
  hub$Date <- as.Date(hub$Date)
  # Left join onto the calendar: every date in `date_index` is kept, and
  # dates with no observation for this hub get NA in the hub's columns.
  aligned <- merge(
    x = data.frame(Date = date_index),
    y = hub,
    by = "Date",
    all.x = TRUE
  )
  # Linear interpolation of the missing closes; rule = 2 extends the nearest
  # observed value over leading and trailing NAs.
  aligned[["CLOSE"]] <- na.approx(aligned[["CLOSE"]], rule = 2)
  aligned
})

In [45]:
# Extract each hub's data frame from the list into its own variable
%%R
# Give each hub's data frame its own top-level variable; [[ ]] matches the
# element name exactly.
nbp <- hubs[["nbp"]]
peg <- hubs[["peg"]]
the <- hubs[["the"]]
ttf <- hubs[["ttf"]]
ztp <- hubs[["ztp"]]

In [54]:
%%R
#' Fit an OLS regression between two hubs and store its residuals
#'
#' Regresses `hub1$CLOSE` on `hub2$CLOSE` (with an intercept), writes the
#' residuals next to `hub1$Date` to a CSV file and returns the fitted model.
#'
#' @param hub1 Data frame with columns `Date` and `CLOSE`; `CLOSE` is the
#'   response.
#' @param hub2 Data frame with a `CLOSE` column used as the regressor. Rows
#'   are paired with `hub1` by position, not by date.
#' @param file_name Path of the CSV file to write (columns `Date` and
#'   `Residuals`). When it is a character path, a missing parent directory
#'   is created.
#' @return The fitted `lm` object.
store_ols_residuals <- function(hub1, hub2, file_name) {
  # The regression pairs rows by position, so differing dates mean the two
  # series are not aligned. Warn rather than stop to stay compatible with
  # existing callers.
  if (!is.null(hub2$Date) &&
      !identical(as.character(hub1$Date), as.character(hub2$Date))) {
    warning(
      "hub1 and hub2 have different Date columns; rows are paired by position",
      call. = FALSE
    )
  }

  # na.exclude makes residuals() return one value per input row (NA where an
  # observation was dropped), so the residuals always line up with hub1$Date.
  # Without missing values the fit is the same as with the default na.action.
  ols_model <- lm(hub1$CLOSE ~ hub2$CLOSE, na.action = na.exclude)
  residuals_df <- data.frame(
    Date = hub1$Date,
    Residuals = residuals(ols_model)
  )

  # write.csv() cannot create directories, so make sure the target exists.
  if (is.character(file_name)) {
    out_dir <- dirname(file_name)
    if (!dir.exists(out_dir)) {
      dir.create(out_dir, recursive = TRUE)
    }
  }
  write.csv(residuals_df, file_name, row.names = FALSE)

  ols_model
}
# Folder (with trailing slash) that receives the intermediate CSV files.
folder_path <- "intermediate_storage/"
# Regress the ZTP closes on the TTF closes and save the residuals.
ztp_ttf_ols <- store_ols_residuals(
  hub1 = ztp,
  hub2 = ttf,
  file_name = paste0(folder_path, "ztp_ttf_residuals.csv")
)