# Statistical models in R
This notebook covers:
1. Data preprocessing:
    1. Aligning all dataframes to a 5-day week from 2018-10-01 to 2024-08-30.
    2. Interpolating missing values.

In [22]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go

Removing duplicate dates and converting NBP prices to EUR/MWh

In [23]:
# Raw input files, listed in the order the frames are unpacked below:
# five gas hubs followed by the GBP/EUR exchange-rate series.
raw_paths = (
    "../data/raw/nbp.csv",
    "../data/raw/peg.csv",
    "../data/raw/the.csv",
    "../data/raw/ttf.csv",
    "../data/raw/ztp.csv",
    "../data/raw/gbpeur.csv",
)
nbp, peg, the, ttf, ztp, gbpeur = (pd.read_csv(raw_path) for raw_path in raw_paths)

# Keep the frames together so later cells can apply the same cleaning to each.
hubs = [nbp, peg, the, ttf, ztp, gbpeur]

In [24]:
def _index_by_date(frame):
    """Return ``frame`` indexed by its parsed ``Date`` column, without repeated dates.

    The input frame is not modified. If ``Date`` is no longer a column (the
    frame was already indexed by an earlier run of this cell), only the
    duplicate-index filter is applied, so re-running the cell does not fail.
    """
    if "Date" in frame.columns:
        frame = frame.assign(Date=pd.to_datetime(frame["Date"])).set_index("Date")
    # keep="first" retains the first row seen for each repeated index value.
    return frame[~frame.index.duplicated(keep="first")]


hubs = [_index_by_date(hub) for hub in hubs]

# Rebind the individual names to the cleaned frames.
nbp, peg, the, ttf, ztp, gbpeur = hubs

In [25]:
# Convert the NBP closes using the GBP/EUR closes. The multiplication aligns the
# two series on their Date index, so dates without a GBP/EUR quote yield NaN.
# NOTE(review): 2.9307 presumably combines pence -> GBP (/100) with
# therm -> MWh (1 therm ~ 0.029307 MWh) — confirm the raw NBP quote unit.
nbp_close_converted = nbp["CLOSE"] * gbpeur["CLOSE"] / 2.9307

# assign() returns a new frame, leaving `nbp` itself untouched.
nbp_eur_mwh = nbp.assign(CLOSE=nbp_close_converted)

In [26]:
# Destination file -> frame whose CLOSE column (with its Date index) is written there.
# NBP is exported from the converted frame, the other hubs from their cleaned frames.
close_exports = {
    "../data/close/nbp_close.csv": nbp_eur_mwh,
    "../data/close/peg_close.csv": peg,
    "../data/close/the_close.csv": the,
    "../data/close/ttf_close.csv": ttf,
    "../data/close/ztp_close.csv": ztp,
}
for out_path, frame in close_exports.items():
    frame[["CLOSE"]].to_csv(out_path)

Continuing preprocessing in R

In [27]:
# rpy2 is a Python package that allows you to run R code from Python.
# The %pip magic (rather than !pip) installs into the environment of the running kernel.
# NOTE(review): the version is not pinned, so a fresh run may pull a different rpy2 release.
%pip install rpy2

Note: you may need to restart the kernel to use updated packages.


In [28]:
# Load the rpy2 IPython extension to use R in Jupyter; it provides the %%R
# cell magic that the R cells of this notebook start with.
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


The cell magic `%%R` runs the contents of a cell as R code in Jupyter

Loading required libraries

In [29]:
%%R
# Attach the required packages, installing any that are missing.
# The order is kept as listed because the attach order decides which package's
# function wins when two packages export the same name.
required_packages <- c(
  "dplyr", "zoo", "psych", "TSA", "forecast",
  "Metrics", "ggplot2", "vars", "svars", "tsDyn"
)

# install.packages() needs a concrete CRAN mirror. When none is configured
# (the placeholder "@CRAN@"), R would prompt for one or fail, so fall back to
# the CRAN cloud mirror. An already configured mirror is left as it is.
cran_repos <- getOption("repos")
if (is.null(cran_repos) || is.na(cran_repos["CRAN"]) || cran_repos["CRAN"] == "@CRAN@") {
  cran_repos["CRAN"] <- "https://cloud.r-project.org"
}

for (pkg in required_packages) {
  # require() attaches the package and returns FALSE if it is not installed;
  # character.only = TRUE makes it use the value of `pkg` rather than the name "pkg".
  if (!require(pkg, character.only = TRUE)) {
    install.packages(pkg, repos = cran_repos)
    # library() stops with an error if the installation did not succeed.
    library(pkg, character.only = TRUE)
  }
}


In [30]:
%%R
# Read the per-hub close-price CSVs into a list named by hub
close_paths <- c(
  nbp = "../data/close/nbp_close.csv",
  peg = "../data/close/peg_close.csv",
  the = "../data/close/the_close.csv",
  ttf = "../data/close/ttf_close.csv",
  ztp = "../data/close/ztp_close.csv"
)
# lapply keeps the names of close_paths, so elements are reachable as hub_prices$nbp etc.
hub_prices <- lapply(close_paths, read.csv)

In [31]:
%%R
# Create a date index with a 5 day week (Monday to Friday) to align data
start_date <- as.Date("2018-10-01")
end_date <- as.Date("2024-08-30")
date_seq <- seq.Date(start_date, end_date, by = "day")

# Detect weekends numerically: POSIXlt$wday is 0 for Sunday and 6 for Saturday.
# weekdays() returns day names in the session locale, so comparing them with the
# English names "Saturday"/"Sunday" would keep weekends under a non-English locale.
is_weekend <- as.POSIXlt(date_seq)$wday %in% c(0, 6)
date_index <- date_seq[!is_weekend]

In [32]:
%%R
# Align every hub to the shared weekday calendar and fill the gaps in CLOSE
align_and_fill <- function(hub_df) {
  hub_df$Date <- as.Date(hub_df$Date)
  calendar <- data.frame(Date = date_index)
  # Left join on the calendar: calendar dates without an observation get NA,
  # and observations on dates outside the calendar are dropped
  aligned <- merge(calendar, hub_df, by = "Date", all.x = TRUE)
  # Interpolate NA values; rule = 2 fills leading/trailing NAs with the
  # nearest available value instead of leaving them missing
  aligned$CLOSE <- na.approx(aligned$CLOSE, rule = 2)
  aligned
}
hub_prices <- lapply(hub_prices, align_and_fill)

In [33]:
%%R
# Pull each hub's aligned and interpolated price table out of the list
nbp_price <- hub_prices[["nbp"]]
peg_price <- hub_prices[["peg"]]
the_price <- hub_prices[["the"]]
ttf_price <- hub_prices[["ttf"]]
ztp_price <- hub_prices[["ztp"]]

In [34]:
%%R
# Write each hub's interpolated series to its own CSV (without row names)
interpolated_outputs <- list(
  "../data/interpolated/nbp_close_interpolated.csv" = nbp_price,
  "../data/interpolated/peg_close_interpolated.csv" = peg_price,
  "../data/interpolated/the_close_interpolated.csv" = the_price,
  "../data/interpolated/ttf_close_interpolated.csv" = ttf_price,
  "../data/interpolated/ztp_close_interpolated.csv" = ztp_price
)
for (out_path in names(interpolated_outputs)) {
  write.csv(interpolated_outputs[[out_path]], out_path, row.names = FALSE)
}