In [3]:
# rpy2 is a Python package that allows you to run R code from Python.
# NOTE(review): the version is not pinned, so a fresh environment may install a
# different rpy2 release than the one this notebook was last run with.
%pip install rpy2

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Load the rpy2 IPython extension so that cells starting with %%R are run as R
# code; every analysis cell in this notebook relies on it.
%load_ext rpy2.ipython

In [4]:
%%R
# Attach every R package this notebook uses, installing any that are missing.
# The order is kept as dplyr, zoo, urca, forecast, tseries, moments because the
# attach order decides which package masks which when names collide.
required_packages <- c("dplyr", "zoo", "urca", "forecast", "tseries", "moments")

# install.packages() needs a CRAN mirror. In a non-interactive session R cannot
# prompt for one, so fall back to the CRAN CDN when no mirror is configured
# ("@CRAN@" is R's placeholder for "not chosen yet").
cran_repos <- getOption("repos")
if (is.null(cran_repos) || identical(unname(cran_repos["CRAN"]), "@CRAN@")) {
  cran_repos["CRAN"] <- "https://cloud.r-project.org"
}

for (pkg in required_packages) {
  # require() returns FALSE (with a warning) rather than stopping when the
  # package is unavailable, so it doubles as the availability check.
  if (!require(pkg, character.only = TRUE)) {
    install.packages(pkg, repos = cran_repos)
    # library() stops with an error if the installation did not succeed, so a
    # failed install is not silently ignored.
    library(pkg, character.only = TRUE)
  }
}

Loading required package: dplyr

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

Loading required package: zoo

Attaching package: ‘zoo’

The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric

Loading required package: urca
Loading required package: forecast
Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 
Loading required package: tseries

    ‘tseries’ version: 0.10-57

    ‘tseries’ is a package for time series analysis and computational
    finance.

    See ‘library(help="tseries")’ for details.

Loading required package: moments


In [6]:
%%R

# Load the interpolated close-price file for each hub. The paths are relative,
# so they resolve against the working directory of the R session.
# Each file is expected to provide a CLOSE column, which is what the rest of
# the notebook reads.
nbp <- read.csv("../data/interpolated/nbp_close_interpolated.csv")
peg <- read.csv("../data/interpolated/peg_close_interpolated.csv")
the <- read.csv("../data/interpolated/the_close_interpolated.csv")
ttf <- read.csv("../data/interpolated/ttf_close_interpolated.csv")
ztp <- read.csv("../data/interpolated/ztp_close_interpolated.csv")

# Augmented Dickey-Fuller test on each hub's CLOSE series with the lag order
# fixed at k = 5. The printed alternative hypothesis is "stationary", so a
# large p-value means a unit root cannot be rejected.
# The calls are written out one by one because adf.test labels its output with
# the expression it was called with (e.g. "data:  nbp$CLOSE").
print(adf.test(nbp$CLOSE, k = 5))
print(adf.test(peg$CLOSE, k = 5))
print(adf.test(the$CLOSE, k = 5))
print(adf.test(ttf$CLOSE, k = 5))
print(adf.test(ztp$CLOSE, k = 5))




	Augmented Dickey-Fuller Test

data:  nbp$CLOSE
Dickey-Fuller = -2.9848, Lag order = 5, p-value = 0.1614
alternative hypothesis: stationary




	Augmented Dickey-Fuller Test

data:  peg$CLOSE
Dickey-Fuller = -2.7475, Lag order = 5, p-value = 0.2619
alternative hypothesis: stationary


	Augmented Dickey-Fuller Test

data:  the$CLOSE
Dickey-Fuller = -2.1861, Lag order = 5, p-value = 0.4995
alternative hypothesis: stationary


	Augmented Dickey-Fuller Test

data:  ttf$CLOSE
Dickey-Fuller = -2.2424, Lag order = 5, p-value = 0.4757
alternative hypothesis: stationary


	Augmented Dickey-Fuller Test

data:  ztp$CLOSE
Dickey-Fuller = -2.5844, Lag order = 5, p-value = 0.3309
alternative hypothesis: stationary



In [7]:
%%R


# Compute descriptive statistics and diagnostic tests for one series.
#
# Arguments:
#   data       Series to analyse; the callers in this notebook pass a hub's
#              CLOSE column. The name `data` is kept on purpose: the test
#              functions label their printed output with the expression they
#              receive ("data:  data").
#   lag_order  Lag order shared by the Ljung-Box, KPSS and ADF tests.
#              Defaults to 5, the value this notebook uses throughout.
#
# Returns a named list with the elements mean, std_dev, skewness, kurtosis,
# jarque_bera, ljung_box, kpss (the summary of the ur.kpss fit) and adf.
#
# NOTE(review): na.rm = TRUE only applies to the four moment calculations; the
# test functions receive `data` unchanged, so missing values are not handled
# consistently across the list. Confirm the input is NA-free (it is described
# as interpolated) before relying on this.
calculate_statistics <- function(data, lag_order = 5) {
  # Moments of the distribution
  mean_val <- mean(data, na.rm = TRUE)
  sd_val <- sd(data, na.rm = TRUE)
  skewness_val <- skewness(data, na.rm = TRUE)
  kurtosis_val <- kurtosis(data, na.rm = TRUE)

  # Normality, autocorrelation and stationarity / unit-root tests
  jarque_bera_val <- jarque.test(data)
  ljung_box_val <- Box.test(data, lag = lag_order, type = "Ljung-Box")
  # use.lag sets the KPSS lag count explicitly, overriding the "short" rule.
  kpss_val <- ur.kpss(data, lags = "short", use.lag = lag_order)
  adf_val <- adf.test(data, k = lag_order)

  return(list(mean = mean_val,
              std_dev = sd_val,
              skewness = skewness_val,
              kurtosis = kurtosis_val,
              jarque_bera = jarque_bera_val,
              ljung_box = ljung_box_val,
              kpss = summary(kpss_val),
              adf = adf_val))
}

# Run the statistics on the CLOSE series of every hub
nbp_stats <- calculate_statistics(nbp$CLOSE)
peg_stats <- calculate_statistics(peg$CLOSE)
the_stats <- calculate_statistics(the$CLOSE)
ttf_stats <- calculate_statistics(ttf$CLOSE)
ztp_stats <- calculate_statistics(ztp$CLOSE)

# Print each result list under its heading, hub by hub in the order above.
# invisible() keeps Map's own return value out of the cell output.
invisible(Map(
  function(heading, stats) {
    print(heading)
    print(stats)
  },
  c("NBP Statistics:",
    "PEG Statistics:",
    "THE Statistics:",
    "TTF Statistics:",
    "ZTP Statistics:"),
  list(nbp_stats, peg_stats, the_stats, ttf_stats, ztp_stats)
))


[1] "NBP Statistics:"
$mean
[1] 41.24167

$std_dev
[1] 38.36496

$skewness
[1] 1.862139

$kurtosis
[1] 6.783042

$jarque_bera

	Jarque-Bera Normality Test

data:  data
JB = 1814.2, p-value < 2.2e-16
alternative hypothesis: greater


$ljung_box

	Box-Ljung test

data:  data
X-squared = 7253.4, df = 5, p-value < 2.2e-16


$kpss

####################### 
# KPSS Unit Root Test # 
####################### 

Test is of type: mu with 5 lags. 

Value of test-statistic is: 7.1767 

Critical value for a significance level of: 
                10pct  5pct 2.5pct  1pct
critical values 0.347 0.463  0.574 0.739


$adf

	Augmented Dickey-Fuller Test

data:  data
Dickey-Fuller = -2.9848, Lag order = 5, p-value = 0.1614
alternative hypothesis: stationary


[1] "PEG Statistics:"
$mean
[1] 42.23844

$std_dev
[1] 39.9608

$skewness
[1] 1.752134

$kurtosis
[1] 6.006887

$jarque_bera

	Jarque-Bera Normality Test

data:  data
JB = 1372.6, p-value < 2.2e-16
alternative hypothesis: greater


$ljung_box

	Box-Lj

In [9]:
%%R

# Johansen trace test for cointegration between two hubs (here NBP and PEG).
# Despite its name, `johansen_test_all_hubs` covers only this single pair.
hub1 <- nbp
hub2 <- peg

# The column names chosen here are the labels ca.jo uses in its output
# (hub1.l2, hub2.l2, ...).
hubs <- data.frame(
  hub1 = hub1$CLOSE,
  hub2 = hub2$CLOSE
)

# The lag order is left at ca.jo's default.
johansen_test_all_hubs <- ca.jo(
  hubs,
  type = "trace",
  ecdet = "none",
  spec = "longrun"
)

summary(johansen_test_all_hubs)


###################### 
# Johansen-Procedure # 
###################### 

Test type: trace statistic , with linear trend 

Eigenvalues (lambda):
[1] 0.037569175 0.005319541



Values of teststatistic and critical values of test:

          test 10pct  5pct  1pct
r <= 1 |  8.23  6.50  8.18 11.65
r = 0  | 67.32 15.66 17.95 23.52

Eigenvectors, normalised to first column:
(These are the cointegration relations)

           hub1.l2   hub2.l2
hub1.l2  1.0000000  1.000000
hub2.l2 -0.9414117 -3.962025

Weights W:
(This is the loading matrix)

           hub1.l2     hub2.l2
hub1.d -0.05204497 0.003308664
hub2.d  0.02235975 0.003309374



In [10]:
%%R
# Collect the CLOSE series of all five hubs side by side, one column per hub.
# The column names are the hub identifiers used by the pairwise analysis below.
combined_hubs <- data.frame(
  nbp = nbp$CLOSE,
  peg = peg$CLOSE,
  the = the$CLOSE,
  ttf = ttf$CLOSE,
  ztp = ztp$CLOSE
)

In [11]:
%%R

# Johansen trace test for every unordered pair of hubs in `combined_hubs`.
hub_pairs <- combn(names(combined_hubs), 2, simplify = FALSE)

# Summarise the Johansen test of one hub pair as a two-row data frame.
#
# Arguments:
#   pair  Character vector of length two: column names in `combined_hubs`.
#
# Returns a data frame with the columns Pair, Rank, Test_Statistic,
# Significant_at_5_pct and Coefficient.1.
#
# Layout note: the two rows serve two purposes at once. Rank, Test_Statistic
# and Significant_at_5_pct describe the hypotheses "r <= 1" and "r = 0", while
# Coefficient.1 holds the two elements of the first eigenvector (row 1: first
# hub, row 2: second hub). Later cells read the second hub's coefficient from
# the second row of each pair, so this layout must not change.
johansen_pair_summary <- function(pair) {
  jotest <- ca.jo(combined_hubs[, pair], type = "trace", ecdet = "none", spec = "longrun")

  # as.numeric() drops the dimnames that would otherwise leak into the row
  # names of the result ("r <= 1 |", "r = 0  |", ...).
  test_stats <- as.numeric(jotest@teststat)
  crit_values_5pct <- as.numeric(jotest@cval[, 2])  # second column = 5pct

  data.frame(
    Pair = paste(pair[1], "-", pair[2]),
    Rank = c("r <= 1", "r = 0"),
    Test_Statistic = test_stats,
    Significant_at_5_pct = test_stats > crit_values_5pct,
    # Named directly instead of relying on check.names to turn
    # `Coefficient 1` into Coefficient.1.
    Coefficient.1 = as.numeric(jotest@V[, 1]),
    row.names = NULL
  )
}

# Build all per-pair frames first and bind them once, rather than growing
# `results` with rbind() on every iteration.
results <- do.call(rbind, lapply(hub_pairs, johansen_pair_summary))
rownames(results) <- NULL

print(results)


               Pair   Rank Test_Statistic Significant_at_5_pct Coefficient.1
r <= 1 |  nbp - peg r <= 1       8.229960                 TRUE     1.0000000
r = 0  |  nbp - peg  r = 0      67.316192                 TRUE    -0.9414117
r <= 1 |1 nbp - the r <= 1       4.978199                FALSE     1.0000000
r = 0  |1 nbp - the  r = 0      32.947909                 TRUE    -0.6878284
r <= 1 |2 nbp - ttf r <= 1       5.241710                FALSE     1.0000000
r = 0  |2 nbp - ttf  r = 0      32.385712                 TRUE    -0.7160428
r <= 1 |3 nbp - ztp r <= 1       7.025634                FALSE     1.0000000
r = 0  |3 nbp - ztp  r = 0      41.637661                 TRUE    -0.8387576
r <= 1 |4 peg - the r <= 1       5.915996                FALSE     1.0000000
r = 0  |4 peg - the  r = 0      51.224301                 TRUE    -0.7754893
r <= 1 |5 peg - ttf r <= 1       6.528849                FALSE     1.0000000
r = 0  |5 peg - ttf  r = 0      58.043996                 TRUE    -0.8066732

In [12]:
%%R
# For every hub pair, rebuild the spread implied by the first Johansen
# eigenvector and run an ADF test (lag order 5) on it.
# `results` holds two consecutive rows per pair, hence the step of 2.
for (i in seq(1, nrow(results), by = 2)) {
  # Pair label such as "nbp - peg", split back into the two column names
  pair <- results[i, "Pair"]
  hubs <- strsplit(pair, " - ")[[1]]

  # The pair's second row (the one labelled "r = 0") carries the second hub's
  # element of the eigenvector; the first hub's element is in the first row.
  coefficient <- results[i + 1, "Coefficient.1"]

  # Spread = first hub + coefficient * second hub.
  # The variable must stay named `spread`: adf.test prints it as the data label.
  spread <- combined_hubs[, hubs[1]] + combined_hubs[, hubs[2]] * coefficient

  print(pair)
  print(adf.test(spread, k = 5))
}


[1] "nbp - peg"

	Augmented Dickey-Fuller Test

data:  spread
Dickey-Fuller = -5.5732, Lag order = 5, p-value = 0.01
alternative hypothesis: stationary

[1] "nbp - the"

	Augmented Dickey-Fuller Test

data:  spread
Dickey-Fuller = -5.0076, Lag order = 5, p-value = 0.01
alternative hypothesis: stationary

[1] "nbp - ttf"

	Augmented Dickey-Fuller Test

data:  spread
Dickey-Fuller = -5.081, Lag order = 5, p-value = 0.01
alternative hypothesis: stationary

[1] "nbp - ztp"

	Augmented Dickey-Fuller Test

data:  spread
Dickey-Fuller = -5.5803, Lag order = 5, p-value = 0.01
alternative hypothesis: stationary

[1] "peg - the"

	Augmented Dickey-Fuller Test

data:  spread
Dickey-Fuller = -4.6626, Lag order = 5, p-value = 0.01
alternative hypothesis: stationary

[1] "peg - ttf"

	Augmented Dickey-Fuller Test

data:  spread
Dickey-Fuller = -4.6385, Lag order = 5, p-value = 0.01
alternative hypothesis: stationary

[1] "peg - ztp"

	Augmented Dickey-Fuller Test

data:  spread
Dickey-Fuller = -4.7976, Lag order = 5, p-value = 0.01
alternative hypothesis: stationary

[1] "the - ttf"

	Augmented Dickey-Fuller Test

data:  spread
Dickey-Fu

1: In adf.test(spread, k = 5) : p-value smaller than printed p-value
2: In adf.test(spread, k = 5) : p-value smaller than printed p-value
3: In adf.test(spread, k = 5) : p-value smaller than printed p-value
4: In adf.test(spread, k = 5) : p-value smaller than printed p-value
5: In adf.test(spread, k = 5) : p-value smaller than printed p-value
6: In adf.test(spread, k = 5) : p-value smaller than printed p-value
7: In adf.test(spread, k = 5) : p-value smaller than printed p-value
8: In adf.test(spread, k = 5) : p-value smaller than printed p-value
9: In adf.test(spread, k = 5) : p-value smaller than printed p-value
10: In adf.test(spread, k = 5) : p-value smaller than printed p-value


In [13]:
%%R

# Collect the spread of every hub pair into one data frame, one column per pair.
# The Time column is filled from the row names of `combined_hubs`.
# NOTE(review): those look like default row indices rather than dates - confirm
# whether a real timestamp column should be used instead.
spreads <- data.frame(Time = rownames(combined_hubs))

# `results` holds two consecutive rows per pair, hence the step of 2.
for (i in seq(1, nrow(results), by = 2)) {
  # Pair label such as "nbp - peg", split back into the two column names
  pair <- results[i, "Pair"]
  hubs <- strsplit(pair, " - ")[[1]]

  # Second hub's element of the first eigenvector, stored in the pair's
  # second row (the one labelled "r = 0")
  coefficient <- results[i + 1, "Coefficient.1"]

  # Spread = first hub + coefficient * second hub
  spread <- combined_hubs[, hubs[1]] + combined_hubs[, hubs[2]] * coefficient

  # Column name is the two hub names joined by an underscore, e.g. "nbp_peg"
  spreads[[paste0(hubs[1], "_", hubs[2])]] <- spread
}



