In [None]:
install.packages(c("dplyr", "glmnet", "zoo", "lubridate"))


In [2]:
library(dplyr)
library(glmnet)
library(zoo)
library(lubridate)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: Matrix

Loaded glmnet 4.1-10


Attaching package: ‘zoo’


The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric



Attaching package: ‘lubridate’


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union




In [3]:
clean_oil_data = function(filename) {
  read.csv2(filename) %>% 
    filter(!is.na(Date)) %>% 
    # put Date into dateformat
    mutate(Date = as.Date(Date, "%d.%m.%Y")) %>% 
    # put all other variables into numer format
    mutate(across(-Date, ~ as.numeric(as.character(.))))
}

files = list(
  "Industry_D.csv", 
  "Industry_M.csv", 
  "Industry_W.csv", 
  "Macro_M.csv", 
  "StockPrices_M.csv"
)

cleaned_data = lapply(files, clean_oil_data)

industry_d = cleaned_data[[1]]
industry_m = cleaned_data[[2]] %>% 
# these variables already appear inside industry_d
  select(-any_of(c("CL1", "CL2", "Brent", "CRKS321C.Index", "Baltic.Dry.Index", "datadate")))
industry_w = cleaned_data[[3]]
macro_m = cleaned_data[[4]]
stockprices_m = cleaned_data[[5]]

head(industry_d)
head(industry_m)

Unnamed: 0_level_0,Date,CL1,CL2,Brent,CRKS321C.Index,BDIY.Index
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,2024-07-30,74.73,73.88,78.63,21.883,1762
2,2024-07-29,75.81,74.8,79.78,22.171,1797
3,2024-07-26,77.16,76.0,81.13,21.071,1808
4,2024-07-25,78.28,77.11,82.37,20.832,1834
5,2024-07-24,77.59,76.58,81.71,19.776,1864
6,2024-07-23,76.96,75.92,81.01,21.118,1869


Unnamed: 0_level_0,Date,Daily.Production,Inventories,Rig.Count,Commercial.Long,Commercial.Short,Total.Open.Interest,X,X.1,X.2,X.3
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,2024-06-28,13200,821134,581,1600085,1631509,3231594,,,,
2,2024-05-31,13100,826109,600,1750160,1785108,3535268,,,,
3,2024-04-30,13100,827161,613,1708427,1751709,3460136,,,,
4,2024-03-29,13100,815058,621,1687838,1725350,3413188,,,,
5,2024-02-29,13300,807417,626,1583286,1606750,3190036,,,,
6,2024-01-31,13000,779314,621,1673204,1695615,3368819,,,,


In [None]:
# set a grid for the date range
date_grid = data.frame(
  Date = seq(
    from = min(
      industry_d$Date,
      industry_w$Date,
      industry_m$Date,
      macro_m$Date,
      stockprices_m$Date,
      na.rm = TRUE
    ),
    to = max(
      industry_d$Date,
      industry_w$Date,
      industry_m$Date,
      macro_m$Date,
      stockprices_m$Date,
      na.rm = TRUE
    ),
    by = "day"
  )
)


In [9]:
data_compile <- date_grid %>%
  left_join(industry_d, by = "Date") %>%
  left_join(industry_w, by = "Date") %>%
  left_join(industry_m, by = "Date") %>%
  left_join(macro_m, by = "Date") %>%
  left_join(stockprices_m, by = "Date") %>%
  arrange(Date)


head(data_compile)
tail(data_compile)
print(paste("Rows after merge:", nrow(data_compile)))

Unnamed: 0_level_0,Date,CL1,CL2,Brent,CRKS321C.Index,BDIY.Index,Weekly.Prod.Crude,Weekly.Rig.Count,Weekly.change.in.Crude.Stock,Weekly.Commercial.Long,⋯,DXY.returns,VIX,Gasoline.All.Grades,Dow.Jones.US.Oil.Gas.Index,ExxonMobil,ConocoPhilips,Chevron,BP,Shell,TotalEnergies
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,1990-01-02,22.89,22.41,21.95,,,,,,,⋯,,,,,,,,,,
2,1990-01-03,23.68,22.97,23.48,,,,,,,⋯,,,,,,,,,,
3,1990-01-04,23.41,22.53,26.78,,,,,,,⋯,,,,,,,,,,
4,1990-01-05,23.08,22.03,27.4,,,7512.0,,,,⋯,,,,,,,,,,
5,1990-01-06,,,,,,,,,,⋯,,,,,,,,,,
6,1990-01-07,,,,,,,,,,⋯,,,,,,,,,,


Unnamed: 0_level_0,Date,CL1,CL2,Brent,CRKS321C.Index,BDIY.Index,Weekly.Prod.Crude,Weekly.Rig.Count,Weekly.change.in.Crude.Stock,Weekly.Commercial.Long,⋯,DXY.returns,VIX,Gasoline.All.Grades,Dow.Jones.US.Oil.Gas.Index,ExxonMobil,ConocoPhilips,Chevron,BP,Shell,TotalEnergies
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
12624,2024-07-25,78.28,77.11,82.37,20.832,1834.0,,,,,⋯,,,,,,,,,,
12625,2024-07-26,77.16,76.0,81.13,21.071,1808.0,,,,,⋯,,,,,,,,,,
12626,2024-07-27,,,,,,,,,,⋯,,,,,,,,,,
12627,2024-07-28,,,,,,,,,,⋯,,,,,,,,,,
12628,2024-07-29,75.81,74.8,79.78,22.171,1797.0,,,,,⋯,,,,,,,,,,
12629,2024-07-30,74.73,73.88,78.63,21.883,1762.0,,,,,⋯,,,,,,,,,,


[1] "Rows after merge: 12629"


In [10]:
data_compile_filled <- data_compile %>%
  arrange(Date) %>%
  mutate(across(-Date, ~ zoo::na.locf(., na.rm = FALSE))) %>% 
  mutate(across(-Date, ~ zoo::na.locf(., fromLast = TRUE, na.rm = FALSE)))

head(data_compile_filled)  # earliest dates
tail(data_compile_filled)  # latest dates

Unnamed: 0_level_0,Date,CL1,CL2,Brent,CRKS321C.Index,BDIY.Index,Weekly.Prod.Crude,Weekly.Rig.Count,Weekly.change.in.Crude.Stock,Weekly.Commercial.Long,⋯,DXY.returns,VIX,Gasoline.All.Grades,Dow.Jones.US.Oil.Gas.Index,ExxonMobil,ConocoPhilips,Chevron,BP,Shell,TotalEnergies
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,1990-01-02,22.89,22.41,21.95,1.777,1599,7512,532,9539,166945,⋯,-0.004610635,13.2,1.125,107.21,15.1875,12.483,22.3125,19.9688,61.28,14.5116
2,1990-01-03,23.68,22.97,23.48,1.777,1599,7512,532,9539,166945,⋯,-0.004610635,13.2,1.125,107.21,15.1875,12.483,22.3125,19.9688,61.28,14.5116
3,1990-01-04,23.41,22.53,26.78,1.777,1599,7512,532,9539,166945,⋯,-0.004610635,13.2,1.125,107.21,15.1875,12.483,22.3125,19.9688,61.28,14.5116
4,1990-01-05,23.08,22.03,27.4,1.777,1599,7512,532,9539,166945,⋯,-0.004610635,13.2,1.125,107.21,15.1875,12.483,22.3125,19.9688,61.28,14.5116
5,1990-01-06,23.08,22.03,27.4,1.777,1599,7512,532,9539,166945,⋯,-0.004610635,13.2,1.125,107.21,15.1875,12.483,22.3125,19.9688,61.28,14.5116
6,1990-01-07,23.08,22.03,27.4,1.777,1599,7512,532,9539,166945,⋯,-0.004610635,13.2,1.125,107.21,15.1875,12.483,22.3125,19.9688,61.28,14.5116


Unnamed: 0_level_0,Date,CL1,CL2,Brent,CRKS321C.Index,BDIY.Index,Weekly.Prod.Crude,Weekly.Rig.Count,Weekly.change.in.Crude.Stock,Weekly.Commercial.Long,⋯,DXY.returns,VIX,Gasoline.All.Grades,Dow.Jones.US.Oil.Gas.Index,ExxonMobil,ConocoPhilips,Chevron,BP,Shell,TotalEnergies
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
12624,2024-07-25,78.28,77.11,82.37,20.832,1834,13300,482,-3741,654310,⋯,0.01135205,12.44,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68
12625,2024-07-26,77.16,76.0,81.13,21.071,1808,13300,482,-3741,654310,⋯,0.01135205,12.44,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68
12626,2024-07-27,77.16,76.0,81.13,21.071,1808,13300,482,-3741,654310,⋯,0.01135205,12.44,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68
12627,2024-07-28,77.16,76.0,81.13,21.071,1808,13300,482,-3741,654310,⋯,0.01135205,12.44,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68
12628,2024-07-29,75.81,74.8,79.78,22.171,1797,13300,482,-3741,654310,⋯,0.01135205,12.44,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68
12629,2024-07-30,74.73,73.88,78.63,21.883,1762,13300,482,-3741,654310,⋯,0.01135205,12.44,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68


In [6]:
data_compile_final <- data_compile_filled %>%
    mutate(
        CL1_log_ret = log(CL1) - log(lag(CL1)),
        CL2_log_ret = log(CL2) - log(lag(CL2))
    ) %>%
    filter(!is.na(CL1_log_ret))

head(data_compile_final)
tail(data_compile_final)

data_compile_final[133800:133825,]

[1m[22m[36mℹ[39m In argument: `CL1_log_ret = log(CL1) - log(lag(CL1))`.
[33m![39m NaNs produced


Unnamed: 0_level_0,Date,CL1,CL2,Brent,CRKS321C.Index,BDIY.Index,Weekly.Prod.Crude,Weekly.Rig.Count,Weekly.change.in.Crude.Stock,Weekly.Commercial.Long,⋯,Gasoline.All.Grades,Dow.Jones.US.Oil.Gas.Index,ExxonMobil,ConocoPhilips,Chevron,BP,Shell,TotalEnergies,CL1_log_ret,CL2_log_ret
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,1990-01-03,23.68,22.97,23.48,1.777,1599,7512,532,9539,166945,⋯,1.125,107.21,15.1875,12.483,22.3125,19.9688,61.28,14.5116,0.03393068,0.024681729
2,1990-01-04,23.41,22.53,26.78,1.777,1599,7512,532,9539,166945,⋯,1.125,107.21,15.1875,12.483,22.3125,19.9688,61.28,14.5116,-0.01146753,-0.019341262
3,1990-01-05,23.08,22.03,27.4,1.777,1599,7512,532,9539,166945,⋯,1.125,107.21,15.1875,12.483,22.3125,19.9688,61.28,14.5116,-0.01419684,-0.022442594
4,1990-01-08,21.62,21.03,24.98,1.777,1599,7512,532,9539,166945,⋯,1.125,107.21,15.1875,12.483,22.3125,19.9688,61.28,14.5116,-0.06534763,-0.046455171
5,1990-01-09,22.07,21.23,24.75,1.777,1599,7512,532,9539,166945,⋯,1.125,107.21,15.1875,12.483,22.3125,19.9688,61.28,14.5116,0.02060041,0.009465286
6,1990-01-10,22.9,21.99,25.47,1.777,1599,7512,532,9539,166945,⋯,1.125,107.21,15.1875,12.483,22.3125,19.9688,61.28,14.5116,0.03691769,0.035172529


Unnamed: 0_level_0,Date,CL1,CL2,Brent,CRKS321C.Index,BDIY.Index,Weekly.Prod.Crude,Weekly.Rig.Count,Weekly.change.in.Crude.Stock,Weekly.Commercial.Long,⋯,Gasoline.All.Grades,Dow.Jones.US.Oil.Gas.Index,ExxonMobil,ConocoPhilips,Chevron,BP,Shell,TotalEnergies,CL1_log_ret,CL2_log_ret
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
133825,,74.73,73.88,18.98,21.883,1762,13300,482,-3741,654310,⋯,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68,0,0
133826,,74.73,73.88,18.98,21.883,1762,13300,482,-3741,654310,⋯,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68,0,0
133827,,74.73,73.88,18.98,21.883,1762,13300,482,-3741,654310,⋯,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68,0,0
133828,,74.73,73.88,18.98,21.883,1762,13300,482,-3741,654310,⋯,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68,0,0
133829,,74.73,73.88,18.98,21.883,1762,13300,482,-3741,654310,⋯,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68,0,0
133830,,74.73,73.88,16.37,21.883,1762,13300,482,-3741,654310,⋯,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68,0,0


Unnamed: 0_level_0,Date,CL1,CL2,Brent,CRKS321C.Index,BDIY.Index,Weekly.Prod.Crude,Weekly.Rig.Count,Weekly.change.in.Crude.Stock,Weekly.Commercial.Long,⋯,Gasoline.All.Grades,Dow.Jones.US.Oil.Gas.Index,ExxonMobil,ConocoPhilips,Chevron,BP,Shell,TotalEnergies,CL1_log_ret,CL2_log_ret
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
133800,,74.73,73.88,18.98,21.883,1762,13300,482,-3741,654310,⋯,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68,0,0
133801,,74.73,73.88,18.98,21.883,1762,13300,482,-3741,654310,⋯,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68,0,0
133802,,74.73,73.88,18.98,21.883,1762,13300,482,-3741,654310,⋯,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68,0,0
133803,,74.73,73.88,18.98,21.883,1762,13300,482,-3741,654310,⋯,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68,0,0
133804,,74.73,73.88,18.98,21.883,1762,13300,482,-3741,654310,⋯,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68,0,0
133805,,74.73,73.88,16.37,21.883,1762,13300,482,-3741,654310,⋯,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68,0,0
133806,,74.73,73.88,18.98,21.883,1762,13300,482,-3741,654310,⋯,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68,0,0
133807,,74.73,73.88,18.98,21.883,1762,13300,482,-3741,654310,⋯,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68,0,0
133808,,74.73,73.88,18.98,21.883,1762,13300,482,-3741,654310,⋯,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68,0,0
133809,,74.73,73.88,18.98,21.883,1762,13300,482,-3741,654310,⋯,3.557,760.4,115.12,114.38,156.42,36.1,72.18,66.68,0,0
