## Application - Forecasting daily aggregate US stock returns 

In [None]:
# Empty the workspace
# Removes every visible object from the current environment so the notebook
# starts from a clean state. NOTE: this does not detach packages or reset
# options; restart the R session for a truly fresh run.
rm(list = ls())

# Set Folder-Path
# Root folder of the project; the code below builds its "Code", "Data" and
# "Results" paths relative to it.
path <- "~/LocalPredictability" # Change to project folder

In [None]:
# Packages
library("tidyverse")
library("purrr")
library("magrittr")
library("glue")
library("roll")
library("rlang")
library("readr")
library("doParallel")
library("ggplot2")
library("cowplot")
library("ggridges")
library("glmnet")
library("ranger")
library("lightgbm")
library("xgboost")
#install.packages(paste0(path, "/eDMA_1.5-3.tar.gz"), repos = NULL, type = "source")
library("eDMA") # Local Version
library("hdflex")
library("forecast")
library("kableExtra")

In [None]:
# Load custom functions
# Both files are sourced from the project's "Code" folder below `path`.
source(glue("{path}/Code/_fmodels.R"))
source(glue("{path}/Code/_helpers.R"))

# Convert Jupyter Notebook to R script
# The notebook location is derived from `path` rather than a machine-specific
# absolute path, so this cell also runs on other machines.
# NOTE(review): assumes the notebook sits next to the sourced files in
# "{path}/Code" -- confirm the location.
convert_ipynb_to_r(
  path.expand(file.path(path, "Code", "finance_main_script.ipynb"))
)

---

## 1) Forecasting Accuracy

In [None]:
######### --------------------------------------------
### Predict Daily Aggregate US Stock Returns ###
# Rolling-window length (5 * 252 observations) and the tag used in the
# names of the result files
window_size <- 252 * 5
setting <- glue("finance_{window_size}")

# Global parameters: number of parallel workers and the evaluation dates
cores <- 20
eval_start <- "1967-01-04"
eval_update <- "1999-01-04"
eval_end <- "2021-12-31"

# Read the four signal sets from disk
econ_fin_signals <- read_rds(glue("{path}/Data/ES1/P_Signals/Econ_Fin/econ_fin_signals.rds"))
topic_signals <- read_rds(glue("{path}/Data/ES1/P_Signals/Text_Signals/topic_signals.rds"))
text_signals <- read_rds(glue("{path}/Data/ES1/P_Signals/Text_Signals/text_signals.rds"))
f_signals <- read_rds(glue("{path}/Data/ES1/F_Signals/refined_signals.rds"))

# Signal names per set (everything except the key / response columns)
econ_fin_names <- econ_fin_signals %>%
  names() %>%
  setdiff(c("Date", "equity_premium"))
topic_names <- topic_signals %>% names() %>% setdiff("Date")
text_names <- text_signals %>% names() %>% setdiff("Date")
f_names <- f_signals %>% names() %>% setdiff("Date")

# Build the modelling dataset:
#  1. join the three P-signal sets on Date,
#  2. lag every column except Date and the response by one row,
#  3. join the F-signals (not lagged here),
#  4. cut at the evaluation end and carry the last value forward,
#  5. move Date into the row names and drop the first row
dataset <- list(econ_fin_signals, topic_signals, text_signals) %>%
  purrr::reduce(dplyr::left_join, by = "Date") %>%
  dplyr::mutate(across(-c(Date, equity_premium), dplyr::lag)) %>%
  dplyr::left_join(f_signals, by = "Date") %>%
  dplyr::filter(Date <= eval_end) %>%
  tidyr::fill(-equity_premium, .direction = "down") %>%
  tibble::column_to_rownames(var = "Date") %>%
  dplyr::slice(-1)

# Response variable as a one-column matrix (row names carry the dates)
y <- as.matrix(dataset[, "equity_premium", drop = FALSE])

# "P"-signals: economic/financial, topic and text signals
X <- as.matrix(
  dataset[, c(econ_fin_names, topic_names, text_names), drop = FALSE]
)

# "F"-signals
Ext_F <- as.matrix(dataset[, f_names, drop = FALSE])

# Report the number of signals in each set
cat("Size of P-Signals:", ncol(X), "\n")
cat("Size of F-Signals:", ncol(Ext_F), "\n")

# All dates, and the dates up to and including `eval_update`
tdates <- rownames(dataset)
dates_ld <- tdates[tdates <= eval_update]

# Corresponding sample lengths
tlength <- length(tdates)
length_ld <- length(dates_ld)

# Low-dimensional subsample (dates up to `eval_update`): four P-signals
# and the single F-signal "PHM"
ld_signals <- c("dp", "TBL3M", "TSP1", "vola")
y_ld <- y[dates_ld, , drop = FALSE]
X_ld <- X[dates_ld, ld_signals, drop = FALSE]
Ext_F_ld <- Ext_F[dates_ld, "PHM", drop = FALSE]

# Drop the raw inputs and helper objects that are no longer needed
# (`text_names` is kept; it is used again further below)
rm(econ_fin_signals, text_signals, topic_signals, f_signals,
   dataset, econ_fin_names, topic_names, f_names, ld_signals)
invisible(gc())

######### --------------------------------------------
### Build Result Matrices
# Column names of all result objects: benchmarks first, then the
# STSC variants and the PC-DMA models
benchmark_names <- c(
  "PHM", "STPHM",
  "DART", "ReLa", "TRF",
  "XGB", "TCSR",
  #---#
  "STSC", "STSC_NWC", "STSC_LD", "STSC_EXF",
  "STSCSx", "STSCSFLEX",
  "PC5DMA", "PC10DMA", "PC15DMA"
)

# Point forecasts, variances, squared errors and CRPS share one layout:
# one row per date, one column per method, initialised with NA
preds <- vari <- se <- crps_score <- matrix(
  NA,
  nrow = tlength,
  ncol = length(benchmark_names),
  dimnames = list(tdates, benchmark_names)
)

# Evaluation summary: one row per loss, one column per method
evaluation <- matrix(
  NA,
  nrow = 2,
  ncol = length(benchmark_names),
  dimnames = list(c("MSE", "CRPS"), benchmark_names)
)

######### --------------------------------------------
### Part 0: Benchmark ###
### Prevailing Historical Mean (PHM)
# `phm()` is a project helper; its first two list elements are stored as
# point forecasts and variances
phm_results <- phm(y, window_size, "expanding")

preds[, "PHM"] <- phm_results[[1]]
vari[, "PHM"] <- phm_results[[2]]
se[, "PHM"] <- (y - preds[, "PHM"])^2

### Density Transformation (ST-PHM)
# Feed the PHM forecast as the only F-signal (no P-signals) into hdflex::tvc
stphm_results <- hdflex::tvc(
  y, NULL, preds[, "PHM", drop = FALSE],
  5 * 252, 1.00, 0.94, FALSE
)

# Store forecasts, squared errors and CRPS of the transformed benchmark
preds[, "STPHM"] <- stphm_results$Forecasts$Point_Forecasts
vari[, "STPHM"] <- stphm_results$Forecasts$Variance_Forecasts
se[, "STPHM"] <- (y - preds[, "STPHM"])^2
crps_score[, "STPHM"] <- crps(
  y, preds[, "STPHM"], sqrt(vari[, "STPHM"]), NULL, "normal"
)$crps

# Free the intermediate result objects
rm(phm_results, stphm_results)
invisible(gc())

######### --------------------------------------------
### Part 1: Forecasting Models with Rolling Window ###
### Low-Dimensional Dataset
# Combine P- and F-signals, remove rows with missing values and subset the
# response to the remaining dates
S_sub_ld <- na.omit(cbind(X_ld, Ext_F_ld))
y_sub_ld <- y_ld[rownames(S_sub_ld), , drop = FALSE]

# Get Start Point for Training: number of complete rows dated before
# `eval_start`, so that row `train_start_ld + 1` is the first forecast target
train_start_ld <- sum(rownames(S_sub_ld) < eval_start)
if (train_start_ld < window_size) {
  rlang::abort("Dataset is too small for the given window size.")
}

# First Non-NA-Row (kept in the workspace: the STSC burn-in below uses it)
first_complete_ld <- which(complete.cases(cbind(X_ld, Ext_F_ld)))[1]

# Time Sequence: t is the last training row, t + 1 the row to be predicted
t_seq <- seq(train_start_ld, nrow(S_sub_ld) - 1)

# Dates of the predicted rows. The results are written back by date name
# rather than by a positional offset, so they stay aligned with `tdates`
# even if `na.omit()` dropped rows that are not at the start of the sample.
pred_dates_ld <- rownames(S_sub_ld)[t_seq + 1]

# Open Parallel Backend
cl <- parallel::makeCluster(cores)
doParallel::registerDoParallel(cl)

# Parallel Loop over Time
# The cluster is always shut down afterwards (also on error) and the
# sequential backend is re-registered, so no worker processes are left
# behind and no stale cluster stays registered with foreach.
res_ld <- tryCatch(
  foreach::foreach(t = t_seq,
                   .combine = comb,
                   .multicombine = TRUE,
                   .packages = c("glmnet",
                                 "xgboost",
                                 "lightgbm",
                                 "ranger")) %dopar% {

    ### Split Data
    # Train Data: the `window_size` rows up to and including t
    s_train <- S_sub_ld[(t - window_size + 1):t, , drop = FALSE]
    y_train <- y_sub_ld[(t - window_size + 1):t, , drop = FALSE]

    # Predict Data: the row following the training window
    s_pred <- S_sub_ld[t + 1, , drop = FALSE]
    y_pred <- y_sub_ld[t + 1, ]

    ####### Model 1: Relaxed-Lasso #######
    # Parameter
    rela_folds <- 5

    # Fit and Predict
    pred_relax <- relasso_model(s_train,
                                y_train,
                                s_pred,
                                rela_folds,
                                t)

    ####### Model 2: XGBoost #######
    # Parameter
    xgb_folds  <- 5
    xgb_ntrees <- 500
    xgb_lr     <- 0.1
    xgb_target <- NULL
    xgb_cores  <- 1

    # Fit and Predict
    pred_xgb <- xgb_model(s_train,
                          y_train,
                          s_pred,
                          xgb_folds,
                          xgb_ntrees,
                          xgb_lr,
                          xgb_target,
                          xgb_cores,
                          t)

    ####### Model 3: LightGBM #######
    # Parameter
    dart_folds  <- 5
    dart_ntrees <- 500
    dart_lr     <- 0.1
    dart_dr     <- 0.1
    dart_target <- NULL
    dart_cores  <- 1

    # Fit and Predict
    pred_dart <- dart_model(s_train,
                            y_train,
                            s_pred,
                            dart_folds,
                            dart_ntrees,
                            dart_lr,
                            dart_dr,
                            dart_target,
                            dart_cores,
                            t)

    ####### Model 4: (Targeted) RandomForests #######
    # Parameter (targeting is only switched on for more than 50 signals)
    trf_ntarget   <- if (ncol(s_train) > 50) {50} else {NULL}
    trf_ntrees    <- 500
    trf_max_depth <- 3
    trf_cores     <- 1

    # Fit and Predict
    pred_trf <- trf_model(s_train,
                          y_train,
                          s_pred,
                          trf_ntarget,
                          trf_ntrees,
                          trf_max_depth,
                          trf_cores,
                          t)

    ####### Model 5: (Targeted) Complete Subset Regression #######
    # Parameter
    # `csr_sampling` is always TRUE: the former size-dependent choice was
    # overwritten unconditionally right after it was made, so that dead
    # assignment has been dropped.
    csr_n_target <- if (ncol(s_train) > 30) {20} else {NULL}
    csr_n_subset <- if (ncol(s_train) > 30) {10} else {4}
    csr_ubound   <- 10000
    csr_sampling <- TRUE

    # Fit and Predict
    pred_csr <- csr_model(s_train,
                          y_train,
                          s_pred,
                          csr_n_target,
                          csr_n_subset,
                          csr_ubound,
                          csr_sampling)

    # Combine Results
    # Local names deliberately differ from the global result matrices
    # `preds` and `se`. Every model gets the sample variance of the
    # training response as its variance.
    fc     <- c(pred_relax, pred_xgb, pred_dart, pred_trf, pred_csr)
    fc_var <- rep(var(y_train), length(fc))
    fc_se  <- (y_pred - fc)^2

    # Result of this iteration (handed to `comb`)
    list(fc, fc_var, fc_se)
  },
  finally = {
    parallel::stopCluster(cl)
    foreach::registerDoSEQ()
  }
)

# Assign Results (order must match the order used inside the loop)
sub_col <- c("ReLa", "XGB", "DART", "TRF", "TCSR")
preds[pred_dates_ld, sub_col] <- res_ld[[1]]
vari[pred_dates_ld, sub_col] <- res_ld[[2]]
se[pred_dates_ld, sub_col] <- res_ld[[3]]

# Remove & Clean Memory
rm(list = c("res_ld", "S_sub_ld", "y_sub_ld", "train_start_ld",
            "pred_dates_ld", "t_seq"))
invisible(gc())

######### --------------------------------------------
### High-Dimensional Dataset
# Remove rows with missing values and subset the response to the same dates
S_sub <- na.omit(cbind(X, Ext_F))
y_sub <- y[rownames(S_sub), , drop = FALSE]

# Remove Word-Count-Signals (-> due to computational reasons)
S_sub <- S_sub[, !colnames(S_sub) %in% text_names]

# Print reduced dimensions
cat("Size of reduced Signals:", ncol(S_sub), "\n")

# Train Start Point: number of complete rows dated before `eval_update`,
# so that row `train_start + 1` is the first forecast target
train_start <- sum(rownames(S_sub) < eval_update)
if (train_start < window_size) {
  rlang::abort("Dataset is too small for the given window size.")
}

# First Non-NA-Row (kept in the workspace: the STSC burn-in below uses it)
first_complete <- which(complete.cases(cbind(X, Ext_F)))[1]

# Time Sequence: t is the last training row, t + 1 the row to be predicted
t_seq <- seq(train_start, nrow(S_sub) - 1)

# Dates of the predicted rows. The results are written back by date name
# rather than by a positional offset, so they stay aligned with `tdates`
# even if `na.omit()` dropped rows that are not at the start of the sample.
pred_dates <- rownames(S_sub)[t_seq + 1]

# Open Parallel Backend
cl <- parallel::makeCluster(cores)
doParallel::registerDoParallel(cl)

# Parallel Loop over Time
# The cluster is always shut down afterwards (also on error) and the
# sequential backend is re-registered, so no worker processes are left
# behind and no stale cluster stays registered with foreach.
# NOTE(review): the later helpers receive their own core counts and are
# presumed to manage their own backends -- confirm in _fmodels.R.
res <- tryCatch(
  foreach::foreach(t = t_seq,
                   .combine = comb,
                   .multicombine = TRUE,
                   .packages = c("glmnet",
                                 "xgboost",
                                 "lightgbm",
                                 "ranger")) %dopar% {

    ### Split Data ###
    # Train Data: the `window_size` rows up to and including t
    s_train <- S_sub[(t - window_size + 1):t, , drop = FALSE]
    y_train <- y_sub[(t - window_size + 1):t, , drop = FALSE]

    # Predict Data: the row following the training window
    s_pred <- S_sub[t + 1, , drop = FALSE]
    y_pred <- y_sub[t + 1, ]

    ####### Model 1: Relaxed-Lasso #######
    # Parameter
    rela_folds <- 5

    # Fit and Predict
    pred_relax <- relasso_model(s_train,
                                y_train,
                                s_pred,
                                rela_folds,
                                t)

    ####### Model 2: XGBoost #######
    # Parameter
    xgb_folds  <- 5
    xgb_ntrees <- 500
    xgb_lr     <- 0.1
    xgb_target <- NULL
    xgb_cores  <- 1

    # Fit and Predict
    pred_xgb <- xgb_model(s_train,
                          y_train,
                          s_pred,
                          xgb_folds,
                          xgb_ntrees,
                          xgb_lr,
                          xgb_target,
                          xgb_cores,
                          t)

    ####### Model 3: LightGBM #######
    # Parameter
    dart_folds  <- 5
    dart_ntrees <- 500
    dart_lr     <- 0.1
    dart_dr     <- 0.1
    dart_target <- NULL
    dart_cores  <- 1

    # Fit and Predict
    pred_dart <- dart_model(s_train,
                            y_train,
                            s_pred,
                            dart_folds,
                            dart_ntrees,
                            dart_lr,
                            dart_dr,
                            dart_target,
                            dart_cores,
                            t)

    ####### Model 4: (Targeted) RandomForests #######
    # Parameter (targeting is only switched on for more than 50 signals)
    trf_ntarget   <- if (ncol(s_train) > 50) {50} else {NULL}
    trf_ntrees    <- 500
    trf_max_depth <- 3
    trf_cores     <- 1

    # Fit and Predict
    pred_trf <- trf_model(s_train,
                          y_train,
                          s_pred,
                          trf_ntarget,
                          trf_ntrees,
                          trf_max_depth,
                          trf_cores,
                          t)

    ####### Model 5: (Targeted) Complete Subset Regression #######
    # Parameter
    # `csr_sampling` is always TRUE: the former size-dependent choice was
    # overwritten unconditionally right after it was made, so that dead
    # assignment has been dropped.
    csr_n_target <- if (ncol(s_train) > 30) {20} else {NULL}
    csr_n_subset <- if (ncol(s_train) > 30) {10} else {4}
    csr_ubound   <- 10000
    csr_sampling <- TRUE

    # Fit and Predict
    pred_csr <- csr_model(s_train,
                          y_train,
                          s_pred,
                          csr_n_target,
                          csr_n_subset,
                          csr_ubound,
                          csr_sampling)

    # Combine Results
    # Local names deliberately differ from the global result matrices
    # `preds` and `se`. Every model gets the sample variance of the
    # training response as its variance.
    fc     <- c(pred_relax, pred_xgb, pred_dart, pred_trf, pred_csr)
    fc_var <- rep(var(y_train), length(fc))
    fc_se  <- (y_pred - fc)^2

    # Result of this iteration (handed to `comb`)
    list(fc, fc_var, fc_se)
  },
  finally = {
    parallel::stopCluster(cl)
    foreach::registerDoSEQ()
  }
)

# Assign Results (order must match the order used inside the loop)
sub_col <- c("ReLa", "XGB", "DART", "TRF", "TCSR")
preds[pred_dates, sub_col] <- res[[1]]
vari[pred_dates, sub_col] <- res[[2]]
se[pred_dates, sub_col] <- res[[3]]

# Remove & Clean Memory
rm(list = c("res", "S_sub", "y_sub", "train_start",
            "pred_dates", "t_seq"))
invisible(gc())

######### --------------------------------------------
### Part 2: STSC and Variants ###
####### STSC-Model #######
### Subsample 1: 1954-1999 (Low-Dim)
# Set TV-C-Parameter
init <- 5 * 252
lambda_grid <- c(0.9984, 0.9992, 1.0000)
kappa_grid <- c(0.90, 0.92, 0.94)
bias <- TRUE

# Set DSC-Parameter
gamma_grid <- c(0.40, 0.50, 0.60, 0.70, 0.80, 0.90,
                0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.00)
# Number of candidate models: one per signal and (lambda, kappa) pair
n_tvc_ld <- (ncol(X_ld) + ncol(Ext_F_ld)) * length(lambda_grid) * length(kappa_grid)
# Every subset size from 1 up to the number of candidate models
psi_grid_ld <- seq_len(n_tvc_ld)
delta <- 0.9992
burn_in_ld <- first_complete_ld + init / 2
burn_in_dsc <- 1
metric <- 5
equal_weight <- TRUE
incl <- NULL
parallel <- TRUE
n_threads <- 10

# Apply STSC-Function
stsc_results_ld <- hdflex::stsc(y_ld,
                                X_ld,
                                Ext_F_ld,
                                init,
                                lambda_grid,
                                kappa_grid,
                                bias,
                                gamma_grid,
                                psi_grid_ld,
                                delta,
                                burn_in_ld,
                                burn_in_dsc,
                                metric,
                                equal_weight,
                                incl,
                                parallel,
                                n_threads,
                                NULL)

### Subsample 2: 1999-2021 (High-Dim)
# Update Parameter
burn_in <- first_complete + init / 2
n_tvc <- (ncol(X) + ncol(Ext_F)) * length(lambda_grid) * length(kappa_grid)
# Subset sizes 1..100 plus the four quartiles of the number of candidates
psi_grid <- c(1:100, floor(seq_len(4) * n_tvc / 4))

# Apply STSC-Function
stsc_results <- hdflex::stsc(y,
                             X,
                             Ext_F,
                             init,
                             lambda_grid,
                             kappa_grid,
                             bias,
                             gamma_grid,
                             psi_grid,
                             delta,
                             burn_in,
                             burn_in_dsc,
                             metric,
                             equal_weight,
                             incl,
                             parallel,
                             n_threads,
                             NULL)

# Assign STSC-Results
# Splice the two runs: the low-dimensional run supplies the first
# `length_ld` observations, the high-dimensional run the remaining ones.
ld_seq <- seq(length_ld)
pred_stsc <- c(
  stsc_results_ld$Forecasts$Point_Forecasts,
  stsc_results$Forecasts$Point_Forecasts[-ld_seq]
)
var_stsc <- c(
  stsc_results_ld$Forecasts$Variance_Forecasts,
  stsc_results$Forecasts$Variance_Forecasts[-ld_seq]
)
chosen_gamma <- c(
  stsc_results_ld$Tuning_Parameters$Gamma,
  stsc_results$Tuning_Parameters$Gamma[-ld_seq]
)
chosen_psi <- c(
  stsc_results_ld$Tuning_Parameters$Psi,
  stsc_results$Tuning_Parameters$Psi[-ld_seq]
)
chosen_sig <- bind_rows(
  as.data.frame(stsc_results_ld$Tuning_Parameters$Signals),
  as.data.frame(stsc_results$Tuning_Parameters$Signals[-ld_seq, ])
)
chosen_lambda <- bind_rows(
  as.data.frame(stsc_results_ld$Tuning_Parameters$Lambda),
  as.data.frame(stsc_results$Tuning_Parameters$Lambda[-ld_seq, ])
)
chosen_kappa <- bind_rows(
  as.data.frame(stsc_results_ld$Tuning_Parameters$Kappa),
  as.data.frame(stsc_results$Tuning_Parameters$Kappa[-ld_seq, ])
)

# Check
# Runs BEFORE the dates are attached: `names<-` pads a too-long vector with
# NA names and `rownames<-` fails with an unrelated message, so a length
# mismatch has to be caught here to be reported clearly.
spliced_sizes <- c(length(pred_stsc), length(var_stsc),
                   length(chosen_gamma), length(chosen_psi),
                   nrow(chosen_sig), nrow(chosen_lambda), nrow(chosen_kappa))
if (any(spliced_sizes != tlength)) {
  stop("Length of Predictions does not match!")
}

# Add Dates
names(pred_stsc) <- tdates
names(var_stsc) <- tdates
names(chosen_gamma) <- tdates
names(chosen_psi) <- tdates
rownames(chosen_sig) <- tdates
rownames(chosen_lambda) <- tdates
rownames(chosen_kappa) <- tdates

# Remove & Clean Memory
# (`stsc_results_ld` is kept: the "No Word Counts" variant reuses it)
rm(list = c("stsc_results", "spliced_sizes"))

####### STSC - No Word Counts #######
### Subsample 1: 1954-1999 (Low-Dim)
# Nothing to recompute: the low-dimensional STSC results from above are reused

### Subsample 2: 1999-2021 (No word counts)
# P-signals without the text (word-count) columns and the matching psi grid:
# subset sizes 1..100 plus the four quartiles of the number of candidates
X_nwc <- X[, !colnames(X) %in% text_names]
n_tvc_nwc <- (ncol(X_nwc) + ncol(Ext_F)) * length(lambda_grid) * length(kappa_grid)
psi_grid_nwc <- c(1:100, floor((1:4) * n_tvc_nwc / 4))

# Run STSC on the reduced signal set (other settings as in the main run)
stsc_nwc_results <- hdflex::stsc(
  y, X_nwc, Ext_F,
  init, lambda_grid, kappa_grid, bias,
  gamma_grid, psi_grid_nwc, delta,
  burn_in, burn_in_dsc, metric, equal_weight,
  incl, parallel, n_threads,
  NULL
)

# Splice: low-dimensional forecasts first, reduced-set forecasts afterwards
pred_stsc_nwc <- c(
  stsc_results_ld$Forecasts$Point_Forecasts,
  stsc_nwc_results$Forecasts$Point_Forecasts[-ld_seq]
)
var_stsc_nwc <- c(
  stsc_results_ld$Forecasts$Variance_Forecasts,
  stsc_nwc_results$Forecasts$Variance_Forecasts[-ld_seq]
)

# The spliced series must cover every date
if (length(pred_stsc_nwc) != tlength) {
  stop("Length of Predictions does not match!")
}

# Both STSC result objects are no longer needed from here on
rm(stsc_results_ld, stsc_nwc_results)

####### STSC - Excluding-F-Signals #######
### Subsample 1: 1954-1999 (Low-Dim)
# Candidate count and psi grid when only P-signals enter (zero F-signals)
n_tvc_exf_ld <- (ncol(X_ld) + 0) * length(lambda_grid) * length(kappa_grid)
psi_grid_exf_ld <- c(1:n_tvc_exf_ld)

# Run STSC without F-signals on the low-dimensional subsample
stsc_exf_results_ld <- hdflex::stsc(
  y_ld, X_ld, NULL,
  init, lambda_grid, kappa_grid, bias,
  gamma_grid, psi_grid_exf_ld, delta,
  burn_in_ld, burn_in_dsc, metric, equal_weight,
  incl, parallel, n_threads,
  NULL
)

### Subsample 2: 1999-2021 (High-Dim)
# Psi grid: subset sizes 1..100 plus the four quartiles of the candidates
n_tvc_exf <- (ncol(X) + 0) * length(lambda_grid) * length(kappa_grid)
psi_grid_exf <- c(1:100, floor((1:4) * n_tvc_exf / 4))

# Run STSC without F-signals on the full sample
stsc_exf_results <- hdflex::stsc(
  y, X, NULL,
  init, lambda_grid, kappa_grid, bias,
  gamma_grid, psi_grid_exf, delta,
  burn_in, burn_in_dsc, metric, equal_weight,
  incl, parallel, n_threads,
  NULL
)

# Splice: low-dimensional forecasts first, high-dimensional ones afterwards
pred_stsc_exf <- c(
  stsc_exf_results_ld$Forecasts$Point_Forecasts,
  stsc_exf_results$Forecasts$Point_Forecasts[-ld_seq]
)
var_stsc_exf <- c(
  stsc_exf_results_ld$Forecasts$Variance_Forecasts,
  stsc_exf_results$Forecasts$Variance_Forecasts[-ld_seq]
)

# The spliced series must cover every date
if (length(pred_stsc_exf) != tlength) {
  stop("Length of Predictions does not match!")
}

# Drop the raw result objects
rm(stsc_exf_results_ld, stsc_exf_results)

####### STSC - Low-Dimensional (FST 2023) #######
# Run STSC over the whole sample with the low-dimensional signal set only;
# the psi grid and burn-in of the low-dimensional subsample are reused
stsc_ld_results <- hdflex::stsc(
  y,
  X[, c("dp", "TBL3M", "TSP1", "vola")],
  Ext_F[, "PHM", drop = FALSE],
  init, lambda_grid, kappa_grid, bias,
  gamma_grid, psi_grid_ld, delta,
  burn_in_ld, burn_in_dsc, metric, equal_weight,
  incl, parallel, n_threads,
  NULL
)

# No splicing needed: one run covers all dates
pred_stsc_ld <- c(stsc_ld_results$Forecasts$Point_Forecasts)
var_stsc_ld <- c(stsc_ld_results$Forecasts$Variance_Forecasts)

# The forecast series must cover every date
if (length(pred_stsc_ld) != tlength) {
  stop("Length of Predictions does not match!")
}

# Drop the raw result object
rm(stsc_ld_results)

####### STSCx - Selecting X Forecasts with LASSO #######
### Subsample 1: 1954-1999 (Low-Dim)
# TV-C settings (restated so this section can be run on its own)
init <- 5 * 252
lambda_grid <- c(0.9984, 0.9992, 1.0000)
kappa_grid <- c(0.90, 0.92, 0.94)
bias <- TRUE
n_cores <- cores

# Lasso target passed to `stscsx()` for the low-dimensional subsample
stscsx_target_ld <- 5

# Run the project helper `stscsx()` on the low-dimensional subsample
stscsx_results_ld <- stscsx(
  y_ld, X_ld, Ext_F_ld,
  lambda_grid, kappa_grid, init, bias,
  n_cores, window_size, stscsx_target_ld
)

### Subsample 2: 1999-2021 (High-Dim)
# Lasso target for the high-dimensional run
stscsx_target <- 10

# Run on the full sample; the text (word-count) columns are left out
stscsx_results <- stscsx(
  y, X[, !colnames(X) %in% text_names], Ext_F,
  lambda_grid, kappa_grid, init, bias,
  n_cores, window_size, stscsx_target
)

# Splice: column 1 is stored as point forecast, column 2 as variance
pred_stscsx <- c(
  stscsx_results_ld[, 1],
  stscsx_results[, 1][-ld_seq]
)
var_stscsx <- c(
  stscsx_results_ld[, 2],
  stscsx_results[, 2][-ld_seq]
)

# Drop the raw result objects
rm(stscsx_results_ld, stscsx_results)

####### STSC - Using LASSO to dynamically select forecasts #######
### Subsample 1: 1954-1999 (Low-Dim)
# TV-C settings (restated so this section can be run on its own)
init <- 5 * 252
lambda_grid <- c(0.9984, 0.9992, 1.0000)
kappa_grid <- c(0.90, 0.92, 0.94)
bias <- TRUE
n_cores <- cores

# Number of cross-validation folds passed to `stscsflex()`
stscsflex_folds <- 5

# Run the project helper `stscsflex()` on the low-dimensional subsample
stscsflex_results_ld <- stscsflex(
  y_ld, X_ld, Ext_F_ld,
  lambda_grid, kappa_grid, init, bias,
  n_cores, window_size, stscsflex_folds
)

### Subsample 2: 1999-2021 (High-Dim)
# Run on the full sample; the text (word-count) columns are left out
stscsflex_results <- stscsflex(
  y, X[, !colnames(X) %in% text_names], Ext_F,
  lambda_grid, kappa_grid, init, bias,
  n_cores, window_size, stscsflex_folds
)

# Splice: column 1 is stored as point forecast, column 2 as variance
pred_stscsflex <- c(
  stscsflex_results_ld[, 1],
  stscsflex_results[, 1][-ld_seq]
)
var_stscsflex <- c(
  stscsflex_results_ld[, 2],
  stscsflex_results[, 2][-ld_seq]
)

# Drop the raw result objects
rm(stscsflex_results_ld, stscsflex_results)

####### (Principal Component) Dynamic Model Averaging #######
### Subsample 1: 1954-1999 (Low-Dim)
# Settings shared by the DMA and PC-DMA runs
pcdma_win    <- window_size
pcdma_alpha  <- 0.99
pcdma_lambda <- c(0.9984, 0.9992, 1.0000)
pcdma_kappa  <- 0.94
pcdma_cores  <- cores
pcdma_excl   <- NULL

# Plain DMA (project helper) on the low-dimensional subsample
dma_results_ld <- dma(
  y_ld, X_ld, Ext_F_ld,
  pcdma_alpha, pcdma_lambda, pcdma_kappa
)

### Subsample 2: 1999-2021 (High-Dim)
# Number of components passed to `pcdma()` for the three variants
pc5dma_comp  <- 5
pc10dma_comp <- 10
pc15dma_comp <- 15

# PCA-5-DMA (text / word-count columns are left out of X)
pc5dma_results <- pcdma(
  y, X[, !colnames(X) %in% text_names], Ext_F,
  pcdma_win, pc5dma_comp,
  pcdma_alpha, pcdma_lambda, pcdma_kappa,
  pcdma_cores, pcdma_excl
)

# PCA-10-DMA
pc10dma_results <- pcdma(
  y, X[, !colnames(X) %in% text_names], Ext_F,
  pcdma_win, pc10dma_comp,
  pcdma_alpha, pcdma_lambda, pcdma_kappa,
  pcdma_cores, pcdma_excl
)

# PCA-15-DMA
pc15dma_results <- pcdma(
  y, X[, !colnames(X) %in% text_names], Ext_F,
  pcdma_win, pc15dma_comp,
  pcdma_alpha, pcdma_lambda, pcdma_kappa,
  pcdma_cores, pcdma_excl
)

# Splice each variant with the low-dimensional DMA run; columns 1, 3 and 4
# are stored as point forecast, variance and CRPS respectively
pred_pc5dma  <- c(dma_results_ld[, 1], pc5dma_results[, 1][-ld_seq])
pred_pc10dma <- c(dma_results_ld[, 1], pc10dma_results[, 1][-ld_seq])
pred_pc15dma <- c(dma_results_ld[, 1], pc15dma_results[, 1][-ld_seq])

var_pc5dma  <- c(dma_results_ld[, 3], pc5dma_results[, 3][-ld_seq])
var_pc10dma <- c(dma_results_ld[, 3], pc10dma_results[, 3][-ld_seq])
var_pc15dma <- c(dma_results_ld[, 3], pc15dma_results[, 3][-ld_seq])

crps_pc5dma  <- c(dma_results_ld[, 4], pc5dma_results[, 4][-ld_seq])
crps_pc10dma <- c(dma_results_ld[, 4], pc10dma_results[, 4][-ld_seq])
crps_pc15dma <- c(dma_results_ld[, 4], pc15dma_results[, 4][-ld_seq])

# Drop the raw result objects
rm(dma_results_ld, pc5dma_results, pc10dma_results, pc15dma_results)

######### --------------------------------------------
####### Store Forecasts, Variances, Squared Errors and CRPS #######
# Grouped by model: point forecast, variance, squared error, CRPS.
# For the STSC family the CRPS is computed via `crps()` (called with
# "normal"); the PC-DMA models use the CRPS values spliced above.

### STSC
preds[, "STSC"] <- pred_stsc
vari[, "STSC"] <- var_stsc
se[, "STSC"] <- (y - pred_stsc)^2
crps_score[, "STSC"] <- crps(y, pred_stsc, sqrt(var_stsc), NULL, "normal")$crps # nolint

### STSC without word counts
preds[, "STSC_NWC"] <- pred_stsc_nwc
vari[, "STSC_NWC"] <- var_stsc_nwc
se[, "STSC_NWC"] <- (y - pred_stsc_nwc)^2
crps_score[, "STSC_NWC"] <- crps(y, pred_stsc_nwc, sqrt(var_stsc_nwc), NULL, "normal")$crps # nolint

### STSC excluding F-signals
preds[, "STSC_EXF"] <- pred_stsc_exf
vari[, "STSC_EXF"] <- var_stsc_exf
se[, "STSC_EXF"] <- (y - pred_stsc_exf)^2
crps_score[, "STSC_EXF"] <- crps(y, pred_stsc_exf, sqrt(var_stsc_exf), NULL, "normal")$crps # nolint

### STSC low-dimensional
preds[, "STSC_LD"] <- pred_stsc_ld
vari[, "STSC_LD"] <- var_stsc_ld
se[, "STSC_LD"] <- (y - pred_stsc_ld)^2
crps_score[, "STSC_LD"] <- crps(y, pred_stsc_ld, sqrt(var_stsc_ld), NULL, "normal")$crps # nolint

### STSCSx
preds[, "STSCSx"] <- pred_stscsx
vari[, "STSCSx"] <- var_stscsx
se[, "STSCSx"] <- (y - pred_stscsx)^2
crps_score[, "STSCSx"] <- crps(y, pred_stscsx, sqrt(var_stscsx), NULL, "normal")$crps # nolint

### STSCSFLEX
preds[, "STSCSFLEX"] <- pred_stscsflex
vari[, "STSCSFLEX"] <- var_stscsflex
se[, "STSCSFLEX"] <- (y - pred_stscsflex)^2
crps_score[, "STSCSFLEX"] <- crps(y, pred_stscsflex, sqrt(var_stscsflex), NULL, "normal")$crps # nolint

### PC5DMA
preds[, "PC5DMA"] <- pred_pc5dma
vari[, "PC5DMA"] <- var_pc5dma
se[, "PC5DMA"] <- (y - pred_pc5dma)^2
crps_score[, "PC5DMA"] <- crps_pc5dma

### PC10DMA
preds[, "PC10DMA"] <- pred_pc10dma
vari[, "PC10DMA"] <- var_pc10dma
se[, "PC10DMA"] <- (y - pred_pc10dma)^2
crps_score[, "PC10DMA"] <- crps_pc10dma

### PC15DMA
preds[, "PC15DMA"] <- pred_pc15dma
vari[, "PC15DMA"] <- var_pc15dma
se[, "PC15DMA"] <- (y - pred_pc15dma)^2
crps_score[, "PC15DMA"] <- crps_pc15dma

# Everything is stored in the result matrices; drop the loose vectors
rm(pred_stsc, pred_stsc_nwc, pred_stsc_exf,
   pred_stscsx, pred_stscsflex, pred_stsc_ld,
   pred_pc5dma, pred_pc10dma, pred_pc15dma,
   var_stsc, var_stsc_nwc, var_stsc_exf,
   var_stscsflex, var_stscsx, var_stsc_ld,
   var_pc5dma, var_pc10dma, var_pc15dma,
   crps_pc5dma, crps_pc10dma, crps_pc15dma)
invisible(gc())

######### --------------------------------------------
### Part 3: Evaluation ###
####### OOS-Period #######
# Rows that fall into the out-of-sample window [eval_start, eval_end]
oos_idx <- which(tdates >= eval_start & tdates <= eval_end)

# Restrict response, result matrices and STSC tuning choices to that window
oos_y <- y[oos_idx, ]
oos_dates <- names(oos_y)
oos_preds <- preds[oos_idx, ]
oos_vari <- vari[oos_idx, ]
oos_se <- se[oos_idx, ]
oos_crps <- crps_score[oos_idx, ]
oos_chosen_gamma <- chosen_gamma[oos_idx]
oos_chosen_psi <- chosen_psi[oos_idx]
oos_chosen_sig <- chosen_sig[oos_idx, ]
oos_chosen_lambda <- chosen_lambda[oos_idx, ]
oos_chosen_kappa <- chosen_kappa[oos_idx, ]

# Abort if response, forecasts, variances or squared errors contain NA
# (the CRPS matrix is not part of this check)
for (obj in list(oos_y, oos_preds, oos_vari, oos_se)) {
  if (anyNA(obj)) {
    rlang::abort("Error: Missing Values in one of the OOS objects")
  }
}

### Loss summary per method
# Mean squared error
evaluation["MSE", ] <- colMeans(oos_se)

# Mean continuous ranked probability score
evaluation["CRPS", ] <- colMeans(oos_crps)

###########--------------------------------------------
# Write the out-of-sample objects to the results folder
write_rds(oos_y, glue("{path}/Results/ES1/y_{setting}.rds"))
write_rds(oos_preds, glue("{path}/Results/ES1/point_forecasts_{setting}.rds"))
write_rds(oos_se, glue("{path}/Results/ES1/squared_errors_{setting}.rds"))
write_rds(oos_vari, glue("{path}/Results/ES1/variances_{setting}.rds"))
write_rds(oos_crps, glue("{path}/Results/ES1/crps_scores_{setting}.rds"))
write_rds(evaluation, glue("{path}/Results/ES1/results_{setting}.rds"))

# Write the STSC tuning choices as one named list
stsc_params_file <- glue("{path}/Results/ES1/stsc_params_{setting}.rds")
write_rds(
  list(
    gamma = oos_chosen_gamma,
    psi = oos_chosen_psi,
    signals = oos_chosen_sig,
    lambda = oos_chosen_lambda,
    kappa = oos_chosen_kappa
  ),
  stsc_params_file
)
rm(stsc_params_file)
##########--------------------------------------------

# Print the evaluation matrix
evaluation
######### --------------------------------------------

## 2) Statistical Tests

In [None]:
######### --------------------------------------------
### Diebold-Mariano Test - MSE and CRPS ###
# One-sided Diebold-Mariano tests of every forecasting method against a
# benchmark: "PHM" for squared forecast errors and "STPHM" for CRPS. With
# `alternative = "greater"` the alternative hypothesis is that the method is
# more accurate than the benchmark. Results are collected in `results`
# (one row per method and loss) and rows with a p-value below 0.10 are shown.
# Setting
setting <- "finance_1260"

# Evaluation period: "whole", "until_1999", "from_1999"
evaluation_period <- "from_1999"

# Fail fast on an unknown period: it would otherwise fall through to the
# whole-sample branch and produce silently mislabelled results
valid_periods <- c("whole", "until_1999", "from_1999")
if (length(evaluation_period) != 1 || !evaluation_period %in% valid_periods) {
  rlang::abort(paste0(
    "`evaluation_period` must be one of: ",
    paste(valid_periods, collapse = ", ")
  ))
}

# First date of the second subsample; dates are stored as names / row names
# and compared as strings
split_date <- "1999-01-04"

# Build Result-Object
results <- tibble(
  Target = character(),
  Forecast = character(),
  Statistic = numeric(),
  P.Value = numeric(),
  Alternative = character(),
  TestType = character()
)

# Load Data
y <- read_rds(glue("{path}/Results/ES1/y_{setting}.rds"))
preds <- read_rds(glue("{path}/Results/ES1/point_forecasts_{setting}.rds"))
crps_scores <- read_rds(glue("{path}/Results/ES1/crps_scores_{setting}.rds"))

# Filter Data based on evaluation period
if (evaluation_period == "until_1999") {
  print("Data until 1999 is used.")
  y <- y[which(names(y) < split_date)]
  preds <- preds[which(rownames(preds) < split_date), , drop = FALSE]
  crps_scores <- crps_scores[which(rownames(crps_scores) < split_date), , drop = FALSE]
} else if (evaluation_period == "from_1999") {
  print("Data from 1999 is used.")
  y <- y[which(names(y) >= split_date)]
  preds <- preds[which(rownames(preds) >= split_date), , drop = FALSE]
  crps_scores <- crps_scores[which(rownames(crps_scores) >= split_date), , drop = FALSE]
} else {
  print("Whole evaluation period is used.")
}

# Loop over each forecast method for MSE
# Benchmark errors do not depend on the method, so compute them once
errors_benchmark <- y - preds[, "PHM", drop = FALSE]
for (forecast_method in colnames(preds)) {
  if (forecast_method != "PHM" && forecast_method != "STPHM") {

    # Forecast errors
    errors_forecast <- y - preds[, forecast_method, drop = FALSE]

    # Perform DM-Test (power = 2: squared-error loss)
    dm_test_result <- forecast::dm.test(errors_benchmark,
                                        errors_forecast,
                                        alternative = "greater",
                                        h = 1,
                                        power = 2)

    # Append the result
    results <- results %>%
      add_row(
        Target = "stock_returns",
        Forecast = forecast_method,
        Statistic = round(dm_test_result$statistic, 4),
        P.Value = round(dm_test_result$p.value, 4),
        Alternative = dm_test_result$alternative,
        TestType = "MSE"
      )
  }
}

# Loop over each forecast method for CRPS
# The CRPS values themselves are the losses; the benchmark is hoisted as above
errors_benchmark <- crps_scores[, "STPHM", drop = FALSE]
for (forecast_method in colnames(crps_scores)) {
  if (forecast_method != "PHM" && forecast_method != "STPHM") {

    # Forecast Errors
    errors_forecast <- crps_scores[, forecast_method, drop = FALSE]

    # Check for missing values (Non-Density-Methods)
    if (!anyNA(errors_forecast)) {

      # Perform DM-test (power = 1: the scores enter the test unchanged,
      # since CRPS values are non-negative)
      dm_test_result <- forecast::dm.test(errors_benchmark,
                                          errors_forecast,
                                          alternative = "greater",
                                          h = 1,
                                          power = 1)

      # Append the result
      results <- results %>%
        add_row(
          Target = "stock_returns",
          Forecast = forecast_method,
          Statistic = round(dm_test_result$statistic, 4),
          P.Value = round(dm_test_result$p.value, 4),
          Alternative = dm_test_result$alternative,
          TestType = "CRPS"
        )
    }
  }
}

# Show results
# Explicit namespace, as elsewhere in this file, so a masked `filter` cannot
# be picked up
results %>%
  dplyr::filter(P.Value < 0.10)
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Probability Integral Transformation (PIT)
# Evaluates the Gaussian predictive distribution of STSC at each realisation
# and tests the resulting PIT values for uniformity on [0, 1] with a
# Kolmogorov-Smirnov test. The PIT values are written to disk.
# Setting
setting <- "finance_1260"

# Load Y
oos_y <- read_rds(glue("{path}/Results/ES1/y_{setting}.rds"))

# Load Predictive Density
pred_stsc <- read_rds(glue("{path}/Results/ES1/point_forecasts_{setting}.rds"))[, "STSC"]
var_stsc <- read_rds(glue("{path}/Results/ES1/variances_{setting}.rds"))[, "STSC"]

# cbind() recycles shorter vectors, so make sure all three series have the
# same length before combining them
stopifnot(
  length(pred_stsc) == length(oos_y),
  length(var_stsc) == length(oos_y)
)
data <- cbind(oos_y, pred_stsc, var_stsc)

# Transform to Uniform (cumulative predictive distribution function evaluated
# at the realisation). pnorm() is vectorised, so no row-wise apply() is
# needed; the result keeps the row names of `data` as names.
cdf_t <- stats::pnorm(
  q = data[, "oos_y"],
  mean = data[, "pred_stsc"],
  sd = sqrt(data[, "var_stsc"])
)

# Kolmogorov-Smirnov-Test
ks_test <- stats::ks.test(cdf_t, "punif", 0, 1)
print(paste("Kolmogorov-Smirnov Test p-value:", round(ks_test$p.value, 4)))

# Save Uniform Vectors
write_rds(cdf_t, glue("{path}/Results/ES1/pit_cdf_t.rds"))
######### --------------------------------------------

## 3) Alternative Tuning Parameter

In [None]:
######### --------------------------------------------
### Predict Daily Equity Premium -- Robustness ###
# Loads the signal sets, builds the response `y`, the "P"-signal matrix `X`
# and the "F"-signal matrix `Ext_F`, and derives the low-dimensional subsample
# (`*_ld`) that ends at `eval_update`.
# Setting
setting <- "robustness"

# Set Global Parameter
window_size <- 5 * 252
eval_start <- "1967-01-04"
eval_update <- "1999-01-04"
eval_end <- "2021-12-31"

# Load data
econ_fin_signals <- read_rds(glue("{path}/Data/ES1/P_Signals/Econ_Fin/econ_fin_signals.rds"))
topic_signals <- read_rds(glue("{path}/Data/ES1/P_Signals/Text_Signals/topic_signals.rds"))
text_signals <- read_rds(glue("{path}/Data/ES1/P_Signals/Text_Signals/text_signals.rds"))
f_signals <- read_rds(glue("{path}/Data/ES1/F_Signals/refined_signals.rds"))

# Get signal names
# Every column except the join key (and, for econ_fin, the response)
econ_fin_names <- setdiff(names(econ_fin_signals), c("Date", "equity_premium"))
topic_names <- setdiff(names(topic_signals), "Date")
text_names <- setdiff(names(text_signals), "Date")
f_names <- setdiff(names(f_signals), "Date")

# Combine, lag and prepare data
# Order matters: the P-signals are lagged by one row BEFORE the F-signals are
# joined, so the F-signals enter unlagged. All columns except the response
# are then forward-filled, `Date` becomes the row names and the first row is
# dropped (presumably because lagging leaves it empty -- confirm).
# NOTE(review): `Date <= eval_end` compares against a character bound; the
# type of `Date` is not visible here
dataset <- econ_fin_signals %>%
  dplyr::left_join(topic_signals, by = "Date") %>%
  dplyr::left_join(text_signals, by = "Date") %>%
  dplyr::mutate(across(!Date & !equity_premium, dplyr::lag)) %>%
  dplyr::left_join(f_signals, by = "Date") %>%
  dplyr::filter(Date <= eval_end) %>%
  tidyr::fill(!equity_premium, .direction = "down") %>%
  tibble::column_to_rownames(var = "Date") %>%
  dplyr::slice(-1)

# Assign Response-Variable
# One-column matrix with the dates as row names
y <- dataset %>%
  dplyr::select(equity_premium) %>%
  as.matrix()

# Assign "P"-Signals
X <- dataset %>%
  dplyr::select(all_of(c(econ_fin_names, topic_names, text_names))) %>%
  as.matrix()

# Assign "F"-Signals
Ext_F <- dataset %>%
  dplyr::select(all_of(f_names)) %>%
  as.matrix()

# Dimensions
cat("Size of P-Signals:", ncol(X), "\n")
cat("Size of F-Signals:", ncol(Ext_F), "\n")

# Get Dates
# `dates_ld` contains every date up to and including `eval_update`
tdates <- rownames(dataset)
dates_ld <- tdates[tdates <= eval_update]

# Get Lengths
tlength <- length(tdates)
length_ld <- length(dates_ld)

# Low-Dimensional Model: 1954-01-01 to 1999-01-01
# Rows are selected by date name; only four P-signals and the "PHM" F-signal
# are kept
y_ld <- y[dates_ld, , drop = FALSE]
X_ld <- X[dates_ld, c("dp", "TBL3M", "TSP1", "vola"), drop = FALSE]
Ext_F_ld <- Ext_F[dates_ld, "PHM", drop = FALSE]

# First Non-NA-Row
# Index of the first row in which all signals are observed
first_complete <- which(complete.cases(cbind(X, Ext_F)))[1]
first_complete_ld <- which(complete.cases(cbind(X_ld, Ext_F_ld)))[1]
# NOTE(review): `adj` / `adj_ld` are not referenced again in the visible part
# of this section
adj <- first_complete - 1
adj_ld <- first_complete_ld - 1

# Clear Memory
rm(list = c("econ_fin_signals",
            "text_signals",
            "topic_signals",
            "f_signals",
            "dataset",
            "econ_fin_names",
            "topic_names",
            "text_names",
            "f_names"))
invisible(gc())

######### --------------------------------------------
### Result Matrices
# Method names: the two benchmarks, the default STSC, 32 tuning-parameter
# variants ("STSC1" ... "STSC32") and the no-forgetting variant
benchmark_names <- c(
  "PHM", "STPHM",
  #---#
  "STSC",
  paste0("STSC", 1:32),
  "STSC_NF"
)

# Result-Objects: point forecasts, variances, squared errors, CRPS and
# portfolio returns. All start as the same NA matrix (dates x methods); R
# copies on modification, so the five objects are filled independently.
preds <- vari <- se <- crps_score <- portfolio_returns <- matrix(
  NA,
  nrow = tlength,
  ncol = length(benchmark_names),
  dimnames = list(tdates, benchmark_names)
)

# Result-Object: Evaluation-Matrix (one row per loss, one column per method)
evaluation <- matrix(
  NA,
  nrow = 2,
  ncol = length(benchmark_names),
  dimnames = list(c("MSE", "CRPS"), benchmark_names)
)

######### --------------------------------------------
### Part 0: Benchmark ###
### Prevailing Historical Mean (PHM)
# phm() returns a list: element 1 is stored as point forecast, element 2 as
# variance forecast
phm_results <- phm(y, window_size, "expanding")

preds[, "PHM"] <- phm_results[[1]]
vari[, "PHM"] <- phm_results[[2]]
se[, "PHM"] <- (y - preds[, "PHM"])^2
# No CRPS is computed for the PHM benchmark
crps_score[, "PHM"] <- NA

### Density Transformation (ST-PHM)
# The PHM forecasts are passed to hdflex::tvc() as the only signal
# NOTE(review): the positional arguments after the signals are presumably
# init, lambda, kappa and the bias flag -- confirm against the hdflex docs
stphm_results <- hdflex::tvc(
  y, NULL, preds[, "PHM", drop = FALSE],
  5 * 252, 1.00, 0.94, FALSE
)

preds[, "STPHM"] <- stphm_results$Forecasts$Point_Forecasts
vari[, "STPHM"] <- stphm_results$Forecasts$Variance_Forecasts
se[, "STPHM"] <- (y - preds[, "STPHM"])^2
crps_score[, "STPHM"] <- crps(
  y, preds[, "STPHM"], sqrt(vari[, "STPHM"]), NULL, "normal"
)$crps

# Remove & Clean Memory
rm(phm_results, stphm_results)
invisible(gc())

######### --------------------------------------------
### Part 1: STSC and Variants ###
####### STSC-Model - Default #######
# Runs STSC once on the low-dimensional subsample and once on the full
# sample; the full-sample forecasts replace the low-dimensional ones after
# the first `length_ld` observations.
### Subsample 1: 1954-1999 (Low-Dim)
# TV-C parameters
init <- 5 * 252
lambda_grid <- c(0.9984, 0.9992, 1.0000)
kappa_grid <- c(0.90, 0.92, 0.94)
burn_in_ld <- first_complete_ld + init / 2
bias <- TRUE

# DSC parameters
gamma_grid <- c(0.40, 0.50, 0.60, 0.70, 0.80, 0.90,
                0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.00)
# One candidate per signal and (lambda, kappa) combination
n_tvc_ld <- (ncol(X_ld) + ncol(Ext_F_ld)) * length(lambda_grid) * length(kappa_grid)
psi_grid_ld <- seq_len(n_tvc_ld)
delta <- 0.9992
burn_in_dsc <- 1
metric <- 5
equal_weight <- TRUE
incl <- NULL
parallel <- TRUE
n_threads <- 3

# STSC on the low-dimensional subsample (arguments are positional)
stsc_results_ld <- hdflex::stsc(
  y_ld, X_ld, Ext_F_ld,
  init, lambda_grid, kappa_grid, bias,
  gamma_grid, psi_grid_ld, delta,
  burn_in_ld, burn_in_dsc, metric, equal_weight,
  incl, parallel, n_threads, NULL
)

### Subsample 2: 1999-2021 (High-Dim)
# Parameters that depend on the full signal set
burn_in <- first_complete + init / 2
n_tvc <- (ncol(X) + ncol(Ext_F)) * length(lambda_grid) * length(kappa_grid)
# Subset sizes 1..100 plus the four quartiles of the candidate count
psi_grid <- c(1:100, floor(1:4 * n_tvc / 4))

# STSC on the full sample
stsc_results <- hdflex::stsc(
  y, X, Ext_F,
  init, lambda_grid, kappa_grid, bias,
  gamma_grid, psi_grid, delta,
  burn_in, burn_in_dsc, metric, equal_weight,
  incl, parallel, n_threads, NULL
)

# Splice: low-dimensional forecasts first, then the full-sample forecasts
# without their first `length_ld` entries
pred_stsc <- c(
  stsc_results_ld$Forecasts$Point_Forecasts,
  stsc_results$Forecasts$Point_Forecasts[-(1:length_ld)]
)
var_stsc <- c(
  stsc_results_ld$Forecasts$Variance_Forecasts,
  stsc_results$Forecasts$Variance_Forecasts[-(1:length_ld)]
)

# Store forecasts, variances, squared errors and CRPS
preds[, "STSC"] <- pred_stsc
vari[, "STSC"] <- var_stsc
se[, "STSC"] <- (y - pred_stsc)^2
crps_score[, "STSC"] <- crps(y, pred_stsc, sqrt(var_stsc), NULL, "normal")$crps

# Remove & Clean Memory
rm(stsc_results_ld, stsc_results)

####### Varying Kappa #######
# Re-runs STSC with a single kappa value instead of `kappa_grid`.
# `i` indexes the "STSC<i>" result columns and keeps counting through the
# following loops.
i <- 1
for (k in c(0.86, 0.88, 0.90, 0.92, 0.94, 0.96, 0.98, 1.00)) {

  # Progress
  cat("Kappa:", k, "\n")

  ### Subsample 1: 1954-1999 (Low-Dim)
  # Only one kappa value, hence the factor 1 in the candidate count
  n_tvc_ld_k <- (ncol(X_ld) + ncol(Ext_F_ld)) * length(lambda_grid) * 1
  psi_grid_ld_k <- seq_len(n_tvc_ld_k)

  stsc_results_ld <- hdflex::stsc(
    y_ld, X_ld, Ext_F_ld,
    init, lambda_grid, k, bias,
    gamma_grid, psi_grid_ld_k, delta,
    burn_in_ld, burn_in_dsc, metric, equal_weight,
    incl, parallel, n_threads, NULL
  )

  ### Subsample 2: 1999-2021 (High-Dim)
  # Subset sizes 1..100 plus the four quartiles of the candidate count
  n_tvc_k <- (ncol(X) + ncol(Ext_F)) * length(lambda_grid) * 1
  psi_grid_k <- c(1:100, floor(1:4 * n_tvc_k / 4))

  stsc_results <- hdflex::stsc(
    y, X, Ext_F,
    init, lambda_grid, k, bias,
    gamma_grid, psi_grid_k, delta,
    burn_in, burn_in_dsc, metric, equal_weight,
    incl, parallel, n_threads, NULL
  )

  # Splice the low-dimensional and full-sample forecasts
  pred_stsc <- c(
    stsc_results_ld$Forecasts$Point_Forecasts,
    stsc_results$Forecasts$Point_Forecasts[-(1:length_ld)]
  )
  var_stsc <- c(
    stsc_results_ld$Forecasts$Variance_Forecasts,
    stsc_results$Forecasts$Variance_Forecasts[-(1:length_ld)]
  )

  # Store the results in column "STSC<i>"
  preds[, paste0("STSC", i)] <- pred_stsc
  vari[, paste0("STSC", i)] <- var_stsc
  se[, paste0("STSC", i)] <- (y - pred_stsc)^2
  crps_score[, paste0("STSC", i)] <- crps(
    y, pred_stsc, sqrt(var_stsc), NULL, "normal"
  )$crps

  # Remove & Clean Memory
  rm(stsc_results_ld, stsc_results)

  # Next result column
  i <- i + 1
}

####### Varying Lambda #######
# Re-runs STSC with a single lambda value instead of `lambda_grid`; results
# go to the next "STSC<i>" columns.
for (l in c(0.9984, 0.9988, 0.9992, 0.9996, 1.0000)) {

  # Progress
  cat("Lambda:", l, "\n")

  ### Subsample 1: 1954-1999 (Low-Dim)
  # Only one lambda value, hence the factor 1 in the candidate count
  n_tvc_ld_l <- (ncol(X_ld) + ncol(Ext_F_ld)) * 1 * length(kappa_grid)
  psi_grid_ld_l <- seq_len(n_tvc_ld_l)

  stsc_results_ld <- hdflex::stsc(
    y_ld, X_ld, Ext_F_ld,
    init, l, kappa_grid, bias,
    gamma_grid, psi_grid_ld_l, delta,
    burn_in_ld, burn_in_dsc, metric, equal_weight,
    incl, parallel, n_threads, NULL
  )

  ### Subsample 2: 1999-2021 (High-Dim)
  # Subset sizes 1..100 plus the four quartiles of the candidate count
  n_tvc_l <- (ncol(X) + ncol(Ext_F)) * 1 * length(kappa_grid)
  psi_grid_l <- c(1:100, floor(1:4 * n_tvc_l / 4))

  stsc_results <- hdflex::stsc(
    y, X, Ext_F,
    init, l, kappa_grid, bias,
    gamma_grid, psi_grid_l, delta,
    burn_in, burn_in_dsc, metric, equal_weight,
    incl, parallel, n_threads, NULL
  )

  # Splice the low-dimensional and full-sample forecasts
  pred_stsc <- c(
    stsc_results_ld$Forecasts$Point_Forecasts,
    stsc_results$Forecasts$Point_Forecasts[-(1:length_ld)]
  )
  var_stsc <- c(
    stsc_results_ld$Forecasts$Variance_Forecasts,
    stsc_results$Forecasts$Variance_Forecasts[-(1:length_ld)]
  )

  # Store the results in column "STSC<i>"
  preds[, paste0("STSC", i)] <- pred_stsc
  vari[, paste0("STSC", i)] <- var_stsc
  se[, paste0("STSC", i)] <- (y - pred_stsc)^2
  crps_score[, paste0("STSC", i)] <- crps(
    y, pred_stsc, sqrt(var_stsc), NULL, "normal"
  )$crps

  # Remove & Clean Memory
  rm(stsc_results_ld, stsc_results)

  # Next result column
  i <- i + 1
}

####### Varying Delta #######
# Re-runs STSC with an alternative delta; all other settings are the
# defaults. Results go to the next "STSC<i>" columns.
for (d in c(0.9984, 0.9988, 0.9992, 0.9996, 1.0000)) {

  # Progress
  cat("Delta:", d, "\n")

  ### Subsample 1: 1954-1999 (Low-Dim)
  stsc_results_ld <- hdflex::stsc(
    y_ld, X_ld, Ext_F_ld,
    init, lambda_grid, kappa_grid, bias,
    gamma_grid, psi_grid_ld, d,
    burn_in_ld, burn_in_dsc, metric, equal_weight,
    incl, parallel, n_threads, NULL
  )

  ### Subsample 2: 1999-2021 (High-Dim)
  stsc_results <- hdflex::stsc(
    y, X, Ext_F,
    init, lambda_grid, kappa_grid, bias,
    gamma_grid, psi_grid, d,
    burn_in, burn_in_dsc, metric, equal_weight,
    incl, parallel, n_threads, NULL
  )

  # Splice the low-dimensional and full-sample forecasts
  pred_stsc <- c(
    stsc_results_ld$Forecasts$Point_Forecasts,
    stsc_results$Forecasts$Point_Forecasts[-(1:length_ld)]
  )
  var_stsc <- c(
    stsc_results_ld$Forecasts$Variance_Forecasts,
    stsc_results$Forecasts$Variance_Forecasts[-(1:length_ld)]
  )

  # Store the results in column "STSC<i>"
  preds[, paste0("STSC", i)] <- pred_stsc
  vari[, paste0("STSC", i)] <- var_stsc
  se[, paste0("STSC", i)] <- (y - pred_stsc)^2
  crps_score[, paste0("STSC", i)] <- crps(
    y, pred_stsc, sqrt(var_stsc), NULL, "normal"
  )$crps

  # Remove & Clean Memory
  rm(stsc_results_ld, stsc_results)

  # Next result column
  i <- i + 1
}

####### Varying Psi #######
# Re-runs STSC with a single subset size psi instead of a grid; results go to
# the next "STSC<i>" columns.
# NOTE(review): the last value (115947) is hard-coded -- presumably the total
# number of candidate models of the full sample (`n_tvc`); confirm.
for (p in c(1, 5, 10, 25, 50, 100, 500, 115947)) {

  # Print
  cat("Psi:", p, "\n")

  # Adjust Parameter
  # psi cannot exceed the number of candidate models. Clamp it for BOTH
  # subsamples so the hard-coded maximum stays valid if the number of signals
  # changes; values not above the candidate count are passed through as-is.
  p_ld <- min(p, n_tvc_ld)
  p_hd <- min(p, n_tvc)

  ### Subsample 1: 1954-1999 (Low-Dim)
  # Apply STSC-Function
  stsc_results_ld <- hdflex::stsc(
    y_ld, X_ld, Ext_F_ld,
    init, lambda_grid, kappa_grid, bias,
    gamma_grid, p_ld, delta,
    burn_in_ld, burn_in_dsc, metric, equal_weight,
    incl, parallel, n_threads, NULL
  )

  ### Subsample 2: 1999-2021 (High-Dim)
  # Apply STSC-Function
  stsc_results <- hdflex::stsc(
    y, X, Ext_F,
    init, lambda_grid, kappa_grid, bias,
    gamma_grid, p_hd, delta,
    burn_in, burn_in_dsc, metric, equal_weight,
    incl, parallel, n_threads, NULL
  )

  # Assign STSC-Results
  # Low-dimensional forecasts first, then the full-sample forecasts without
  # their first `length_ld` entries
  pred_stsc <- c(
    stsc_results_ld$Forecasts$Point_Forecasts,
    stsc_results$Forecasts$Point_Forecasts[-c(1:length_ld)]
  )
  var_stsc <- c(
    stsc_results_ld$Forecasts$Variance_Forecasts,
    stsc_results$Forecasts$Variance_Forecasts[-c(1:length_ld)]
  )

  # Save Results
  preds[, paste0("STSC", i)] <- pred_stsc
  vari[, paste0("STSC", i)] <- var_stsc
  se[, paste0("STSC", i)] <- (y - pred_stsc) ** 2
  crps_score[, paste0("STSC", i)] <- crps(y, pred_stsc,
                                          sqrt(var_stsc), NULL, "normal")$crps

  # Remove & Clean Memory
  rm(list = c("stsc_results_ld", "stsc_results"))

  # Update Counter
  i <- i + 1
}

####### Varying Gamma #######
# Re-runs STSC with a single gamma value instead of `gamma_grid`; results go
# to the next "STSC<i>" columns.
for (g in c(0.40, 0.60, 0.80, 0.95, 0.99, 1.00)) {

  # Progress
  cat("Gamma:", g, "\n")

  ### Subsample 1: 1954-1999 (Low-Dim)
  stsc_results_ld <- hdflex::stsc(
    y_ld, X_ld, Ext_F_ld,
    init, lambda_grid, kappa_grid, bias,
    g, psi_grid_ld, delta,
    burn_in_ld, burn_in_dsc, metric, equal_weight,
    incl, parallel, n_threads, NULL
  )

  ### Subsample 2: 1999-2021 (High-Dim)
  stsc_results <- hdflex::stsc(
    y, X, Ext_F,
    init, lambda_grid, kappa_grid, bias,
    g, psi_grid, delta,
    burn_in, burn_in_dsc, metric, equal_weight,
    incl, parallel, n_threads, NULL
  )

  # Splice the low-dimensional and full-sample forecasts
  pred_stsc <- c(
    stsc_results_ld$Forecasts$Point_Forecasts,
    stsc_results$Forecasts$Point_Forecasts[-(1:length_ld)]
  )
  var_stsc <- c(
    stsc_results_ld$Forecasts$Variance_Forecasts,
    stsc_results$Forecasts$Variance_Forecasts[-(1:length_ld)]
  )

  # Store the results in column "STSC<i>"
  preds[, paste0("STSC", i)] <- pred_stsc
  vari[, paste0("STSC", i)] <- var_stsc
  se[, paste0("STSC", i)] <- (y - pred_stsc)^2
  crps_score[, paste0("STSC", i)] <- crps(
    y, pred_stsc, sqrt(var_stsc), NULL, "normal"
  )$crps

  # Remove & Clean Memory
  rm(stsc_results_ld, stsc_results)

  # Next result column
  i <- i + 1
}

####### No Forgetting #######
# STSC with lambda and kappa both fixed at 1.00; stored in column "STSC_NF".
### Subsample 1: 1954-1999 (Low-Dim)
# One lambda and one kappa value, so one candidate per signal
lambda_grid_nf <- 1.00
kappa_grid_nf <- 1.00
n_tvc_ld_nf <- (ncol(X_ld) + ncol(Ext_F_ld)) * 1 * 1
psi_grid_ld_nf <- seq_len(n_tvc_ld_nf)

stsc_results_ld <- hdflex::stsc(
  y_ld, X_ld, Ext_F_ld,
  init, lambda_grid_nf, kappa_grid_nf, bias,
  gamma_grid, psi_grid_ld_nf, delta,
  burn_in_ld, burn_in_dsc, metric, equal_weight,
  incl, parallel, n_threads, NULL
)

### Subsample 2: 1999-2021 (High-Dim)
# Subset sizes 1..100 plus the four quartiles of the candidate count
n_tvc_nf <- (ncol(X) + ncol(Ext_F)) * 1 * 1
psi_grid_nf <- c(1:100, floor(1:4 * n_tvc_nf / 4))

stsc_results <- hdflex::stsc(
  y, X, Ext_F,
  init, lambda_grid_nf, kappa_grid_nf, bias,
  gamma_grid, psi_grid_nf, delta,
  burn_in, burn_in_dsc, metric, equal_weight,
  incl, parallel, n_threads, NULL
)

# Splice the low-dimensional and full-sample forecasts
pred_stsc <- c(
  stsc_results_ld$Forecasts$Point_Forecasts,
  stsc_results$Forecasts$Point_Forecasts[-(1:length_ld)]
)
var_stsc <- c(
  stsc_results_ld$Forecasts$Variance_Forecasts,
  stsc_results$Forecasts$Variance_Forecasts[-(1:length_ld)]
)

# Store forecasts, variances, squared errors and CRPS
preds[, "STSC_NF"] <- pred_stsc
vari[, "STSC_NF"] <- var_stsc
se[, "STSC_NF"] <- (y - pred_stsc)^2
crps_score[, "STSC_NF"] <- crps(
  y, pred_stsc, sqrt(var_stsc), NULL, "normal"
)$crps

# Remove & Clean Memory
rm(stsc_results_ld, stsc_results)

######### --------------------------------------------
### Part 3: Evaluation ###
# Restricts the result matrices to the out-of-sample window, averages the
# squared errors and CRPS per method and writes the results to
# {path}/Results/ES1/ with the current `setting` as file suffix.
####### OOS-Period #######
# Define Evaluation Period (OOS-Period)
# Row positions whose date string lies in [eval_start, eval_end]
oos_idx <- which(tdates >= eval_start & tdates <= eval_end)

# OOS-Data
# `y` is a one-column matrix, so `y[oos_idx, ]` drops to a vector named by date
oos_y <- y[oos_idx, ]
oos_preds <- preds[oos_idx, ]
oos_vari <- vari[oos_idx, ]
oos_se <- se[oos_idx, ]
oos_crps <- crps_score[oos_idx, ]
# NOTE(review): `portfolio_returns` is never filled in this section, so this
# subset is all NA; it is neither checked nor saved below
oos_portfolio_returns <- portfolio_returns[oos_idx, ]
oos_dates <- names(oos_y)

# Check for Missing Values
# `oos_crps` is not checked: its "PHM" column is NA by construction
for (obj in list(oos_y, oos_preds, oos_vari, oos_se)) {
  if (any(is.na(obj))) {
    rlang::abort("Error: Missing Values in one of the OOS objects")
  }
}

### Mean-Squared-Error
# Calculate MSE
# Column means of the squared errors, i.e. one MSE per method
evaluation["MSE", ] <- colMeans(oos_se)

### Mean-Continuous Ranked Probability Score
# Calculate CRPS
# No `na.rm`, so the "PHM" entry stays NA
evaluation["CRPS", ] <- colMeans(oos_crps)

###########--------------------------------------------
# Save Results
write_rds(oos_y, glue("{path}/Results/ES1/y_{setting}.rds"))
write_rds(oos_preds, glue("{path}/Results/ES1/point_forecasts_{setting}.rds"))
write_rds(oos_se, glue("{path}/Results/ES1/squared_errors_{setting}.rds"))
write_rds(oos_vari, glue("{path}/Results/ES1/variances_{setting}.rds"))
write_rds(oos_crps, glue("{path}/Results/ES1/crps_scores_{setting}.rds"))
write_rds(evaluation, glue("{path}/Results/ES1/results_{setting}.rds"))
##########--------------------------------------------

# Show Results
evaluation
######### --------------------------------------------

## 4) Conditional Gaussianity

In [None]:
######### --------------------------------------------
### Predict Daily Equity Premium -- Conditional Gaussian ###
# Loads the economic/financial and "F" signals and builds the response `y`,
# a small "P"-signal matrix `X` and the single "F"-signal `Ext_F` ("PHM").
# Setting
setting <- "cond_normal_assumption"

# Set Global Parameter
window_size <- 252 * 5
eval_start <- "1967-01-04"
eval_end <- "2021-12-31"

# Load Data
econ_fin_signals <- read_rds(glue("{path}/Data/ES1/P_Signals/Econ_Fin/econ_fin_signals.rds"))
f_signals <- read_rds(glue("{path}/Data/ES1/F_Signals/refined_signals.rds"))

# Get Signal Names
# NOTE(review): both name vectors are only removed again below; the column
# selection further down uses explicit names instead
econ_fin_names <- setdiff(names(econ_fin_signals), c("Date", "equity_premium"))
f_names <- setdiff(names(f_signals), "Date")

# Combine and Prepare Data
# Order matters: the P-signals are lagged by one row BEFORE the F-signals are
# joined, so the F-signals enter unlagged. All columns except the response
# are then forward-filled, `Date` becomes the row names and the first row is
# dropped (presumably because lagging leaves it empty -- confirm).
dataset <- econ_fin_signals %>%
  dplyr::mutate(across(!Date & !equity_premium, dplyr::lag)) %>%
  dplyr::left_join(f_signals, by = "Date") %>%
  dplyr::filter(Date <= eval_end) %>%
  tidyr::fill(!equity_premium, .direction = "down") %>%
  tibble::column_to_rownames(var = "Date") %>%
  dplyr::slice(-1)

# Assign Response-Variable
# One-column matrix with the dates as row names
y <- dataset %>%
  dplyr::select(equity_premium) %>%
  as.matrix()

# Assign "P"-Signals
# Four named signals plus every column whose name starts with "MA_D"
X <- dataset %>%
  dplyr::select(dp, TBL3M, TSP1, vola, starts_with("MA_D")) %>%
  as.matrix()

# Assign "F"-Signals
Ext_F <- dataset %>%
  dplyr::select(PHM) %>%
  as.matrix()

# Dimensions
cat("Size of P-Signals:", ncol(X), "\n")
cat("Size of F-Signals:", ncol(Ext_F), "\n")

# Get Dates
tdates <- rownames(dataset)

# Get Lengths
tlength <- length(tdates)

# First Non-NA-Row
# Index of the first row in which all signals are observed
first_complete <- which(complete.cases(cbind(X, Ext_F)))[1]

# Clear Memory
rm(list = c("econ_fin_signals",
            "f_signals",
            "dataset",
            "econ_fin_names",
            "f_names"))
invisible(gc())

######### --------------------------------------------
### Result Matrices
# Method names: the two benchmarks plus three STSC / DMA pairs
benchmark_names <- c(
  "PHM", "STPHM",
  #---#
  "STSC1", "DMA1",
  "STSC5", "DMA5",
  "STSC15", "DMA15"
)

# Result-Objects: point forecasts, variances, squared errors and CRPS.
# All start as the same NA matrix (dates x methods); R copies on
# modification, so the four objects are filled independently.
preds <- vari <- se <- crps_score <- matrix(
  NA,
  nrow = tlength,
  ncol = length(benchmark_names),
  dimnames = list(tdates, benchmark_names)
)

# Result-Object: Evaluation-Matrix (one row per loss, one column per method)
evaluation <- matrix(
  NA,
  nrow = 2,
  ncol = length(benchmark_names),
  dimnames = list(c("MSE", "CRPS"), benchmark_names)
)

######### --------------------------------------------
### Part 0: Benchmark ###
### Prevailing Historical Mean (PHM)
# Apply Function
# `phm()` is not defined in this section (presumably sourced from the project's
# helper files -- TODO confirm). It receives the response, the window size and
# the mode string "expanding".
phm_results <- phm(y, window_size, "expanding")

# Assign Results
# Element 1 of the result is stored as point forecast, element 2 as variance.
preds[, "PHM"] <- phm_results[[1]]
vari[, "PHM"] <- phm_results[[2]]
se[, "PHM"] <- (y - preds[, "PHM"]) ** 2
# No CRPS is computed for the plain PHM: the column is left as NA.
crps_score[, "PHM"] <- NA

### Density Transformation (ST-PHM)
# Apply Function
# The PHM point forecasts are passed as the only signal (third argument), the
# second argument is NULL. The remaining arguments are positional;
# NOTE(review): judging by the parameter names used for hdflex::stsc in this
# file they are presumably init = 5 * 252, lambda = 1.00, kappa = 0.94 and
# bias = FALSE -- confirm against the hdflex::tvc documentation.
stphm_results <- hdflex::tvc(y,
                             NULL,
                             preds[, "PHM", drop = FALSE],
                             5 * 252,
                             1.00,
                             0.94,
                             FALSE)

# Assign Results
preds[, "STPHM"] <- stphm_results$Forecasts$Point_Forecasts
vari[, "STPHM"] <- stphm_results$Forecasts$Variance_Forecasts
se[, "STPHM"] <- (y - preds[, "STPHM"]) ** 2
# CRPS from the forecast mean and standard deviation; the last argument selects
# the "normal" option of `crps()` (helper not defined in this section).
crps_score[, "STPHM"] <- crps(y, preds[, "STPHM"],
                              sqrt(vari[, "STPHM"]),
                              NULL, "normal")$crps
# Remove & Clean Memory
rm(list = c("phm_results", "stphm_results"))
invisible(gc())

######### --------------------------------------------
####### STSC1-Model #######
# The parameters set here are shared by all three STSC runs below; only
# `n_tvc` / `psi_grid` and the signal matrices change between runs.
# Set TV-C-Parameter
init <- 252 * 5
lambda_grid <- c(0.9984, 0.9992, 1.0000)
kappa_grid <- c(0.94)
bias <- TRUE

# Set DSC-Parameter
gamma_grid <- c(0.40, 0.50, 0.60, 0.70, 0.80, 0.90,
                0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.00)
# One signal times the number of lambda values (kappa_grid has a single value)
n_tvc <- 1 * length(lambda_grid)
# Candidate subset sizes: 1, ..., n_tvc
psi_grid <- c(1:n_tvc)
delta <- 0.9992
# Burn-in is placed half an `init` window after the first complete signal row
burn_in <- first_complete + init / 2
burn_in_dsc <- 1
# NOTE(review): `metric` is a numeric code interpreted by hdflex::stsc; its
# meaning is not visible here -- confirm against the package documentation.
metric <- 5
equal_weight <- TRUE
incl <- NULL
parallel <- TRUE
n_threads <- 3

# Apply STSC-Function
# All arguments are positional. In this run the PHM signal occupies the second
# argument slot and the third slot is NULL.
stsc_results1 <- hdflex::stsc(y,
                              Ext_F[, "PHM", drop = FALSE],
                              NULL,
                              init,
                              lambda_grid,
                              kappa_grid,
                              bias,
                              gamma_grid,
                              psi_grid,
                              delta,
                              burn_in,
                              burn_in_dsc,
                              metric,
                              equal_weight,
                              incl,
                              parallel,
                              n_threads,
                              NULL)

# Assign STSC-Results
pred_stsc1 <- stsc_results1$Forecasts$Point_Forecasts
var_stsc1  <- stsc_results1$Forecasts$Variance_Forecasts

# Add tdates
names(pred_stsc1) <- tdates
names(var_stsc1) <- tdates

# Remove & Clean Memory
rm(list = c("stsc_results1"))

####### STSC5-Model #######
# Update DSC-Parameter
# Five signals (four P-signals plus PHM) times the number of lambda values
n_tvc    <- 5 * length(lambda_grid)
psi_grid <- c(1:n_tvc)

# Apply STSC-Function
# Here the four P-signals go into the second slot and PHM into the third.
stsc_results5 <- hdflex::stsc(y,
                              X[, c("dp", "TBL3M", "TSP1", "vola")],
                              Ext_F[, "PHM", drop = FALSE],
                              init,
                              lambda_grid,
                              kappa_grid,
                              bias,
                              gamma_grid,
                              psi_grid,
                              delta,
                              burn_in,
                              burn_in_dsc,
                              metric,
                              equal_weight,
                              incl,
                              parallel,
                              n_threads,
                              NULL)

# Assign STSC-Results
pred_stsc5 <- stsc_results5$Forecasts$Point_Forecasts
var_stsc5  <- stsc_results5$Forecasts$Variance_Forecasts

# Add tdates
names(pred_stsc5) <- tdates
names(var_stsc5) <- tdates

# Remove & Clean Memory
rm(list = c("stsc_results5"))

####### STSC15-Model #######
# Update DSC-Parameter
# NOTE(review): the factor 15 presumes ncol(X) + 1 == 15 (all P-signals plus
# PHM); the actual column count of X is only printed, not checked -- confirm.
n_tvc <- 15 * length(lambda_grid)
psi_grid <- c(1:n_tvc)

# Apply STSC-Function
# Full P-signal matrix in the second slot, PHM in the third.
stsc_results15 <- hdflex::stsc(y,
                               X,
                               Ext_F[, "PHM", drop = FALSE],
                               init,
                               lambda_grid,
                               kappa_grid,
                               bias,
                               gamma_grid,
                               psi_grid,
                               delta,
                               burn_in,
                               burn_in_dsc,
                               metric,
                               equal_weight,
                               incl,
                               parallel,
                               n_threads,
                               NULL)

# Assign STSC-Results
pred_stsc15 <- stsc_results15$Forecasts$Point_Forecasts
var_stsc15  <- stsc_results15$Forecasts$Variance_Forecasts

# Add tdates
names(pred_stsc15) <- tdates
names(var_stsc15) <- tdates

# Remove & Clean Memory
rm(list = c("stsc_results15"))

####### Dynamic Model Averaging - 1 #######
# Set Parameter
# These three values are reused by all DMA runs below. lambda and kappa match
# the lambda_grid / kappa_grid values used for the STSC runs in this section.
dma_alpha  <- 0.99
dma_lambda <- c(0.9984, 0.9992, 1.0000)
dma_kappa  <- 0.94

# Apply DMA1
# `dma()` is not defined in this section (presumably a project helper --
# TODO confirm). As for STSC1, PHM is the only signal and sits in the second
# argument slot, with NULL in the third.
dma_results1 <- dma(y,
                    Ext_F[, "PHM", drop = FALSE],
                    NULL,
                    dma_alpha,
                    dma_lambda,
                    dma_kappa)

# Columns 1, 3 and 4 of the result are used as point forecast, variance and
# CRPS; column 2 is not used in this section.
pred_dma1 <- dma_results1[, 1]
var_dma1  <- dma_results1[, 3]
crps_dma1 <- dma_results1[, 4]

# Remove & Clean Memory
rm(list = c("dma_results1"))

####### Dynamic Model Averaging - 5 #######
# Apply DMA5
# Four P-signals in the second slot, PHM in the third.
dma_results5 <- dma(y,
                    X[, c("dp", "TBL3M", "TSP1", "vola")],
                    Ext_F[, "PHM", drop = FALSE],
                    dma_alpha,
                    dma_lambda,
                    dma_kappa)

pred_dma5 <- dma_results5[, 1]
var_dma5  <- dma_results5[, 3]
crps_dma5 <- dma_results5[, 4]

# Remove & Clean Memory
rm(list = c("dma_results5"))

# Progress message (only this run prints one)
print("DMA5 Done")

####### Dynamic Model Averaging - 15 #######
# Apply DMA
# Full P-signal matrix in the second slot, PHM in the third.
dma_results15 <- dma(y,
                     X,
                     Ext_F[, "PHM", drop = FALSE],
                     dma_alpha,
                     dma_lambda,
                     dma_kappa)

pred_dma15 <- dma_results15[, 1]
var_dma15  <- dma_results15[, 3]
crps_dma15 <- dma_results15[, 4]

# Remove & Clean Memory
rm(list = c("dma_results15"))

####### ---------------------------------------
####### Store results, one model at a time #######
# For every model: point forecast, variance, squared error and CRPS.
# STSC models get their CRPS from `crps()` under the "normal" option;
# DMA models use the CRPS column returned by `dma()`.

### STSC1
preds[, "STSC1"] <- pred_stsc1
vari[, "STSC1"] <- var_stsc1
se[, "STSC1"] <- (y - pred_stsc1)^2
crps_score[, "STSC1"] <- crps(y, pred_stsc1, sqrt(var_stsc1), NULL, "normal")$crps # nolint

### STSC5
preds[, "STSC5"] <- pred_stsc5
vari[, "STSC5"] <- var_stsc5
se[, "STSC5"] <- (y - pred_stsc5)^2
crps_score[, "STSC5"] <- crps(y, pred_stsc5, sqrt(var_stsc5), NULL, "normal")$crps # nolint

### STSC15
preds[, "STSC15"] <- pred_stsc15
vari[, "STSC15"] <- var_stsc15
se[, "STSC15"] <- (y - pred_stsc15)^2
crps_score[, "STSC15"] <- crps(y, pred_stsc15, sqrt(var_stsc15), NULL, "normal")$crps # nolint

### DMA1
preds[, "DMA1"] <- pred_dma1
vari[, "DMA1"] <- var_dma1
se[, "DMA1"] <- (y - pred_dma1)^2
crps_score[, "DMA1"] <- crps_dma1

### DMA5
preds[, "DMA5"] <- pred_dma5
vari[, "DMA5"] <- var_dma5
se[, "DMA5"] <- (y - pred_dma5)^2
crps_score[, "DMA5"] <- crps_dma5

### DMA15
preds[, "DMA15"] <- pred_dma15
vari[, "DMA15"] <- var_dma15
se[, "DMA15"] <- (y - pred_dma15)^2
crps_score[, "DMA15"] <- crps_dma15

# Drop the per-model forecast and variance vectors
# (the crps_dma* vectors are kept, as before)
rm(pred_stsc1, pred_dma1,
   pred_stsc5, pred_dma5,
   pred_stsc15, pred_dma15,
   var_stsc1, var_dma1,
   var_stsc5, var_dma5,
   var_stsc15, var_dma15)
invisible(gc())

######### --------------------------------------------
### Part 3: Evaluation ###
####### OOS-Period #######
# Rows that fall inside the evaluation period (ISO date strings compare
# correctly as text)
oos_idx <- which(eval_start <= tdates & eval_end >= tdates)

# Restrict every object to the OOS rows
oos_y <- y[oos_idx, ]
oos_preds <- preds[oos_idx, ]
oos_vari <- vari[oos_idx, ]
oos_se <- se[oos_idx, ]
oos_crps <- crps_score[oos_idx, ]

# Stop if anything is missing. The CRPS matrix is deliberately not checked:
# its PHM column is NA by construction.
for (obj in list(oos_y, oos_preds, oos_vari, oos_se)) {
  if (anyNA(obj)) {
    rlang::abort("Error: Missing Values in one of the OOS objects")
  }
}

### Mean-Squared-Error (per method)
evaluation["MSE", ] <- colMeans(oos_se)

### Mean Continuous Ranked Probability Score (per method)
evaluation["CRPS", ] <- colMeans(oos_crps)

###########--------------------------------------------
# Persist the OOS objects and the evaluation matrix
write_rds(oos_preds, glue("{path}/Results/ES1/point_forecasts_{setting}.rds"))
write_rds(oos_se, glue("{path}/Results/ES1/squared_errors_{setting}.rds"))
write_rds(oos_vari, glue("{path}/Results/ES1/variances_{setting}.rds"))
write_rds(oos_crps, glue("{path}/Results/ES1/crps_scores_{setting}.rds"))
write_rds(evaluation, glue("{path}/Results/ES1/results_{setting}.rds"))
##########--------------------------------------------

# Absolute losses (methods in rows)
t(evaluation)

# Losses relative to STPHM: each row (MSE, CRPS) is divided by its STPHM entry
t(round(evaluation / evaluation[, "STPHM"], 4))
##########--------------------------------------------

## 5) Plots

In [4]:
######### --------------------------------------------
# Setting
# Selects which result files (suffix "_{setting}.rds") the plot cells read
setting <- "finance_1260"

### Global Settings
# Store reusable parameters
# Colours (hex codes; the 8-digit ones include an alpha channel)
black_color <- "#000000"
grey_color <- "#807f7f"
orange_color <- "#E69F00"
darkblue_color <- "#3a5795"
sky_color <- "#56B4E9"
green_color <- "#009E73"
blue_color <- "#0072B2"
red_color <- "#D55E00"
purple_color <- "#CC79A7"
dotted_line_color <- "#7b7979da"
lightgrey_color <- "#807f7f18"
# Font and legend sizes
text_size <- 24
axis_text_size <- 20
legend_text_size <- 20
legend_key_size <- unit(4, "line")
# Axis breaks and expansions shared by the time-series plots
x_breaks <- seq(ymd("1970-01-01"), ymd("2021-12-31"), by = "10 year")
x_expand <- expansion(mult = c(0, 0.01))
y_expand <- expansion(mult = c(0.05, 0.05))
# Line widths of reference lines
hline_size <- 0.5
vline_size <- 0.5

# Set the theme globally
# theme_set() changes the default theme for every ggplot created afterwards
theme_set(
  theme_bw() +
    theme(
      plot.title = element_text(size = text_size, face = "bold", hjust = 0.5, margin = margin(0, 0, 10, 0)),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      text = element_text(size = text_size),
      axis.text = element_text(size = axis_text_size),
      axis.text.x = element_text(vjust = 0.5),
      axis.title.x = element_text(vjust = -0.5),
      axis.title = element_text(size = text_size),
      strip.text = element_text(size = text_size),
      legend.title = element_blank(),
      legend.text = element_text(size = legend_text_size),
      legend.position = "bottom",
      legend.margin = margin(t = -30),
      legend.key.size = legend_key_size
    )
)

# Common Plot Aesthetics
# List of layers added to the time-series plots: decade breaks on the x axis,
# a dashed vertical line at 1999-01-04 and enlarged legend keys
plot_aesthetics <- list(
  scale_x_date(breaks = x_breaks, date_labels = "%Y", expand = x_expand),
  geom_vline(xintercept = as.Date("1999-01-04"), linetype = "dashed", color = red_color, linewidth = vline_size),
  guides(color = guide_legend(override.aes = list(size = 14, alpha = 1, linewidth = 1.25)))
)
######### --------------------------------------------

In [5]:
######### --------------------------------------------
### Plot: Cumulative Squared Error Differences (CSSED) ###
### Load CSSED
# Steps: read the squared errors, replace every model column by
# (PHM - column), accumulate over time, keep the two STSC variants and reshape
# to long format. `Method` is a factor copy of `Model` with a fixed order.
df <- read_rds(glue("{path}/Results/ES1/squared_errors_{setting}.rds")) %>%
  as_tibble(rownames = "Date") %>%
  mutate(Date = ymd(Date)) %>%
  mutate(across(-Date, ~PHM - .)) %>%
  mutate(across(-Date, ~cumsum(.))) %>%
  dplyr::select(Date, STSC, STSC_LD) %>%
  pivot_longer(!Date, names_to = "Model", values_to = "value") %>%
  mutate(Method = factor(Model, levels = c("STSC", "STSC_LD")))

# Plot CSSED
# One line per model. The manual scales list three values although `Method`
# has two levels; the third entry is not needed by the data built above.
plot_cssed <- df %>%
  ggplot() +
  geom_line(aes(Date,
                value,
                group = Model,
                color = Method,
                linetype = Method,
                linewidth = Method,
                alpha = Method)) +
  labs(title = "Cumulative Squared Error Differences",
       y = "",
       x = "") +
  scale_y_continuous(expand = y_expand) +
  scale_linetype_manual(values = c(1, 2, 5),
                        labels = c(expression(STSC[highdim]),
                                   expression(STSC[lowdim]))) +
  scale_linewidth_manual(values = c(0.75, 0.5, 0.5),
                         guide = "none") +
  scale_alpha_manual(values = c(1, 0.5, 0.5),
                     guide = "none")  +
  scale_color_manual(values = c(black_color, lightgrey_color, grey_color),
                     labels = c(expression(STSC[highdim]),
                                expression(STSC[lowdim]))) +
  # Zero line: values above it mean a smaller cumulative loss than the PHM
  geom_hline(yintercept = 0,
             linetype = "dotted",
             color = dotted_line_color,
             linewidth = hline_size) +
  plot_aesthetics
######### --------------------------------------------

In [6]:
######### --------------------------------------------
### Plot: Cumulative CRPS Differences ###
# Load CRPS
# Steps: read the CRPS values, keep STPHM and the two STSC variants, replace
# every column by (STPHM - column), accumulate over time, drop the reference
# column and reshape to long format.
df <- read_rds(glue("{path}/Results/ES1/crps_scores_{setting}.rds")) %>%
  as_tibble(rownames = "Date") %>%
  mutate(Date = ymd(Date)) %>%
  dplyr::select(Date, STPHM, STSC, STSC_LD) %>%
  mutate(across(-Date, ~STPHM - .)) %>%
  mutate(across(-Date, ~cumsum(.))) %>%
  select(-STPHM) %>%
  pivot_longer(!Date, names_to = "Model", values_to = "value") %>%
  mutate(Method = factor(Model, levels = c("STSC", "STSC_LD")))

# Plot CRPS
# Same layout as the squared-error plot: one line per model, manual scales
# with three values for the two levels of `Method`.
plot_crps <- df %>%
  ggplot() +
  geom_line(aes(Date,
                value,
                group = Model,
                color = Method,
                linetype = Method,
                linewidth = Method,
                alpha = Method)) +
  labs(title = "Cumulative CRPS Differences",
       y = "",
       x = "") +
  scale_y_continuous(expand = y_expand) +
  scale_linetype_manual(values = c(1, 2, 5),
                        labels = c(expression(STSC[highdim]),
                                   expression(STSC[lowdim]))) +
  scale_linewidth_manual(values = c(0.75, 0.5, 0.5),
                         guide = "none") +
  scale_alpha_manual(values = c(1, 0.5, 0.5),
                     guide = "none")  +
  scale_color_manual(values = c(black_color, lightgrey_color, grey_color),
                     labels = c(expression(STSC[highdim]),
                                expression(STSC[lowdim]))) +
  # Zero line: values above it mean a smaller cumulative CRPS than STPHM
  geom_hline(yintercept = 0,
             linetype = "dotted",
             color = dotted_line_color,
             linewidth = hline_size) +
  plot_aesthetics
######### --------------------------------------------

In [7]:
######### --------------------------------------------
### Plot: Subset-Size Psi ###
### Load Psi
# Long format with one row per (Date, Parameter); only strictly positive
# values are kept.
df <- read_rds(glue("{path}/Results/ES1/stsc_params_{setting}.rds"))$psi %>%
  as_tibble(rownames = "Date") %>%
  mutate(Date = ymd(Date)) %>%
  pivot_longer(!Date, names_to = "Parameter", values_to = "value") %>%
  filter(value > 0) %>%
  mutate(Description = "Subset-Size")

# Plot Psi (before 1999)
# Values dated after 1999-01-01 are set to NA, so the full x range is kept
# while only the early period shows points. The y axis is numeric, 1 to 45.
plot_psi_b1999 <- df %>%
  mutate(across(value, ~ if_else(Date > "1999-01-01", NA_real_, .))) %>%
  ggplot() +
  geom_point(aes(Date,
                 value,
                 color = Description),
             size = 1.00, alpha = 1.0, shape = 18) +
  labs(title = expression(bold(paste("Subset Size ", psi, " (before 1999)"))),
       y = "",
       x = "") +
  scale_y_continuous(labels = c(1, 5, 10, 15, 20, 25, 30, 35, 40, 45),
                     breaks = c(1, 5, 10, 15, 20, 25, 30, 35, 40, 45),
                     limits = c(1, 5 * 3 * 3),
                     expand = y_expand) +
  scale_color_manual(values = c(black_color)) +
  theme(legend.position = "none") +
  plot_aesthetics

# Plot Psi (after 1999)
# Mirror image: values dated before 1999-01-01 are set to NA. The values are
# plotted on a discrete axis whose limits are a hand-picked list of levels.
# NOTE(review): the limits/breaks are hard-coded numbers whose origin is not
# visible in this section -- confirm they match the psi grid of the
# high-dimensional run.
plot_psi_a1999 <- df %>%
  mutate(across(value, ~ if_else(Date < "1999-01-01", NA_real_, .))) %>%
  mutate(value_factor = factor(value, levels = unique(sort(value, na.last = NA)))) %>%
  ggplot() +
  geom_point(aes(Date,
                 value_factor,
                 color = Description),
             size = 1.00, alpha = 1.0, shape = 18) +
  labs(title = expression(bold(paste("Subset Size ", psi, " (after 1999)"))),
       y = "",
       x = "") +
  scale_y_discrete(expand = y_expand,
                   limits = factor(c(1:100, 501:509, 28986, 30001:30009, 57973, 60001:60009, 86960, 100001:100009, 115947)),
                   breaks = factor(c(1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 28986, 57973, 86960, 115947)),
                   labels = factor(c(1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 28986, 57973, 86960, 115947))) +
  scale_color_manual(values = c(black_color)) +
  theme(legend.position = "none") +
  plot_aesthetics
######### --------------------------------------------

In [12]:
######### --------------------------------------------
### Plot: Selected Predictive Signals ###
# The name vectors built here classify the columns of the stored signal
# matrix into signal groups.

# Load Econ-Fin Signals
# All columns except Date, the response and the technical-indicator prefixes
econ_fin_names <- read_rds(glue("{path}/Data/ES1/P_Signals/Econ_Fin/econ_fin_signals.rds")) %>%
  select(-Date, -equity_premium) %>%
  select(-starts_with("MA_D"),
         -starts_with("PMOM_D"),
         -starts_with("RVol"),
         -starts_with("VOL_D"),
         -starts_with("SPMOM_INT")) %>%
  colnames()

# Load Technical Signals
# The complement of the above: only the technical-indicator prefixes
tech_names <- read_rds(glue("{path}/Data/ES1/P_Signals/Econ_Fin/econ_fin_signals.rds")) %>%
  select(starts_with("MA_D"),
         starts_with("PMOM_D"),
         starts_with("RVol"),
         starts_with("VOL_D"),
         starts_with("SPMOM_INT")) %>%
  colnames()

# Load Topics
topic_names <- read_rds(glue("{path}/Data/ES1/P_Signals/Text_Signals/topic_signals.rds")) %>%
  select(-Date) %>%
  colnames()

# Load Word Counts
text_names <- read_rds(glue("{path}/Data/ES1/P_Signals/Text_Signals/text_signals.rds")) %>%
  select(-Date) %>%
  colnames()

# Load Refined Signals
refi_names <- read_rds(glue("{path}/Data/ES1/F_Signals/refined_signals.rds")) %>%
  select(-Date) %>%
  colnames()

# Chosen Signals (before 1999)
# Keep the five low-dimensional signals and multiply column j by j, so each
# signal gets its own y position (1..5) in the plot.
# The multiplier matrix has ncol(.) columns (Date included); only its first
# five columns are consumed by the five numeric columns.
# The fallback level must be spelled exactly like the `case_when()` default
# ("Unknown"); otherwise those rows silently turn into NA.
df <- read_rds(glue::glue("{path}/Results/ES1/stsc_params_{setting}.rds"))$signals %>%
  as_tibble(rownames = "Date") %>%
  mutate(Date = ymd(Date)) %>%
  select(Date, dp, TBL3M, TSP1, vola, PHM) %>%
  mutate(t(replicate(nrow(.), seq_len(ncol(.)))) * pick(where(is.numeric))) %>%
  pivot_longer(!Date, names_to = "Predictor", values_to = "value") %>%
  mutate(Indicator = case_when(
    Predictor %in% econ_fin_names ~ "Econ-Fin",
    Predictor %in% refi_names ~ "F-Signal",
    TRUE ~ "Unknown"
  )) %>%
  mutate(Indicator = factor(Indicator,
                            levels = c("Econ-Fin",
                                       "F-Signal",
                                       "Unknown")))

# Plot Signals
# Values dated after 1999-01-01 are set to 0, which lies outside the y limits
# (1 to 5), so only the early period shows points.
# NOTE(review): the fifth axis label reads "RHM" while the plotted column is
# `PHM` -- confirm the intended label.
# The third colour is only used if a predictor falls into "Unknown".
plot_sigs_b1999 <- df %>%
  mutate(across(value, ~ if_else(Date > "1999-01-01", 0, .))) %>%
  ggplot() +
  geom_point(aes(Date, value, color = Indicator),
             size = 0.5, alpha = 0.75, shape = 18) +
  labs(title = "Predictive Signals (before 1999)",
       y = "",
       x = "") +
  scale_y_continuous(labels = c("DP", "TBL3M", "TSP1", "VOLA", "RHM"),
                     breaks = c(1, 2, 3, 4, 5),
                     limits = c(1, 5),
                     expand = y_expand) +
  scale_color_manual(values = c("#58e956",
                                "#56B4E9",
                                grey_color)) +
  theme(legend.position = "none") +
  plot_aesthetics

# Chosen Signals (after 1999)
# Steps: replace NA by 0, sum the signal columns within each signal group,
# turn every positive sum into 1, then multiply group column j by j so each
# group gets its own y position (1..5).
# The multiplier matrix has ncol(.) columns (Date included); only its first
# five columns are consumed by the five numeric columns.
df <- read_rds(glue::glue("{path}/Results/ES1/stsc_params_{setting}.rds"))$signals %>%
  as_tibble(rownames = "Date") %>%
  mutate(Date = ymd(Date)) %>%
  replace(is.na(.), 0) %>%
  mutate(
    econ_fin_sum = rowSums(select(., all_of(econ_fin_names))),
    tech_sum = rowSums(select(., all_of(tech_names))),
    topic_sum = rowSums(select(., all_of(topic_names))),
    text_sum = rowSums(select(., all_of(text_names))),
    refi_sum = rowSums(select(., all_of(refi_names)))
  ) %>%
  select(Date, econ_fin_sum, tech_sum, topic_sum, text_sum, refi_sum) %>%
  mutate(across(econ_fin_sum:refi_sum, ~ if_else(. > 0, 1, .))) %>%
  mutate(t(replicate(nrow(.), seq_len(ncol(.)))) * pick(where(is.numeric))) %>%
  pivot_longer(!Date, names_to = "Predictor", values_to = "value") %>%
  mutate(Indicator = factor(Predictor, levels = c("econ_fin_sum", "tech_sum", "topic_sum", "text_sum", "refi_sum")))

# Plot Signals
# Values dated before 1999-01-01 are set to 0, which lies outside the y limits
# (1 to 5), so only the later period shows points. The colours follow the
# level order of `Indicator`.
plot_sigs_a1999 <- df %>%
  mutate(across(value, ~ if_else(Date < "1999-01-01", 0, .))) %>%
  ggplot() +
  geom_point(aes(Date, value, color = Indicator),
             size = 0.5, alpha = 0.75, shape = 18) +
  labs(title = "Predictive Signals (after 1999)",
       y = "",
       x = "") +
  scale_y_continuous(labels = c("Econ-Fin",
                                "Technical",
                                "Topic Model",
                                "Word Count",
                                "F-Signal"),
                     breaks = c(1, 2, 3, 4, 5),
                     limits = c(1, 5),
                     expand = y_expand) +
  scale_color_manual(values = c("#58e956",
                                "#e9565b",
                                "#c5e912",
                                "#0072B2",
                                "#56B4E9")) +
  theme(legend.position = "none") +
  plot_aesthetics
######### --------------------------------------------

In [13]:
######### --------------------------------------------
### Plot: Forgetting Factor Gamma ###
# Load Gamma
# Long format; rows with value 0 are dropped.
df <- read_rds(glue::glue("{path}/Results/ES1/stsc_params_{setting}.rds"))$gamma %>%
  as_tibble(rownames = "Date") %>%
  mutate(Date = ymd(Date)) %>%
  pivot_longer(!Date, names_to = "Parameter", values_to = "value") %>%
  filter(value != 0) %>%
  mutate(Description = "Discount Factor")

# Plot Gamma
# The values are shown on a discrete axis; limits and breaks list 0.4 to 0.9
# in steps of 0.1 followed by 0.91 to 1.00 in steps of 0.01.
plot_gamma <- df %>%
  mutate(value_factor = factor(value, levels = unique(sort(value)))) %>%
  ggplot(aes(Date, value_factor, color = Description)) +
  geom_point(size = 1.75, alpha = 1.0, shape = 18) +
  labs(title = "",
       y = expression("Discount Factor" ~ bold(gamma)),
       x = "") +
  scale_color_manual(values = c(black_color),
                     guide = guide_legend(override.aes = list(size = 4,
                                                              alpha = 1))) +
  scale_y_discrete(
    expand = y_expand,
    limits = factor(c(0.4, 0.5, 0.6, 0.7, 0.8, 0.9, seq(0.91, 1.00, 0.01))),
    breaks = factor(c(0.4, 0.5, 0.6, 0.7, 0.8, 0.9, seq(0.91, 1.00, 0.01))),
    # Print every tick with two decimals
    labels = function(x) sprintf("%.2f", as.numeric(as.character(x)))
  ) +
  theme(legend.position = "none") +
  plot_aesthetics

######### --------------------------------------------
### Plot: Forgetting Factor Lambda ###
# Load and prepare
# Long format; the column name "1" is relabelled "1.0000" so it matches the
# fill-scale keys below. Within each date the values are rescaled to
# percentages that sum to 100.
df <- read_rds(glue::glue("{path}/Results/ES1/stsc_params_{setting}.rds"))$lambda %>%
  as_tibble(rownames = "Date") %>%
  mutate(Date = ymd(Date)) %>%
  pivot_longer(!Date,
               names_to = "Parameter",
               values_to = "value") %>%
  mutate(Description = "Lambda value") %>%
  mutate(Parameter = case_when(
    Parameter == "1" ~ "1.0000",
    TRUE ~ Parameter
  )) %>%
  group_by(Date) %>%
  mutate(value = value / sum(value) * 100) %>%
  ungroup()

# Plot Lambda
# Stacked area chart of the percentage shares per lambda value
plot_lambda <- df %>%
  ggplot(aes(x = Date, y = value, fill = Parameter)) +
  geom_area(position = "stack", alpha = 0.6) +
  labs(title = "",
       y = expression("Discount Factor" ~ bold(lambda)),
       x = "") +
  scale_y_continuous(expand = y_expand,
                     labels = scales::percent_format(scale = 1.0)) +
  scale_fill_manual(values = c("0.9984" = "gray80",
                               "0.9992" = "gray50",
                               "1.0000" = "gray20")) +
  theme(legend.key.size = unit(0.8, "cm")) +
  plot_aesthetics

######### --------------------------------------------
### Plot: Forgetting Factor Kappa ###
# Load and prepare
# Same preparation as for lambda; the column name "0.9" is relabelled "0.90"
# so it matches the fill-scale keys below.
df <- read_rds(glue::glue("{path}/Results/ES1/stsc_params_{setting}.rds"))$kappa %>%
  as_tibble(rownames = "Date") %>%
  mutate(Date = ymd(Date)) %>%
  pivot_longer(!Date,
               names_to = "Parameter",
               values_to = "value") %>%
  mutate(Description = "Kappa value") %>%
  mutate(Parameter = case_when(
    Parameter == "0.9" ~ "0.90",
    TRUE ~ Parameter
  )) %>%
  group_by(Date) %>%
  mutate(value = value / sum(value) * 100) %>%
  ungroup()

# Plot Kappa
# Stacked area chart of the percentage shares per kappa value
plot_kappa <- df %>%
  ggplot(aes(x = Date, y = value, fill = Parameter)) +
  geom_area(position = "stack", alpha = 0.6) +
  labs(title = "",
       y = expression("Discount Factor" ~ bold(kappa)),
       x = "") +
  scale_y_continuous(expand = y_expand,
                     labels = scales::percent_format(scale = 1.0)) +
  scale_fill_manual(values = c("0.90" = "gray80",
                               "0.92" = "gray50",
                               "0.94" = "gray20")) +
  theme(legend.key.size = unit(0.8, "cm")) +
  plot_aesthetics
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Plot: Combination I ###
# 3 x 2 grid: loss differences on top, subset sizes and selected signals
# before 1999 in the middle, the same after 1999 at the bottom
cplot1 <- cowplot::plot_grid(
  plot_crps, plot_cssed,
  plot_psi_b1999, plot_sigs_b1999,
  plot_psi_a1999, plot_sigs_a1999,
  ncol = 2,
  align = "v"
)

# Write the grid to disk
ggsave(filename = "plot_finance.pdf",
       plot = cplot1,
       path = glue::glue("{path}/Results/ES1/Plots/"),
       width = 20,
       height = 20)

# Display the grid in the notebook
options(repr.plot.width = 20, repr.plot.height = 20)
cplot1

######### --------------------------------------------
### Plot: Combination II ###
# Single column: gamma, lambda and kappa stacked vertically
cplot2 <- cowplot::plot_grid(
  plot_gamma, plot_lambda, plot_kappa,
  ncol = 1,
  align = "v"
)

# Write the grid to disk
ggsave(filename = "plot_finance_forgetting.pdf",
       plot = cplot2,
       path = glue::glue("{path}/Results/ES1/Plots/"),
       width = 10,
       height = 15)

# Display the grid in the notebook
options(repr.plot.width = 20, repr.plot.height = 20)
cplot2
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Plot: Calibration of Aggregate Predictive Densities (I) ###
# Setting
# NOTE(review): the other plot cells use setting "finance_1260"; confirm that
# result files with the plain "finance" suffix exist.
setting <- "finance"

# Number of Draws
# Number of grid points at which each predictive density is evaluated
n <- 5000

### Load Data
# Load Y
y <- read_rds(glue("{path}/Results/ES1/y_{setting}.rds")) %>%
  as_tibble(rownames = "Date") %>%
  mutate(Date = ymd(Date)) %>%
  rename("y_real" = value)

# Load Predictive Density (Mean)
pred_stsc <- read_rds(glue("{path}/Results/ES1/point_forecasts_{setting}.rds")) %>%
  as_tibble(rownames = "Date") %>%
  mutate(Date = ymd(Date)) %>%
  rename("mu" = STSC) %>%
  dplyr::select(Date, mu)

# Load Predictive Density (Variance)
# Stored as standard deviation (square root of the STSC variance)
var_stsc <- read_rds(glue("{path}/Results/ES1/variances_{setting}.rds")) %>%
  as_tibble(rownames = "Date") %>%
  mutate(Date = ymd(Date)) %>%
  mutate("sd" = sqrt(STSC)) %>%
  dplyr::select(Date, sd)

# Prepare Data
# Steps: keep the listed Decembers (every fifth year), keep the last date of
# each of those months, then expand every remaining row into `n` rows that
# span mu +/- 3 sd and evaluate the normal density on that grid.
# The max() is taken on "%m/%d/%Y" strings; within one month and year this
# orders by the zero-padded day.
# The data stay grouped by `mon_yr` from `group_by()` onwards, so
# `factor(Date)` is built per group.
data <- pred_stsc %>%
  left_join(var_stsc, by = "Date") %>%
  left_join(y, by = "Date") %>%
  mutate(mon_yr = format(Date, "%Y-%m")) %>%
  filter(mon_yr %in% c("1971-12", "1976-12",
                       "1981-12", "1986-12",
                       "1991-12", "1996-12",
                       "2001-12", "2006-12",
                       "2011-12", "2016-12",
                       "2021-12")) %>%
  mutate(Date = format(Date, "%m/%d/%Y")) %>%
  group_by(mon_yr) %>%
  filter(Date == max(Date)) %>%
  mutate(Date = factor(Date),
         low = mu - 3 * sd,
         high = mu + 3 * sd) %>%
  uncount(n, .id = "row") %>%
  mutate(x = (1 - row / n) * low + row / n * high,
         norm = dnorm(x, mu, sd),
         quantile = pnorm(y_real, mean = mu, sd = sd))

# Plot Aggregate Predictive Densities
# One ridge per selected date; the cross marks the realised value
plot_ridge <- data %>%
  ggplot(aes(x = x,
             y = Date,
             height = norm)) +
  ggridges::geom_density_ridges(stat = "identity",
                                alpha = 0.5) +
  geom_point(aes(x = y_real, y = Date),
             color = "black",
             size = 1.0,
             shape = 4) +
  scale_y_discrete(expand = expansion(mult = c(0.05, 0.10))) +
  labs(title = "", #"Aggregated Predictive Density",
       y = "",
       x  = "")

# Show Plot
options(repr.plot.width = 10, repr.plot.height = 12)
plot_ridge

# Save Plot
ggsave("plot_finance_ridge.pdf",
       plot_ridge,
       path = glue::glue("{path}/Results/ES1/Plots/"),
       width = 10,
       height = 10)
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Plot: Calibration of Aggregate Predictive Densities (II) ###
# Load U-Vector
# Probability integral transforms (PITs) of the realised values
cdf_t <- read_rds(glue("{path}/Results/ES1/pit_cdf_t.rds"))

# Generate uniform distribution
# Reference density on [0, 1] for the histogram overlay
data_unif <- data.frame(value = seq(0, 1, length.out = 1000))
data_unif$density <- dunif(data_unif$value, min = 0, max = 1)

# Plot Histogram
# Eight bins of width 0.125. The dashed line is the count expected per bin
# under uniformity (density * number of PITs * bin width).
# `after_stat(count)` replaces the deprecated `..count..` notation.
plot_hist <- cdf_t %>%
  as_tibble() %>%
  ggplot(aes(value)) +
  geom_histogram(aes(y = after_stat(count)), color = "white",
                 alpha = 0.5,
                 binwidth = 0.125,
                 center = 0.0625) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  geom_line(data = data_unif,
            aes(x = value, y = density * length(cdf_t) * 0.125),
            color = "blue", linewidth = 0.75, linetype = "dashed") +
  labs(title = "", #"PITs",
       y = "",
       x = "")

# Sort PIT Values
scdf_t <- sort(cdf_t)

# Confidence Bands (normal approximation)
# Pointwise band q +/- z * sqrt(q * (1 - q) / n) around the uniform
# quantiles, with alpha = 0.1
n <- length(cdf_t)
alpha <- 0.1
z <- qnorm(1 - alpha / 2)
unif_tq <- seq(0, 1, length.out = n)
l_band <- unif_tq - z * sqrt(unif_tq * (1 - unif_tq) / n)
u_band <- unif_tq + z * sqrt(unif_tq * (1 - unif_tq) / n)

# Create tibble
# The sorted PITs computed above are reused instead of sorting a second time
data <- tibble(
  theoretical_quantiles = unif_tq,
  empirical_quantiles = scdf_t,
  l_band = l_band,
  u_band = u_band
)

# Plot QQ-Plot
# Empirical against theoretical quantiles with the 45-degree line and the band
plot_qq <- ggplot(data, aes(x = theoretical_quantiles, y = empirical_quantiles)) +
  geom_point(color = "black", size = 0.25) +
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed") +
  geom_ribbon(aes(ymin = l_band, ymax = u_band), alpha = 0.25, fill = "blue") +
  labs(title = "Probability Integral Transformation",
       x = "Theoretical Quantiles",
       y = "Empirical Quantiles")

# Show Plot
options(repr.plot.width = 10, repr.plot.height = 9)
plot_hist
plot_qq

# Save Plot
ggsave("plot_finance_pit.pdf",
       plot_hist,
       path = glue::glue("{path}/Results/ES1/Plots/"),
       width = 10,
       height = 9)

# Save Plot
ggsave("plot_finance_qq.pdf",
       plot_qq,
       path = glue::glue("{path}/Results/ES1/Plots/"),
       width = 10,
       height = 9)
######### --------------------------------------------

## 6) LaTeX Tables

In [None]:
######### --------------------------------------------
### LaTeX Table -- main results ###
# Setting
setting <- "finance_1260"

# Date that separates the two subsamples (rows strictly before it belong to
# the first subsample, rows on or after it to the second)
split_date <- "1999-01-04"

# Load Data (one column per model; row names are compared against
# `split_date` as strings)
se <- read_rds(glue("{path}/Results/ES1/squared_errors_{setting}.rds"))
crps_scores <- read_rds(glue("{path}/Results/ES1/crps_scores_{setting}.rds"))

# Identify the index of the 'stsc' model; fail loudly instead of silently
# producing zero-length ratios when the column is missing or duplicated
stsc_index <- which(colnames(se) == "STSC")
if (length(stsc_index) != 1L) {
  stop("Expected exactly one 'STSC' column in the squared errors.",
       call. = FALSE)
}

# The subsample split relies on row names being present
se_dates <- rownames(se)
crps_dates <- rownames(crps_scores)
if (is.null(se_dates) || is.null(crps_dates)) {
  stop("Squared errors and CRPS scores need row names to split the sample.",
       call. = FALSE)
}

# NOTE(review): all ratios below are normalized by the STSC column, whereas
# the footnotes of the tables built from these results say "relative to the
# PHM" -- confirm which benchmark is intended.

### Sample: 1967 - 2021
# Results
mse_full <- colMeans(se)
crps_full <- colMeans(crps_scores)

# Normalize MSE and CRPS by 'stsc' values
mse_full_relative <- mse_full / mse_full[stsc_index]
crps_full_relative <- crps_full / crps_full[stsc_index]

### Sample: 1967 - 1999
# Results (drop = FALSE keeps a single-row subsample two-dimensional)
mse_1967 <- colMeans(se[se_dates < split_date, , drop = FALSE])
crps_1967 <- colMeans(crps_scores[crps_dates < split_date, , drop = FALSE])

# Normalize MSE and CRPS by 'stsc' values
mse_1967_relative <- mse_1967 / mse_1967[stsc_index]
crps_1967_relative <- crps_1967 / crps_1967[stsc_index]

### Sample: 1999 - 2021
# Results
mse_1999 <- colMeans(se[se_dates >= split_date, , drop = FALSE])
crps_1999 <- colMeans(crps_scores[crps_dates >= split_date, , drop = FALSE])

# Normalize MSE and CRPS by 'stsc' values
mse_1999_relative <- mse_1999 / mse_1999[stsc_index]
crps_1999_relative <- crps_1999 / crps_1999[stsc_index]

# Combine Results (one row per model, one column per metric and sample)
results <- tibble(
  Model = colnames(se),
  ACRPS_full = crps_full_relative,
  MSFE_full = mse_full_relative,
  ACRPS_1967 = crps_1967_relative,
  MSFE_1967 = mse_1967_relative,
  ACRPS_1999 = crps_1999_relative,
  MSFE_1999 = mse_1999_relative
)

# Reshape: drop the PHM row, then turn the table into one row per
# (Metric, Year) with one column per model, formatted to four decimals
results_long <- results %>%
  dplyr::filter(Model != "PHM") %>%
  tidyr::pivot_longer(
    cols = -Model,
    names_to = c("Metric", "Year"),
    names_sep = "_"
  ) %>%
  tidyr::pivot_wider(names_from = Model, values_from = value) %>%
  dplyr::mutate(
    dplyr::across(
      where(is.numeric),
      function(x) format(round(x, 4), nsmall = 4)
    )
  )

# Table: STSC High-Dimensional vs. Low-Dimensional
# Prints the STPHM (labelled "PHM"), STSC and STSC_LD columns of
# `results_long` as a LaTeX table. Rows 1-2, 3-4 and 5-6 are grouped under
# the three sample-period headers.
results_long %>%
  dplyr::select(Metric, STPHM, STSC, STSC_LD) %>%
  rename(" " = Metric,
         "PHM" = STPHM,
         "STSC\\textsubscript{highdim}" = STSC,
         "STSC\\textsubscript{lowdim}" = STSC_LD) %>%
  kableExtra::kbl(booktabs = TRUE,
                  caption = "Forecast evaluation for aggregate stock returns",
                  format = "latex",
                  escape = FALSE,
                  digits = 4,
                  format.args = list(big.mark = ',')) %>%
  kableExtra::kable_styling(position = "center", latex_options = c("scale_down")) %>%
  kableExtra::pack_rows("1967-2021", 1, 2, underline = TRUE, bold = TRUE) %>%
  kableExtra::pack_rows("1967-1999", 3, 4, underline = TRUE, bold = TRUE) %>%
  kableExtra::pack_rows("1999-2021", 5, 6, underline = TRUE, bold = TRUE) %>%
  # escape = FALSE passes the LaTeX markup in the footnote through unchanged
  kableExtra::footnote(general = "The table reports forecast performance in terms of MSFE and ACRPS relative to the PHM. Values below one indicate better performance. STSC\\\\textsubscript{highdim} uses the updated high-dimensional set of predictive signals starting on 01/04/1999, while STSC\\\\textsubscript{lowdim} continues with the low-dimensional set of predictive signals after 01/04/1999. One star indicates significance at the 10\\\\% level; two stars significance at the 5\\\\% level; and three stars significance at the 1\\\\% level from one-tailed \\\\cite{diebold1995} test statistics compared to the PHM.",
                       footnote_as_chunk = TRUE,
                       fixed_small_size = FALSE,
                       threeparttable = TRUE,
                       escape = FALSE,
                       general_title = "")

# Table: Benchmark Models
# Prints every model column of `results_long` except STPHM as a LaTeX table,
# with LaTeX labels for the STSC and PCDMA variants. Rows 1-2, 3-4 and 5-6
# are grouped under the three sample-period headers.
results_long %>%
  dplyr::select(-Year, -STPHM) %>%
  rename(" " = Metric,
         "STSC\\textsubscript{highdim}" = STSC,
         "STSC\\textsubscript{highdim*}" = STSC_NWC,
         "STSC\\textsubscript{lowdim}" = STSC_LD,
         "STSC\\textsubscript{ExF}" = STSC_EXF,
         "STSC\\textsubscript{S10}" = STSCSx,
         "STSC\\textsubscript{SFlex}" = STSCSFLEX,
         "PCDMA\\textsubscript{5}" = PC5DMA,
         "PCDMA\\textsubscript{10}" = PC10DMA,
         "PCDMA\\textsubscript{15}" = PC15DMA) %>%
  kableExtra::kbl(booktabs = TRUE,
                  caption = "Forecast evaluation for aggregate stock returns",
                  format = "latex",
                  escape = FALSE,
                  digits = 4,
                  format.args = list(big.mark = ',')) %>%
  kableExtra::kable_styling(position = "center", latex_options = c("scale_down")) %>%
  kableExtra::pack_rows("1967-2021", 1, 2, underline = TRUE, bold = TRUE) %>%
  kableExtra::pack_rows("1967-1999", 3, 4, underline = TRUE, bold = TRUE) %>%
  kableExtra::pack_rows("1999-2021", 5, 6, underline = TRUE, bold = TRUE) %>%
  # escape = FALSE passes the LaTeX markup in the footnote through unchanged
  kableExtra::footnote(general = "The table reports forecast performance in terms of MSFE and ACRPS relative to the PHM. Values below one indicate better performance. STSC\\\\textsubscript{highdim} uses the updated high-dimensional set of predictive signals starting on 01/04/1999, while STSC\\\\textsubscript{lowdim} continues with the low-dimensional set of predictive signals after 01/04/1999. One star indicates significance at the 10\\\\% level; two stars significance at the 5\\\\% level; and three stars significance at the 1\\\\% level from one-tailed \\\\cite{diebold1995} test statistics compared to the PHM.",
                       footnote_as_chunk = TRUE,
                       fixed_small_size = FALSE,
                       threeparttable = TRUE,
                       escape = FALSE,
                       general_title = "")
######## --------------------------------------------

In [None]:
######### --------------------------------------------
### LaTeX Table -- Robustness ###
# Setting
setting <- "robustness"

# Parameter Choices
# LaTeX math labels for every alternative tuning-parameter value; `params`
# concatenates them in the order kappa, lambda, delta, psi, gamma.
kappa_names <- paste0("$\\kappa=", format(c(0.86, 0.88, 0.90, 0.92, 0.94, 0.96, 0.98, 1.00), digits = 4), "$")
lambda_names <- paste0("$\\lambda=", format(c(0.9984, 0.9988, 0.9992, 0.9996, 1.0000), digits = 4), "$")
delta_names <- paste0("$\\delta=", format(c(0.9984, 0.9988, 0.9992, 0.9996, 1.0000), digits = 4), "$")
# The psi values are written as strings because of the "115,947" entry.
# They are pasted directly: format() on a character vector ignores `digits`
# and pads every entry with trailing blanks to a common width.
psi_values <- c("1", "5", "10", "25", "50", "100", "500", "115,947")
psi_names <- paste0("$\\psi=", psi_values, "$")
gamma_names <- paste0("$\\gamma=", format(c(0.4, 0.6, 0.8, 0.95, 0.99, 1.00), digits = 2), "$")
params <- c(kappa_names, lambda_names, delta_names, psi_names, gamma_names)

# Load Results
# Reads the robustness results, transposes them so that every row is one
# model/specification, and expresses MSFE and ACRPS relative to the "STSC" row.
results <- read_rds(glue("{path}/Results/ES1/results_{setting}.rds")) %>%
  t(.) %>%
  as.data.frame() %>%
  # Drop the last row of the transposed results
  dplyr::slice(1:(n() - 1)) %>%
  rownames_to_column("Model") %>%
  rename("MSFE" = MSE, "ACRPS" = CRPS) %>%
  # Divide every metric by its value in the "STSC" row, rounded to 4 decimals
  mutate(across(-Model, ~ round(. / .[Model == "STSC"], 4))) %>%
  # NOTE(review): the "--" placeholder and the sprintf() formatting in the next
  # two steps are converted back by as.numeric() further down, so any "--"
  # becomes NA again (with a coercion warning) -- confirm whether the
  # placeholder was meant to reach the printed table.
  replace(is.na(.), "--") %>%
  mutate(across(where(is.numeric), ~sprintf("%.4f", .x))) %>%
  # Keep only the alternative specifications
  dplyr::filter(!Model %in% c("PHM", "STPHM", "STSC")) %>%
  # Overwrite the model names with the LaTeX labels; `params` must have one
  # entry per remaining row, in the same order as the rows
  mutate(Model = params,
         MSFE = as.numeric(MSFE),
         ACRPS = as.numeric(ACRPS)) %>%
  rename(" " = Model)

# Row ranges of the five parameter groups (8, 5, 5, 8 and 6 rows)
indices <- list(1:8, 9:13, 14:18, 19:26, 27:32)
tables <- lapply(indices, function(rows) dplyr::slice(results, rows))

# All-NA filler with the same shape as the fifth group
empty_table <- mutate(tables[[5]], across(everything(), function(col) NA))

# Three stacked panels, each holding two groups side by side:
# groups 1 | 4, groups 2 | 3, group 5 | filler
panel_top <- bind_cols(tables[[1]], tables[[4]])
panel_middle <- bind_cols(tables[[2]], tables[[3]])
panel_bottom <- bind_cols(tables[[5]], empty_table)
combined_table <- bind_rows(panel_top, panel_middle, panel_bottom)

# Both halves carry the same three headers
colnames(combined_table) <- rep(c(" ", "MSFE", "ACRPS"), times = 2)

# Create LaTeX table
# Prints `combined_table` with horizontal rules after rows 8 and 13 and a
# vertical rule after the third column (between the two halves).
# NOTE(review): the group labels name only kappa, lambda and gamma; the
# right-hand half of rows 1-8 and 9-13 holds the groups bound in from
# `tables[[4]]` and `tables[[3]]` -- confirm the labels are meant to cover
# the left half only.
combined_table %>%
  kbl(booktabs = TRUE,
      caption = "Alternative tuning parameters",
      format = "latex",
      escape = FALSE,
      digits = 4,
      format.args = list(big.mark = ',')) %>%
  row_spec(8, hline_after = TRUE) %>%
  row_spec(13, hline_after = TRUE) %>%
  column_spec(3, border_right = TRUE) %>%
  pack_rows("Varying $\\kappa$", 1, 8, underline = TRUE, escape = FALSE) %>%
  pack_rows("Varying $\\lambda$", 9, 13, underline = TRUE, escape = FALSE) %>%
  pack_rows("Varying $\\gamma$", 14, 19, underline = TRUE, escape = FALSE) %>%
  footnote(general = "The table summarizes the forecast results for alternative values of the tuning parameters. All results are relative to the unrestricted STSC, with grids for the tuning parameters as described in the main paper. Values less than one indicate better performance than for STSC. In each setup, we varied one of the tuning parameters and fixed its value, while leaving the remaining (grids of) tuning parameters unchanged.",
           footnote_as_chunk = TRUE,
           fixed_small_size = FALSE,
           threeparttable = TRUE,
           escape = FALSE,
           general_title = "")
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### LaTeX Table -- conditional gaussianity ###
# Setting
setting <- "cond_normal_assumption"

# Load Results
# Reads the results, expresses MSFE and ACRPS relative to the "STPHM" row,
# drops the "PHM" row, relabels "STPHM" as "PHM", and prints one row per
# metric with one column per model as a LaTeX table.
read_rds(glue("{path}/Results/ES1/results_{setting}.rds")) %>%
  t(.) %>%
  as.data.frame() %>%
  rownames_to_column("Model") %>%
  rename("MSFE" = MSE, "ACRPS" = CRPS) %>%
  mutate(across(-Model, ~. / .[Model == "STPHM"])) %>%
  filter(! (Model %in% c("PHM"))) %>%
  mutate(Model = replace(Model, Model == "STPHM", "PHM")) %>%
  pivot_longer(cols = -Model, names_to = "Metric", values_to = "Value") %>%
  pivot_wider(names_from = Model, values_from = Value) %>%
  # Values exactly equal to 1 are replaced by the string "1.0000"; a column
  # containing such a value becomes character, the others stay numeric and
  # are rounded by `digits = 4` in kbl().
  mutate(across(where(is.numeric), ~ ifelse(. == 1, format(1, nsmall = 4), .))) %>%
  kableExtra::kbl(booktabs = TRUE,
                  caption = "STSC vs. DMA",
                  format = "latex",
                  escape = TRUE,
                  digits = 4) %>%
  kableExtra::kable_styling(position = "center", latex_options = c("scale_down")) %>%
  kableExtra::footnote(general = "The table shows the relative MSFEs and ACRPSs of STSC and DMA with $1$, $5$, and $15$ predictive signals, respectively. Values less than one indicate better performance compared to the PHM. The OOS evaluation period is from 01/04/1967 to 12/31/2021.",
                       footnote_as_chunk = TRUE,
                       fixed_small_size = FALSE,
                       threeparttable = TRUE,
                       escape = FALSE,
                       general_title = "")
######### --------------------------------------------

---