## Finance - Signal Set

In [None]:
# Empty the workspace
# (removes every object that ls() lists in the current environment, so the
# notebook starts from a clean state; names beginning with a dot are not
# listed by ls() and are therefore kept)
rm(list = ls())

# Set Folder-Path
# `path` is the project root used as prefix for the data and code paths
# built with glue()/paste0() in the cells below.
path <- "~/LocalPredictability" # Change to project folder

In [None]:
# Packages
library("tidyverse")
library("lubridate")
library("purrr")
library("magrittr")
library("glue")
library("readr")
library("readxl")
library("pbapply")
library("roll")
library("lightgbm")
library("kableExtra")
library("quanteda")

In [None]:
# Bring the project's helper functions into the session
source(file.path(path, "Code", "_helpers.R"))

# Convert this Jupyter notebook to an R script
# (convert_ipynb_to_r() is not defined in this file; presumably it comes
# from _helpers.R)
convert_ipynb_to_r(file.path(path, "Code", "finance_signal_sets.ipynb"))

---

## 1) Create Economic-Financial / Technical Signal Set

In [None]:
######### --------------------------------------------
### Get Dates
# Sample dates: every day up to 2021-12-31 for which the return file
# holds a non-missing `vwretd` value.
dates <- glue("{path}/Data/ES1/P_Signals/Econ_Fin/Raw/vwretd.csv") %>%
  read_csv() %>%
  mutate(Date = ymd(DATE)) %>%
  filter(Date <= as.Date("2021-12-31"), !is.na(vwretd)) %>%
  pull(Date)
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### T-Bill, Yield and Term Spread
# One pipeline, two outputs:
#   1. interest_rates.rds - TBL3M and the term spread TSP1 = CMT10Y - TBL3M
#   2. rfree.rds          - daily risk-free rate derived from the previous
#                           row's TBL3M, de-annualised with 252 days
glue("{path}/Data/ES1/P_Signals/Econ_Fin/Raw/FRED_H15.csv") %>%
  read_csv() %>%
  mutate(Date = ymd(Date)) %>%
  filter(Date %in% dates) %>%
  rename(TBL3M = DTB3, CMT10Y = DGS10) %>%
  # every non-date column is coerced to numeric and multiplied by 0.01
  mutate(across(-Date, function(rate) 0.01 * as.numeric(rate))) %>%
  mutate(TSP1 = CMT10Y - TBL3M) %>%
  # carry the last available value forward over gaps
  fill(-Date, .direction = "down") %>%
  dplyr::select(Date, TBL3M, TSP1) %>%
  # write_rds() hands its input on, so the pipeline can continue
  write_rds(glue("{path}/Data/ES1/P_Signals/Econ_Fin/interest_rates.rds")) %>%
  mutate(Rfree = (1 + dplyr::lag(TBL3M, n = 1L))^(1 / 252) - 1) %>%
  dplyr::select(Date, Rfree) %>%
  write_rds(
    glue("{path}/Data/ES1/P_Signals/Econ_Fin/rfree.rds"),
    compress = "gz"
  )
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Equity Premium
# Daily risk-free rate (written by the interest-rate step)
Rfree <- read_rds(glue("{path}/Data/ES1/P_Signals/Econ_Fin/rfree.rds"))

# CRSP return minus risk-free rate; rows without a premium are dropped
glue("{path}/Data/ES1/P_Signals/Econ_Fin/Raw/vwretd.csv") %>%
  read_csv() %>%
  mutate(DATE = ymd(DATE)) %>%
  rename(Date = DATE, CRSP_SPvw = vwretd) %>%
  filter(Date %in% dates) %>%
  left_join(Rfree, by = "Date") %>%
  # inside the verb, `Rfree` is the joined column, not the tibble above
  transmute(Date, equity_premium = CRSP_SPvw - Rfree) %>%
  filter(!is.na(equity_premium)) %>%
  write_rds(
    glue("{path}/Data/ES1/P_Signals/Econ_Fin/equity_premium.rds"),
    compress = "gz"
  )

# Clean environment
rm(Rfree)
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Dividend-Price-Ratio
# Dividend file, read with read_csv2(); its yyyymm key is parsed into
# a date so it can be matched against calendar months
div <- glue("{path}/Data/ES1/P_Signals/Econ_Fin/Raw/divm.csv") %>%
  read_csv2() %>%
  mutate(yyyymm = ym(yyyymm)) %>%
  rename(year_month = yyyymm)

# S&P 500 index: attach the dividend row of the same month, then form
# dp = log(previous row's D12) - log(index level)
glue("{path}/Data/ES1/P_Signals/Econ_Fin/Raw/spind.csv") %>%
  read_csv() %>%
  rename(index = Close) %>%
  mutate(
    Date = ymd(Date),
    year_month = floor_date(Date, "month")
  ) %>%
  left_join(div, by = "year_month") %>%
  filter(Date %in% dates) %>%
  transmute(Date, dp = log(dplyr::lag(D12, n = 1L)) - log(index)) %>%
  fill(-Date, .direction = "down") %>%
  write_rds(
    glue("{path}/Data/ES1/P_Signals/Econ_Fin/dp_ratio.rds"),
    compress = "gz"
  )

# Clean environment
rm(div)
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Exchange Rates
# Row-over-row relative changes of the exchange rates, all quoted as USD_XXX
glue("{path}/Data/ES1/P_Signals/Econ_Fin/Raw/FRED_H10.csv") %>%
  read_csv() %>%
  mutate(Date = ymd(Date)) %>%
  filter(Date %in% dates) %>%
  mutate(across(-Date, as.numeric)) %>%
  # series named XXX_USD are inverted ...
  mutate(across(ends_with("_USD"), function(rate) 1.0 / rate)) %>%
  # ... and renamed to USD_XXX to match the remaining columns
  rename_with(function(nm) {
    ifelse(str_ends(nm, "_USD"), paste0("USD_", str_remove(nm, "_USD")), nm)
  }) %>%
  mutate(across(-Date, function(rate) {
    previous <- dplyr::lag(rate, n = 1L)
    (rate - previous) / previous
  })) %>%
  dplyr::select(Date, starts_with("USD_"), -USD_MYR, -USD_LKR) %>%
  fill(-Date, .direction = "down") %>%
  write_rds(
    glue("{path}/Data/ES1/P_Signals/Econ_Fin/exchange_rates.rds"),
    compress = "gz"
  )
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Moving-Average Indicator
# Window lengths in rows (multiples of 21)
periods <- 21 * c(1, 3, 6, 9, 12)

# S&P 500 index on the sample dates
spind <- glue("{path}/Data/ES1/P_Signals/Econ_Fin/Raw/spind.csv") %>%
  read_csv() %>%
  rename(Index = Close) %>%
  filter(Date %in% dates)

# One trailing mean of the index level per window length (ma_D<window>)
for (period in periods) {
  period_var <- glue("ma_D{period}")
  spind <- mutate(spind, !!period_var := roll::roll_mean(Index, width = period))
}

# Index pairs (i, j) with i < j, i.e. a shorter versus a longer window
combinations <- expand.grid(i = seq_along(periods), j = seq_along(periods)) %>%
  filter(i < j)

# MA-Indicator: relative gap between the shorter and the longer average
ma_indicator <- purrr::map2(combinations$i, combinations$j, function(short, long) {
  new_var <- glue("MA_D{periods[short]}_D{periods[long]}")
  short_ma <- spind[[glue("ma_D{periods[short]}")]]
  long_ma <- spind[[glue("ma_D{periods[long]}")]]
  transmute(spind, Date, !!new_var := (short_ma - long_ma) / long_ma)
}) %>%
  purrr::reduce(left_join, by = "Date")

# Save (case-sensitive prefix keeps only the MA_D* columns)
ma_indicator %>%
  dplyr::select(Date, starts_with("MA_D", ignore.case = FALSE)) %>%
  write_rds(
    glue("{path}/Data/ES1/P_Signals/Econ_Fin/ma_indicator.rds"),
    compress = "gz"
  )

# Clean environment
rm(spind, periods, combinations, ma_indicator, period_var)
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Price Momentum
# Look-back horizons in rows; `pm_periods` is deliberately kept in the
# workspace because the sentiment step uses it again
pm_periods <- c(1, 3, 5, 21 * c(1, 3, 6, 9, 12))

# S&P 500 index on the sample dates
pmom_indicator <- glue("{path}/Data/ES1/P_Signals/Econ_Fin/Raw/spind.csv") %>%
  read_csv() %>%
  rename(Index = Close) %>%
  filter(Date %in% dates)

# Momentum: relative change of the index over each horizon (PMOM_D<horizon>)
for (period in pm_periods) {
  period_var <- glue("PMOM_D{period}")
  pmom_indicator <- mutate(
    pmom_indicator,
    !!period_var := (Index - dplyr::lag(Index, n = period)) / dplyr::lag(Index, n = period)
  )
}

# Save
pmom_indicator %>%
  dplyr::select(Date, starts_with("PMOM_D", ignore.case = FALSE)) %>%
  write_rds(
    glue("{path}/Data/ES1/P_Signals/Econ_Fin/pmom_indicator.rds"),
    compress = "gz"
  )

# Clean environment
rm(pmom_indicator, period_var)
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Volume Indicators
# Raw volume data
vol <- read_csv(glue("{path}/Data/ES1/P_Signals/Econ_Fin/Raw/spvol.csv"))

# S&P 500 index level
spind <- glue("{path}/Data/ES1/P_Signals/Econ_Fin/Raw/spind.csv") %>%
  read_csv() %>%
  rename(Index = Close)

# Relative volume change (RVol) and on-balance volume (OBV)
data <- left_join(spind, vol, by = "Date") %>%
  filter(Date %in% dates) %>%
  mutate(
    RVol = (Volume - dplyr::lag(Volume, n = 1L)) / dplyr::lag(Volume, n = 1L)
  ) %>%
  mutate(
    # +1 if the index did not fall relative to the previous row, else -1
    D = dplyr::if_else(Index >= dplyr::lag(Index), 1, -1),
    # the window spans all rows, so this is a running sum of signed volume
    OBV = roll::roll_sum(Volume * D, width = n(), min_obs = 1)
  ) %>%
  dplyr::select(Date, starts_with("RVol"), OBV)

# Window lengths in rows (multiples of 21)
periods <- 21 * c(1, 3, 6, 9, 12)

# One trailing mean of OBV per window length (OBV_D<window>)
for (period in periods) {
  period_var <- glue("OBV_D{period}")
  data <- mutate(data, !!period_var := roll::roll_mean(OBV, width = period))
}

# Index pairs (i, j) with i < j, i.e. a shorter versus a longer window
combinations <- expand.grid(i = seq_along(periods), j = seq_along(periods)) %>%
  filter(i < j)

# Volume-Indicator: relative gap between the shorter and longer OBV average
vol_indicator <- purrr::map2(combinations$i, combinations$j, function(short, long) {
  new_var <- glue("VOL_D{periods[short]}_D{periods[long]}")
  short_obv <- data[[glue("OBV_D{periods[short]}")]]
  long_obv <- data[[glue("OBV_D{periods[long]}")]]
  transmute(data, Date, !!new_var := (short_obv - long_obv) / long_obv)
}) %>%
  purrr::reduce(left_join, by = "Date")

# Save RVol together with the VOL_D* indicators
vol_indicator %>%
  left_join(data, by = "Date") %>%
  dplyr::select(
    Date,
    starts_with("RVol"),
    starts_with("VOL_D", ignore.case = FALSE)
  ) %>%
  write_rds(
    glue("{path}/Data/ES1/P_Signals/Econ_Fin/vol_indicator.rds"),
    compress = "gz"
  )

# Clean environment
rm(vol, spind, data, periods, combinations, vol_indicator, period_var)
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Commodities
# Row-over-row relative change of the commodity index close
glue("{path}/Data/ES1/P_Signals/Econ_Fin/Raw/GSCITOT.csv") %>%
  read_csv() %>%
  mutate(Date_ = ymd(Date_)) %>%
  rename(Date = Date_) %>%
  arrange(Date) %>%
  filter(Date %in% dates) %>%
  transmute(
    Date,
    GSCITOT = (Close_ - dplyr::lag(Close_, n = 1L)) / dplyr::lag(Close_, n = 1L)
  ) %>%
  fill(GSCITOT, .direction = "down") %>%
  write_rds(
    glue("{path}/Data/ES1/P_Signals/Econ_Fin/commodity_index.rds"),
    compress = "gz"
  )
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Gold
# Row-over-row relative change of the gold close; only the first row
# of any repeated date is kept before the change is computed
glue("{path}/Data/ES1/P_Signals/Econ_Fin/Raw/GOLDBLN.csv") %>%
  read_csv() %>%
  mutate(Date_ = ymd(Date_)) %>%
  rename(Date = Date_) %>%
  arrange(Date) %>%
  filter(Date %in% dates) %>%
  distinct(Date, .keep_all = TRUE) %>%
  transmute(
    Date,
    GOLDBLN = (Close_ - dplyr::lag(Close_, n = 1)) / dplyr::lag(Close_, n = 1)
  ) %>%
  fill(GOLDBLN, .direction = "down") %>%
  write_rds(
    glue("{path}/Data/ES1/P_Signals/Econ_Fin/goldbln.rds"),
    compress = "gz"
  )
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Realized Volatility and Vix
# VIX closes (dates in this file are month-day-year)
vix <- glue("{path}/Data/ES1/P_Signals/Econ_Fin/Raw/vix.csv") %>%
  read_csv() %>%
  transmute(Date = mdy(DATE), vix = CLOSE) %>%
  filter(Date %in% dates)

# Volatility measures built from the equity premium
read_rds(glue("{path}/Data/ES1/P_Signals/Econ_Fin/equity_premium.rds")) %>%
  left_join(vix, by = "Date") %>%
  # sum of squared premia over the last 63 rows
  mutate(vola = roll::roll_sum(equity_premium^2, width = 63)) %>%
  # scaled 252-row mean of absolute premia ...
  mutate(
    vola_mele_vix = 100 * sqrt(pi / 2) * sqrt(12 * 21) *
      roll::roll_mean(x = abs(equity_premium), width = 12 * 21)
  ) %>%
  # ... replaced by the VIX wherever a VIX value exists
  mutate(vola_mele_vix = ifelse(is.na(vix), vola_mele_vix, vix)) %>%
  dplyr::select(Date, vola, vola_mele_vix) %>%
  fill(-Date, .direction = "down") %>%
  write_rds(
    glue("{path}/Data/ES1/P_Signals/Econ_Fin/volatility.rds"),
    compress = "gz"
  )

# Clean environment
rm(vix)
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Sentiment
# Michigan Consumer Sentiment, shifted down by one row
sent <- glue("{path}/Data/ES1/P_Signals/Econ_Fin/Raw/UMCSENT.csv") %>%
  read_csv() %>%
  rename(year_month = DATE, sentiment = UMCSENT) %>%
  mutate(sentiment = dplyr::lag(sentiment, n = 1L))

# Sentiment horizons in rows of the sentiment file
senti_periods <- c(1, 3, 6, 9, 12)

# Sentiment momentum: change of sentiment over each horizon (smom_M<horizon>)
for (period in senti_periods) {
  new_var <- glue("smom_M{period}")
  sent <- mutate(sent, !!new_var := sentiment - dplyr::lag(sentiment, n = period))
}

# Daily price momentum, keyed by the first day of its calendar month
pmom <- glue("{path}/Data/ES1/P_Signals/Econ_Fin/pmom_indicator.rds") %>%
  read_rds() %>%
  mutate(year_month = floor_date(Date, "month"))

# Attach the sentiment row of the same month to every day
data <- left_join(pmom, sent, by = "year_month") %>%
  dplyr::select(Date, starts_with("smom_M"), starts_with("PMOM_D"))

# Every sentiment horizon paired with every price-momentum horizon
# (`pm_periods` is defined in the price-momentum step)
combinations <- expand.grid(i = senti_periods, j = pm_periods)

# Interaction = sentiment momentum x price momentum
interactions <- purrr::map2(combinations$i, combinations$j, function(m, d) {
  new_var <- glue("SPMOM_INT_M{m}_D{d}")
  senti_mom <- data[[glue("smom_M{m}")]]
  price_mom <- data[[glue("PMOM_D{d}")]]
  transmute(data, Date, !!new_var := senti_mom * price_mom)
}) %>%
  purrr::reduce(left_join, by = "Date")

# Save only the interaction columns
data %>%
  left_join(interactions, by = "Date") %>%
  dplyr::select(Date, starts_with("SPMOM_INT")) %>%
  write_rds(
    glue("{path}/Data/ES1/P_Signals/Econ_Fin/sentiment.rds"),
    compress = "gz"
  )

# Clean environment
rm(sent, senti_periods, pmom, data, combinations, interactions, new_var)
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### World Indices
# Build the row-over-row relative change of one index and store it as .rds.
#
# Arguments:
#   file_name    Base name (without ".csv") of the raw file in Econ_Fin/Raw/;
#                the file must provide the columns `valuedate` and `pi_`.
#   index_name   Name of the return column in the output.
#   output_file  Base name (without ".rds") of the file written to Econ_Fin/.
#
# Uses the globals `path` and `dates`. The value is whatever write_rds()
# returns, i.e. the written table.
get_returns <- function(file_name, index_name, output_file) {
  raw_file <- glue("{path}/Data/ES1/P_Signals/Econ_Fin/Raw/{file_name}.csv")
  rds_file <- glue("{path}/Data/ES1/P_Signals/Econ_Fin/{output_file}.rds")

  read_csv(raw_file) %>%
    mutate(valuedate = ymd(valuedate)) %>%
    rename(Date = valuedate) %>%
    filter(Date %in% dates) %>%
    transmute(
      Date,
      !!index_name := (pi_ - dplyr::lag(pi_, n = 1)) / dplyr::lag(pi_, n = 1)
    ) %>%
    fill(-Date, .direction = "down") %>%
    write_rds(rds_file, compress = "gz")
}

# Process each index: one get_returns() call per position of the vectors
purrr::pwalk(
  list(
    file_name   = c("DJINDUS", "IXIC", "HNGKNGI", "JAPDOWA", "KORCOMP"),
    index_name  = c("DJI", "IXIC", "HSI", "N225", "KOSPI"),
    output_file = c("djindus", "ixic", "hsi", "n225", "kospi")
  ),
  get_returns
)
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Create Economic and Financial Data Set
# Signal files to merge; equity_premium comes first, so its dates define
# the rows of the left joins
file_names <- c(
  "equity_premium",
  "dp_ratio", "interest_rates", "volatility",
  "ma_indicator", "pmom_indicator", "vol_indicator",
  "sentiment",
  "commodity_index", "goldbln",
  "djindus", "ixic", "hsi", "n225", "kospi",
  "exchange_rates"
)

# Load, join on Date, forward-fill every column except the target,
# drop rows without a target, and save
fin_econ_set <- file_names %>%
  purrr::map(function(name) {
    read_rds(glue("{path}/Data/ES1/P_Signals/Econ_Fin/{name}.rds"))
  }) %>%
  purrr::reduce(left_join, by = "Date") %>%
  fill(-equity_premium, .direction = "down") %>%
  drop_na(equity_premium) %>%
  write_rds(
    glue("{path}/Data/ES1/P_Signals/Econ_Fin/econ_fin_signals.rds"),
    compress = "gz"
  )

# Display Dimensions (the column count includes Date and equity_premium)
cat("Number of Signals:", ncol(fin_econ_set), "\n")
cat("Number of Observations:", nrow(fin_econ_set), "\n")

# Clean environment
rm(file_names, fin_econ_set)
######### --------------------------------------------

## 2) Create Text-based Signal Sets

In [None]:
######### --------------------------------------------
### Prepare Word-Count Data
# Daily document-feature matrix -> proportion weights -> data frame with
# one row per document id, where the id is parsed as the date
dfm_daily <- glue("{path}/Data/ES1/P_Signals/Text_Signals/Raw/dfm_daily.rds") %>%
  read_rds() %>%
  quanteda::dfm_weight(scheme = "prop") %>%
  quanteda::convert(to = "data.frame") %>%
  mutate(doc_id = ymd(doc_id)) %>%
  rename(Date = doc_id) %>%
  filter(Date %in% dates) %>%
  write_rds(
    glue("{path}/Data/ES1/P_Signals/Text_Signals/text_signals.rds"),
    compress = "gz"
  )

# Display Dimensions (the column count includes Date)
cat("Number of Signals:", ncol(dfm_daily), "\n")
cat("Number of Observations:", nrow(dfm_daily), "\n")

# Clean environment
rm(dfm_daily)
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Prepare Topic-Model Data
# All topic files named ctm_*_topics_*.csv in the raw text folder
ctm_files <- list.files(
  path = glue("{path}/Data/ES1/P_Signals/Text_Signals/Raw/"),
  pattern = "^ctm_.*_topics_.*\\.csv$",
  full.names = TRUE
)

# Read every file and join them on `date`; the first file defines the rows
topic_data <- purrr::map(ctm_files, read_csv) %>%
  purrr::reduce(left_join, by = "date") %>%
  filter(date %in% dates) %>%
  rename(Date = date) %>%
  write_rds(
    glue("{path}/Data/ES1/P_Signals/Text_Signals/topic_signals.rds"),
    compress = "gz"
  )

# Display Dimensions (the column count includes Date)
cat("Number of Signals:", ncol(topic_data), "\n")
cat("Number of Observations:", nrow(topic_data), "\n")

# Clean environment
rm(ctm_files, topic_data)
######### --------------------------------------------

## 3) Create External Point Forecasts

In [30]:
######### --------------------------------------------
### Rolling Mean of Equity Premium
# Equity premium series
ep <- read_rds(glue("{path}/Data/ES1/P_Signals/Econ_Fin/equity_premium.rds"))

# PHM forecast: mean of the premium over the last 5 * 252 rows (fewer rows
# at the start, min_obs = 1), shifted by one row so that each date only
# uses earlier observations
pred_phm <- ep %>%
  mutate(
    trailing_mean = roll::roll_mean(equity_premium, width = (5 * 252), min_obs = 1),
    PHM = dplyr::lag(trailing_mean, n = 1L)
  ) %>%
  dplyr::select(Date, PHM) %>%
  write_rds(
    glue("{path}/Data/ES1/F_Signals/phm.rds"),
    compress = "gz"
  )

# Clean environment
rm(ep, pred_phm)
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Gradient Boosting Volatility Forecast
# Rolling-window forecast: for every t, cm_gbm() is fitted on the last
# `window_size` rows (target = equity_premium, features = ten lags of
# vola_mele_vix) and predicts row t + 1.
# Load custom function
source(paste0(path, "/Code/_cmodels.R"), local = TRUE)

# Load Volatility Data
volatility <- read_rds(glue("{path}/Data/ES1/P_Signals/Econ_Fin/econ_fin_signals.rds")) %>%
  dplyr::select(Date, equity_premium, vola_mele_vix)

# Create lagged variables v1, ..., v10
for (i in seq_len(10)) {
  volatility <- volatility %>%
    mutate(!!paste0("v", i) := dplyr::lag(vola_mele_vix, n = i))
}

# Finalize data: column 1 is the target, the remaining columns are the lags;
# rows with any missing value are dropped
data <- volatility %>%
  column_to_rownames("Date") %>%
  dplyr::select(-vola_mele_vix) %>%
  drop_na() %>%
  as.matrix()

# Result-Object: Predictions (one row per date of the full sample)
pred_vola <- matrix(NA, nrow = nrow(volatility),
                    ncol = 1, dimnames = list(as.character(volatility$Date),
                                              "Refi_GBM"))

# Set Time Parameter
window_size <- 252 * 5
t_start <- window_size
t_end <- nrow(data) - 1
t_seq <- seq(t_start, t_end)
# Number of rows removed by drop_na(); used to map rows of `data` back to
# rows of `pred_vola` (assumes the removed rows are all at the start)
adj <- nrow(volatility) - nrow(data)

# Set Tuning Parameter
folds <- 5
ntrees <- 500
learning_rate <- 0.1
n_core <- 1

# Parallel-Backend
cores <- 6
cl <- parallel::makeCluster(cores)
pbo <- pbapply::pboptions(type = "timer")

# Export, predict, and always shut the cluster down - also when a worker
# fails. The cluster helpers are called with an explicit namespace because
# `parallel` is not attached by the library() calls of this notebook.
tryCatch({
  parallel::clusterExport(cl = cl, varlist = c("data",
                                               "window_size",
                                               "folds",
                                               "ntrees",
                                               "learning_rate",
                                               "n_core",
                                               "cm_gbm"), envir = environment())
  parallel::clusterEvalQ(cl, library("lightgbm"))

  # Perform predictions in parallel
  pred_vola[t_seq + adj + 1, ] <- do.call("rbind", pbapply::pblapply(cl = cl, X = t_seq, FUN = function(t) {

    # Train Data
    x_train <- data[(t - window_size + 1):t, -1, drop = FALSE]
    y_train <- data[(t - window_size + 1):t,  1, drop = FALSE]

    # Test Data
    x_pred <- data[(t + 1), -1, drop = FALSE]

    # Predict
    pred <- cm_gbm(x_train,
                   y_train,
                   x_pred,
                   folds,
                   ntrees,
                   learning_rate,
                   n_core,
                   t)

    # Return Prediction
    return(pred)
  }))
}, finally = {
  # Stop Parallel Back-end
  parallel::stopCluster(cl)
})

# Convert to Tibble and Save
pred_vola <- pred_vola %>%
  dplyr::as_tibble(rownames = "Date") %>%
  dplyr::mutate(Date = ymd(Date)) %>%
  readr::write_rds(glue("{path}/Data/ES1/F_Signals/refi_gbm.rds"),
                   compress = "gz")

# Clean environment
rm(list = c("volatility", "data", "pred_vola",
            "window_size", "t_start", "t_end", "t_seq", "adj",
            "folds", "ntrees", "learning_rate",
            "n_core", "cores", "cl", "pbo", "i"))
######### --------------------------------------------

In [32]:
######### --------------------------------------------
### Equally-Weighted Forecast Combination of DP + TBL
# Rolling-window forecast: for every t, cm_dp_tbl() is fitted on the last
# `window_size` rows (target = equity_premium, features = lagged dp and
# lagged tbl) and predicts row t + 1.
# Load Custom Function
source(glue("{path}/Code/_cmodels.R"), local = TRUE)

# Load Data (both predictors are shifted by one row)
econ_fin <- read_rds(glue("{path}/Data/ES1/P_Signals/Econ_Fin/econ_fin_signals.rds")) %>%
  dplyr::select(Date, equity_premium, dp, TBL3M) %>%
  rename(tbl = TBL3M) %>%
  mutate(dp  = dplyr::lag(dp,  n = 1L),
         tbl = dplyr::lag(tbl, n = 1L)) %>%
  column_to_rownames("Date")

# Drop Na-Values
data <- econ_fin %>%
  drop_na() %>%
  as.matrix()

# Result-Object: Predictions (one row per date of the full sample)
pred_dp_tbl <- matrix(NA, nrow = nrow(econ_fin),
                      ncol = 1, dimnames = list(rownames(econ_fin),
                                                "Refi_DP_TBL"))

# Set Time Parameter
window_size <- 252 * 5
t_start <- window_size
t_end <- nrow(data) - 1
t_seq <- seq(t_start, t_end)
# Number of rows removed by drop_na(); used to map rows of `data` back to
# rows of `pred_dp_tbl` (assumes the removed rows are all at the start)
adj <- nrow(econ_fin) - nrow(data)

# Loop over Rolling Window
for (t in t_seq) {

  # Train Data
  x_train <- data[(t - window_size + 1):t, -1, drop = FALSE]
  y_train <- data[(t - window_size + 1):t,  1, drop = FALSE]

  # Test Data (the realised target of row t + 1 is not needed here)
  x_pred  <- data[(t + 1), -1, drop = FALSE]

  # Fit and Predict
  pred_dp_tbl[t + 1 + adj, ] <- cm_dp_tbl(x_train,
                                          y_train,
                                          x_pred)
}

# Convert to Tibble and Save
pred_dp_tbl <- pred_dp_tbl %>%
  as_tibble(rownames = "Date") %>%
  mutate(Date = ymd(Date)) %>%
  write_rds(glue("{path}/Data/ES1/F_Signals/refi_dp_tbl.rds"),
            compress = "gz")

# Clean environment (including the loop temporaries)
rm(list = c("econ_fin", "data", "pred_dp_tbl",
            "window_size", "t_start", "t_end", "t_seq", "adj",
            "t", "x_train", "y_train", "x_pred"))
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Principal Component Regression Forecast
# Rolling-window forecast: for every t, cm_pcr() is fitted on the last
# `window_size` rows (target = equity_premium, features = all other signals
# shifted by one row) and predicts row t + 1.
# Load Custom Function
source(glue("{path}/Code/_cmodels.R"), local = TRUE)

# Load Data (every column except the target is shifted by one row)
econ_fin <- read_rds(glue("{path}/Data/ES1/P_Signals/Econ_Fin/econ_fin_signals.rds")) %>%
  column_to_rownames("Date") %>%
  mutate(across(!equity_premium, dplyr::lag))

# Drop Na-Values
data <- econ_fin %>%
  drop_na() %>%
  as.matrix()

# Result-Object: Predictions (one row per date of the full sample)
pred_pcr <- matrix(NA, nrow = nrow(econ_fin),
                   ncol = 1, dimnames = list(rownames(econ_fin),
                                             "Refi_PCR_Econ_Fin"))
# Set Time Parameter
window_size <- 252 * 5
t_start <- window_size
t_end <- nrow(data) - 1
t_seq <- seq(t_start, t_end)
# Number of rows removed by drop_na(); used to map rows of `data` back to
# rows of `pred_pcr` (assumes the removed rows are all at the start)
adj <- nrow(econ_fin) - nrow(data)

# Set Tuning Parameter (passed through to cm_pcr())
n_comp <- 10
val <- TRUE

# Parallel-Backend
cores <- 4
cl <- parallel::makeCluster(cores)
pbo <- pbapply::pboptions(type = "timer")

# Export, predict, and always shut the cluster down - also when a worker
# fails. clusterExport() is called with an explicit namespace because
# `parallel` is not attached by the library() calls of this notebook.
tryCatch({
  parallel::clusterExport(cl = cl, varlist = c("data",
                                               "window_size",
                                               "n_comp",
                                               "val",
                                               "cm_pcr"), envir = environment())

  # Perform predictions in parallel
  pred_pcr[t_seq + adj + 1, ] <- do.call("rbind", pbapply::pblapply(cl = cl, X = t_seq, FUN = function(t) {

    # Train Data
    x_train <- data[(t - window_size + 1):t, -1, drop = FALSE]
    y_train <- data[(t - window_size + 1):t,  1, drop = FALSE]

    # Test Data (the realised target of row t + 1 is not needed here)
    x_pred <- data[(t + 1), -1, drop = FALSE]

    # Fit and Predict
    pred <- cm_pcr(x_train,
                   y_train,
                   x_pred,
                   n_comp,
                   val)
    # Return
    return(pred)
  }))
}, finally = {
  # Stop Parallel Back-end
  parallel::stopCluster(cl)
})

# Convert to Tibble and Save
pred_pcr <- pred_pcr %>%
  as_tibble(rownames = "Date") %>%
  mutate(Date = ymd(Date)) %>%
  write_rds(glue("{path}/Data/ES1/F_Signals/refi_pcr_econ_fin.rds"),
            compress = "gz")

# Clean environment
rm(list = c("econ_fin", "data", "pred_pcr",
            "window_size", "t_start", "t_end", "t_seq", "adj",
            "n_comp", "val", "cores", "cl", "pbo"))
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Create F-Signal Set
# Point-forecast files to merge; phm comes first, so its dates define
# the rows of the left joins
file_names <- c("phm", "refi_gbm", "refi_dp_tbl", "refi_pcr_econ_fin")

# Load, join on Date, and save
refi_set <- file_names %>%
  purrr::map(function(name) {
    read_rds(glue("{path}/Data/ES1/F_Signals/{name}.rds"))
  }) %>%
  purrr::reduce(left_join, by = "Date") %>%
  write_rds(
    glue("{path}/Data/ES1/F_Signals/refined_signals.rds"),
    compress = "gz"
  )

# Display Dimensions (the column count includes Date)
cat("Number of Signals:", ncol(refi_set), "\n")
cat("Number of Observations:", nrow(refi_set), "\n")

# Clean environment
rm(file_names, refi_set)
######### --------------------------------------------

## 4) LaTeX Table

In [None]:
# Read Signal-Description-Excel and render it as a grouped LaTeX table
# NOTE(review): unlike the rest of the notebook, this file is addressed by a
# machine-specific absolute path rather than via `path`; adjust it before
# running elsewhere.
readxl::read_excel("/Users/slehmann/Library/CloudStorage/Dropbox/HighFrequency_TextPredictors/Results/ES1/Signals/overview_signals.xlsx", sheet = 2, col_types = "text") %>%
  # replace the ellipsis character (U+2026) by three plain dots
  mutate(across(everything(), ~str_replace_all(., "\\u2026", "..."))) %>%
  # drop the first data row and blank out missing cells
  dplyr::slice(-1) %>%
  replace(is.na(.), "") %>%
  kableExtra::kbl(booktabs = TRUE,
                  caption = "Signals for predicting daily U.S. stock returns",
                  format = "latex",
                  escape = TRUE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>%
  kableExtra::kable_styling(position = "center", latex_options = c("scale_down")) %>%
  # group headers: label, first table row, last table row
  kableExtra::pack_rows("Interest Rates (2)", 1, 2, underline = FALSE, bold = TRUE, hline_before = FALSE) %>%
  kableExtra::pack_rows("Exchange Rates (10)", 3, 12, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("Moving-Average Indicator (10)", 13, 14, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("Price-Momentum (8)", 15, 16, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("Volume-Indicator (11)", 17, 19, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("Volatility (2)", 20, 21, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("Dividends (1)", 22, 22, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("Commodity (2)", 23, 24, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("World Indices (5)", 25, 29, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("Sentiment (40)", 30, 31, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("Word Counts (12,288)", 32, 32, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("Topics (500)", 33, 33, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("Ext. Point Forecasts (4)", 34, 37, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::footnote(general = "NOTES: Excerpt from the menu of signals used for predicting daily U.S. equity premium. First model: (01/06/1954 - 12/31/1998): dp, TBL3M, TSP1, vola, PHM. Second Model (01/06/1954 - 12/31/2021): all 12,883 signals.",
                       footnote_as_chunk = TRUE,
                       fixed_small_size = FALSE,
                       threeparttable = TRUE,
                       escape = FALSE,
                       general_title = "")

----