## Inflation - Signal Set

In [None]:
# Empty the workspace
# NOTE(review): rm(list = ls()) only removes the visible objects of the current
# environment; attached packages and options() are left as they are.
rm(list = ls())

# Set Folder-Path
# Root folder from which the Code/ and Data/ paths in this notebook are built via glue()
path <- "~/LocalPredictability" # Change to project folder

In [None]:
# Packages
library("tidyverse")
library("lubridate")
library("purrr")
library("magrittr")
library("glue")
library("readr")
library("readxl")
library("doParallel")
library("pbapply")
library("R.matlab")
library("glmnet")
library("rpart")
library("kableExtra")

In [None]:
# Load Custom-Functions
# Sources the helper script located in the Code/ folder below `path`
source(glue("{path}/Code/_helpers.R"))

# Convert Jupyter Notebook to R script
# convert_ipynb_to_r() is not defined in this notebook -- presumably it is
# provided by _helpers.R (TODO confirm)
convert_ipynb_to_r(glue("{path}/Code/inflation_signal_sets.ipynb"))

---

## 1) Create P-Signals (for all 4 Target Variables)

In [None]:
######### --------------------------------------------
### Source of the data: https://sites.google.com/site/dimitriskorobilis/matlab/vbdvs
# Read the "Data" sheet of the Koop-Korobilis (2023) workbook, parse the
# quarterly date column into a Date and discard the first row of the sheet
data <- glue("{path}/Data/ES2/P_Signals/dataset.xlsx") %>%
  read_excel(sheet = "Data") %>%
  rename(Date = sasdate) %>%
  mutate(Date = yq(Date)) %>%
  dplyr::slice(-1)

# Keep only letters, digits and underscores in the column names
names(data) <- gsub("[^A-Za-z0-9_]+", "", names(data))

# Training length handed to the outlier adjustment
train_length <- 123
######### --------------------------------------------

In [5]:
######### --------------------------------------------
### Target variables and their lags
# Targets: drop the first remaining row, keep the date plus the four price
# series, apply the target transformation and the outlier adjustment, and tag
# every series with the "_h1" suffix
y_data <- data %>%
  dplyr::slice(-1) %>%
  select(any_of(c("Date", "GDPCTPI", "PCECTPI", "CPIAUCSL", "CPILFESL"))) %>%
  mutate(across(-Date, transform_ytcode)) %>%
  mutate(across(-Date, function(col) adjout_oos(col, train_length, 4.5, 6))) %>%
  rename_with(~ paste0(.x, "_h1"), .cols = -Date)

# Store the targets
write_rds(y_data, glue("{path}/Data/ES2/P_Signals/y_data.rds"))

# Lags: first and second lag of every target (columns "<target>_lag1" and
# "<target>_lag2"); only the date and the lag columns are kept
y_lags <- y_data %>%
  mutate(across(-Date, list(lag1 = function(v) dplyr::lag(v, n = 1L),
                            lag2 = function(v) dplyr::lag(v, n = 2L)))) %>%
  select(Date, contains("_lag"))

# Store the lags
write_rds(y_lags, glue("{path}/Data/ES2/P_Signals/y_lags.rds"))
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### P-Signals
# Re-read the Koop-Korobilis (2023) workbook so that this cell can be run on its own
# (see https://sites.google.com/site/dimitriskorobilis/matlab/vbdvs)
data <- glue("{path}/Data/ES2/P_Signals/dataset.xlsx") %>%
  read_excel(sheet = "Data") %>%
  rename(Date = sasdate) %>%
  mutate(Date = yq(Date)) %>%
  dplyr::slice(-1)

# Keep only letters, digits and underscores in the column names
names(data) <- gsub("[^A-Za-z0-9_]+", "", names(data))

# Training length handed to the outlier adjustment
train_length <- 123

# Predictors: transform every series, drop the first remaining row, adjust
# outliers, move the four price series to positions 2-5 and restrict the sample
x_data <- data %>%
  mutate(across(-Date, transform_tcode)) %>%
  dplyr::slice(-1) %>%
  mutate(across(-Date, function(col) adjout_oos(col, train_length, 4.5, 6))) %>%
  relocate(GDPCTPI, .after = 1) %>%
  relocate(PCECTPI, .after = 2) %>%
  relocate(CPIAUCSL, .after = 3) %>%
  relocate(CPILFESL, .after = 4) %>%
  filter(Date >= "1960-01-01", Date <= "2021-12-01")

# Store the predictors
write_rds(x_data, glue("{path}/Data/ES2/P_Signals/x_data.rds"))
######### --------------------------------------------

## 2) Create F-Signals

In [None]:
######### --------------------------------------------
### Predict Quarterly Inflation -- F-Signals (I) ###
# For each target variable the stored targets, target lags and P-signals are
# merged, and for every t the models behind cm_dt() and cm_eln() (not defined
# in this notebook -- presumably in _cmodels.R) receive rows 1..t as training
# data and row t + 1 as prediction input. The predictions (8 TREE_* and
# 4 ELN_* columns) are saved to
#   {path}/Data/ES2/F_Signals/CM_Forecasts_<target>_h1.rds

# Set Target-Variable-Names
target_names <- c("GDPCTPI", "PCECTPI", "CPIAUCSL", "CPILFESL")

# Set up Parallel-Backend
cores <- 4
cl <- parallel::makeCluster(cores)
pbo <- pbapply::pboptions(type = "timer")

# Parallel Loop over Target-Variables
# Everything that touches the cluster runs inside tryCatch() so that the
# workers are stopped and the progress-bar options restored even when a
# task fails.
res <- tryCatch({

  # Ship inputs to the workers and attach the required packages there
  parallel::clusterExport(cl = cl, varlist = c("target_names", "path"),
                          envir = environment())
  parallel::clusterEvalQ(cl, {
    library("tidyverse")
    library("lubridate") # ymd() is used below; core tidyverse only since 2.0.0
    library("glue")
    library("glmnet")
    library("rpart")
  })

  pbapply::pblapply(cl = cl, X = seq_along(target_names), FUN = function(p) {

    ### Load Custom Functions
    source(glue("{path}/Code/_cmodels.R"), local = TRUE)
    source(glue("{path}/Code/_helpers.R"), local = TRUE)

    ### Build Dataset
    # Response-Name
    y_target <- glue("{target_names[p]}_h1")
    y_signal <- target_names[p]
    not_target <- setdiff(glue("{target_names}_h1"), y_target)

    # Load Y-Data (without the other targets)
    y_data <- read_rds(glue("{path}/Data/ES2/P_Signals/y_data.rds")) %>%
      select(-!!not_target)

    # Load Y-Lags (without the lags of the other targets)
    y_lags <- read_rds(glue("{path}/Data/ES2/P_Signals/y_lags.rds")) %>%
      select(-starts_with(!!not_target))

    # Load P-Signals (without the target's own series)
    x_data <- read_rds(glue("{path}/Data/ES2/P_Signals/x_data.rds")) %>%
      select(-!!y_signal)

    # Combine, lag and prepare data
    # Columns 1-4 are Date, the target and its two lags; every other column
    # is shifted down by one row before the first three rows are dropped.
    data <- y_data %>%
      dplyr::left_join(y_lags, by = "Date") %>%
      dplyr::left_join(x_data, by = "Date") %>%
      dplyr::mutate(across(-c(1, 2, 3, 4), dplyr::lag)) %>%
      dplyr::slice(-c(1:3)) %>%
      tibble::column_to_rownames("Date") %>%
      as.matrix()

    # Dates and Observations
    tdates <- rownames(data)
    tlength <- length(tdates)

    # Assign Target Variable
    y <- data[, 1, drop = FALSE]

    # Assign P-Signals
    X <- data[, -1, drop = FALSE]

    # Clean Environment
    rm(list = c("data", "y_data", "x_data", "y_lags", "y_signal", "not_target"))

    ######### --------------------------------------------
    ### Create F-Signals ###
    # Remove NA-Rows and subset Response
    S_sub <- na.omit(X)
    y_sub <- y[rownames(S_sub), , drop = FALSE]

    # Position of every retained row within the full date index. Looking the
    # position up by row name keeps the forecasts on the right date even if
    # na.omit() removed rows that are not at the start of the sample.
    row_pos <- match(rownames(S_sub), tdates)

    # Clean Environment
    rm(list = c("X", "y"))

    # Tuning Parameter: Regression Trees
    dt_depth <- c(1, 2, 3, 4)
    dt_windows <- c(0, 15 * 4)

    # Tuning Parameter: Ridge / Elastic Net
    eln_alpha <- c(0.0, 0.5)
    eln_windows <- c(0, 15 * 4)

    # Time Sequence
    t_start <- max(dt_windows, eln_windows)
    t_end <- nrow(y_sub) - 1
    if (t_end < t_start) {
      # seq() would silently count downwards here
      stop("Not enough complete observations for ", y_target,
           ": need more than ", t_start, ", got ", nrow(y_sub), ".",
           call. = FALSE)
    }
    t_seq <- seq(t_start, t_end)

    # Result-Object: F-Signals (I)
    coln_dt <- apply(expand.grid(dt_depth, dt_windows), 1,
                     function(x) paste0("TREE_W", x[2], "_D", x[1]))
    coln_eln <- apply(expand.grid(eln_alpha, eln_windows), 1,
                      function(x) paste0("ELN_W", x[2], "_A", x[1]))
    Ext_F <- matrix(NA, ncol = length(c(coln_dt, coln_eln)),
                    nrow = tlength, dimnames = list(tdates, c(coln_dt, coln_eln)))

    # Column blocks of the two model families (loop-invariant)
    dt_idx <- seq_along(coln_dt)
    eln_idx <- length(coln_dt) + seq_along(coln_eln)

    # Loop over time
    for (t in t_seq) {

      ### Split Data ###
      # Train Data
      s_train <- S_sub[1:t, , drop = FALSE]
      y_train <- y_sub[1:t, , drop = FALSE]

      # Predict Data
      s_pred <- S_sub[t + 1, , drop = FALSE]

      # Row of the predicted observation in the result matrix
      out_row <- row_pos[t + 1]

      ### Decision Trees
      # Fit & Predict Model
      Ext_F[out_row, dt_idx] <- cm_dt(s_train,
                                      y_train,
                                      s_pred,
                                      dt_depth,
                                      NULL,
                                      dt_windows)

      ### Elastic Net
      # Fit & Predict Model
      Ext_F[out_row, eln_idx] <- cm_eln(s_train,
                                        y_train,
                                        s_pred,
                                        eln_alpha,
                                        NULL,
                                        eln_windows,
                                        t)
    }

    # Convert to Tibble and Save
    Ext_F %>%
      as_tibble(rownames = "Date") %>%
      mutate(Date = lubridate::ymd(Date)) %>%
      write_rds(glue("{path}/Data/ES2/F_Signals/CM_Forecasts_{y_target}.rds"))
  })
}, finally = {
  # Stop Cluster and restore the previous progress-bar options
  parallel::stopCluster(cl)
  pbapply::pboptions(pbo)
})
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Predict Quarterly Inflation -- F-Signals (II) ###
### Re-save the (point) forecasts of Koop & Korobilis as .rds files
### See https://sites.google.com/site/dimitriskorobilis/matlab/vbdvs
# Target variables for which forecast files exist
target_var_names <- c("GDPCTPI", "PCECTPI", "CPIAUCSL", "CPILFESL")

# Names assigned to the forecast columns, in the order of the combined files
model_names <- c(
  "ARp", "SBAR", "UCSV", "TVPAR",
  "FAC5", "BAG_FAC5", "GPR_FAC5", "DMA_FAC5", "TVD_FAC5", "TVPTVS", "VBDVS_FAC5",
  "SSVS_FAC60", "ELN_FAC60", "VBDVS_FAC60",
  "ELN_X", "PLS_X", "VBDVS_X", "TVP_VAR"
)

# Quarterly dates attached to the forecast rows
date_seq <- seq(from = as.Date("1976-01-01"), to = as.Date("2021-10-01"), by = "quarter")

# One pass per target variable
for (p in seq_along(target_var_names)) {

  # First element of each of the two .mat files
  kk2023 <- readMat(glue("{path}/Data/ES2/F_Signals/Forecasts_{target_var_names[p]}2023.mat"))[[1]]
  kk2013 <- readMat(glue("{path}/Data/ES2/F_Signals/Forecast_{target_var_names[p]}2013.mat"))[[1]]

  # Put both sets side by side, prepend the dates, name the columns and save
  bind_cols(as_tibble(kk2023), kk2013) %>%
    mutate(Date = date_seq, .before = 1) %>%
    rename_with(~model_names, .cols = -starts_with("Date")) %>%
    write_rds(glue("{path}/Data/ES2/F_Signals/Forecasts_{target_var_names[p]}.rds"))
}
######### --------------------------------------------

In [None]:
######### --------------------------------------------
### Predict Quarterly Inflation -- F-Signals (III) - AR-Augmented-Signals (Robustness) ###
# For each target variable and each P-signal, an OLS regression of the target
# on an intercept, the first target lag and that single signal is re-fitted
# on a rolling window of `window_size` rows and used to predict the next row.
# The predictions (one "<signal>_AR" column per signal) are saved to
#   {path}/Data/ES2/F_Signals/CM_Forecasts_AR_<target>_h1.rds

# Set Target-Variable-Names
target_names <- c("GDPCTPI", "PCECTPI", "CPIAUCSL", "CPILFESL")
window_size <- 15 * 4

# Set up Parallel-Backend
cores <- 4
cl <- parallel::makeCluster(cores)
pbo <- pbapply::pboptions(type = "timer")

# Parallel Loop over Target-Variables
# Everything that touches the cluster runs inside tryCatch() so that the
# workers are stopped and the progress-bar options restored even when a
# task fails.
res <- tryCatch({

  # Ship inputs to the workers and attach the required packages there
  parallel::clusterExport(cl = cl, varlist = c("target_names", "path", "window_size"),
                          envir = environment())
  parallel::clusterEvalQ(cl, {
    library("tidyverse")
    library("lubridate") # ymd() is used below; core tidyverse only since 2.0.0
    library("glue")
  })

  pbapply::pblapply(cl = cl, X = seq_along(target_names), FUN = function(p) {

    ### Load Custom Functions
    source(glue("{path}/Code/_cmodels.R"), local = TRUE)
    source(glue("{path}/Code/_helpers.R"), local = TRUE)

    ### Build Dataset
    # Response-Name
    y_target <- glue("{target_names[p]}_h1")
    y_signal <- target_names[p]
    not_target <- setdiff(glue("{target_names}_h1"), y_target)

    # Load Y-Data (without the other targets)
    y_data <- read_rds(glue("{path}/Data/ES2/P_Signals/y_data.rds")) %>%
      select(-!!not_target)

    # Load Y-Lags (without the lags of the other targets)
    y_lags <- read_rds(glue("{path}/Data/ES2/P_Signals/y_lags.rds")) %>%
      select(-starts_with(!!not_target))

    # Load P-Signals (without the target's own series)
    x_data <- read_rds(glue("{path}/Data/ES2/P_Signals/x_data.rds")) %>%
      select(-!!y_signal)

    # Combine, lag and prepare data
    # Columns 1-4 are Date, the target and its two lags; every other column
    # is shifted down by one row before the first three rows are dropped.
    data <- y_data %>%
      dplyr::left_join(y_lags, by = "Date") %>%
      dplyr::left_join(x_data, by = "Date") %>%
      dplyr::mutate(across(-c(1, 2, 3, 4), dplyr::lag)) %>%
      dplyr::slice(-c(1:3)) %>%
      tibble::column_to_rownames("Date") %>%
      as.matrix()

    # Dates and Observations
    tdates <- rownames(data)
    tlength <- length(tdates)

    # Assign Target Variable
    y <- data[, 1, drop = FALSE]

    # Assign P-Signals
    X <- data[, -1, drop = FALSE]

    # Clean Environment
    rm(list = c("data", "y_data", "x_data", "y_lags", "y_signal", "not_target"))

    ######### --------------------------------------------
    ### Create F-Signals ###
    # Remove NA-Rows and subset Response
    S_sub <- na.omit(X)
    y_sub <- y[rownames(S_sub), , drop = FALSE]

    # Position of every retained row within the full date index. Looking the
    # position up by row name keeps the forecasts on the right date even if
    # na.omit() removed rows that are not at the start of the sample.
    row_pos <- match(rownames(S_sub), tdates)

    # Clean Environment
    rm(list = c("X", "y"))

    # Time Sequence
    t_start <- window_size
    t_end <- nrow(y_sub) - 1
    if (t_end < t_start) {
      # seq() would silently count downwards here
      stop("Not enough complete observations for ", y_target,
           ": need more than ", t_start, ", got ", nrow(y_sub), ".",
           call. = FALSE)
    }
    t_seq <- seq(t_start, t_end)

    # Signal Index (columns 1 and 2 of S_sub are the two target lags)
    s_idx <- seq_len(ncol(S_sub))[-c(1, 2)]
    s_names <- colnames(S_sub)[-c(1, 2)]

    # Result-Object: F-Signals (III)
    Ext_F <- matrix(
      NA,
      ncol = length(s_names),
      nrow = tlength,
      dimnames = list(tdates, paste0(s_names, "_AR"))
    )

    # Loop over time
    for (t in t_seq) {

      # Rolling window, response and output row are the same for all signals
      win_rows <- (t - window_size + 1):t
      y_train <- y_sub[win_rows, , drop = FALSE]
      out_row <- row_pos[t + 1]

      # Loop over Signals
      for (i in s_idx) {

        # Split Data: intercept, first target lag and the i-th signal
        s_train <- cbind(int = 1, S_sub[win_rows, c(1, i), drop = FALSE])
        s_pred  <- cbind(int = 1, S_sub[t + 1, c(1, i), drop = FALSE])

        # Fit AR-Model (OLS)
        model <- stats::.lm.fit(s_train, y_train)

        # Predict
        Ext_F[out_row, i - 2] <- s_pred %*% model$coefficients
      }
    }

    # Convert to Tibble and Save
    Ext_F %>%
      as_tibble(rownames = "Date") %>%
      mutate(Date = lubridate::ymd(Date)) %>%
      write_rds(glue("{path}/Data/ES2/F_Signals/CM_Forecasts_AR_{y_target}.rds"))
  })
}, finally = {
  # Stop Cluster and restore the previous progress-bar options
  parallel::stopCluster(cl)
  pbapply::pboptions(pbo)
})
######### --------------------------------------------

## 3) LaTeX Table

In [None]:
######### --------------------------------------------
### LaTeX table with an overview of the signals
# Signal-Description-Excel
# NOTE(review): machine-specific absolute path that does not depend on `path`;
# point it to the local copy of the workbook before running this cell.
signals_xlsx <- "/Users/slehmann/Library/CloudStorage/Dropbox/HighFrequency_TextPredictors/Results/ES2/overview_signals.xlsx"

# Read the third sheet as text, replace ellipsis characters by three dots,
# blank out missing cells and render the table as LaTeX with grouped rows
readxl::read_excel(signals_xlsx, sheet = 3, col_types = "text") %>%
  mutate(across(everything(), ~str_replace_all(., "\\u2026", "..."))) %>%
  replace(is.na(.), "") %>%
  kableExtra::kbl(booktabs = TRUE,
                  caption = "Application to inflation: Signals",
                  format = "latex",
                  escape = TRUE) %>%
  kableExtra::row_spec(0, bold = TRUE) %>%
  kableExtra::kable_styling(position = "center", latex_options = c("scale_down")) %>%
  kableExtra::pack_rows("Inflation (4)", 1, 4, underline = FALSE, bold = TRUE, hline_before = FALSE) %>%
  kableExtra::pack_rows("AR-Terms (2)", 5, 6, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("FRED (24)", 7, 12, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("FRED-QD (219)", 13, 18, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("Goyal and Welch (10)", 19, 24, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("Portfolio Data (140)", 25, 30, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("Survey Data (11)", 31, 36, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("World Bank (36)", 37, 42, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::pack_rows("Point Forecasts (20)", 43, 53, underline = FALSE, bold = TRUE, hline_before = TRUE) %>%
  kableExtra::footnote(general = "Excerpt from the signals used: in total 2 + 439 + 8 + 12 = 461 signals. For further details on the data, see \\\\cite{koop2022}.",
                       footnote_as_chunk = TRUE,
                       fixed_small_size = FALSE,
                       threeparttable = TRUE,
                       escape = FALSE,
                       general_title = "")
######### --------------------------------------------

---