In [1]:
# https://otexts.com/fpp3/prophet.html
# Load required libraries. Make sure to install them first.
library(dplyr)
library(Rcpp)
library(ggplot2)
library(lubridate)
library(forecast)
library(prophet)
library(Metrics)
library(zoo) # use for date formatting 
library(stats) # use for additive
library(caret)
library(glmnet)
library(fredr)
#library(tidyverse)

ERROR: Error in library(readxl): there is no package called ‘readxl’


In [None]:
# Read the saved deposit-level data and copy its `time_as_date` column to
# `date`, the column name the merge with the FRED series is keyed on.
deposits <- readRDS(
  file = "/workspaces/DUKE_MIDS_QFC_Final_Project/model_prediction/deposits.rds"
)
deposits[["date"]] <- deposits[["time_as_date"]]

In [None]:
# Read the saved regressor data set (stored under data_cleaning/) so it is
# available in this file.
regressors_data <- readRDS(
  file = "/workspaces/DUKE_MIDS_QFC_Final_Project/data_cleaning/regressors_data.rds"
)

In [None]:
# Read the saved `prediction_data_2` object (stored under model_prediction/)
# so it is available in this file.
prediction_data_2 <- readRDS(
  file = "/workspaces/DUKE_MIDS_QFC_Final_Project/model_prediction/prediction_data_2.rds"
)

In [None]:
# Build the data set for a linear regression that assesses the relationship
# (sensitivity = coefficient) of different macro factors on the deposit level.

# Step 1: Fetch the regressors from FRED.
# Every series is requested as a quarterly percent change (frequency = "q",
# units = "pch") to capture the reaction to shifts in each indicator rather
# than its plain absolute value:
#   FEDFUNDS   -> federal_rate
#   UNRATE     -> unemployment_rate
#   UMCSENT    -> CPI
#   GDP        -> gdp_rate
#   NASDAQCOM  -> NASDAQ_value
#   CSUSHPINSA -> Home_Price_Index (U.S. National Home Price Index)
# NOTE(review): FRED's UMCSENT is the University of Michigan Consumer Sentiment
# index, not a consumer price index. The column keeps the name `CPI` because
# the regression refers to it by that name; confirm which indicator was
# intended before interpreting that coefficient.

# The FRED API key is a credential and must not live in source control.
# It is read from the FRED_API_KEY environment variable (e.g. set in
# ~/.Renviron); fail early with a clear message when it is missing.
fred_api_key <- Sys.getenv("FRED_API_KEY")
if (!nzchar(fred_api_key)) {
  stop(
    "FRED_API_KEY is not set; add it to .Renviron or call Sys.setenv().",
    call. = FALSE
  )
}
fredr_set_key(fred_api_key)

# Define the time range
start_date <- as.Date("1984-01-01")
end_date <- as.Date("2024-04-01")

# Fetch one FRED series as quarterly percent changes and rename its `value`
# column.
#   series_id  - FRED series identifier, e.g. "FEDFUNDS".
#   value_name - name to give the `value` column of the result.
#   from, to   - observation window; default to the range defined above.
# Returns the fredr() result with `value` renamed to `value_name`.
fetch_fred_pct_change <- function(series_id, value_name,
                                  from = start_date, to = end_date) {
  observations <- fredr(
    series_id = series_id,
    observation_start = from,
    observation_end = to,
    frequency = "q",
    units = "pch"
  )
  names(observations)[names(observations) == "value"] <- value_name
  observations
}

fed_funds <- fetch_fred_pct_change("FEDFUNDS", "federal_rate")
unemployment <- fetch_fred_pct_change("UNRATE", "unemployment_rate")
CPI <- fetch_fred_pct_change("UMCSENT", "CPI")
GDP <- fetch_fred_pct_change("GDP", "gdp_rate")
NASDAQ <- fetch_fred_pct_change("NASDAQCOM", "NASDAQ_value")
housing <- fetch_fred_pct_change("CSUSHPINSA", "Home_Price_Index")

# Step 2: Create a data frame of all the regressors together with the deposit
# level, joined on `date`.
data_to_merge <- list(fed_funds, unemployment, CPI, GDP, NASDAQ, housing, deposits)

# Drop the unwanted metadata/time columns from each input *before* merging.
# Merging first would carry the same non-key columns (series_id,
# realtime_start, realtime_end) from six frames through repeated merge()
# calls, which produces clashing ".x"/".y" suffixed names.
unwanted_columns <-
  "series_id|realtime_end|realtime_start|time|time_as_date|time_as_numeric"
frames_to_merge <- lapply(
  data_to_merge,
  function(frame) select(frame, -matches(unwanted_columns))
)

# Full outer join so every date present in any input is kept; rows with
# missing values are handled by the regression step.
merge_data <- Reduce(function(x, y) {
  merge(x, y, by = c("date"), all = TRUE)
}, frames_to_merge)

# Next: fit the linear regression on `merge_data` to estimate the sensitivity
# (coefficient) of deposits to each factor.

In [None]:
# Calculate sensitivity: regress the deposit level on the macro regressors;
# each fitted coefficient is read as the sensitivity of deposits to that factor.

# Keep only the rows in which every column is observed.
merge_data <- na.omit(merge_data)

# Ordinary least squares fit of deposits on all six regressors.
linear_regression <- lm(
  formula = deposits ~ federal_rate + gdp_rate + unemployment_rate + CPI +
    Home_Price_Index + NASDAQ_value,
  data = merge_data
)
summary(linear_regression)
# Author's notes from an earlier run:
# - Running the model with all the regressors gave a p-value > 0.05, so some
#   regressors had to be removed for the remaining ones to be significant.
#   NOTE(review): the call above still includes all six regressors.
# - A 1% change in federal rate leads to a $548,000,000 change in deposits.
#   NOTE(review): the stress-test parameters below use -0.591 instead;
#   confirm which figure matches the current fit.

# Step 2: Run the Monte Carlo Simulations

In [None]:

# Step 3: Define stress test parameters

# Size of the interest-rate drop being stressed: 5%, based on the move from
# mid-2007 to December 2008.
# NOTE(review): the original comment said "december 2028", presumably a typo.
drop_in_interest_rate <- 0.05
# Sensitivity of deposits to the federal rate taken from the lm model
# ($0.59 billion per 1% change).
# NOTE(review): earlier comments quote $596,000,000 and $548,000,000 for the
# same figure; confirm against the current regression output.
sensitivity <- -0.591


# Step 4: Monte Carlo simulation
set.seed(42) # For reproducibility
num_simulations <- 100000
# Baseline 2025 deposit level (in thousands, from the XGBoost forecast).
# NOTE(review): the original comment read this as 3.54 trillion, but
# 354,271,700 thousand is about 354 billion; confirm the units.
deposit_2025 <- 354271700

# Each simulated deposit level is the baseline times a random multiplier drawn
# from N(mean = 1 + sensitivity * drop_in_interest_rate, sd = 0.05).
# All multipliers are drawn in one vectorised rnorm() call; with the same seed
# this consumes the random stream in the same order as drawing them one at a
# time in a loop. Adjust the standard deviation as needed.
simulated_deposits <- deposit_2025 * rnorm(
  num_simulations,
  mean = 1 + sensitivity * drop_in_interest_rate,
  sd = 0.05
)

# Step 5: Calculate statistics
mean_value <- mean(simulated_deposits)
# 5th percentile of the simulated deposit levels (used as the VaR marker).
VAR <- quantile(simulated_deposits, 0.05)

# Data frame wrapper consumed by the plot below.
data <- data.frame(simulated_deposits)

# Step 6: Plot the distribution of simulated deposit levels as a histogram,
# with dashed vertical lines at the mean (blue) and at VAR (red).
ggplot(data, aes(x = simulated_deposits)) +
  geom_histogram(
    binwidth = 10000,
    fill = "skyblue",
    color = "skyblue",
    alpha = 0.7
  ) +
  # geom_vline() has no `label` parameter, so none is passed; the lines are
  # labelled by the annotate() calls below.
  geom_vline(xintercept = mean_value, color = "blue", linetype = "dashed", size = 1) +
  geom_vline(xintercept = VAR, color = "red", linetype = "dashed", size = 1) +
  labs(
    title = "Distribution of Deposit Levels Under Stress Test 2025",
    x = "Deposit Level ($)",
    y = "Frequency"
  ) +
  # Text annotations take `fontface`, not `face`, for bold text.
  # NOTE(review): the label figures are hard-coded, so they will not follow
  # changes to the simulation inputs; the "trillion" unit should also be
  # checked against the units of the simulated values.
  annotate(
    "text",
    x = mean_value, y = 45,
    label = "Mean:  $343 trillion",
    color = "blue", size = 8, fontface = "bold"
  ) +
  annotate(
    "text",
    x = VAR, y = 50,
    label = "VAR: $314 trillion",
    color = "red", size = 8, fontface = "bold"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(size = 15, face = "bold"),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12)
  )
