In [None]:
suppressWarnings(suppressPackageStartupMessages(library("dplyr")))
suppressWarnings(suppressPackageStartupMessages(library("duckdb")))
suppressWarnings(suppressPackageStartupMessages(library(rugarch)))
suppressWarnings(suppressPackageStartupMessages(library(tidyr)))
suppressWarnings(suppressPackageStartupMessages(library(zoo)))
suppressWarnings(suppressPackageStartupMessages(library(forecast)))
suppressWarnings(suppressPackageStartupMessages(library(lubridate)))
library(fs)
library(dotenv)
load_dot_env()

In [None]:
# Location of the DuckDB file, assembled from the DB_PATH (directory) and
# DB_FILE (file name) environment variables.
DB_PATH <- Sys.getenv(x = "DB_PATH")
DB_FILE <- Sys.getenv(x = "DB_FILE")
duckdb_path <- path(DB_PATH, DB_FILE)

In [None]:
# Read-only connection to the DuckDB file at `duckdb_path`.
# Earlier read-write variant, kept for reference:
# con = dbConnect(duckdb::duckdb(), "../financial_news.db", read_only = FALSE)
con <- dbConnect(
  duckdb::duckdb(),
  dbdir = duckdb_path,
  read_only = TRUE
)

In [None]:
# # drop table first 
# dbExecute(con, "DROP TABLE IF EXISTS headlines.rolling_predictions_daily;")
# dbExecute(con, "
#     CREATE TABLE IF NOT EXISTS headlines.rolling_predictions_daily (
#         date DATE,
#         actual_vix FLOAT,
#         predicted_vix FLOAT,
#         primary key (date)
#     );
# ")

# # drop table first
# dbExecute(con, "DROP TABLE IF EXISTS headlines.rolling_predictions_weekly;")
# dbExecute(con, "
#     CREATE TABLE IF NOT EXISTS headlines.rolling_predictions_weekly (
#         date DATE,
#         actual_vix FLOAT,
#         predicted_vix FLOAT,
#         primary key (date)
#     );
# ")


In [None]:
# Pull VIX data from the database, then release the connection.
df <- dbGetQuery(con, "SELECT * FROM SP500.vix_index")
# df2 = dbGetQuery(con, "SELECT * FROM headlines.trading_calendar")
dbDisconnect(con, shutdown = TRUE)

# Convert to Date first so the range filter compares dates, not raw values.
df$vix_date <- as.Date(df$vix_date)

# Keep 2022-01-01 .. 2025-01-02 (inclusive) and sort chronologically.
# The query has no ORDER BY, and everything below (last value per week,
# positional train/test split, ts construction) assumes time order.
df <- df %>%
  filter(
    vix_date >= as.Date("2022-01-01") & vix_date <= as.Date("2025-01-02")
  ) %>%
  arrange(vix_date)
# head(df)
# tail(df2)
# tail(df)

# Weekly series: last observed VIX value in each "%Y-%U" week
# (YYYY-WW, where WW is the Sunday-based week number).
df_weekly <- df %>%
  mutate(week = format(vix_date, "%Y-%U")) %>%
  group_by(week) %>%
  summarise(vix_last = last(vix_value))

# Train-Test Split (85% Train, 15% Test), weekly data.
# The cut point is floored explicitly instead of relying on `:` truncating a
# fractional upper bound.
n_weekly <- nrow(df_weekly)
split_weekly <- floor(n_weekly * 0.85)
train_df_weekly <- df_weekly[seq_len(n_weekly) <= split_weekly, ]
test_df_weekly <- df_weekly[seq_len(n_weekly) > split_weekly, ]

# Train-Test Split (85% Train, 15% Test), daily data.
n <- nrow(df)
split_daily <- floor(n * 0.85)
train_df <- df[seq_len(n) <= split_daily, ]
test_df <- df[seq_len(n) > split_daily, ]

In [5]:
# Weekly (frequency 52) and daily (frequency 251) VIX series, both starting at
# period 1 of 2022. No `end` is given: with an explicit `end`, ts() silently
# recycles or truncates the data when its length does not match the
# start/end span, so the series length now always follows the data.
vix_weekly_ts_last <- ts(df_weekly$vix_last, start = c(2022, 1), frequency = 52)
vix_ts <- ts(df$vix_value, start = c(2022, 1), frequency = 251)

# 85% / 15% positional split of the weekly series.
# Note: `[` on a ts returns a plain numeric vector (ts attributes are dropped).
n <- length(vix_weekly_ts_last)
weekly_cut <- floor(n * 0.85)
vix_weekly_ts_last.train <- vix_weekly_ts_last[seq_len(n) <= weekly_cut]
vix_weekly_ts_last.test <- vix_weekly_ts_last[seq_len(n) > weekly_cut]

# Same split for the daily series; `n` ends up as the daily length.
n <- length(vix_ts)
daily_cut <- floor(n * 0.85)
vix_ts.train <- vix_ts[seq_len(n) <= daily_cut]
vix_ts.test <- vix_ts[seq_len(n) > daily_cut]


In [None]:
# Rolling one-step-ahead forecasts over a hold-out set.
#
# For each position f in `testing_df`, the model described by `spec` is refit
# on `training_df` extended with the first f - 1 hold-out values, and the
# one-step-ahead series forecast is stored at position f.
#
# Arguments:
#   spec         model specification handed to ugarchfit().
#   training_df  numeric vector (or ts) of training observations.
#   testing_df   numeric vector (or ts) of hold-out observations.
#   fallback     value stored for a step whose fit or forecast fails.
#                Defaults to 0, the value previously used for failed fits;
#                pass NA_real_ to make failed steps distinguishable.
#
# Returns a numeric vector with one entry per element of `testing_df`
# (length 0 when `testing_df` is empty). A warning lists any failed steps.
garch_pred <- function(spec, training_df, testing_df, fallback = 0) {
  wnfore <- length(testing_df)
  pred_values <- numeric(wnfore)
  step_failed <- logical(wnfore)

  # seq_len() yields no iterations for an empty hold-out set, whereas
  # 1:wnfore would iterate over c(1, 0).
  for (f in seq_len(wnfore)) {
    # Roll the window: the goal is to predict the next hold-out value, so
    # every already-seen hold-out value is appended to the training data.
    wdata <- training_df
    if (f >= 2) {
      wdata <- c(training_df, testing_df[1:(f - 1)])
    }
    wdata <- ts(
      wdata,
      start = start(training_df),
      frequency = frequency(training_df)
    )

    # Fit the ARIMA-GARCH model and forecast one step ahead. Both calls sit in
    # the same tryCatch so a failure in either is handled per step instead of
    # aborting the whole roll.
    step_pred <- tryCatch(
      {
        w_fit <- suppressWarnings(ugarchfit(spec, wdata, solver = "hybrid"))
        w_fore <- ugarchforecast(w_fit, n.ahead = 1)
        w_fore@forecast$seriesFor[1]
      },
      error = function(e) NULL
    )

    if (is.null(step_pred)) {
      pred_values[f] <- fallback
      step_failed[f] <- TRUE
    } else {
      pred_values[f] <- step_pred
    }
  }

  if (any(step_failed)) {
    warning(
      sprintf(
        "garch_pred: fit/forecast failed at %d of %d steps (%s); fallback value used",
        sum(step_failed), wnfore, paste(which(step_failed), collapse = ", ")
      ),
      call. = FALSE
    )
  }

  pred_values
}

In [None]:
# Weekly model: ARMA(2, 2) mean equation with an intercept, variance model
# with garchOrder c(0, 1), and "std" (Student-t) innovations.
weekly.spec <- ugarchspec(
  variance.model = list(garchOrder = c(0, 1)),
  mean.model = list(armaOrder = c(2, 2), include.mean = TRUE), # update order!
  distribution.model = "std"
)

# Rolling one-step-ahead predictions for every week of the hold-out set.
weekly_pred <- garch_pred(
  weekly.spec,
  vix_weekly_ts_last.train,
  vix_weekly_ts_last.test
)

# One row per hold-out week: week label ("%Y-%U"), observed and predicted VIX.
weekly_results_df <- data.frame(
  date = test_df_weekly$week,
  actual_vix = test_df_weekly$vix_last,
  predicted_vix = weekly_pred
)


# Daily counterpart, currently disabled:
# daily.spec = ugarchspec(variance.model=list(garchOrder=c(0,0)),
#                         mean.model=list(armaOrder=c(5,4), include.mean=T), # update order!
#                         distribution.model="std")
# daily_pred  = garch_pred(daily.spec, vix_ts.train, vix_ts.test)
# daily_results_df = data.frame(
#     date = test_df$vix_date,
#     actual_vix = test_df$vix_value,
#     predicted_vix = daily_pred
# )

# head(daily_results_df)
head(weekly_results_df)

Unnamed: 0_level_0,date,actual_vix,predicted_vix
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>
1,2024-30,23.39,16.95253
2,2024-31,20.37,23.73948
3,2024-32,14.8,19.4452
4,2024-33,15.86,15.54554
5,2024-34,15.0,16.30197
6,2024-35,22.38,15.12041


In [None]:
# print("writing to database")
# dbWriteTable(con, "headlines.rolling_predictions_daily", daily_results_df, overwrite=T)
# dbWriteTable(con, "headlines.rolling_predictions_weekly", weekly_results_df, overwrite=T)

# dbDisconnect(con, shutdown=TRUE)

# print("Daily rolling predictions complete!")
# print("Weekly rolling predictions complete!")


[1] "writing to database"


[1] "Daily rolling predictions complete!"
[1] "Weekly rolling predictions complete!"


In [None]:
# Save the data frame `df` as a CSV file at `filename`, without a row-name
# column. Defining this in the global environment masks base::write().
write <- function(df, filename) {
  write.csv(x = df, file = filename, row.names = FALSE)
}

# weekly_results_df = weekly_results_df %>%
#   mutate(date = as.Date(paste0(date, "-1"), format = "%Y-%U-%u")) # Monday as start of the week

# Turn the "%Y-%U" week label of the weekly training set into a Date by
# appending weekday 1 (Monday) and parsing it as "%Y-%U-%u".
train_df_weekly <- train_df_weekly %>%
  mutate(week = as.Date(paste0(week, "-1"), format = "%Y-%U-%u")) # Monday as start of the week


# The daily prediction cell is currently commented out, so daily_results_df
# may not exist. Skip that export instead of erroring before the remaining
# files are written.
if (exists("daily_results_df")) {
  write(daily_results_df, "daily_results.csv")
} else {
  message("daily_results_df is not defined; skipping daily_results.csv")
}
write(weekly_results_df, "weekly_results.csv")
write(train_df, "daily_train_df.csv")
write(train_df_weekly, "weekly_train_df.csv")