## base

In [1]:
# Project root directory; the trailing slash is kept so that relative paths
# can be appended directly
homepath <- "C:/A.PROJECTS/stockprediction/"

# Price time-series file, located under the project root. Derived from
# `homepath` so the root only has to be changed in one place.
data_address <- paste0(homepath, "data/sp500_timeseries.txt")

# Load required libraries
library(dplyr)
library(tidyr)
library(lubridate)
library(ggplot2)
library(zoo)
library(mFLICA)
library(foreach)
library(doParallel)
library(future.apply)



Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



Attaching package: 'lubridate'


The following objects are masked from 'package:base':

    date, intersect, setdiff, union



Attaching package: 'zoo'


The following objects are masked from 'package:base':

    as.Date, as.Date.numeric


Loading required package: iterators

Loading required package: parallel

Loading required package: future



In [2]:
# Read the tab-separated price table (first row holds the column names)
sp500_price <- read.table(
  data_address,
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

# Parse the Date column (expects "YYYY-MM-DD" strings) and sort the rows
# chronologically so that row order equals time order
sp500_price$Date <- as.Date(sp500_price$Date, format = "%Y-%m-%d")
sp500_price <- sp500_price %>% arrange(Date)

# Set the dates aside so that only the value columns are interpolated
Date <- sp500_price$Date
sp500_price$Date <- NULL

# Handle missing values by interpolation. na.rm = FALSE keeps every row, so
# values that cannot be interpolated remain NA instead of being dropped.
sp500_price <- na.approx(sp500_price, na.rm = FALSE)

# The interpolated result is a matrix here; convert it back to a data frame
sp500_price <- as.data.frame(sp500_price)

# Drop "Long-NA" columns: keep only columns with at most 20 remaining NAs
sp500_price <- sp500_price %>% select(where(~ sum(is.na(.)) <= 20))

# Re-attach the dates as the first column, named "Date"
sp500_price <- cbind(Date, sp500_price)

In [30]:
#' Compute period-over-period returns at several calendar frequencies
#'
#' For each frequency (weekly, monthly, quarterly, annual) the last observed
#' value of every numeric column within each period is taken, and both log
#' returns `log(p_t / p_{t-1})` and simple returns `p_t / p_{t-1} - 1` are
#' computed between consecutive periods.
#'
#' @param price_data Data frame with a `Date` column (a Date, or something
#'   `as.Date()` can coerce) plus the numeric columns to compute returns for.
#' @return Named list of eight data frames: `weekly_log`, `weekly_simple`,
#'   `monthly_log`, `monthly_simple`, `quarterly_log`, `quarterly_simple`,
#'   `annual_log`, `annual_simple`. Each holds the period key columns (`Year`,
#'   plus `Week`/`Month`/`Quarter` where applicable) followed by the return
#'   columns. The first period is dropped because it has no predecessor.
compute_returns <- function(price_data) {
  price_data$Date <- as.Date(price_data$Date)

  # last() picks the final row of each period, so make the chronological
  # order explicit instead of relying on the caller having sorted the data
  price_data <- price_data %>% arrange(Date)

  # Calendar key(s) that identify one period at each frequency. The lubridate
  # functions (and dplyr::last / dplyr::lag below) are namespace-qualified
  # because data.table, which is attached later in this file, exports
  # functions with the same names and would otherwise mask them.
  period_keys <- list(
    weekly = list(Year = lubridate::year, Week = lubridate::week),
    monthly = list(Year = lubridate::year, Month = lubridate::month),
    quarterly = list(Year = lubridate::year, Quarter = lubridate::quarter),
    annual = list(Year = lubridate::year)
  )

  return_funs <- list(
    log = function(p) log(p / dplyr::lag(p)),
    simple = function(p) p / dplyr::lag(p) - 1
  )

  # Returns for one frequency / return-type combination
  period_returns <- function(keys, return_fun) {
    key_names <- names(keys)
    keyed <- price_data
    keyed[key_names] <- lapply(keys, function(key_fun) key_fun(price_data$Date))

    keyed %>%
      group_by(across(all_of(key_names))) %>%
      summarise(across(where(is.numeric), dplyr::last), .groups = "drop") %>%
      # Once ungrouped, the key columns are plain numeric columns. Exclude
      # them so Year/Week/... stay period identifiers rather than being
      # turned into "returns" themselves.
      mutate(across(where(is.numeric) & !all_of(key_names), return_fun)) %>%
      slice(-1)
  }

  list(
    weekly_log = period_returns(period_keys[["weekly"]], return_funs[["log"]]),
    weekly_simple = period_returns(
      period_keys[["weekly"]], return_funs[["simple"]]
    ),
    monthly_log = period_returns(
      period_keys[["monthly"]], return_funs[["log"]]
    ),
    monthly_simple = period_returns(
      period_keys[["monthly"]], return_funs[["simple"]]
    ),
    quarterly_log = period_returns(
      period_keys[["quarterly"]], return_funs[["log"]]
    ),
    quarterly_simple = period_returns(
      period_keys[["quarterly"]], return_funs[["simple"]]
    ),
    annual_log = period_returns(period_keys[["annual"]], return_funs[["log"]]),
    annual_simple = period_returns(
      period_keys[["annual"]], return_funs[["simple"]]
    )
  )
}

In [31]:
# Compute weekly/monthly/quarterly/annual log and simple returns from the
# cleaned price table. `rets` is a named list of data frames keyed by
# "<frequency>_<type>", e.g. rets[["weekly_log"]].
rets <- compute_returns(sp500_price)

### Leader-Follower (running parallel)

In [51]:
library(dplyr)
library(future.apply)
library(future)
library(data.table)
library(mFLICA)  # Assuming mFLICA is from an external package

# Build the numeric input matrix from the weekly log returns. The period key
# columns (Year, Week) are numeric as well, so they are dropped explicitly:
# they identify the period and are not return series.
sp500_matrix <- rets[["weekly_log"]] %>%
  select(-any_of(c("Year", "Week"))) %>%
  select(where(is.numeric)) %>%
  as.matrix()

# Fail early if there is nothing to analyse
if (is.null(sp500_matrix) ||
      nrow(sp500_matrix) == 0 ||
      ncol(sp500_matrix) == 0) {
  stop("Error: sp500_matrix is empty or incorrectly formatted.")
}

# Enable parallel execution only once the input is known to be valid
plan(multisession)

# The WHOLE matrix is passed as a single list element (not split by column),
# so exactly one future is created.
# NOTE(review): the mFLICA documentation describes its input as TS[i, t, d]
# (series x time x dimension); confirm that a 2-D matrix is accepted as-is.
result_mFLICA <- tryCatch(
  future_lapply(list(sp500_matrix), function(x) {
    print("Processing batch:")
    print(dim(x))  # Check the matrix dimensions before passing to mFLICA
    str(x)  # str() prints by itself; print(str(x)) added a stray "NULL"

    if (is.null(x) || anyNA(x) || all(x == 0)) {
      print("Skipping due to NULL/NA or all-zero input.")
      return(NULL)
    }

    # A failure inside mFLICA is reported and mapped to NULL rather than
    # aborting the whole run
    tryCatch(
      {
        result <- mFLICA(x)
        print("Completed batch processing.")
        result
      },
      error = function(e) {
        print(paste("Error in mFLICA:", conditionMessage(e)))
        NULL
      }
    )
  }, future.seed = TRUE),
  # Reset the execution mode even if the call above fails
  finally = plan(sequential)
)

print(result_mFLICA)


: 

In [None]:
""" THIS CODE IS THE SAME AS ABOVE BUT WITH MAXIMUM SPEED"""


library(dplyr)
library(future.apply)
library(future)
library(data.table)
library(mFLICA)  # Assuming mFLICA is from an external package
library(furrr)  # Faster alternative to future_lapply()

# 🔥 Set up optimized parallel execution
plan(multisession, workers = availableCores() - 1)  # Use all but 1 core

# 🚀 Convert weekly_log data to matrix (ENSURE ONLY NUMERIC COLUMNS)
sp500_matrix <- rets[['weekly_log']] %>%
  select(where(is.numeric)) %>%
  as.matrix()

# Ensure matrix has valid numeric values
if (is.null(sp500_matrix) || nrow(sp500_matrix) == 0 || ncol(sp500_matrix) == 0) {
  stop("Error: sp500_matrix is empty or incorrectly formatted.")
}

# 🔥 Parallelized function using `future_map()` (Faster than future_lapply)
result_mFLICA <- future_map(list(sp500_matrix), function(x) {
    print("Processing batch:")
    print(dim(x))  # Debug matrix dimensions
    print(str(x))  # Debug structure

    if (is.null(x) || any(is.na(x)) || all(x == 0)) {
      print("Skipping due to NULL/NA or all-zero input.")
      return(NULL)
    }

    tryCatch({
      result <- mFLICA(x)
      print("Completed batch processing.")  
      return(result)
    }, error = function(e) {
      print(paste("Error in mFLICA:", e$message))
      return(NULL)
    })
  }, .options = furrr_options(seed = TRUE))  # Ensures reproducibility

plan(sequential)  # Reset execution mode

print(result_mFLICA)


### Leader-Follower (simple running)

In [None]:
# Sequential variant: call mFLICA() directly, without futures.
# NOTE(review): the input here is the price table `sp500_price`, not
# rets[["weekly_log"]] as in the parallel cells — confirm this is intended.
result_mFLICA <- local({
  # Drop the "Year" and "Week" columns, but only if they are present
  without_keys <- select(sp500_price, -any_of(c("Year", "Week")))
  # Keep numeric columns only, so the matrix conversion stays numeric
  numeric_only <- select(without_keys, where(is.numeric))
  mFLICA(as.matrix(numeric_only))
})

### Plotting

In [None]:
# Plot the most recently computed leader-follower result.
# NOTE(review): this relies on plot() being able to handle whatever object
# `result_mFLICA` holds at this point (the parallel cells store a list with
# one element per batch) — confirm against the mFLICA documentation.
plot(result_mFLICA)

###