In [13]:
import numpy as np

data_path = "../02_Data/Processed/sp500_model_data_window30.npz"

# Open the archive in a context manager so the underlying file handle is
# released as soon as the arrays have been read (np.load on an .npz returns a
# lazily-read NpzFile that otherwise stays open).
# NOTE(review): allow_pickle=True lets NumPy unpickle object arrays, which can
# execute arbitrary code if the file is untrusted. It is kept here because the
# archive may contain object-dtype entries (e.g. feature_cols) -- confirm, and
# switch to allow_pickle=False if the file loads without it.
with np.load(data_path, allow_pickle=True) as npz:
    print("Keys in file:", npz.files)

    # Each lookup reads the array from disk, so all of them must happen
    # while the archive is still open.
    X_train = npz["X_train"]
    y_train = npz["y_train"]
    X_val = npz["X_val"]
    y_val = npz["y_val"]
    X_test = npz["X_test"]
    y_test = npz["y_test"]

    feature_cols = npz["feature_cols"]
    window_size = int(npz["window_size"])

# Quick sanity report on what was loaded.
print("Window size:", window_size)
print("Number of features:", X_train.shape[2])
print("Train shape:", X_train.shape, y_train.shape)
print("Val shape:", X_val.shape, y_val.shape)
print("Test shape:", X_test.shape, y_test.shape)
print("First few feature names:", feature_cols[:10])


Keys in file: ['window_size', 'feature_cols', 'X_train', 'y_train', 'dates_train', 'X_val', 'y_val', 'dates_val', 'X_test', 'y_test', 'dates_test']
Window size: 30
Number of features: 13
Train shape: (706, 30, 13) (706,)
Val shape: (250, 30, 13) (250,)
Test shape: (488, 30, 13) (488,)
First few feature names: ['Close' 'High' 'Low' 'Open' 'Volume' 'log_return' 'ma_5' 'ma_20'
 'rolling_vol_20' 'sentiment_mean']


In [14]:
import numpy as np
import pandas as pd

from pathlib import Path

import sys

# Make the project source directory importable so we can import our modules.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
if "../04_Src" not in sys.path:
    sys.path.append("../04_Src")

from evaluation.metrics import regression_metrics

data_path = "../02_Data/Processed/sp500_model_data_window30.npz"

# NOTE(review): the archive handle is intentionally left open here, exactly as
# before, in case later cells still read other keys from `npz`.
npz = np.load(data_path, allow_pickle=True)

# Pull out the feature windows and targets, one split per line.
X_train, y_train = npz["X_train"], npz["y_train"]
X_val, y_val = npz["X_val"], npz["y_val"]
X_test, y_test = npz["X_test"], npz["y_test"]

# Metadata stored next to the arrays.
feature_cols = npz["feature_cols"]
window_size = int(npz["window_size"])

# Summary of what was loaded.
print("Window size:", window_size)
print("Number of features:", X_train.shape[2])
print("Train shape:", X_train.shape, y_train.shape)
print("Val shape:", X_val.shape, y_val.shape)
print("Test shape:", X_test.shape, y_test.shape)
print("First few feature names:", feature_cols[:10])


Window size: 30
Number of features: 13
Train shape: (706, 30, 13) (706,)
Val shape: (250, 30, 13) (250,)
Test shape: (488, 30, 13) (488,)
First few feature names: ['Close' 'High' 'Low' 'Open' 'Volume' 'log_return' 'ma_5' 'ma_20'
 'rolling_vol_20' 'sentiment_mean']


In [15]:
# Convert the stored feature names to plain Python strings so list methods
# such as .index() can be used on them.
feature_cols_list = list(map(str, feature_cols))
print("Feature columns:", feature_cols_list)

# Position of the log-return feature along the feature axis; .index() raises
# ValueError if the column is missing.
log_return_index = feature_cols_list.index("log_return")
print("Index of log_return in feature dimension:", log_return_index)


Feature columns: ['Close', 'High', 'Low', 'Open', 'Volume', 'log_return', 'ma_5', 'ma_20', 'rolling_vol_20', 'sentiment_mean', 'sentiment_pos_mean', 'sentiment_neg_mean', 'news_count']
Index of log_return in feature dimension: 5


In [16]:
# Baseline one (test split): the forecast for the next return is simply the
# last log return observed inside each input window.
y_pred_naive = last_returns_test = X_test[:, -1, log_return_index]

metrics_naive = regression_metrics(y_test, y_pred_naive)
metrics_naive


{'mae': 0.00986573769000976,
 'rmse': 0.014724061388990689,
 'directional_accuracy': 0.49385245901639346}

In [17]:
# Same naive "repeat the last observed return" forecast, scored on the
# validation split.
y_pred_naive_val = last_returns_val = X_val[:, -1, log_return_index]

metrics_naive_val = regression_metrics(y_val, y_pred_naive_val)
metrics_naive_val


{'mae': 0.009149708786975696,
 'rmse': 0.011460160549923139,
 'directional_accuracy': 0.48}

In [18]:
# Baseline two: predict next return = mean of last 5 log returns in the window

def rolling_mean_baseline(X, log_return_idx, window_tail=5):
    """Predict the next return as the mean of the most recent log returns.

    Parameters
    ----------
    X : array-like with shape (samples, window_size, num_features)
        Input windows.
    log_return_idx : int
        Position of the log-return feature along the last axis of ``X``.
    window_tail : int, default 5
        Number of trailing time steps to average. Must be at least 1. If it
        exceeds the window length, the slice is clamped and the whole window
        is averaged.

    Returns
    -------
    Array with one value per sample: the mean log return over the last
    ``window_tail`` steps of that sample's window.

    Raises
    ------
    ValueError
        If ``window_tail`` is smaller than 1.
    """
    if window_tail < 1:
        # Without this guard, window_tail=0 would slice ``-0:`` (the entire
        # window) and a negative value would drop leading steps instead of
        # selecting a tail -- both silently produce the wrong average.
        raise ValueError(f"window_tail must be >= 1, got {window_tail}")

    # Keep only the last `window_tail` steps of the log-return feature.
    log_returns_segment = X[:, -window_tail:, log_return_idx]
    return log_returns_segment.mean(axis=1)

# Score the 5-step mean-of-returns baseline on the test split.
y_pred_ma5_test = rolling_mean_baseline(
    X=X_test,
    log_return_idx=log_return_index,
    window_tail=5,
)
metrics_ma5 = regression_metrics(y_test, y_pred_ma5_test)
metrics_ma5


{'mae': 0.007425175713441336,
 'rmse': 0.011315619647611063,
 'directional_accuracy': 0.5102459016393442}

In [19]:
# Score the 5-step mean-of-returns baseline on the validation split.
y_pred_ma5_val = rolling_mean_baseline(
    X=X_val,
    log_return_idx=log_return_index,
    window_tail=5,
)
metrics_ma5_val = regression_metrics(y_val, y_pred_ma5_val)
metrics_ma5_val


{'mae': 0.007177495266604542,
 'rmse': 0.00906796341729078,
 'directional_accuracy': 0.516}

In [20]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Collapse each (window_size, num_features) window into one flat row vector.
def flatten_sequences(X):
    """Reshape a 3-D array of windows into a 2-D design matrix.

    Takes ``X`` with shape (samples, window_size, num_features) and returns
    the same data reshaped to (samples, window_size * num_features).
    Unpacking the shape raises ValueError if ``X`` is not 3-dimensional.
    """
    sample_count, step_count, feature_count = X.shape
    row_width = step_count * feature_count
    return X.reshape((sample_count, row_width))

# Flatten every split the same way so each sample becomes a single row.
X_train_flat, X_val_flat, X_test_flat = map(
    flatten_sequences, (X_train, X_val, X_test)
)

# Report the resulting 2-D shapes for a quick sanity check.
print("Flattened shapes:")
print("Train:", X_train_flat.shape)
print("Val:  ", X_val_flat.shape)
print("Test: ", X_test_flat.shape)


Flattened shapes:
Train: (706, 390)
Val:   (250, 390)
Test:  (488, 390)


In [21]:
# Linear baseline: standardise the flattened features, then fit a plain
# linear regression. Both steps are fitted on the training split only.
linreg_pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("regressor", LinearRegression()),
    ]
)
linreg_pipeline.fit(X_train_flat, y_train)

# Predict on the held-out splits (validation first, then test).
y_pred_lin_val, y_pred_lin_test = (
    linreg_pipeline.predict(split_features)
    for split_features in (X_val_flat, X_test_flat)
)

# Score each split with the shared metric helper.
metrics_lin_val = regression_metrics(y_val, y_pred_lin_val)
metrics_lin_test = regression_metrics(y_test, y_pred_lin_test)

(metrics_lin_val, metrics_lin_test)


({'mae': 0.008873289555253358,
  'rmse': 0.010976856363987531,
  'directional_accuracy': 0.472},
 {'mae': 0.014376251572824477,
  'rmse': 0.020164945455263674,
  'directional_accuracy': 0.4918032786885246})

In [22]:
# One row per baseline model and one column per (split, metric) pair.
# Columns are generated in the order val_* then test_*, and within each
# split as mae, rmse, directional_accuracy.
baseline_results = pd.DataFrame(
    {
        "model": ["naive_last_return", "ma5_last_returns", "linear_regression"],
        **{
            f"{split}_{metric}": [scores[metric] for scores in per_model]
            for split, per_model in (
                ("val", (metrics_naive_val, metrics_ma5_val, metrics_lin_val)),
                ("test", (metrics_naive, metrics_ma5, metrics_lin_test)),
            )
            for metric in ("mae", "rmse", "directional_accuracy")
        },
    }
)

baseline_results


Unnamed: 0,model,val_mae,val_rmse,val_directional_accuracy,test_mae,test_rmse,test_directional_accuracy
0,naive_last_return,0.00915,0.01146,0.48,0.009866,0.014724,0.493852
1,ma5_last_returns,0.007177,0.009068,0.516,0.007425,0.011316,0.510246
2,linear_regression,0.008873,0.010977,0.472,0.014376,0.020165,0.491803


In [23]:
results_path = "../05_Results/baseline_metrics.csv"

# Create the results directory if it is missing (e.g. on a fresh checkout);
# otherwise to_csv fails because it does not create parent directories.
Path(results_path).parent.mkdir(parents=True, exist_ok=True)

# Persist the comparison table without the DataFrame's integer index.
baseline_results.to_csv(results_path, index=False)
results_path


'../05_Results/baseline_metrics.csv'

### Baseline model summary

In [25]:
from evaluation.metrics import format_metrics

# Validation split: one formatted line per baseline, in a fixed order.
print("Validation metrics")
print(
    *(
        format_metrics(scores, prefix=label)
        for label, scores in (
            ("Naive last return", metrics_naive_val),
            ("MA5 last returns", metrics_ma5_val),
            ("Linear regression", metrics_lin_val),
        )
    ),
    sep="\n",
)

# Test split: same baselines, same order.
print("\nTest metrics")
print(
    *(
        format_metrics(scores, prefix=label)
        for label, scores in (
            ("Naive last return", metrics_naive),
            ("MA5 last returns", metrics_ma5),
            ("Linear regression", metrics_lin_test),
        )
    ),
    sep="\n",
)

Validation metrics
Naive last return: mae=0.009150, rmse=0.011460, directional_accuracy=0.480000
MA5 last returns: mae=0.007177, rmse=0.009068, directional_accuracy=0.516000
Linear regression: mae=0.008873, rmse=0.010977, directional_accuracy=0.472000

Test metrics
Naive last return: mae=0.009866, rmse=0.014724, directional_accuracy=0.493852
MA5 last returns: mae=0.007425, rmse=0.011316, directional_accuracy=0.510246
Linear regression: mae=0.014376, rmse=0.020165, directional_accuracy=0.491803
