In [None]:
# 5-day accumulated volume — volume only
import numpy as np

from data_utils import set_seed, read_and_merge, build_features_5_21, split_by_date, fit_transform_scalers, make_sequences_generic, time_aware_train_val_split
from model_utils import build_lstm, train
from eval_utils import regression_report_log_and_orig, plot_history, scatter_actual_vs_pred, plot_timeseries

# Config — run hyperparameters, one assignment per line
# (PEP 8 discourages semicolon-joined compound statements).
WINDOW = 30       # passed to make_sequences_generic() and build_lstm()
BATCH_SIZE = 64   # passed to train()
EPOCHS = 30       # passed to train()
LR = 1e-3         # passed to build_lstm() as lr
SEED = 42         # passed to set_seed()
# Apply the configured seed before any data or model work.
# NOTE(review): presumably seeds the RNGs used downstream — confirm in data_utils.
set_seed(SEED)

# 1) Load & features
# read_and_merge returns the combined frame together with the date that is
# later handed to split_by_date.
df, split_date = read_and_merge(
    "5_21_train.csv",
    "5_21_test.csv",
)
# VIX is switched off for this run; the target column is requested as "target_5d".
df = build_features_5_21(
    df,
    use_vix=False,
    target="target_5d",
)

# 2) Split & scale
# Volume-only run: "log_vol" is the single input feature.
feature_cols = ["log_vol"]

# Cut the merged frame back into train/test at split_date.
train_df, test_df = split_by_date(df, split_date)

# NOTE(review): the helper receives train and test separately; presumably the
# scaler is fitted on train only — confirm in data_utils.
train_s, test_s, scaler = fit_transform_scalers(
    train_df,
    test_df,
    feature_cols,
)

# 3) Sequences
# Build WINDOW-long inputs for each split. The third return value is only
# kept for the test split, where it is later used as the x-axis of the
# time-series plot.
X_train, y_train, _ = make_sequences_generic(
    train_s,
    WINDOW,
    feature_cols,
)
X_test, y_test, idx_test = make_sequences_generic(
    test_s,
    WINDOW,
    feature_cols,
)

# Carve a validation set out of the training sequences (val_frac=0.1).
# NOTE(review): "time-aware" by name — presumably the split keeps
# chronological order; confirm in data_utils.
X_tr, y_tr, X_val, y_val = time_aware_train_val_split(
    X_train,
    y_train,
    val_frac=0.1,
)

# 4) Model
# The network is sized from the window length and the number of feature
# columns, so it stays in sync with feature_cols.
model = build_lstm(
    WINDOW,
    n_features=len(feature_cols),
    lr=LR,
)

# Fit on the training part, monitoring the held-out validation part; the
# returned object is passed to plot_history later on.
hist = train(
    model,
    X_tr,
    y_tr,
    X_val,
    y_val,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

# 5) Eval
# Predict on the test sequences and flatten the output to 1-D before scoring.
# NOTE(review): the *_log naming and the later np.exp() call suggest the
# targets are log-scale — confirm against build_features_5_21.
y_pred_log = model.predict(X_test).ravel()

# Score predictions against the test targets and show the resulting report.
report = regression_report_log_and_orig(y_test, y_pred_log)
print(report)

# 6) Plots
# Training curves recorded by train().
plot_history(
    hist,
    title="LSTM Training History — 5-day",
)

# Actual vs predicted, using the values exactly as the model emits them.
scatter_actual_vs_pred(
    y_test,
    y_pred_log,
    title="LSTM 5-day — Actual vs Pred (log)",
)

# Same comparison over the test index, with both series passed through
# np.exp() before plotting.
plot_timeseries(
    idx_test,
    np.exp(y_test),
    np.exp(y_pred_log),
    ylabel="5-day accumulated volume",
    title="LSTM 5-day — Original scale",
)