In [None]:
# Next-day volume forecast
import numpy as np

from data_utils import set_seed, read_and_merge, build_features_nextday, split_by_date, fit_transform_scalers, make_sequences_generic, time_aware_train_val_split
from eval_utils import regression_report_log_and_orig, plot_history, scatter_actual_vs_pred, plot_timeseries
from model_utils import build_lstm, train

# Run configuration, one constant per line.
WINDOW = 30        # passed to the sequence builder and to build_lstm
BATCH_SIZE = 64
EPOCHS = 100
LR = 1e-3
SEED = 42
set_seed(SEED)

# Load the dedicated train/test CSVs and derive the next-day features.
df, split_date = read_and_merge("train_volume_vix.csv", "test_volume_vix.csv")
df = build_features_nextday(df)

# Split at the date returned by read_and_merge, then scale the model inputs.
# NOTE(review): presumably the scaler is fit on train_df only — confirm in
# data_utils.fit_transform_scalers.
train_df, test_df = split_by_date(df, split_date)
feature_cols = ["log_volume_t", "vix_close", "vix_lag1", "vix_change", "vix_5d_ma"]
train_s, test_s, scaler = fit_transform_scalers(train_df, test_df, feature_cols)

# Build model inputs/targets for each partition; the third return value is
# only kept for the test set, where it is used as the x-axis of the final plot.
X_train, y_train, _ = make_sequences_generic(train_s, WINDOW, feature_cols)
X_test, y_test, idx_test = make_sequences_generic(test_s, WINDOW, feature_cols)

# Hold out 15% of the training sequences for validation.
X_tr, y_tr, X_val, y_val = time_aware_train_val_split(X_train, y_train, val_frac=0.15)

# Build and fit the model.
model = build_lstm(WINDOW, n_features=len(feature_cols), lr=LR)
hist = train(model, X_tr, y_tr, X_val, y_val, epochs=EPOCHS, batch_size=BATCH_SIZE)

# Predict on the test sequences; ravel() flattens the output to 1-D so it
# lines up with y_test.
y_pred_log = model.predict(X_test).ravel()
report = regression_report_log_and_orig(y_test, y_pred_log)
print(report)

# Diagnostics: training curves, log-scale scatter, and a time-series plot.
plot_history(hist, title="Training History — Next-day")
scatter_actual_vs_pred(y_test, y_pred_log, title="Next-day — Actual vs Pred (log)")
# np.exp maps targets and predictions back for the original-scale plot.
# NOTE(review): assumes the target is a natural log of volume — confirm in
# data_utils.build_features_nextday.
plot_timeseries(
    idx_test,
    np.exp(y_test),
    np.exp(y_pred_log),
    ylabel="Volume",
    title="Next-day — Original scale",
)