In [1]:
import sys
from pathlib import Path

# Resolve the project root so the project-local `src` package can be imported
# whether the kernel was started in the project root or in a "notebooks"
# subfolder of it.
ROOT = Path.cwd()
# If your notebook is inside a "notebooks" folder, go one level up
if ROOT.name == "notebooks":
    ROOT = ROOT.parent

# Put the project root first on sys.path. Any existing occurrence is dropped
# beforehand so that re-running this cell stays idempotent instead of piling up
# duplicate entries (the root still ends up with highest import priority).
_root_entry = str(ROOT)
sys.path[:] = [entry for entry in sys.path if entry != _root_entry]
sys.path.insert(0, _root_entry)
print("Project root on sys.path:", ROOT)


Project root on sys.path: C:\Users\saita\Yahoo Stock Forecasting


In [2]:
import pickle
import numpy as np
import pandas as pd

from src.config import DATA_PATH, DATE_COL, TARGET_COL, LSTM_LOOKBACK
from src.data_utils import (
    load_raw_data,
    train_val_test_split_time,
    fit_scalers,
    apply_scalers,
    create_lstm_sequences,
)


In [3]:
# Read the raw dataset from the configured location.
df = load_raw_data(DATA_PATH)

# Partition into train / validation / test frames. The split rule lives in
# src.data_utils (presumably chronological, judging by the name - confirm there).
train_df, val_df, test_df = train_val_test_split_time(df)

# Row count of each split, displayed as the cell output.
tuple(map(len, (train_df, val_df, test_df)))

(1277, 273, 275)

In [4]:
# Candidate input columns; any that the loaded frame does not contain are
# skipped rather than raising.
feature_cols = [
    col
    for col in ("Open", "High", "Low", "Close", "Adj Close", "Volume")
    if col in df.columns
]

# Scalers are fitted on the training split only.
scalers = fit_scalers(train_df, feature_cols, target_col=TARGET_COL)

# Persist the fitted scalers (written relative to the current working directory).
with open("scalers.pkl", "wb") as fh:
    pickle.dump(scalers, fh)

In [5]:
# Transform every split with the scalers fitted on the training data.
# Splits are processed in train -> val -> test order, one call per split.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    apply_scalers(split_df, feature_cols, scalers, TARGET_COL)
    for split_df in (train_df, val_df, test_df)
)

# Shapes of the scaled feature arrays, displayed as the cell output.
tuple(scaled.shape for scaled in (X_train, X_val, X_test))

((1277, 6), (273, 6), (275, 6))

In [6]:
# Build LSTM input/target sequences for each split independently, using the
# configured lookback. Order of calls: train, val, test.
(
    (X_train_seq, y_train_seq),
    (X_val_seq, y_val_seq),
    (X_test_seq, y_test_seq),
) = (
    create_lstm_sequences(features, targets, LSTM_LOOKBACK)
    for features, targets in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

# Shapes of the training sequences and their targets, displayed as the cell output.
tuple(seq.shape for seq in (X_train_seq, y_train_seq))

((1217, 60, 6), (1217, 1))

In [7]:
# Bundle all sequence arrays into a single uncompressed .npz archive, keyed by
# split name (written relative to the current working directory).
np.savez(
    "lstm_data.npz",
    **{
        "X_train": X_train_seq,
        "y_train": y_train_seq,
        "X_val": X_val_seq,
        "y_val": y_val_seq,
        "X_test": X_test_seq,
        "y_test": y_test_seq,
    },
)