# Feature Engineering

Engineer technical features from the raw price CSVs in `data/raw/` and save them to `data/processed/`.

In [1]:
import os
import pandas as pd
import numpy as np

In [11]:
def rsi(series, period=14):
    """
    Compute the Relative Strength Index (RSI) of a numeric series.

    Gains and losses are the positive and negative parts of the one-step
    difference, each smoothed with an exponential moving average using
    ``alpha = 1 / period`` and ``adjust=False``.

    Args:
        series: pandas Series of numeric values (e.g. closing prices).
        period: Smoothing length; the EMA uses ``alpha = 1 / period``.

    Returns:
        pandas Series aligned with ``series``, with values in [0, 100].
        The first element is NaN (no difference is available), and the
        result is NaN wherever both the smoothed gain and the smoothed loss
        are zero (a completely flat history). When only the smoothed loss is
        zero the RSI is 100.
    """
    delta = series.diff()
    avg_gain = delta.clip(lower=0).ewm(alpha=1 / period, adjust=False).mean()
    avg_loss = (-delta.clip(upper=0)).ewm(alpha=1 / period, adjust=False).mean()
    # 100 * gain / (gain + loss) is algebraically the same as
    # 100 - 100 / (1 + gain / loss), but it stays well defined when the
    # smoothed loss is zero: an all-gain window yields 100 instead of NaN.
    return 100 * avg_gain / (avg_gain + avg_loss)

def compute_features(df):
    """
    Build return, moving-average, volatility and RSI features from 'Close'.

    Only the 'Close' column is read; it is converted to float before any
    calculation. All other columns of ``df`` are carried through unchanged.
    The input frame is not modified.

    Returns a tuple ``(out, feature_cols)``:
      * ``out`` - copy of ``df`` with the feature columns and the target
        'y_next_ret' (the following row's 1-step return) appended, after
        dropping every row that contains a NaN in any column.
      * ``feature_cols`` - list of the engineered feature column names.
    """
    out = df.copy()
    close = out["Close"].astype(float)
    daily_ret = close.pct_change()

    # Insertion order of this mapping fixes both the column order in the
    # output frame and the order of the returned feature list.
    engineered = {
        "ret_1d": daily_ret,
        "ret_5d": close.pct_change(5),
        "ma_5": close.rolling(5).mean(),
        "ma_10": close.rolling(10).mean(),
        "ema_10": close.ewm(span=10, adjust=False).mean(),
        "vol_5d": daily_ret.rolling(5).std(),
        "rsi_14": rsi(close, 14),
    }
    for column, values in engineered.items():
        out[column] = values

    # Target: the 1-step return shifted up one row, i.e. the next row's return.
    out["y_next_ret"] = daily_ret.shift(-1)

    return out.dropna(), list(engineered)

In [14]:
# Build one feature file in ../data/processed/ for every CSV in ../data/raw/.
raw_dir = "../data/raw/"
processed_dir = "../data/processed/"
# DataFrame.to_csv does not create missing directories.
os.makedirs(processed_dir, exist_ok=True)

for filename in os.listdir(raw_dir):
    stem, ext = os.path.splitext(filename)
    # os.listdir returns every entry; skip anything that is not a CSV file.
    if ext.lower() != ".csv":
        continue
    df = pd.read_csv(os.path.join(raw_dir, filename))
    # NOTE(review): the first two rows are discarded and the columns renamed
    # positionally - presumably extra header rows written by the downloader;
    # confirm against the raw file layout.
    df = df[2:]
    df.columns = ["Date", "Close", "High", "Low", "Open", "Volume"]
    feats, feature_cols = compute_features(df)
    # splitext removes only the trailing extension, and building the name
    # outside the f-string avoids nested double quotes (a SyntaxError before
    # Python 3.12).
    out = os.path.join(processed_dir, f"{stem}_features.csv")
    feats.to_csv(out)
    print("Saved ->", out, "(features:", feature_cols, ")")


Saved -> ../data/processed/MSFT_features.csv (features: ['ret_1d', 'ret_5d', 'ma_5', 'ma_10', 'ema_10', 'vol_5d', 'rsi_14'] )
Saved -> ../data/processed/SPY_features.csv (features: ['ret_1d', 'ret_5d', 'ma_5', 'ma_10', 'ema_10', 'vol_5d', 'rsi_14'] )
Saved -> ../data/processed/AAPL_features.csv (features: ['ret_1d', 'ret_5d', 'ma_5', 'ma_10', 'ema_10', 'vol_5d', 'rsi_14'] )
