# 01 - Feature Engineering

This notebook demonstrates how to:

- Load OHLCV data from `data/`
- Build microstructure / technical / volume-based features
- Inspect feature distributions and correlations

Run the cells top-to-bottom; adapt the data path and parameters for your own assets.



In [None]:
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from research.common import load_ohlcv_csv, BarConfig
from research.feature_engineering import FeatureConfig, build_features

# Input CSV, given relative to the notebook's working directory.
# Point this at your own asset before running the rest of the notebook.
DATA_PATH = pathlib.Path("../data/equities/your_asset.csv")

# Load only when the file is present; otherwise stop right here with an
# actionable message instead of failing somewhere inside the loader.
if DATA_PATH.exists():
    # The loader is handed a plain string path rather than the Path object.
    raw = load_ohlcv_csv(str(DATA_PATH))
else:
    raise FileNotFoundError(f"Please drop an OHLCV CSV at {DATA_PATH}")

# Preview the first rows as the cell output
# (presumably `raw` is a pandas DataFrame — confirm against load_ohlcv_csv).
raw.head()


In [None]:
# Windowing parameters passed on to the feature builder. Both values are
# presumably counted in bars — TODO confirm against BarConfig.
bar_cfg = BarConfig(
    lookback_window=128,
    prediction_horizon=5,
)

# The feature configuration wraps the bar configuration; every other
# FeatureConfig option is left at its default.
feat_cfg = FeatureConfig(bar=bar_cfg)

# build_features yields three objects; judging by their later use, X is the
# windowed feature array, y the targets and feature_cols the feature names
# (verify against research.feature_engineering).
X, y, feature_cols = build_features(raw, feat_cfg)

# Quick sanity check: both shapes plus the first ten feature names.
(X.shape, y.shape, feature_cols[:10])


In [None]:
# Plot a few example features over time for intuition.
#
# `X[:, -1, :]` keeps only the last step of every window, so `feat_df` ends up
# with one row per window and one column per feature. `feat_df` is also the
# input of the correlation heatmap cell further down.

# Number of leading features that get their own panel.
N_PREVIEW_FEATURES = 3

feat_df = pd.DataFrame(
    X[:, -1, :],  # last step in each window
    columns=feature_cols,
)

# Size the grid from the columns that are actually available, so a feature set
# with fewer than N_PREVIEW_FEATURES columns does not leave blank panels.
preview_cols = list(feature_cols[:N_PREVIEW_FEATURES])
fig, axes = plt.subplots(
    max(len(preview_cols), 1),  # a subplot grid needs at least one row
    1,
    figsize=(12, 8),
    sharex=True,
    squeeze=False,  # always return a 2-D array of axes, even for one panel
)
for ax, col in zip(axes[:, 0], preview_cols):
    ax.plot(feat_df[col].to_numpy())
    ax.set_title(col)
    ax.set_ylabel("Value")

# The x-axis is shared, so labelling the bottom panel is enough.
axes[-1, 0].set_xlabel("Window index")
plt.tight_layout()
plt.show()


In [None]:
# Simple correlation heatmap of a subset of features.
#
# `DataFrame.corr()` computes pairwise Pearson correlations by default.
# Columns that are constant over the sample yield NaN and show up as blank
# cells in the image.

# Number of leading features included in the heatmap.
N_CORR_FEATURES = 15

corr = feat_df[feature_cols[:N_CORR_FEATURES]].corr()

corr_fig, corr_ax = plt.subplots(figsize=(10, 8))

# Pin the colour scale to the full correlation range [-1, 1]. Without explicit
# limits imshow stretches the colormap to the data's min/max, so the neutral
# midpoint of the diverging "coolwarm" map would not line up with zero
# correlation and colours would not be comparable between runs or assets.
heatmap = corr_ax.imshow(
    corr,
    cmap="coolwarm",
    vmin=-1.0,
    vmax=1.0,
    aspect="auto",
)
corr_fig.colorbar(heatmap, ax=corr_ax, label="Correlation")

# One tick per feature on both axes, labelled with the feature name.
tick_positions = range(len(corr.columns))
corr_ax.set_xticks(tick_positions)
corr_ax.set_xticklabels(corr.columns, rotation=90)
corr_ax.set_yticks(tick_positions)
corr_ax.set_yticklabels(corr.columns)
corr_ax.set_title("Feature Correlation (subset)")

corr_fig.tight_layout()
plt.show()
