# 03 - Feature Engineering

This notebook computes derived features from raw metrics:
- Lagged features (1, 3, 7, 14, 30 days)
- Rolling statistics (7d, 30d means, std)
- Momentum indicators
- Technical indicators (RSI, Bollinger Bands)
- Efficiency ratios
- Seasonality features

In [None]:
import os
import sys

# Put the parent of the current working directory at the front of the import
# path so the `from src import ...` line below can resolve.
# NOTE(review): presumably the notebook is launched from a subdirectory of the
# project root, where the `src` package lives; confirm against the repo layout.
sys.path.insert(0, os.path.dirname(os.getcwd()))

import pandas as pd
import numpy as np

from src import preprocessing as prep

In [None]:
# Read the raw panel CSV and convert its `date` column to datetimes in one chain.
panel = pd.read_csv('../data/processed/panel_raw.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

# Quick sanity report: dimensions first, then the column names.
print(
    f"Loaded panel with shape: {panel.shape}",
    f"Columns: {list(panel.columns)}",
    sep="\n",
)

In [None]:
# Announce the step before it runs, so the message appears ahead of any
# output produced by the feature computation itself.
print("\nComputing derived features...")

# Delegate the feature computation to the project's preprocessing module.
panel_feat = prep.compute_features(panel)

# Report the dimensions and the total column count of the returned frame.
print(
    f"\nNew shape: {panel_feat.shape}",
    f"New columns: {len(panel_feat.columns)}",
    sep="\n",
)

In [None]:
# Everything except the `protocol` and `date` columns is treated as a feature.
feature_cols = [
    name for name in panel_feat.columns if name not in ('protocol', 'date')
]

# Print the features as a list numbered from 1.
print("\nFeature columns:")
for position, column in enumerate(feature_cols, start=1):
    print(f"  {position}. {column}")

In [None]:
# Pick out the numeric columns; only these are summarised below.
numeric_cols = panel_feat.select_dtypes(include=[np.number]).columns

print("\nFeature statistics:")
# Transposed so each column of the panel becomes one row of the summary.
# Kept as the final expression so the notebook renders the table.
panel_feat.loc[:, numeric_cols].describe().transpose()

In [None]:
# Save processed panel
# The feature-augmented frame is written as Parquet. `index=False` leaves the
# DataFrame index out of the written file.
parquet_path = '../data/processed/panel.parquet'
panel_feat.to_parquet(parquet_path, index=False)
# Echo the destination so the output location is visible in the notebook.
print(f"\nProcessed panel saved to {parquet_path}")