# Feature Engineering Validation

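This notebook validates the feature engineering pipeline end to end: it loads raw hourly AAPL OHLCV data, labels candlestick patterns, engineers features, creates prediction targets, inspects each feature's correlation with the target return, and saves the resulting feature table.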

In [None]:
import pandas as pd
import sys
import os
import seaborn as sns
import matplotlib.pyplot as plt

# Add src to path
sys.path.append(os.path.abspath('../src'))
from features import FeatureEngineer, create_targets
from labelers import CandlestickLabeler
from utils import preprocess_ohlcv

In [None]:
# Run the full pipeline on one raw file: load -> label candlestick patterns ->
# engineer features -> create targets -> inspect correlations -> save.
data_path = '../data/raw/AAPL_1h.parquet'
output_path = '../data/features/AAPL_1h_features.parquet'
target_col = 'target_return'

if os.path.exists(data_path):
    df = pd.read_parquet(data_path)
    df = preprocess_ohlcv(df)

    # 1. Label Patterns
    labeler = CandlestickLabeler(df)
    df = (labeler
        .label_doji()
        .label_hammer()
        .label_engulfing()
        .get_labeled_data()
    )

    # 2. Engineer Features
    engineer = FeatureEngineer(df)
    df_features = (engineer
        .add_candle_features()
        .add_technical_indicators()
        .add_price_context()
        .add_volatility_features()
        .get_features()
    )

    # 3. Create Targets
    df_features = create_targets(df_features, horizon=1)

    # Drop every row that still contains a NaN in any column.
    # NOTE(review): presumably these come from indicator warm-up windows and
    # the forward-looking target -- confirm against features.py.
    df_features = df_features.dropna()

    print(f"Final shape: {df_features.shape}")
    print("Columns:", df_features.columns.tolist()[:10], "...")

    # Correlation analysis: correlate every column with the target, then remove
    # the target's own entry -- it is trivially 1.0 and would otherwise take a
    # slot in the top-10 listing without telling us anything about features.
    target_corr = (df_features
        .corrwith(df_features[target_col])
        .drop(labels=target_col, errors='ignore')
        .sort_values(ascending=False)
    )
    print("\nTop 10 features correlated with target:")
    print(target_corr.head(10))

    # Save features. Create the output directory first so a fresh checkout
    # (where ../data/features does not exist yet) does not fail at the very
    # last step after all the work above has been done.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    df_features.to_parquet(output_path)
else:
    print(f"File not found: {data_path}")