# Pattern Detection Validation

This notebook validates the rule-based pattern detection logic.

In [None]:
import pandas as pd
import mplfinance as mpf
import sys
import os
import matplotlib.pyplot as plt

# Add src to path
sys.path.append(os.path.abspath('../src'))
from labelers import CandlestickLabeler
from utils import preprocess_ohlcv

In [None]:
# Load the raw hourly bars, label candlestick patterns, and persist the result.
data_path = '../data/raw/AAPL_1h.parquet'
output_path = '../data/processed/AAPL_1h_labeled.parquet'

if os.path.exists(data_path):
    df = pd.read_parquet(data_path)
    df = preprocess_ohlcv(df)

    # Apply labels. The label_* calls are chained, so each one presumably
    # returns the labeler itself; get_labeled_data() ends the chain and
    # yields the labeled frame.
    labeler = CandlestickLabeler(df)
    labeled_df = (labeler
        .label_doji()
        .label_hammer()
        .label_shooting_star()
        .label_marubozu()
        .label_inverted_hammer()
        .label_hanging_man()
        .label_engulfing()
        .label_harami()
        .label_morning_star()
        .label_evening_star()
        .label_three_white_soldiers()
        .label_three_black_crows()
        .get_labeled_data()
    )

    # NOTE(review): only a subset of the labelers applied above is summarized
    # here; the column names produced by the others are not visible from this
    # notebook -- confirm them and extend the list if they should be counted.
    print("Pattern counts:")
    pattern_cols = ['doji', 'hammer', 'shooting_star', 'bullish_engulfing',
                    'bearish_engulfing', 'morning_star', 'evening_star']
    print(labeled_df[pattern_cols].sum())

    # Save labeled data. to_parquet does not create missing parent
    # directories, so make sure the output folder exists before writing.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    labeled_df.to_parquet(output_path)
else:
    print(f"File not found: {data_path}")

In [None]:
def visualize_pattern(df, pattern_name, n_samples=5, window=10):
    """Plot candlestick charts around random occurrences of a detected pattern.

    Args:
        df: DataFrame with the price data and one indicator column per
            pattern. Slices of it are handed to ``mpf.plot`` unchanged, so it
            presumably needs the OHLCV layout mplfinance expects -- TODO
            confirm against ``preprocess_ohlcv``.
        pattern_name: Name of the indicator column; rows where it equals 1
            count as occurrences.
        n_samples: Maximum number of occurrences to plot.
        window: Number of bars shown before the occurrence. The slice stops
            at ``position + window`` (exclusive), so ``window - 1`` bars
            follow the occurrence.

    Returns:
        None. Charts are drawn by mplfinance; problems are reported via print.
    """
    if pattern_name not in df.columns:
        # Say so instead of returning silently, so a misspelled pattern name
        # cannot be mistaken for a successful run.
        print(f"Column '{pattern_name}' not found in DataFrame")
        return

    # Work with integer positions rather than index labels: a label lookup
    # yields a slice or mask when the index contains duplicate labels, which
    # would break the window arithmetic below. Missing values count as
    # "no pattern".
    is_hit = (df[pattern_name] == 1).to_numpy(dtype=bool, na_value=False)
    hit_positions = is_hit.nonzero()[0]

    if len(hit_positions) == 0:
        print(f"No {pattern_name} patterns found")
        return

    n_to_plot = min(n_samples, len(hit_positions))
    sampled_positions = pd.Series(hit_positions).sample(n_to_plot)

    for sample_no, pos in enumerate(sampled_positions, start=1):
        pos = int(pos)
        # Clamp the context window to the bounds of the frame.
        context = df.iloc[max(0, pos - window):min(len(df), pos + window)]

        try:
            mpf.plot(context, type='candle', volume=True,
                     title=f'{pattern_name} - Sample {sample_no}',
                     style='charles')
        except Exception as e:
            # Best effort: one chart failing should not stop the others.
            print(f"Error plotting: {e}")

# Spot-check a couple of patterns visually. Nothing is plotted when
# labeled_df was never created (the loading cell only defines it when the
# data file exists).
if 'labeled_df' in locals():
    for pattern in ('hammer', 'bullish_engulfing'):
        visualize_pattern(labeled_df, pattern)