# In [None]:
"""
================================================================================
PHASE 06A: LABELLING ANOMALIES OF LARGE INFORMED TRADES (LIT)
================================================================================

Pipeline Position: 
    Phase 04 (Anomaly Detection) → **Phase 06A** → Phase 06B (Scoring System)

Purpose:
    Labels each flagged contract and trade with forward-looking return metrics
    (MFE, MAE, exit return) so we can later analyze which characteristics
    predict profitable signals.

Metrics Computed:
    - MFE (Maximum Favorable Excursion): Best case profit during holding period
    - MAE (Maximum Adverse Excursion): Worst case drawdown during holding period
    - Exit Return: Actual return at exit
    - MFE/MAE Ratio: Risk-adjusted return potential

Trading Rules:
    - Entry: Next trading day's OPEN after anomaly detected
    - Exit: Exit day's OPEN
    - Holding Period: min(DTE - 1, 60 days)
    - Direction: CALL → Long underlying, PUT → Short underlying

Input:
    - Flagged contracts: {ANOMALIES_FOLDER}/{TICKER}/uoa_flagged_contracts_YYYY-MM-DD.csv
    - Trade-level anomalies: {ANOMALIES_FOLDER}/{TICKER}/uoa_anomalies_YYYY-MM-DD.csv
    - Underlying OHLCV: {OHLCV_FOLDER}/YYYY-MM-DD.parquet
    
Output:
    - {OUTPUT_FOLDER}/{TICKER}/uoa_labelled_contracts_YYYY-MM-DD.csv
    - {OUTPUT_FOLDER}/{TICKER}/uoa_labelled_anomalies_YYYY-MM-DD.csv

Author: [Your Name]
Created: 2026-02-XX
Version: 1.0

Dependencies:
    - pandas >= 1.5.0
    - numpy >= 1.20.0
    - pyarrow >= 10.0.0

Usage:
    python Phase_06A_labelling_anomalies.py

================================================================================
"""

import logging
import re
import warnings
from bisect import bisect_left, bisect_right
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional, List, Dict, Tuple

import numpy as np
import pandas as pd

# Silence every Python warning for the whole run.
warnings.filterwarnings('ignore')

# =============================================================================
# CONFIGURATION
# =============================================================================

CONFIG = {
    # --- Input folders -------------------------------------------------------
    # Anomaly-detection output (Phase 04), one sub-folder per ticker:
    #   {anomalies_folder}/{TICKER}/uoa_flagged_contracts_YYYY-MM-DD.csv
    'anomalies_folder': Path(r"D:\cyclelabs_codes\CL_20251120_siphontrades\01_FIXINGRAWDATA\output_5a_anomaly"),

    # Daily OHLCV of the underlyings, one parquet file per trading day:
    #   {ohlcv_folder}/YYYY-MM-DD.parquet
    'ohlcv_folder': Path(r"D:\cyclelabs_codes\CL_20260116_anomalyfixed\000_DATA_US_STOCKS_DAY_AGGS"),

    # --- Output folder -------------------------------------------------------
    # Labelled files are written per ticker:
    #   {output_folder}/{TICKER}/uoa_labelled_contracts_YYYY-MM-DD.csv
    'output_folder': Path(r"D:\cyclelabs_codes\CL_20251120_siphontrades\01_FIXINGRAWDATA\output_6a_labelledLITanomalies"),

    # --- Tickers -------------------------------------------------------------
    # Underlyings to label; extend the list as needed
    # (e.g. "AAPL", "TSLA", "MARA").
    'tickers_to_process': ["CIFR"],

    # --- Date range (inclusive, YYYY-MM-DD) of anomaly files to label --------
    'start_date': "2025-01-01",
    'end_date': "2025-12-31",

    # --- Trading parameters --------------------------------------------------
    # Upper bound on the holding period, in days.
    'max_holding_days': 60,

    # --- Logging -------------------------------------------------------------
    'log_level': logging.INFO,
}

# =============================================================================
# LOGGING SETUP
# =============================================================================

# Configure the root logger once at import; every message in this module goes
# through the module-level `logger` below.
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s | %(levelname)s | %(message)s',
    level=CONFIG['log_level'],
)
logger = logging.getLogger(__name__)

# =============================================================================
# OHLCV DATA LOADER
# =============================================================================

class OHLCVLoader:
    """
    Loads and caches underlying OHLCV data.

    One parquet file per trading day is read from
    ``{ohlcv_folder}/YYYY-MM-DD.parquet``. Each file is filtered on its
    ``ticker`` column and its ``open``, ``high``, ``low``, ``close`` and
    ``volume`` columns are returned.

    Attributes:
        ohlcv_folder: Folder holding the daily parquet files.
        cache: Mapping of date string -> DataFrame already loaded for that day.
        available_dates: Ascending list of zero-padded ``YYYY-MM-DD`` strings
            for which a parquet file exists. Because the strings are
            zero-padded ISO dates, string order equals chronological order,
            which is what allows the binary searches below.
    """

    def __init__(self, ohlcv_folder: Path):
        self.ohlcv_folder = ohlcv_folder
        self.cache = {}  # {date_str: DataFrame}
        self.available_dates = self._scan_available_dates()

    @staticmethod
    def _to_iso(date_str: str) -> str:
        """
        Validate ``date_str`` as YYYY-MM-DD and return it zero-padded.

        Raises:
            ValueError: If ``date_str`` does not parse as YYYY-MM-DD.
        """
        return datetime.strptime(date_str, '%Y-%m-%d').date().isoformat()

    def _index_of(self, date_str: str) -> int:
        """Return the position of ``date_str`` in ``available_dates``, or -1."""
        if not isinstance(date_str, str):
            return -1
        pos = bisect_left(self.available_dates, date_str)
        if pos < len(self.available_dates) and self.available_dates[pos] == date_str:
            return pos
        return -1

    def _scan_available_dates(self) -> List[str]:
        """Scan folder for available trading dates (sorted ascending)."""
        dates = []
        for filepath in self.ohlcv_folder.glob("*.parquet"):
            # Filename format: YYYY-MM-DD.parquet
            date_str = filepath.stem
            try:
                canonical = self._to_iso(date_str)
            except ValueError:
                continue
            if canonical != date_str:
                # e.g. "2025-1-5": parses, but sorts out of chronological
                # order as a string and would corrupt next-day lookups.
                logger.warning(f"Skipping OHLCV file with non-zero-padded date: {filepath.name}")
                continue
            dates.append(date_str)
        return sorted(dates)

    def get_price_data(self, date_str: str, ticker: str) -> Optional[Dict]:
        """
        Get OHLCV data for a specific ticker on a specific date.

        The day's parquet file is loaded on first use and kept in ``cache``.
        If the file holds several rows for the ticker, the first one is used.

        Returns:
            Dict with keys: open, high, low, close, volume
            None if the date is not available, the file cannot be read, or
            the ticker has no row on that date
        """
        if self._index_of(date_str) < 0:
            return None

        # Load from cache or file
        df = self.cache.get(date_str)
        if df is None:
            filepath = self.ohlcv_folder / f"{date_str}.parquet"
            try:
                df = pd.read_parquet(filepath)
            except Exception as e:
                logger.warning(f"Error loading OHLCV for {date_str}: {e}")
                return None
            self.cache[date_str] = df

        # Filter to ticker
        ticker_data = df[df['ticker'] == ticker]
        if len(ticker_data) == 0:
            return None

        row = ticker_data.iloc[0]
        return {
            'open': float(row['open']),
            'high': float(row['high']),
            'low': float(row['low']),
            'close': float(row['close']),
            'volume': int(row['volume']),
        }

    def get_next_trading_day(self, date_str: str) -> Optional[str]:
        """
        Get the first available trading day strictly after date_str.

        ``date_str`` itself does not have to be a trading day.

        Returns:
            The next available date, or None if no later date exists.

        Raises:
            ValueError: If ``date_str`` is not a valid YYYY-MM-DD date.
        """
        # bisect_right lands on the first entry greater than the key, whether
        # or not the key itself is present in the list.
        pos = bisect_right(self.available_dates, self._to_iso(date_str))
        if pos < len(self.available_dates):
            return self.available_dates[pos]
        return None

    def get_trading_day_offset(self, date_str: str, offset: int) -> Optional[str]:
        """
        Get trading day that is 'offset' trading days from date_str.

        Returns:
            The target date, or None if date_str is not an available trading
            day or the offset runs outside the available range.
        """
        pos = self._index_of(date_str)
        if pos < 0:
            return None
        target_idx = pos + offset
        if 0 <= target_idx < len(self.available_dates):
            return self.available_dates[target_idx]
        return None

    def get_price_range(
        self,
        ticker: str,
        start_date: str,
        end_date: str
    ) -> Optional[pd.DataFrame]:
        """
        Get OHLCV data for a ticker between start_date and end_date (inclusive).

        Days on which the ticker has no row are left out of the result.

        Returns:
            DataFrame with columns: open, high, low, close, volume, date
            (ascending by date, fresh 0..n-1 index)
            None if no data available

        Raises:
            ValueError: If either bound is not a valid YYYY-MM-DD date.
        """
        first = bisect_left(self.available_dates, self._to_iso(start_date))
        last = bisect_right(self.available_dates, self._to_iso(end_date))
        dates_in_range = self.available_dates[first:last]

        if not dates_in_range:
            return None

        rows = []
        for date_str in dates_in_range:
            price_data = self.get_price_data(date_str, ticker)
            if price_data:
                price_data['date'] = date_str
                rows.append(price_data)

        if not rows:
            return None

        # Rows were collected in ascending date order, so no extra sort is needed.
        return pd.DataFrame(rows)

    def clear_cache(self):
        """Clear the price data cache to free memory."""
        self.cache = {}


# =============================================================================
# RETURN METRICS CALCULATOR
# =============================================================================

def calculate_return_metrics(
    ohlcv_loader: "OHLCVLoader",
    underlying_ticker: str,
    anomaly_date: str,
    days_to_expiry: int,
    option_type: str,  # 'CALL' or 'PUT'
    max_holding_days: int = 60
) -> Optional[Dict]:
    """
    Calculate MFE, MAE, and exit return for a single anomaly.

    The position is opened at the OPEN of the first trading day after
    ``anomaly_date`` and closed at the OPEN of the day
    ``min(days_to_expiry - 1, max_holding_days)`` trading days later.
    Calls are treated as a long in the underlying, puts as a short.

    If the price history ends before the planned exit, the window is cut at
    the last available trading day; compare ``planned_holding_days`` with
    ``actual_holding_days`` in the result to spot such truncated labels.

    Args:
        ohlcv_loader: OHLCVLoader instance
        underlying_ticker: The underlying stock ticker
        anomaly_date: Date the anomaly was detected (YYYY-MM-DD)
        days_to_expiry: Days to expiry of the option
        option_type: 'CALL'/'C' or 'PUT'/'P' (case-insensitive)
        max_holding_days: Maximum holding period

    Returns:
        Dict with metrics, or None if calculation not possible (unknown
        option type, non-positive holding period, no entry day or entry
        price, or no price data in the holding window).
        ``actual_holding_days`` is the number of trading days with data
        between entry and exit, i.e. it equals ``planned_holding_days`` when
        the window is complete.
    """
    # Determine direction; anything that is not clearly a call or a put is
    # rejected instead of being silently treated as a short.
    side = option_type.strip().upper() if isinstance(option_type, str) else ''
    if side in ('CALL', 'C'):
        direction = 'LONG'
    elif side in ('PUT', 'P'):
        direction = 'SHORT'
    else:
        return None

    # Calculate holding period
    # NOTE(review): this value is used below as a *trading-day* offset. If
    # days_to_expiry is measured in calendar days the planned exit can fall
    # after the option's expiry - confirm the intended unit.
    holding_days = min(days_to_expiry - 1, max_holding_days)

    if holding_days <= 0:
        return None

    # Get entry date (next trading day after anomaly)
    entry_date = ohlcv_loader.get_next_trading_day(anomaly_date)
    if not entry_date:
        return None

    # Get entry price (open of entry date)
    entry_data = ohlcv_loader.get_price_data(entry_date, underlying_ticker)
    if not entry_data:
        return None

    entry_price = entry_data['open']

    # Written as "not > 0" so that a NaN open is rejected as well.
    if not entry_price > 0:
        return None

    # Planned exit date (entry_date + holding_days trading days). When the
    # history ends earlier, fall back to the last available trading day so
    # the window covers everything that is known.
    window_end = ohlcv_loader.get_trading_day_offset(entry_date, holding_days)
    if not window_end:
        known_dates = ohlcv_loader.available_dates
        window_end = known_dates[-1] if known_dates else entry_date

    # Get price data for the entire holding period (inclusive)
    price_df = ohlcv_loader.get_price_range(underlying_ticker, entry_date, window_end)

    if price_df is None or len(price_df) == 0:
        return None

    # Exit on the last bar the ticker actually has inside the window, so that
    # the reported exit_date always matches the bar that supplied exit_price.
    last_bar = price_df.iloc[-1]
    exit_date = last_bar['date']
    exit_price = float(last_bar['open'])

    # Calculate max high and min low during holding period
    # NOTE(review): the exit day's full high/low are included although the
    # position is closed at that day's open.
    max_high = price_df['high'].max()
    min_low = price_df['low'].min()

    # Calculate metrics based on direction
    if direction == 'LONG':
        # Long position: profit when price goes up
        mfe = max_high - entry_price  # Best case: sold at highest high
        mae = entry_price - min_low   # Worst case: lowest low
        exit_pnl = exit_price - entry_price
    else:  # SHORT
        # Short position: profit when price goes down
        mfe = entry_price - min_low   # Best case: covered at lowest low
        mae = max_high - entry_price  # Worst case: highest high
        exit_pnl = entry_price - exit_price

    mfe_pct = (mfe / entry_price) * 100
    mae_pct = (mae / entry_price) * 100
    exit_return_pct = (exit_pnl / entry_price) * 100

    # Calculate MFE/MAE ratio (handle division by zero)
    if mae_pct > 0:
        mfe_mae_ratio = mfe_pct / mae_pct
    else:
        mfe_mae_ratio = np.inf if mfe_pct > 0 else 0

    # Trading days actually held: the window holds the entry bar plus one bar
    # per day held, so the entry bar itself is not counted.
    actual_holding_days = len(price_df) - 1

    return {
        'direction': direction,
        'anomaly_date': anomaly_date,
        'entry_date': entry_date,
        'entry_price': round(entry_price, 4),
        'exit_date': exit_date,
        'exit_price': round(exit_price, 4),
        'planned_holding_days': holding_days,
        'actual_holding_days': actual_holding_days,
        'max_high': round(max_high, 4),
        'min_low': round(min_low, 4),
        'mfe': round(mfe, 4),
        'mfe_pct': round(mfe_pct, 4),
        'mae': round(mae, 4),
        'mae_pct': round(mae_pct, 4),
        'exit_return': round(exit_pnl, 4),
        'exit_return_pct': round(exit_return_pct, 4),
        # 999.0 is the sentinel written when there was no adverse move at all.
        'mfe_mae_ratio': round(mfe_mae_ratio, 4) if mfe_mae_ratio != np.inf else 999.0,
    }


# =============================================================================
# PROCESS FLAGGED CONTRACTS
# =============================================================================

# Tail of an OCC-style option symbol: <YYMMDD><C|P><8-digit strike>.
# Anchoring on the digits around the flag keeps letters in the underlying's
# own ticker (e.g. the "C" in "CIFR") from being mistaken for the call/put flag.
# NOTE(review): assumes contract symbols follow this layout - confirm against
# the Phase 04 output.
_OPTION_SYMBOL_TAIL = re.compile(r'\d{6}([CP])\d{8}$')


def _resolve_option_type(row: pd.Series) -> Optional[str]:
    """
    Work out whether a row describes a call or a put.

    Sources are tried in order: the ``option_type`` column, the 0/1
    ``option_type_call`` column, then the option symbol found in the
    ``contract`` or ``ticker`` column. Missing (NaN) cells are skipped.

    Returns:
        'CALL', 'PUT', or None when no source gives a usable answer.
    """
    raw_type = row.get('option_type', None)
    if isinstance(raw_type, str):
        token = raw_type.strip().upper()
        if token in ('CALL', 'C'):
            return 'CALL'
        if token in ('PUT', 'P'):
            return 'PUT'

    call_flag = row.get('option_type_call', None)
    if call_flag is not None and not pd.isna(call_flag):
        return 'CALL' if call_flag == 1 else 'PUT'

    for column in ('contract', 'ticker'):
        symbol = row.get(column, None)
        if not isinstance(symbol, str):
            continue
        match = _OPTION_SYMBOL_TAIL.search(symbol.strip().upper())
        if match is not None:
            return 'CALL' if match.group(1) == 'C' else 'PUT'

    return None


def _label_rows(
    rows_df: pd.DataFrame,
    underlying_ticker: str,
    anomaly_date: str,
    ohlcv_loader: OHLCVLoader,
    max_holding_days: int,
    missing_type_level: int
) -> pd.DataFrame:
    """
    Append return metrics to every row that can be labelled.

    Rows are skipped when the option type cannot be determined, when
    ``days_to_expiry`` is missing or <= 1, or when no metrics can be computed.

    Args:
        rows_df: Contract-level or trade-level rows for one anomaly date
        underlying_ticker: The underlying stock ticker
        anomaly_date: Date the anomalies were detected (YYYY-MM-DD)
        ohlcv_loader: OHLCVLoader instance
        max_holding_days: Maximum holding period
        missing_type_level: Logging level used when a row's option type
            cannot be determined

    Returns:
        DataFrame of original columns plus metric columns; empty DataFrame
        when nothing could be labelled.
    """
    if len(rows_df) == 0:
        return pd.DataFrame()

    # For a fixed ticker and anomaly date the metrics depend only on the
    # option type and DTE, so each combination is computed once.
    metrics_by_key = {}
    results = []

    for idx, row in rows_df.iterrows():
        option_type = _resolve_option_type(row)
        if option_type is None:
            logger.log(missing_type_level, f"  Cannot determine option type for row {idx}, skipping")
            continue

        # Get days to expiry
        days_to_expiry = row.get('days_to_expiry', None)
        if days_to_expiry is None or pd.isna(days_to_expiry) or days_to_expiry <= 1:
            logger.debug(f"  Invalid DTE for row {idx}, skipping")
            continue

        days_to_expiry = int(days_to_expiry)

        key = (option_type, days_to_expiry)
        if key not in metrics_by_key:
            metrics_by_key[key] = calculate_return_metrics(
                ohlcv_loader=ohlcv_loader,
                underlying_ticker=underlying_ticker,
                anomaly_date=anomaly_date,
                days_to_expiry=days_to_expiry,
                option_type=option_type,
                max_holding_days=max_holding_days
            )
        metrics = metrics_by_key[key]

        if metrics is None:
            logger.debug(f"  Could not calculate metrics for row {idx}")
            continue

        # Combine original row with metrics (metric columns win on clashes)
        result_row = row.to_dict()
        result_row.update(metrics)
        results.append(result_row)

    if not results:
        return pd.DataFrame()

    return pd.DataFrame(results)


def process_flagged_contracts(
    contracts_df: pd.DataFrame,
    underlying_ticker: str,
    anomaly_date: str,
    ohlcv_loader: OHLCVLoader,
    max_holding_days: int
) -> pd.DataFrame:
    """
    Process flagged contracts file and add return metrics.

    Args:
        contracts_df: Rows of a uoa_flagged_contracts file
        underlying_ticker: The underlying stock ticker
        anomaly_date: Date the contracts were flagged (YYYY-MM-DD)
        ohlcv_loader: OHLCVLoader instance
        max_holding_days: Maximum holding period

    Returns:
        The labellable rows with metric columns appended; an empty DataFrame
        if there are none. Rows without a determinable option type are
        skipped with a warning.
    """
    return _label_rows(
        contracts_df,
        underlying_ticker,
        anomaly_date,
        ohlcv_loader,
        max_holding_days,
        missing_type_level=logging.WARNING,
    )


# =============================================================================
# PROCESS ANOMALIES (TRADE-LEVEL)
# =============================================================================

def process_anomalies(
    anomalies_df: pd.DataFrame,
    underlying_ticker: str,
    anomaly_date: str,
    ohlcv_loader: OHLCVLoader,
    max_holding_days: int
) -> pd.DataFrame:
    """
    Process anomalies file (trade-level) and add return metrics.

    Args:
        anomalies_df: Rows of a uoa_anomalies file
        underlying_ticker: The underlying stock ticker
        anomaly_date: Date the anomalies were detected (YYYY-MM-DD)
        ohlcv_loader: OHLCVLoader instance
        max_holding_days: Maximum holding period

    Returns:
        The labellable rows with metric columns appended; an empty DataFrame
        if there are none. Rows without a determinable option type are
        skipped and only logged at debug level.
    """
    return _label_rows(
        anomalies_df,
        underlying_ticker,
        anomaly_date,
        ohlcv_loader,
        max_holding_days,
        missing_type_level=logging.DEBUG,
    )


# =============================================================================
# MAIN PROCESSING
# =============================================================================

def get_anomaly_files_for_ticker(
    anomalies_folder: Path,
    ticker: str,
    start_date: str,
    end_date: str
) -> List[Tuple[str, Path, Path]]:
    """
    Collect the anomaly files of one ticker whose date lies in the range.

    Discovery is driven by the flagged-contracts files; the matching
    trade-level anomalies path is derived from the same date and is returned
    whether or not that file exists.

    Args:
        anomalies_folder: Root folder holding one sub-folder per ticker
        ticker: Name of the ticker sub-folder
        start_date: First date to include (YYYY-MM-DD)
        end_date: Last date to include (YYYY-MM-DD)

    Returns:
        List of tuples (date_str, contracts_filepath, anomalies_filepath),
        sorted by date_str; empty if the ticker folder does not exist.
    """
    ticker_dir = anomalies_folder / ticker
    if not ticker_dir.exists():
        return []

    date_format = '%Y-%m-%d'
    window_start = datetime.strptime(start_date, date_format)
    window_end = datetime.strptime(end_date, date_format)

    matched = []
    for contracts_path in ticker_dir.glob("uoa_flagged_contracts_*.csv"):
        # The stem is "uoa_flagged_contracts_YYYY-MM-DD"; keep only the date.
        stamp = contracts_path.stem.replace("uoa_flagged_contracts_", "")

        try:
            stamp_dt = datetime.strptime(stamp, date_format)
        except ValueError:
            # Not a dated file - ignore it.
            continue

        if stamp_dt < window_start or stamp_dt > window_end:
            continue

        matched.append(
            (stamp, contracts_path, ticker_dir / f"uoa_anomalies_{stamp}.csv")
        )

    matched.sort(key=lambda entry: entry[0])
    return matched


def process_ticker(
    ticker: str,
    anomalies_folder: Path,
    ohlcv_loader: OHLCVLoader,
    output_folder: Path,
    start_date: str,
    end_date: str,
    max_holding_days: int
) -> Dict:
    """
    Label every anomaly file of one ticker and write the results to disk.

    For each date in range, the flagged-contracts file and the trade-level
    anomalies file (when present) are read, labelled and saved under
    ``{output_folder}/{ticker}/``. A failure in one file is logged and does
    not stop the remaining files.

    Args:
        ticker: Underlying ticker; also the name of the input/output sub-folder
        anomalies_folder: Root folder of the anomaly files
        ohlcv_loader: OHLCVLoader instance
        output_folder: Root folder for the labelled output
        start_date: First anomaly date to process (YYYY-MM-DD)
        end_date: Last anomaly date to process (YYYY-MM-DD)
        max_holding_days: Maximum holding period

    Returns:
        Dict with keys: ticker, files_processed, contracts_labelled,
        anomalies_labelled
    """
    logger.info(f"Processing ticker: {ticker}")

    dated_files = get_anomaly_files_for_ticker(anomalies_folder, ticker, start_date, end_date)

    if not dated_files:
        logger.info(f"  No anomaly files found for {ticker}")
        return {
            'ticker': ticker,
            'files_processed': 0,
            'contracts_labelled': 0,
            'anomalies_labelled': 0,
        }

    logger.info(f"  Found {len(dated_files)} anomaly files")

    # All output for this ticker goes into its own sub-folder.
    out_dir = output_folder / ticker
    out_dir.mkdir(parents=True, exist_ok=True)

    contract_rows = 0
    anomaly_rows = 0
    n_done = 0

    for n_done, (date_str, contracts_file, anomalies_file) in enumerate(dated_files, start=1):
        logger.info(f"  Processing {date_str}...")

        # --- Contract-level file ---
        if contracts_file.exists():
            try:
                raw_contracts = pd.read_csv(contracts_file)

                if len(raw_contracts) > 0:
                    labelled = process_flagged_contracts(
                        contracts_df=raw_contracts,
                        underlying_ticker=ticker,
                        anomaly_date=date_str,
                        ohlcv_loader=ohlcv_loader,
                        max_holding_days=max_holding_days
                    )
                    n_rows = len(labelled)

                    if n_rows == 0:
                        logger.info(f"    No contracts could be labelled")
                    else:
                        labelled.to_csv(
                            out_dir / f"uoa_labelled_contracts_{date_str}.csv",
                            index=False,
                        )
                        contract_rows += n_rows
                        logger.info(f"    Labelled {n_rows} contracts")

            except Exception as e:
                logger.error(f"    Error processing contracts: {e}")

        # --- Trade-level file ---
        if anomalies_file.exists():
            try:
                raw_anomalies = pd.read_csv(anomalies_file)

                if len(raw_anomalies) > 0:
                    labelled = process_anomalies(
                        anomalies_df=raw_anomalies,
                        underlying_ticker=ticker,
                        anomaly_date=date_str,
                        ohlcv_loader=ohlcv_loader,
                        max_holding_days=max_holding_days
                    )
                    n_rows = len(labelled)

                    if n_rows == 0:
                        logger.info(f"    No anomalies could be labelled")
                    else:
                        labelled.to_csv(
                            out_dir / f"uoa_labelled_anomalies_{date_str}.csv",
                            index=False,
                        )
                        anomaly_rows += n_rows
                        logger.info(f"    Labelled {n_rows} anomalies (trade-level)")

            except Exception as e:
                logger.error(f"    Error processing anomalies: {e}")

        # Drop the cached daily price frames every 10 dates to bound memory.
        if n_done % 10 == 0:
            ohlcv_loader.clear_cache()

    return {
        'ticker': ticker,
        'files_processed': n_done,
        'contracts_labelled': contract_rows,
        'anomalies_labelled': anomaly_rows,
    }


def main():
    """
    Run Phase 06A end to end.

    Reads all settings from CONFIG, builds the OHLCV loader, labels every
    configured ticker in turn and logs an overall and a per-ticker summary.
    """
    started_at = datetime.now()

    heavy_rule = "=" * 70
    light_rule = "-" * 70

    logger.info(heavy_rule)
    logger.info("PHASE 06A: LABELLING ANOMALIES OF LARGE INFORMED TRADES")
    logger.info(heavy_rule)

    # -------------------------------------------------------------------------
    # SETUP
    # -------------------------------------------------------------------------

    anomalies_folder = CONFIG["anomalies_folder"]
    ohlcv_folder = CONFIG["ohlcv_folder"]
    output_folder = CONFIG["output_folder"]
    tickers_to_process = CONFIG["tickers_to_process"]
    start_date = CONFIG["start_date"]
    end_date = CONFIG["end_date"]
    max_holding_days = CONFIG["max_holding_days"]

    output_folder.mkdir(parents=True, exist_ok=True)

    logger.info(f"Anomalies folder: {anomalies_folder}")
    logger.info(f"OHLCV folder: {ohlcv_folder}")
    logger.info(f"Output folder: {output_folder}")
    logger.info(f"Tickers to process: {tickers_to_process}")
    logger.info(f"Date range: {start_date} to {end_date}")
    logger.info(f"Max holding days: {max_holding_days}")

    # -------------------------------------------------------------------------
    # INITIALIZE OHLCV LOADER
    # -------------------------------------------------------------------------

    logger.info(light_rule)
    logger.info("INITIALIZING OHLCV LOADER")
    logger.info(light_rule)

    ohlcv_loader = OHLCVLoader(ohlcv_folder)
    known_days = ohlcv_loader.available_dates
    logger.info(f"Found {len(known_days)} trading days in OHLCV data")

    if known_days:
        logger.info(f"Date range: {known_days[0]} to {known_days[-1]}")

    # -------------------------------------------------------------------------
    # PROCESS EACH TICKER
    # -------------------------------------------------------------------------

    logger.info(light_rule)
    logger.info("PROCESSING TICKERS")
    logger.info(light_rule)

    ticker_total = len(tickers_to_process)
    all_results = []

    for position, ticker in enumerate(tickers_to_process, start=1):
        logger.info(f"\n[{position}/{ticker_total}] {ticker}")
        all_results.append(
            process_ticker(
                ticker=ticker,
                anomalies_folder=anomalies_folder,
                ohlcv_loader=ohlcv_loader,
                output_folder=output_folder,
                start_date=start_date,
                end_date=end_date,
                max_holding_days=max_holding_days
            )
        )

    # -------------------------------------------------------------------------
    # SUMMARY
    # -------------------------------------------------------------------------

    processing_time = (datetime.now() - started_at).total_seconds()

    logger.info("\n" + heavy_rule)
    logger.info("SUMMARY")
    logger.info(heavy_rule)

    total_files = sum(entry['files_processed'] for entry in all_results)
    total_contracts = sum(entry['contracts_labelled'] for entry in all_results)
    total_anomalies = sum(entry['anomalies_labelled'] for entry in all_results)

    logger.info(f"Tickers processed: {ticker_total}")
    logger.info(f"Total files processed: {total_files}")
    logger.info(f"Total contracts labelled: {total_contracts}")
    logger.info(f"Total anomalies labelled: {total_anomalies}")
    logger.info(f"Processing time: {processing_time:.1f} seconds")

    # One line per ticker
    logger.info("\nPer-Ticker Summary:")
    logger.info("-" * 50)
    for entry in all_results:
        logger.info(f"  {entry['ticker']}: {entry['files_processed']} files, "
                    f"{entry['contracts_labelled']} contracts, "
                    f"{entry['anomalies_labelled']} anomalies")

    logger.info("\n" + heavy_rule)
    logger.info("PHASE 06A COMPLETE")
    logger.info(heavy_rule)


# =============================================================================
# ENTRY POINT
# =============================================================================

# Run the labelling pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()