## 1. Imports and Configuration

Import dependencies and print the configured paths so they can be checked before any data is loaded.

In [None]:
import logging
from datetime import datetime

import pandas as pd
import plotly.express as px

from aponyx.config import DATA_DIR, LOGS_DIR, SUITABILITY_REGISTRY_PATH, EVALUATION_DIR
from aponyx.data import fetch_cdx
from aponyx.data.sources import BloombergSource
from aponyx.persistence import save_json
from aponyx.evaluation.suitability import (
    evaluate_signal_suitability,
    SuitabilityConfig,
    SuitabilityRegistry,
    generate_suitability_report,
    save_report,
)

# Set up root logging at INFO level with timestamped, named records
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Workflow banner followed by the configured paths, printed up front so a
# wrong directory is visible before any later cell reads or writes files
rule = "=" * 80
print(rule)
print("SIGNAL SUITABILITY EVALUATION WORKFLOW — Step 3 of 5")
print(rule)
print("\nConfiguration:")
for label, location in (
    ("Data directory", DATA_DIR),
    ("Logs directory", LOGS_DIR),
    ("Evaluation directory", EVALUATION_DIR),
    ("Registry path", SUITABILITY_REGISTRY_PATH),
):
    print(f"  {label}: {location}")
print("\n✓ Imports complete")

## 2. Load Signals from Step 2

Load computed signals DataFrame from previous workflow step.

In [None]:
# Signals are read from the processed-data folder under DATA_DIR
signals_path = DATA_DIR / "processed" / "signals.parquet"

# Fail fast with an actionable message when the file is missing
if not signals_path.exists():
    raise FileNotFoundError(
        f"No signals found at {signals_path}.\n"
        "Run 02_signal_computation.ipynb first to compute signals."
    )

signals = pd.read_parquet(signals_path)

# Overview of what was loaded
rule = "=" * 80
print(f"\n{rule}")
print("SIGNALS LOADED")
print(f"{rule}\n")
print(f"File: {signals_path}")
print(f"Shape: {signals.shape}")
print(f"Columns: {list(signals.columns)}")
print(f"Date range: {signals.index.min()} to {signals.index.max()}")
print(f"Total observations: {len(signals)}")


def _summarise(column_name):
    """Return one summary-table row (pre-formatted strings) for a signal column."""
    series = signals[column_name]
    return {
        'Signal': column_name,
        'Valid Obs': series.notna().sum(),
        'Mean': f"{series.mean():.3f}",
        'Std': f"{series.std():.3f}",
        'Min': f"{series.min():.2f}",
        'Max': f"{series.max():.2f}",
    }


# One row of descriptive statistics per signal column
summary_data = [_summarise(col) for col in signals.columns]
summary_df = pd.DataFrame(summary_data)

print("\nSignal Summary:\n")
print(summary_df.to_markdown(index=False))

print("\n✓ Signals loaded successfully")

## 3. Load Target Product Data

Load CDX spread data as the evaluation target.

In [None]:
rule = "=" * 80
print(f"\n{rule}")
print("LOADING TARGET PRODUCT DATA")
print(f"{rule}\n")

# Data source object handed to the fetch helper
source = BloombergSource()

# Fetch the CDX IG 5Y data (cache use is enabled)
print("Loading CDX IG 5Y spreads...")
cdx_df = fetch_cdx(source=source, security="cdx_ig_5y", use_cache=True)

# Only the spread column is used as the evaluation target
cdx_spread = cdx_df['spread']

print(f"✓ Loaded CDX IG 5Y: {len(cdx_spread)} rows")
print(f"  Date range: {cdx_spread.index.min()} to {cdx_spread.index.max()}")
print(f"  Missing values: {cdx_spread.isna().sum()}")

# Report whether the target shares exactly the same index as the signals;
# this is informational only and nothing is reindexed here
aligned = signals.index.equals(cdx_spread.index)
if not aligned:
    print("\n⚠️  Target index differs from signals")
    print(f"  Signals dates: {len(signals)}")
    print(f"  Target dates: {len(cdx_spread)}")
    print("  Evaluation will use aligned subset")
else:
    print(f"\n✓ Target aligned with signals index ({len(signals)} dates)")

print("\n✓ Target product data loaded")

## 4. Configure Evaluation Parameters

Set parameters for suitability evaluation.

In [None]:
# Minimum observations (1 trading year for tactical signals)
min_obs = 252

# Only lags and min_obs are set explicitly; thresholds and component weights
# keep whatever defaults SuitabilityConfig provides
config = SuitabilityConfig(
    lags=[1, 3, 5],
    min_obs=min_obs,
)

rule = "=" * 80
print(f"\n{rule}")
print("EVALUATION CONFIGURATION")
print(f"{rule}\n")

# Tabulate the effective configuration, including the defaulted fields
config_data = [
    {'Parameter': 'Forecast Lags', 'Value': str(config.lags)},
    {'Parameter': 'Minimum Observations', 'Value': str(config.min_obs)},
    {'Parameter': 'PASS Threshold', 'Value': f"{config.pass_threshold:.2f}"},
    {'Parameter': 'HOLD Threshold', 'Value': f"{config.hold_threshold:.2f}"},
    {'Parameter': 'Data Health Weight', 'Value': f"{config.data_health_weight:.1%}"},
    {'Parameter': 'Predictive Weight', 'Value': f"{config.predictive_weight:.1%}"},
    {'Parameter': 'Economic Weight', 'Value': f"{config.economic_weight:.1%}"},
    {'Parameter': 'Stability Weight', 'Value': f"{config.stability_weight:.1%}"},
]

config_df = pd.DataFrame(config_data)
print(config_df.to_markdown(index=False))

# The weight and threshold figures below are read from `config` instead of
# being typed in as literals, so the rationale cannot drift from the values
# shown in the table above when the configuration changes.
pass_cut = f"{config.pass_threshold:g}"
hold_cut = f"{config.hold_threshold:g}"

print("\n\nRationale:")
print(f"  - Lags {config.lags}: Test 1-, 3-, and 5-day forecast horizons")
print(f"  - Min obs {config.min_obs}: Requires 1 trading year for statistical validity")
print(f"  - Component weights: Emphasis on predictive power ({config.predictive_weight:.0%})")
print(f"  - Decision thresholds: PASS ≥{pass_cut}, HOLD {hold_cut}-{pass_cut}, FAIL <{hold_cut}")

print("\n✓ Configuration ready")

## 5. Evaluate Signal-Product Suitability

Run suitability evaluation for each signal.

In [None]:
rule = "=" * 80
print(f"\n{rule}")
print("EVALUATING SIGNAL SUITABILITY")
print(f"{rule}\n")

# Decision label -> icon used on the per-signal progress line
decision_icons = {
    "PASS": "✅",
    "HOLD": "⚠️",
    "FAIL": "❌",
}

# Decision label -> icon-plus-label text used in the summary table
decision_cells = {
    "PASS": "✅ PASS",
    "HOLD": "⚠️ HOLD",
    "FAIL": "❌ FAIL",
}

# Evaluation results keyed by signal (column) name
results_dict = {}

# Deliberately no try/except: the first failing evaluation aborts the cell
for signal_name in signals.columns:
    print(f"Evaluating {signal_name}...")

    # NOTE(review): the parameter is called `target_change` but receives the
    # spread series as loaded — confirm whether the evaluator expects levels
    # or changes.
    result = evaluate_signal_suitability(
        signal=signals[signal_name],
        target_change=cdx_spread,
        config=config,
    )
    results_dict[signal_name] = result

    # An unexpected decision label raises KeyError here
    decision_indicator = decision_icons[result.decision]
    print(f"  {decision_indicator} {result.decision}: Score = {result.composite_score:.3f}")
    print()

print(f"✓ Evaluated {len(results_dict)} signals\n")

# One table row per signal: decision plus composite and component scores,
# all formatted to three decimals
eval_summary_df = pd.DataFrame([
    {
        'Signal': name,
        'Decision': decision_cells[outcome.decision],
        'Composite': f"{outcome.composite_score:.3f}",
        'Data Health': f"{outcome.data_health_score:.3f}",
        'Predictive': f"{outcome.predictive_score:.3f}",
        'Economic': f"{outcome.economic_score:.3f}",
        'Stability': f"{outcome.stability_score:.3f}",
    }
    for name, outcome in results_dict.items()
])

print("\nEvaluation Summary:\n")
print(eval_summary_df.to_markdown(index=False))

print("\n✓ Evaluation complete")

## 6. Visualize Composite Scores

Plot composite scores with decision thresholds.

In [None]:
rule = "=" * 80
print(f"\n{rule}")
print("VISUALIZING EVALUATION RESULTS")
print(f"{rule}\n")

# One row per evaluated signal: composite score plus its decision label
plot_data = [
    {
        'Signal': signal_name,
        'Composite Score': result.composite_score,
        'Decision': result.decision,
    }
    for signal_name, result in results_dict.items()
]
plot_df = pd.DataFrame(plot_data)

# Fixed colour per decision; also reused by the scatter plot cell
color_map = {
    'PASS': '#2ecc71',   # Green
    'HOLD': '#f39c12',   # Yellow/Orange
    'FAIL': '#e74c3c',   # Red
}

fig = px.bar(
    plot_df,
    x='Signal',
    y='Composite Score',
    color='Decision',
    color_discrete_map=color_map,
    title='Signal Suitability Composite Scores',
)

# Draw the decision cut-offs from the active configuration rather than from
# fixed literals, so the guide lines and their labels always match the
# thresholds set on `config`.
for label, threshold in (
    ("PASS", config.pass_threshold),
    ("HOLD", config.hold_threshold),
):
    fig.add_hline(
        y=threshold,
        line_dash="dash",
        line_color="gray",
        annotation_text=f"{label} threshold (≥{threshold:g})",
        annotation_position="right",
    )

fig.update_layout(
    xaxis_title="Signal",
    yaxis_title="Composite Score",
    template="plotly_white",
    height=500,
)

fig.show()

print("✓ Composite score chart complete")

## 7. Visualize Component Scores

Heatmap showing breakdown of component scores.

In [None]:
# Signals as rows, the four component scores as columns
component_data = [
    {
        'Signal': name,
        'Data Health': outcome.data_health_score,
        'Predictive': outcome.predictive_score,
        'Economic': outcome.economic_score,
        'Stability': outcome.stability_score,
    }
    for name, outcome in results_dict.items()
]
component_df = pd.DataFrame(component_data).set_index('Signal')

# Heatmap with the colour scale pinned to the 0..1 range and each cell
# annotated with its value to three decimals
fig = px.imshow(
    component_df,
    title='Component Score Breakdown by Signal',
    labels=dict(color="Score"),
    color_continuous_scale='RdYlGn',
    zmin=0,
    zmax=1,
    text_auto=".3f",
    aspect="auto",
)

fig.update_layout(
    template="plotly_white",
    xaxis_title="Component",
    yaxis_title="Signal",
    height=400,
)

fig.show()

print("✓ Component score heatmap complete")

## 8. Visualize Predictive vs Economic Scores

Scatter plot showing relationship between predictive and economic components.

In [None]:
# One point per signal: predictive score on x, economic score on y
scatter_data = [
    {
        'Signal': name,
        'Predictive Score': outcome.predictive_score,
        'Economic Score': outcome.economic_score,
        'Decision': outcome.decision,
    }
    for name, outcome in results_dict.items()
]
scatter_df = pd.DataFrame(scatter_data)

# Points are coloured by decision (colour map defined in the composite score
# cell) and labelled with the signal name
fig = px.scatter(
    scatter_df,
    x='Predictive Score',
    y='Economic Score',
    text='Signal',
    color='Decision',
    color_discrete_map=color_map,
    title='Predictive vs Economic Scores',
)

# Faint dotted guides at 0.5 on both axes split the plot into quadrants
guide_style = dict(line_dash="dot", line_color="gray", opacity=0.5)
fig.add_hline(y=0.5, **guide_style)
fig.add_vline(x=0.5, **guide_style)

fig.update_traces(
    marker=dict(size=12),
    textposition='top center',
)

fig.update_layout(
    template="plotly_white",
    xaxis_title="Predictive Score",
    yaxis_title="Economic Score",
    height=500,
)

fig.show()

print("\n✓ Scatter plot complete")
print("\n✓ All visualizations complete")

## 9. Generate and Save Evaluation Reports

Create Markdown reports for each signal evaluation.

In [None]:
rule = "=" * 80
print(f"\n{rule}")
print("GENERATING EVALUATION REPORTS")
print(f"{rule}\n")

# Reports are written straight into the evaluation directory
# NOTE(review): the directory is assumed to exist already (the original
# comment credits `ensure_directories`) — confirm, nothing here creates it
reports_dir = EVALUATION_DIR

# Every report in this notebook is for the same target product
product_id = "CDX_IG_5Y"

# Signal name -> path of the report file saved for it
report_paths = {}

for signal_name, result in results_dict.items():
    print(f"Generating report for {signal_name}...")

    # Build the report content for this signal/product pair
    report_content = generate_suitability_report(
        result=result,
        signal_id=signal_name,
        product_id=product_id,
    )

    # Persist it; the returned path is kept for the registry step
    report_path = save_report(
        report=report_content,
        signal_id=signal_name,
        product_id=product_id,
        output_dir=reports_dir,
    )
    report_paths[signal_name] = report_path

    print(f"  ✓ Saved to: {report_path.name}")

print(f"\n✓ Generated {len(report_paths)} reports")
print("\nReport files:")
for path in report_paths.values():
    print(f"  {path}")

print(f"\n✓ Reports saved to {reports_dir}")

## 10. Register Evaluations

Track evaluation metadata in suitability registry.

In [None]:
rule = "=" * 80
print(f"\n{rule}")
print("REGISTERING EVALUATIONS")
print(f"{rule}\n")

# Registry bound to the configured registry path
registry = SuitabilityRegistry(SUITABILITY_REGISTRY_PATH)

# Signal name -> identifier returned by the registry
eval_ids = {}

for signal_name, result in results_dict.items():
    # Each registry entry points at the report saved for the same signal;
    # a missing report raises KeyError before anything is printed
    report_path = report_paths[signal_name]

    print(f"Registering {signal_name}...")

    eval_id = registry.register_evaluation(
        result=result,
        signal_id=signal_name,
        product_id="CDX_IG_5Y",
        report_path=str(report_path),
        evaluator_version="0.1.0",
    )
    eval_ids[signal_name] = eval_id

    print(f"  ✓ Registered as: {eval_id}")

print(f"\n✓ Registered {len(eval_ids)} evaluations\n")

# Query the registry for the overall count, then once per decision label
total_evals = len(registry.list_evaluations())
pass_evals, hold_evals, fail_evals = (
    len(registry.list_evaluations(decision=label))
    for label in ("PASS", "HOLD", "FAIL")
)

print("Registry Summary:")
print(f"  Total evaluations: {total_evals}")
print(f"  PASS: {pass_evals}")
print(f"  HOLD: {hold_evals}")
print(f"  FAIL: {fail_evals}")

print(f"\n✓ Registry updated at {SUITABILITY_REGISTRY_PATH}")

## 11. Persist Evaluation Metadata

Save evaluation metadata for reproducibility.

In [None]:
rule = "=" * 80
print(f"\n{rule}")
print("PERSISTING METADATA")
print(f"{rule}\n")

# Tally how many signals landed in each decision bucket
outcomes = list(results_dict.values())
pass_count = sum(1 for outcome in outcomes if outcome.decision == "PASS")
hold_count = sum(1 for outcome in outcomes if outcome.decision == "HOLD")
fail_count = sum(1 for outcome in outcomes if outcome.decision == "FAIL")

# Arithmetic mean of the composite scores (raises ZeroDivisionError when
# nothing was evaluated)
mean_score = sum(outcome.composite_score for outcome in outcomes) / len(results_dict)

# Snapshot of the configuration fields, in the order they are written out
config_fields = (
    "lags",
    "min_obs",
    "pass_threshold",
    "hold_threshold",
    "data_health_weight",
    "predictive_weight",
    "economic_weight",
    "stability_weight",
)

# Run record: local wall-clock timestamp, configuration, evaluated signals,
# decision tallies and the report location
metadata = {
    "timestamp": datetime.now().isoformat(),
    "config": {field: getattr(config, field) for field in config_fields},
    "signals_evaluated": list(results_dict.keys()),
    "results_summary": {
        "pass_count": pass_count,
        "hold_count": hold_count,
        "fail_count": fail_count,
        "mean_composite_score": mean_score,
    },
    "report_directory": str(EVALUATION_DIR),
}

# Write the record under the logs directory, then report the file size
metadata_path = LOGS_DIR / "suitability_evaluation_metadata.json"
save_json(metadata, metadata_path)

metadata_size_kb = metadata_path.stat().st_size / 1024

print(f"✓ Metadata saved to: {metadata_path}")
print(f"  Size: {metadata_size_kb:.2f} KB")

print("\n✓ Metadata persisted successfully")

## 12. Decision Summary

Determine which signals proceed to backtesting.

In [None]:
rule = "=" * 80
print(f"\n{rule}")
print("DECISION SUMMARY")
print(f"{rule}\n")

# Group signal names by decision label, keeping evaluation order
signals_by_decision = {
    label: [name for name, outcome in results_dict.items() if outcome.decision == label]
    for label in ("PASS", "HOLD", "FAIL")
}
pass_signals = signals_by_decision["PASS"]
hold_signals = signals_by_decision["HOLD"]
fail_signals = signals_by_decision["FAIL"]

# One table row per decision: count plus the comma-separated signal names
decision_summary = [
    {
        'Decision': heading,
        'Count': len(names),
        'Signals': ', '.join(names) if names else '(none)',
    }
    for heading, names in (
        ('✅ PASS', pass_signals),
        ('⚠️ HOLD', hold_signals),
        ('❌ FAIL', fail_signals),
    )
]

decision_df = pd.DataFrame(decision_summary)
print(decision_df.to_markdown(index=False))

# Guidance on what to do next, depending on the outcome
print("\n\nNext Steps:")

if pass_signals:
    print(f"\n✓ {len(pass_signals)} signal(s) passed evaluation")
    print("\nReady to proceed to Step 4 (Backtest Execution):")
    for signal in pass_signals:
        print(f"  - {signal}")
else:
    print("\n⚠️  WARNING: No signals passed evaluation")
    print("\nReview component scores to identify weaknesses:")
    print("  - Data health: Check sample size and missing data")
    print("  - Predictive: Review correlations and t-statistics")
    print("  - Economic: Verify effect size is meaningful")
    print("  - Stability: Check sign consistency across subperiods")
    print("\nConsider signal refinements before proceeding to backtest.")

# HOLD signals are listed with their scores for manual judgment
if hold_signals:
    print(f"\n⚠️  {len(hold_signals)} signal(s) require manual review (HOLD decision)")
    print("\nHOLD signals flagged for judgment:")
    for signal in hold_signals:
        score = results_dict[signal].composite_score
        print(f"  - {signal} (score: {score:.3f})")

# FAIL signals are listed with their scores and excluded from backtesting
if fail_signals:
    print(f"\n❌ {len(fail_signals)} signal(s) failed evaluation")
    print("\nFAIL signals archived (do not backtest):")
    for signal in fail_signals:
        score = results_dict[signal].composite_score
        print(f"  - {signal} (score: {score:.3f})")

print("\n✓ Decision summary complete")

---

## Workflow Complete

Signal suitability evaluation is complete. Each signal has been scored with the four-component framework and classified as PASS, HOLD, or FAIL.

### What Was Accomplished

✓ **Signals Loaded** — Imported computed signals from Step 2  
✓ **Target Loaded** — Fetched CDX spread data for evaluation  
✓ **Evaluations Complete** — Four-component scoring for all signals  
✓ **Visualizations Created** — Composite scores, components, scatter plots  
✓ **Reports Generated** — Markdown reports saved for each signal  
✓ **Registry Updated** — Evaluation metadata tracked in catalog  
✓ **Decisions Made** — Signals classified as PASS/HOLD/FAIL

### Data Flow

```
Signals DataFrame (Step 2)
    ↓
Suitability Evaluation (this notebook)
├─ Four-component scoring
├─ Decision thresholds applied
└─ Registry tracking
    ↓
Filtered Signals
├─ PASS → Backtest Execution (Step 4)
├─ HOLD → Manual Review
└─ FAIL → Archived
```

### Re-Running This Notebook

- **Evaluation recomputation:** Scores are recalculated from scratch each run
- **Reports:** New timestamped reports created (previous reports preserved)
- **Registry:** New evaluations appended to registry catalog
- **Metadata:** Overwrites previous `suitability_evaluation_metadata.json`
- **Configuration changes:** Edit `min_obs` or the other `SuitabilityConfig` parameters in Section 4 (Configure Evaluation Parameters)

### Key Files Generated

```
reports/
└── suitability/
    ├── cdx_etf_basis_CDX_IG_5Y_{timestamp}.md
    ├── cdx_vix_gap_CDX_IG_5Y_{timestamp}.md
    └── spread_momentum_CDX_IG_5Y_{timestamp}.md

logs/
└── suitability_evaluation_metadata.json (updated)

src/aponyx/evaluation/suitability/
└── suitability_registry.json (updated)
```

### Troubleshooting

**Signals file not found:**
- Run `02_signal_computation.ipynb` first
- Verify file exists: `data/processed/signals.parquet`
- Check DATA_DIR configuration

**Evaluation errors:**
- Check signal and target alignment (DatetimeIndex)
- Verify minimum observations threshold (252 by default)
- Review ERROR logs for missing data or invalid inputs
- Ensure signal has `.name` attribute

**All signals FAIL:**
- Review component scores in evaluation summary (cell 5)
- Check data health: sufficient observations and low missing data?
- Check predictive: are t-statistics significant (>2.0)?
- Check economic: is effect size meaningful (>0.5 bps)?
- Check stability: is sign consistent across subperiods?
- Consider adjusting signal specifications or lookback periods

**Low scores despite good signals:**
- Review component weights in configuration (cell 4)
- Check lag horizons match signal characteristics
- Verify target product is appropriate for signal type
- Consider adjusting min_obs threshold if sample is limited

**Registry or report errors:**
- Ensure EVALUATION_DIR and SUITABILITY_REGISTRY_PATH are configured
- Check write permissions for reports/ and src/aponyx/evaluation/
- Verify JSON registry is valid (not corrupted)