# Task Extractor Configuration Debugger

This notebook helps debug YAML configurations for the task extractor. It's particularly useful for:
1. Validating YAML syntax
2. Checking predicate relationships
3. Visualizing window trees
4. Understanding inclusion/exclusion criteria
5. Debugging temporal windows and stopping criteria

In [None]:
import tempfile
from pathlib import Path

import networkx as nx

# Import the TaskExtractorConfig class
# Assumes the `aces` package (which provides `aces.config`) is installed
from aces.config import TaskExtractorConfig
from bigtree import print_tree

## Load and Validate YAML

First, let's create two helper functions: one to load and validate a YAML configuration, and one to print an analysis of the loaded configuration:

In [None]:
def load_and_validate_yaml(yaml_str):
    """Load and validate a YAML configuration string.

    The string is written to a temporary ``.yaml`` file, which is then passed
    to ``TaskExtractorConfig.load``.

    Args:
        yaml_str: The full text of the YAML configuration.

    Returns:
        Whatever ``TaskExtractorConfig.load`` returns for the written file.

    Raises:
        Exception: Any error raised while writing or loading the file is
            printed and then re-raised unchanged.
    """
    try:
        # Write into a temporary directory rather than re-opening a still-open
        # NamedTemporaryFile by name: that second open is not portable (it
        # fails on Windows). Here the file is closed before it is loaded.
        with tempfile.TemporaryDirectory() as tmp_dir:
            config_path = Path(tmp_dir) / "config.yaml"
            # Platform default encoding, matching the original open(mode="w").
            config_path.write_text(yaml_str)

            # Load while the directory (and therefore the file) still exists.
            return TaskExtractorConfig.load(config_path)
    except Exception as e:
        print(f"Error loading configuration: {e}")
        raise


def analyze_config(config):
    """Print a human-readable summary of a TaskExtractorConfig object.

    The report is written to stdout and covers, in order: the plain
    predicates, the derived predicates, every window with its boundary and
    constraint attributes, the window tree, and the predicate DAG.

    Args:
        config: A loaded configuration exposing ``plain_predicates``,
            ``derived_predicates``, ``windows``, ``window_tree`` and
            ``predicates_DAG``.

    Returns:
        None. All output goes to stdout.
    """
    print("=== Configuration Analysis ===")

    # Analyze predicates
    print("\nPredicates:")
    print("Plain predicates:")
    for name, pred in config.plain_predicates.items():
        print(f"  {name}: {pred}")

    print("\nDerived predicates:")
    for name, pred in config.derived_predicates.items():
        print(f"  {name}: {pred}")

    # Analyze windows
    print("\nWindows:")
    for name, window in config.windows.items():
        print(f"\nWindow: {name}")
        print(f"  Start: {window.start}")
        print(f"  End: {window.end}")
        print(f"  Start inclusive: {window.start_inclusive}")
        print(f"  End inclusive: {window.end_inclusive}")
        print(f"  Has constraints: {window.has}")
        print(f"  Label: {window.label}")
        print(f"  Index timestamp: {window.index_timestamp}")

    # Print window tree
    print("\nWindow Tree:")
    print_tree(config.window_tree)

    # Print predicate DAG
    print("\nPredicate DAG:")
    # write_network_text writes to stdout itself and returns None, so it must
    # not be wrapped in print() -- that would emit a stray "None" line.
    nx.write_network_text(config.predicates_DAG)

## Example: ICU Mortality Prediction

Let's create an example configuration for ICU mortality prediction that handles the case where death and discharge can occur on the same date:

In [None]:
# Example task configuration, kept as an inline YAML string so it can be
# edited directly in the notebook. It declares three code-based predicates
# (icu_admission, death, discharge), one expr-based predicate
# (death_or_discharge), uses icu_admission as the trigger, and defines two
# windows: `observation` (ending at trigger + 24h) and `outcome` (labelled by
# `death`).
icu_mortality_yaml = """
metadata:
  description: ICU Mortality Prediction Task

predicates:
  icu_admission:
    code: "event_type//ICU_ADMISSION"
  death:
    code: "event_type//DEATH"
  discharge:
    code: "event_type//DISCHARGE"
  death_or_discharge:
    expr: "or(death, discharge)"

trigger: "icu_admission"

windows:
  observation:
    start: null
    end: "trigger + 24h"
    start_inclusive: true
    end_inclusive: true
    has:
      "_ANY_EVENT": "(1, None)"
    index_timestamp: "end"

  outcome:
    start: "observation.end"
    end: "start -> death_or_discharge"
    start_inclusive: false
    end_inclusive: true
    has: {}
    label: "death"
"""

# Load and analyze the configuration.
# The broad except keeps the cell from ending in a traceback: any error raised
# by either the load or the analysis step is reported with this one message.
try:
    config = load_and_validate_yaml(icu_mortality_yaml)
    analyze_config(config)
except Exception as e:
    print(f"Failed to load configuration: {e}")

## Example: ICU Readmission Prediction

Now let's create an example for ICU readmission prediction: the label is whether an ICU admission occurs within 90 days of ICU discharge, and an exclusion window constrains `death` events over that same 90-day timeframe:

In [None]:
# Example task configuration, kept as an inline YAML string so it can be
# edited directly in the notebook. It declares three code-based predicates
# (icu_admission, icu_discharge, death), uses icu_discharge as the trigger,
# and defines three windows: `observation` (ending at the trigger),
# `exclusion` (a 90d window with a constraint on `death`) and `outcome`
# (a 90d window labelled by `icu_admission`).
icu_readmission_yaml = """
metadata:
  description: ICU Readmission Prediction Task

predicates:
  icu_admission:
    code: "event_type//ICU_ADMISSION"
  icu_discharge:
    code: "event_type//ICU_DISCHARGE"
  death:
    code: "event_type//DEATH"

trigger: "icu_discharge"

windows:
  observation:
    start: null
    end: "trigger"
    start_inclusive: true
    end_inclusive: true
    has:
      "_ANY_EVENT": "(1, None)"
    index_timestamp: "end"

  exclusion:
    start: "observation.end"
    end: "start + 90d"
    start_inclusive: false
    end_inclusive: true
    has:
      "death": "(None, 0)"

  outcome:
    start: "observation.end"
    end: "start + 90d"
    start_inclusive: false
    end_inclusive: true
    has: {}
    label: "icu_admission"
"""

# Load and analyze the configuration.
# The broad except keeps the cell from ending in a traceback: any error raised
# by either the load or the analysis step is reported with this one message.
# Note that this rebinds the notebook-level name `config`.
try:
    config = load_and_validate_yaml(icu_readmission_yaml)
    analyze_config(config)
except Exception as e:
    print(f"Failed to load configuration: {e}")