<a href="https://colab.research.google.com/github/micah-shull/AI_Agents/blob/main/293_HITL_DataLoading_utils.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data loading utilities for HITL Orchestrator

In [None]:
"""Data loading utilities for HITL Orchestrator"""

import json
from pathlib import Path
from typing import List, Dict, Any, Optional


def load_tasks(data_dir: str, tasks_file: str) -> List[Dict[str, Any]]:
    """
    Load tasks from JSON file.

    The file is decoded explicitly as UTF-8 instead of the platform's
    locale encoding, so non-ASCII text is read the same way on every
    operating system.

    Args:
        data_dir: Directory containing data files
        tasks_file: Name of tasks file, joined onto data_dir

    Returns:
        The parsed JSON content, expected to be a list of task
        dictionaries (the structure itself is not validated here)

    Raises:
        FileNotFoundError: If the tasks file does not exist
        json.JSONDecodeError: If the file content is not valid JSON
    """
    file_path = Path(data_dir) / tasks_file
    # Explicit encoding: open() would otherwise fall back to the locale
    # default (e.g. cp1252 on Windows) and mis-decode UTF-8 JSON.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def load_agent_outputs(data_dir: str, agent_outputs_file: str) -> List[Dict[str, Any]]:
    """
    Load agent outputs from JSON file.

    The file is decoded explicitly as UTF-8 instead of the platform's
    locale encoding, so non-ASCII text is read the same way on every
    operating system.

    Args:
        data_dir: Directory containing data files
        agent_outputs_file: Name of agent outputs file, joined onto data_dir

    Returns:
        The parsed JSON content, expected to be a list of agent output
        dictionaries (the structure itself is not validated here)

    Raises:
        FileNotFoundError: If the agent outputs file does not exist
        json.JSONDecodeError: If the file content is not valid JSON
    """
    file_path = Path(data_dir) / agent_outputs_file
    # Explicit encoding: open() would otherwise fall back to the locale
    # default (e.g. cp1252 on Windows) and mis-decode UTF-8 JSON.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def load_routing_policy(data_dir: str, routing_policy_file: str) -> Dict[str, Any]:
    """
    Load routing policy from JSON file.

    The file is decoded explicitly as UTF-8 instead of the platform's
    locale encoding, so non-ASCII text is read the same way on every
    operating system.

    Args:
        data_dir: Directory containing data files
        routing_policy_file: Name of routing policy file, joined onto data_dir

    Returns:
        The parsed JSON content, expected to be a routing policy
        dictionary (the structure itself is not validated here)

    Raises:
        FileNotFoundError: If the routing policy file does not exist
        json.JSONDecodeError: If the file content is not valid JSON
    """
    file_path = Path(data_dir) / routing_policy_file
    # Explicit encoding: open() would otherwise fall back to the locale
    # default (e.g. cp1252 on Windows) and mis-decode UTF-8 JSON.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def load_human_reviews(data_dir: str, human_reviews_file: str) -> List[Dict[str, Any]]:
    """
    Load existing human reviews from JSON file (optional).

    A missing file is treated as "no reviews yet" rather than an error.
    The file is decoded explicitly as UTF-8 instead of the platform's
    locale encoding, so non-ASCII text is read the same way on every
    operating system.

    Args:
        data_dir: Directory containing data files
        human_reviews_file: Name of human reviews file, joined onto data_dir

    Returns:
        The parsed JSON content, expected to be a list of human review
        dictionaries (empty list if file doesn't exist)

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON
    """
    file_path = Path(data_dir) / human_reviews_file

    # Attempt the open directly instead of checking exists() first: this
    # closes the window in which the file could vanish between the check
    # and the open. NotADirectoryError covers a path component that is a
    # regular file, which the former exists() check also reported as missing.
    try:
        f = open(file_path, 'r', encoding='utf-8')
    except (FileNotFoundError, NotADirectoryError):
        return []

    with f:
        return json.load(f)


def load_audit_logs(data_dir: str, audit_logs_file: str) -> List[Dict[str, Any]]:
    """
    Load existing audit logs from JSON file (optional).

    A missing file is treated as "no logs yet" rather than an error.
    The file is decoded explicitly as UTF-8 instead of the platform's
    locale encoding, so non-ASCII text is read the same way on every
    operating system.

    Args:
        data_dir: Directory containing data files
        audit_logs_file: Name of audit logs file, joined onto data_dir

    Returns:
        The parsed JSON content, expected to be a list of audit log
        dictionaries (empty list if file doesn't exist)

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON
    """
    file_path = Path(data_dir) / audit_logs_file

    # Attempt the open directly instead of checking exists() first: this
    # closes the window in which the file could vanish between the check
    # and the open. NotADirectoryError covers a path component that is a
    # regular file, which the former exists() check also reported as missing.
    try:
        f = open(file_path, 'r', encoding='utf-8')
    except (FileNotFoundError, NotADirectoryError):
        return []

    with f:
        return json.load(f)


def build_task_output_lookup(
    tasks: List[Dict[str, Any]],
    agent_outputs: List[Dict[str, Any]]
) -> Dict[str, Dict[str, Any]]:
    """
    Join tasks with their agent outputs, keyed by task_id.

    Every task yields exactly one entry. Agent outputs whose task_id does
    not match any task are ignored. When a task_id occurs more than once
    in either list, the last occurrence is the one used.

    Args:
        tasks: List of tasks; each must contain a "task_id" key
        agent_outputs: List of agent outputs; each must contain a "task_id" key

    Returns:
        Dictionary mapping task_id to a dict with three keys:
        "task" (a shallow copy of the task), "agent_output" (the output's
        "agent_output" value, or {} when absent) and "confidence_score"
        (the output's "confidence_score" value, or 0.0 when absent)

    Raises:
        KeyError: If any task or agent output has no "task_id" key
    """
    # Index both inputs by task_id; later duplicates overwrite earlier ones.
    tasks_by_id = {}
    for entry in tasks:
        tasks_by_id[entry["task_id"]] = entry

    outputs_by_id = {}
    for record in agent_outputs:
        outputs_by_id[record["task_id"]] = record

    combined = {}
    for task_id, task in tasks_by_id.items():
        # A task without a matching output falls back to an empty record,
        # so the defaults below apply.
        produced = outputs_by_id.get(task_id, {})
        combined[task_id] = {
            "task": task.copy(),
            "agent_output": produced.get("agent_output", {}),
            "confidence_score": produced.get("confidence_score", 0.0),
        }

    return combined

