<a href="https://colab.research.google.com/github/micah-shull/AI_Agents/blob/main/411_MO_DataLoading_Utils.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Loading Utilities

In [None]:
"""Data Loading Utilities

Load marketing data from JSON files and build lookup dictionaries.
"""

import json
from pathlib import Path
from typing import Dict, Any, List, Optional


def load_json_file(file_path: str) -> List[Dict[str, Any]]:
    """
    Read a JSON document from disk and return the parsed content.

    Args:
        file_path: Location of the JSON file to read

    Returns:
        The parsed JSON content (callers expect a list of dictionaries)

    Raises:
        FileNotFoundError: If nothing exists at ``file_path``
        json.JSONDecodeError: If the file content is not valid JSON
    """
    source = Path(file_path)

    # Check up front so the error message names the path the caller passed in.
    if source.exists():
        with source.open('r', encoding='utf-8') as handle:
            return json.load(handle)

    raise FileNotFoundError(f"Data file not found: {file_path}")


def load_campaigns(data_dir: str, filename: str = "campaigns.json") -> List[Dict[str, Any]]:
    """
    Load campaigns from a JSON file inside ``data_dir``.

    Args:
        data_dir: Directory that contains the data file
        filename: Name of the campaigns file within ``data_dir``

    Returns:
        Parsed content of the file, as returned by ``load_json_file``

    Raises:
        FileNotFoundError: If the file doesn't exist
        json.JSONDecodeError: If the file is not valid JSON
    """
    return load_json_file(str(Path(data_dir) / filename))


def load_audience_segments(data_dir: str, filename: str = "audience_segments.json") -> List[Dict[str, Any]]:
    """
    Load audience segments from a JSON file inside ``data_dir``.

    Args:
        data_dir: Directory that contains the data file
        filename: Name of the audience segments file within ``data_dir``

    Returns:
        Parsed content of the file, as returned by ``load_json_file``

    Raises:
        FileNotFoundError: If the file doesn't exist
        json.JSONDecodeError: If the file is not valid JSON
    """
    return load_json_file(str(Path(data_dir) / filename))


def load_channels(data_dir: str, filename: str = "channels.json") -> List[Dict[str, Any]]:
    """
    Load channels from a JSON file inside ``data_dir``.

    Args:
        data_dir: Directory that contains the data file
        filename: Name of the channels file within ``data_dir``

    Returns:
        Parsed content of the file, as returned by ``load_json_file``

    Raises:
        FileNotFoundError: If the file doesn't exist
        json.JSONDecodeError: If the file is not valid JSON
    """
    return load_json_file(str(Path(data_dir) / filename))


def load_creative_assets(data_dir: str, filename: str = "creative_assets.json") -> List[Dict[str, Any]]:
    """
    Load creative assets from a JSON file inside ``data_dir``.

    Args:
        data_dir: Directory that contains the data file
        filename: Name of the creative assets file within ``data_dir``

    Returns:
        Parsed content of the file, as returned by ``load_json_file``

    Raises:
        FileNotFoundError: If the file doesn't exist
        json.JSONDecodeError: If the file is not valid JSON
    """
    return load_json_file(str(Path(data_dir) / filename))


def load_experiments(data_dir: str, filename: str = "experiments.json") -> List[Dict[str, Any]]:
    """
    Load experiments from a JSON file inside ``data_dir``.

    Args:
        data_dir: Directory that contains the data file
        filename: Name of the experiments file within ``data_dir``

    Returns:
        Parsed content of the file, as returned by ``load_json_file``

    Raises:
        FileNotFoundError: If the file doesn't exist
        json.JSONDecodeError: If the file is not valid JSON
    """
    return load_json_file(str(Path(data_dir) / filename))


def load_performance_metrics(data_dir: str, filename: str = "performance_metrics.json") -> List[Dict[str, Any]]:
    """
    Load performance metrics from a JSON file inside ``data_dir``.

    Args:
        data_dir: Directory that contains the data file
        filename: Name of the performance metrics file within ``data_dir``

    Returns:
        Parsed content of the file, as returned by ``load_json_file``

    Raises:
        FileNotFoundError: If the file doesn't exist
        json.JSONDecodeError: If the file is not valid JSON
    """
    return load_json_file(str(Path(data_dir) / filename))


def load_orchestrator_decisions(data_dir: str, filename: str = "orchestrator_decisions.json") -> List[Dict[str, Any]]:
    """
    Load orchestrator decisions from a JSON file inside ``data_dir``.

    Args:
        data_dir: Directory that contains the data file
        filename: Name of the orchestrator decisions file within ``data_dir``

    Returns:
        Parsed content of the file, as returned by ``load_json_file``

    Raises:
        FileNotFoundError: If the file doesn't exist
        json.JSONDecodeError: If the file is not valid JSON
    """
    return load_json_file(str(Path(data_dir) / filename))


def load_roi_ledger(data_dir: str, filename: str = "roi_ledger.json") -> List[Dict[str, Any]]:
    """
    Load the ROI ledger from a JSON file inside ``data_dir``.

    Args:
        data_dir: Directory that contains the data file
        filename: Name of the ROI ledger file within ``data_dir``

    Returns:
        Parsed content of the file, as returned by ``load_json_file``

    Raises:
        FileNotFoundError: If the file doesn't exist
        json.JSONDecodeError: If the file is not valid JSON
    """
    return load_json_file(str(Path(data_dir) / filename))


# ============================================================================
# Lookup Dictionary Builders
# ============================================================================

def build_campaigns_lookup(campaigns: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
    """
    Index campaigns by their ``campaign_id``.

    Args:
        campaigns: Campaign dicts, each carrying a "campaign_id" key

    Returns:
        Dictionary mapping campaign_id -> campaign dict. When an id occurs
        more than once, the last campaign with that id is kept.

    Raises:
        KeyError: If a campaign has no "campaign_id" key
    """
    by_id: Dict[str, Dict[str, Any]] = {}
    for record in campaigns:
        by_id[record["campaign_id"]] = record
    return by_id


def build_segments_lookup(segments: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
    """
    Index audience segments by their ``segment_id``.

    Args:
        segments: Segment dicts, each carrying a "segment_id" key

    Returns:
        Dictionary mapping segment_id -> segment dict. When an id occurs
        more than once, the last segment with that id is kept.

    Raises:
        KeyError: If a segment has no "segment_id" key
    """
    by_id: Dict[str, Dict[str, Any]] = {}
    for record in segments:
        by_id[record["segment_id"]] = record
    return by_id


def build_channels_lookup(channels: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
    """
    Index channels by their ``channel_id``.

    Args:
        channels: Channel dicts, each carrying a "channel_id" key

    Returns:
        Dictionary mapping channel_id -> channel dict. When an id occurs
        more than once, the last channel with that id is kept.

    Raises:
        KeyError: If a channel has no "channel_id" key
    """
    by_id: Dict[str, Dict[str, Any]] = {}
    for record in channels:
        by_id[record["channel_id"]] = record
    return by_id


def build_assets_lookup(assets: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
    """
    Index creative assets by their ``asset_id``.

    Args:
        assets: Asset dicts, each carrying an "asset_id" key

    Returns:
        Dictionary mapping asset_id -> asset dict. When an id occurs
        more than once, the last asset with that id is kept.

    Raises:
        KeyError: If an asset has no "asset_id" key
    """
    by_id: Dict[str, Dict[str, Any]] = {}
    for record in assets:
        by_id[record["asset_id"]] = record
    return by_id


def build_experiments_lookup(experiments: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
    """
    Index experiments by their ``experiment_id``.

    Args:
        experiments: Experiment dicts, each carrying an "experiment_id" key

    Returns:
        Dictionary mapping experiment_id -> experiment dict. When an id
        occurs more than once, the last experiment with that id is kept.

    Raises:
        KeyError: If an experiment has no "experiment_id" key
    """
    by_id: Dict[str, Dict[str, Any]] = {}
    for record in experiments:
        by_id[record["experiment_id"]] = record
    return by_id


def build_metrics_by_asset(metrics: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
    """
    Bucket metric records under the asset they belong to.

    Args:
        metrics: Metric dicts; the "asset_id" key is optional

    Returns:
        Dictionary mapping asset_id -> list of metrics for that asset,
        in input order. Metrics whose "asset_id" is missing or falsy
        (None, empty string, ...) are left out.
    """
    grouped: Dict[str, List[Dict[str, Any]]] = {}
    for record in metrics:
        key = record.get("asset_id")
        if not key:
            # No usable asset reference on this record; nothing to group it under.
            continue
        grouped.setdefault(key, []).append(record)
    return grouped


def build_metrics_by_experiment(metrics: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
    """
    Bucket metric records under the experiment they belong to.

    Args:
        metrics: Metric dicts; the "experiment_id" key is optional

    Returns:
        Dictionary mapping experiment_id -> list of metrics for that
        experiment, in input order. Metrics whose "experiment_id" is
        missing or falsy (None, empty string, ...) are left out.
    """
    grouped: Dict[str, List[Dict[str, Any]]] = {}
    for record in metrics:
        key = record.get("experiment_id")
        if not key:
            # No usable experiment reference on this record; skip it.
            continue
        grouped.setdefault(key, []).append(record)
    return grouped


def build_decisions_by_campaign(decisions: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
    """
    Bucket decision records under the campaign they belong to.

    Args:
        decisions: Decision dicts; the "campaign_id" key is optional

    Returns:
        Dictionary mapping campaign_id -> list of decisions for that
        campaign, in input order. Decisions whose "campaign_id" is
        missing or falsy (None, empty string, ...) are left out.
    """
    grouped: Dict[str, List[Dict[str, Any]]] = {}
    for record in decisions:
        key = record.get("campaign_id")
        if not key:
            # No usable campaign reference on this record; skip it.
            continue
        grouped.setdefault(key, []).append(record)
    return grouped


def filter_by_campaign_id(
    items: List[Dict[str, Any]],
    campaign_id: Optional[str],
    id_field: str = "campaign_id"
) -> List[Dict[str, Any]]:
    """
    Keep only the items that belong to the given campaign.

    Args:
        items: Dicts to filter
        campaign_id: Campaign ID to match; None disables filtering
        id_field: Key under which each item stores its campaign ID

    Returns:
        A new list with the items whose ``id_field`` value equals
        ``campaign_id``. When ``campaign_id`` is None the input list itself
        is returned, not a copy. Items lacking ``id_field`` never match.
    """
    if campaign_id is not None:
        return [entry for entry in items if entry.get(id_field) == campaign_id]

    # No campaign requested: hand back the caller's list untouched.
    return items


# Data Loading Node

In [None]:
# ============================================================================
# Phase 2: Data Loading Node
# ============================================================================

def data_loading_node(state: MarketingOrchestratorState) -> Dict[str, Any]:
    """
    Data Loading Node: load every marketing data file and index it.

    Reads the 8 JSON files named by ``MarketingOrchestratorConfig``,
    optionally narrows them to the campaign given in
    ``state["campaign_id"]``, and builds the lookup dictionaries.

    Args:
        state: Mapping read via ``.get`` for "errors" (defaults to an
            empty list) and "campaign_id" (optional).

    Returns:
        On success, a dict holding the 8 loaded lists, the 8 lookup /
        grouping dictionaries, and the incoming "errors" list unchanged.
        On failure, a dict holding only "errors": the incoming errors plus
        one message describing what went wrong.
    """
    prior_errors = state.get("errors", [])
    target_campaign = state.get("campaign_id")

    # Create config instance
    cfg = MarketingOrchestratorConfig()

    try:
        # Load all data files; dict values are evaluated top to bottom, so
        # the first file that fails is the one reported.
        result: Dict[str, Any] = {
            "campaigns": load_campaigns(cfg.data_dir, cfg.campaigns_file),
            "audience_segments": load_audience_segments(cfg.data_dir, cfg.audience_segments_file),
            "channels": load_channels(cfg.data_dir, cfg.channels_file),
            "creative_assets": load_creative_assets(cfg.data_dir, cfg.creative_assets_file),
            "experiments": load_experiments(cfg.data_dir, cfg.experiments_file),
            "performance_metrics": load_performance_metrics(cfg.data_dir, cfg.performance_metrics_file),
            "orchestrator_decisions": load_orchestrator_decisions(cfg.data_dir, cfg.orchestrator_decisions_file),
            "roi_ledger": load_roi_ledger(cfg.data_dir, cfg.roi_ledger_file),
        }

        if target_campaign:
            # Only these collections are narrowed by campaign; audience
            # segments and channels are passed through unfiltered.
            for name in (
                "campaigns",
                "creative_assets",
                "experiments",
                "orchestrator_decisions",
                "roi_ledger",
            ):
                result[name] = filter_by_campaign_id(result[name], target_campaign, "campaign_id")

            # Metrics are narrowed through the assets that survived the
            # campaign filter, matching on asset_id.
            kept_asset_ids = {asset["asset_id"] for asset in result["creative_assets"]}
            result["performance_metrics"] = [
                row for row in result["performance_metrics"]
                if row.get("asset_id") in kept_asset_ids
            ]

        # Build lookup dictionaries from the (possibly filtered) lists
        result["campaigns_lookup"] = build_campaigns_lookup(result["campaigns"])
        result["segments_lookup"] = build_segments_lookup(result["audience_segments"])
        result["channels_lookup"] = build_channels_lookup(result["channels"])
        result["assets_lookup"] = build_assets_lookup(result["creative_assets"])
        result["experiments_lookup"] = build_experiments_lookup(result["experiments"])
        result["metrics_by_asset"] = build_metrics_by_asset(result["performance_metrics"])
        result["metrics_by_experiment"] = build_metrics_by_experiment(result["performance_metrics"])
        result["decisions_by_campaign"] = build_decisions_by_campaign(result["orchestrator_decisions"])

        result["errors"] = prior_errors
        return result
    except FileNotFoundError as exc:
        failure = f"data_loading_node: File not found - {str(exc)}"
    except json.JSONDecodeError as exc:
        failure = f"data_loading_node: Invalid JSON - {str(exc)}"
    except Exception as exc:
        # Node boundary: record the failure in state instead of raising.
        failure = f"data_loading_node: Unexpected error - {str(exc)}"

    # Reached only when one of the handlers above ran.
    return {
        "errors": prior_errors + [failure]
    }
