<a href="https://colab.research.google.com/github/micah-shull/AI_Agents/blob/main/224_Cross_Sell_Upsell_Orchestrator_Tier2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data loading utilities for Tier 2 Cross-Sell & Upsell Orchestrator

In [None]:
"""Data loading utilities for Tier 2 Cross-Sell & Upsell Orchestrator"""

import json
from pathlib import Path
from typing import Dict, List, Any, Optional


# Data file locations. DATA_DIR is resolved by walking four directory levels up
# from this module's file and appending "data_tier_2" (intended to be the
# project root — NOTE(review): confirm if this module is ever moved).
DATA_DIR = Path(__file__).parent.parent.parent.parent / "data_tier_2"
# JSON file scanned by load_customer_data() for a matching customer_id.
CUSTOMERS_FILE = DATA_DIR / "customers.json"
# JSON file read and filtered by load_product_catalog().
PRODUCT_CATALOG_FILE = DATA_DIR / "product_catalog.json"


def load_customer_data(customer_id: str) -> Optional[Dict[str, Any]]:
    """
    Look up a single customer record in the customers JSON file.

    The whole file is parsed on every call and scanned in order; the first
    record whose "customer_id" equals the requested ID is returned.

    Args:
        customer_id: ID of the customer to fetch (e.g., "C001")

    Returns:
        The matching customer dict, or None when no record matches

    Raises:
        FileNotFoundError: If the customers file is missing
        json.JSONDecodeError: If the customers file is not valid JSON
    """
    if not CUSTOMERS_FILE.exists():
        raise FileNotFoundError(f"Customers file not found: {CUSTOMERS_FILE}")

    with open(CUSTOMERS_FILE, 'r', encoding='utf-8') as handle:
        records = json.load(handle)

    # Lazily scan the records; next() yields the first match or the None default
    matches = (record for record in records if record.get("customer_id") == customer_id)
    return next(matches, None)


def load_product_catalog() -> List[Dict[str, Any]]:
    """
    Read the product catalog JSON file and return its Tier 2 entries.

    Returns:
        List of product dicts whose "tier" value equals 2

    Raises:
        FileNotFoundError: If the product catalog file is missing
        json.JSONDecodeError: If the product catalog file is not valid JSON
    """
    if not PRODUCT_CATALOG_FILE.exists():
        raise FileNotFoundError(f"Product catalog file not found: {PRODUCT_CATALOG_FILE}")

    with open(PRODUCT_CATALOG_FILE, 'r', encoding='utf-8') as handle:
        catalog = json.load(handle)

    # Safety filter: drop anything that is not explicitly marked as tier 2
    return list(filter(lambda entry: entry.get("tier") == 2, catalog))


def build_product_lookup(products: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
    """
    Index a list of products by their "product_id".

    Turning the list into a dict lets callers fetch a product by ID with a
    single dict access instead of scanning the list each time.

    Entries whose "product_id" is missing or falsy are left out. When two
    entries share an ID, the one appearing later in the list is kept.

    Args:
        products: List of product dictionaries

    Returns:
        Dictionary mapping product_id to the product dict itself

    Example:
        {
            "P101": {
                "product_id": "P101",
                "name": "Pro Renewal Gel Cleanser",
                ...
            },
            "P102": {...}
        }
    """
    return {
        entry["product_id"]: entry
        for entry in products
        if entry.get("product_id")
    }



# Tests for data loading utilities

In [None]:
"""Tests for data loading utilities"""

from agents.tier2_crosssell_upsell.utilities.data_loading import (
    load_customer_data,
    load_product_catalog,
    build_product_lookup
)


def test_load_customer_data():
    """Check that customer C001 loads and carries the expected fields."""
    print("\n=== Testing load_customer_data ===")

    record = load_customer_data("C001")

    # The record must exist and echo back the requested ID
    assert record is not None, "Should load customer C001"
    assert record["customer_id"] == "C001", "Customer ID should match"

    # Fields this test expects on a customer record
    assert "name" in record, "Customer should have name"
    assert "products_owned" in record, "Customer should have products_owned"
    assert "skin_concerns" in record, "Customer should have skin_concerns"
    assert "tier_preference" in record, "Customer should have tier_preference"

    # Short summary so a manual run shows what was loaded
    print("✅ load_customer_data test passed!")
    print(f"   Customer: {record['name']} ({record['customer_id']})")
    print(f"   Products owned: {len(record['products_owned'])}")
    print(f"   Skin concerns: {record['skin_concerns']}")
    print(f"   Tier preference: {record['tier_preference']}")

    return record


def test_load_customer_data_not_found():
    """Check that an unknown customer ID yields None."""
    print("\n=== Testing load_customer_data (not found) ===")

    # "C999" is expected to be absent from the customers file
    result = load_customer_data("C999")
    assert result is None, "Should return None for non-existent customer"

    print("✅ load_customer_data (not found) test passed!")

    return result


def test_load_product_catalog():
    """Check that the catalog loads as a non-empty list of well-formed Tier 2 products."""
    print("\n=== Testing load_product_catalog ===")

    catalog = load_product_catalog()

    assert isinstance(catalog, list), "Should return a list"
    assert len(catalog) > 0, "Should load at least one product"

    # Every entry must be Tier 2 and expose the fields asserted below
    for entry in catalog:
        assert entry.get("tier") == 2, f"Product {entry.get('product_id')} should be Tier 2"
        assert "product_id" in entry, "Product should have product_id"
        assert "name" in entry, "Product should have name"
        assert "category" in entry, "Product should have category"
        assert "routine_step" in entry, "Product should have routine_step"
        assert "active_ingredients" in entry, "Product should have active_ingredients"
        assert "target_concerns" in entry, "Product should have target_concerns"

    print("✅ load_product_catalog test passed!")
    print(f"   Total products loaded: {len(catalog)}")

    # Use the first catalog entry as the sample for the summary lines
    first = catalog[0]
    print(f"   Sample product: {first['name']} ({first['product_id']})")
    print(f"   Sample fields: product_id={first.get('product_id')}, "
          f"routine_step={first.get('routine_step')}, "
          f"category={first.get('category')}")

    return catalog


def test_build_product_lookup():
    """Check that the lookup built from the catalog maps each ID to its product."""
    print("\n=== Testing build_product_lookup ===")

    # The lookup is built from the real catalog, so load that first
    catalog = load_product_catalog()
    assert len(catalog) > 0, "Need products to test lookup"

    index = build_product_lookup(catalog)

    assert isinstance(index, dict), "Should return a dictionary"
    assert len(index) == len(catalog), "Lookup should have same count as products"

    # Spot-check the first catalog entry through the lookup
    head = catalog[0]
    product_id = head["product_id"]

    assert product_id in index, f"Lookup should contain {product_id}"
    assert index[product_id]["product_id"] == product_id, "Lookup should return correct product"
    assert index[product_id]["name"] == head["name"], "Lookup should return same product"

    print("✅ build_product_lookup test passed!")
    print(f"   Lookup size: {len(index)} products")
    print(f"   Sample lookup: {product_id} -> {index[product_id]['name']}")

    # Repeat the membership/identity check for up to the first three products
    test_ids = [entry["product_id"] for entry in catalog[:3]]
    for test_id in test_ids:
        assert test_id in index, f"Lookup should contain {test_id}"
        assert index[test_id]["product_id"] == test_id, f"Lookup for {test_id} should be correct"

    print(f"   Verified lookup for {len(test_ids)} products")

    return index


def test_data_loading_integration():
    """
    Test all data loading utilities together.

    Loads customer C001 and the product catalog, builds the product lookup,
    and resolves each product the customer owns through that lookup.

    Returns:
        Dict with the loaded "customer", "products", "product_lookup" and the
        "found_products" resolved from the customer's owned products
    """
    print("\n=== Testing data loading integration ===")

    # Load customer
    customer = load_customer_data("C001")
    assert customer is not None, "Should load customer"

    # Load products
    products = load_product_catalog()
    assert len(products) > 0, "Should load products"

    # Build lookup
    product_lookup = build_product_lookup(products)
    assert len(product_lookup) == len(products), "Lookup should match products"

    # Split the customer's owned product IDs into those present in the
    # catalog lookup and those missing from it
    customer_product_ids = [p["product_id"] for p in customer["products_owned"]]
    found_products = [
        product_lookup[product_id]
        for product_id in customer_product_ids
        if product_id in product_lookup
    ]
    missing_products = [
        product_id
        for product_id in customer_product_ids
        if product_id not in product_lookup
    ]

    # Run the final check BEFORE announcing success, so a failing run never
    # prints a "passed" banner
    assert len(found_products) > 0, "Should find at least one customer product in catalog"

    print("✅ Integration test passed!")
    print(f"   Customer: {customer['name']} ({customer['customer_id']})")
    print(f"   Products owned: {len(customer_product_ids)}")
    print(f"   Found in catalog: {len(found_products)}")
    if missing_products:
        print(f"   Missing from catalog: {missing_products}")

    return {
        "customer": customer,
        "products": products,
        "product_lookup": product_lookup,
        "found_products": found_products
    }


if __name__ == "__main__":
    """Run all tests"""
    banner = "=" * 60

    print(banner)
    print("Testing Tier 2 Data Loading Utilities")
    print(banner)

    # Individual utility tests first, then the integration test last
    for run_test in (
        test_load_customer_data,
        test_load_customer_data_not_found,
        test_load_product_catalog,
        test_build_product_lookup,
        test_data_loading_integration,
    ):
        run_test()

    print("\n" + banner)
    print("✅ All data loading utility tests passed!")
    print(banner)



In [None]:
(.venv) micahshull@Micahs-iMac LG_Cursor_030_CrossSell_Upsell_Orchestrator % python3 -m agents.tier2_crosssell_upsell.tests.test_data_loading
============================================================
Testing Tier 2 Data Loading Utilities
============================================================

=== Testing load_customer_data ===
✅ load_customer_data test passed!
   Customer: Sarah Lee (C001)
   Products owned: 6
   Skin concerns: ['dryness', 'sensitivity']
   Tier preference: 2

=== Testing load_customer_data ===
✅ load_customer_data test passed!

=== Testing load_product_catalog ===
✅ load_product_catalog test passed!
   Total products loaded: 20
   Sample product: Pro Renewal Gel Cleanser (P101)
   Sample fields: product_id=P101, routine_step=1, category=cleanser

=== Testing build_product_lookup ===
✅ build_product_lookup test passed!
   Lookup size: 20 products
   Sample lookup: P101 -> Pro Renewal Gel Cleanser
   Verified lookup for 3 products

=== Testing data loading integration ===
✅ Integration test passed!
   Customer: Sarah Lee (C001)
   Products owned: 6
   Found in catalog: 6

============================================================
✅ All data loading utility tests passed!
============================================================
(.venv) micahshull@Micahs-iMac LG_Cursor_030_CrossSell_Upsell_Orchestrator %

All data loading utility tests passed. Building the `data_loading_node` that uses these utilities:

## Completed

1. Data loading utilities — all tests passing:
   - `load_customer_data()` — loads customer by ID
   - `load_product_catalog()` — loads all Tier 2 products
   - `build_product_lookup()` — creates fast O(1) lookup dict

2. Data loading node — orchestrates the utilities:
   - Loads customer data
   - Loads product catalog
   - Builds product lookup
   - Error handling for missing customer, file errors, etc.

3. Tests added:
   - Basic functionality test
   - Missing customer_id error handling
   - Customer not found error handling
   - Full integration test (goal → planning → data_loading)

## Next steps

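You can now test the `data_loading_node` by running the node test module (`python3 -m agents.tier2_crosssell_upsell.tests.test_nodes`). This exercises all three nodes together (goal → planning → data_loading) and verifies that the integration works.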


# Node functions for Tier 2 Cross-Sell & Upsell Orchestrator

In [None]:
"""Node functions for Tier 2 Cross-Sell & Upsell Orchestrator"""

from typing import Dict, Any
from config import Tier2CrossSellUpsellState
from agents.tier2_crosssell_upsell.utilities.data_loading import (
    load_customer_data,
    load_product_catalog,
    build_product_lookup
)


def goal_node(state: Tier2CrossSellUpsellState) -> Dict[str, Any]:
    """
    Goal Node: build the fixed, rule-based goal for a cross-sell/upsell run.

    The goal is a static template; only the customer ID is taken from state.

    Args:
        state: Current state; must contain a non-empty "customer_id"

    Returns:
        Partial state update. On success it holds "goal" plus the existing
        "errors" list; when customer_id is missing it holds only "errors"
        with a new message appended.
    """
    prior_errors = state.get("errors", [])
    customer_id = state.get("customer_id")

    # Guard: nothing can be analysed without knowing which customer to analyse
    if not customer_id:
        return {
            "errors": prior_errors + ["goal_node: customer_id is required"]
        }

    focus_areas = [
        "routine_completeness",  # Essential categories and routine steps
        "ingredient_matching",   # Match customer concerns to product ingredients
        "upgrade_opportunities",  # Tier 1 → Tier 2 upgrades
        "replenishment_needs",   # Products due for repurchase
        "routine_step_adjacency" # Adjacent routine steps for cross-sell
    ]

    success_criteria = {
        "identify_at_least_3_opportunities": True,
        "prioritize_high_value_opportunities": True,
        "consider_customer_concerns": True,
        "consider_routine_gaps": True
    }

    return {
        "goal": {
            "objective": "Identify cross-sell and upsell opportunities for Tier 2 customer",
            "customer_id": customer_id,
            "analysis_type": "tier2_crosssell_upsell",
            "focus_areas": focus_areas,
            "success_criteria": success_criteria
        },
        "errors": prior_errors
    }


def planning_node(state: Tier2CrossSellUpsellState) -> Dict[str, Any]:
    """
    Planning Node: Create execution plan based on goal.

    This creates a step-by-step plan for analyzing the customer and
    identifying opportunities. Rule-based, no LLM needed. The plan is a
    fixed six-step template; the goal only has to be present for the plan
    to be produced.

    Args:
        state: Current state with goal

    Returns:
        Partial state update. On success it holds "plan" (a list of step
        dicts with "step", "name", "description", "dependencies" and
        "outputs") plus the existing "errors" list; when the goal is
        missing it holds only "errors" with a new message appended.
    """
    # Guard: the plan is only meaningful once a goal has been defined
    if not state.get("goal"):
        return {
            "errors": state.get("errors", []) + ["planning_node: goal is required"]
        }

    # Each step names the step it depends on, forming a linear pipeline
    plan = [
        {
            "step": 1,
            "name": "data_loading",
            "description": "Load customer data and Tier 2 product catalog",
            "dependencies": [],
            "outputs": ["customer_data", "product_catalog", "product_lookup"]
        },
        {
            "step": 2,
            "name": "routine_analysis",
            "description": "Analyze customer's current routine: products, categories, routine steps, gaps",
            "dependencies": ["data_loading"],
            "outputs": ["customer_products", "customer_categories", "customer_routine_steps",
                       "routine_gaps", "routine_step_gaps", "replenishment_needs"]
        },
        {
            "step": 3,
            "name": "opportunity_detection",
            "description": "Detect opportunities: cross-sell, ingredient matches, upgrades, replenishment",
            "dependencies": ["routine_analysis"],
            "outputs": ["cross_sell_opportunities", "ingredient_opportunities",
                       "upgrade_opportunities", "replenishment_opportunities", "bundle_opportunities"]
        },
        {
            "step": 4,
            "name": "scoring",
            "description": "Score opportunities multi-dimensionally: business value, customer fit, routine completeness, etc.",
            "dependencies": ["opportunity_detection"],
            "outputs": ["scored_opportunities"]
        },
        {
            "step": 5,
            "name": "ranking",
            "description": "Rank opportunities by final score and select top N",
            "dependencies": ["scoring"],
            "outputs": ["ranked_opportunities", "top_opportunities", "opportunity_summary"]
        },
        {
            "step": 6,
            "name": "report_generation",
            "description": "Generate markdown report with recommendations",
            "dependencies": ["ranking"],
            "outputs": ["recommendations_report", "report_file_path"]
        }
    ]

    return {
        "plan": plan,
        "errors": state.get("errors", [])
    }


def data_loading_node(state: Tier2CrossSellUpsellState) -> Dict[str, Any]:
    """
    Data Loading Node: fetch the customer record and the product catalog.

    Runs the three data loading utilities in sequence (customer, catalog,
    lookup) and stops at the first failure, reporting it through "errors"
    instead of raising.

    Args:
        state: Current state; the customer ID is read from
            state["customer_id"], falling back to state["goal"]["customer_id"]

    Returns:
        Partial state update. On success it holds "customer_data",
        "product_catalog", "product_lookup" and the existing "errors" list;
        on any failure it holds only "errors" with a new message appended.
    """
    errors = state.get("errors", [])

    def fail(message: str) -> Dict[str, Any]:
        # Error-only update: existing errors plus the new message
        return {"errors": errors + [message]}

    # Prefer the top-level customer_id; otherwise fall back to the goal's copy
    customer_id = state.get("customer_id") or (state.get("goal") or {}).get("customer_id")
    if not customer_id:
        return fail("data_loading_node: customer_id is required")

    # Step 1: customer record
    try:
        customer_data = load_customer_data(customer_id)
    except FileNotFoundError as e:
        return fail(f"data_loading_node: {str(e)}")
    except Exception as e:
        return fail(f"data_loading_node: Error loading customer data: {str(e)}")
    if customer_data is None:
        return fail(f"data_loading_node: Customer {customer_id} not found")

    # Step 2: product catalog (an empty catalog is treated as an error)
    try:
        product_catalog = load_product_catalog()
    except FileNotFoundError as e:
        return fail(f"data_loading_node: {str(e)}")
    except Exception as e:
        return fail(f"data_loading_node: Error loading product catalog: {str(e)}")
    if not product_catalog:
        return fail("data_loading_node: No products found in catalog")

    # Step 3: product_id -> product index
    try:
        product_lookup = build_product_lookup(product_catalog)
    except Exception as e:
        return fail(f"data_loading_node: Error building product lookup: {str(e)}")

    return {
        "customer_data": customer_data,
        "product_catalog": product_catalog,
        "product_lookup": product_lookup,
        "errors": errors
    }



# Test results for node functions

In [None]:
(.venv) micahshull@Micahs-iMac LG_Cursor_030_CrossSell_Upsell_Orchestrator % python3 -m agents.tier2_crosssell_upsell.tests.test_nodes
============================================================
Testing Tier 2 Cross-Sell & Upsell Orchestrator Nodes
============================================================

=== Testing goal_node ===
✅ goal_node test passed!
   Goal: Identify cross-sell and upsell opportunities for Tier 2 customer
   Focus areas: ['routine_completeness', 'ingredient_matching', 'upgrade_opportunities', 'replenishment_needs', 'routine_step_adjacency']

=== Testing goal_node with missing customer_id ===
✅ goal_node error handling test passed!
   Error: goal_node: customer_id is required

=== Testing planning_node ===
✅ planning_node test passed!
   Plan has 6 steps:
      Step 1: data_loading
      Step 2: routine_analysis
      Step 3: opportunity_detection
      Step 4: scoring
      Step 5: ranking
      Step 6: report_generation

=== Testing planning_node with missing goal ===
✅ planning_node error handling test passed!
   Error: planning_node: goal is required

=== Testing goal_node → planning_node integration ===
✅ Integration test passed!
   Customer ID: C001
   Goal: Identify cross-sell and upsell opportunities for Tier 2 customer
   Plan steps: 6

=== Testing data_loading_node ===
✅ data_loading_node test passed!
   Customer: Sarah Lee (C001)
   Products in catalog: 20
   Lookup size: 20

=== Testing data_loading_node with missing customer_id ===
✅ data_loading_node error handling test passed!
   Error: data_loading_node: customer_id is required

=== Testing data_loading_node with customer not found ===
✅ data_loading_node (customer not found) test passed!
   Error: data_loading_node: Customer C999 not found

=== Testing goal_node → planning_node → data_loading_node integration ===
✅ Integration test passed!
   Customer ID: C001
   Customer: Sarah Lee
   Products in catalog: 20
   Plan steps: 6

============================================================
✅ All tests passed!
============================================================




## Progress summary

### Completed nodes
1. `goal_node` — defines analysis goal
2. `planning_node` — creates 6-step execution plan
3. `data_loading_node` — loads customer data, product catalog, and builds lookup

### Completed utilities
- `load_customer_data()` — loads customer by ID
- `load_product_catalog()` — loads all Tier 2 products
- `build_product_lookup()` — creates fast O(1) lookup dictionary

### Test coverage
- All individual node tests passing
- Error handling tests passing
- Full integration test passing (goal → planning → data_loading)

## Next steps

Next utilities to build:
1. `analyze_customer_routine()` — identify routine gaps
2. `check_replenishment_needs()` — find products needing replenishment
3. `identify_routine_gaps()` — find missing essential categories

Then build `routine_analysis_node` to orchestrate these utilities.

