# Soldier Pipeline Testing

Simple notebook to test the alignment pipeline. Uses:
- **Jina** for embeddings and reranking
- **OpenRouter** (Cerebras/Groq) for fast LLM calls

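All setup is handled by `bootstrap()` — just run the cells in order, top to bottom (later cells reuse `run`, `engine`, and `ctx` defined in earlier ones). The first cell calls `load_dotenv()`, so environment variables are read from a `.env` file.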

In [7]:
# Setup
import asyncio
import nest_asyncio
from dotenv import load_dotenv

# Load environment variables from a .env file (python-dotenv) before anything
# else reads configuration from the environment.
load_dotenv()
# Patch asyncio so run_until_complete() can be called while an event loop is
# already running, which is the situation inside a notebook kernel.
nest_asyncio.apply()

def run(coro):
    """Drive ``coro`` to completion on the current event loop and return its result."""
    loop = asyncio.get_event_loop()
    return loop.run_until_complete(coro)

print("Setup complete!")

Setup complete!


In [8]:
# Bootstrap engine and stores
from soldier.bootstrap import bootstrap, create_sample_rule, create_sample_scenario

# Build the engine and the context object carrying the tenant/agent/session ids.
engine, ctx = bootstrap(log_level="INFO")

# Echo the generated identifiers, one per line.
print(
    f"Tenant: {ctx.tenant_id}",
    f"Agent: {ctx.agent_id}",
    f"Session: {ctx.session_id}",
    sep="\n",
)

{"event": "jina_providers_created", "level": "info", "timestamp": "[PHONE]T02:00:06.730530Z"}
{"tenant_id": "79f522b8-dd35-4685-bf37-6aa9f387f314", "agent_id": "53bf3f[PHONE]e-b38f-2316c4557686", "model": "openrouter/openai/gpt-oss-120b", "provider_order": ["Cerebras", "Groq", "SambaNova"], "event": "engine_bootstrapped", "level": "info", "timestamp": "[PHONE]T02:00:06.759657Z"}
{"session_id": "0be0b7af-d362-4b5e-97f4-98d5e1e19dee", "event": "session_created", "level": "info", "timestamp": "[PHONE]T02:00:06.761077Z"}


Tenant: 79f522b8-dd35-4685-bf37-6aa9f387f314
Agent: 53bf3f72-9974-438e-b38f-2316c4557686
Session: 0be0b7af-d362-4b5e-97f4-98d5e1e19dee


## Create Rules

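Rules define agent behavior. Each rule pairs a condition (when it applies) with an action (what the agent should do) and carries a numeric priority; a rule can also be flagged as a hard constraint.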

In [9]:
# Create sample rules
async def setup_rules():
    """Create the four sample rules, one after another.

    Each spec below is forwarded as keyword arguments to
    ``create_sample_rule`` together with the shared ``ctx``; the calls are
    awaited sequentially in the order listed.
    """
    rule_specs = (
        {
            "name": "Greeting Rule",
            "condition_text": "User sends a greeting or says hello",
            "action_text": "Respond with a friendly greeting and ask how you can help",
            "priority": 80,
        },
        {
            "name": "Order Status Rule",
            "condition_text": "User asks about their order status or tracking",
            "action_text": "Ask for order number if not provided, then look up status",
            "priority": 90,
        },
        {
            "name": "Professional Language",
            "condition_text": "Any customer interaction",
            "action_text": "Always use professional, helpful language",
            "priority": 100,
            "is_hard_constraint": True,
        },
        {
            "name": "Refund Request Rule",
            "condition_text": "User asks for a refund or wants money back",
            "action_text": "Express understanding, ask for order number, explain refund policy",
            "priority": 85,
        },
    )
    for spec in rule_specs:
        await create_sample_rule(ctx, **spec)


run(setup_rules())
print("Rules created!")

{"model": "jina-embeddings-v3", "task": "retrieval.query", "dimensions": 1024, "num_texts": 1, "event": "jina_embed_request", "level": "debug", "timestamp": "[PHONE]T02:00:06.783240Z"}
{"model": "jina-embeddings-v3", "num_embeddings": 1, "dimensions": 1024, "event": "jina_embed_success", "level": "debug", "timestamp": "[PHONE]T02:00:07.906569Z"}
{"name": "Greeting Rule", "has_embedding": true, "event": "rule_created", "level": "info", "timestamp": "[PHONE]T02:00:07.907373Z"}
{"model": "jina-embeddings-v3", "task": "retrieval.query", "dimensions": 1024, "num_texts": 1, "event": "jina_embed_request", "level": "debug", "timestamp": "[PHONE]T02:00:07.908004Z"}
{"model": "jina-embeddings-v3", "num_embeddings": 1, "dimensions": 1024, "event": "jina_embed_success", "level": "debug", "timestamp": "[PHONE]T02:00:08.543205Z"}
{"name": "Order Status Rule", "has_embedding": true, "event": "rule_created", "level": "info", "timestamp": "[PHONE]T02:00:08.544906Z"}
{"model": "jina-embeddings-v3", "tas

Rules created!


In [10]:
# Create sample scenario
# Register a single scenario; its fields are passed straight through to
# create_sample_scenario along with the shared ctx.
run(
    create_sample_scenario(
        ctx,
        name="Order Support Flow",
        description="Handle customer order inquiries",
        entry_condition_text="User asks about an order",
        step_instructions="Ask for order number and provide status",
    )
)
print("Scenario created!")

{"model": "jina-embeddings-v3", "task": "retrieval.query", "dimensions": 1024, "num_texts": 1, "event": "jina_embed_request", "level": "debug", "timestamp": "[PHONE]T02:00:09.514176Z"}
{"model": "jina-embeddings-v3", "num_embeddings": 1, "dimensions": 1024, "event": "jina_embed_success", "level": "debug", "timestamp": "[PHONE]T02:00:09.976082Z"}
{"name": "Order Support Flow", "has_embedding": true, "event": "scenario_created", "level": "info", "timestamp": "[PHONE]T02:00:09.976801Z"}


Scenario created!


## Helper Function

In [11]:
def display_result(result, message):
    """Pretty-print one pipeline turn to stdout.

    Prints, in order: the user message, the generated response, one latency
    line per pipeline step plus the total, the matched rules with a score,
    and (when present) the situational snapshot.

    Args:
        result: Turn result object. The attributes read here are
            ``response``, ``pipeline_timings`` (items exposing ``step``,
            ``skipped`` and ``duration_ms``), ``total_time_ms``,
            ``matched_rules`` (items exposing ``rule.name`` and optionally
            ``relevance_score`` / ``match_score``) and
            ``situational_snapshot`` (``language``, ``sentiment``, ``topic``),
            which is skipped when falsy.
        message: The user message that produced ``result``; echoed in the header.

    Returns:
        None.
    """
    banner = "=" * 70

    print(banner)
    print(f"MESSAGE: {message}")
    print(banner)
    print(f"\nRESPONSE:\n{result.response}")
    print(f"\n{banner}")
    print("PIPELINE LATENCIES:")
    print(banner)
    for timing in result.pipeline_timings:
        # Skipped steps are labelled rather than formatted as a duration.
        status = "SKIPPED" if timing.skipped else f"{timing.duration_ms:.2f}ms"
        print(f"  {timing.step:25} {status}")
    print(f"  {'─' * 40}")
    print(f"  {'TOTAL':25} {result.total_time_ms:.2f}ms")

    print(f"\nMatched Rules: {len(result.matched_rules)}")
    for mr in result.matched_rules:
        # Prefer relevance_score, then match_score, then 0.0. The checks are
        # `is None` (not truthiness) so a genuine 0.0 score is kept, while an
        # attribute that exists but holds None no longer breaks the
        # ":.2f" formatting below.
        score = getattr(mr, "relevance_score", None)
        if score is None:
            score = getattr(mr, "match_score", None)
        if score is None:
            score = 0.0
        print(f"  - {mr.rule.name} (score: {score:.2f})")

    snapshot = result.situational_snapshot
    if snapshot:
        print("\nSituational Snapshot:")
        print(f"  Language: {snapshot.language}")
        print(f"  Sentiment: {snapshot.sentiment}")
        print(f"  Topic: {snapshot.topic}")
    print()

## Test Messages

In [12]:
# Test 1: a plain greeting sent through the pipeline
message = "Hello! I need some help today."

# Process the turn for the bootstrapped tenant/agent/session, then print it.
result = run(
    engine.process_turn(
        message=message,
        session_id=ctx.session_id,
        tenant_id=ctx.tenant_id,
        agent_id=ctx.agent_id,
    )
)
display_result(result, message)

{"session_id": "0be0b7af-d362-4b5e-97f4-98d5e1e19dee", "tenant_id": "79f522b8-dd35-4685-bf37-6aa9f387f314", "agent_id": "53bf3f[PHONE]e-b38f-2316c4557686", "message_length": 30, "event": "processing_turn", "level": "info", "timestamp": "[PHONE]T02:00:09.999147Z"}
{"tenant_id": "79f522b8-dd35-4685-bf37-6aa9f387f314", "customer_id": "a255b789-4fdb-4af0-a88b-996b97b86521", "channel": "api", "event": "customer_created", "level": "info", "timestamp": "[PHONE]T02:00:09.999967Z"}
{"tenant_id": "79f522b8-dd35-4685-bf37-6aa9f387f314", "agent_id": "53bf3f[PHONE]e-b38f-2316c4557686", "item_count": 0, "event": "glossary_loaded", "level": "info", "timestamp": "[PHONE]T02:00:10.000568Z"}
{"tenant_id": "79f522b8-dd35-4685-bf37-6aa9f387f314", "agent_id": "53bf3f[PHONE]e-b38f-2316c4557686", "field_count": 0, "event": "customer_data_schema_loaded", "level": "info", "timestamp": "[PHONE]T02:00:10.001155Z"}
{"customer_id": "a255b789-4fdb-4af0-a88b-996b97b86521", "tenant_id": "79f522b8-dd35-4685-bf37-6aa9f

MESSAGE: Hello! I need some help today.

RESPONSE:
Hello! How can I help you today?

PIPELINE LATENCIES:
  customer_resolution       0.64ms
  reconciliation            SKIPPED
  situational_sensor        3144.69ms
  retrieval                 977.86ms
  rule_filtering            1293.87ms
  scenario_filtering        0.14ms
  requirement_check         4.61ms
  tool_execution            SKIPPED
  response_planning         2.20ms
  generation                1403.77ms
  enforcement               SKIPPED
  ────────────────────────────────────────
  TOTAL                     6835.16ms

Matched Rules: 1
  - Greeting Rule (score: 1.00)

Situational Snapshot:
  Language: en
  Sentiment: Sentiment.NEUTRAL
  Topic: help



In [13]:
# Test 2: an order-status question, sent on the same session as Test 1
message = "Where is my order? I ordered something last week."

# Process the turn for the bootstrapped tenant/agent/session, then print it.
result = run(
    engine.process_turn(
        message=message,
        session_id=ctx.session_id,
        tenant_id=ctx.tenant_id,
        agent_id=ctx.agent_id,
    )
)
display_result(result, message)

{"session_id": "0be0b7af-d362-4b5e-97f4-98d5e1e19dee", "tenant_id": "79f522b8-dd35-4685-bf37-6aa9f387f314", "agent_id": "53bf3f[PHONE]e-b38f-2316c4557686", "message_length": 49, "event": "processing_turn", "level": "info", "timestamp": "[PHONE]T02:00:16.862919Z"}
{"tenant_id": "79f522b8-dd35-4685-bf37-6aa9f387f314", "customer_id": "a255b789-4fdb-4af0-a88b-996b97b86521", "channel": "api", "event": "customer_resolved", "level": "info", "timestamp": "[PHONE]T02:00:16.865608Z"}
{"session_id": "0be0b7af-d362-4b5e-97f4-98d5e1e19dee", "pending_migration": false, "version_mismatch": true, "event": "executing_reconciliation", "level": "info", "timestamp": "[PHONE]T02:00:16.867062Z"}
{"session_id": "0be0b7af-d362-4b5e-97f4-98d5e1e19dee", "session_version": null, "current_version": 1, "event": "version_mismatch_detected", "level": "info", "timestamp": "[PHONE]T02:00:16.868088Z"}
{"session_id": "0be0b7af-d362-4b5e-97f4-98d5e1e19dee", "scenario_id": "[PHONE]b6-4175-94ef-c6e95a1d7b3c", "event": "fal

MESSAGE: Where is my order? I ordered something last week.

RESPONSE:
I’m happy to help track your order. Could you please share your order number or the email address you used when placing the order? Once I have that information, I can look up the status for you.

PIPELINE LATENCIES:
  customer_resolution       1.34ms
  reconciliation            5.10ms
  situational_sensor        5613.76ms
  retrieval                 840.57ms
  rule_filtering            1406.38ms
  scenario_filtering        0.03ms
  requirement_check         SKIPPED
  tool_execution            SKIPPED
  response_planning         2.26ms
  generation                1434.84ms
  enforcement               SKIPPED
  ────────────────────────────────────────
  TOTAL                     9316.80ms

Matched Rules: 0

Situational Snapshot:
  Language: en
  Sentiment: Sentiment.NEUTRAL
  Topic: order_status



In [14]:
# Test 3: a refund request for a damaged product
message = "I want a refund. The product arrived damaged."

# Process the turn for the bootstrapped tenant/agent/session, then print it.
result = run(
    engine.process_turn(
        message=message,
        session_id=ctx.session_id,
        tenant_id=ctx.tenant_id,
        agent_id=ctx.agent_id,
    )
)
display_result(result, message)

{"session_id": "0be0b7af-d362-4b5e-97f4-98d5e1e19dee", "tenant_id": "79f522b8-dd35-4685-bf37-6aa9f387f314", "agent_id": "53bf3f[PHONE]e-b38f-2316c4557686", "message_length": 45, "event": "processing_turn", "level": "info", "timestamp": "[PHONE]T02:00:26.201880Z"}
{"tenant_id": "79f522b8-dd35-4685-bf37-6aa9f387f314", "customer_id": "a255b789-4fdb-4af0-a88b-996b97b86521", "channel": "api", "event": "customer_resolved", "level": "info", "timestamp": "[PHONE]T02:00:26.202963Z"}
{"tenant_id": "79f522b8-dd35-4685-bf37-6aa9f387f314", "agent_id": "53bf3f[PHONE]e-b38f-2316c4557686", "item_count": 0, "event": "glossary_loaded", "level": "info", "timestamp": "[PHONE]T02:00:26.203573Z"}
{"tenant_id": "79f522b8-dd35-4685-bf37-6aa9f387f314", "agent_id": "53bf3f[PHONE]e-b38f-2316c4557686", "field_count": 0, "event": "customer_data_schema_loaded", "level": "info", "timestamp": "[PHONE]T02:00:26.204054Z"}
{"customer_id": "a255b789-4fdb-4af0-a88b-996b97b86521", "tenant_id": "79f522b8-dd35-4685-bf37-6aa9

MESSAGE: I want a refund. The product arrived damaged.

RESPONSE:
I’m sorry to hear that the product arrived damaged. Let’s get this sorted quickly.

1. Could you please provide your order number (or the email you used for the purchase)?
2. If possible, send a photo of the damage—this helps us process the refund faster.

Once I have that information, I’ll initiate the refund and arrange for a return label if needed. Thank you for your patience.

PIPELINE LATENCIES:
  customer_resolution       0.55ms
  reconciliation            SKIPPED
  situational_sensor        1631.07ms
  retrieval                 647.65ms
  rule_filtering            1297.46ms
  scenario_filtering        0.03ms
  requirement_check         SKIPPED
  tool_execution            SKIPPED
  response_planning         4.43ms
  generation                1522.40ms
  enforcement               SKIPPED
  ────────────────────────────────────────
  TOTAL                     5108.70ms

Matched Rules: 0

Situational Snapshot:
  Langua

## Try Your Own

In [None]:
# Edit `message` below to send your own text through the pipeline
message = "Can you help me track order #12345?"

# Process the turn for the bootstrapped tenant/agent/session, then print it.
result = run(
    engine.process_turn(
        message=message,
        session_id=ctx.session_id,
        tenant_id=ctx.tenant_id,
        agent_id=ctx.agent_id,
    )
)
display_result(result, message)



{"session_id": "0be0b7af-d362-4b5e-97f4-98d5e1e19dee", "tenant_id": "79f522b8-dd35-4685-bf37-6aa9f387f314", "agent_id": "53bf3f[PHONE]e-b38f-2316c4557686", "message_length": 35, "event": "processing_turn", "level": "info", "timestamp": "[PHONE]T05:14:24.649005Z"}
{"tenant_id": "79f522b8-dd35-4685-bf37-6aa9f387f314", "customer_id": "a255b789-4fdb-4af0-a88b-996b97b86521", "channel": "api", "event": "customer_resolved", "level": "info", "timestamp": "[PHONE]T05:14:24.689385Z"}
{"tenant_id": "79f522b8-dd35-4685-bf37-6aa9f387f314", "agent_id": "53bf3f[PHONE]e-b38f-2316c4557686", "item_count": 0, "event": "glossary_loaded", "level": "info", "timestamp": "[PHONE]T05:14:24.712126Z"}
{"tenant_id": "79f522b8-dd35-4685-bf37-6aa9f387f314", "agent_id": "53bf3f[PHONE]e-b38f-2316c4557686", "field_count": 0, "event": "customer_data_schema_loaded", "level": "info", "timestamp": "[PHONE]T05:14:24.714492Z"}
{"customer_id": "a255b789-4fdb-4af0-a88b-996b97b86521", "tenant_id": "79f522b8-dd35-4685-bf37-6aa9

MESSAGE: Can you help me track order #12345?

RESPONSE:
I’d be happy to help you check on that order. Could you please confirm a few details for me?

1. The email address or phone number associated with the order.  
2. Any shipping or billing name you used.

Once I have that information, I can look up the current status of order #12345 for you.

PIPELINE LATENCIES:
  customer_resolution       8.79ms
  reconciliation            SKIPPED
  situational_sensor        SKIPPED
  retrieval                 1865.31ms
  rule_filtering            3570.35ms
  scenario_filtering        1.96ms
  requirement_check         SKIPPED
  tool_execution            SKIPPED
  response_planning         16.59ms
  generation                1778.69ms
  enforcement               SKIPPED
  ────────────────────────────────────────
  TOTAL                     7444.23ms

Matched Rules: 0



## Notes

**Expected Latencies (with Cerebras/Groq routing):**

| Step | Time | Notes |
|------|------|-------|
| customer_resolution | <1ms | In-memory lookup |
| situational_sensor | 2-5s | LLM extracts context |
| retrieval | 500ms | Jina embedding + search |
| rule_filtering | 1-3s | LLM judges rules |
| generation | 1-3s | LLM generates response |

**Without provider routing** = 20-30s per LLM call  
**With provider routing** = 1-5s per LLM call