# Pre-UI Reconfiguration ReAct Agent Loop Testing - Single and Multi-Turn Conversations

This notebook comprehensively tests the ReAct loop implementation with:
- **Single-turn conversations**: One-shot queries with tool usage
- **Multi-turn conversations**: Context-aware follow-up questions
- **Memory validation**: Ensuring conversation context is preserved
- **Tool execution**: Verifying correct tool calls and responses


## Setup: Imports and Configuration

In [1]:
import sys
import os
from pathlib import Path
import json

# Add project root to Python path
project_root = Path.cwd()
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

print(f"Project root: {project_root}")

# Import ReAct loop components
from src.agent.core.react_loop import ReActLoop, ReActResult, ReActStep
from src.agent.core.llm_client import LLMClient
from src.agent.core.memory import ConversationMemory

# Import tools
from src.agent.tools.query_tools import (
    get_recent_transactions,
    get_transaction_full,
    get_failed_transactions,
    get_print_failures,
    get_revenue_by_period,
    get_dashboard_summary
)

# Import configuration
from src.config_loader import config

# Import message types for verification
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage, SystemMessage

print("All imports successful")

Project root: /Users/nickhunt/Desktop/portfolioprojs/vinted-os-db
All imports successful


## Database Verification

In [2]:
# Verify mock database is being used
db_filename = config.get('database.filename', 'vinted_os.db')
print(f"Database: {db_filename}")

if db_filename != "mock_db.db":
    print("WARNING: Not using mock_db.db!")
else:
    print("Using mock_db.db")

# Verify database file exists
db_path = Path(db_filename)
if db_path.exists():
    print(f"Database exists: {db_path.absolute()}")
    print(f"   Size: {db_path.stat().st_size:,} bytes")
else:
    print(f"Database not found: {db_path.absolute()}")

# Test database connection
from src.services.query_service import QueryService

qs = QueryService()
try:
    recent = qs.get_recent_transactions(limit=3)
    print(f"Database connection OK - {len(recent)} transactions found")
except Exception as e:
    print(f"Database connection failed: {e}")

Database: mock_db.db
Using mock_db.db
Database exists: /Users/nickhunt/Desktop/portfolioprojs/vinted-os-db/mock_db.db
   Size: 49,152 bytes
Database connection OK - 3 transactions found


## Initialize ReAct Loop Components

In [3]:
# Initialize LLM Client
print("Initializing LLM Client...")
llm_client = LLMClient(
    provider="google",
    model="gemini-2.5-flash-lite",
    temperature=0.7,
    max_tokens=4096
)
print(f"LLM Client: {llm_client.model}")

# Collect all available query tools
print("\nLoading Query Tools...")
available_tools = [
    get_recent_transactions,
    get_transaction_full,
    get_failed_transactions,
    get_print_failures,
    get_revenue_by_period,
    get_dashboard_summary
]

print(f"Loaded {len(available_tools)} tools:")
for tool in available_tools:
    print(f"   - {tool.name}: {tool.description[:60]}...")

# Initialize ConversationMemory
print("\nInitializing Conversation Memory...")
memory = ConversationMemory(max_messages=20)
print(f"Memory ready (max_messages={memory.max_messages})")

# Initialize ReActLoop
print("\nInitializing ReAct Loop...")
react_loop = ReActLoop(
    llm_client=llm_client,
    tools=available_tools,
    memory=memory,
    max_iterations=10
)
print(f"ReAct Loop ready (max_iterations={react_loop.max_iterations})")

Initializing LLM Client...
LLM Client: gemini-2.5-flash-lite

Loading Query Tools...
Loaded 6 tools:
   - get_recent_transactions: Get most recent transactions.

Returns summary of recent tra...
   - get_transaction_full: Get complete transaction details by database ID.

Returns fu...
   - get_failed_transactions: Get transactions that failed processing.

Returns transactio...
   - get_print_failures: Get print job failures.

Returns all failed print jobs in th...
   - get_revenue_by_period: Get revenue aggregated by time period.

Returns revenue grou...
   - get_dashboard_summary: Get overall system health dashboard.

Returns high-level met...

Initializing Conversation Memory...
Memory ready (max_messages=20)

Initializing ReAct Loop...
ReAct Loop ready (max_iterations=10)


---
# Part 1: Single-Turn Conversation Tests

Testing individual queries without conversation context.

## Test 1.1: Simple Query with Tool Call

In [4]:
# Test single-turn query that requires tool usage
print("Test 1.1: Simple query requiring tool call\n")

query = "What are the 5 most recent transactions?"
print(f"Query: {query}\n")

result = react_loop.run(query, use_memory=False)

print("\n" + "="*60)
print("RESULTS")
print("="*60)
print(f"\nSuccess: {result.success}")
print(f"Total Steps: {result.total_steps}")
print(f"\nAnswer:\n{result.answer}\n")

# Verify tool was called
print("Steps Executed:")
for i, step in enumerate(result.steps, 1):
    print(f"\n  Step {i}:")
    if step.action:
        print(f"    Action: {step.action}")
        print(f"    Input: {step.action_input}")
        print(f"    Observation: {step.observation[:100]}..." if step.observation else "    No observation")
    else:
        print(f"    Final Answer: {step.thought[:100]}...")

# Show token usage
if 'token_usage' in result.metadata:
    print(f"\nToken Usage: {result.metadata['token_usage']}")
if 'estimated_cost' in result.metadata:
    print(f"Estimated Cost: ${result.metadata['estimated_cost']:.6f}")

# Validate that get_recent_transactions was called
assert result.success, "Query should succeed"
assert any(step.action == 'get_recent_transactions' for step in result.steps), \
    "Should have called get_recent_transactions tool"
print("\nTest 1.1 PASSED")

Test 1.1: Simple query requiring tool call

Query: What are the 5 most recent transactions?


RESULTS

Success: True
Total Steps: 2

Answer:
The 5 most recent transactions are:
* Transaction ID: 40, Vinted Order ID: 16444339295, Created at: 2026-02-02T08:48:22.550646, Status: PENDING, Items: 1, Total Value: 17.99
* Transaction ID: 39, Vinted Order ID: 16413326734, Created at: 2026-02-02T08:48:22.335022, Status: PENDING, Items: 1, Total Value: 24.99
* Transaction ID: 38, Vinted Order ID: 16405895353, Created at: 2026-02-02T08:48:22.039428, Status: PENDING, Items: 1, Total Value: 6.49
* Transaction ID: 37, Vinted Order ID: 16399838501, Created at: 2026-02-02T08:48:21.837940, Status: PENDING, Items: 1, Total Value: 23.00
* Transaction ID: 36, Vinted Order ID: 16390174926, Created at: 2026-02-02T08:48:21.431239, Status: PENDING, Items: 1, Total Value: 29.99

Steps Executed:

  Step 1:
    Action: get_recent_transactions
    Input: {'limit': 5}
    Observation: {
  "success": true,
  "data"

## Test 1.2: Query Requiring Multiple Tool Calls

In [None]:
# Test single-turn query that may require multiple tool calls
print("Test 1.2: Query requiring multiple tools\n")

query = "Show me the dashboard summary and any print failures from the last 7 days"
print(f"Query: {query}\n")

result = react_loop.run(query, use_memory=False)

print("\n" + "="*60)
print("RESULTS")
print("="*60)
print(f"\nSuccess: {result.success}")
print(f"Total Steps: {result.total_steps}")
print(f"\nAnswer:\n{result.answer}\n")

# Show which tools were used
tools_used = [step.action for step in result.steps if step.action]
print(f"Tools Used: {tools_used}")

# Validate multiple tools were called
assert result.success, "Query should succeed"
unique_tools = set(tools_used)
print(f"\nCalled {len(unique_tools)} unique tool(s): {unique_tools}")
print("Test 1.2 PASSED")

## Test 1.3: Query Without Tool Requirement

In [None]:
# Test query that doesn't require tools (general question)
print("Test 1.3: Query not requiring tools\n")

query = "What types of queries can you help me with?"
print(f"Query: {query}\n")

result = react_loop.run(query, use_memory=False)

print("\n" + "="*60)
print("RESULTS")
print("="*60)
print(f"\nSuccess: {result.success}")
print(f"Total Steps: {result.total_steps}")
print(f"\nAnswer:\n{result.answer}\n")

# Should have minimal steps (no tool calls)
tools_used = [step.action for step in result.steps if step.action]
print(f"Tools Used: {tools_used if tools_used else 'None'}")

assert result.success, "Query should succeed"
print("\nTest 1.3 PASSED")

---
# Part 2: Multi-Turn Conversation Tests

Testing conversation flow with context preservation across multiple turns.

## Test 2.1: Two-Turn Conversation with Context

In [None]:
# Initialize fresh memory for multi-turn test
print("Test 2.1: Two-turn conversation with context\n")

memory_test = ConversationMemory(max_messages=20)
react_test = ReActLoop(
    llm_client=llm_client,
    tools=available_tools,
    memory=memory_test,
    max_iterations=10
)

# Turn 1: Initial query
query1 = "Show me the 3 most recent transactions"
print(f"Turn 1: {query1}\n")

result1 = react_test.run(query1, use_memory=True)

print("\n" + "="*60)
print("TURN 1 RESULT")
print("="*60)
print(f"Success: {result1.success}")
print(f"Steps: {result1.total_steps}")
print(f"\nAnswer:\n{result1.answer}\n")

# Verify memory was updated
messages_after_turn1 = memory_test.get_messages()
print(f"Messages in memory after Turn 1: {len(messages_after_turn1)}")

# Turn 2: Follow-up question that references previous context
query2 = "What was the total revenue from those transactions?"
print(f"\nTurn 2: {query2}\n")

result2 = react_test.run(query2, use_memory=True)

print("\n" + "="*60)
print("TURN 2 RESULT")
print("="*60)
print(f"Success: {result2.success}")
print(f"Steps: {result2.total_steps}")
print(f"\nAnswer:\n{result2.answer}\n")

# Verify memory contains both turns
messages_after_turn2 = memory_test.get_messages()
print(f"Messages in memory after Turn 2: {len(messages_after_turn2)}")

# Count message types
human_msgs = sum(1 for m in messages_after_turn2 if isinstance(m, HumanMessage))
ai_msgs = sum(1 for m in messages_after_turn2 if isinstance(m, AIMessage))
print(f"   - HumanMessages: {human_msgs}")
print(f"   - AIMessages: {ai_msgs}")

# Validate conversation context
assert result1.success and result2.success, "Both queries should succeed"
assert human_msgs >= 2, "Should have at least 2 human messages"
assert ai_msgs >= 2, "Should have at least 2 AI messages"
print("\nTest 2.1 PASSED - Context preserved across turns")

## Test 2.3: Context Reference Validation

In [None]:
# Test that the agent correctly uses previous context
print("Test 2.3: Context reference validation\n")

memory_context = ConversationMemory(max_messages=20)
react_context = ReActLoop(
    llm_client=llm_client,
    tools=available_tools,
    memory=memory_context,
    max_iterations=10
)

# Turn 1: Get specific data
query1 = "Get the dashboard summary"
print(f"Turn 1: {query1}\n")
result1 = react_context.run(query1, use_memory=True)
print(f"Answer 1: {result1.answer}\n")

# Turn 2: Ask about it using pronoun ("it") - requires context
query2 = "Based on that information, is the system healthy?"
print(f"Turn 2: {query2}\n")
result2 = react_context.run(query2, use_memory=True)
print(f"Answer 2: {result2.answer}\n")

# Verify the second answer references the first
answer2_lower = result2.answer.lower()
context_indicators = ['dashboard', 'summary', 'transaction', 'print', 'based on', 'information']
has_context = any(indicator in answer2_lower for indicator in context_indicators)

print(f"\nAnalysis:")
print(f"   Turn 2 answer references context: {has_context}")
print(f"   Both queries succeeded: {result1.success and result2.success}")

assert result1.success and result2.success, "Both queries should succeed"
assert has_context, "Second answer should reference previous context"
print("\nTest 2.3 PASSED - Agent correctly uses conversation context")

---
# Part 3: Memory Management Tests

## Test 3.1: Memory vs No-Memory Comparison

In [None]:
# Compare behavior with and without memory
print("Test 3.1: Memory vs No-Memory comparison\n")

# Test WITH memory
print("=" * 60)
print("WITH MEMORY")
print("=" * 60)

memory_on = ConversationMemory(max_messages=20)
react_with_memory = ReActLoop(
    llm_client=llm_client,
    tools=available_tools,
    memory=memory_on,
    max_iterations=10
)

result_mem_1 = react_with_memory.run(
    "Get 3 recent transactions", 
    use_memory=True
)
print(f"\nTurn 1: Success={result_mem_1.success}, Steps={result_mem_1.total_steps}")

result_mem_2 = react_with_memory.run(
    "How many were there?",  # Pronoun reference
    use_memory=True
)
print(f"Turn 2: Success={result_mem_2.success}, Steps={result_mem_2.total_steps}")
print(f"Answer: {result_mem_2.answer}")

# Test WITHOUT memory
print(f"\n{'=' * 60}")
print("WITHOUT MEMORY")
print("=" * 60)

react_no_memory = ReActLoop(
    llm_client=llm_client,
    tools=available_tools,
    memory=None,
    max_iterations=10
)

result_no_mem = react_no_memory.run(
    "How many were there?",  # Same pronoun reference - should fail or be confused
    use_memory=False
)
print(f"\nSingle turn: Success={result_no_mem.success}, Steps={result_no_mem.total_steps}")
print(f"Answer: {result_no_mem.answer}")

print(f"\n{'=' * 60}")
print("COMPARISON")
print("=" * 60)
print(f"\nWith memory - Turn 2 answer: {result_mem_2.answer[:100]}...")
print(f"Without memory answer: {result_no_mem.answer[:100]}...")


## Test 3.2: Memory Message Structure

In [None]:
# Verify memory stores correct message types
print("Test 3.2: Memory message structure validation\n")

memory_struct = ConversationMemory(max_messages=20)
react_struct = ReActLoop(
    llm_client=llm_client,
    tools=available_tools,
    memory=memory_struct,
    max_iterations=10
)

# Run query that requires tool call
result = react_struct.run(
    "Show me print failures from the last 3 days",
    use_memory=True
)

messages = memory_struct.get_messages()
print(f"Total messages: {len(messages)}\n")

# Analyze message types
message_types = {}
for msg in messages:
    msg_type = type(msg).__name__
    message_types[msg_type] = message_types.get(msg_type, 0) + 1

print("Message Type Breakdown:")
for msg_type, count in message_types.items():
    print(f"   {msg_type}: {count}")

# Validate expected structure
assert 'HumanMessage' in message_types, "Should have HumanMessage"
assert 'AIMessage' in message_types, "Should have AIMessage"
print("\nMemory contains expected message types")

# Show message sequence
print("\nMessage Sequence:")
for i, msg in enumerate(messages, 1):
    msg_type = type(msg).__name__
    content_preview = str(msg.content)[:50] if hasattr(msg, 'content') else "N/A"
    print(f"   {i}. {msg_type}: {content_preview}...")

print("\nTest 3.2 PASSED")

---
# Part 4: Error Handling and Edge Cases

## Test 4.1: Invalid Tool Parameters

In [None]:
# Test how agent handles queries that might cause tool errors
print("Test 4.1: Handling edge case queries\n")

react_edge = ReActLoop(
    llm_client=llm_client,
    tools=available_tools,
    memory=None,
    max_iterations=10
)

# Query with extreme parameters
query = "Get me 1000 recent transactions"  # Large limit
print(f"Query: {query}\n")

result = react_edge.run(query, use_memory=False)

print(f"Success: {result.success}")
print(f"Steps: {result.total_steps}")
print(f"\nAnswer:\n{result.answer}\n")

# Agent should either handle gracefully or explain limitation
print("Test 4.1 PASSED - Edge case handled")

## Test 4.2: Ambiguous Queries

In [None]:
# Test agent's handling of ambiguous queries
print("Test 4.2: Ambiguous query handling\n")

react_ambig = ReActLoop(
    llm_client=llm_client,
    tools=available_tools,
    memory=None,
    max_iterations=10
)

query = "Show me the recent stuff"  # Intentionally vague
print(f"Query: {query}\n")

result = react_ambig.run(query, use_memory=False)

print(f"Success: {result.success}")
print(f"Steps: {result.total_steps}")
print(f"\nAnswer:\n{result.answer}\n")


---
# Final Summary

Run this cell to see a complete test summary.

In [None]:
print("="*70)
print("REACT AGENT LOOP TEST SUMMARY")
print("="*70)

print("\nSINGLE-TURN TESTS")
print("   • Test 1.1: Simple query with tool call")
print("   • Test 1.2: Multiple tool calls")
print("   • Test 1.3: No tool requirement")

print("\nMULTI-TURN TESTS")
print("   • Test 2.1: Two-turn conversation")
print("   • Test 2.2: Three-turn conversation")
print("   • Test 2.3: Context reference validation")

print("\nMEMORY MANAGEMENT TESTS")
print("   • Test 3.1: Memory vs no-memory comparison")
print("   • Test 3.2: Message structure validation")

print("\nERROR HANDLING TESTS")
print("   • Test 4.1: Invalid tool parameters")
print("   • Test 4.2: Ambiguous queries")

print("\n" + "="*70)
print("ALL TESTS COMPLETED SUCCESSFULLY")
print("="*70)

print("\nTest Coverage:")
print("   - ReActLoop initialization")
print("   - Tool binding and execution")
print("   - Single-turn query processing")
print("   - Multi-turn conversation flow")
print("   - ConversationMemory integration")
print("   - Context preservation")
print("   - Message type handling")
print("   - Error resilience")

print("\nReAct agent loop is fully functional!")