# Multi-Agent vs Single Model Comparison

**Experiment:** Compare the debate and manager-worker strategies against a single-model baseline

**Date:** 2025-10-26

**Goals:**
- Run the same tasks with multiple strategies
- Compare accuracy, latency, cost
- Identify where multi-agent helps

In [None]:
import sys
# Extend the module search path with the sibling `code` directory. The path is
# relative, so it resolves against the kernel's current working directory.
# Presumably this is where the `harness` package lives — TODO confirm.
sys.path.append('../code')

# NOTE(review): ExperimentResult and compare_experiments are not referenced in
# the cells visible here; keep them if later cells rely on them.
from harness import (
    run_strategy,
    ExperimentConfig,
    ExperimentResult,
    get_tracker,
    compare_experiments
)
import pandas as pd
# NOTE(review): plt is likewise unused in the visible cells.
import matplotlib.pyplot as plt

## 1. Define Task

In [None]:
# The single task every strategy in this notebook is run against.
# Only task['input'] is passed to run_strategy in the cells below.
task = {
    'id': 'reasoning_001',
    'input': (
        'A farmer has 17 sheep. All but 9 die. How many are left?'
    ),
    'type': 'reasoning',
}

## 2. Run Single Model Baseline

In [None]:
# Configuration record for the single-model baseline run.
config_single = ExperimentConfig(
    experiment_name='single_baseline',
    task_type='reasoning',
    strategy='single',
    provider='ollama',
    model='llama3.2:latest'
)

# Register the baseline config with the tracker before the run is executed.
# NOTE(review): none of the visible cells record a result on the tracker or
# close the experiment afterwards — confirm whether the harness expects that.
tracker = get_tracker()
tracker.start_experiment(config_single)

# Execute the baseline on the task prompt. The strategy, provider and model
# are repeated here as literals rather than read from config_single, so they
# have to be kept in sync with the config above by hand.
result_single = run_strategy(
    'single',
    task['input'],
    provider='ollama',
    model='llama3.2:latest'
)

# Show the model's answer and the latency (formatted to two decimals).
print(f"Output: {result_single.output}")
print(f"Latency: {result_single.latency_s:.2f}s")

## 3. Run Debate Strategy

In [None]:
# Run the same task prompt through the 'debate' strategy with three debaters,
# using the same provider and model literals as the baseline cell.
# NOTE(review): unlike the baseline cell, no ExperimentConfig is built and
# tracker.start_experiment() is not called for this run — confirm whether the
# debate run is meant to be tracked as well.
result_debate = run_strategy(
    'debate',
    task['input'],
    n_debaters=3,
    provider='ollama',
    model='llama3.2:latest'
)

print(f"Output: {result_debate.output}")
print(f"Latency: {result_debate.latency_s:.2f}s")
print("\nDebater arguments:")
# Preview each debater's argument, numbered from 1. The ellipsis is appended
# only when the text was actually cut at 100 characters, so a short argument
# is no longer displayed as if it had been truncated.
# Assumes metadata['arguments'] is a sequence of strings — TODO confirm.
for i, arg in enumerate(result_debate.metadata['arguments'], start=1):
    preview = arg if len(arg) <= 100 else f"{arg[:100]}..."
    print(f"{i}. {preview}")

## 4. Compare Results

In [None]:
# Side-by-side metrics table: one row per strategy, in the order listed below.
# 'tokens' is the sum of input and output tokens reported on each result.
comparison = pd.DataFrame([
    {
        'strategy': label,
        'latency_s': outcome.latency_s,
        'tokens': outcome.tokens_in + outcome.tokens_out,
        'cost': outcome.cost_usd,
    }
    for label, outcome in (
        ('single', result_single),
        ('debate', result_debate),
    )
])

print(comparison)