In [12]:
import pandas as pd
import matplotlib.pyplot as plt

# Define file names
CSV_FILE = "/Users/i/Downloads/ultimate_results.csv"  # input: needs 'agent' and 'response' columns
PLOT_OUTPUT_TOKEN_LEN_FILE = "avg_output_token_len_by_agent.png"  # output image (relative path)

# ============================================================
# APPLY STYLING
# ============================================================
# Use the 'ggplot' style for a clean, visually appealing look.
# NOTE: this changes matplotlib's global style for every later plot as well.
plt.style.use('ggplot')

# ============================================================
# READ AND TRANSFORM DATA
# ============================================================
df = pd.read_csv(CSV_FILE)

# 1. Calculate Response Length (character count as a proxy for token length).
# Non-string values are stringified before measuring. Missing responses are
# counted as length 0: casting NaN to str would otherwise produce the literal
# text "nan" and report a bogus length of 3, skewing the per-agent averages.
responses = df['response']
df['response_len'] = responses.astype(str).str.len().where(responses.notna(), 0)

# 2. Aggregate Data: one row per agent holding the mean response length.
agent_avg_len = df.groupby("agent")["response_len"].mean().reset_index()

# 3. Sort longest-first so the bars are drawn in descending order.
agent_avg_len = agent_avg_len.sort_values(by="response_len", ascending=False)

# ============================================================
# PLOT
# ============================================================
# Draws one bar per row of `agent_avg_len` (columns 'agent' and
# 'response_len'), writes each bar's value above it, and saves the figure to
# PLOT_OUTPUT_TOKEN_LEN_FILE.

# Define a set of POPPING, high-contrast colors
POP_COLORS = [
    "#E63946",       # Bright Red
    "#457B9D",       # Steel Blue
    "#FFB703",       # Vivid Yellow/Orange
    "#0096C7",       # Bright Cyan
    "#2A9D8F",       # Teal
    "#A8DADC",       # Light Sky Blue
    "#70E000",       # Electric Lime Green
    "#F77F00",       # Dark Orange
    "#6A057F",       # Deep Purple
    "#003049",       # Navy Blue
]

# Pass at most one palette entry per agent.
num_agents = len(agent_avg_len)
colors = POP_COLORS[:num_agents]

# Set up the plot with a larger size for impact
fig, ax = plt.subplots(figsize=(12, 7))

# Create the bars
bars = ax.bar(
    agent_avg_len["agent"],
    agent_avg_len["response_len"],
    color=colors,
    edgecolor='black',  # Black edge for definition
    alpha=0.9,
)

# Title and Labels
ax.set_title(
    "Average Output Token Length (Character Count) per Agent",
    fontsize=16,
    fontweight='bold',
    color='#333333',
    pad=20,
)
ax.set_ylabel("Average Output Length (Characters)", fontsize=12, fontweight='semibold')
ax.set_xlabel("Agent", fontsize=12, fontweight='semibold')

# Add Data Labels for clear value comparison and ensure they don't clip.
# max() of an empty column is NaN; `NaN > 0` is False, so the flag below is
# only True when there is a usable, positive maximum to scale against.
max_len = agent_avg_len["response_len"].max()
has_positive_max = bool(max_len > 0)
Y_OFFSET = max_len * 0.015 if has_positive_max else 0.0  # Dynamic offset based on max value

for bar in bars:
    height = bar.get_height()
    ax.text(
        bar.get_x() + bar.get_width() / 2.,  # horizontal centre of the bar
        height + Y_OFFSET,  # sit just above the bar top
        f'{height:.0f}',  # Format as integer for clean look
        ha='center',
        va='bottom',
        fontsize=10,
        fontweight='bold',
    )

# Customize Ticks and Grid
# Extend the Y-axis limit to 15% above the highest bar to fit the labels.
# Skipped when there is no positive maximum: set_ylim rejects NaN limits, and a
# (0, 0) range is degenerate, so autoscaling is left in charge in that case.
if has_positive_max:
    ax.set_ylim(0, max_len * 1.15)
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

# Final adjustments and save; operate on this figure explicitly rather than
# on whatever pyplot currently considers the "current" figure.
fig.tight_layout()
fig.savefig(PLOT_OUTPUT_TOKEN_LEN_FILE)
plt.close(fig)

print(f"Plot of average output token length saved as {PLOT_OUTPUT_TOKEN_LEN_FILE}")

Plot of average output token length saved as avg_output_token_len_by_agent.png
