# AI Agent KPI Dashboard

Operational metrics and analytics for GTM (go-to-market) AI Agent performance.

In [None]:
# Notebook setup: imports, project-local module path, and shared plot styling.
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Add analytics directory to path so the project-local loader can be imported.
# NOTE(review): resolved relative to the current working directory's parent,
# so this presumably expects the notebook to be launched from a sibling of
# 'analytics' - confirm.
analytics_dir = Path.cwd().parent / 'analytics'
sys.path.insert(0, str(analytics_dir))
from load_data import DataLoader

# Configure plotting style
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6), 'font.size': 10})

print('✅ Imports complete')

## Load Data

In [None]:
# Load the sample agent-run CSV through the project DataLoader.
# The CSV path is resolved relative to the current working directory's parent.
data_file = Path.cwd().parent.joinpath('data', 'sample_agent_runs.csv')

# ':memory:' is handed to DataLoader as its database target
# (presumably an in-memory SQLite database - confirm against load_data).
loader = DataLoader(':memory:')
df = loader.load_csv_to_db(str(data_file))

# Quick sanity report: row count and the span of the 'timestamp' column.
print(f'Loaded {len(df)} agent runs')
print(f'Date range: {df["timestamp"].min()} to {df["timestamp"].max()}')

## Overall Summary Metrics

In [None]:
# Fetch the loader's overall summary statistics and render them in the notebook.
# `display` is supplied by the notebook (IPython) environment.
summary = loader.get_summary_stats()
display(summary)

## A/B Test Comparison

In [None]:
# Fetch per-agent-version metrics for the A/B comparison and render the table.
# The plotting cell that follows reads columns from `ab_comparison`.
ab_comparison = loader.get_metrics_by_version()
display(ab_comparison)

In [None]:
# Visualize A/B comparison: one bar panel per KPI, each with a dashed target line.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# One color per bar, shared by every panel.
version_colors = ['#1f77b4', '#ff7f0e']

# (metric column, target value, target label, y-axis label, title, y-limits or None)
panels = [
    ('accuracy_pct', 85, 'Target (85%)', 'Accuracy (%)',
     'Task Accuracy by Agent Version', [0, 100]),
    ('avg_satisfaction', 4.0, 'Target (4.0)', 'Avg Satisfaction (1-5)',
     'User Satisfaction by Agent Version', [0, 5]),
    # Resolution time keeps matplotlib's automatic y-range (no fixed limits).
    ('avg_resolution_time', 5.0, 'Target (<5s)', 'Avg Resolution Time (s)',
     'Resolution Speed by Agent Version', None),
    ('error_rate', 2.0, 'Target (<2%)', 'Error Rate (%)',
     'Error Rate by Agent Version', [0, 5]),
]

# axes.flat walks the 2x2 grid row by row, matching the order of `panels`.
for ax, (column, target, target_label, y_label, title, y_limits) in zip(axes.flat, panels):
    ax.bar(ab_comparison['agent_version'], ab_comparison[column], color=version_colors)
    ax.axhline(y=target, color='green', linestyle='--', label=target_label)
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    if y_limits is not None:
        ax.set_ylim(y_limits)

plt.tight_layout()
plt.show()

## Performance by Task Type

In [None]:
# Fetch metrics grouped by task type and render the table.
# The plotting cell that follows reads columns from `by_task`.
by_task = loader.get_metrics_by_task_type()
display(by_task)

In [None]:
# Visualize task type performance: volume on the left, accuracy on the right.
fig, (volume_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(14, 5))
task_types = by_task['task_type']

# Task volume
volume_ax.barh(task_types, by_task['total_tasks'], color='steelblue')
volume_ax.set_xlabel('Total Tasks')
volume_ax.set_title('Task Volume by Type')

# Accuracy by task type, with the 85% target drawn as a vertical dashed line
accuracy_ax.barh(task_types, by_task['accuracy_pct'], color='seagreen')
accuracy_ax.axvline(x=85, color='red', linestyle='--', label='Target (85%)')
accuracy_ax.set_xlabel('Accuracy (%)')
accuracy_ax.set_title('Task Accuracy by Type')
accuracy_ax.legend()
accuracy_ax.set_xlim([0, 100])

plt.tight_layout()
plt.show()

## Daily Trends

In [None]:
# Fetch daily trend metrics and parse the 'date' column into datetimes
# so the plots below get a real time axis.
trends = loader.get_daily_trends()
trends = trends.assign(date=pd.to_datetime(trends['date']))
display(trends)

In [None]:
# Visualize daily trends: three stacked line charts sharing the same date column.
fig, axes = plt.subplots(3, 1, figsize=(12, 10))
volume_ax, accuracy_ax, users_ax = axes

dates = trends['date']
line_style = dict(marker='o', linewidth=2)  # common look for all three series

# Task volume trend
volume_ax.plot(dates, trends['total_tasks'], color='steelblue', **line_style)
volume_ax.set_ylabel('Total Tasks')
volume_ax.set_title('Daily Task Volume')

# Accuracy trend, with the 85% target as a dashed reference line
accuracy_ax.plot(dates, trends['accuracy_pct'], color='seagreen', **line_style)
accuracy_ax.axhline(y=85, color='red', linestyle='--', label='Target (85%)', alpha=0.7)
accuracy_ax.set_ylabel('Accuracy (%)')
accuracy_ax.set_title('Daily Task Accuracy')
accuracy_ax.set_ylim([0, 100])
accuracy_ax.legend()

# Active users trend (only the bottom panel carries the x-axis label)
users_ax.plot(dates, trends['active_users'], color='darkorange', **line_style)
users_ax.set_ylabel('Active Users')
users_ax.set_xlabel('Date')
users_ax.set_title('Daily Active Users')

# Light grid on every panel
for ax in axes:
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## User Satisfaction Distribution

In [None]:
# Calculate satisfaction distribution: count of runs per rating value,
# ignoring runs without a rating, ordered from the highest rating down.
satisfaction_dist = df['user_rating'].dropna().value_counts().sort_index(ascending=False)

# Plot one horizontal bar per rating value
plt.figure(figsize=(10, 6))
bars = plt.barh(satisfaction_dist.index.astype(str) + ' stars', satisfaction_dist.values, color='skyblue')
plt.xlabel('Count')
plt.title('User Satisfaction Distribution')

# Add percentage labels just to the right of each bar's end.
# (If there are no ratings at all the loop body never runs, so `total` is
# never used as a zero divisor.)
total = satisfaction_dist.sum()
for bar, count in zip(bars, satisfaction_dist.values):
    pct = count / total * 100
    plt.text(count + 0.2, bar.get_y() + bar.get_height() / 2, f'{pct:.1f}%', va='center')

plt.tight_layout()
plt.show()

# Series.mean() skips missing ratings by default.
print(f'Average satisfaction: {df["user_rating"].mean():.2f} / 5.0')

## Key Insights

Based on the data analysis:

### Positive Signals
- Task accuracy is consistently above the 85% target
- User satisfaction is strong, at a 4.0+ average
- Resolution speed is well under the 5s target
- Error rate is low (<2%)

### Areas for Improvement
- User adoption is growing but remains below the Phase 1 target
- The Agent B variant did not outperform Agent A in the current test
- A larger sample size is needed for confident A/B conclusions

### Recommendations
1. Continue with Agent A while iterating on Agent B based on feedback
2. Drive adoption through enablement and manager endorsement
3. Extend the A/B test duration to reach statistical significance
4. Consider a Phase 2 rollout to the SDR team, given the strong metrics

In [None]:
# Final cell: close the loader (and with it the database connection),
# then confirm the notebook ran to the end.
loader.close()
print('✅ Dashboard complete')