# Queue Simulation Analysis - Clean Data & Visualization

This notebook provides clean data analysis of 4 software development workflow scenarios:
1. **Traditional PR** - Individual developers with PR review process
2. **AI-Enhanced PR** - AI-assisted developers with PR review
3. **Pair Programming** - Traditional pairs with trunk-based development
4. **AI-Enhanced Pairs** - AI-assisted pairs with trunk-based development

Focus: Clean data output for analysis and visualization.

In [None]:
# Import Required Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from datetime import datetime, timedelta
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Dict, Optional
import random

# Set random seed for reproducible results.
# Both the stdlib and NumPy generators are seeded; the defect checks in this
# notebook draw from the stdlib `random` module.
random.seed(42)
np.random.seed(42)

# Configure plotting: matplotlib's 'default' style, seaborn's "husl" palette,
# and a 12x8 default figure size for figures that do not pass their own figsize.
plt.style.use('default')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (12, 8)

# Visible confirmation that the setup cell ran.
print("✅ Libraries imported successfully")

In [None]:
# Core Data Classes (Clean, No Debug Output)

class TicketState(Enum):
    """Workflow stages a ticket can be in; each member's value is its string label."""
    # Default state of a freshly constructed Ticket.
    BACKLOG = "backlog"
    IN_PROGRESS = "in_progress"
    IN_REVIEW = "in_review"
    # State the scenario classes in this notebook assign to finished tickets.
    COMPLETED = "completed"

@dataclass
class Ticket:
    """A unit of work tracked through the simulated workflow.

    Only ``id`` is required. The timestamp fields start as ``None`` and are
    filled in by whichever scenario processes the ticket.
    """
    id: int
    state: TicketState = TicketState.BACKLOG
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    coding_start_time: Optional[datetime] = None
    coding_end_time: Optional[datetime] = None
    rework_cycles: int = 0  # number of rework passes recorded for this ticket

    @property
    def lead_time(self) -> timedelta:
        """Elapsed time between ``started_at`` and ``completed_at``.

        Returns:
            ``completed_at - started_at``, or a zero ``timedelta`` while
            either timestamp is still unset.
        """
        missing_endpoint = not (self.started_at and self.completed_at)
        if missing_endpoint:
            return timedelta(0)
        return self.completed_at - self.started_at

@dataclass
class Developer:
    """A worker in the simulation: a solo developer, or a pair modelled as one unit.

    ``is_ai_enhanced`` and ``is_pair`` select the speed and defect figures
    exposed by the two properties below.
    """
    id: int
    name: str
    is_ai_enhanced: bool = False
    is_pair: bool = False

    @property
    def coding_speed_multiplier(self) -> float:
        """Factor by which baseline coding time is divided.

        AI-enhanced workers get 1.43 (the original note derives it as
        1 / 0.7, i.e. 30% less time); everyone else gets 1.0.
        """
        if self.is_ai_enhanced:
            return 1.43
        return 1.0

    @property
    def defect_rate(self) -> float:
        """Probability that a ticket produced by this worker needs rework.

        Solo baseline is 0.30; solo AI-enhanced is 0.21 (30% fewer defects);
        any pair is 0.18 (40% fewer than the 0.30 baseline).
        """
        if not self.is_pair:
            if self.is_ai_enhanced:
                return 0.21
            return 0.30
        # NOTE(review): is_ai_enhanced is not consulted for pairs, so AI pairs
        # and regular pairs share the same rate - confirm this is intended.
        return 0.18

class DefectManager:
    """Decides whether a ticket has a defect and how long the rework takes."""

    def check_for_defects(self, developer: Developer) -> bool:
        """Draw one random number and report a defect if it falls below
        ``developer.defect_rate``."""
        roll = random.random()
        return roll < developer.defect_rate

    def calculate_rework_time(self, original_time: timedelta, developer: Developer) -> timedelta:
        """Return the rework duration: a quarter of ``original_time``.

        ``developer`` is accepted but does not influence the result.
        """
        rework_fraction = 0.25
        return original_time * rework_fraction

print("✅ Core classes defined")

In [None]:
# Clean Simulation Classes (Data-Focused)

class CleanSimulation:
    """Shared bookkeeping for the workflow scenarios.

    Holds the completed tickets, a day counter and one metrics row per
    recorded day. Subclasses drive a run by calling ``advance_day``, appending
    finished tickets to ``completed_tickets`` and then calling
    ``record_daily_metrics``.
    """

    def __init__(self, scenario_name: str):
        self.scenario_name = scenario_name
        self.completed_tickets = []
        self.defect_manager = DefectManager()
        self.daily_data = []  # one dict per recorded day
        self.current_day = 0

    def advance_day(self):
        """Move the simulation clock forward by one day."""
        self.current_day += 1

    def _avg_lead_time_hours(self) -> float:
        """Mean lead time of all completed tickets in hours (0.0 when none)."""
        if not self.completed_tickets:
            return 0.0
        total_hours = sum(
            t.lead_time.total_seconds() / 3600 for t in self.completed_tickets
        )
        return total_hours / len(self.completed_tickets)

    def _tickets_per_day(self) -> float:
        """Completed tickets divided by elapsed days (0 before the first day)."""
        if self.current_day > 0:
            return len(self.completed_tickets) / self.current_day
        return 0

    def _total_rework_cycles(self) -> int:
        """Sum of ``rework_cycles`` over all completed tickets."""
        return sum(t.rework_cycles for t in self.completed_tickets)

    def record_daily_metrics(self, tickets_completed_today: int):
        """Append a metrics row for the current day to ``daily_data``.

        Args:
            tickets_completed_today: Number of tickets finished during the
                current day; stored as given.

        All other values are cumulative over every ticket completed so far.
        """
        self.daily_data.append({
            'day': self.current_day,
            'scenario': self.scenario_name,
            'tickets_completed_today': tickets_completed_today,
            'total_tickets_completed': len(self.completed_tickets),
            'cumulative_throughput': self._tickets_per_day(),
            'avg_lead_time_hours': round(self._avg_lead_time_hours(), 2),
            'total_rework_cycles': self._total_rework_cycles()
        })

    def get_summary_metrics(self) -> Dict:
        """Return the end-of-run summary as a dict.

        Uses the same aggregation helpers as the daily rows, so the summary
        and the last daily row cannot disagree. Averages and rates are
        rounded to two decimals.
        """
        return {
            'scenario': self.scenario_name,
            'total_tickets': len(self.completed_tickets),
            'avg_lead_time_hours': round(self._avg_lead_time_hours(), 2),
            'tickets_per_day': round(self._tickets_per_day(), 2),
            'total_rework_cycles': self._total_rework_cycles(),
            'simulation_days': self.current_day
        }

print("✅ Clean simulation base class ready")

In [None]:
# Traditional PR Scenario (Clean, Data-Focused)

class TraditionalPRScenario(CleanSimulation):
    """Eight non-AI developers, each finishing one ticket per simulated day,
    with a fixed 2-hour PR review added to every ticket."""

    def __init__(self):
        super().__init__("Traditional PR")

    def run_simulation(self, days: int = 30) -> Dict:
        """Simulate ``days`` days of traditional PR work.

        Args:
            days: Number of days to simulate.

        Returns:
            The dict produced by ``get_summary_metrics``.
        """
        team = [
            Developer(number, f"Dev{number}", is_ai_enhanced=False)
            for number in range(1, 9)
        ]
        baseline = timedelta(hours=8)        # coding time before speed adjustment
        review_duration = timedelta(hours=2)  # PR review added after coding

        # NOTE(review): every developer completes exactly one ticket per day,
        # so the speed multiplier changes lead time but not throughput.
        for _ in range(days):
            self.advance_day()
            finished_today = 0

            for member in team:
                begin = datetime.now()
                coding_done = begin + baseline / member.coding_speed_multiplier

                # A defect adds one rework pass, sized from the 8-hour baseline.
                rework_count = 0
                if self.defect_manager.check_for_defects(member):
                    rework_count = 1
                    coding_done += self.defect_manager.calculate_rework_time(baseline, member)

                self.completed_tickets.append(
                    Ticket(
                        id=len(self.completed_tickets) + 1,
                        state=TicketState.COMPLETED,
                        started_at=begin,
                        completed_at=coding_done + review_duration,
                        coding_start_time=begin,
                        coding_end_time=coding_done,
                        rework_cycles=rework_count,
                    )
                )
                finished_today += 1

            self.record_daily_metrics(finished_today)

        return self.get_summary_metrics()

print("✅ Traditional PR scenario ready")

In [None]:
# AI-Enhanced PR Scenario

class AIEnhancedPRScenario(CleanSimulation):
    """Eight AI-enhanced developers, each finishing one ticket per simulated
    day, with a fixed 1.5-hour PR review added to every ticket."""

    def __init__(self):
        super().__init__("AI-Enhanced PR")

    def run_simulation(self, days: int = 30) -> Dict:
        """Simulate ``days`` days of AI-assisted PR work.

        Args:
            days: Number of days to simulate.

        Returns:
            The dict produced by ``get_summary_metrics``.
        """
        team = [
            Developer(number, f"AI-Dev{number}", is_ai_enhanced=True)
            for number in range(1, 9)
        ]
        baseline = timedelta(hours=8)           # coding time before speed adjustment
        review_duration = timedelta(hours=1.5)  # shorter review than the 2h traditional one

        # NOTE(review): every developer completes exactly one ticket per day,
        # so the speed multiplier changes lead time but not throughput.
        for _ in range(days):
            self.advance_day()
            finished_today = 0

            for member in team:
                begin = datetime.now()
                # Dividing by the multiplier shortens the coding phase.
                coding_done = begin + baseline / member.coding_speed_multiplier

                # Rework is sized from the unadjusted 8-hour baseline.
                rework_count = 0
                if self.defect_manager.check_for_defects(member):
                    rework_count = 1
                    coding_done += self.defect_manager.calculate_rework_time(baseline, member)

                self.completed_tickets.append(
                    Ticket(
                        id=len(self.completed_tickets) + 1,
                        state=TicketState.COMPLETED,
                        started_at=begin,
                        completed_at=coding_done + review_duration,
                        coding_start_time=begin,
                        coding_end_time=coding_done,
                        rework_cycles=rework_count,
                    )
                )
                finished_today += 1

            self.record_daily_metrics(finished_today)

        return self.get_summary_metrics()

print("✅ AI-Enhanced PR scenario ready")

In [None]:
# Pair Programming Scenarios

class PairProgrammingScenario(CleanSimulation):
    """Four non-AI pairs, each finishing one ticket per simulated day, with
    no review step (trunk-based development)."""

    def __init__(self):
        super().__init__("Pair Programming")

    def run_simulation(self, days: int = 30) -> Dict:
        """Simulate ``days`` days of pair programming.

        Args:
            days: Number of days to simulate.

        Returns:
            The dict produced by ``get_summary_metrics``.
        """
        # Each Developer object with is_pair=True stands for one pair.
        pairs = [
            Developer(number, f"Pair{number}", is_ai_enhanced=False, is_pair=True)
            for number in range(1, 5)
        ]
        # 16 hours per ticket: the 8-hour baseline counted for two people.
        # NOTE(review): this adds person-hours to the ticket's elapsed lead
        # time - confirm that is the intended model.
        pair_effort = timedelta(hours=16)

        for _ in range(days):
            self.advance_day()
            finished_today = 0

            for pair in pairs:
                begin = datetime.now()
                coding_done = begin + pair_effort / pair.coding_speed_multiplier

                # A defect adds one rework pass, sized from the 16-hour figure.
                rework_count = 0
                if self.defect_manager.check_for_defects(pair):
                    rework_count = 1
                    coding_done += self.defect_manager.calculate_rework_time(pair_effort, pair)

                # No review phase: the ticket completes when coding ends.
                self.completed_tickets.append(
                    Ticket(
                        id=len(self.completed_tickets) + 1,
                        state=TicketState.COMPLETED,
                        started_at=begin,
                        completed_at=coding_done,
                        coding_start_time=begin,
                        coding_end_time=coding_done,
                        rework_cycles=rework_count,
                    )
                )
                finished_today += 1

            self.record_daily_metrics(finished_today)

        return self.get_summary_metrics()

class AIEnhancedPairScenario(CleanSimulation):
    """Four AI-enhanced pairs, each finishing one ticket per simulated day,
    with no review step (trunk-based development)."""

    def __init__(self):
        super().__init__("AI-Enhanced Pairs")

    def run_simulation(self, days: int = 30) -> Dict:
        """Simulate ``days`` days of AI-assisted pair programming.

        Args:
            days: Number of days to simulate.

        Returns:
            The dict produced by ``get_summary_metrics``.
        """
        # Each Developer object with is_pair=True stands for one pair.
        pairs = [
            Developer(number, f"AI-Pair{number}", is_ai_enhanced=True, is_pair=True)
            for number in range(1, 5)
        ]
        # 16 hours per ticket before the AI speed multiplier is applied.
        pair_effort = timedelta(hours=16)

        for _ in range(days):
            self.advance_day()
            finished_today = 0

            for pair in pairs:
                begin = datetime.now()
                coding_done = begin + pair_effort / pair.coding_speed_multiplier

                # Defect probability is whatever pair.defect_rate reports;
                # rework is sized from the unadjusted 16-hour figure.
                rework_count = 0
                if self.defect_manager.check_for_defects(pair):
                    rework_count = 1
                    coding_done += self.defect_manager.calculate_rework_time(pair_effort, pair)

                # No review phase: the ticket completes when coding ends.
                self.completed_tickets.append(
                    Ticket(
                        id=len(self.completed_tickets) + 1,
                        state=TicketState.COMPLETED,
                        started_at=begin,
                        completed_at=coding_done,
                        coding_start_time=begin,
                        coding_end_time=coding_done,
                        rework_cycles=rework_count,
                    )
                )
                finished_today += 1

            self.record_daily_metrics(finished_today)

        return self.get_summary_metrics()

print("✅ Pair programming scenarios ready")

In [None]:
# Run All Simulations and Generate Clean Data

def run_complete_simulation(days: int = 30) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Run the four workflow scenarios and collect their results.

    Args:
        days: Number of days each scenario simulates.

    Returns:
        ``(summary_df, daily_df)``: one summary row per scenario, and one row
        per scenario per simulated day, in the order the scenarios ran.
    """
    print(f"🚀 Running {days}-day simulation for all scenarios...")

    # Insertion order here is the run order and the row order of both frames.
    lineup = {
        'Traditional PR': TraditionalPRScenario(),
        'AI-Enhanced PR': AIEnhancedPRScenario(),
        'Pair Programming': PairProgrammingScenario(),
        'AI-Enhanced Pairs': AIEnhancedPairScenario()
    }

    summaries = []
    daily_rows = []
    for label, sim in lineup.items():
        print(f"  Running {label}...")
        summaries.append(sim.run_simulation(days))
        # Each scenario accumulates its per-day rows on itself during the run.
        daily_rows += sim.daily_data

    summary_df = pd.DataFrame(summaries)
    daily_df = pd.DataFrame(daily_rows)

    print("✅ Simulation complete!")
    return summary_df, daily_df

# Run the simulation for 30 days. The two results become notebook-level names
# that the visualization and export cells below read.
# `daily_data` here is a DataFrame; the list of the same name inside
# run_complete_simulation is a separate local.
summary_data, daily_data = run_complete_simulation(days=30)

# Print the per-scenario summary table without the DataFrame index column.
print("\n📊 Summary Results:")
print(summary_data.to_string(index=False))

# Quick look at the daily frame: (rows, columns) and the column names.
print("\n📈 Daily data shape:", daily_data.shape)
print("Daily data columns:", list(daily_data.columns))

In [None]:
# Visualization 1: Cumulative Throughput Over Time

# Reads the notebook-level `daily_data` frame; draws one line per scenario
# showing the running total of completed tickets by simulated day.
plt.figure(figsize=(14, 8))

# Plot cumulative tickets completed over time
for scenario in daily_data['scenario'].unique():
    scenario_data = daily_data[daily_data['scenario'] == scenario]
    plt.plot(scenario_data['day'], scenario_data['total_tickets_completed'],
             marker='o', linewidth=2, label=scenario, markersize=4)

plt.title('Cumulative Tickets Completed Over Time', fontsize=16, fontweight='bold')
plt.xlabel('Day', fontsize=12)
plt.ylabel('Total Tickets Completed', fontsize=12)
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Show final numbers: the rows for the last simulated day, one per scenario.
# The day count in the heading comes from the data, so it stays correct when
# the simulation is run for something other than 30 days.
last_day = daily_data['day'].max()
final_day_data = daily_data[daily_data['day'] == last_day]
print(f"📊 Final Throughput Results ({last_day} days):")
for _, row in final_day_data.iterrows():
    print(f"  {row['scenario']}: {row['total_tickets_completed']} tickets ({row['cumulative_throughput']:.1f}/day)")

In [None]:
# Visualization 2: Lead Time Comparison

# Reads the notebook-level `daily_data` and `summary_data` frames.
plt.figure(figsize=(14, 8))

# Lead time over time: one line per scenario. 'avg_lead_time_hours' in each
# daily row is the running mean over all tickets completed up to that day.
for scenario in daily_data['scenario'].unique():
    scenario_data = daily_data[daily_data['scenario'] == scenario]
    plt.plot(scenario_data['day'], scenario_data['avg_lead_time_hours'], 
             marker='s', linewidth=2, label=scenario, markersize=4)

plt.title('Average Lead Time Over Time', fontsize=16, fontweight='bold')
plt.xlabel('Day', fontsize=12)
plt.ylabel('Average Lead Time (hours)', fontsize=12)
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Summary statistics: end-of-run average lead time per scenario.
print("📊 Lead Time Summary:")
print(summary_data[['scenario', 'avg_lead_time_hours']].to_string(index=False))

In [None]:
# Visualization 3: Daily Throughput Comparison

# Reads the notebook-level `summary_data` frame; draws two side-by-side bar
# charts (throughput and rework) and prints throughput relative to the
# 'Traditional PR' row.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Daily throughput bar chart
summary_data.plot(x='scenario', y='tickets_per_day', kind='bar', ax=ax1, color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'])
ax1.set_title('Average Daily Throughput', fontsize=14, fontweight='bold')
ax1.set_ylabel('Tickets per Day', fontsize=12)
ax1.set_xlabel('Scenario', fontsize=12)
ax1.tick_params(axis='x', rotation=45)
ax1.grid(True, alpha=0.3)

# Rework cycles comparison. The day count in the title is taken from the
# summary rows instead of being hard-coded, so it matches the actual run.
simulated_days = summary_data['simulation_days'].max()
summary_data.plot(x='scenario', y='total_rework_cycles', kind='bar', ax=ax2, color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'])
ax2.set_title(f'Total Rework Cycles ({simulated_days} days)', fontsize=14, fontweight='bold')
ax2.set_ylabel('Total Rework Cycles', fontsize=12)
ax2.set_xlabel('Scenario', fontsize=12)
ax2.tick_params(axis='x', rotation=45)
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Calculate improvements relative to the Traditional PR throughput.
traditional_tpd = summary_data[summary_data['scenario'] == 'Traditional PR']['tickets_per_day'].iloc[0]
print("\n🚀 Productivity Improvements vs Traditional PR:")
for _, row in summary_data.iterrows():
    if row['scenario'] != 'Traditional PR':
        if traditional_tpd > 0:
            improvement = ((row['tickets_per_day'] / traditional_tpd - 1) * 100)
            print(f"  {row['scenario']}: {improvement:+.1f}%")
        else:
            # A zero baseline (e.g. a 0-day run) has no meaningful ratio;
            # say so instead of printing inf/nan.
            print(f"  {row['scenario']}: n/a (baseline throughput is zero)")

In [None]:
# Export Data for Further Analysis

# Save to CSV files in the current working directory, without the index column.
summary_data.to_csv('simulation_summary.csv', index=False)
daily_data.to_csv('simulation_daily_data.csv', index=False)

print("💾 Data exported to CSV files:")
print("  - simulation_summary.csv (summary metrics)")
print("  - simulation_daily_data.csv (daily time series data)")

# Display final summary table
print("\n📋 FINAL SUMMARY TABLE:")
print("=" * 80)
display_cols = ['scenario', 'tickets_per_day', 'avg_lead_time_hours', 'total_rework_cycles']
print(summary_data[display_cols].to_string(index=False))

print("\n✅ Clean data analysis complete!")
print("📊 You now have clean data ready for graphing and analysis")
# Report the number of days actually present in the daily data rather than a
# hard-coded 30, so the message stays true for other run lengths.
print(f"📈 Time series data shows trends over {daily_data['day'].max()} days")
print("🎯 All debug logs removed - pure data focus")