# Scale Test Duration Analysis

Analyze correlations between run duration and various metrics (blast radius, edges, observations) to understand what drives performance.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import os

# Notebook-wide plot appearance: whitegrid theme and a 12x8 default figure size
plt.style.use('seaborn-v0_8-whitegrid')
plt.rc('figure', figsize=(12, 8))

## 1. Load Data

Fetch run results from the dashboard API, or fall back to the built-in sample data when the API is not configured or the request fails.

In [None]:
# Option 1: Fetch from Dashboard API
# Both values come from the environment; an empty string means "not configured".
DASHBOARD_URL = os.getenv('SCALE_DASHBOARD_URL', '')
DASHBOARD_API_KEY = os.getenv('SCALE_DASHBOARD_API_KEY', '')

def fetch_from_api():
    """Fetch run data from the dashboard API.

    Sends ``GET <DASHBOARD_URL>/api/results`` with a bearer token and a
    30-second timeout.

    Returns:
        The decoded JSON body on success, or ``None`` when the credentials
        are not set, the HTTP request fails (connection error, timeout,
        non-2xx status), or the body is not valid JSON. A message is
        printed in every ``None`` case so the caller's fallback to sample
        data is visible in the notebook output.
    """
    if not DASHBOARD_URL or not DASHBOARD_API_KEY:
        print("Dashboard credentials not set. Using sample data.")
        return None

    # Tolerate a trailing slash in the configured base URL so the request
    # path does not become ".../​/api/results".
    url = f"{DASHBOARD_URL.rstrip('/')}/api/results"

    try:
        response = requests.get(
            url,
            headers={"Authorization": f"Bearer {DASHBOARD_API_KEY}"},
            timeout=30
        )
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers transport and HTTP-status failures;
        # ValueError covers a body that cannot be decoded as JSON. Anything
        # else is a programming error and should surface rather than be
        # silently turned into "use sample data".
        print(f"Failed to fetch from API: {e}")
        return None

# Option 2: Sample data from recent runs
# Field order shared by every row tuple below.
_SAMPLE_FIELDS = (
    "scenario",
    "duration_seconds",
    "risk_count",
    "blast_radius",
    "edges",
    "observations",
)

_SAMPLE_ROWS = [
    # Latest run data from user
    ("kms_orphan_simulation", 761, 0, 677, 1972, 246),
    ("combined_all", 1215, 0, 803, 2054, 304),
    ("combined_network", 1177, 1, 749, 1896, 281),
    ("central_sns_change", 594, 1, 719, 1989, 241),
    ("vpc_peering_change", 814, 0, 786, 2108, 278),
    ("lambda_timeout", 1445, 0, 1096, 12010, 408),
    ("shared_sg_open", 1457, 0, 846, 1965, 315),
    # Previous run data
    ("kms_orphan_simulation", 782, 0, 768, 2094, 276),
    ("combined_all", 501, 1, 543, 1853, 169),
    ("combined_network", 845, 1, 722, 2107, 228),
    ("central_sns_change", 678, 0, 598, 1727, 178),
    ("vpc_peering_change", 457, 0, 695, 2091, 209),
    ("lambda_timeout", 776, 0, 1053, 7042, 347),
    ("shared_sg_open", 636, 2, 530, 1617, 186),
]

# One dict per run, keyed by the field names above.
SAMPLE_DATA = [dict(zip(_SAMPLE_FIELDS, row)) for row in _SAMPLE_ROWS]

# Try API first, fall back to sample data
api_data = fetch_from_api()

# The payload may be a {"results": [...]} wrapper or a bare list of runs.
# A list has no .get(), so the type is checked before unwrapping.
# NOTE(review): the API schema is not visible here; confirm which shape the
# dashboard actually returns.
if isinstance(api_data, dict):
    api_runs = api_data.get('results') or []
elif isinstance(api_data, list):
    api_runs = api_data
else:
    api_runs = []

if api_data is not None and not api_runs:
    # An empty run list would build a DataFrame without columns and break
    # every later cell, so it is treated like "API unavailable".
    print("API response contained no runs. Using sample data.")

if api_runs:
    # Transform API response to match our format
    data = [
        {
            "scenario": run.get('scenario'),
            # `or 0` also covers an explicit null duration, which would
            # otherwise raise TypeError on the division.
            "duration_seconds": (run.get('overmindDurationMs') or 0) / 1000,
            "risk_count": run.get('riskCount', 0),
            "blast_radius": run.get('blastRadiusNodes', 0),
            "edges": run.get('blastRadiusEdges', 0),
            "observations": run.get('observations', 0),
        }
        for run in api_runs
    ]
    df = pd.DataFrame(data)
    print(f"Loaded {len(df)} runs from API")
else:
    df = pd.DataFrame(SAMPLE_DATA)
    print(f"Using {len(df)} sample runs")

# Derived column used by the summary tables and plots
df['duration_minutes'] = df['duration_seconds'] / 60
df.head(10)

## 2. Summary Statistics

In [None]:
# Overall distribution of duration and the three size metrics
print("=== Summary Statistics ===")
overview_cols = ['duration_minutes', 'blast_radius', 'edges', 'observations']
print(df[overview_cols].describe())

# Per-scenario aggregates; output columns are named "<metric>_<statistic>"
print("\n=== By Scenario (mean) ===")
scenario_stats = (
    df.groupby('scenario')
    .agg(
        duration_minutes_mean=('duration_minutes', 'mean'),
        duration_minutes_std=('duration_minutes', 'std'),
        duration_minutes_count=('duration_minutes', 'count'),
        blast_radius_mean=('blast_radius', 'mean'),
        edges_mean=('edges', 'mean'),
        observations_mean=('observations', 'mean'),
    )
    .round(2)
)

# Slowest scenarios first
ranked_stats = scenario_stats.sort_values('duration_minutes_mean', ascending=False)
print(ranked_stats)

## 3. Correlation Analysis

Which metrics are most correlated with duration?

In [None]:
# Pairwise correlations between duration and every metric
correlation_cols = ['duration_seconds', 'blast_radius', 'edges', 'observations', 'risk_count']
corr_matrix = df[correlation_cols].corr()

print("=== Correlation with Duration ===")
# Duration's own row is dropped; the rest are listed from most positive down
duration_corr = (
    corr_matrix['duration_seconds']
    .drop('duration_seconds')
    .sort_values(ascending=False)
)
for metric_name, r_value in duration_corr.items():
    magnitude = abs(r_value)
    if magnitude > 0.7:
        strength = "strong"
    elif magnitude > 0.4:
        strength = "moderate"
    else:
        strength = "weak"
    print(f"  {metric_name}: {r_value:.3f} ({strength})")

# Heatmap of the full matrix, colour scale fixed to [-1, 1]
tick_positions = range(len(correlation_cols))
fig, ax = plt.subplots(figsize=(8, 6))
im = ax.imshow(corr_matrix, cmap='RdYlBu_r', aspect='auto', vmin=-1, vmax=1)
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(correlation_cols, rotation=45, ha='right')
ax.set_yticklabels(correlation_cols)
plt.colorbar(im, ax=ax, label='Correlation')

# Write each coefficient into its cell (x is the column, y is the row)
for (row_idx, col_idx), cell in np.ndenumerate(corr_matrix.to_numpy()):
    ax.text(col_idx, row_idx, f'{cell:.2f}', ha='center', va='center', fontsize=10)

plt.title('Correlation Matrix')
plt.tight_layout()
plt.show()

## 4. Scatter Plots

Visualize relationships between duration and each metric.

In [None]:
# One panel per metric in a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

metrics = [
    ('blast_radius', 'Blast Radius (nodes)'),
    ('edges', 'Edges'),
    ('observations', 'Observations'),
    ('risk_count', 'Risk Count'),
]

# Color by scenario: one tab10 colour per distinct scenario name
scenarios = df['scenario'].unique()
colors = plt.cm.tab10(np.linspace(0, 1, len(scenarios)))
color_map = dict(zip(scenarios, colors))

durations = df['duration_minutes']
for panel, (column, axis_label) in zip(axes.flat, metrics):
    values = df[column]

    # Each scenario is drawn separately so it gets its own colour and legend entry
    for name in scenarios:
        in_scenario = df['scenario'] == name
        panel.scatter(
            values[in_scenario],
            durations[in_scenario],
            c=[color_map[name]],
            label=name,
            s=80,
            alpha=0.7,
        )

    # Add trend line: degree-1 least-squares fit over all runs
    trend_coeffs = np.polyfit(values, durations, 1)
    x_line = np.linspace(values.min(), values.max(), 100)
    panel.plot(x_line, np.polyval(trend_coeffs, x_line), 'r--', alpha=0.5, label='Trend')

    panel.set_xlabel(axis_label)
    panel.set_ylabel('Duration (minutes)')
    panel.set_title(f'Duration vs {axis_label}')

# Add legend: a single figure-level legend built from the first panel,
# limited to the scenario entries (the trend line comes after them)
handles, labels = axes[0, 0].get_legend_handles_labels()
scenario_count = len(scenarios)
fig.legend(
    handles[:scenario_count],
    labels[:scenario_count],
    loc='center right',
    bbox_to_anchor=(1.15, 0.5),
)

plt.tight_layout()
plt.show()

## 5. Duration Prediction Model

Fit a linear model to predict duration from metrics.

In [None]:
# Features for prediction
features = ['blast_radius', 'edges', 'observations']
X = df[features]
y = df['duration_seconds']

# Fit model (ordinary least squares on the raw feature values)
model = LinearRegression()
model.fit(X, y)

print("=== Duration Prediction Formula ===")
print(f"duration_seconds = {model.intercept_:.2f}")
for feat, coef in zip(features, model.coef_):
    if coef >= 0:
        sign = '+'
    else:
        sign = '-'
    print(f"  {sign} {abs(coef):.4f} × {feat}")

r_squared = model.score(X, y)
print(f"\nR² Score: {r_squared:.3f}")
print("(1.0 = perfect prediction, 0.0 = no predictive power)")

# Feature importance (standardized coefficients): refit on z-scored features
# so the coefficient magnitudes are comparable across metrics
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
model_scaled = LinearRegression()
model_scaled.fit(X_scaled, y)

print("\n=== Feature Importance (Standardized) ===")
standardized_coefs = model_scaled.coef_
importance = pd.DataFrame({
    'Feature': features,
    'Coefficient': standardized_coefs,
    'Abs Importance': np.abs(standardized_coefs),
}).sort_values('Abs Importance', ascending=False)

# Each feature's share of the summed absolute coefficients
importance_total = importance['Abs Importance'].sum()
for feature_name, abs_coef in zip(importance['Feature'], importance['Abs Importance']):
    pct = abs_coef / importance_total * 100
    print(f"  {feature_name}: {pct:.1f}%")

## 6. Outlier Analysis

Which runs took longer or shorter than expected?

In [None]:
# Predict duration for each run and measure how far the actual value is off,
# both in seconds and as a percentage of the prediction
df['predicted_seconds'] = model.predict(X)
df['residual'] = df['duration_seconds'] - df['predicted_seconds']
df['residual_pct'] = (df['residual'] / df['predicted_seconds']) * 100


def _print_runs(runs):
    """Print one summary line per run in *runs*, or "  None" if it is empty."""
    if runs.empty:
        print("  None")
        return
    for _, row in runs.iterrows():
        print(f"  {row['scenario']}: {row['duration_minutes']:.1f}m actual vs {row['predicted_seconds']/60:.1f}m expected ({row['residual_pct']:+.0f}%)")


print("=== Outlier Analysis ===")
print("\nRuns SLOWER than expected (residual > 20%):")
slow = df[df['residual_pct'] > 20].sort_values('residual_pct', ascending=False)
_print_runs(slow)

print("\nRuns FASTER than expected (residual < -20%):")
fast = df[df['residual_pct'] < -20].sort_values('residual_pct')
_print_runs(fast)

# Residual plot, in minutes, coloured by scenario
predicted_minutes = df['predicted_seconds'] / 60
residual_minutes = df['residual'] / 60

fig, ax = plt.subplots(figsize=(10, 6))
colors = [color_map[name] for name in df['scenario']]
ax.scatter(predicted_minutes, residual_minutes, c=colors, s=100, alpha=0.7)
ax.axhline(y=0, color='red', linestyle='--', alpha=0.5)
ax.set_xlabel('Predicted Duration (minutes)')
ax.set_ylabel('Residual (actual - predicted, minutes)')
ax.set_title('Residual Plot: Actual vs Predicted Duration')

# Annotate outliers: label only the runs that miss the prediction by more than 30%
far_off = df['residual_pct'].abs() > 30
labelled_points = zip(
    df.loc[far_off, 'scenario'],
    predicted_minutes[far_off],
    residual_minutes[far_off],
)
for name, x_pos, y_pos in labelled_points:
    ax.annotate(name, (x_pos, y_pos),
                xytext=(5, 5), textcoords='offset points', fontsize=8)

plt.tight_layout()
plt.show()

## 7. Efficiency Metrics

Normalize duration by each size metric (nodes, edges, observations) so that scenarios of different sizes can be compared fairly.

In [None]:
# Calculate efficiency metrics
def _per_unit(total, units):
    """Return ``total / units`` element-wise, with NaN where ``units`` is 0.

    A zero denominator would otherwise yield ``inf``, which turns the
    per-scenario mean into ``inf`` and breaks the bar chart. NaN is skipped
    by the ``mean`` aggregation below instead.
    """
    return total / units.replace(0, np.nan)


# Metrics can be 0 (for example when a value is missing from the loaded data),
# so every ratio goes through the zero-safe helper.
df['seconds_per_node'] = _per_unit(df['duration_seconds'], df['blast_radius'])
df['seconds_per_1k_edges'] = _per_unit(df['duration_seconds'], df['edges'] / 1000)
df['seconds_per_observation'] = _per_unit(df['duration_seconds'], df['observations'])

print("=== Efficiency Metrics (lower = faster) ===")
# Per-scenario averages of each ratio, plus mean duration for context
efficiency = df.groupby('scenario').agg({
    'seconds_per_node': 'mean',
    'seconds_per_1k_edges': 'mean',
    'seconds_per_observation': 'mean',
    'duration_minutes': 'mean'
}).round(2)

print(efficiency.sort_values('seconds_per_1k_edges', ascending=False))

# Bar chart of efficiency: three bars per scenario, side by side
fig, ax = plt.subplots(figsize=(12, 6))
x = np.arange(len(efficiency))
width = 0.25

ax.bar(x - width, efficiency['seconds_per_node'], width, label='sec/node')
ax.bar(x, efficiency['seconds_per_1k_edges'], width, label='sec/1k edges')
ax.bar(x + width, efficiency['seconds_per_observation'], width, label='sec/observation')

ax.set_xlabel('Scenario')
ax.set_ylabel('Seconds')
ax.set_title('Efficiency Metrics by Scenario')
ax.set_xticks(x)
ax.set_xticklabels(efficiency.index, rotation=45, ha='right')
ax.legend()

plt.tight_layout()
plt.show()

## 8. Key Insights

In [None]:
print("="*60)
print("KEY INSIGHTS")
print("="*60)

# Top correlated metric: rank by absolute value so a strong negative
# correlation is not passed over for a weaker positive one, and report the
# direction that the sign actually implies.
defined_corr = duration_corr.dropna()
if defined_corr.empty:
    # Every correlation is NaN (e.g. constant columns), so there is nothing to rank
    print("\n1. STRONGEST PREDICTOR: n/a")
    print("   No metric has a defined correlation with duration.")
else:
    top_corr = defined_corr.abs().idxmax()
    top_corr_val = defined_corr[top_corr]
    direction = "longer" if top_corr_val > 0 else "shorter"
    print(f"\n1. STRONGEST PREDICTOR: {top_corr}")
    print(f"   Correlation: {top_corr_val:.3f}")
    print(f"   Implication: Higher {top_corr} → {direction} duration")

# Model fit: R² is the share of duration variance the model explains on the
# data it was fitted to, not a prediction accuracy, so it is labelled as such.
r2 = model.score(X, y)
print(f"\n2. VARIANCE EXPLAINED (R²): {r2*100:.0f}%")
if r2 > 0.7:
    print("   Duration is well explained by blast radius, edges, and observations.")
else:
    print("   Other factors also significantly affect duration.")

# Slowest scenario
slowest = scenario_stats.sort_values('duration_minutes_mean', ascending=False).iloc[0]
print(f"\n3. SLOWEST SCENARIO: {slowest.name}")
print(f"   Average: {slowest['duration_minutes_mean']:.1f} minutes")

# Most variable: scenarios with a single run have an undefined (NaN) standard
# deviation and cannot be ranked, so they are excluded first.
with_spread = scenario_stats.dropna(subset=['duration_minutes_std'])
if with_spread.empty:
    print("\n4. MOST VARIABLE: n/a")
    print("   Each scenario has only one run, so variability cannot be computed.")
else:
    most_variable = with_spread.sort_values('duration_minutes_std', ascending=False).iloc[0]
    print(f"\n4. MOST VARIABLE: {most_variable.name}")
    print(f"   Std Dev: {most_variable['duration_minutes_std']:.1f} minutes")
    print("   (May benefit from investigation)")

print("\n" + "="*60)