
# ETA Performance — Discovery Notebook (Unsupervised Baseline & Peak)

**Generated:** 2025-11-28 19:57:26

This notebook **automatically discovers** baseline and peak (incident) windows from `cleaned_eta_logs.csv` using
per-minute aggregation and robust anomaly detection on the **p95 execution time**. It then verifies degradation and
**compares** the discovered windows against any previously saved manual windows (if `artifact_verification_case.json` is present).

**Data file SHA256:** ``

---



## Environment & Requirements
- Python 3.9+
- `pandas`, `numpy`, `matplotlib`

> No internet required. All plots are generated locally.


In [32]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
from datetime import datetime, timedelta
import hashlib, platform, json, os

plt.style.use('seaborn-v0_8')
pd.options.display.max_rows = 50

CSV_PATH = 'datasets/cleaned_eta_logs.csv'

# Dataset fingerprint for audit trail
try:
    sha = hashlib.sha256(open(CSV_PATH,'rb').read()).hexdigest()
except FileNotFoundError:
    sha = None
print('Dataset:', CSV_PATH)
print('SHA256 :', sha)
print('Python :', platform.python_version())
print('Pandas :', pd.__version__)


Dataset: datasets/cleaned_eta_logs.csv
SHA256 : b7c4fe3646d472cf92b9221abd54302fb82712f5969e9dabb942b2f459dabacd
Python : 3.12.11
Pandas : 2.2.3


In [33]:

# 1) Load & normalize

df = pd.read_csv(CSV_PATH)
assert 'datetime' in df.columns and 'execution_time' in df.columns, 'CSV must contain datetime and execution_time columns'

df['datetime'] = pd.to_datetime(df['datetime'])
for col in ['is_slow','is_very_slow','is_critical']:
    if col in df.columns and df[col].dtype != bool:
        df[col] = df[col].astype(str).str.lower().map({'true':True,'false':False})

print('Rows   :', len(df))
print('Start  :', df['datetime'].min())
print('End    :', df['datetime'].max())
print('Agents :', df['agent_type'].nunique(), sorted(df['agent_type'].unique()))


Rows   : 158186
Start  : 2025-11-06 00:00:00.850000
End    : 2025-11-07 03:33:49.509000
Agents : 2 ['eta_agent', 'eta_iagent']


In [34]:

# 2) Per-minute aggregation & robust anomaly detection

df_min = df.copy()
df_min['minute_ts'] = df_min['datetime'].dt.floor('min')
agg = df_min.groupby('minute_ts').agg(
    n=('execution_time','size'),
    mean_exec=('execution_time','mean'),
    p95_exec=('execution_time', lambda s: s.quantile(0.95)),
    max_exec=('execution_time','max')
).sort_index()

# Robust z on per-minute p95
med = agg['p95_exec'].median()
mad = (agg['p95_exec'] - med).abs().median()
agg['robust_z'] = 0 if mad==0 else 0.6745 * (agg['p95_exec'] - med) / mad

print('Minutes aggregated:', len(agg))
print('Median p95:', round(med,3), 'MAD:', round(mad,3))

# Heuristic: anomaly minutes where robust_z>3
anom = agg[agg['robust_z']>3]
print('Anomaly minutes (z>3):', len(anom))


Minutes aggregated: 1654
Median p95: 5.775 MAD: 2.937
Anomaly minutes (z>3): 202


In [35]:

# 3) Discover peak cluster (incident window) and baseline window automatically
# Strategy:
# - Group contiguous anomaly minutes into clusters; pick the cluster with highest avg p95_exec.
# - Peak window = cluster start -> cluster end.
# - Baseline window = same duration window with lowest avg p95_exec that does not overlap anomalies.

# Helper to form contiguous clusters
clusters = []
current = []
prev_ts = None
for ts in anom.index:
    if prev_ts is None or (ts - prev_ts == pd.Timedelta(minutes=1)):
        current.append(ts)
    else:
        clusters.append(current)
        current = [ts]
    prev_ts = ts
if current:
    clusters.append(current)

cluster_stats = []
for cl in clusters:
    start = cl[0]; end = cl[-1] + pd.Timedelta(minutes=1)  # exclusive end
    block = agg.loc[start:end - pd.Timedelta(seconds=1)]
    cluster_stats.append({
        'start': start,
        'end': end,
        'duration_min': len(cl),
        'avg_p95': float(block['p95_exec'].mean()),
        'max_p95': float(block['p95_exec'].max())
    })

# Choose peak: cluster with highest avg_p95 (or max_p95 as tiebreaker)
peak_window = None
if cluster_stats:
    cluster_stats.sort(key=lambda d: (d['avg_p95'], d['max_p95']), reverse=True)
    peak_window = cluster_stats[0]
    print('Selected PEAK cluster:', peak_window)
else:
    # Fallback: take top-N minute window by p95_exec average (e.g., 50 minutes)
    dur = 50
    roll = agg['p95_exec'].rolling(f'{dur}T').mean()
    idx = roll.idxmax()
    if pd.isna(idx):
        raise RuntimeError('No data to form a peak window.')
    start = idx - pd.Timedelta(minutes=dur-1)
    end = idx + pd.Timedelta(minutes=1)
    block = agg.loc[start:end - pd.Timedelta(seconds=1)]
    peak_window = {
        'start': start,
        'end': end,
        'duration_min': len(block),
        'avg_p95': float(block['p95_exec'].mean()),
        'max_p95': float(block['p95_exec'].max())
    }
    print('Fallback PEAK window:', peak_window)

# Build candidate baseline windows of same duration
dur = peak_window['duration_min']
start_all = agg.index.min(); end_all = agg.index.max() + pd.Timedelta(minutes=1)

# Slide a window of length 'dur' across the timeline to find minimal avg p95, excluding anomaly minutes
anom_minutes = set(anom.index)

best_baseline = None
cursor = start_all
while cursor + pd.Timedelta(minutes=dur) <= end_all:
    w_start = cursor
    w_end = cursor + pd.Timedelta(minutes=dur)
    # minutes = pd.date_range(w_start, w_end - pd.Timedelta(minutes=1), freq='T')
    minutes = pd.date_range(w_start, w_end - pd.Timedelta(minutes=1), freq='min')
    # Exclude if any minute is anomalous or overlaps peak window
    if any(m in anom_minutes for m in minutes):
        cursor += pd.Timedelta(minutes=1)
        continue
    if not (w_end <= peak_window['start'] or w_start >= peak_window['end']):
        cursor += pd.Timedelta(minutes=1)
        continue
    block = agg.loc[w_start:w_end - pd.Timedelta(seconds=1)]
    if len(block) < max(5, dur*0.8):  # require sufficient coverage
        cursor += pd.Timedelta(minutes=1)
        continue
    avg_p95 = float(block['p95_exec'].mean())
    candidate = {
        'start': w_start,
        'end': w_end,
        'duration_min': len(block),
        'avg_p95': avg_p95,
        'max_p95': float(block['p95_exec'].max())
    }
    if (best_baseline is None) or (avg_p95 < best_baseline['avg_p95']):
        best_baseline = candidate
    cursor += pd.Timedelta(minutes=1)

print('Selected BASELINE window:', best_baseline)

# Materialize slices from df
baseline_slice = df[(df['datetime']>=best_baseline['start']) & (df['datetime']<best_baseline['end'])]
peak_slice     = df[(df['datetime']>=peak_window['start']) & (df['datetime']<peak_window['end'])]

print('Baseline count:', len(baseline_slice), 'Peak count:', len(peak_slice))


Selected PEAK cluster: {'start': Timestamp('2025-11-07 01:26:00'), 'end': Timestamp('2025-11-07 01:27:00'), 'duration_min': 1, 'avg_p95': 50.16209999999997, 'max_p95': 50.16209999999997}
Selected BASELINE window: None


TypeError: 'NoneType' object is not subscriptable

In [36]:

# 3) Discover peak cluster (incident window) and baseline window automatically
# Strategy:
# - Group contiguous anomaly minutes into clusters; pick the cluster with highest avg p95_exec.
# - Peak window = cluster start -> cluster end.
# - Baseline window = same duration window with lowest avg p95_exec that does not overlap anomalies.

# Helper to form contiguous clusters
clusters = []
current = []
prev_ts = None
for ts in anom.index:
    if prev_ts is None or (ts - prev_ts == pd.Timedelta(minutes=1)):
        current.append(ts)
    else:
        clusters.append(current)
        current = [ts]
    prev_ts = ts
if current:
    clusters.append(current)

cluster_stats = []
for cl in clusters:
    start = cl[0]; end = cl[-1] + pd.Timedelta(minutes=1)  # exclusive end
    block = agg.loc[start:end - pd.Timedelta(seconds=1)]
    cluster_stats.append({
        'start': start,
        'end': end,
        'duration_min': len(cl),
        'avg_p95': float(block['p95_exec'].mean()),
        'max_p95': float(block['p95_exec'].max())
    })

# Choose peak: cluster with highest avg_p95 (or max_p95 as tiebreaker)
peak_window = None
if cluster_stats:
    cluster_stats.sort(key=lambda d: (d['avg_p95'], d['max_p95']), reverse=True)
    peak_window = cluster_stats[0]
    print('Selected PEAK cluster:', peak_window)
else:
    # Fallback: take top-N minute window by p95_exec average (e.g., 50 minutes)
    dur = 50
    roll = agg['p95_exec'].rolling(f'{dur}T').mean()
    idx = roll.idxmax()
    if pd.isna(idx):
        raise RuntimeError('No data to form a peak window.')
    start = idx - pd.Timedelta(minutes=dur-1)
    end = idx + pd.Timedelta(minutes=1)
    block = agg.loc[start:end - pd.Timedelta(seconds=1)]
    peak_window = {
        'start': start,
        'end': end,
        'duration_min': len(block),
        'avg_p95': float(block['p95_exec'].mean()),
        'max_p95': float(block['p95_exec'].max())
    }
    print('Fallback PEAK window:', peak_window)

# Build candidate baseline windows of same duration
dur = peak_window['duration_min']
start_all = agg.index.min(); end_all = agg.index.max() + pd.Timedelta(minutes=1)

# Slide a window of length 'dur' across the timeline to find minimal avg p95, excluding anomaly minutes
anom_minutes = set(anom.index)


# If no baseline found, fallback to earliest window of same duration
if best_baseline is None:
    print("No clean baseline found. Using fallback: earliest window of same duration.")
    w_start = agg.index.min()
    w_end = w_start + pd.Timedelta(minutes=dur)
    block = agg.loc[w_start:w_end - pd.Timedelta(seconds=1)]
    best_baseline = {
        'start': w_start,
        'end': w_end,
        'duration_min': len(block),
        'avg_p95': float(block['p95_exec'].mean()),
        'max_p95': float(block['p95_exec'].max())
    }

print('Selected BASELINE window:', best_baseline)

# Materialize slices
baseline_slice = df[(df['datetime'] >= best_baseline['start']) & (df['datetime'] < best_baseline['end'])]
peak_slice     = df[(df['datetime'] >= peak_window['start']) & (df['datetime'] < peak_window['end'])]

print('Baseline count:', len(baseline_slice), 'Peak count:', len(peak_slice))

Selected PEAK cluster: {'start': Timestamp('2025-11-07 01:26:00'), 'end': Timestamp('2025-11-07 01:27:00'), 'duration_min': 1, 'avg_p95': 50.16209999999997, 'max_p95': 50.16209999999997}
No clean baseline found. Using fallback: earliest window of same duration.
Selected BASELINE window: {'start': Timestamp('2025-11-06 00:00:00'), 'end': Timestamp('2025-11-06 00:01:00'), 'duration_min': 1, 'avg_p95': 8.488249999999999, 'max_p95': 8.488249999999999}
Baseline count: 66 Peak count: 78


In [37]:

# 3) Discover peak cluster (incident window) and baseline window automatically (robust version)

MIN_CLUSTER_MINUTES = 5        # ignore clusters shorter than this
ROLLING_FALLBACK_MIN = 50      # minutes for rolling fallback window
MIN_COUNT_PER_MIN = 30         # throughput guard in candidate windows

# Helper: form contiguous anomaly clusters (by minute)
clusters, current, prev_ts = [], [], None
for ts in anom.index:
    if prev_ts is None or (ts - prev_ts == pd.Timedelta(minutes=1)):
        current.append(ts)
    else:
        clusters.append(current)
        current = [ts]
    prev_ts = ts
if current:
    clusters.append(current)

# Filter clusters by minimum duration
clusters = [cl for cl in clusters if len(cl) >= MIN_CLUSTER_MINUTES]

cluster_stats = []
for cl in clusters:
    start = cl[0]; end = cl[-1] + pd.Timedelta(minutes=1)  # exclusive end
    block = agg.loc[start:end - pd.Timedelta(seconds=1)]
    # Throughput guard: average count/min must be sufficient
    if block['n'].mean() < MIN_COUNT_PER_MIN:
        continue
    cluster_stats.append({
        'start': start,
        'end': end,
        'duration_min': len(cl),
        'avg_p95': float(block['p95_exec'].mean()),
        'max_p95': float(block['p95_exec'].max())
    })

# Choose peak: best cluster by avg_p95 (tiebreaker: max_p95)
peak_window = None
if cluster_stats:
    cluster_stats.sort(key=lambda d: (d['avg_p95'], d['max_p95']), reverse=True)
    peak_window = cluster_stats[0]
    print('Selected PEAK cluster:', peak_window)
else:
    # Fallback: use rolling window (e.g., 50 minutes) to find highest-average p95 period
    dur = ROLLING_FALLBACK_MIN
    roll = agg['p95_exec'].rolling(f'{dur}min').mean()
    idx = roll.idxmax()
    if pd.isna(idx):
        raise RuntimeError('No data to form a peak window.')
    start = idx - pd.Timedelta(minutes=dur-1)
    end = idx + pd.Timedelta(minutes=1)
    block = agg.loc[start:end - pd.Timedelta(seconds=1)]
    # Throughput guard
    if block['n'].mean() < MIN_COUNT_PER_MIN:
        # If throughput is low, relax to MIN_COUNT_PER_MIN/2 or choose next best
        print('Throughput low in fallback peak; relaxing guard.')
    peak_window = {
        'start': start,
        'end': end,
        'duration_min': len(block),
        'avg_p95': float(block['p95_exec'].mean()),
        'max_p95': float(block['p95_exec'].max())
    }
    print('Fallback PEAK window:', peak_window)

# Build candidate baseline windows of same duration (no anomalies, sufficient throughput)
dur = peak_window['duration_min']
start_all = agg.index.min(); end_all = agg.index.max() + pd.Timedelta(minutes=1)
anom_minutes = set(anom.index)

best_baseline = None
cursor = start_all
while cursor + pd.Timedelta(minutes=dur) <= end_all:
    w_start = cursor
    w_end = cursor + pd.Timedelta(minutes=dur)
    minutes = pd.date_range(w_start, w_end - pd.Timedelta(minutes=1), freq='min')  # fix deprecation

    # Exclude if any minute is anomalous or overlaps the peak window
    if any(m in anom_minutes for m in minutes):
        cursor += pd.Timedelta(minutes=1); continue
    if not (w_end <= peak_window['start'] or w_start >= peak_window['end']):
        cursor += pd.Timedelta(minutes=1); continue

    block = agg.loc[w_start:w_end - pd.Timedelta(seconds=1)]
    # Coverage & throughput guards
    if len(block) < max(5, dur * 0.8):
        cursor += pd.Timedelta(minutes=1); continue
    if block['n'].mean() < MIN_COUNT_PER_MIN:
        cursor += pd.Timedelta(minutes=1); continue

    avg_p95 = float(block['p95_exec'].mean())
    candidate = {
        'start': w_start, 'end': w_end, 'duration_min': len(block),
        'avg_p95': avg_p95, 'max_p95': float(block['p95_exec'].max())
    }
    if (best_baseline is None) or (avg_p95 < best_baseline['avg_p95']):
        best_baseline = candidate
    cursor += pd.Timedelta(minutes=1)

# Final fallback if baseline not found: earliest window of same duration (non-strict)
if best_baseline is None:
    print("No clean baseline found. Using fallback: earliest window of same duration.")
    w_start = agg.index.min()
    w_end = w_start + pd.Timedelta(minutes=dur)
    block = agg.loc[w_start:w_end - pd.Timedelta(seconds=1)]
    best_baseline = {
        'start': w_start, 'end': w_end, 'duration_min': len(block),
        'avg_p95': float(block['p95_exec'].mean()), 'max_p95': float(block['p95_exec'].max())
    }

print('Selected BASELINE window:', best_baseline)

# Materialize slices
baseline_slice = df[(df['datetime'] >= best_baseline['start']) & (df['datetime'] < best_baseline['end'])]
peak_slice     = df[(df['datetime'] >= peak_window['start'])   & (df['datetime'] < peak_window['end'])]

print('Baseline count:', len(baseline_slice), 'Peak count:', len(peak_slice))


Selected PEAK cluster: {'start': Timestamp('2025-11-06 22:53:00'), 'end': Timestamp('2025-11-06 22:58:00'), 'duration_min': 5, 'avg_p95': 43.47478999999998, 'max_p95': 74.429}
Selected BASELINE window: {'start': Timestamp('2025-11-06 03:23:00'), 'end': Timestamp('2025-11-06 03:28:00'), 'duration_min': 5, 'avg_p95': 1.9064699999999994, 'max_p95': 2.3118999999999987}
Baseline count: 272 Peak count: 493


In [38]:

# 4) Metrics & verification on discovered windows

def stats(s: pd.DataFrame):
    return {
        'count': int(len(s)),
        'time_min': str(s['datetime'].min()),
        'time_max': str(s['datetime'].max()),
        'mean': float(s['execution_time'].mean()),
        'median': float(s['execution_time'].median()),
        'p95': float(s['execution_time'].quantile(0.95)),
        'p99': float(s['execution_time'].quantile(0.99)),
        'std': float(s['execution_time'].std(ddof=0)),
        'min': float(s['execution_time'].min()),
        'max': float(s['execution_time'].max()),
        'slow_pct_gt20': float((s['execution_time']>20).mean()*100.0),
        'critical_pct_gt60': float((s['execution_time']>60).mean()*100.0),
    }

b = stats(baseline_slice)
p = stats(peak_slice)
slowdown = p['mean']/b['mean'] if b['mean'] else np.nan
increase = (p['mean']-b['mean'])/b['mean']*100 if b['mean'] else np.nan

print('DISCOVERED Baseline metrics:')
print(json.dumps(b, indent=2))
print('DISCOVERED Peak metrics:')
print(json.dumps(p, indent=2))
print(f"Slowdown factor (mean): {slowdown:.2f}x | Increase: {increase:.2f}%")

# Save discovery artifact
discovery = {
    'dataset_sha256': sha,
    'discovered_baseline_window': {'start': str(best_baseline['start']), 'end': str(best_baseline['end']), 'duration_min': int(best_baseline['duration_min'])},
    'discovered_peak_window': {'start': str(peak_window['start']), 'end': str(peak_window['end']), 'duration_min': int(peak_window['duration_min'])},
    'baseline_metrics': b,
    'peak_metrics': p,
    'slowdown_factor_mean': round(float(slowdown), 3),
    'increase_percent_mean': round(float(increase), 2)
}
open('artifact_discovery_verification.json','w').write(json.dumps(discovery, indent=2))
print('Saved -> artifact_discovery_verification.json')


DISCOVERED Baseline metrics:
{
  "count": 272,
  "time_min": "2025-11-06 03:23:00.444000",
  "time_max": "2025-11-06 03:27:58.362000",
  "mean": 0.8830661764705883,
  "median": 0.672,
  "p95": 1.9514499999999992,
  "p99": 3.672470000000001,
  "std": 0.6348215439629458,
  "min": 0.022,
  "max": 3.885,
  "slow_pct_gt20": 0.0,
  "critical_pct_gt60": 0.0
}
DISCOVERED Peak metrics:
{
  "count": 493,
  "time_min": "2025-11-06 22:53:00.592000",
  "time_max": "2025-11-06 22:57:59.922000",
  "mean": 15.159847870182555,
  "median": 12.937,
  "p95": 50.2886,
  "p99": 77.91739999999994,
  "std": 15.241515095849847,
  "min": 0.029,
  "max": 89.837,
  "slow_pct_gt20": 18.864097363083165,
  "critical_pct_gt60": 3.2454361054766734
}
Slowdown factor (mean): 17.17x | Increase: 1616.73%
Saved -> artifact_discovery_verification.json


In [39]:

# 5) Optional comparison to manual windows (if artifact_verification_case.json exists)
manual_path = 'artifact_verification_case.json'
if os.path.exists(manual_path):
    manual = json.load(open(manual_path))
    print('Found manual verification artifact; comparing...')
    print(json.dumps({
        'manual_baseline_window': manual.get('baseline_window'),
        'manual_peak_window': manual.get('peak_window'),
        'manual_slowdown_factor_mean': manual.get('slowdown_factor_mean'),
        'manual_increase_percent_mean': manual.get('increase_percent_mean')
    }, indent=2))
else:
    print('No manual verification artifact found; skipping comparison.')


No manual verification artifact found; skipping comparison.


In [56]:

# 6) Plots: per-minute p95 with anomalies, and ECDF comparison (discovered baseline vs peak)

# Per-minute p95 (global) with anomalies
fig, ax = plt.subplots(figsize=(11,4))
ax.plot(agg.index, agg['p95_exec'], color='#2b8cbe', lw=1.5, label='per-minute p95')
anom_plot = agg[agg['robust_z']>3]
if not anom_plot.empty:
    ax.scatter(anom_plot.index, anom_plot['p95_exec'], s=12, color='red', label='anomaly (z>3)')
# highlight discovered windows
ax.axvspan(pd.to_datetime(best_baseline['start']), pd.to_datetime(best_baseline['end']), color='#1b7837', alpha=0.15, label='Baseline window')
ax.axvspan(pd.to_datetime(peak_window['start']), pd.to_datetime(peak_window['end']), color='#fb6a4a', alpha=0.15, label='Peak window')
ax.set_title('Per-minute p95 (global) with discovered windows')
ax.set_ylabel('p95 exec (s)')
ax.xaxis.set_major_formatter(DateFormatter('%m-%d %H:%M'))
ax.legend(loc='upper left', frameon=False)
ax.grid(True, alpha=0.3)
plt.tight_layout(); plt.show()
fig.savefig('plot_discovery_p95_windows.png', dpi=160)
print('=================')
print('Baseline{best_baseline}['start']')

# ECDF comparison (discovered baseline vs peak)
import numpy as np

def ecdf(values):
    if len(values)==0:
        return np.array([]), np.array([])
    x = np.sort(values)
    y = np.arange(1, len(x)+1)/len(x)
    return x, y

x_b, y_b = ecdf(baseline_slice['execution_time'].values)
x_p, y_p = ecdf(peak_slice['execution_time'].values)

THRESH_SLOW=20.0; THRESH_CRIT=60.0

fig, ax = plt.subplots(figsize=(10,4))
if len(x_b): ax.plot(x_b, y_b, color='#1b7837', lw=1.8, label='Baseline ECDF')
if len(x_p): ax.plot(x_p, y_p, color='#2b8cbe', lw=1.8, label='Peak ECDF')
ax.axvline(THRESH_SLOW, color='#cc7a00', lw=1.2, ls='--', label='20s')
ax.axvline(THRESH_CRIT, color='#b30000', lw=1.2, ls='--', label='60s')
ax.set_title('ECDF comparison — Discovered Baseline vs Peak')
ax.set_xlabel('Execution time (s)'); ax.set_ylabel('Cumulative fraction (0–1)')
ax.set_xlim(left=0); ax.set_ylim(0,1)
ax.grid(True, alpha=0.3); ax.legend(loc='lower right', frameon=False)
plt.tight_layout(); plt.show()
fig.savefig('plot_discovery_ecdf_baseline_vs_peak.png', dpi=160)

print('Saved -> plot_discovery_p95_windows.png, plot_discovery_ecdf_baseline_vs_peak.png')


SyntaxError: invalid syntax. Perhaps you forgot a comma? (2060091012.py, line 20)

In [49]:

# 7) Artifact index (discovery)
index_payload = {
    'dataset': CSV_PATH,
    'sha256': sha,
    'generated_at': str(datetime.now()),
    'artifacts': [
        'artifact_discovery_verification.json',
        'plot_discovery_p95_windows.png',
        'plot_discovery_ecdf_baseline_vs_peak.png'
    ]
}
open('artifact_index_discovery.json','w').write(json.dumps(index_payload, indent=2))
print('Saved -> artifact_index_discovery.json')


Saved -> artifact_index_discovery.json
