
# Statistical Analysis of ETA Logs

This notebook extends the normalized correlation analysis by adding descriptive statistics:
- Mean execution time
- Standard deviation
- Z-scores
- 95th percentile (p95)
- 99th percentile (p99)

These metrics help identify anomalies and performance outliers.


In [None]:

import pandas as pd
import numpy as np


In [None]:

# Read the cleaned ETA log export and convert its 'datetime' column from
# text to pandas datetimes in one chained expression.
eta_logs = pd.read_csv('cleaned_eta_logs.csv').assign(
    datetime=lambda frame: pd.to_datetime(frame['datetime'])
)
# Preview the first rows as the cell output.
eta_logs.head()


In [None]:

# Descriptive statistics for execution_time.
# Every summary below uses a pandas reducer, all of which skip NaN by default,
# so the mean, standard deviation and percentiles are computed over the same
# set of observations. (np.percentile propagates NaN: a single missing value
# would make p95/p99 NaN and any later `> p99` comparison silently all-False.)
execution_times = eta_logs['execution_time']
mean_val = execution_times.mean()
std_val = execution_times.std()  # pandas default: sample std (ddof=1)
# Series.quantile uses linear interpolation by default, matching
# np.percentile's default, so results are unchanged on NaN-free data.
p95 = execution_times.quantile(0.95)
p99 = execution_times.quantile(0.99)

# Compute z-scores: distance of each observation from the mean, in units of
# standard deviation. Rows with a missing execution_time get a NaN z-score.
eta_logs['z_score'] = (execution_times - mean_val) / std_val

# Last expression of the cell: summary dict shown as the cell output.
{
    'mean_execution_time': mean_val,
    'std_dev': std_val,
    'p95': p95,
    'p99': p99
}


In [None]:

# A row counts as an outlier when either criterion holds:
#   * its z-score is above 3, or
#   * its execution_time is above the 99th-percentile value (p99).
outlier_mask = eta_logs['z_score'].gt(3) | eta_logs['execution_time'].gt(p99)
outliers = eta_logs.loc[outlier_mask]
# Preview the first flagged rows as the cell output.
outliers.head()
