
# Statistical Analysis with Visualization

This notebook adds visualizations to the statistical analysis of ETA logs:
- Histogram of execution times with p95 and p99 thresholds
- Boxplot to show outliers
- Line chart of execution times over time, drawn from a random sample of the rows


In [None]:

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go


In [None]:

# Read the ETA log CSV and convert its 'datetime' column to pandas datetimes
eta_logs = (
    pd.read_csv('cleaned_eta_logs.csv')
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
)

# Keep the 'execution_time' column under its own name for the cells below
execution_times = eta_logs['execution_time']


In [None]:

# Calculate statistics
# pandas' mean()/std() skip NaN; std() is the sample standard deviation (ddof=1).
mean_val = execution_times.mean()
std_val = execution_times.std()

# Series.quantile skips NaN as well, so the percentiles stay consistent with
# mean/std. np.percentile would return NaN for both thresholds if even a single
# execution time were missing. Both use linear interpolation by default, so the
# values are unchanged on NaN-free data.
p95 = execution_times.quantile(0.95)
p99 = execution_times.quantile(0.99)

# Number of standard deviations each row lies from the mean, stored on the frame.
eta_logs['z_score'] = (execution_times - mean_val) / std_val

# Last expression of the cell: displayed as the cell's output.
{
    'mean': mean_val,
    'std_dev': std_val,
    'p95': p95,
    'p99': p99
}


In [None]:

# Distribution of execution times (100 bins) with dashed vertical markers
# at the 95th (orange) and 99th (red) percentiles.
fig = (
    go.Figure(
        data=[go.Histogram(x=execution_times, nbinsx=100, name='Execution Times')]
    )
    .add_vline(x=p95, line_color='orange', line_dash='dash', annotation_text='p95')
    .add_vline(x=p99, line_color='red', line_dash='dash', annotation_text='p99')
    .update_layout(
        title='Histogram of Execution Times with p95 and p99',
        xaxis_title='Execution Time',
        yaxis_title='Count',
    )
)
fig.show()


In [None]:

# Box plot of the 'execution_time' column of the ETA logs
fig = px.box(
    data_frame=eta_logs,
    y='execution_time',
    title='Boxplot of Execution Times',
)
fig.show()


In [None]:

# Line chart of execution times over time
# Plot a random subset of at most LINE_SAMPLE_SIZE rows.
LINE_SAMPLE_SIZE = 5000
LINE_SAMPLE_SEED = 42  # fixed seed so Restart & Run All reproduces the same chart

eta_logs_sampled = (
    eta_logs
    # Cap n at the frame length: DataFrame.sample raises ValueError when asked
    # for more rows than exist (without replacement).
    .sample(n=min(LINE_SAMPLE_SIZE, len(eta_logs)), random_state=LINE_SAMPLE_SEED)
    # sample() returns rows in random order and px.line connects points in row
    # order, so restore chronological order before plotting.
    .sort_values('datetime')
)

fig = px.line(
    eta_logs_sampled,
    x='datetime',
    y='execution_time',
    title='Execution Times Over Time (Sampled)',
)
fig.show()
