# 06 | Observability

This notebook summarizes runtime and quality metrics from the generated pipeline artifacts.

In [None]:
# Parameters
# NOTE(review): this looks like a papermill-style injected-parameters cell —
# confirm. None of these values are read by the cells visible in this notebook.
# Identifier of the upstream data source.
source = "fivethirtyeight"
# Comma-separated list of dataset names, kept as a single string.
dataset = "recent_grads,bechdel_movies"
# Run date as a YYYY-MM-DD string.
run_date = "2026-02-22"
# Boolean switch; presumably forces artifacts to be rebuilt — TODO confirm.
force_refresh = False

In [None]:
import sys
from pathlib import Path

# Use the current working directory as the project root and make sure it is on
# the import search path exactly once, at the front (presumably so the
# project's `src.*` packages can be imported from this notebook).
ROOT_DIR = Path.cwd()
if not any(entry == str(ROOT_DIR) for entry in sys.path):
    sys.path.insert(0, str(ROOT_DIR))

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

from src.common.io import load_pipeline_metrics, update_stage_metrics
from src.common.paths import REPORTS_DIR

# Build a per-stage runtime table and chart from the pipeline metrics payload,
# derive the data-quality failure rate, and record a summary under the
# 'observability' stage.
metrics = load_pipeline_metrics()

# `or {}` (rather than a .get default) also covers a key that is present but
# null, matching the `or 0` guards applied to the numeric fields below.
stages = metrics.get('stages') or {}

stage_rows = []
for stage_name, stage_payload in stages.items():
    stage_payload = stage_payload or {}
    stage_rows.append(
        {
            'stage': stage_name,
            # Missing or null runtimes are reported as 0.0 seconds.
            'runtime_seconds': float(stage_payload.get('runtime_seconds', 0) or 0),
            'status': stage_payload.get('status', 'n/a'),
        }
    )

# Passing `columns` keeps the frame well-formed even when there are no stages,
# so the empty and non-empty cases share one code path.
stage_df = pd.DataFrame(
    stage_rows, columns=['stage', 'runtime_seconds', 'status']
).sort_values('stage')

chart_path = REPORTS_DIR / 'observability_runtime_seconds.png'
if not stage_df.empty:
    # savefig does not create missing directories; make sure the target exists.
    chart_path.parent.mkdir(parents=True, exist_ok=True)
    fig, ax = plt.subplots(figsize=(12, 4))
    try:
        stage_df.plot(
            kind='bar',
            x='stage',
            y='runtime_seconds',
            ax=ax,
            legend=False,
            color='#2a9d8f',
        )
        ax.set_title('Pipeline Runtime by Stage (seconds)')
        ax.set_xlabel('Stage')
        ax.set_ylabel('Runtime (s)')
        fig.tight_layout()
        fig.savefig(chart_path, dpi=120)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

# Failure rate is expectation-level failures / evaluated expectations.
# Reported as 0.0 when nothing was evaluated, to avoid dividing by zero.
dq_stats = stages.get('dq') or {}
evaluated = int(dq_stats.get('evaluated_expectations', 0) or 0)
unsuccessful = int(dq_stats.get('unsuccessful_expectations', 0) or 0)
failure_rate = round(unsuccessful / evaluated, 4) if evaluated else 0.0

# NOTE(review): 'runtime_chart' is recorded even when no stages were found and
# therefore no chart was written on this run — confirm consumers tolerate a
# path that may be missing or stale.
summary = {
    'rows_ingested': metrics.get('rows_ingested', 0),
    'rows_bronze': metrics.get('rows_bronze', 0),
    'rows_silver': metrics.get('rows_silver', 0),
    'rows_gold': metrics.get('rows_gold', 0),
    'data_freshness': metrics.get('data_freshness'),
    'dq_pass': metrics.get('dq_pass'),
    'dq_failure_rate': failure_rate,
    'runtime_chart': str(chart_path),
}

update_stage_metrics('observability', summary)

# Last expression of the cell: renders the per-stage table in the notebook.
stage_df

In [None]:
# Render the summary dict as a one-row table (one column per summary key).
pd.DataFrame(data=[summary])