# Residual Analysis Notebook

Use this notebook to explore the latest residual compare JSON outputs.

Environment prep:
1. Activate your Python environment for this repo.
2. Run `pip install -r requirements.txt` in that same environment (adds Jupyter and the streaming dependencies).
3. Launch Jupyter from the project root so all relative paths resolve.

All file IO shown below assumes `encoding='utf-8'` for Windows compatibility.


In [None]:
# Enable automatic module reloading so edits to imported helper modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline


In [None]:
from __future__ import annotations

import functools
import itertools
import json
import os
import sys
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from src.analysis.residual_results import (
    all_latest_jsons,
    chunked_metric_frames,
    correlate_metric_columns,
    iter_metric_rows,
    latest_json_for,
    list_models,
    plot_correlation_heatmap,
    plot_metric_distribution,
    plot_metric_scatter,
    plot_metric_trend,
)

# Anchor paths at the directory the kernel was started from.
PROJECT_ROOT = Path.cwd().resolve()
SRC_DIR = PROJECT_ROOT.joinpath("src")

# Register <root>/src with the import system once, and only if it exists.
# NOTE(review): this runs after the `src.analysis.residual_results` import
# earlier in this cell, so it cannot influence whether that import resolves.
if SRC_DIR.exists():
    if str(SRC_DIR) not in sys.path:
        sys.path.append(str(SRC_DIR))

# Shared seaborn look for every figure in the notebook.
sns.set_theme(context="notebook", style="whitegrid")

# Echo the resolved root as the cell output.
PROJECT_ROOT


## 1. Discover Latest Residual JSON per Model

Use the helper utilities to list every model with available outputs and grab the newest JSON artifact for each one.


In [None]:
# Newest residual JSON per model, as returned by the helper; iterated below as
# (model, path) pairs where each path supports .stat().
latest_map = all_latest_jsons()

# One summary row per model: file location, modification time, and size in MiB.
# The columns are declared explicitly so the frame keeps its schema when
# latest_map is empty; without them, sort_values("model") raises KeyError on a
# column-less frame and the cell would fail before any outputs exist.
latest_df = (
    pd.DataFrame(
        [
            {
                "model": model,
                "path": str(path),
                "updated_at": datetime.fromtimestamp(path.stat().st_mtime),
                "size_mb": round(path.stat().st_size / (1024 ** 2), 2),
            }
            for model, path in latest_map.items()
        ],
        columns=["model", "path", "updated_at", "size_mb"],
    )
    .sort_values("model")
    .reset_index(drop=True)
)

print(f"Discovered {len(latest_map)} models")
latest_df


In [None]:
# Look up the newest JSON for the first listed model as a quick sanity check.
models = list_models()
if models:
    example_model = models[0]
    latest_path = latest_json_for(example_model)
    print(f"Latest JSON for {example_model} -> {latest_path}")
else:
    print("No residual outputs found yet.")


## 2. Stream Records Lazily

Operate on one record at a time with the streaming loader utilities. The snippets below preview a few rows without loading an entire file into memory.


In [None]:
# Preview the first few streamed rows from one model's latest JSON.
if not latest_map:
    print("No residual JSON files detected. Populate h200_outputs first.")
else:
    # Take whichever (model, path) pair the mapping yields first.
    sample_model, sample_path = next(iter(latest_map.items()))
    print(f"Previewing rows from {sample_model}: {sample_path}")
    row_iter = iter_metric_rows(
        sample_path,
        metadata_fields=("model", "task", "dataset"),
        aggregations_to_run=("residual_strength",),
    )
    # islice stops after three rows, so the rest of the iterator is never consumed.
    sample_rows = list(itertools.islice(row_iter, 3))
    # A bare expression inside an if/else branch is not auto-rendered by
    # IPython (only a top-level trailing expression is), so display explicitly.
    display(pd.DataFrame(sample_rows))


### Chunked aggregation across models

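Use `chunked_metric_frames` to build manageable pandas DataFrames (e.g., 256 rows at a time) spanning all latest outputs. The cell below pulls only the first chunk into `metric_chunk`; iterate over the generator to process every chunk.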


In [None]:
# Pull the first chunk of metric rows across every model's latest JSON.
latest_paths = list(latest_map.values())
# Stays None when there is nothing to load; later cells check for this.
metric_chunk = None

if not latest_paths:
    print("No JSON files to stream.")
else:
    # next(..., None) takes only the first chunk and yields None instead of
    # raising StopIteration when the generator is empty.
    metric_chunk = next(
        chunked_metric_frames(
            *latest_paths,
            chunk_size=256,
            metadata_fields=("model", "task", "dataset"),
            aggregations_to_run=("residual_strength",),
        ),
        None,
    )
    if metric_chunk is None or metric_chunk.empty:
        print("Chunk generator produced no rows.")
    else:
        # A bare expression nested in a branch is not auto-rendered by IPython,
        # so show the preview explicitly.
        display(metric_chunk.head())


## 3. Organize, Reduce, and Correlate Metrics

Once you have a DataFrame chunk, standard pandas tooling (groupby, describe, corr) is available. The helpers below run a few common operations.


In [None]:
# Per-model summary statistics of the residual-strength delta column.
if metric_chunk is None or metric_chunk.empty:
    print("Run the chunked loader cell above first.")
else:
    delta_col = "agg.residual_strength.mean_norm_delta"
    # dropna=False keeps rows whose model label is missing as their own group.
    grouped = (
        metric_chunk.groupby("meta.model", dropna=False)[delta_col]
        .describe()
        .rename_axis("meta.model")
    )
    # A bare expression nested in a branch is not auto-rendered by IPython,
    # so show the table explicitly.
    display(grouped)


In [None]:
# Correlate the base, SFT, and delta residual-strength columns.
if metric_chunk is None or metric_chunk.empty:
    print("Run the chunked loader cell above first.")
else:
    corr_cols = [
        "agg.residual_strength.mean_norm_base",
        "agg.residual_strength.mean_norm_sft",
        "agg.residual_strength.mean_norm_delta",
    ]
    # correlate_metric_columns comes from the project helper import at the top
    # of the notebook.
    correlation = correlate_metric_columns(metric_chunk, columns=corr_cols)
    # A bare expression nested in a branch is not auto-rendered by IPython,
    # so show the result explicitly.
    display(correlation)


## 4. Visualize Metrics

The plotting helpers wrap matplotlib/seaborn primitives, so they work in any vanilla Jupyter kernel. Each function accepts either a DataFrame or an iterable of rows.


In [None]:
# Histogram (with KDE overlay) of the residual-strength delta column.
if metric_chunk is not None and not metric_chunk.empty:
    ax = plot_metric_distribution(
        metric_chunk, column="agg.residual_strength.mean_norm_delta", bins=40, kde=True
    )
    ax.figure.tight_layout()
else:
    print("Run the chunked loader cell to generate metric_chunk first.")


In [None]:
# Base vs. SFT residual-strength norms, coloured by model.
if metric_chunk is not None and not metric_chunk.empty:
    # NOTE(review): "sft_embedding" lacks the "meta."/"agg." prefix used by the
    # other columns referenced here — confirm it exists in metric_chunk.
    ax = plot_metric_scatter(
        metric_chunk,
        hue="meta.model",
        style="sft_embedding",
        x="agg.residual_strength.mean_norm_base",
        y="agg.residual_strength.mean_norm_sft",
    )
    ax.figure.tight_layout()
else:
    print("Run the chunked loader cell to generate metric_chunk first.")


In [None]:
# Delta metric plotted against row position, one line colour per model.
if metric_chunk is not None and not metric_chunk.empty:
    # Renumber rows 0..n-1, then expose that position as an explicit column.
    trend_chunk = metric_chunk.reset_index(drop=True)
    trend_chunk = trend_chunk.assign(row_id=trend_chunk.index)
    ax = plot_metric_trend(
        trend_chunk,
        hue="meta.model",
        estimator=None,
        x="row_id",
        y="agg.residual_strength.mean_norm_delta",
    )
    ax.set_xlabel("Row index (proxy for prompt order)")
    ax.figure.tight_layout()
else:
    print("Run the chunked loader cell to generate metric_chunk first.")


In [None]:
# Heatmap over the base, SFT, and delta residual-strength columns.
if metric_chunk is not None and not metric_chunk.empty:
    corr_cols = [
        f"agg.residual_strength.mean_norm_{suffix}"
        for suffix in ("base", "sft", "delta")
    ]
    ax = plot_correlation_heatmap(metric_chunk, columns=corr_cols)
    ax.figure.tight_layout()
else:
    print("Run the chunked loader cell to generate metric_chunk first.")
