# Imports

In [None]:
import sqlite3
import pandas as pd
from pathlib import Path
import json
import nbformat
from nbformat.v4 import new_notebook, new_code_cell, new_markdown_cell
from nbclient import NotebookClient
from nbclient.exceptions import CellExecutionError, CellTimeoutError
import hashlib
from pprint import pprint
import sys

In [330]:
# Load and merge the signal table from all .db files
base_dir = Path("rsds-20241113")
parts = []

# Process the database files in sorted (file-name) order
for db_path in sorted(base_dir.glob("*.db")):
    con = sqlite3.connect(db_path)
    try:
        tables = pd.read_sql("SELECT name FROM sqlite_master WHERE type='table';", con)["name"].tolist()

        # Only load data if the 'signal' table exists
        if "signal" in tables:
            # Read the columns we care about from the signal table into a df
            df = pd.read_sql("SELECT time, panel, kind, args FROM signal;", con)
            # Record which db file each row came from
            df["source_db"] = db_path.name
            parts.append(df)
    finally:
        # Close the connection even when one of the queries above raises
        con.close()

# Fail with a clear message instead of pandas' generic "No objects to concatenate"
if not parts:
    raise FileNotFoundError(f"No .db file with a 'signal' table found in {base_dir.resolve()}")

# Combine the per-file dataframes and sort by source database, then by time within each database
signal_all = pd.concat(parts, ignore_index=True)
signal_all = signal_all.sort_values(["source_db", "time"]).reset_index(drop=True)
signal_all.head()

Unnamed: 0,time,panel,kind,args,source_db
0,1731471896989,id-cc25aca1-c264-46cb-a3f1-ba0e6d1ac4d8,INotebookModel.changed:cellsChange,"{""delta"":[{""op"":""delete"",""arg"":1},{""op"":""inser...",001.db
1,1731471896990,id-cc25aca1-c264-46cb-a3f1-ba0e6d1ac4d8,INotebookModel.changed:metadataChange,"{""delta"":[{""key"":""kernelspec"",""act"":""update"",""...",001.db
2,1731471896990,id-cc25aca1-c264-46cb-a3f1-ba0e6d1ac4d8,INotebookModel.changed:nbformatChanged,"{""key"":""nbformat"",""val"":4}",001.db
3,1731471896990,id-cc25aca1-c264-46cb-a3f1-ba0e6d1ac4d8,INotebookModel.changed:nbformatChanged,"{""key"":""nbformat_minor"",""val"":5}",001.db
4,1731471897275,id-cc25aca1-c264-46cb-a3f1-ba0e6d1ac4d8,ISessionContext.sessionChanged,"{""val"":""5d642d19-ab4c-417a-ae37-cf7900863696""}",001.db


# Analysis

The research question being analyzed in this notebook is as follows: What types of actions most frequently lead to non-reproducible notebook states? 

We attempt to answer this question using two methods: proxy-based detection and replay-based reproducibility validation.

### Proxy-based detection
A notebook enters a potentially non-reproducible state when:
- The kernel/session changes
- A cell is executed without a preceding code edit (i.e., execution happens while code hasn’t changed since the prior event)
- The execution count of the previous execution event in the same notebook panel is higher than the execution count of the current one (out-of-order execution)

These actions can create hidden states.

After flagging the proxies, we analyze which action occurred immediately before each flagged event. We then use lift to compare how often each action type appears right before a flagged event with how often it appears overall.
Lift is defined as the ratio between an event's conditional probability in a target context and its overall probability: $\text{lift}(a) = P(a \mid \text{right before a flagged event}) \,/\, P(a)$. A lift above 1 means the action is over-represented right before flagged events.

In [331]:
# Transform the 'args' column from JSON strings to dictionaries
def parse_args(x):
    """
    Decode one raw 'args' value (a JSON string) into a Python object.

    Returns the decoded object, or None when x is None or cannot be
    decoded as JSON.
    """
    if x is None:
        return None
    try:
        return json.loads(x)
    except (TypeError, ValueError):
        # TypeError: x is not str/bytes (e.g. a NaN float from pandas)
        # ValueError: malformed JSON (json.JSONDecodeError is a subclass)
        return None

# Decode every raw args string; values that cannot be decoded become None
signal_all["args_parsed"] = signal_all["args"].map(parse_args)


In [332]:
# Implement proxies as boolean flags

# Kind of the event logged immediately before this one within the same database file
signal_all["prev_kind"] = signal_all.groupby("source_db")["kind"].shift(1)

# Proxy 1: the kernel/session changed
signal_all["kernel_change"] = signal_all["kind"].eq("ISessionContext.sessionChanged")

# Proxy 2: an execution whose directly preceding event was not a source edit
signal_all["exec_event"] = signal_all["kind"].eq("ISharedCell.changed:executionCountChange")
signal_all["exec_without_edit"] = (
    signal_all["exec_event"]
    & signal_all["prev_kind"].ne("ISharedCell.changed:sourceChange")
)

# Proxy 3: an execution count lower than that of the previous execution event
# in the same database file and panel
signal_all["exec_count_val"] = signal_all["args_parsed"].apply(
    lambda x: x.get("val") if isinstance(x, dict) else None
)
signal_all["prev_exec_count_val"] = (
    signal_all[signal_all["exec_event"]]
    .groupby(["source_db", "panel"])["exec_count_val"]
    .shift(1)
)
signal_all["out_of_order_exec"] = (
    signal_all["exec_event"]
    & signal_all["prev_exec_count_val"].notna()
    & (signal_all["exec_count_val"] < signal_all["prev_exec_count_val"])
)

# A row is flagged as a potentially non-reproducible state when any proxy fires
signal_all["non_repro_state"] = signal_all[
    ["kernel_change", "exec_without_edit", "out_of_order_exec"]
].any(axis=1)

In [333]:
# Keep only the events flagged as a potentially non-reproducible state
non_repro_events = signal_all.loc[signal_all["non_repro_state"]].copy()
len(non_repro_events)

2900

In [334]:
# Treat the immediately preceding action as the possible cause of each flagged event
non_repro_events = non_repro_events.assign(pos_cause_kind=non_repro_events["prev_kind"])
pos_cause_counts = non_repro_events["pos_cause_kind"].value_counts()
pos_cause_counts

pos_cause_kind
ISharedCell.changed:outputsChange           1571
ISharedCell.changed:executionCountChange    1219
INotebookModel.changed:cellsChange            82
INotebookModel.changed:nbformatChanged        22
ISharedCell.changed:sourceChange               2
ISharedCell.changed:metadataChange             2
INotebookModel.changed:metadataChange          2
Name: count, dtype: int64

In [335]:
# Compare frequencies to baseline: how much more often does an action appear right before non-reproducible states than in general?
baseline = signal_all["kind"].value_counts(normalize=True)
before_nonrepro = non_repro_events["pos_cause_kind"].value_counts(normalize=True)

# Put both shares side by side; a kind missing from one side gets a share of 0
comparison = pd.concat(
    [baseline.rename("overall_share"), before_nonrepro.rename("share_before_nonrepro")],
    axis=1,
).fillna(0)

# lift = share right before a flagged event / overall share (a zero denominator becomes missing)
comparison["lift"] = comparison["share_before_nonrepro"] / comparison["overall_share"].replace(0, pd.NA)
comparison.sort_values("lift", ascending=False).head(15)

Unnamed: 0,overall_share,share_before_nonrepro,lift
INotebookModel.changed:nbformatChanged,0.001121,0.007586,6.769931
ISharedCell.changed:executionCountChange,0.081331,0.420345,5.168307
ISharedCell.changed:outputsChange,0.264635,0.541724,2.047064
INotebookModel.changed:cellsChange,0.014276,0.028276,1.980642
INotebookModel.changed:metadataChange,0.001031,0.00069,0.668966
ISharedCell.changed:metadataChange,0.001412,0.00069,0.488451
ISharedCell.changed:sourceChange,0.635612,0.00069,0.001085
ISessionContext.sessionChanged,0.000583,0.0,0.0


In [336]:
# Summarize total counts of each proxy
for flag in ("kernel_change", "exec_without_edit", "out_of_order_exec", "non_repro_state"):
    print(f"{flag}:", signal_all[flag].sum())

kernel_change: 26
exec_without_edit: 2873
out_of_order_exec: 2
non_repro_state: 2900


In [337]:
# Overview of which databases are flagged for which proxy
db_proxy_counts = (
    signal_all
    .groupby("source_db")[["kernel_change", "exec_without_edit", "out_of_order_exec"]]
    .sum()
    # row-wise total over the three proxy counts
    .assign(total_flags=lambda counts: counts.sum(axis=1))
    .sort_index()
)
db_proxy_counts

Unnamed: 0_level_0,kernel_change,exec_without_edit,out_of_order_exec,total_flags
source_db,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
001.db,1,135,0,136
002.db,1,168,2,171
003.db,4,101,0,105
004.db,1,113,0,114
005.db,1,86,0,87
006.db,1,73,0,74
007.db,1,841,0,842
008.db,1,183,0,184
009.db,1,46,0,47
010.db,1,57,0,58


###  Replay-based reproducibility validation
For each participant:
- Reconstruct the observed final notebook (from signals/snapshots)
- Execute it top-to-bottom in a fresh kernel
- Compare outputs and label reproducible vs non-reproducible
- Analyze which actions predict that label

In [None]:
# Load data of single participant, change file name to look at results from other participants
db_path = Path("rsds-20241113/011.db")

# sqlite3.connect() silently creates an empty database for a missing path, so check first
if not db_path.is_file():
    raise FileNotFoundError(f"Database file not found: {db_path}")

con = sqlite3.connect(db_path)
try:
    signal = pd.read_sql("SELECT time, panel, kind, args FROM signal ORDER BY time", con)
finally:
    # Close the connection even when the query raises
    con.close()

In [339]:
# Decode the JSON strings in the 'args' column into Python objects
signal["args_parsed"] = signal["args"].map(parse_args)
signal.loc[:, ["kind", "args_parsed"]].head(5)

Unnamed: 0,kind,args_parsed
0,INotebookModel.changed:cellsChange,"{'delta': [{'op': 'delete', 'arg': 1}, {'op': ..."
1,INotebookModel.changed:metadataChange,"{'delta': [{'key': 'kernelspec', 'act': 'updat..."
2,INotebookModel.changed:nbformatChanged,"{'key': 'nbformat', 'val': 4}"
3,INotebookModel.changed:nbformatChanged,"{'key': 'nbformat_minor', 'val': 5}"
4,ISessionContext.sessionChanged,{'val': 'c7a4243e-39fa-43b5-8a23-2c06f866ad15'}


In [340]:
# Reconstruct full text from incremental edit operations
def apply_text_delta(old: str, delta_ops: list) -> str:
    """
    Replay a sequence of Redspot text delta operations on a string.

    Supported operations (anything else is skipped):
      {"op":"retain","arg":N}   copy the next N characters of the old text
      {"op":"delete","arg":N}   skip the next N characters of the old text
      {"op":"insert","arg":"..."}   emit the given text

    A None input is treated as the empty string. Old text that no operation
    consumed is appended unchanged at the end.
    """
    text = "" if old is None else old
    cursor = 0      # read position in the old text
    pieces = []     # fragments of the new text, in order

    for step in delta_ops:
        kind = step.get("op")
        arg = step.get("arg")

        if kind == "insert":
            pieces.append(arg)
        elif kind == "delete":
            cursor += arg
        elif kind == "retain":
            end = cursor + arg
            pieces.append(text[cursor:end])
            cursor = end
        # any other operation type is skipped

    # keep whatever part of the old text the operations did not reach
    pieces.append(text[cursor:])
    return "".join(pieces)

In [341]:
# Reconstruct full list from incremental edit operations
def apply_list_delta(old: list, delta_ops: list) -> list:
    """
    Replay a sequence of Redspot list delta operations on a list.

    Supported operations (anything else is skipped):
      {"op":"retain","arg":N}     copy the next N items of the old list
      {"op":"delete","arg":N}     skip the next N items of the old list
      {"op":"insert","arg":[...]} emit the given items

    A None input is treated as an empty list. Old items that no operation
    consumed are appended at the end. A new list is always returned.
    """
    items = [] if old is None else old
    cursor = 0      # read position in the old list
    result = []

    for step in delta_ops:
        kind, arg = step.get("op"), step.get("arg")

        if kind == "retain":
            result.extend(items[cursor:cursor + arg])
            cursor += arg
        elif kind == "delete":
            cursor += arg
        elif kind == "insert":
            # a payload that is not a list is inserted as one single item
            result.extend(arg if isinstance(arg, list) else [arg])
        # any other operation type is skipped

    result.extend(items[cursor:])
    return result

In [342]:
# Define function that reconstructs the final notebook state for one participants by replaying logged events in time order
def _empty_cell_state():
    """Return a fresh placeholder state for a cell not (yet) seen via cellsChange."""
    return {"cell_type": "code", "source": "", "outputs": [], "execution_count": None, "metadata": {}}


def reconstruct_panel(panel_df: pd.DataFrame):
    """
    Reconstruct final notebook state for one panel by applying signals in row order.

    panel_df must have the columns 'kind' and 'args_parsed'; rows are replayed
    in the order given, so the caller is responsible for sorting by time.
    Rows whose args_parsed is not a dict (None, NaN, list, ...) are treated
    as carrying no arguments.

    Returns:
      cell_order: list[str]  cell ids in their final notebook order
      cells: dict[cell_id] -> cell dict with the keys
             cell_type, source, outputs, execution_count, metadata
             (cells removed from the order are kept in this dict)
    """
    cell_order = []
    cells = {}

    # Iterate over the two needed columns directly instead of building a Series per row
    for kind, args in zip(panel_df["kind"], panel_df["args_parsed"]):
        a = args if isinstance(args, dict) else {}

        # cellsChange: edits the notebook cell list (insert/delete/retain)
        if kind == "INotebookModel.changed:cellsChange":
            new_order = []
            cursor = 0  # read position in the previous cell order

            for op in a.get("delta", []):
                t = op.get("op")
                arg = op.get("arg")

                if t == "retain":
                    new_order.extend(cell_order[cursor:cursor + arg])
                    cursor += arg

                elif t == "delete":
                    # skip arg cells of the existing order
                    cursor += arg

                elif t == "insert":
                    # arg is a list of cell objects
                    for cell_obj in arg:
                        cid = cell_obj["id"]
                        # store/overwrite cell content
                        cells[cid] = {
                            "cell_type": cell_obj.get("cell_type", "code"),
                            "source": cell_obj.get("source", ""),
                            "outputs": cell_obj.get("outputs", []) or [],
                            "execution_count": cell_obj.get("execution_count", None),
                            "metadata": cell_obj.get("metadata", {}) or {},
                        }
                        new_order.append(cid)

            # append any remaining cells that weren't consumed (safety)
            new_order.extend(cell_order[cursor:])
            cell_order = new_order
            continue

        # The remaining handled kinds all target one cell, identified by args["cell"]
        cid = a.get("cell")
        if cid is None:
            continue

        # sourceChange: apply text delta to a specific cell's source
        if kind == "ISharedCell.changed:sourceChange":
            cell = cells.setdefault(cid, _empty_cell_state())
            cell["source"] = apply_text_delta(cell.get("source", ""), a.get("delta", []))

        # outputsChange: apply list delta to a specific cell's outputs
        elif kind == "ISharedCell.changed:outputsChange":
            cell = cells.setdefault(cid, _empty_cell_state())
            cell["outputs"] = apply_list_delta(cell.get("outputs", []), a.get("delta", []))

        # executionCountChange: set execution_count
        elif kind == "ISharedCell.changed:executionCountChange":
            cell = cells.setdefault(cid, _empty_cell_state())
            cell["execution_count"] = a.get("val")

        # Metadata/session changes are ignored for reconstruction

    return cell_order, cells

In [343]:
# Define function that normalizes outputs to use in state_to_notebook function
def normalize_outputs(outputs):
    """
    Prepare a cell's list of outputs for use in an nbformat notebook.

    Every plain dict is converted with nbformat.from_dict; any other item is
    passed through unchanged. None yields an empty list.
    """
    if outputs is None:
        return []

    return [
        nbformat.from_dict(item) if isinstance(item, dict) else item
        for item in outputs
    ]

In [344]:
# Define function that converts reconstructed notebook state into a Jupyter notebook object that can be executed
def state_to_notebook(cell_order, cells):
    """
    Build an nbformat v4 notebook from a reconstructed notebook state.

    cell_order: list of cell ids in notebook order
    cells: dict[cell_id] -> cell dict as produced by reconstruct_panel

    Ids that are missing from `cells` (or map to an empty value) are skipped.
    Markdown and raw cells keep their type and source only; every other cell
    type is emitted as a code cell with its outputs, execution count and
    metadata.
    """
    nb = new_notebook()
    nb.cells = []

    for cid in cell_order:
        c = cells.get(cid)
        if not c:
            continue

        cell_type = c.get("cell_type", "code")
        source = c.get("source", "")

        if cell_type == "markdown":
            nb.cells.append(new_markdown_cell(source=source))
        elif cell_type == "raw":
            # Raw cells are not code: keep them raw so a replay does not try to execute them
            nb.cells.append(nbformat.v4.new_raw_cell(source=source))
        else:
            code = new_code_cell(source=source)

            # Normalize outputs
            code["outputs"] = normalize_outputs(c.get("outputs", []) or [])

            code["execution_count"] = c.get("execution_count", None)
            code["metadata"] = c.get("metadata", {}) or {}
            nb.cells.append(code)

    return nb

In [345]:
# Use previously defined functions to produce a reconstructed notebook
out_dir = Path("reconstructed")
out_dir.mkdir(exist_ok=True)

# Reconstruct only the panel of the first logged event
panel_name = signal["panel"].iloc[0]
panel_df = signal.loc[signal["panel"] == panel_name].copy()

# Replay the panel's events, then turn the resulting state into a notebook object
cell_order, cells = reconstruct_panel(panel_df)
nb_obs = state_to_notebook(cell_order, cells)

# Write the observed notebook to disk, named after database and panel
obs_path = out_dir / f"{db_path.stem}_{panel_name}_observed.ipynb"
nbformat.write(nb_obs, obs_path)

obs_path

WindowsPath('reconstructed/009_id-3515ca2f-52c1-4a42-9f83-c4de4c3e9ca8_observed.ipynb')

In [346]:
# Check if there is only one panel in the database, otherwise other panels need to be reconstructed as well
con = sqlite3.connect(db_path)
try:
    panels = pd.read_sql("SELECT DISTINCT panel FROM signal", con)
finally:
    # Close the connection even when the query raises
    con.close()
print(f"Number of panels in {db_path.name}: {len(panels)}")

Number of panels in 009.db: 1


In [347]:
# Define function that normalizes titanic dataset schema to expected format to be able to rerun notebooks
def normalize_titanic_schema_v2(path):
    """
    Rename the columns of a Titanic CSV file to the expected lower-case names.

    The file at `path` is read, the known column names found in it are
    renamed, and the result is written back to the same path (without the
    index). The applied renames and the resulting column list are printed.
    """
    canonical_names = {
        "Sex": "sex",
        "SibSp": "sibsp",
        "Parch": "parch",
        "Pclass": "pclass",
        "Fare": "fare",
        "Age": "age",
        "Embarked": "embarked",
        "Cabin": "cabin",
        "Ticket": "ticket",
        "Name": "name",
        "PassengerId": "passenger_id",
        "Survived": "survived",
        "passengerid": "passenger_id",
    }

    frame = pd.read_csv(path)

    # Restrict the mapping to the columns that are actually present
    rename_map = {
        old_name: new_name
        for old_name, new_name in canonical_names.items()
        if old_name in frame.columns
    }

    # Overwrite the input file with the renamed columns
    frame = frame.rename(columns=rename_map)
    frame.to_csv(path, index=False)

    print(f"{path} -> renamed: {rename_map}")
    print("Columns now:", frame.columns.tolist())

# Normalize the column names of both the train and the test file
for csv_name in ("train.csv", "test.csv"):
    normalize_titanic_schema_v2(csv_name)

train.csv -> renamed: {}
Columns now: ['passenger_id', 'survived', 'pclass', 'name', 'sex', 'age', 'sibsp', 'parch', 'ticket', 'fare', 'cabin', 'embarked']
test.csv -> renamed: {}
Columns now: ['passenger_id', 'pclass', 'name', 'sex', 'age', 'sibsp', 'parch', 'ticket', 'fare', 'cabin', 'embarked', 'survived']


In [348]:
# Make sure test.csv has a 'survived' column; add a dummy column of zeros when it is missing
test = pd.read_csv("test.csv")
if "survived" in test.columns:
    print("test.csv already has survived")
else:
    test["survived"] = 0
    test.to_csv("test.csv", index=False)
    print("Added dummy survived column to test.csv")

test.csv already has survived


In [349]:
# Prepare notebook for running
# Load the observed notebook
nb = nbformat.read(obs_path, as_version=4)

# Define the patch cell that sanitizes train_x/test_x column names (inserted right before LightGBM).
# The confirmation print lives inside the train_x guard: the cell may run at a point
# where train_x does not exist, and an unconditional print would raise NameError there.
patch = new_code_cell(
    """
import re
import unicodedata
import pandas as pd

def _safe_feature_names(cols):
    out = []
    seen = {}
    for c in cols:
        s = unicodedata.normalize("NFKC", str(c)).strip()
        s = re.sub(r"\\s+", "_", s)
        s = re.sub(r"[^0-9a-zA-Z_]+", "_", s)
        s = re.sub(r"_+", "_", s).strip("_")
        if s == "":
            s = "col"
        # ensure uniqueness
        k = s
        if k in seen:
            seen[k] += 1
            s = f"{k}_{seen[k]}"
        else:
            seen[k] = 0
        out.append(s)
    return out

# apply if these variables exist in the notebook at this point
if "train_x" in globals():
    train_x.columns = _safe_feature_names(train_x.columns)
    print("Sanitized feature names. Example:", list(train_x.columns)[:10])
if "test_x" in globals():
    test_x.columns = _safe_feature_names(test_x.columns)
"""
)

# Insert patch right before the first code cell that contains "LightGBMTunerCV" or "lgb.Dataset"
insert_at = None
for i, cell in enumerate(nb.cells):
    if cell.cell_type == "code":
        src = cell.source
        if "LightGBMTunerCV" in src or "lgb.Dataset" in src:
            insert_at = i
            break

# No such cell: append the patch at the end of the notebook
if insert_at is None:
    insert_at = len(nb.cells)

nb.cells.insert(insert_at, patch)

patched_obs_path = obs_path.with_name(obs_path.stem + "_patched.ipynb")
nbformat.write(nb, patched_obs_path)

patched_obs_path

WindowsPath('reconstructed/009_id-3515ca2f-52c1-4a42-9f83-c4de4c3e9ca8_observed_patched.ipynb')

In [350]:
# Prepare notebook for running
# Load the patched notebook
nb = nbformat.read(patched_obs_path, as_version=4)

# Define patch cell to align train_x/test_x columns.
# The body is guarded: the cell may be inserted where train_x/test_x are not defined
# (e.g. appended at the end, or before a cell that merely mentions "best_model"),
# and an unguarded patch would then fail the replay with a NameError of its own.
align_patch = new_code_cell(
    """
# --- PATCH: align test_x columns to train_x columns ---
# LightGBM requires test features to match training features exactly.

if "train_x" in globals() and "test_x" in globals():
    train_cols = list(train_x.columns)

    # Add any missing columns to test_x with zeros
    missing = [c for c in train_cols if c not in test_x.columns]
    for c in missing:
        test_x[c] = 0

    # Drop any extra columns not seen during training
    extra = [c for c in test_x.columns if c not in train_cols]
    if len(extra) > 0:
        test_x = test_x.drop(columns=extra)

    # Reorder to match training
    test_x = test_x[train_cols]

    print("Aligned test_x to train_x:",
          "train_x:", train_x.shape,
          "test_x:", test_x.shape,
          "missing added:", len(missing),
          "extra dropped:", len(extra))
else:
    print("Skipped alignment: train_x/test_x are not defined at this point")
"""
)

# Insert cell right before the first code cell that calls ".predict(" or mentions "best_model"
insert_at = None
for i, cell in enumerate(nb.cells):
    if cell.cell_type == "code" and (".predict(" in cell.source or "best_model" in cell.source):
        insert_at = i
        break

# No such cell: append the patch at the end of the notebook
if insert_at is None:
    insert_at = len(nb.cells)

nb.cells.insert(insert_at, align_patch)

patched2_path = patched_obs_path.with_name(patched_obs_path.stem + "_aligned.ipynb")
nbformat.write(nb, patched2_path)

patched2_path

WindowsPath('reconstructed/009_id-3515ca2f-52c1-4a42-9f83-c4de4c3e9ca8_observed_patched_aligned.ipynb')

In [None]:
# Define function to execute notebook and save output
def execute_notebook_safe(input_path: Path, output_path: Path, timeout=600, kernel_name="python3"):
    """
    Execute a notebook with nbclient and save the (possibly partial) result.

    input_path:  notebook to execute
    output_path: where the executed notebook is written, whether or not the
                 execution succeeded
    timeout:     value passed to NotebookClient as `timeout`
    kernel_name: name of the kernel to run the notebook with

    Returns "success", or a message starting with "execution_error: ",
    "timeout: " or "other_error: " describing why execution stopped.
    A failure to write the output file is raised to the caller instead of
    being reported as an execution error.
    """
    nb = nbformat.read(input_path, as_version=4)
    client = NotebookClient(nb, timeout=timeout, kernel_name=kernel_name)

    try:
        client.execute()
        status = "success"
    except CellExecutionError as e:
        status = f"execution_error: {e}"
    except CellTimeoutError as e:
        status = f"timeout: {e}"
    except Exception as e:
        status = f"other_error: {type(e).__name__}: {e}"

    # Save once, after the outcome is known, so partial outputs are kept on failure too
    nbformat.write(nb, output_path)
    return status
    
# Replay the patched and aligned notebook, storing the result in the "replayed" directory
replay_dir = Path("replayed")
replay_dir.mkdir(exist_ok=True)

# Same file name as the input, with the "_aligned" suffix swapped for "_replayed"
replay_name = patched2_path.name.replace("_aligned", "_replayed")
replay_path = replay_dir / replay_name
execute_notebook_safe(patched2_path, replay_path)

"timeout: A cell timed out while it was being executed, after 600 seconds.\nThe message was: Cell execution timed out.\nHere is a preview of the cell contents:\n-------------------\ncat_objective = CatObjective(train_X, train_y)\ncat_study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=SEED))\ncat_study.optimize(cat_objective, n_trials=30, show_progress_bar = True)\n-------------------\n"

In [352]:
# Compare outputs observed vs replayed notebook
# Define needed functions
def normalize_output(output):
    """
    Reduce a single cell output to a plain dict suitable for comparison.

    Error outputs are reduced to output_type, ename and evalue.
    Every other output is copied (shallowly) with its execution_count removed.
    The input object is not modified.
    """
    fields = dict(output)

    # For errors, keep only the essentials
    if fields.get("output_type") == "error":
        return {
            "output_type": "error",
            "ename": fields.get("ename"),
            "evalue": fields.get("evalue"),
        }

    # The execution count changes between runs even when the content is the same
    fields.pop("execution_count", None)
    return fields

def outputs_fingerprint(nb_path):
    """
    Return a SHA-256 hex digest over the normalized outputs of all code cells.

    The outputs are collected per code cell, in notebook order, normalized
    with normalize_output and serialized as JSON with sorted keys. Two
    notebooks with different digests have different outputs.
    """
    nb = nbformat.read(nb_path, as_version=4)

    per_cell_outputs = [
        [normalize_output(o) for o in cell.get("outputs", [])]
        for cell in nb.cells
        if cell.get("cell_type") == "code"
    ]

    serialized = json.dumps(per_cell_outputs, sort_keys=True, ensure_ascii=False)
    digest = hashlib.sha256()
    digest.update(serialized.encode("utf-8"))
    return digest.hexdigest()

def notebook_ran_fully(nb_path):
    """Return True when no code cell of the notebook has an output of type "error"."""
    nb = nbformat.read(nb_path, as_version=4)
    has_error_output = any(
        out.get("output_type") == "error"
        for cell in nb.cells
        if cell.get("cell_type") == "code"
        for out in cell.get("outputs", [])
    )
    return not has_error_output

def replay_failure_cell(nb_path):
    """Return the index of the first cell with an "error" output, or None when there is none."""
    nb = nbformat.read(nb_path, as_version=4)
    for idx, cell in enumerate(nb.cells):
        if any(out.get("output_type") == "error" for out in cell.get("outputs", [])):
            return idx
    return None

# Fingerprint the outputs of the observed and the replayed notebook and check the replay for errors
obs_fp = outputs_fingerprint(obs_path)
rep_fp = outputs_fingerprint(replay_path)
ran_fully = notebook_ran_fully(replay_path)

print("Observed outputs hash:", obs_fp)
print("Replayed  outputs hash:", rep_fp)
print("Same outputs?", obs_fp == rep_fp)
print("Replay ran fully?", ran_fully)

# Report where the replay first produced an error output
if not ran_fully:
    failure_cell = replay_failure_cell(replay_path)
    print("First error found at cell:", failure_cell)

Observed outputs hash: 782471437c4c1678102caa3c5c482f5b27e4e5939e62d78823affdd626600e6c
Replayed  outputs hash: 8bf3334be202fb74c112c8f915f5c1c814526654e6c5f60ef54d00c1d8af868b
Same outputs? False
Replay ran fully? False
First error found at cell: 18


In [353]:
# Define function that identifies which cells have differing outputs
def diff_cells(obs_path, rep_path):
    """
    Return the indices (in the observed notebook) of code cells whose
    normalized outputs differ between the observed and the replayed notebook.

    Cells are paired by source text, in order, instead of by position: the
    replayed notebook may contain extra cells (e.g. injected patch cells),
    and pairing by position would compare unrelated cells after the first
    extra cell. Observed cells without a replayed counterpart are skipped.
    """
    obs = nbformat.read(obs_path, as_version=4)
    rep = nbformat.read(rep_path, as_version=4)

    diffs = []
    rep_pos = 0  # first replayed cell not yet paired with an observed cell
    for i, c1 in enumerate(obs.cells):
        # Next replayed cell with the same source; replay-only cells in between are skipped
        match = next(
            (k for k in range(rep_pos, len(rep.cells)) if rep.cells[k].source == c1.source),
            None,
        )
        if match is None:
            continue
        rep_pos = match + 1

        if c1.get("cell_type") != "code":
            continue
        c2 = rep.cells[match]
        o1 = [normalize_output(o) for o in c1.get("outputs", [])]
        o2 = [normalize_output(o) for o in c2.get("outputs", [])]
        if o1 != o2:
            diffs.append(i)
    return diffs

diff_idxs = diff_cells(obs_path, replay_path)
print("Cells with different outputs:", diff_idxs[:20], "..." if len(diff_idxs) > 20 else "")
print("Total differing cells:", len(diff_idxs))

Cells with different outputs: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 13, 14, 15, 18, 20, 21] 
Total differing cells: 16


In [354]:
# Define function that prints source code and outputs of differing cells
def print_differing_cells(obs_path, rep_path):
    """
    Print source code, observed outputs and replayed outputs of every code
    cell whose normalized outputs differ between the two notebooks.

    Cells are paired by source text, in order, instead of by position: the
    replayed notebook may contain extra cells (e.g. injected patch cells),
    and pairing by position would compare unrelated cells after the first
    extra cell. Observed cells without a replayed counterpart are skipped.
    The printed cell number is the index in the observed notebook.
    """
    obs = nbformat.read(obs_path, as_version=4)
    rep = nbformat.read(rep_path, as_version=4)

    rep_pos = 0  # first replayed cell not yet paired with an observed cell
    for i, c1 in enumerate(obs.cells):
        # Next replayed cell with the same source; replay-only cells in between are skipped
        match = next(
            (k for k in range(rep_pos, len(rep.cells)) if rep.cells[k].source == c1.source),
            None,
        )
        if match is None:
            continue
        rep_pos = match + 1

        if c1.get("cell_type") != "code":
            continue
        c2 = rep.cells[match]

        o1 = [normalize_output(o) for o in c1.get("outputs", [])]
        o2 = [normalize_output(o) for o in c2.get("outputs", [])]

        if o1 != o2:
            print("=" * 80)
            print(f"Cell {i}")
            print("- Source code:")
            print(c1.source)
            print("- Observed outputs:")
            pprint(o1)
            print("- Replayed outputs:")
            pprint(o2)

print_differing_cells(obs_path, replay_path)

Cell 0
- Source code:
!pip install pandas
- Observed outputs:
[{'name': 'stdout',
  'output_type': 'stream',
  'text': 'Collecting pandas\n'
          '  Downloading '
          'pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata '
          '(89 kB)\n'
          'Collecting numpy>=1.26.0 (from pandas)\n'
          '  Downloading '
          'numpy-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata '
          '(62 kB)\n'
          '/kernel/lib/python3.12/site-packages (from pandas) (2.9.0.post0)\n'
          'Collecting pytz>=2020.1 (from pandas)\n'
          '  Downloading pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)\n'
          'Collecting tzdata>=2022.7 (from pandas)\n'
          '  Downloading tzdata-2024.2-py2.py3-none-any.whl.metadata (1.4 kB)\n'
          '/kernel/lib/python3.12/site-packages (from '
          'python-dateutil>=2.8.2->pandas) (1.16.0)\n'
          'Downloading '
          'pandas-2.2.3-cp312-cp312-manyl

In [355]:
# Define function that gives overview of differing cells
def summarize_results(obs_path, rep_path):
    """
    Print one summary line (cell index, lines of code, number of characters)
    for every code cell whose outputs differ between the two notebooks.

    Outputs are compared after normalize_output, so differences that are only
    due to execution counts or traceback text are not reported.
    Cells are paired by source text, in order, instead of by position: the
    replayed notebook may contain extra cells (e.g. injected patch cells),
    and pairing by position would compare unrelated cells after the first
    extra cell. Observed cells without a replayed counterpart are skipped.
    """
    obs = nbformat.read(obs_path, as_version=4)
    rep = nbformat.read(rep_path, as_version=4)

    rep_pos = 0  # first replayed cell not yet paired with an observed cell
    for i, c1 in enumerate(obs.cells):
        # Next replayed cell with the same source; replay-only cells in between are skipped
        match = next(
            (k for k in range(rep_pos, len(rep.cells)) if rep.cells[k].source == c1.source),
            None,
        )
        if match is None:
            continue
        rep_pos = match + 1

        if c1.get("cell_type") != "code":
            continue
        c2 = rep.cells[match]

        o1 = [normalize_output(o) for o in c1.get("outputs", [])]
        o2 = [normalize_output(o) for o in c2.get("outputs", [])]

        if o1 != o2:
            n_lines = len(c1.source.splitlines())
            n_chars = len(c1.source)
            print(
                f"Cell {i}: outputs differ | "
                f"lines of code = {n_lines} | "
                f"number of characters = {n_chars} "
            )

summarize_results(obs_path, replay_path)

Cell 0: outputs differ | lines of code = 1 | number of characters = 19 
Cell 1: outputs differ | lines of code = 1 | number of characters = 21 
Cell 2: outputs differ | lines of code = 1 | number of characters = 19 
Cell 3: outputs differ | lines of code = 1 | number of characters = 25 
Cell 4: outputs differ | lines of code = 1 | number of characters = 23 
Cell 5: outputs differ | lines of code = 1 | number of characters = 22 
Cell 6: outputs differ | lines of code = 9 | number of characters = 333 
Cell 7: outputs differ | lines of code = 2 | number of characters = 48 
Cell 8: outputs differ | lines of code = 1 | number of characters = 17 
Cell 9: outputs differ | lines of code = 2 | number of characters = 45 
Cell 13: outputs differ | lines of code = 5 | number of characters = 342 
Cell 14: outputs differ | lines of code = 1 | number of characters = 13 
Cell 15: outputs differ | lines of code = 1 | number of characters = 12 
Cell 18: outputs differ | lines of code = 3 | number of cha

In [356]:
# Save log outlining differing cells to txt file
def print_differing_cells_to_file(obs_path, rep_path, db_path):
    """
    Write source code, observed outputs and replayed outputs of every code
    cell with differing normalized outputs to
    differences/difference_log_<db name>.txt and return that path.

    The log is written directly to the file (sys.stdout is left untouched),
    so an exception while reading or comparing cannot leave stdout redirected.
    Cells are paired by source text, in order, instead of by position: the
    replayed notebook may contain extra cells (e.g. injected patch cells),
    and pairing by position would compare unrelated cells after the first
    extra cell. Observed cells without a replayed counterpart are skipped.
    """
    # create output directory
    out_dir = Path("differences")
    out_dir.mkdir(exist_ok=True)

    # build filename from db name
    out_file = out_dir / f"difference_log_{Path(db_path).stem}.txt"

    # read both notebooks before creating the log file
    obs = nbformat.read(obs_path, as_version=4)
    rep = nbformat.read(rep_path, as_version=4)

    with open(out_file, "w", encoding="utf-8") as f:
        rep_pos = 0  # first replayed cell not yet paired with an observed cell
        for i, c1 in enumerate(obs.cells):
            # Next replayed cell with the same source; replay-only cells in between are skipped
            match = next(
                (k for k in range(rep_pos, len(rep.cells)) if rep.cells[k].source == c1.source),
                None,
            )
            if match is None:
                continue
            rep_pos = match + 1

            if c1.get("cell_type") != "code":
                continue
            c2 = rep.cells[match]

            o1 = [normalize_output(o) for o in c1.get("outputs", [])]
            o2 = [normalize_output(o) for o in c2.get("outputs", [])]

            if o1 != o2:
                print("=" * 80, file=f)
                print(f"Cell {i}", file=f)
                print("- Source code:", file=f)
                print(c1.source, file=f)
                print("- Observed outputs:", file=f)
                pprint(o1, stream=f)
                print("- Replayed outputs:", file=f)
                pprint(o2, stream=f)

    return out_file

log_path = print_differing_cells_to_file(
    obs_path=obs_path,
    rep_path=replay_path,
    db_path=db_path
)

print("Saved difference log to:", log_path)

Saved difference log to: differences\difference_log_009.txt
