# XNAT Pipelines – JupyterHub Monitoring Notebook

This notebook mirrors the lightweight dashboard by scanning a local runs directory (default: `./xnat_local_runs`) and displaying:
- live **summary** (total / running / complete / failed)
- a **table** of runs with status, context, command, and log tail
- an optional **chart** of status counts over time

You can run it on JupyterHub to monitor local executions kicked off by `xnat_pipelines`'s local backend.

## 1) Configure paths & refresh interval

In [None]:
from pathlib import Path
# Directory scanned for `run_*` entries; created below if it does not exist yet.
RUNS_DIR = Path("./xnat_local_runs").absolute()
REFRESH_SEC = 2.0  # seconds to sleep between refreshes; set to 0 to disable auto-refresh loop
LOG_TAIL = 800     # characters of log tail to show for each run (passed to scan_runs as `tail`)
RUNS_DIR.mkdir(parents=True, exist_ok=True)
RUNS_DIR  # last expression of the cell: echoes the absolute path as cell output

## 2) Scanner utilities
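`scan_runs` reads each run's `run.json` manifest and `run.log` (written by the local backend), infers a status from the log text, and returns the list of runs together with summary counts.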

In [None]:
import json
from typing import List, Dict, Tuple

def scan_runs(root: Path, tail: int = 800) -> Tuple[List[dict], dict]:
    """Scan ``root`` for ``run_*`` directories and summarise each run.

    For every run directory the optional ``run.json`` manifest supplies
    ``time``, ``image``, ``context`` and ``cmd``; the optional ``run.log``
    supplies the log tail and is used to infer a status.

    Status is a text heuristic, applied in this order (later rules win):
      1. log starts with ``"DRY RUN"``            -> ``"Prepared"``
         otherwise log contains ``"CMD:"``         -> ``"Running"``
      2. log mentions "error" or "traceback"      -> ``"Failed"``
      3. log mentions "Complete" or "completed"   -> ``"Complete"``
    A missing or unreadable log leaves the status as ``"Unknown"``.

    Args:
        root: Directory containing the ``run_*`` sub-directories.
        tail: Maximum number of trailing log characters to keep per run.
            Zero or a negative value yields an empty tail.

    Returns:
        ``(runs, counts)`` where ``runs`` is a list of dicts with keys
        ``dir``, ``time``, ``status``, ``image``, ``context``, ``cmd`` and
        ``log_tail`` (sorted by directory path), and ``counts`` is a dict
        with keys ``total``, ``running``, ``complete`` and ``failed``.
    """
    runs = []
    for d in sorted(root.glob('run_*')):
        # Only directories are runs. A run may also disappear between the
        # glob and the stat; skip it instead of aborting the whole scan.
        try:
            if not d.is_dir():
                continue
            mtime = d.stat().st_mtime
        except OSError:
            continue

        run = {"dir": d.name, "time": mtime, "status": "Unknown", "image": "", "context": {}, "cmd": []}

        # The manifest is optional: a missing, unreadable or malformed file
        # simply leaves the defaults above in place.
        try:
            manifest = json.loads((d / 'run.json').read_text())
        except (OSError, ValueError):
            manifest = None
        if isinstance(manifest, dict):
            run.update({
                "time": manifest.get("time", run["time"]),
                "image": manifest.get("image", ""),
                "context": manifest.get("context", {}),
                "cmd": manifest.get("cmd", []),
            })

        status = "Unknown"
        log_tail = ""
        try:
            log = (d / "run.log").read_text(errors="ignore")
        except (OSError, ValueError):
            log = None
        if log is not None:
            # log[-0:] would be the whole log, so guard non-positive tails.
            log_tail = log[-tail:] if tail > 0 else ""
            lowered = log.lower()
            if log.startswith("DRY RUN"):
                status = "Prepared"
            elif "CMD:" in log:
                status = "Running"
            if "error" in lowered or "traceback" in lowered:
                status = "Failed"
            # Checked last on purpose: a completion marker overrides an
            # earlier "error" mention in the log.
            if "Complete" in log or "completed" in lowered:
                status = "Complete"

        run["status"] = status
        run["log_tail"] = log_tail
        runs.append(run)

    # Summary counts; statuses outside these groups only add to "total".
    counts = {"total": len(runs), "running": 0, "complete": 0, "failed": 0}
    for r in runs:
        s = r["status"].lower()
        if s == "running":
            counts["running"] += 1
        elif s in ("complete", "completed", "succeeded", "done"):
            counts["complete"] += 1
        elif s in ("failed", "error", "aborted"):
            counts["failed"] += 1
    return runs, counts

# Initial scan; the bare tuple on the last line is echoed as the cell output.
runs, counts = scan_runs(RUNS_DIR, LOG_TAIL)
counts, len(runs)

## 3) Display helpers

In [None]:
import pandas as pd
from IPython.display import display, clear_output

def runs_dataframe(runs: List[dict]) -> pd.DataFrame:
    """Turn scanned runs into a display table, newest first.

    Each run dict becomes one row with the columns ``dir``, ``time``,
    ``status``, ``image``, ``context`` (JSON-encoded), ``cmd`` (parts joined
    with spaces, capped at 200 characters) and ``log_tail`` (capped at 2000
    characters). A non-empty table is sorted by ``time`` in descending order
    with a fresh 0..n-1 index; an empty input gives an empty DataFrame.
    """
    table = pd.DataFrame([
        {
            "dir": entry.get("dir"),
            "time": entry.get("time"),
            "status": entry.get("status"),
            "image": entry.get("image"),
            "context": json.dumps(entry.get("context", {})),
            "cmd": " ".join(str(part) for part in entry.get("cmd", []))[:200],
            "log_tail": entry.get("log_tail", "")[:2000],
        }
        for entry in runs
    ])
    if table.empty:
        return table
    return table.sort_values(by=["time"], ascending=False, ignore_index=True)

def show_once():
    """Scan RUNS_DIR once, print the one-line summary and display the runs table."""
    current_runs, tally = scan_runs(RUNS_DIR, LOG_TAIL)
    summary = "Summary: total={total} running={running} complete={complete} failed={failed}".format(**tally)
    print(summary)
    display(runs_dataframe(current_runs))

# Render a single snapshot (summary line plus table) as this cell's output.
show_once()

## 4) Live refresh loop (stop with the interrupt button)
Set `REFRESH_SEC=0` above to disable auto-refresh.

In [None]:
import time
# Auto-refresh: redraw the snapshot every REFRESH_SEC seconds until the
# kernel is interrupted. Skipped entirely when REFRESH_SEC is 0 or unset.
if REFRESH_SEC and REFRESH_SEC > 0:
    while True:
        try:
            # wait=True defers clearing until new output arrives.
            clear_output(wait=True)
            show_once()
            time.sleep(REFRESH_SEC)
        except KeyboardInterrupt:
            print("Stopped.")
            break

## 5) (Optional) Status chart over time
Run this cell occasionally (or on a timer) to sample the status counts a few times, one second apart, and plot the running, completed, and failed counts over time as three separate charts.

In [None]:
import datetime as dt
import time

import matplotlib.pyplot as plt

# Number of samples taken per execution of this cell and the pause between them.
CHART_SAMPLES = 5
CHART_INTERVAL_SEC = 1

# Keep points collected by earlier executions of this cell so that re-running
# appends to the series instead of starting over. Delete `history` (or restart
# the kernel) to reset the chart.
if "history" not in globals():
    history = []

for i in range(CHART_SAMPLES):
    _, c = scan_runs(RUNS_DIR, LOG_TAIL)
    # One point per sample: timestamp plus the total/running/complete/failed counts.
    history.append({"t": dt.datetime.now(), **c})
    if i < CHART_SAMPLES - 1:  # no need to wait after the final sample
        time.sleep(CHART_INTERVAL_SEC)

ts = [h["t"] for h in history]
running = [h["running"] for h in history]
complete = [h["complete"] for h in history]
failed = [h["failed"] for h in history]

# One figure per status series, all sharing the same time axis values.
for series, label, title in (
    (running, "running", "Running jobs"),
    (complete, "complete", "Completed jobs"),
    (failed, "failed", "Failed jobs"),
):
    plt.figure()
    plt.plot(ts, series, label=label)
    plt.xlabel("time")
    plt.ylabel("count")
    plt.title(title)
    plt.legend()
    plt.show()