# Two-Stage OSR Runner (Browser Colab)

Workflow: mount Google Drive -> clone or pull the repo -> install dependencies -> discover the dataset paths -> run stage 1 and the split pipelines -> summarize the metrics.

In [None]:
# Runtime diagnostics: show which interpreter and working directory the
# notebook is using, then ask the NVIDIA driver tool for the GPU status.
import os
import subprocess
import sys

print('python:', sys.executable)
print('cwd:', os.getcwd())

gpu_query = ['nvidia-smi']
try:
    # check=False: a non-zero exit status from the tool does not raise here.
    subprocess.run(gpu_query, check=False)
except FileNotFoundError:
    # Raised when the executable itself cannot be located.
    print('nvidia-smi not found in PATH (this can happen in some runtimes).')

In [None]:
# Mount Google Drive at /content/drive. Later cells search this mount for the
# datasets and write the pipeline outputs under /content/drive/MyDrive.
# force_remount=True is passed so the mount is redone even when a previous
# mount is still registered (presumably to recover from a stale mount).
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
# Fetch (or refresh) the project checkout and make it the working directory so
# that the relative paths used by later cells (configs/, src/,
# requirements.txt) resolve against the repository root.
from pathlib import Path
import os
import subprocess

REPO_URL = 'https://github.com/spinelessknave8/FYP_code.git'
REPO_DIR = Path('/content/FYP-code')

if not REPO_DIR.exists():
    # No checkout yet: a failed clone is fatal, so let check_call raise.
    subprocess.check_call(['git', 'clone', REPO_URL, str(REPO_DIR)])
else:
    # Refreshing stays best-effort (the existing checkout is still usable),
    # but a failure is reported instead of being silently ignored so stale
    # code is never run unknowingly.
    pull = subprocess.run(['git', '-C', str(REPO_DIR), 'pull'], check=False)
    if pull.returncode != 0:
        print(f'WARNING: git pull failed (exit code {pull.returncode}); '
              'continuing with the existing checkout.')

os.chdir(REPO_DIR)
print('repo root:', Path.cwd())
# Quick visibility checks only; nothing is raised here if they print False.
print('default.yaml exists:', Path('configs/default.yaml').exists())
print('src exists:', Path('src').exists())

In [None]:
# Install the project's dependencies with the pip that belongs to the
# interpreter running this notebook. 'requirements.txt' is a relative path, so
# it is resolved against the current working directory.
import subprocess
import sys

pip_install = [sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt']
# check_call raises CalledProcessError when pip exits with a non-zero status.
subprocess.check_call(pip_install)

In [None]:
# Report the installed torch build and whether it can see a CUDA device.
import torch

print('torch:', torch.__version__)

cuda_ok = torch.cuda.is_available()
print('cuda available:', cuda_ok)

if not cuda_ok:
    print('WARNING: CUDA not available; check runtime type and torch build.')
else:
    # Device index 0 is the first visible GPU.
    print('gpu:', torch.cuda.get_device_name(0))

In [None]:
# Locate the `severstal` and `neu` dataset directories under the mounted Drive.
# Publishes two notebook globals, `sev` and `neu` (Path or None), that the run
# cell consumes; raises RuntimeError when either dataset cannot be found.
from pathlib import Path

# Places under the Drive mount that are searched, in priority order.
search_roots = [
    Path('/content/drive/MyDrive'),
    Path('/content/drive/Shareddrives'),
    Path('/content/drive/.shortcut-targets-by-id'),
]


def _find_dirs(name, roots):
    """Return all directories called *name* below the existing *roots*.

    Roots are visited in the given order, so earlier roots keep priority.
    Within one root the hits are sorted shallowest-first (ties broken by
    path) so the selected directory does not depend on filesystem listing
    order.
    """
    found = []
    for root in roots:
        if not root.exists():
            continue
        in_root = [p for p in root.glob(f'**/{name}') if p.is_dir()]
        found.extend(sorted(in_root, key=lambda p: (len(p.parts), str(p))))
    return found


sev_hits = _find_dirs('severstal', search_roots)
neu_hits = _find_dirs('neu', search_roots)

# Resolve each dataset independently so the diagnostics below show exactly
# which one was located even when the other is missing.
sev = sev_hits[0] if sev_hits else None
neu = neu_hits[0] if neu_hits else None

print('severstal path:', sev)
print('neu path:', neu)

for label, hits in (('severstal', sev_hits), ('neu', neu_hits)):
    if len(hits) > 1:
        # Make an ambiguous match visible instead of silently picking one.
        print(f'NOTE: {len(hits)} {label} candidates found; using the first. Others:')
        for other in hits[1:]:
            print('   ', other)

missing = [label for label, path in (('severstal', sev), ('neu', neu)) if path is None]
if missing:
    raise RuntimeError(
        f"Could not find {'/'.join(missing)} under mounted drive. "
        'Add shortcuts in MyDrive or set explicit paths.'
    )


In [None]:
# Build Colab-specific configs from the repository defaults, then run stage 1
# followed by one split pipeline per NEU split, printing progress and timings.
import copy
import time
import yaml
from pathlib import Path
from src.pipelines.notebook_entrypoints import run_two_stage_stage1, run_split_pipeline

# The dataset paths are published by the discovery cell. Read them through
# globals() so that a never-run discovery cell produces the instruction below
# instead of a bare NameError, and raise explicitly rather than assert.
sev = globals().get('sev')
neu = globals().get('neu')
if sev is None or neu is None:
    raise RuntimeError('Run dataset discovery cell first.')

split_ids = ['a', 'b', 'c']
# Steps: config build (0), sanity checks (1), stage 1 (2), one per split, done.
n_steps = len(split_ids) + 3

t_all = time.time()
print(f'[0/{n_steps}] Building Colab configs...')

base = yaml.safe_load(Path('configs/default.yaml').read_text())
# NOTE(review): the device is forced to 'cuda' with no fallback; the CUDA
# check cell only warns when no GPU is available.
base['device'] = 'cuda'
base['severstal']['data_root'] = str(sev)
base['severstal']['train_csv'] = 'train.csv'
base['severstal']['images_dir'] = 'train_images'
base['neu']['data_root'] = str(neu)
base['output_dir'] = '/content/drive/MyDrive/fyp_outputs'

Path('configs/default.colab.yaml').write_text(yaml.safe_dump(base, sort_keys=False))
print('  wrote configs/default.colab.yaml')

split_colab = []
for s in split_ids:
    # An empty YAML file loads as None; treat it as "no overrides".
    split_cfg = yaml.safe_load(Path(f'configs/neu_split_{s}.yaml').read_text()) or {}
    # Independent copy so per-split overrides never leak into `base`.
    merged = copy.deepcopy(base)
    # NOTE(review): dict.update is a shallow merge - a nested section in the
    # split file replaces the whole section from `base`, including the Colab
    # path overrides set above. Confirm the split files only set top-level keys.
    merged.update(split_cfg)
    out = Path(f'configs/neu_split_{s}.colab.yaml')
    out.write_text(yaml.safe_dump(merged, sort_keys=False))
    split_colab.append(str(out))
    print('  wrote', out)

print(f'[1/{n_steps}] Sanity checks...')
for dataset in ('severstal', 'neu'):
    data_root = Path(base[dataset]['data_root'])
    if not data_root.exists():
        raise FileNotFoundError(f'{dataset} data_root does not exist: {data_root}')
print('  sanity checks passed')

print(f'[2/{n_steps}] Stage 1: PatchCore')
t = time.time()
run_two_stage_stage1('configs/default.colab.yaml')
print(f'  stage 1 done in {time.time() - t:.1f}s')

for i, split in enumerate(split_colab, start=3):
    print(f'[{i}/{n_steps}] Split pipeline: {split}')
    t = time.time()
    run_split_pipeline(split)
    print(f'  {split} done in {time.time() - t:.1f}s')

print(f'[{n_steps}/{n_steps}] All done in {time.time() - t_all:.1f}s')

In [None]:
# Summarize the per-split cascade metrics, run the aggregate/combined-plot
# modules, display the saved plot images and draw stage-1 pass/leakage charts.
import json
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from pathlib import Path
import sys, subprocess

# Same directory the run cell writes as `output_dir`. Note that this rebinds
# the notebook-global name `base`, which the run cell uses for its config dict.
base = Path('/content/drive/MyDrive/fyp_outputs')

SPLITS = ['split_a', 'split_b', 'split_c']
SUMMARY_KEYS = [
    'tpr_unknown_system',
    'fpr_known_system',
    'stage1_pass_rate_known',
    'stage1_pass_rate_unknown',
]
RATE_KEYS = [
    'stage1_pass_rate_known',
    'stage1_pass_rate_unknown',
    'stage1_leakage_rate_known',
    'stage1_leakage_rate_unknown',
    'tpr_unknown_system',
    'fpr_known_system',
    'tpr_unknown_conditional',
    'fpr_known_conditional',
]


def _load_cascade_metrics(split):
    """Return the parsed cascade/metrics.json of *split*, or None if absent."""
    path = base / split / 'cascade' / 'metrics.json'
    if not path.exists():
        return None
    return json.loads(path.read_text())


def _rate(metrics, key):
    """Return metrics[key], mapping a missing key or JSON null to NaN.

    A null value would otherwise bypass the dict default and break both
    round() and the matplotlib calls further down.
    """
    value = metrics.get(key)
    return np.nan if value is None else value


def _show_image(path, title, figsize):
    """Display the image at *path* in its own figure; skip silently if absent."""
    if not path.exists():
        return
    plt.figure(figsize=figsize)
    plt.title(title)
    plt.imshow(mpimg.imread(path))
    plt.axis('off')
    plt.show()


def _bar_pair(rows, left_key, right_key, left_label, right_label, title):
    """Draw a grouped bar chart comparing two rate columns across splits."""
    x = np.arange(len(rows))
    w = 0.35
    plt.figure(figsize=(8, 4))
    plt.bar(x - w / 2, [r[left_key] for r in rows], width=w, label=left_label)
    plt.bar(x + w / 2, [r[right_key] for r in rows], width=w, label=right_label)
    plt.xticks(x, [r['split'] for r in rows])
    plt.ylim(0, 1)
    plt.ylabel('Rate')
    plt.title(title)
    plt.legend()
    plt.grid(axis='y', alpha=0.25)
    plt.show()


def _line_pair(rows, first_key, second_key, first_label, second_label, title):
    """Draw a line chart comparing two rate columns across splits."""
    labels = [r['split'] for r in rows]
    plt.figure(figsize=(8, 4))
    plt.plot(labels, [r[first_key] for r in rows], marker='o', label=first_label)
    plt.plot(labels, [r[second_key] for r in rows], marker='o', label=second_label)
    plt.ylim(0, 1)
    plt.ylabel('Rate')
    plt.title(title)
    plt.legend()
    plt.grid(alpha=0.25)
    plt.show()


# quick metric summary
for split in SPLITS:
    m = _load_cascade_metrics(split)
    if m is None:
        print(split, 'missing metrics:', base / split / 'cascade' / 'metrics.json')
        continue
    print(split, {key: m.get(key) for key in SUMMARY_KEYS})

# aggregate + combined plots (check_call raises if either module exits non-zero)
subprocess.check_call([sys.executable, '-m', 'src.pipelines.aggregate_osr', '--output_dir', str(base)])
subprocess.check_call([sys.executable, '-m', 'src.pipelines.plot_combined_osr', '--output_dir', str(base), '--out_dir', str(base / 'combined')])

# display core plots
for split in SPLITS:
    for name in ['loss_curve.png', 'acc_curve.png', 'roc_osr.png', 'hist_osr.png']:
        _show_image(base / split / 'plots' / name, f'{split} - {name}', (6, 3.5))

for name in ['roc_combined.png', 'mahalanobis_combined.png']:
    _show_image(base / 'combined' / name, name, (7, 4))

# stage-1 leakage/pass visuals
# The metrics files are re-read here, after the aggregate step, exactly as before.
rows = []
for s in SPLITS:
    m = _load_cascade_metrics(s)
    if m is None:
        continue
    rows.append({'split': s, **{key: _rate(m, key) for key in RATE_KEYS}})

if rows:
    _bar_pair(rows, 'stage1_pass_rate_known', 'stage1_pass_rate_unknown',
              'known pass rate', 'unknown pass rate',
              'Stage-1 Pass Rates by Split')
    _bar_pair(rows, 'stage1_leakage_rate_known', 'stage1_leakage_rate_unknown',
              'known leakage', 'unknown leakage',
              'Stage-1 Leakage Rates by Split')
    _line_pair(rows, 'tpr_unknown_system', 'tpr_unknown_conditional',
               'TPR unknown (system)', 'TPR unknown (conditional)',
               'Unknown Detection TPR: System vs Conditional')
    _line_pair(rows, 'fpr_known_system', 'fpr_known_conditional',
               'FPR known (system)', 'FPR known (conditional)',
               'Known Rejection FPR: System vs Conditional')

    print('Split-wise cascade summary:')
    for r in rows:
        print(r['split'], {
            'pass_known': round(r['stage1_pass_rate_known'], 4),
            'pass_unknown': round(r['stage1_pass_rate_unknown'], 4),
            'leak_known': round(r['stage1_leakage_rate_known'], 4),
            'leak_unknown': round(r['stage1_leakage_rate_unknown'], 4),
            'tpr_sys': round(r['tpr_unknown_system'], 4),
            'fpr_sys': round(r['fpr_known_system'], 4),
        })
else:
    print('No cascade metrics found for leakage/pass visualizations.')