# OCR / VLM pipeline launcher (Colab)

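This notebook clones the repo, installs the system and Python dependencies, sets the pipeline mode in `config.yaml`, runs the pipeline, and lists the JSON outputs. `pipeline_mode` can be `classic`, `vlm`, or `hybrid`.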

In [None]:
#@title 0. GPU check
import json
import os
import platform
import subprocess

import torch

# Report the interpreter version and whether PyTorch can reach a CUDA device;
# the device name is printed only when one is available.
print("Python", platform.python_version())
cuda_ok = torch.cuda.is_available()
print("Torch CUDA available:", cuda_ok)
if cuda_ok:
    print("CUDA device:", torch.cuda.get_device_name(0))

In [None]:
#@title 1. Clone repo (set your URL)
import os, shutil, subprocess, json

REPO_URL = "https://github.com/smidolt/x.git"  #@param {type:"string"}
TARGET_DIR = "/content/OCR"  #@param {type:"string"}

# Fresh-clone REPO_URL into TARGET_DIR and make the clone the working
# directory, so that later cells can use repo-relative paths.

# Resolve the target once so every step below refers to the same location.
target_dir = os.path.abspath(TARGET_DIR)
parent_dir = os.path.dirname(target_dir)

# Step out of the target before deleting it. This cell ends with
# os.chdir(TARGET_DIR), so on a re-run the kernel is sitting inside the
# directory that is about to be removed; deleting it would leave the process
# (and the git child process) with a working directory that no longer exists.
os.makedirs(parent_dir, exist_ok=True)
os.chdir(parent_dir)

if os.path.exists(target_dir):
    print(f"Removing existing {TARGET_DIR}...")
    shutil.rmtree(target_dir)

print(f"Cloning {REPO_URL} -> {TARGET_DIR}")
# check_call raises CalledProcessError on a non-zero git exit status, which
# stops the cell instead of continuing with a missing checkout.
subprocess.check_call(["git", "clone", REPO_URL, target_dir])
os.chdir(target_dir)
print("Repo ready:", os.getcwd())

In [None]:
#@title 2. Install system deps (tesseract/poppler for pdf2image)
# Refresh the apt package index, then install the tesseract-ocr and
# poppler-utils packages. `&&` skips the install when the index update fails.
# `> /dev/null` discards only the install step's stdout, so anything written
# to stderr is still shown in the cell output.
!apt-get update -qq && apt-get install -y -qq tesseract-ocr poppler-utils > /dev/null

In [None]:
#@title 3. Install Python deps (classic + VLM)
!pip install -U pip
!pip install -r requirements.txt
!pip install -r requirements-vlm.txt

In [None]:
#@title 4. Configure pipeline mode
from pathlib import Path

import torch  # imported here so this cell does not rely on the GPU-check cell having run
import yaml

CONFIG_PATH = Path("config.yaml")

# Load the existing config from the working directory.
with CONFIG_PATH.open("r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# safe_load returns None for an empty document; start from an empty mapping
# in that case instead of failing on the first item assignment below.
if cfg is None:
    cfg = {}
if not isinstance(cfg, dict):
    raise TypeError(
        f"{CONFIG_PATH} must contain a YAML mapping at the top level, "
        f"got {type(cfg).__name__}"
    )

# Example: VLM reasoner on GPU; disable LayoutLM when not needed
cfg["pipeline_mode"] = "vlm"  # options: classic | vlm | hybrid

# A section that is absent or written as a bare `vlm:` (null) is replaced by
# an empty mapping; an existing mapping is kept and updated in place.
if cfg.get("vlm") is None:
    cfg["vlm"] = {}
cfg["vlm"].update({
    "enabled": True,
    "backend": "qwen2_vl",  # heuristic | qwen2_vl
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "max_new_tokens": 256,
})

if cfg.get("layoutlm") is None:
    cfg["layoutlm"] = {}
cfg["layoutlm"]["enabled"] = False  # avoid loading when in vlm-only mode

# Write the modified config back over the original file, preserving key order.
with CONFIG_PATH.open("w", encoding="utf-8") as f:
    yaml.safe_dump(cfg, f, sort_keys=False, allow_unicode=True)
print("Updated config.yaml:")
print(yaml.safe_dump(cfg, sort_keys=False, allow_unicode=True))

In [None]:
#@title 5. Run pipeline
# Run the `src.cli` module as a script, passing the config.yaml found in the
# working directory and the --verbose flag. The config path is relative, and
# the module is presumably resolved from the working directory too, so this
# cell expects to run from the repo checkout (the clone cell chdir's there).
# NOTE(review): `python` is assumed to be the interpreter the pip cell
# installed the requirements into — confirm on non-Colab runtimes.
!python -m src.cli --config config.yaml --verbose

In [None]:
#@title 6. Inspect outputs
from pathlib import Path
import json


def summarize_output(path):
    """Summarize one pipeline output file.

    Args:
        path: ``pathlib.Path`` of a JSON file.

    Returns:
        A dict with ``"keys"`` (the top-level keys, in file order) and the
        booleans ``"classic"`` and ``"vlm"`` telling whether those keys are
        present, or ``None`` when the top-level JSON value is not an object.

    Raises:
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    # Read as UTF-8 explicitly rather than relying on the locale default,
    # matching how the config cell opens its files.
    data = json.loads(path.read_text(encoding="utf-8"))
    if not isinstance(data, dict):
        return None
    return {"keys": list(data), "classic": "classic" in data, "vlm": "vlm" in data}


out_dir = Path("output/json")
json_paths = sorted(out_dir.glob("*.json"))
if not json_paths:
    # Say so explicitly; otherwise an empty or missing directory prints nothing.
    print(f"No JSON files found in {out_dir}")

for path in json_paths:
    print("-", path.name)
    summary = summarize_output(path)
    if summary is None:
        print("  skipped: top-level JSON value is not an object")
        continue
    print("  keys:", summary["keys"])
    print("  classic present:", summary["classic"], "vlm present:", summary["vlm"])
