From 3795c2757bf30dca5bc5aad0376d4a2490da4263 Mon Sep 17 00:00:00 2001
From: HONGDAE KIM <rad174951@gmail.com>
Date: Sun, 15 Feb 2026 16:33:43 +0900
Subject: [PATCH] Add offline bundle policy verification and readiness checks

---
 README.md                        |  25 +++++++
 bitnet_tools/doctor.py           |  39 ++++++++++
 bitnet_tools/offline_bundle.py   | 124 +++++++++++++++++++++++++++++++
 offline_install.ps1              |  30 ++++++++
 offline_install.sh               |  23 ++++++
 scripts/prepare_online_bundle.sh |  61 ++++++++++++---
 tests/test_doctor.py             |  23 ++++++
 tests/test_offline_bundle.py     |  61 +++++++++++++++
 8 files changed, 374 insertions(+), 12 deletions(-)
 create mode 100644 bitnet_tools/offline_bundle.py
 create mode 100644 offline_install.ps1
 create mode 100755 offline_install.sh
 create mode 100644 tests/test_doctor.py
 create mode 100644 tests/test_offline_bundle.py
diff --git a/README.md b/README.md
index c78b9fc..d03f4a4 100644
--- a/README.md
+++ b/README.md
@@ -228,6 +228,31 @@ jupyter lab
 
 ---
 
+
+## 오프라인 번들 설치/검증
+
+```bash
+# 온라인 환경에서 번들 생성
+./scripts/prepare_online_bundle.sh
+
+# 오프라인 환경 설치(사전 검증 포함)
+./offline_install.sh
+# Windows
+./offline_install.ps1
+```
+
+`offline_install.(sh|ps1)`는 설치 전에 다음을 검사합니다.
+- SHA256 해시 일치
+- 허용목록(allowlist) 포함 여부
+- 라이선스 허용목록(allowed_licenses) 준수
+
+위반 항목이 하나라도 있으면 설치를 중단하고 사유를 출력합니다.
+
+`bitnet-analyze doctor --model <tag>` 출력에는 `offline_readiness`가 포함되며,
+모델/의존성/필수 파일/번들 디렉터리 준비 상태를 확인할 수 있습니다.
+
+---
+
 ## 7) 지금 바로 실행할 최소 커맨드 모음
 
 ```bash
diff --git a/bitnet_tools/doctor.py b/bitnet_tools/doctor.py
index 2758eea..1ab76f0 100644
--- a/bitnet_tools/doctor.py
+++ b/bitnet_tools/doctor.py
@@ -4,6 +4,7 @@
 import shutil
 import subprocess
 import sys
+from pathlib import Path
 from typing import Any
 
 
@@ -22,6 +23,7 @@ def collect_environment(model: str | None = None) -> dict[str, Any]:
 
     ollama_path = shutil.which("ollama")
     if not ollama_path:
+        info["offline_readiness"] = _collect_offline_readiness([], model=model)
         info["diagnosis"] = "ollama not found in PATH"
         return info
 
@@ -47,5 +49,42 @@ def collect_environment(model: str | None = None) -> dict[str, Any]:
             info["model_available"] = any(m.startswith(model) for m in models)
     else:
         info["ollama_list_error"] = err or out or "failed to query ollama"
+        models = []
+
+    info["offline_readiness"] = _collect_offline_readiness(models, model=model)
 
     return info
+
+
+def _collect_offline_readiness(models: list[str], model: str | None = None) -> dict[str, Any]:
+    root_dir = Path(__file__).resolve().parent.parent
+    bundle_dir = root_dir / ".offline_bundle"
+    required_files = {
+        "offline_install_sh": root_dir / "offline_install.sh",
+        "offline_install_ps1": root_dir / "offline_install.ps1",
+        "offline_policy": bundle_dir / "meta" / "offline_policy.json",
+        "deferred_manifest": root_dir / "deferred_install_manifest.json",
+    }
+
+    files = {name: path.exists() for name, path in required_files.items()}
+    dependencies = {
+        "python": True,
+        "pip": shutil.which("pip") is not None,
+    }
+
+    model_state: dict[str, Any] = {
+        "requested": model,
+        "available": None,
+        "installed_models": models,
+    }
+    if model:
+        model_state["available"] = any(m.startswith(model) for m in models)
+
+    return {
+        "bundle_dir": str(bundle_dir),
+        "bundle_dir_exists": bundle_dir.exists(),
+        "dependencies": dependencies,
+        "files": files,
+        "model": model_state,
+        "ready": bundle_dir.exists() and all(files.values()) and all(dependencies.values()) and (model_state["available"] is not False),
+    }
diff --git a/bitnet_tools/offline_bundle.py b/bitnet_tools/offline_bundle.py
new file mode 100644
index 0000000..70b82f4
--- /dev/null
+++ b/bitnet_tools/offline_bundle.py
@@ -0,0 +1,124 @@
+from __future__ import annotations
+
+import argparse
+import hashlib
+import json
+from pathlib import Path
+from typing import Any
+
+
+def _sha256(path: Path) -> str:
+    h = hashlib.sha256()
+    with path.open("rb") as f:
+        for chunk in iter(lambda: f.read(1024 * 1024), b""):
+            h.update(chunk)
+    return h.hexdigest()
+
+
+def verify_bundle(bundle_dir: Path, policy_path: Path) -> dict[str, Any]:
+    violations: list[str] = []
+    checked_assets: list[dict[str, Any]] = []
+
+    if not bundle_dir.exists():
+        return {
+            "ok": False,
+            "violations": [f"bundle directory not found: {bundle_dir}"],
+            "checked_assets": [],
+        }
+
+    if not policy_path.exists():
+        return {
+            "ok": False,
+            "violations": [f"policy file not found: {policy_path}"],
+            "checked_assets": [],
+        }
+
+    policy = json.loads(policy_path.read_text(encoding="utf-8"))
+    allowlist = set(policy.get("allowlist", []))
+    allowed_licenses = set(policy.get("allowed_licenses", []))
+    assets = policy.get("assets", [])
+
+    if not assets:
+        violations.append("policy has no assets")
+
+    for asset in assets:
+        rel_path = asset.get("path")
+        expected_hash = (asset.get("sha256") or "").lower()
+        license_name = asset.get("license", "UNKNOWN")
+        target = bundle_dir / rel_path if rel_path else bundle_dir
+
+        asset_result = {
+            "path": rel_path,
+            "exists": False,
+            "hash_ok": False,
+            "allowlisted": False,
+            "license_ok": False,
+            "license": license_name,
+        }
+
+        if not rel_path:
+            violations.append("asset.path is required")
+            checked_assets.append(asset_result)
+            continue
+
+        if rel_path in allowlist:
+            asset_result["allowlisted"] = True
+        else:
+            violations.append(f"allowlist violation: {rel_path}")
+
+        if license_name in allowed_licenses:
+            asset_result["license_ok"] = True
+        else:
+            violations.append(f"license violation: {rel_path} ({license_name})")
+
+        if target.exists() and target.is_file():
+            asset_result["exists"] = True
+            digest = _sha256(target)
+            asset_result["sha256"] = digest
+            if expected_hash and digest == expected_hash:
+                asset_result["hash_ok"] = True
+            else:
+                violations.append(
+                    f"hash mismatch: {rel_path} expected={expected_hash or '<empty>'} actual={digest}"
+                )
+        else:
+            violations.append(f"missing file: {rel_path}")
+
+        checked_assets.append(asset_result)
+
+    return {
+        "ok": not violations,
+        "violations": violations,
+        "checked_assets": checked_assets,
+        "policy_file": str(policy_path),
+        "bundle_dir": str(bundle_dir),
+    }
+
+
+def _build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(description="Offline bundle verification helper")
+    sub = parser.add_subparsers(dest="command", required=True)
+
+    verify = sub.add_parser("verify", help="verify offline bundle policy/hash/license checks")
+    verify.add_argument("--bundle-dir", type=Path, required=True)
+    verify.add_argument("--policy", type=Path, required=True)
+
+    return parser
+
+
+def main(argv: list[str] | None = None) -> int:
+    parser = _build_parser()
+    args = parser.parse_args(argv)
+
+    if args.command == "verify":
+        report = verify_bundle(args.bundle_dir, args.policy)
+        print(json.dumps(report, ensure_ascii=False, indent=2))
+        if not report["ok"]:
+            return 1
+        return 0
+
+    return 2
+
+
+if __name__ == "__main__":  # executed as a script or via ``python -m``
+    raise SystemExit(main())  # main()'s return value becomes the process exit status
diff --git a/offline_install.ps1 b/offline_install.ps1
new file mode 100644
index 0000000..bb93455
--- /dev/null
+++ b/offline_install.ps1
@@ -0,0 +1,30 @@
+# Verify the offline bundle against its policy, then install from its wheels only.
+param(
+  [string]$BundleDir = "$(Join-Path $PSScriptRoot '.offline_bundle')",
+  [string]$PolicyFile = ""
+)
+
+$ErrorActionPreference = 'Stop'
+if ([string]::IsNullOrWhiteSpace($PolicyFile)) { $PolicyFile = Join-Path $BundleDir 'meta/offline_policy.json' }
+$WheelDir = Join-Path $BundleDir 'wheels'
+$ReqFile = Join-Path $BundleDir 'meta/offline_requirements.txt'
+
+Write-Host '[1/3] Verifying offline bundle policy/hash/license...'
+# bitnet_tools is not installed yet; import it from this checkout whatever the cwd is.
+$savedPythonPath = $env:PYTHONPATH
+$env:PYTHONPATH = (@($PSScriptRoot, $savedPythonPath) | Where-Object { $_ }) -join [IO.Path]::PathSeparator
+python -m bitnet_tools.offline_bundle verify --bundle-dir "$BundleDir" --policy "$PolicyFile"
+$env:PYTHONPATH = $savedPythonPath
+if ($LASTEXITCODE -ne 0) {
+  Write-Error '[ERROR] Policy verification failed. Installation aborted.'
+  exit 1
+}
+
+Write-Host '[2/3] Installing from offline wheel bundle only...'
+# The generated requirements file lists only the optional extras, so always name the project itself.
+$pipArgs = @('--no-index', '--find-links', $WheelDir, 'bitnet-tools')
+if (Test-Path $ReqFile) { $pipArgs += @('-r', $ReqFile) }
+python -m pip install @pipArgs
+if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
+
+Write-Host '[3/3] Offline installation complete.'
diff --git a/offline_install.sh b/offline_install.sh
new file mode 100755
index 0000000..dfb3921
--- /dev/null
+++ b/offline_install.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+# Verify the offline bundle against its policy, then install from its wheels only.
+set -euo pipefail
+ROOT_DIR="$(cd "$(dirname "$0")" && pwd)"
+BUNDLE_DIR="${BUNDLE_DIR:-${ROOT_DIR}/.offline_bundle}"
+POLICY_FILE="${POLICY_FILE:-${BUNDLE_DIR}/meta/offline_policy.json}"
+WHEEL_DIR="${BUNDLE_DIR}/wheels"
+REQ_FILE="${BUNDLE_DIR}/meta/offline_requirements.txt"
+
+printf '[1/3] Verifying offline bundle policy/hash/license...\n'
+# bitnet_tools is not installed yet; import it from this checkout whatever the cwd is.
+if ! PYTHONPATH="${ROOT_DIR}${PYTHONPATH:+:${PYTHONPATH}}" python -m bitnet_tools.offline_bundle verify --bundle-dir "${BUNDLE_DIR}" --policy "${POLICY_FILE}"; then
+  echo "[ERROR] Policy verification failed. Installation aborted."
+  exit 1
+fi
+
+printf '[2/3] Installing from offline wheel bundle only...\n'
+# The generated requirements file lists only the optional extras, so always name the project itself.
+PIP_ARGS=(--no-index --find-links "${WHEEL_DIR}" bitnet-tools)
+if [[ -f "${REQ_FILE}" ]]; then PIP_ARGS+=(-r "${REQ_FILE}"); fi
+python -m pip install "${PIP_ARGS[@]}"
+
+printf '[3/3] Offline installation complete.\n'
diff --git a/scripts/prepare_online_bundle.sh b/scripts/prepare_online_bundle.sh
index bd52f49..06caee5 100755
--- a/scripts/prepare_online_bundle.sh
+++ b/scripts/prepare_online_bundle.sh
@@ -9,7 +9,7 @@ META_DIR="${BUNDLE_DIR}/meta"
 
 mkdir -p "${WHEEL_DIR}" "${MODEL_DIR}" "${META_DIR}"
 
-echo "[1/6] Collecting environment metadata"
+echo "[1/7] Collecting environment metadata"
 python -V | tee "${META_DIR}/python_version.txt"
 pip --version | tee "${META_DIR}/pip_version.txt"
 python -m pip freeze | tee "${META_DIR}/pip_freeze.txt" >/dev/null
@@ -20,54 +20,91 @@ python=$(python -V 2>&1)
 pip=$(pip --version)
 MANIFEST
 
-echo "[2/6] Building local project wheel"
+echo "[2/7] Building local project wheel"
 if python -m pip wheel --no-build-isolation "${ROOT_DIR}" -w "${WHEEL_DIR}"; then
   echo "local wheel build: success"
 else
   echo "local wheel build failed" | tee "${META_DIR}/wheel_build_warning.txt"
 fi
 
-# Optional runtime dependencies for charts/notebooks/tests
 cat > "${META_DIR}/requirements_online.txt" <<REQ
 matplotlib
 pandas
 jupyterlab
 pytest
 REQ
+cp "${META_DIR}/requirements_online.txt" "${META_DIR}/offline_requirements.txt"
 
-echo "[3/6] Attempting to download optional dependency wheels"
+echo "[3/7] Attempting to download optional dependency wheels"
 if python -m pip download -r "${META_DIR}/requirements_online.txt" -d "${WHEEL_DIR}"; then
   echo "optional wheel download: success"
 else
   echo "optional wheel download: failed (network/proxy 제한 가능)" | tee "${META_DIR}/download_warning.txt"
 fi
 
-echo "[4/6] Attempting to fetch Ollama install script for offline archive"
+echo "[4/7] Attempting to fetch Ollama install script for offline archive"
 if curl -fsSL https://ollama.com/install.sh -o "${MODEL_DIR}/ollama_install.sh"; then
   echo "ollama installer script archived"
 else
   echo "ollama installer download failed (network/proxy 제한 가능)" | tee -a "${META_DIR}/download_warning.txt"
 fi
 
-echo "[5/6] Attempting to detect local ollama"
+echo "[5/7] Attempting to detect local ollama"
 if command -v ollama >/dev/null 2>&1; then
   ollama --version | tee "${META_DIR}/ollama_version.txt"
-  # Avoid model pull in automated script unless explicitly requested
   echo "ollama detected; model pull can be run manually:" | tee -a "${META_DIR}/ollama_version.txt"
   echo "  ollama pull <bitnet-model-tag>" | tee -a "${META_DIR}/ollama_version.txt"
 else
   echo "ollama not installed in current environment" | tee "${META_DIR}/ollama_version.txt"
 fi
 
-echo "[6/6] Writing offline install guide"
+echo "[6/7] Writing policy (allowlist/hash/license)"
+ROOT_DIR="$ROOT_DIR" BUNDLE_DIR="$BUNDLE_DIR" python - <<'PY'
+from __future__ import annotations
+
+import hashlib
+import json
+import os
+from pathlib import Path
+
+bundle = Path(os.environ["BUNDLE_DIR"])  # bundle root, passed in from the shell script
+meta = bundle / "meta"
+assets = []  # one {"path", "sha256", "license"} record per bundled file
+
+for path in sorted((bundle / "wheels").glob("*.whl")):  # every wheel currently in the bundle
+    h = hashlib.sha256(path.read_bytes()).hexdigest()
+    rel = path.relative_to(bundle).as_posix()  # bundle-relative, forward slashes
+    assets.append({"path": rel, "sha256": h, "license": "UNKNOWN"})  # NOTE(review): license is not detected; every wheel is recorded as UNKNOWN
+
+ollama_script = bundle / "models" / "ollama_install.sh"
+if ollama_script.exists():  # only present when the earlier download step succeeded
+    h = hashlib.sha256(ollama_script.read_bytes()).hexdigest()
+    assets.append({"path": "models/ollama_install.sh", "sha256": h, "license": "MIT"})  # NOTE(review): license is hard-coded here; confirm against upstream
+
+policy = {
+    "version": "1.0",
+    "bundle": bundle.name,
+    "allowlist": [asset["path"] for asset in assets],  # mirrors the asset list, so every recorded asset is allowlisted
+    "allowed_licenses": ["MIT", "BSD-3-Clause", "Apache-2.0", "PSF-2.0", "UNKNOWN"],  # NOTE(review): allowing UNKNOWN lets the wheels above always pass the license check
+    "assets": assets,
+}
+
+(meta / "offline_policy.json").write_text(json.dumps(policy, ensure_ascii=False, indent=2), encoding="utf-8")
+PY
+
+echo "[7/7] Writing offline install guide"
 cat > "${BUNDLE_DIR}/OFFLINE_USE.md" <<GUIDE
 # Offline bundle usage
 
-## Install project from local wheel
-python -m pip install --no-index --find-links ./wheels bitnet-tools
+## 1) 정책 검증 + 설치 (Linux/macOS)
+./offline_install.sh
+
+## 2) 정책 검증 + 설치 (Windows PowerShell)
+./offline_install.ps1
 
-## Optional dependencies (if downloaded)
-python -m pip install --no-index --find-links ./wheels matplotlib pandas jupyterlab pytest
+## 검증 정책
+- 설치 전 SHA256/허용목록/라이선스 검증을 수행합니다.
+- 위반 항목이 하나라도 있으면 설치를 즉시 중단합니다.
 
 ## Notes
 - If optional wheel download failed, rerun this script in a network-allowed environment.
diff --git a/tests/test_doctor.py b/tests/test_doctor.py
new file mode 100644
index 0000000..d6ff793
--- /dev/null
+++ b/tests/test_doctor.py
@@ -0,0 +1,23 @@
+from bitnet_tools import doctor
+
+
+def test_collect_offline_readiness_has_expected_keys(monkeypatch):
+    monkeypatch.setattr(doctor.shutil, "which", lambda name: "/usr/bin/pip" if name == "pip" else None)
+
+    result = doctor._collect_offline_readiness(models=["bitnet:latest"], model="bitnet:latest")
+
+    assert "bundle_dir_exists" in result
+    assert "dependencies" in result
+    assert "files" in result
+    assert "model" in result
+    assert result["model"]["available"] is True
+
+
+def test_collect_environment_without_ollama_has_offline_readiness(monkeypatch):
+    monkeypatch.setattr(doctor.shutil, "which", lambda _name: None)
+
+    result = doctor.collect_environment(model="bitnet:latest")
+
+    assert result["ollama_installed"] is False
+    assert "offline_readiness" in result
+    assert result["offline_readiness"]["model"]["requested"] == "bitnet:latest"
diff --git a/tests/test_offline_bundle.py b/tests/test_offline_bundle.py
new file mode 100644
index 0000000..eaa280a
--- /dev/null
+++ b/tests/test_offline_bundle.py
@@ -0,0 +1,61 @@
+import json
+from pathlib import Path
+
+from bitnet_tools.offline_bundle import verify_bundle
+
+
+def test_verify_bundle_success(tmp_path):
+    bundle = tmp_path / ".offline_bundle"
+    wheels = bundle / "wheels"
+    meta = bundle / "meta"
+    wheels.mkdir(parents=True)
+    meta.mkdir(parents=True)
+
+    wheel = wheels / "sample.whl"
+    wheel.write_bytes(b"demo-wheel")
+
+    import hashlib
+
+    digest = hashlib.sha256(wheel.read_bytes()).hexdigest()
+    policy = {
+        "allowlist": ["wheels/sample.whl"],
+        "allowed_licenses": ["MIT"],
+        "assets": [
+            {"path": "wheels/sample.whl", "sha256": digest, "license": "MIT"}
+        ],
+    }
+    policy_path = meta / "offline_policy.json"
+    policy_path.write_text(json.dumps(policy), encoding="utf-8")
+
+    result = verify_bundle(bundle, policy_path)
+
+    assert result["ok"] is True
+    assert result["violations"] == []
+
+
+def test_verify_bundle_policy_violation_blocks(tmp_path):
+    bundle = tmp_path / ".offline_bundle"
+    wheels = bundle / "wheels"
+    meta = bundle / "meta"
+    wheels.mkdir(parents=True)
+    meta.mkdir(parents=True)
+
+    wheel = wheels / "sample.whl"
+    wheel.write_bytes(b"demo-wheel")
+
+    policy = {
+        "allowlist": [],
+        "allowed_licenses": ["Apache-2.0"],
+        "assets": [
+            {"path": "wheels/sample.whl", "sha256": "bad", "license": "UNKNOWN"}
+        ],
+    }
+    policy_path = meta / "offline_policy.json"
+    policy_path.write_text(json.dumps(policy), encoding="utf-8")
+
+    result = verify_bundle(bundle, policy_path)
+
+    assert result["ok"] is False
+    assert any("allowlist violation" in x for x in result["violations"])
+    assert any("license violation" in x for x in result["violations"])
+    assert any("hash mismatch" in x for x in result["violations"])