
# MALTbot Daily Colab Runner (Canonical)

이 노트북은 **일일 실험용 canonical notebook**입니다.

사용 전 체크:
1. Colab Runtime을 **GPU**로 설정
2. Colab **Secrets**에 `GH_TOKEN` 저장 (repo push 권한)
3. 아래 셀을 위에서부터 순서대로 실행 (`Run all`)

> Push는 항상 `colab/<DATE>/<RUN_NAME>` 브랜치로만 수행하며, `main`으로 직접 push하지 않습니다.


In [None]:

# CONFIG (edit this cell only)
from datetime import datetime
from zoneinfo import ZoneInfo
import os

# Run date (YYYY-MM-DD) taken from the current time in the Asia/Seoul zone.
DATE = datetime.now(ZoneInfo("Asia/Seoul")).strftime("%Y-%m-%d")
RUN_NAME = "chgnet_daily"
TASK = "matbench_mp_e_form"
SEED = 42
MODEL_CONFIG = "chgnet_finetune_track_v1"
NOTE = "daily colab run"

# One table of the settings above; it drives both the export and the echo.
run_config = {
    "DATE": DATE,
    "RUN_NAME": RUN_NAME,
    "TASK": TASK,
    "SEED": SEED,
    "MODEL_CONFIG": MODEL_CONFIG,
    "NOTE": NOTE,
}

# Publish every setting as MALTBOT_<KEY> so that bash cells can read it.
# Environment values must be strings, hence str() (only SEED is not one).
os.environ.update(
    {f"MALTBOT_{key}": str(value) for key, value in run_config.items()}
)

print(run_config)


In [None]:

import platform
import torch

# Report the interpreter and PyTorch versions, then whether a CUDA device is
# visible and, if so, the name of device 0.
print("Python:", platform.python_version())
print("Torch:", torch.__version__)
cuda_ok = torch.cuda.is_available()
print("CUDA available:", cuda_ok)
gpu_name = torch.cuda.get_device_name(0) if cuda_ok else "None"
print("GPU:", gpu_name)


In [None]:

%%bash
# Make sure /content/MALTbot is a checkout of the repository: clone it when it
# is missing, otherwise fetch and fast-forward main.
set -euo pipefail
REPO_URL="https://github.com/seanwoory/MALTbot.git"
REPO_DIR="/content/MALTbot"

if [ ! -d "${REPO_DIR}/.git" ]; then
  echo "[info] Cloning repo..."
  git clone "${REPO_URL}" "${REPO_DIR}"
  cd "${REPO_DIR}"
else
  echo "[info] Repo exists. Pull latest main..."
  cd "${REPO_DIR}"
  git fetch origin
  # A failing checkout is tolerated on purpose; the pull below still runs.
  git checkout main || true
  # --ff-only: refuse to create a merge commit if history has diverged.
  git pull --ff-only origin main
fi

# Show the working directory and the commit that is now checked out.
pwd
git rev-parse --short HEAD


In [None]:

%%bash
set -euo pipefail
# Upgrade the packaging toolchain first, then install the runtime packages.
python -m pip install -U pip setuptools wheel
# numpy is constrained to versions below 2.0; the rest are unpinned.
python -m pip install \
  "numpy<2.0" \
  "torch" \
  "matbench" \
  "pymatgen" \
  "pyyaml" \
  "tqdm"


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; force_remount is passed as False.
drive.mount('/content/drive', force_remount=False)


In [None]:

import json
import os
import platform
import subprocess
import sys
import traceback
from pathlib import Path

# --- Resolve run parameters --------------------------------------------------
# All settings are read from MALTBOT_* environment variables; a missing
# variable raises KeyError here, before anything is executed.
repo = Path("/content/MALTbot")
date = os.environ["MALTBOT_DATE"]
run_name = os.environ["MALTBOT_RUN_NAME"]
task = os.environ["MALTBOT_TASK"]
seed = int(os.environ["MALTBOT_SEED"])
model_config = os.environ["MALTBOT_MODEL_CONFIG"]
note = os.environ["MALTBOT_NOTE"]

# run_name is used as a single path component below, so path separators and
# blank names are rejected up front.
if "/" in run_name or run_name.strip() == "":
    raise ValueError("RUN_NAME must be non-empty and must not contain '/'.")
out_dir = repo / "results" / "daily" / date / run_name
out_dir.mkdir(parents=True, exist_ok=True)
out_file = out_dir / "results.json"

# The run script is launched with the current interpreter, from the repo root.
cmd = [
    sys.executable,
    "scripts/run_chgnet_mp_e_form.py",
    "--config",
    "configs/chgnet_mp_e_form.yaml",
]

# Outcome of the run; overwritten below on any failure.
status = "success"
error_message = ""
stderr_tail = ""

try:
    # check=False: a non-zero exit is recorded as an error instead of raising.
    # Output is captured (not streamed) so its tail can be stored later.
    proc = subprocess.run(
        cmd,
        cwd=repo,
        capture_output=True,
        text=True,
        check=False,
    )
    if proc.returncode != 0:
        status = "error"
        error_message = f"run script exited with code {proc.returncode}"
        # Keep only the last 30 lines of stderr.
        stderr_tail = "\n".join((proc.stderr or "").splitlines()[-30:])
except Exception as e:
    # The process could not be started or awaited; there is no proc to report.
    status = "error"
    error_message = str(e)
    stderr_tail = traceback.format_exc(limit=3)
    proc = None

# --- Load whatever results.json already holds ---------------------------------
# Only a JSON object can be extended with the metadata below. Anything else
# (unreadable text, invalid JSON, or a JSON list/scalar) is preserved as a
# bounded raw excerpt instead of crashing the cell.
payload = {}
if out_file.exists():
    try:
        loaded = json.loads(out_file.read_text(encoding="utf-8"))
    except Exception:
        loaded = None
    if isinstance(loaded, dict):
        payload = loaded
    else:
        payload = {"raw": out_file.read_text(encoding="utf-8", errors="ignore")[:2000]}

# --- Environment info (each probe is best-effort) -----------------------------
try:
    git_commit = subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=repo, text=True).strip()
except Exception:
    git_commit = None

try:
    import torch
    torch_ver = torch.__version__
    cuda_available = bool(torch.cuda.is_available())
except Exception:
    torch_ver = None
    cuda_available = None

env_info = {
    "python": sys.version,
    "platform": platform.platform(),
    "torch": torch_ver,
    "cuda_available": cuda_available,
    "git_commit": git_commit,
}

# --- Merge run metadata into the payload --------------------------------------
# "task" must be a dict to receive "name"; replace any other value.
if not isinstance(payload.get("task"), dict):
    payload["task"] = {}
payload["task"]["name"] = task
payload["status"] = status
payload["seed"] = seed
payload["model_config"] = model_config
payload["note"] = note
payload["output_path"] = str(out_file.relative_to(repo))
payload["env"] = env_info

# Keys that depend on this run's outcome are removed when they do not apply,
# so values loaded from an earlier results.json cannot leak into this record.
if proc is not None:
    payload["run"] = {
        "returncode": proc.returncode,
        "stdout_tail": "\n".join((proc.stdout or "").splitlines()[-30:]),
        "stderr_tail": "\n".join((proc.stderr or "").splitlines()[-30:]),
    }
else:
    payload.pop("run", None)

if status == "error":
    payload["error_message"] = error_message
    payload["traceback_or_stderr_tail"] = stderr_tail
else:
    payload.pop("error_message", None)
    payload.pop("traceback_or_stderr_tail", None)

# default=str: values json cannot encode natively are written as their str().
out_file.write_text(json.dumps(payload, ensure_ascii=False, indent=2, default=str), encoding="utf-8")

def first_scalar(x):
    """Return the first int/float found depth-first in nested dicts/lists.

    Dict values and list items are visited in iteration order. Booleans are
    skipped (bool is a subclass of int and would otherwise render as 1.000000
    or 0.000000). Returns None when no number is found.
    """
    if isinstance(x, bool):
        return None
    if isinstance(x, (int, float)):
        return x
    if isinstance(x, dict):
        children = x.values()
    elif isinstance(x, list):
        children = x
    else:
        return None
    for child in children:
        found = first_scalar(child)
        if found is not None:
            return found
    return None


# Metric shown in RESULTS.md:
#   ERROR    - the run failed
#   TBD      - the run succeeded but the payload has no task.scores
#   <number> - first numeric value found inside task.scores
#   RECORDED - task.scores exists but contains no number
metric = "ERROR" if status == "error" else "TBD"
if status == "success":
    scores = payload.get("task", {}).get("scores")
    if scores is not None:
        v = first_scalar(scores)
        metric = f"{v:.6f}" if v is not None else "RECORDED"

results_md = repo / "RESULTS.md"
line = f"{date} | {task} | {model_config} | METRIC={metric} | {note} | {out_file.relative_to(repo)}"
# Append mode: earlier entries are kept; the newline is written before the
# entry, so the file ends without a trailing newline.
with results_md.open("a", encoding="utf-8") as f:
    f.write("\n" + line)

print("status:", status)
print("results:", out_file)
print("RESULTS.md appended:", line)


In [None]:

%%bash
set -euo pipefail
# List every results.json under results/ in sorted order.
cd /content/MALTbot
find results -type f -name 'results.json' -print | sort


In [None]:

from google.colab import userdata
from getpass import getpass
import os

# Try the Colab secret named GH_TOKEN first. If reading it fails for any
# reason, or it is empty, fall back to a hidden interactive prompt.
try:
    token = userdata.get("GH_TOKEN")
except Exception:
    token = None

token = token or getpass("Paste GH_TOKEN (input hidden): ").strip()
if not token:
    raise ValueError("GH_TOKEN is required to push branch.")

# Hand the token over through the process environment; it is never printed.
os.environ.update(GH_TOKEN=token)
print("GH_TOKEN loaded (hidden).")


In [None]:

%%bash
# Commit this run's results.json and RESULTS.md onto a colab/<date>/<run>
# branch and push that branch to origin, then print a compare URL against main.
set -euo pipefail
cd /content/MALTbot

# ${VAR:?msg} aborts the script with msg when VAR is unset or empty.
: "${GH_TOKEN:?GH_TOKEN is not set}"
: "${MALTBOT_DATE:?DATE missing}"
: "${MALTBOT_RUN_NAME:?RUN_NAME missing}"

# Branch-safe run name: every character outside [A-Za-z0-9._-] becomes '-'.
SAFE_RUN_NAME="$(echo "${MALTBOT_RUN_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')"
BRANCH="colab/${MALTBOT_DATE}/${SAFE_RUN_NAME}"

# Commit identity, set in this repository's git config.
git config user.name "colab-bot"
git config user.email "colab-bot@users.noreply.github.com"

# -B creates the branch at the current HEAD, or resets it there if it exists.
git checkout -B "${BRANCH}"

# Stage only the results artifact and RESULTS.md. The path uses the raw
# MALTBOT_RUN_NAME (not SAFE_RUN_NAME), i.e. the directory name as given.
git add "results/daily/${MALTBOT_DATE}/${MALTBOT_RUN_NAME}/results.json" RESULTS.md

# `git diff --cached --quiet` exits 0 when nothing is staged.
# NOTE(review): the push lives inside the else branch, so if an earlier attempt
# committed but failed to push, a re-run finds nothing staged and skips the
# push - confirm whether that is intended.
if git diff --cached --quiet; then
  echo "[info] No staged changes to commit."
else
  git commit -m "results: ${MALTBOT_DATE} ${SAFE_RUN_NAME}"
  # The token is supplied as an HTTP header through -c for this one command
  # rather than being written into the remote URL.
  git -c http.https://github.com/.extraheader="AUTHORIZATION: bearer ${GH_TOKEN}"     push -u origin "${BRANCH}"
fi

echo "Create PR: https://github.com/seanwoory/MALTbot/compare/main...${BRANCH}?expand=1"
