# mbe-tools walkthrough / 使用指南

**English** — quick tour of fragmenting, building inputs, templating jobs, and saving outputs.

**中文** — 快速演示如何拆分片段、生成输入、制作作业脚本并保存结果。

# Overview / 概览

- English: This notebook demonstrates reading XYZ, fragment sampling, MBE subset generation, input rendering (Q-Chem/ORCA), PBS/Slurm template generation, and writing outputs to notebooks/result.
- 中文：演示读取 XYZ、抽样片段、生成 MBE 子集、构建 Q-Chem/ORCA 输入、生成 PBS/Slurm 模板，并将结果写入 notebooks/result。

In [None]:
# Section 1: Load toolkit source context
from pathlib import Path
import mbe_tools

# Directory that contains the imported mbe_tools package.
root = Path(mbe_tools.__file__).resolve().parent
print("mbe_tools package path:", root)
# Sort the listing: directory enumeration order depends on the filesystem,
# so an unsorted list makes this cell's output differ between machines.
print("Available modules:", sorted(p.name for p in root.glob("*.py")))

## Sections 2 & 3: README (EN) and README_CN snapshots
We verify the documentation already present in the workspace rather than re-generating it here.

In [None]:
# Show first lines of README.md and README_CN.md
import os
from pathlib import Path

# Anchor to repo root explicitly to avoid cwd issues in notebook kernels.
# Set the MBE_TOOLS_ROOT environment variable to point at a checkout in a
# different location; when it is unset or empty the original path is used.
root = Path(os.environ.get("MBE_TOOLS_ROOT") or "/Users/jiarui/Downloads/mbe-tools")
readme_en = root / "README.md"
readme_cn = root / "README_CN.md"

for path in [readme_en, readme_cn]:
    print("===", path.name, "===")
    try:
        # Only the first five lines are shown, as a quick presence check.
        txt = path.read_text(encoding="utf-8").splitlines()[:5]
    except FileNotFoundError:
        print("missing")
    else:
        for ln in txt:
            print(ln)
    print()

## Section 4: Sample usage (inputs, templates, MBE math)

In [None]:
from mbe_tools.input_builder import render_qchem_input, render_orca_input
from mbe_tools.hpc_templates import render_pbs_qchem, render_slurm_orca
from mbe_tools.mbe_math import assemble_mbe_energy


def _first_lines(text, count):
    """Return the first ``count`` lines of ``text``, joined by newlines."""
    return "\n".join(text.splitlines()[:count])


# Geometry block for water dimer (toy): six atoms, one "symbol x y z" row each
geom_block = "\n".join([
    "O  0.000000  0.000000  0.000000",
    "H  0.757000  0.586000  0.000000",
    "H -0.757000  0.586000  0.000000",
    "O  2.900000  0.000000  0.000000",
    "H  3.657000  0.586000  0.000000",
    "H  2.143000  0.586000  0.000000",
])

# Both input renderers are called with the same method/basis keywords.
level_of_theory = {"method": "wb97m-v", "basis": "def2-ma-qzvpp"}

print("Q-Chem input (first lines):")
qchem_text = render_qchem_input(geom_block, **level_of_theory)
print(_first_lines(qchem_text, 8))

print("\nORCA input (first lines):")
orca_text = render_orca_input(geom_block, **level_of_theory)
print(_first_lines(orca_text, 6))

print("\nPBS template snippet:")
pbs_text = render_pbs_qchem(job_name="water", chunk_size=3)
print(_first_lines(pbs_text, 15))

print("\nSlurm template snippet:")
slurm_text = render_slurm_orca(job_name="orca_job", chunk_size=2)
print(_first_lines(slurm_text, 15))

# Synthetic MBE energies for fragments (0), (1), and dimer (0,1)
synthetic_energies = [
    ((0,), -75.0),
    ((1,), -75.1),
    ((0, 1), -150.5),
]
records = [
    {"subset_indices": list(indices), "energy_hartree": energy}
    for indices, energy in synthetic_energies
]
result = assemble_mbe_energy(records)

# Report the three entries read from the assembled result, in fixed order.
for label, key in [
    ("\nMBE(k) order totals:", "order_totals"),
    ("Contributions:", "contributions"),
    ("Missing subsets:", "missing_subsets"),
]:
    print(label, result[key])


## Section 4b: Run-control example
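Run-control is embedded in the PBS/Slurm templates. A control file (`<input>.mbe.control.toml` or `mbe.control.toml`) can confirm success via regexes, retry with cleanup/sleep, and write state to `.mbe_state.json`. Note that the cell below does not write a control file: it drives the `mbe` CLI post-processing steps (`parse`, `analyze`, then `show`/`info`/`calc`/`save`/`compare`) on `notebooks/data/Output` and stores their results in `notebooks/result`, skipping any step whose input is missing.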

In [None]:
import json
import os
import shlex
import subprocess
from pathlib import Path

# Repo root. Set the MBE_TOOLS_ROOT environment variable to use a checkout in
# a different location; when it is unset or empty the original path is used.
root = Path(os.environ.get("MBE_TOOLS_ROOT") or "/Users/jiarui/Downloads/mbe-tools")
result_dir = root / "notebooks" / "result"
result_dir.mkdir(parents=True, exist_ok=True)

# Input directory scanned for *.out files, and the artifacts written under result_dir.
output_dir = root / "notebooks" / "data" / "Output"
parsed_jsonl = result_dir / "parsed.jsonl"
csv_path = result_dir / "results.csv"
xlsx_path = result_dir / "results.xlsx"
plot_path = result_dir / "mbe.png"
archive_dir = result_dir / "archives"

# (1) Parse if outputs exist
cmd_parse = ["mbe", "parse", str(output_dir), "--program", "auto", "--glob-pattern", "*.out", "--out", str(parsed_jsonl)]
# Echo a shell-quoted form: a plain join would show `*.out` (and any path
# containing spaces) unquoted, so pasting the line into a shell would not
# reproduce the argument list that subprocess receives.
print("parse:", " ".join(shlex.quote(arg) for arg in cmd_parse))
has_outputs = output_dir.exists() and any(output_dir.glob("*.out"))
if has_outputs:
    # check=True raises CalledProcessError if the CLI exits non-zero.
    subprocess.run(cmd_parse, check=True)
else:
    print("skip parse (no .out files under", output_dir, ")")

def parsed_available() -> bool:
    """Return True when ``parsed_jsonl`` exists on disk and is not empty."""
    if not parsed_jsonl.exists():
        return False
    size_in_bytes = parsed_jsonl.stat().st_size
    return size_in_bytes > 0

# (2) Analyze
cmd_analyze = [
    "mbe", "analyze", str(parsed_jsonl),
    "--to-csv", str(csv_path),
    "--to-xlsx", str(xlsx_path),
    "--plot", str(plot_path),
]
print("analyze:", " ".join(cmd_analyze))
if not parsed_available():
    print("skip analyze (parsed.jsonl missing)")
else:
    subprocess.run(cmd_analyze, check=True)

# (3) show/info/calc/save/compare
cmd_show = ["mbe", "show", str(parsed_jsonl)]
cmd_info = ["mbe", "info"]  # uses default JSONL selection
cmd_calc = ["mbe", "calc", str(parsed_jsonl), "--unit", "kcal", "--monomer", "0"]
cmd_save = ["mbe", "save", str(parsed_jsonl), "--dest", str(archive_dir)]
cmd_compare = ["mbe", "compare", str(result_dir)]

# One table drives both the echo pass and the execution pass, in this order.
followup_commands = [
    ("show", cmd_show),
    ("info", cmd_info),
    ("calc", cmd_calc),
    ("save", cmd_save),
    ("compare", cmd_compare),
]

# Every command is echoed first, whether or not it will be executed.
for name, cmd in followup_commands:
    print(f"{name}:", " ".join(cmd))

if not parsed_available():
    print("skip show/info/calc/save/compare (parsed.jsonl missing)")
else:
    archive_dir.mkdir(exist_ok=True)
    # check=True stops at the first command that exits non-zero.
    for name, cmd in followup_commands:
        subprocess.run(cmd, check=True)

## Section 5: Validate files in workspace

In [None]:
# List docs and notebook presence
import os
from pathlib import Path

# Repo root. Set the MBE_TOOLS_ROOT environment variable to use a checkout in
# a different location; when it is unset or empty the original path is used.
root = Path(os.environ.get("MBE_TOOLS_ROOT") or "/Users/jiarui/Downloads/mbe-tools")
for path in [root / "README.md", root / "README_CN.md", root / "notebooks" / "sample_walkthrough.ipynb"]:
    print(path, "exists:", path.exists())

## Section 6: Full workflow using W20_3.xyz
We load the provided 20-water cluster, fragment it, generate subsets (k≤2), render a Q-Chem input for the first subset, and emit a PBS template (chunked).

In [None]:
import os
from pathlib import Path
from mbe_tools.cluster import read_xyz, fragment_by_water_heuristic
from mbe_tools.mbe import MBEParams, generate_subsets_xyz
from mbe_tools.input_builder import render_qchem_input
from mbe_tools.hpc_templates import render_pbs_qchem

# Repo root. Set the MBE_TOOLS_ROOT environment variable to use a checkout in
# a different location; when it is unset or empty the original path is used.
root = Path(os.environ.get("MBE_TOOLS_ROOT") or "/Users/jiarui/Downloads/mbe-tools")
xyz_path = root / "notebooks" / "data" / "W20_3.xyz"
result_dir = root / "notebooks" / "result"
result_dir.mkdir(parents=True, exist_ok=True)

# Load cluster and fragment
xyz = read_xyz(str(xyz_path))
frags = fragment_by_water_heuristic(xyz, oh_cutoff=1.25)
params = MBEParams()  # library defaults; no overrides are passed here
subset_jobs = list(generate_subsets_xyz(frags, params))
print("fragments:", len(frags), "jobs:", len(subset_jobs))

# Fail with an explicit message instead of a bare IndexError on subset_jobs[0].
if not subset_jobs:
    raise ValueError(f"no MBE subsets were generated from {xyz_path}")

# Build one Q-Chem input and PBS template
# Each job unpacks into three items; only the geometry text is used below.
job_id, subset, geom_text = subset_jobs[0]
qchem_inp = render_qchem_input(geom_text, method="wb97m-v", basis="def2-svpd", charge=0, multiplicity=1)
qchem_path = result_dir / "demo_qchem.inp"
# Explicit encoding keeps the written bytes independent of the platform locale.
qchem_path.write_text(qchem_inp, encoding="utf-8")
print("wrote", qchem_path)

pbs_script = render_pbs_qchem(job_name="demo", walltime="24:00:00", ncpus=16, mem_gb=32.0, queue=None, project=None, module="qchem/5.2.2", chunk_size=10)
pbs_path = result_dir / "demo.pbs"
pbs_path.write_text(pbs_script, encoding="utf-8")
print("wrote", pbs_path)

## Notes / 提示
- English: Run cells in order. Outputs are saved under `notebooks/result`.
- 中文：按顺序运行各个单元；结果保存在 `notebooks/result`。