In [None]:
# OC20 IS2RES dataset download
!wget https://dl.fbaipublicfiles.com/opencatalystproject/data/is2res_train_val_test_lmdbs.tar.gz

# Unzip tar
!tar -zxzf is2res_train_val_test_lmdbs.tar.gz
# OC20 data mapping download
!wget https://dl.fbaipublicfiles.com/opencatalystproject/data/oc20_data_mapping.pkl

In [5]:
# Step-by-step slab reconstruction demo (bulk -> unit_slab -> struct (with vacuum) -> primitive (untiled slab) -> xy-tiling)

import os
import math
from pathlib import Path

import numpy as np
from ase.io import write
from pymatgen.io.ase import AseAtomsAdaptor
from pymatgen.core.structure import Structure

# Reuse utilities from the project
from recon_test import (
    create_slab_from_index,
    reconstruct_slab_from_shifted_ouc,
    get_slab_params_from_mapping,
    get_sid_from_lmdb,
    extract_true_system_from_lmdb,
)
from fairchem.data.oc.core import Bulk
from fairchem.data.oc.core.slab import tag_surface_atoms, set_fixed_atom_constraints


def print_summary(title: str, struct: Structure):
    """Print a short, human-readable summary of a structure.

    Three lines are written to stdout (preceded by a blank line): the
    bracketed title, the atom count with lattice lengths and angles, and the
    reduced formula.

    Args:
        title: Label shown in square brackets on the first line.
        struct: Object exposing ``lattice.abc``, ``lattice.angles``,
            ``composition.reduced_formula`` and ``len()``.

    Returns:
        None.
    """
    cell = struct.lattice
    (a, b, c), (al, be, ga) = cell.abc, cell.angles

    print(f"\n[{title}]")
    print(f"Atoms: {len(struct)} | a,b,c: {a:.3f}, {b:.3f}, {c:.3f} Å | angles: {al:.2f}, {be:.2f}, {ga:.2f} deg")
    print(f"Formula: {struct.composition.reduced_formula}")



In [11]:
# Run configuration: every tunable value used by the later steps lives here.

# Which LMDB entry to reconstruct, and where the input files are.
idx = 7
lmdb_path = "is2res_train_val_test_lmdbs/data/is2re/all/train/data.lmdb"
mapping_path = "oc20_data_mapping.pkl"

# Geometric thresholds (all in Å).
min_slab_size = 7.0     # minimum slab thickness, drives the slab layer count in Step 2
min_vacuum_size = 20.0  # minimum vacuum thickness, drives the vacuum layer count in Step 2
min_ab = 8.0            # minimum in-plane cell length, drives the a/b tiling in Step 4

# Tolerance handed to get_primitive_structure in Step 3.
tol = 0.3

# One output folder per entry index; created up front so every step can write into it.
out_dir = Path("toy_outputs", str(idx))
out_dir.mkdir(parents=True, exist_ok=True)
print(f"Output dir: {out_dir}")


Output dir: toy_outputs/7


In [12]:
# Step 0: Resolve this entry's system id, look up its slab parameters, and load the bulk
sid = get_sid_from_lmdb(lmdb_path, idx)
params = get_slab_params_from_mapping(mapping_path, sid)
if params is None:
    # Every later step reads from `params`, so there is nothing useful to do without it.
    raise RuntimeError(f"Mapping not found for sid={sid}")

# The bulk is selected by the "bulk_mpid" recorded in the mapping; `bulk.atoms` is ASE Atoms.
bulk = Bulk(bulk_src_id_from_db=params["bulk_mpid"])
# pymatgen view of the same atoms, used only for the printed summary.
bulk_struct = AseAtomsAdaptor.get_structure(bulk.atoms)
print_summary("Bulk (conventional standard)", bulk_struct)
write(out_dir / "00_bulk.cif", bulk.atoms)

# Slab-defining values from the mapping; in this cell they are only printed.
miller_index, shift, top = (params[key] for key in ("miller_index", "shift", "top"))
print(f"miller_index={miller_index}, shift={shift}, top={top}")



[Bulk (conventional standard)]
Atoms: 4 | a,b,c: 4.478, 4.478, 4.478 Å | angles: 90.00, 90.00, 90.00 deg
Formula: Sc3Al
miller_index=(1, 1, 0), shift=0.125, top=True


In [13]:
# Step 1: Obtain unit_slab (1-layer unit, possibly struct_no_vac) via create_slab_from_index
# The helper returns the structure together with a flag, which is printed below.
slab_request = dict(
    index=idx,
    lmdb_path=lmdb_path,
    mapping_path=mapping_path,
    output_dir=str(out_dir),  # passed as a plain string, not a Path
    min_slab_size=min_slab_size,
    min_vacuum_size=min_vacuum_size,
)
unit_slab, is_struct_no_vac = create_slab_from_index(**slab_request)

print_summary("unit_slab (1-layer unit)", unit_slab)
print(f"is_struct_no_vac={is_struct_no_vac}")

# Convert to ASE Atoms before writing the CIF.
unit_slab_atoms = AseAtomsAdaptor.get_atoms(unit_slab)
write(out_dir / "01_unit_slab.cif", unit_slab_atoms)



[unit_slab (1-layer unit)]
Atoms: 8 | a,b,c: 4.478, 6.333, 6.333 Å | angles: 90.00, 90.00, 90.00 deg
Formula: Sc3Al
is_struct_no_vac=False


In [14]:
# Step 2: Build struct (z-stacked + vacuum) from unit_slab, replicating get_slab pre-primitive state
# NOTE(review): `is_struct_no_vac` from Step 1 is not consulted in this cell —
# confirm that stacking is still the right thing to do when it is True.

a_vec, b_vec, c_vec = unit_slab.lattice.matrix

# Unit normal of the a-b plane; projecting c onto it gives the thickness of one unit layer.
n = np.cross(a_vec, b_vec)
n = n / np.linalg.norm(n)
height = abs(np.dot(c_vec, n))

# Whole numbers of unit layers needed to reach the requested slab / vacuum thickness.
n_layers_slab = int(math.ceil(min_slab_size / height))
n_layers_vac = int(math.ceil(min_vacuum_size / height))
n_layers = n_layers_slab + n_layers_vac

# The new cell is n_layers times longer along c, so the fractional z of the
# unit layer shrinks by the same factor (x and y are divided by 1, i.e. unchanged).
base_frac = unit_slab.frac_coords / np.array([1.0, 1.0, n_layers])

# Place one copy of the unit layer per slab layer; the remaining
# n_layers_vac layers of the cell are left empty and form the vacuum.
all_coords = []
for layer in range(n_layers_slab):
    shifted = base_frac.copy()
    shifted[:, 2] = shifted[:, 2] + layer / n_layers
    all_coords.extend(shifted)

all_species = unit_slab.species_and_occu
new_lattice = [a_vec, b_vec, n_layers * c_vec]
# The species list is repeated once per stacked layer, matching the order of all_coords.
struct = Structure(new_lattice, all_species * n_layers_slab, all_coords)
print_summary("Struct (with vacuum, before primitive)", struct)
write(out_dir / "02_struct_with_vac.cif", AseAtomsAdaptor.get_atoms(struct))




[Struct (with vacuum, before primitive)]
Atoms: 16 | a,b,c: 4.478, 6.333, 37.997 Å | angles: 90.00, 90.00, 90.00 deg
Formula: Sc3Al


In [15]:
# Step 3: Reduce the stacked cell to its primitive cell (this is the untiled slab)
# `tol` from the parameters cell is forwarded as the reduction tolerance.
untiled_slab = struct.get_primitive_structure(tolerance=tol)
print_summary("Untiled slab (primitive)", untiled_slab)

untiled_slab_atoms = AseAtomsAdaptor.get_atoms(untiled_slab)
write(out_dir / "03_untiled_slab_primitive.cif", untiled_slab_atoms)



[Untiled slab (primitive)]
Atoms: 16 | a,b,c: 4.478, 6.333, 37.997 Å | angles: 90.00, 90.00, 90.00 deg
Formula: Sc3Al


In [16]:
# Step 4: Tile in a,b to reach min_ab (final slab like in slab.py)
# Smallest integer repeat along a and b that makes each in-plane length >= min_ab.
a_len = float(np.linalg.norm(untiled_slab.lattice.matrix[0]))
b_len = float(np.linalg.norm(untiled_slab.lattice.matrix[1]))
na = int(math.ceil(min_ab / a_len))
nb = int(math.ceil(min_ab / b_len))

# Work on a copy so untiled_slab from Step 3 stays intact when this cell is re-run.
final_slab = untiled_slab.copy()
final_slab.make_supercell([na, nb, 1])
print_summary("Final slab (tiled in a,b)", final_slab)
write(out_dir / "04_final_slab_ab_tiled.cif", AseAtomsAdaptor.get_atoms(final_slab))

# Optional: tag and constrain as in pipeline
# Best-effort by design: a failure here is reported but does not abort the demo.
try:
    print("Applying tags and constraints (optional)...")
    bulk2 = Bulk(bulk_src_id_from_db=params["bulk_mpid"])  # reload ASE atoms
    final_slab_ase = AseAtomsAdaptor.get_atoms(final_slab)
    tagged = tag_surface_atoms(final_slab_ase, bulk2.atoms)
    constrained = set_fixed_atom_constraints(tagged)
    write(out_dir / "05_final_slab_tagged_constrained.cif", constrained)
    # CIF has no fields for ASE tags or constraints, so the file above carries only
    # the geometry. Also save an ASE trajectory, which does keep tags and constraints,
    # so the result of this step can actually be reloaded and inspected.
    write(out_dir / "05_final_slab_tagged_constrained.traj", constrained)
except Exception as e:
    print(f"[Warning] tagging/constraints failed: {e}")



[Final slab (tiled in a,b)]
Atoms: 64 | a,b,c: 8.956, 12.666, 37.997 Å | angles: 90.00, 90.00, 90.00 deg
Formula: Sc3Al
Applying tags and constraints (optional)...
