In [None]:
import json
import numpy as np
from ase.io import read as ase_read
from ase.io import write as ase_write
import copy

In [None]:
# Read the list of environment records (presumably one per O atom -- confirm
# against the script that produced this file).
with open("oatom_envs_jp_dio-orig_min4.json", "r") as envs_file:
    oatom_envs = json.loads(envs_file.read())

In [None]:
# Structure that still contains the O atoms.
input_xyz = "./jp_dio-orig_min4.xyz"
min_orig_atoms = ase_read(input_xyz)

# The JSON file maps an index in the O-free structure to an index in the
# original structure.
with open("./noOidx2orig.json", "r") as mapping_file:
    noOidx2orig = json.loads(mapping_file.read())

# Invert the mapping so we can go from an original index to an O-free index.
orig2noO = dict(
    (int(orig_idx), int(noO_idx)) for noO_idx, orig_idx in noOidx2orig.items()
)

# Per-atom labels, indexed by O-free atom index.
grain_ptm_data = np.load("./grains_ptm_111025_min4_fixed.npz")
noO_ptm_types = grain_ptm_data["ptm_types"]
noO_grains = grain_ptm_data["grains"]

# PTM type for every atom of the original structure; O atoms have no entry in
# the O-free arrays, so they get the sentinel -1.
xyz_ptm_types = []
for i, atm in enumerate(min_orig_atoms):
    ptm_type = -1 if atm.symbol == "O" else int(noO_ptm_types[orig2noO[i]])
    xyz_ptm_types.append(ptm_type)


In [None]:
# Cartesian positions of every atom, requested with wrap=True so that ASE
# folds them back into the cell before they are used for distance checks.
all_positions = min_orig_atoms.get_positions(wrap=True)


In [None]:
# Cell of the loaded structure and the lengths of its three cell vectors
# (displayed as the cell output).
cell = min_orig_atoms.get_cell()
boxsize = cell.lengths()
boxsize

In [None]:
# Candidate list; downstream cells read entry[0] as an atom index and
# entry[1] as the value stored under "nearest_hcp_distance".
with open("sorted_bulk_like_results.json", "r") as results_file:
    sorted_bulk_like_results = json.loads(results_file.read())

In [None]:
# Index the environment records by their 'index' field for O(1) lookup.
oatom_envs_dict = dict((record['index'], record) for record in oatom_envs)

Selection criteria for the unique bulk-like sites:

- 4 sites should come from grain 1
- 4 sites should come from grain 2
- 2 sites should come from grain 3
- The distance from a site to the nearest non-hcp atom has to be greater than 32 Å.
- Each chosen environment should be separated by at least 40 Å from every other chosen environment.

In [None]:
def pbc_distance(central_pos, neighbor_pos, cell):
    """
    Minimum-image distance between two points in an orthorhombic periodic cell.

    Parameters:
    -----------
    central_pos : array-like of 3 numbers
        Cartesian position of the reference point.
    neighbor_pos : array-like of 3 numbers
        Cartesian position of the other point.
    cell : 3x3 object indexable as ``cell[i, j]``
        Cell matrix. It must be orthorhombic (all off-diagonal entries zero);
        periodicity is assumed along all three axes.

    Returns:
    --------
    float
        Distance between the two points after applying the minimum image
        convention, in the same length unit as the inputs.

    Raises:
    -------
    AssertionError
        If any off-diagonal cell entry differs from zero by more than 1e-12.
    """
    # Build the displacement as a fresh float array: the inputs are never
    # modified, and integer or plain-list positions work as well.
    delta = np.asarray(neighbor_pos, dtype=float) - np.asarray(central_pos, dtype=float)

    # Orthorhombic check (cheap, but repeated on every call).
    for i in range(3):
        for j in range(3):
            if i != j:
                assert np.isclose(cell[i, j], 0.0, atol=1e-12)

    # Minimum image convention for an orthorhombic cell: shift every component
    # by a whole number of box lengths so that it lands in [-L/2, L/2].
    lengths = np.array([cell[i, i] for i in range(3)], dtype=float)
    delta -= lengths * np.round(delta / lengths)

    return np.linalg.norm(delta)

In [None]:
# First attempt at picking bulk-like environments: 4 from grain 1, 4 from
# grain 2 and 2 from grain 3, walking the candidates in the order given by
# sorted_bulk_like_results.
#
# NOTE(review): the 40.0 separation check below does not actually filter
# anything. The `continue` inside the `for pos` loop only moves on to the next
# previously chosen position; execution then falls through to the append, so a
# candidate is accepted even after "too close" has been printed for it.
chosen_bulk_envs = {1: [],
                    2: [],
                    3: []}

# Positions of every environment accepted so far (all grains together).
all_chosen_positions = []
for s in sorted_bulk_like_results:
    # s[0] is the atom index, s[1] is stored below as "nearest_hcp_distance".
    idx = s[0]
    grain_fract = oatom_envs_dict[idx]["grain_fract"]

    # Skip environments whose grain_fract lists more than one grain.
    if len(grain_fract) > 1:
        continue

    # Grain ids are the keys of grain_fract; convert the single key to int.
    grain_id = int(list(grain_fract.keys())[0])
    if grain_id in [1,2,3]:
        curr_num_envs = len(chosen_bulk_envs[grain_id])
        # Per-grain quota already filled: nothing more to take from this grain.
        if grain_id in (1,2) and curr_num_envs >=4:
            continue
        elif grain_id == 3 and curr_num_envs >= 2:
            continue
        else:
            ref_pos = all_positions[idx]
            for pos in all_chosen_positions:
                dist = pbc_distance(ref_pos,pos,min_orig_atoms.cell)
                if dist < 40.0:
                    print(f'too close for {idx}, distance: {dist}')
                    # Only skips to the next `pos`; the candidate is NOT rejected.
                    continue
            output = {"index": idx,
                      "nearest_hcp_distance": s[1]}
            chosen_bulk_envs[grain_id].append(output)
            all_chosen_positions.append(ref_pos)

            # Stop as soon as every grain has its full quota.
            if len(chosen_bulk_envs[1]) == 4 and \
               len(chosen_bulk_envs[2]) == 4 and \
               len(chosen_bulk_envs[3]) == 2:
                break


In [None]:
# Show the environments picked per grain by the first selection pass.
chosen_bulk_envs

In [None]:
# Persist the first selection (grain id -> list of chosen environments).
with open("chosen_bulk_envs.json", "w") as out_file:
    out_file.write(json.dumps(chosen_bulk_envs, indent=2))

In [None]:
# Flatten the per-grain selection into a single list of atom indices.
chosen_bulk_env_indices = [
    chosen_env["index"]
    for grain_envs in chosen_bulk_envs.values()
    for chosen_env in grain_envs
]

In [None]:
# One chosen atom index per line.
with open("chosen_bulk_env_indices.txt", "w") as index_file:
    index_file.writelines(f"{env_index}\n" for env_index in chosen_bulk_env_indices)

In [None]:
# Preview only: show how many environment records there are and the first few,
# rather than dumping the whole list into the cell output.
print(f"{len(oatom_envs)} environment records")
oatom_envs[:3]

In [None]:
# Grain index for every atom of the original structure; O atoms are absent
# from the O-free arrays and get the sentinel -1.
xyz_grain_idxs = []
for i, atm in enumerate(min_orig_atoms):
    grain_idx = -1 if atm.symbol == "O" else int(noO_grains[orig2noO[i]])
    xyz_grain_idxs.append(grain_idx)


In [None]:
def generate_temp_bulk_xyz(indices, output_path="temp_bulk.xyz"):
    """
    Write an extended-XYZ copy of the structure with the chosen environments tagged.

    The copy of ``min_orig_atoms`` that is written carries three per-atom arrays:

    * ``is_neighbor`` -- 1 for every chosen centre atom and for every atom in
      its ``neighbor_idxs`` list, 0 otherwise,
    * ``grain_index`` -- taken from ``xyz_grain_idxs``,
    * ``ptm_type``    -- taken from ``xyz_ptm_types``.

    In addition, each chosen centre atom has its chemical symbol replaced by
    "Np" so that it stands out from the other atoms in the written file.

    Parameters:
    -----------
    indices : iterable of int
        Atom indices of the chosen environment centres. Each one must be a key
        of ``oatom_envs_dict`` (a missing key raises KeyError).
    output_path : str, optional
        File to write. Defaults to "temp_bulk.xyz"; pass a different name to
        avoid overwriting the output of a previous call.

    Returns:
    --------
    None
    """
    # Work on a copy so the module-level structure keeps its original symbols.
    min_orig_out = copy.deepcopy(min_orig_atoms)

    isneighbor = np.zeros(len(min_orig_atoms))
    new_symbols = min_orig_out.get_chemical_symbols()

    for index in indices:
        isneighbor[oatom_envs_dict[index]["neighbor_idxs"]] = 1
        isneighbor[index] = 1
        new_symbols[index] = "Np"

    min_orig_out.set_array("is_neighbor", isneighbor)
    min_orig_out.set_chemical_symbols(new_symbols)

    min_orig_out.set_array("grain_index", np.array(xyz_grain_idxs))
    min_orig_out.set_array("ptm_type", np.array(xyz_ptm_types))

    ase_write(output_path, min_orig_out, format="extxyz")

In [None]:
# Export the first selection to "temp_bulk.xyz" so it can be inspected visually.
generate_temp_bulk_xyz(chosen_bulk_env_indices)

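When I visualized the result, it was very clear that the chosen environments were not 40 Å apart. The reason is that the `continue` in the distance check above only skips to the next already-chosen position in the inner loop, so a candidate that is too close is still accepted. The next cell fixes this with a `too_close` flag and uses a 25 Å cutoff instead of 40 Å.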

In [None]:
# Corrected selection of bulk-like environments.
#
# Candidates are visited in the order given by sorted_bulk_like_results. A
# candidate is accepted when its environment lies in a single grain, that
# grain's quota is not yet full, and it is at least MIN_ENV_SEPARATION away
# (minimum-image distance) from every environment accepted so far.

# Number of environments to pick from each grain (grain id -> quota).
GRAIN_QUOTAS = {1: 4, 2: 4, 3: 2}

# Minimum allowed distance between two chosen environments, in Angstrom.
# Earlier attempts used 40.0 and 30.0 before settling on 25.0.
MIN_ENV_SEPARATION = 25.0

fixed_chosen_bulk_envs = {grain: [] for grain in GRAIN_QUOTAS}

# Positions of every environment accepted so far (all grains together).
fixed_all_chosen_positions = []
for s in sorted_bulk_like_results:
    # s[0] is the atom index, s[1] is stored below as "nearest_hcp_distance".
    idx = s[0]
    grain_fract = oatom_envs_dict[idx]["grain_fract"]

    # Skip environments whose grain_fract lists more than one grain.
    if len(grain_fract) > 1:
        continue

    grain_id = int(list(grain_fract.keys())[0])
    if grain_id not in GRAIN_QUOTAS:
        continue
    if len(fixed_chosen_bulk_envs[grain_id]) >= GRAIN_QUOTAS[grain_id]:
        # Quota for this grain is already filled.
        continue

    ref_pos = all_positions[idx]
    too_close = False
    for pos in fixed_all_chosen_positions:
        dist = pbc_distance(ref_pos, pos, min_orig_atoms.cell)
        if dist < MIN_ENV_SEPARATION:
            print(f'too close for {idx}, distance: {dist}')
            too_close = True
            break
    if too_close:
        # Reject the candidate itself, not just this pairwise comparison.
        continue

    output = {"index": idx,
              "nearest_hcp_distance": s[1]}
    fixed_chosen_bulk_envs[grain_id].append(output)
    fixed_all_chosen_positions.append(ref_pos)

    # Stop as soon as every grain has its full quota.
    if all(len(fixed_chosen_bulk_envs[grain]) == quota
           for grain, quota in GRAIN_QUOTAS.items()):
        break

# Make an under-filled selection visible instead of failing silently.
unfilled_quotas = {grain: f"{len(fixed_chosen_bulk_envs[grain])}/{quota}"
                   for grain, quota in GRAIN_QUOTAS.items()
                   if len(fixed_chosen_bulk_envs[grain]) < quota}
if unfilled_quotas:
    print(f"WARNING: ran out of candidates before filling all quotas "
          f"(grain: chosen/quota): {unfilled_quotas}")


In [None]:
# Show the environments picked per grain by the corrected selection pass.
fixed_chosen_bulk_envs

In [None]:
# Persist the corrected selection (grain id -> list of chosen environments).
with open("fixed_chosen_bulk_envs.json", "w") as out_file:
    out_file.write(json.dumps(fixed_chosen_bulk_envs, indent=2))

In [None]:
# Flatten the corrected per-grain selection into a single list of atom indices.
fixed_chosen_bulk_env_indices = [
    chosen_env["index"]
    for grain_envs in fixed_chosen_bulk_envs.values()
    for chosen_env in grain_envs
]

In [None]:
# One chosen atom index per line.
with open("fixed_chosen_bulk_env_indices.txt", "w") as index_file:
    index_file.writelines(
        f"{env_index}\n" for env_index in fixed_chosen_bulk_env_indices
    )

In [None]:
# Export the corrected selection for visual inspection.
# NOTE(review): this call writes "temp_bulk.xyz", the same file written by the
# earlier call for chosen_bulk_env_indices, so that earlier output is overwritten.
generate_temp_bulk_xyz(fixed_chosen_bulk_env_indices)

In [None]:
# Number of candidate entries that the selection loops iterate over.
len(sorted_bulk_like_results)