<!-- # CNN autoencoder and Clustering from MTRX data

Use this notebook to load Scienta Omicron Matrix format SPM data and create standardised images for machine learning training and analysis. The code can generate both JPG image data, useful for manually checking the data, and windowed numpy data that can be loaded into ML models. 

The notebook then creates an autoencoder for training on a large dataset, followed by KMEANS clustering. 

**Author**: Steven R. Schofield  
**Created**: November, 2024 -->

# CASTEP - Si 001 surface
## Steven R. Schofield (Universtiy College London) May 2025

### Determine appropriate paths whether we are working on macbook or the cluster

In [1]:
import sys
from pathlib import Path

# Define candidate paths using Path objects
module_path_list = [
    Path('/Users/steven/academic-iCloud/Python/modules'),
    Path('/hpc/srs/Python/modules')
]

data_path_list = [
    Path('/Users/steven/academic-iCloud/Calculations/castep/'),
    Path('/hpc/srs/castep')
]

# Resolve actual paths
module_path = next((p for p in module_path_list if p.exists()), None)
data_path = next((p for p in data_path_list if p.exists()), None)

# Check and report missing paths
if module_path is None:
    print("Error: Could not locate a valid module path.")
if data_path is None:
    print("Error: Could not locate a valid data path.")

if module_path is None or data_path is None:
    sys.exit(1)

# Add module_path to sys.path if needed
if str(module_path) not in sys.path:
    sys.path.insert(0, str(module_path))

# Print resolved paths
print(f"module_path = {module_path}")
print(f"data_path = {data_path}")

module_path = /Users/steven/academic-iCloud/Python/modules
data_path = /Users/steven/academic-iCloud/Calculations/castep


### Import modules

In [2]:
# # Ensure modules are reloaded 
%load_ext autoreload
%autoreload 2

# Import standard modules
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt

# Import custom module
import SRSCALCUTILS.castep_tools as ct

from IPython.display import display



### Paths for the calculation

In [3]:
# General
job_folder = 'work-2025/si001_surf'
job_path = data_path / job_folder
verbose = False             # Set this True to print out more information

# Print resolved paths
print(f"job_path = {job_path}")

# If it doesn’t exist, create it (including any missing parent folders)
if not os.path.isdir(job_path):
    os.makedirs(job_path, exist_ok=True)

# Now safely switch into it
os.chdir(job_path)

print('Current files in this directory:')
for file in os.listdir():
    print(file)

job_path = /Users/steven/academic-iCloud/Calculations/castep/work-2025/si001_surf
Current files in this directory:
si001_test.castep_bin
si001_test.cell
si001_test.check
si001_test.geom
si001_test.castep
si001_test-out.cell
si001_test.cst_esp
si001_test.bib
Si_C19_PBE_OTF.usp
si001_test.bands
si001_test.param


### Base parameters of calculation and unit cell

In [19]:
# Set up the param and cell files for the calculation

filename = "si001_surf"
title = "silicon 001 surface tests"

params = {'task' : 'geomopt',
          'xc_functional'           : 'PBE',
          'cut_off_energy'          : 400,
          'spin_polarised'          : 'false',
          'write_cell_structure'    : 'true',
          'charge'                  : 0,
          'nextra_bands'            : 8,
          'geom_energy_tol'         : 0.00005,          # default: 0.00005 eV 
          'geom_disp_tol'           : 0.002,            # default: 0.002 Ang.
          'geom_max_iter'           : 100,              # default: 30
          'geom_convergence_win'    : 2,                # default: 2
          'max_scf_cycles'          : 300               # default: 30
          }              

lattice_cart = np.array([
            [3.8641976,     0.0,     0.0],
            [0.0,     3.8641976,     0.0],
            [0.0,     0.0,     5.4648012]
        ])

positions_frac = np.array([
            ['Si', 0.0,  0.0,  0.0],
            ['Si', 0.5,  0.0,  0.25],
            ['Si', 0.5,  0.5,  0.5],
            ['Si', 0.0,  0.5,  0.75],
        ])

cell_constraints = np.array([
            [0,  0,  0],
            [0,  0,  0]
        ])

kpoints_mp_grid = np.array([2, 2, 2])

fix_all_ions = False
symmetry_generate=False
symmetry_tol = 0.01


### Build supercell

In [20]:
n = [2, 2, 1] 
super_positions_frac, super_lattice_cart = ct.create_supercell_from_fractional_coords(positions_frac,lattice_cart,n)

### Add hydrogen termination to cell bottom 

In [21]:
axis = 'z'
criteria = "minimum"
ref_value, _ = ct.find_plane_value(super_positions_frac, super_lattice_cart, axis, criteria)
tolerance = 0.5
labelled_positions_frac = ct.select_atoms_by_plane(super_positions_frac, super_lattice_cart, axis, ref_value,
                          tolerance=tolerance, include=None, exclude=None)

In [22]:
labelled_positions_frac

[[1, False, 'Si', 0.5, 0.75, 0.75],
 [2, False, 'Si', 0.0, 0.75, 0.75],
 [3, False, 'Si', 0.5, 0.25, 0.75],
 [4, False, 'Si', 0.0, 0.25, 0.75],
 [5, False, 'Si', 0.75, 0.75, 0.5],
 [6, False, 'Si', 0.25, 0.75, 0.5],
 [7, False, 'Si', 0.75, 0.25, 0.5],
 [8, False, 'Si', 0.25, 0.25, 0.5],
 [9, False, 'Si', 0.75, 0.5, 0.25],
 [10, False, 'Si', 0.25, 0.5, 0.25],
 [11, False, 'Si', 0.75, 0.0, 0.25],
 [12, False, 'Si', 0.25, 0.0, 0.25],
 [13, True, 'Si', 0.5, 0.5, 0.0],
 [14, True, 'Si', 0.0, 0.5, 0.0],
 [15, True, 'Si', 0.5, 0.0, 0.0],
 [16, True, 'Si', 0.0, 0.0, 0.0]]

In [36]:
import numpy as np

def add_offset_atoms(labeled_positions_frac, lattice_cart, bond, atom="H"):
    """
    Parameters
    ----------
    labeled_positions_frac : array-like of shape (N, 6)
        Each row: [index, flag_bool, symbol, frac_x, frac_y, frac_z]
    lattice_cart : (3,3) array
        Unit cell vectors in Cartesian coordinates.
    bond : array-like of shape (3,)
        Offset vector in Angstrom (Cartesian).
    atom : str, optional
        Symbol for the new atom to add (default "H").

    Returns
    -------
    positions_frac : ndarray of shape (N+M, 4), dtype=object
        Columns: [symbol, frac_x, frac_y, frac_z]
        where M is the number of rows in input with flag=True.

    new_lattice_cart : ndarray of shape (3,3)
        A copy of lattice_cart with its [2,2] element increased by bond[2].
    """
    # 1) convert bond to fractional coords
    inv_lat = np.linalg.inv(lattice_cart)
    bond = np.asarray(bond, dtype=float)
    bond_frac = bond @ inv_lat

    # 2) build the positions list
    out = []
    for idx, flag, sym, fx, fy, fz in labeled_positions_frac:
        frac = np.array([fx, fy, fz], dtype=float)
        out.append([sym, float(frac[0]), float(frac[1]), float(frac[2])])
        if flag:
            new_f = (frac + bond_frac) % 1.0
            out.append([atom, float(new_f[0]), float(new_f[1]), float(new_f[2])])

    positions_frac = np.array(out, dtype=object)

    # 3) grow the c‐axis of the cell by the bond’s z‐component
    new_lattice_cart = lattice_cart.copy()
    new_lattice_cart[2, 2] += -bond[2]

    return positions_frac, new_lattice_cart

In [38]:
bond = np.array([0.0, 0.0, -0.5]) 
new_positions_frac, new_lattice_cart = add_offset_atoms(labelled_positions_frac,
                                    super_lattice_cart,
                                    bond,
                                    atom="H")
print(new_positions_frac)
print(new_lattice_cart)


[['Si' 0.5 0.75 0.75]
 ['Si' 0.0 0.75 0.75]
 ['Si' 0.5 0.25 0.75]
 ['Si' 0.0 0.25 0.75]
 ['Si' 0.75 0.75 0.5]
 ['Si' 0.25 0.75 0.5]
 ['Si' 0.75 0.25 0.5]
 ['Si' 0.25 0.25 0.5]
 ['Si' 0.75 0.5 0.25]
 ['Si' 0.25 0.5 0.25]
 ['Si' 0.75 0.0 0.25]
 ['Si' 0.25 0.0 0.25]
 ['Si' 0.5 0.5 0.0]
 ['H' 0.5 0.5 0.9085053633790009]
 ['Si' 0.0 0.5 0.0]
 ['H' 0.0 0.5 0.9085053633790009]
 ['Si' 0.5 0.0 0.0]
 ['H' 0.5 0.0 0.9085053633790009]
 ['Si' 0.0 0.0 0.0]
 ['H' 0.0 0.0 0.9085053633790009]]
[[7.7283952 0.        0.       ]
 [0.        7.7283952 0.       ]
 [0.        0.        5.9648012]]


### Add vacuum spacing

In [None]:
vac = 10
super_positions_frac, super_lattice_cart = ct.create_vacuum_spacing(super_positions_frac, super_lattice_cart, vac)

### Add ionic contraints

In [None]:
# Create fixed ion constraints
conditions = "z < 2.5"
ionic_constraints = ct.select_atoms_by_region(super_positions_frac, super_lattice_cart, conditions)
print(ionic_constraints)

In [None]:
filename = f"si001_test"
castep_path = job_path / f"{filename}.castep"

print('=' * 80 + f'\nCASTEP file: {castep_path}\n' + '=' * 80 + '\n')   

param_filename = ct.write_param_file(
    params,
    title = title,
    filename=filename,
    path=job_path,
    display_file=True
    )

cell_filename = ct.write_cell_file(
    title = title,
    path=job_path,
    filename=filename,
    lattice_cart=super_lattice_cart,
    positions_frac=super_positions_frac,
    cell_constraints=cell_constraints,
    ionic_constraints=ionic_constraints,
    fix_all_ions=fix_all_ions,
    symmetry_generate=symmetry_generate,
    symmetry_tol = symmetry_tol,
    kpoints_mp_grid=kpoints_mp_grid,
    display_file=True
)

In [None]:
# if str(module_path) == '/hpc/srs/Python/modules':
#     !bash -l -c "mpirun -np 62 castep.mpi {filename}"
# else:
#     !zsh -l -c "castepmpi {filename}"


In [None]:

# unit_cell, a, b, c, alpha, beta, gamma = ct.get_final_lattice_parameters(castep_path)
# energy_optimisation = ct.get_LBFGS_energies(castep_path)
# energies = [val for _, val in energy_optimisation]
# energy = energies[-1]

# print('Optimised energy {} eV'.format(energy))
# print('Optimised lattice constants a,b = {} Ang., c = {} Ang.'.format(a,c))
# print('Unit cell:')
# for line in unit_cell:
#     print(' '*2,np.abs(line))
