<!-- # CNN autoencoder and Clustering from MTRX data

Use this notebook to load Scienta Omicron Matrix format SPM data and create standardised images for machine learning training and analysis. The code can generate both JPG image data, useful for manually checking the data, and windowed numpy data that can be loaded into ML models. 

The notebook then creates an autoencoder for training on a large dataset, followed by KMEANS clustering. 

**Author**: Steven R. Schofield  
**Created**: November, 2024 -->

# CASTEP - Remove the vacuum spacing from a surface template file
## Steven R. Schofield (University College London) May 2025

### Set parameters for calculation

### Determine appropriate paths whether we are working on macbook or the cluster

In [59]:
import sys
from pathlib import Path

# Locations to probe, in priority order: the macbook path first, then the
# cluster path. The first one that exists on this machine is used.
module_path_list = [
    Path('/Users/steven/academic-iCloud/Python/modules'),
    Path('/hpc/srs/Python/modules'),
]

data_path_list = [
    Path('/Users/steven/Castep-data/work'),
    Path('/hpc/srs/castep'),
]


def _first_existing(candidates):
    """Return the first path in `candidates` that exists on disk, or None."""
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return None


module_path = _first_existing(module_path_list)
data_path = _first_existing(data_path_list)

# Report every unresolved location before stopping, so that both problems
# are visible in a single run.
if module_path is None:
    print("Error: Could not locate a valid module path.")
if data_path is None:
    print("Error: Could not locate a valid data path.")

if None in (module_path, data_path):
    sys.exit(1)

# Put the custom modules at the front of the import search path, unless a
# previous run of this cell already did so.
if sys.path.count(str(module_path)) == 0:
    sys.path.insert(0, str(module_path))

# Echo the resolved locations
print(f"module_path = {module_path}")
print(f"data_path = {data_path}")

module_path = /Users/steven/academic-iCloud/Python/modules
data_path = /Users/steven/Castep-data/work


### Import modules

In [60]:
# # Ensure modules are reloaded 
%load_ext autoreload
%autoreload 2

# Import standard modules
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt

# Import custom module
import SRSCALCUTILS.castep_tools as ct

from IPython.display import display

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [61]:
# Bulk cell vectors, one per row. The cell is orthogonal, so only the
# diagonal entries are non-zero.
lattice_cart_bulk = np.diag([3.8641976, 7.7283952, 5.4648012])

# Multiplier applied to the third lattice vector
c = 4

# Scale a copy so that the bulk lattice itself is left untouched
lattice_cart_new = lattice_cart_bulk.copy()
lattice_cart_new[2] *= c

# Show the new lattice, one vector per line
print("lattice_cart_new, c = {}:".format(c))
print(*lattice_cart_new, sep="\n")

lattice_cart_new, c = 4:
[3.8641976 0.        0.       ]
[0.        7.7283952 0.       ]
[ 0.         0.        21.8592048]


### Load positions_frac_surface from template

In [62]:
# Locate the template directory relative to the module path resolved earlier
# in the notebook, rather than hard-coding the macbook location, so this cell
# also runs on the cluster. Assumes the templates live inside the SRSCALCUTILS
# package under module_path -- confirm the cluster checkout has them.
# Converted to str because the ct helpers were originally given a plain string.
template_path = str(module_path / 'SRSCALCUTILS' / 'data' / 'templates')
template_filename = "si001_124-out.cell"
template_write_filename = "si001_124"

# Read positions and lattice from template file
positions_frac_template, lattice_cart_template = ct.read_positions_frac(template_path, template_filename)

# Remove H atoms and shift z-coordinates down
# (select_atoms_by_region presumably flags the atoms matching the condition and
# selected_delete drops the flagged ones -- see SRSCALCUTILS.castep_tools.)
labelled_positions_frac_new = ct.select_atoms_by_region(positions_frac_template, lattice_cart_template, condition="atom=='H'")
positions_frac_new = ct.selected_delete(labelled_positions_frac_new)
positions_frac_new = ct.remove_z_offset(positions_frac_new)

# Rescale z-coordinates
# Fractional z is relative to the cell height, so multiplying by
# (template height / new height) re-expresses each value in the new cell.
# NOTE(review): row[3] is taken to be the fractional z entry of each row -- confirm
# against the row layout returned by ct.read_positions_frac.
rescale = lattice_cart_template[2,2] / lattice_cart_new[2,2]
for row in positions_frac_new:
    row[3] = float(row[3]) * float(rescale)

In [63]:
# Title passed to the cell-file writer
title = 'Si(001) template file, processed from CASTEP output'

# Write the processed structure into the template directory under the new
# file name; display_file=True asks the helper to show the written file.
cell_filename = ct.write_cell_file(
    path=template_path,
    filename=template_write_filename,
    title=title,
    lattice_cart=lattice_cart_new,
    positions_frac=positions_frac_new,
    display_file=True,
)

Wrote cell file to: /Users/steven/academic-iCloud/Python/modules/SRSCALCUTILS/data/templates/si001_124.cell
! Si(001) template file, processed from CASTEP output

%BLOCK lattice_cart
   ANG
       3.8641976000    0.0000000000    0.0000000000
       0.0000000000    7.7283952000    0.0000000000
       0.0000000000    0.0000000000   21.8592048000
%ENDBLOCK lattice_cart

%BLOCK CELL_CONSTRAINTS
       0    0    0
       0    0    0
%ENDBLOCK CELL_CONSTRAINTS

%BLOCK positions_frac
   Si       0.0000000000    0.6491282000    0.9274519173
   Si       0.0000000000    0.3508672000    0.9274538157
   Si       0.5000000000    0.7386160000    0.8774410428
   Si       0.5000000000    0.2613854000    0.8774412154
   Si       0.5000000000    0.0000008000    0.8205662047
   Si       0.5000000000    0.4999999000    0.8088486947
   Si      -0.0000000000    0.0000001000    0.7563082739
   Si       0.0000000000    0.5000001000    0.7474829067
   Si       0.0000000000    0.7560191000    0.6892296497
   Si