# Surface restructuring analysis of LAMMPS NEB trajectory:
## Preparation of LAMMPS/ASE optimization

## Header

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

# Start-up banner: citation, purpose of the script, and the usage notes /
# input requirements the user should read before answering the prompts below.
_banner_messages = (
    'Please cite DOI: 10.1021/acs.jpcc.9b04863.',
    'This script analyzes LAMMPS NEB calculations of interlayer surface restructuring events (as prepared via MD2NEB.py) and prepares images for LAMMPS/ASE optimization of each event.',
    'This script is interactive and requires user input. Please read carefully before proceeding:\n',
    # Scope of applicability
    'Note 1: Intended only for interlayer surface restructuring events in FCC metals.',
    'Note 2: Intended only for periodic slab models with vacuum along the z-direction.',
    'Note 3: Intended only for LAMMPS/ASE optimizations (ionic relaxation for IS/FS; dimer method for TS).',
    # Required input files
    'Note 4: Requires a LAMMPS DATA file containing the equilibrated box dimensions.',
    'Note 5: Requires a LAMMPS DATA file containing the 0K box dimensions.',
    'Note 6: Requires a file listing the event numbers chosen to be optimized, line-by-line.',
    'Note 7: Requires NEB images in the form of LAMMPS DUMP file, named "coords.final.$i", 0 <= $i <= Nimg-1.',
    'Note 8: Requires energies evaluated separately via Gaussian process (gp_neb.py), if using GP force field.\n',
)

for _message in _banner_messages:
    print(_message)

## Extract unit cell & system information

In [None]:
def _read_box(path):
    """Read the simulation box from the header of a LAMMPS DATA file.

    Only the header lines of the form ``<lo> <hi> xlo xhi`` (likewise for y
    and z) and ``<xy> <xz> <yz> xy xz yz`` are interpreted; every other line
    is ignored.

    Parameters
    ----------
    path : str
        Path of the LAMMPS DATA file.

    Returns
    -------
    tuple of float
        ``(xlo, xhi, ylo, yhi, zlo, zhi, xy, xz, yz)``. The tilt factors are
        0.0 when the file has no tilt line (orthogonal box).

    Raises
    ------
    ValueError
        If any of the x/y/z bound lines is missing from the file.
    """
    bounds = {}
    tilt = (0.0, 0.0, 0.0)    # Orthogonal boxes carry no "xy xz yz" line

    with open(path, 'r') as data:
        for raw in data:
            tokens = raw.split()
            # Match on the keyword positions so that a stray "xy"/"xlo" token
            # elsewhere in the file is never mistaken for a box line.
            if tokens[2:4] in (['xlo', 'xhi'], ['ylo', 'yhi'], ['zlo', 'zhi']):
                bounds[tokens[2]] = float(tokens[0])
                bounds[tokens[3]] = float(tokens[1])
            elif tokens[3:6] == ['xy', 'xz', 'yz']:
                tilt = (float(tokens[0]), float(tokens[1]), float(tokens[2]))

    keys = ('xlo', 'xhi', 'ylo', 'yhi', 'zlo', 'zhi')
    missing = [key for key in keys if key not in bounds]
    if missing:
        raise ValueError('Box bounds ' + ', '.join(missing) + ' not found in ' + path)

    return tuple(bounds[key] for key in keys) + tilt


pwd = os.getcwd()

# Load LAMMPS data file after NPT equilibration
name = input('Enter the name of the LAMMPS DATA file containing the equilibrated box dimensions : ')
file_eq = os.path.join(pwd, name)
xlo, xhi, ylo, yhi, zlo, zhi, xy, xz, yz = _read_box(file_eq)

# Rows are the cell vectors a, b, c of the equilibrated box
lat_eq = np.array([[xhi-xlo, 0.0, 0.0], [xy, yhi-ylo, 0.0], [xz, yz, zhi-zlo]])

# Load LAMMPS data file after 0K equilibration
name = input('Enter the name of the LAMMPS DATA file containing the 0K box dimensions : ')
file_0K = os.path.join(pwd, name)
# Re-reading every value here guarantees nothing is inherited from the equilibrated box
xlo, xhi, ylo, yhi, zlo, zhi, xy, xz, yz = _read_box(file_0K)

# Rows are the cell vectors a, b, c of the 0K box
lat_0K = np.array([[xhi-xlo, 0.0, 0.0], [xy, yhi-ylo, 0.0], [xz, yz, zhi-zlo]])

# Bounding-box extents of the (possibly tilted) 0K cell, as required for a LAMMPS DUMP file
xlo_bound = xlo + min(0.0, xy, xz, xy+xz)
xhi_bound = xhi + max(0.0, xy, xz, xy+xz)
ylo_bound = ylo + min(0.0, yz)
yhi_bound = yhi + max(0.0, yz)
zlo_bound = zlo
zhi_bound = zhi

# One row per DUMP box line: (lo_bound, hi_bound, tilt factor)
bound_0K = np.array([[xlo_bound, xhi_bound, xy], [ylo_bound, yhi_bound, xz], [zlo_bound, zhi_bound, yz]])

GP = input('Were the energies evaluated separately via Gaussian process (gp_neb.py)? Yes or No : ')

print('Unit cell & system information extracted.\n')

## List of events to be optimized

In [None]:
# List of events chosen to be optimized after visual inspection of the NEB results.
# The first whitespace-separated token of every non-blank line is taken as the
# event number; blank lines (e.g. a trailing empty line) are skipped.
name = input('Enter the name of the file listing the event numbers chosen to be optimized, line-by-line : \n Warning: All specified events must visually contain a single, well-defined event. Please quit now and perform visual inspection if not so.\n')
file_ev = os.path.join(pwd, name)

with open(file_ev, 'r') as ev:
    ev_lines = [line.split() for line in ev]

ls = [tokens[0] for tokens in ev_lines if tokens]    # Event numbers, kept as strings

print('\n')

## Extract IS, TS, FS - LAMMPS DUMP file preparation

In [None]:
# IS = initial state
# TS = transition state
# FS = final state
#
# For every event in `ls` this cell
#   1. builds the minimum energy pathway (MEP) as an (N_img, 2) array whose
#      second column is the image energy,
#   2. saves a plot of it to ./ev_***/mep.pdf,
#   3. picks TS (highest-energy image) and IS/FS (first local minima found
#      when walking downhill from TS to the left/right),
#   4. writes ./ev_***/Nimg.txt and one LAMMPS DUMP file per state in
#      ./ev_***/{IS,TS,FS}/ with the coordinates rescaled to the 0K box.

print('Preparing LAMMPS DUMP files of IS, TS, FS in respective folders under ./ev_***/ for each event...')

# Accept any capitalisation of the answer ('Yes', 'yes', 'Y', ...)
use_gp = GP.strip().lower() in ('yes', 'y')

# Row-vector transform r_0K = r . inv(lat_eq) . lat_0K; constant for all atoms and events
rescale = np.dot(np.linalg.inv(lat_eq), lat_0K)

for l in ls:

    path = pwd + '/ev_' + str(l)

    if use_gp:

        # Image numbers & energies calculated via Gaussian process.
        # NOTE(review): read as two rows (image numbers, then energies), which is
        # what the original per-row int/float conversion implies - confirm against gp_neb.py.
        file_mep = path + '/gp_neb.txt'
        with open(file_mep, 'r') as mep:
            rows = [row.split() for row in mep if row.split()]
        if len(rows) < 2:
            raise ValueError('Expected two rows (image numbers, energies) in ' + file_mep)

        images = np.array(rows[0]).astype(int)
        energies = np.array(rows[1]).astype(float)
        mep_lines = np.column_stack((images, energies))    # Columns: image number, energy
        N_img = mep_lines.shape[0]
        x_label = 'Image number'

    else:

        # NEB distance & energy of the images, taken from the last line of the LAMMPS log
        file_log = path + '/log.lammps'
        with open(file_log, 'r') as log:
            log_lines = [row.split() for row in log]
        last_entry = log_lines[-1]

        # Number of images = number of coords.final.* files. Counted without a shell
        # and without changing the working directory, so `pwd` stays valid on a rerun.
        N_img = sum(1 for fname in os.listdir(path) if fname.startswith('coords.final.'))
        if N_img == 0:
            raise FileNotFoundError('No coords.final.* images found in ' + path)

        # The last 2*N_img fields of the converged entry are (distance, energy) pairs
        pairs = [(last_entry[-2*i], last_entry[-2*i+1]) for i in range(N_img, 0, -1)]

        file_mep = path + '/mep.txt'    # MEP = minimum energy pathway
        with open(file_mep, 'w') as mep:
            for distance, energy in pairs:
                mep.write(distance + '\t' + energy + '\n')

        mep_lines = np.array(pairs).astype(float)    # Columns: NEB distance, energy
        x_label = 'NEB distance'

    # Save MEP plot as a PDF file: energy (second column) against the first column
    f = plt.figure()
    plt.plot(mep_lines[:, 0], mep_lines[:, 1], 'k.', markersize=10)
    plt.plot(mep_lines[:, 0], mep_lines[:, 1], 'k--', markersize=1)
    plt.ylabel('Energy (eV)')
    plt.xlabel(x_label)
    file_pdf = path + '/mep.pdf'
    f.savefig(file_pdf)

    # Scan MEP for IS, TS, FS

    energies = mep_lines[:, 1]

    # TS = highest-energy image (the last one if several share the maximum)
    TS = int(np.flatnonzero(energies == np.nanmax(energies))[-1])

    # Start at TS and climb down to the left until the energy stops decreasing.
    # Starting from TS itself means IS is always defined, even when TS is image 0.
    IS = TS
    while IS > 0 and energies[IS] - energies[IS-1] > 0:
        IS -= 1

    # Start at TS and climb down to the right until the energy stops decreasing
    FS = TS
    while FS < N_img-1 and energies[FS] - energies[FS+1] > 0:
        FS += 1

    if IS == TS or FS == TS:
        print('Warning: event ' + str(l) + ' has no energy minimum on one side of the TS; please check ' + file_pdf)

    file_Nimg = path + '/Nimg.txt'    # List of image numbers corresponding to IS, TS, FS
    with open(file_Nimg, 'w') as Nimg:

        for label, image in zip(['IS', 'TS', 'FS'], [IS, TS, FS]):

            # Separate directory per state; exist_ok allows the cell to be rerun
            os.makedirs(path + '/' + label, exist_ok=True)

            Nimg.write(label + '\t' + str(image) + '\n')

            file_img = path + '/coords.final.' + str(image)    # LAMMPS DUMP file of converged NEB image
            with open(file_img, 'r') as img:
                img_lines = img.readlines()

            # A triclinic DUMP header names the tilt factors ("ITEM: BOX BOUNDS xy xz yz ...")
            # and has three values per box line; an orthogonal one has two.
            triclinic = len(img_lines) > 4 and 'xy' in img_lines[4].split()

            file_dump = path + '/' + label + '/' + label + '.dmp'    # LAMMPS DUMP file for optimization
            with open(file_dump, 'w') as dump:

                for line_index, line in enumerate(img_lines):

                    if (line_index < 5) or (line_index == 8):    # Copy header
                        dump.write(line)

                    elif line_index <= 7:    # Replace box bounds by the 0K ones
                        lo, hi, tilt = bound_0K[line_index-5]
                        if triclinic:
                            dump.write('%f %f %f\n'%(lo, hi, tilt))
                        else:
                            dump.write('%f %f\n'%(lo, hi))

                    else:    # Atom entries; assumes columns are: ID type x y z
                        fields = line.split()
                        if not fields:
                            continue

                        ID = int(fields[0])
                        Type = int(fields[1])
                        r = np.array([float(value) for value in fields[2:5]])    # Cartesian coordinates
                        r_0K = np.dot(r, rescale)    # Cartesian coordinates rescaled to 0K lattice

                        dump.write('%i %i %f %f %f\n'%(ID, Type, r_0K[0], r_0K[1], r_0K[2]))

print('LAMMPS DUMP files generated > ./ev_***/{IS/,TS/,FS/}')