# Surface restructuring analysis of LAMMPS NEB trajectory:
## Preparation of VASP optimization

## Header

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

# Start-up banner: citation, purpose of the script, and the assumptions (Notes 1-4)
# and required input files (Notes 5-8) the user must satisfy before continuing.
# Entries that end in '\n' leave an empty line after themselves when printed,
# which visually groups the notes.
banner = (
    'Please cite DOI: 10.1021/acs.jpcc.9b04863.',
    'This script analyzes LAMMPS NEB calculations of interlayer surface restructuring events (as prepared via MD2NEB.py) and generates VASP POSCAR files for DFT optimization of each event.',
    'This script is interactive and requires user input. Please read carefully before proceeding:\n',
    'Note 1: Intended only for interlayer surface restructuring events in FCC metals.',
    'Note 2: Intended only for periodic slab models with vacuum along the z-direction.',
    'Note 3: Intended only for VASP DFT optimizations (ionic relaxation for IS/FS; dimer method for TS).',
    'Note 4: Must have used DFT unit cell in previous LAMMPS analysis (MD2NEB.py).\n',
    'Note 5: Requires a LAMMPS DATA file containing the equilibrated box dimensions.',
    'Note 6: Requires a VASP POSCAR file containing the DFT-optimized lattice vectors of the slab model.',
    'Note 7: Requires a file listing the event numbers chosen to be optimized, line-by-line.',
    'Note 8: Requires NEB images in the form of LAMMPS DUMP file, named "coords.final.$i", 0 <= $i <= Nimg-1.\n',
)

for message in banner:
    print(message)

## Extract unit cell & system information

In [None]:
# Working directory; every file name entered below is resolved relative to it.
pwd = os.getcwd()

# Load LAMMPS data file after NPT equilibration
name = input('Enter the name of the LAMMPS DATA file containing the equilibrated box dimensions : ')
file_eq = os.path.join(pwd, name)
with open(file_eq, 'r') as log:    # Closed automatically, even if parsing fails below
    log_lines = [line.split() for line in log]

xlo = xhi = ylo = yhi = zlo = zhi = None
xy = xz = yz = 0.0    # Orthogonal boxes carry no "xy xz yz" line, so default to zero tilt
start = None    # Index of the first row of the "Atoms" section

for line_index, line in enumerate(log_lines):
    if 'xlo' in line:
        xlo, xhi = float(line[0]), float(line[1])
    elif 'ylo' in line:
        ylo, yhi = float(line[0]), float(line[1])
    elif 'zlo' in line:
        zlo, zhi = float(line[0]), float(line[1])
    elif 'xy' in line:
        xy, xz, yz = float(line[0]), float(line[1]), float(line[2])
    elif 'Atoms' in line:
        start = line_index + 2    # Skip the section keyword and the blank line below it

if None in (xlo, ylo, zlo) or start is None:
    raise ValueError('%s does not look like a LAMMPS DATA file: box bounds or "Atoms" section not found.' % file_eq)

# The atom rows run until the next blank line (or the end of the file). This gives
# the same range as stopping one line above "Velocities", and also works when the
# DATA file has no "Velocities" section.
end = start
while end < len(log_lines) and log_lines[end]:
    end += 1

lat_lmp = np.array([[xhi-xlo, 0.0, 0.0], [xy, yhi-ylo, 0.0], [xz, yz, zhi-zlo]])    # Equilibrated LAMMPS lattice matrix (one lattice vector per row)

# Second column of every atom row = atom type (first column = atom ID)
atom_types = np.array([row[1] for row in log_lines[start:end]]).astype(int)
if atom_types.size == 0:
    raise ValueError('No atoms found in the "Atoms" section of %s.' % file_eq)

sys = input('Enter the system name : ')

ls_element = input('Enter the list of the elements, separated by space, in the same order as LAMMPS atom types : ')

# np.unique returns the types in ascending order together with their populations.
# Ascending order matters: the element list above and the coordinates written to the
# POSCAR files are both ordered by atom type, whereas the atoms in the DATA file
# may appear in any order.
ls_type, type_counts = np.unique(atom_types, return_counts=True)
ls_type = ls_type.tolist()    # List of atom types
N_element = len(ls_type)    # Number of elements
N_type = ' '.join(str(n) for n in type_counts.tolist())    # Number of atoms of each atom type

# Load reference VASP POSCAR file
name = input('Enter the name of the VASP POSCAR file containing the DFT-optimized lattice vectors of the slab model : ')
poscar = os.path.join(pwd, name)
with open(poscar, 'r') as log:
    log_lines = log.readlines()

if len(log_lines) < 5:
    raise ValueError('%s is too short to be a VASP POSCAR file (scaling factor and three lattice vectors expected on lines 2-5).' % poscar)

# Lines 2-5 of the POSCAR: lattice scaling factor and DFT-optimized lattice vectors.
# Each is kept as raw text terminated by exactly one newline, because the header
# template below pastes them back to back without adding separators.
scale, a, b, c = (line.rstrip('\n') + '\n' for line in log_lines[1:5])

F = 'F F F'    # Selective dynamics
T = 'T T T'

fix = input('Selective dynamics? Yes or No : ')
# Normalise the answer so that e.g. "yes" or "y" is not silently treated as "No";
# the rest of the script only tests fix == 'Yes'.
fix = 'Yes' if fix.strip().lower() in ('yes', 'y') else 'No'
if fix == 'Yes':
    print('The fixed atoms are assumed to be numbered consecutively (e.g. height-by-height). Please quit now if not so.')
    header = '%s\n%s%s%s%s%s\n%s\n%s\n%s\n'%(sys, scale, a, b, c, ls_element, N_type, 'Selective dynamics', 'Direct')
    fix_range = input('Enter the IDs of the first and last atom to be fixed, separated by space : ')
    fix_range = np.array(fix_range.split()).astype(int)
    if fix_range.size != 2:
        raise ValueError('Expected exactly two atom IDs (first and last fixed atom), got %d.' % fix_range.size)
else:
    header = '%s\n%s%s%s%s%s\n%s\n%s\n'%(sys, scale, a, b, c, ls_element, N_type, 'Direct')

print('Unit cell & system information extracted.\n')

## List of events to be optimized

In [None]:
# List of events chosen to be optimized after visual inspection of the NEB results.
# The file holds one event number per line; only the first whitespace-separated
# token of each line is used.
name = input('Enter the name of the file listing the event numbers chosen to be optimized, line-by-line : \n Warning: All specified events must visually contain a single, well-defined event. Please quit now and perform visual inspection if not so.\n')
file_ev = os.path.join(pwd, name)
with open(file_ev, 'r') as ev:    # Closed automatically once the lines are read
    ev_lines = [line.split() for line in ev]

# Event labels, kept as strings; blank lines (e.g. a trailing newline at the end
# of the file) are skipped instead of raising IndexError.
ls = [line[0] for line in ev_lines if line]

print('\n')

## Extract IS, TS, FS - VASP POSCAR preparation

In [None]:
# IS = initial state
# TS = transition state
# FS = final state


def _find_is_ts_fs(energy):
    """Return the image indices (IS, TS, FS) of a converged NEB energy profile.

    TS is the highest-energy image (the last one, if several images share the
    maximum). IS is reached by walking from TS towards image 0 for as long as
    the energy keeps strictly decreasing; FS is reached the same way towards
    the last image. When TS is an end image, the corresponding walk has zero
    length and IS (or FS) equals TS.

    Raises ValueError if the profile is empty or contains no finite maximum.
    """
    energy = np.asarray(energy, dtype=float)
    if energy.size == 0 or np.all(np.isnan(energy)):
        raise ValueError('Energy profile is empty or contains only NaN.')

    n_img = energy.shape[0]
    ts = int(np.flatnonzero(energy == np.nanmax(energy))[-1])

    i_s = ts
    while i_s > 0 and energy[i_s] - energy[i_s-1] > 0:    # Climb down to the left
        i_s -= 1

    f_s = ts
    while f_s < n_img-1 and energy[f_s] - energy[f_s+1] > 0:    # Climb down to the right
        f_s += 1

    return i_s, ts, f_s


print('Preparing VASP POSCAR files of IS, TS, FS in respective folders under ./dft/ev_***/ for each event...')

lat_inv = np.linalg.inv(lat_lmp)    # Loop-invariant matrix converting Cartesian to direct coordinates

for l in ls:

    path = pwd + '/ev_' + str(l)

    file_log = path + '/log.lammps'    # List of NEB distance & energy of the images
    with open(file_log, 'r') as log:
        log_lines = [line.split() for line in log]
    log_lines = [line for line in log_lines if line]    # A trailing blank line must not hide the last NEB entry

    # Number of images = number of "coords.final.*" files in the event folder.
    # Counted with os.listdir on the absolute path, so the working directory is
    # never changed and no shell pipeline is needed.
    N_img = sum(1 for entry in os.listdir(path) if entry.startswith('coords.final.'))

    if N_img == 0:
        raise ValueError('No coords.final.* files found in %s.' % path)
    if not log_lines or len(log_lines[-1]) < 2*N_img:
        raise ValueError('Last line of %s does not hold %d (distance, energy) pairs.' % (file_log, N_img))

    # Converged NEB energy landscape (last entry): the final 2*N_img tokens of the
    # last line are read as one (distance, energy) pair per image.
    pairs = log_lines[-1][-2*N_img:]

    file_mep = path + '/mep.txt'    # MEP = minimum energy pathway
    with open(file_mep, 'w') as mep:
        for p in range(0, 2*N_img, 2):
            mep.write(pairs[p] + '\t' + pairs[p+1] + '\n')

    mep_lines = np.array(pairs).astype(float).reshape(N_img, 2)    # Column 0 = distance, column 1 = energy

    f = plt.figure()    # Save MEP plot as a PDF file
    plt.plot(mep_lines[:,1], 'k.', markersize=10)    # Energy column only; the axis is labelled as energy
    plt.plot(mep_lines[:,1], 'k--', markersize=1)
    plt.ylabel('Energy (eV)')
    plt.xlabel('Image number')
    f.savefig(path + '/mep.pdf')
    plt.close(f)    # Otherwise one figure per event stays alive in memory

    IS, TS, FS = _find_is_ts_fs(mep_lines[:,1])
    if TS in (IS, FS):
        print('Warning: event %s has its highest-energy image at an end point of the downhill walk (IS=%d, TS=%d, FS=%d). Please inspect mep.pdf.' % (l, IS, TS, FS))

    file_Nimg = path + '/Nimg.txt'    # List of image numbers corresponding to IS, TS, FS
    with open(file_Nimg, 'w') as Nimg:

        for j, i in zip(['IS','TS','FS'], [IS, TS, FS]):

            os.makedirs(path + '/' + j, exist_ok=True)    # Separate directories for IS, TS, FS; re-running is allowed

            Nimg.write(j + '\t' + str(i) + '\n')

            file_img = path + '/coords.final.' + str(i)    # LAMMPS DUMP files of converged NEB images
            with open(file_img, 'r') as img:
                img_lines = [line.split() for line in img]

            # Atom rows start after the 9 header lines of the dump file and are parsed once.
            # Assumes the dump columns are "id type x y z" in Cartesian coordinates --
            # TODO confirm against the dump command used in MD2NEB.py.
            atoms = [(int(row[0]), int(row[1]), np.array(row[2:5]).astype(float))
                     for row in img_lines[9:] if row]

            file_poscar = path + '/' + j + '/POSCAR'    # VASP POSCAR files
            with open(file_poscar, 'w') as poscar:
                poscar.write(header)

                for k in range(1,N_element+1):    # List atoms element-by-element
                    for ID, Type, r in atoms:
                        if Type != k:
                            continue

                        Rep = np.dot(r, lat_inv)    # Direct coordinates

                        if fix == 'Yes':    # Selective dynamics
                            flags = F if fix_range[0] <= ID <= fix_range[1] else T
                            poscar.write('%f %f %f %s\n'%(Rep[0], Rep[1], Rep[2], flags))
                        else:
                            poscar.write('%f %f %f\n'%(Rep[0], Rep[1], Rep[2]))

print('POSCAR generated > ./ev_***/{IS/,TS/,FS/}')