# Analysis of LAMMPS MD trajectory:  Preparation of VASP validation

## Header

In [None]:
import lmpdump as lmpdump    # Use lmpdump.py parser
import os
import numpy as np
import math
import itertools

# Working directory: every input and output path below is built from it
pwd = os.getcwd()

# Usage banner. Entries that end in '\n' leave a blank line after them,
# because print() appends its own newline as well.
_banner = (
    'This script extracts frames from LAMMPS NVT trajectory and generates VASP POSCAR files for DFT validation of the atomic forces.',
    'This script is interactive and requires user input. Please read carefully before proceeding:\n',
    'Note 1: Intended only for VASP DFT evaluation of the atomic forces (SCF calculation).',
    'Note 2: Requires a LAMMPS DATA file containing the equilibrated box dimensions.',
    'Note 3: Requires a trajectory containing all force components.',
    'Note 4: Requires lmpdump.py to load the LAMMPS trajectories.\n',
)
print(*_banner, sep='\n')

## Load LAMMPS trajectory

In [None]:
# LAMMPS custom-style dump file - Raw trajectory
# The file is looked up in the working directory (an absolute path is used as is).
name = input('Enter the name of the LAMMPS trajectory file (ID, type, x, y, z, fx, fy, fz) : \nWarning: Must contain all force components! Please quit now if not so.\n')
print('Loading trajectory...')
file = os.path.join(pwd, name)
xyzf = lmpdump.lmpdump(file, loadmode='all')
print('\n')

print('Loading time steps...')
step = list(xyzf.finaldict)    # keys = time steps, in the order the parser stored them
if not step:
    raise ValueError('No frames found in trajectory file: ' + file)
print('Done.\n')

size = len(step)    # Number of time steps

# Number of atoms dumped, read from the first frame that is actually present:
# the trajectory does not have to start at time step 0.
# NOTE(review): element [1] of a frame record is assumed to be the per-atom
# table (it is indexed by field name, e.g. ['x'], further down) - confirm
# against lmpdump.py.
N_dump = xyzf.finaldict[step[0]][1].shape[0]

## Extract unit cell & system information

In [None]:
# Load LAMMPS data file after NPT equilibration
# Produces: N_atom, N_element, box bounds/lengths, tilt factors, the lattice
# matrix 'lat', the parsed atom lines log_lines[start:end], the fixed-atom
# settings (fix, fix_range) and the POSCAR header string 'header'.
name = input('Enter the name of the LAMMPS DATA file containing the equilibrated box dimensions : ')
file_eq = os.path.join(pwd, name)
with open(file_eq, 'r') as log:    # Closed as soon as the lines are read
    log_lines = [line.split() for line in log]

# Header quantities start undefined so that a missing keyword is reported clearly
N_atom = N_element = None
xlo = xhi = ylo = yhi = zlo = zhi = None
head = start = end = None
xy = xz = yz = 0.0    # Tilt factors: zero (orthogonal box) unless an "xy xz yz" line is present

for line_index, line in enumerate(log_lines):
    if not line:
        continue

    if line[1:2] == ['atoms']:
        N_atom = int(line[0])

    elif line[1:3] == ['atom', 'types']:    # Not "bond types", "angle types", ...
        N_element = int(line[0])

    elif line[2:4] == ['xlo', 'xhi']:
        xlo, xhi = float(line[0]), float(line[1])

    elif line[2:4] == ['ylo', 'yhi']:
        ylo, yhi = float(line[0]), float(line[1])

    elif line[2:4] == ['zlo', 'zhi']:
        zlo, zhi = float(line[0]), float(line[1])

    elif line[3:6] == ['xy', 'xz', 'yz']:
        xy, xz, yz = float(line[0]), float(line[1]), float(line[2])

    elif line[0] == 'Atoms':
        head = line_index + 1    # Blank line after the section keyword
        start = line_index + 2    # First atom line

    elif line[0] == 'Velocities':
        end = line_index - 1    # Blank line before the section keyword

missing = [key for key, value in (('atoms', N_atom), ('atom types', N_element),
                                  ('xlo xhi', xlo), ('ylo yhi', ylo), ('zlo zhi', zlo),
                                  ('Atoms', start)) if value is None]
if missing:
    raise ValueError('LAMMPS DATA file %s lacks: %s' % (file_eq, ', '.join(missing)))

if end is None:
    end = start + N_atom    # No Velocities section: the Atoms section holds N_atom lines

Lx = xhi - xlo    # Box dimensions
Ly = yhi - ylo
Lz = zhi - zlo
tan = Ly/xy if xy != 0.0 else math.inf    # Not used below; infinite for a box without xy tilt

lat = np.array([[Lx, 0.0, 0.0], [xy, Ly, 0.0], [xz, yz, Lz]])    # LAMMPS NVT lattice matrix (one lattice vector per row)

# Convert the atom lines in place; the later cells read them from log_lines.
# Column layout assumed: ID, type, x, y, z[, ix, iy, iz] - other atom styles
# (e.g. with a charge column) are not supported.
for line in log_lines[start:end]:
    if not line:
        continue
    line[:2] = [int(value) for value in line[:2]]    # Atom ID, atom type
    line[2:5] = [float(value) for value in line[2:5]]    # x, y, z
    if len(line) >= 8:
        line[5:8] = [int(value) for value in line[5:8]]    # ix, iy, iz (inverse sign)
    else:
        line[5:8] = [0, 0, 0]    # Image flags not written: treat as zero

fix = (N_dump != N_atom)    # Is bottommost layer muted?
if fix:

    print('Fixed coordinates were muted. They will be obtained from the LAMMPS DATA file.')
    print('The fixed atoms are assumed to be numbered consecutively (e.g. layer-by-layer). Please quit now if not so.')
    fix_range = input('Enter the IDs of the first and last atom to be fixed, separated by space : ')
    fix_range = np.array(fix_range.split()).astype(int)

sys = input('Enter the system name : ')

scale = 1.0    # Lattice scaling factor

ls_element = input('Enter the list of the elements, separated by space, in the same order as LAMMPS atom types : ')

# Number of atoms of each atom type, space separated, in atom-type order
atom_types = [line[1] for line in log_lines[start:end] if line]
N_type = ' '.join(str(atom_types.count(atom_type)) for atom_type in range(1, N_element+1))

# POSCAR header: comment line, scaling factor, three lattice vectors,
# element symbols, atom counts, coordinate mode
header = '%s\n%f\n' % (sys, scale)
header += ''.join('%f %f %f\n' % (row[0], row[1], row[2]) for row in lat)
header += '%s\n%s\n%s\n' % (ls_element, N_type, 'Direct')

print('Unit cell & system information extracted.\n')

## Extract snapshots - VASP POSCAR preparation

In [None]:
# Write one POSCAR per extracted frame into ./frm<n>/POSCAR.
# Within each element the fixed atoms (taken from the LAMMPS DATA file) come
# first, followed by the mobile atoms in the order of the trajectory frame.
N_frm = int(input('Enter the number of frames to be extracted in regular step interval, starting with the first frame : '))
if not 1 <= N_frm <= size:
    raise ValueError('The number of frames must be between 1 and %d.' % size)

# Grab interval. Frames sit at indices 0, d, ..., (N_frm-1)*d, so the last
# index must not exceed size-1; a single frame needs no interval at all.
d = (size - 1)//(N_frm - 1) if N_frm > 1 else 0

inv_lat = np.linalg.inv(lat)    # Cartesian -> direct; the same for every atom and frame

for n in range(N_frm):

    s = step[n*d]    # Time step
    frame = xyzf.finaldict[s][1]    # Per-atom data of this frame

    frm_dir = os.path.join(pwd, 'frm' + str(n+1))
    os.makedirs(frm_dir, exist_ok=True)    # Re-running the cell overwrites the POSCAR

    file_poscar = os.path.join(frm_dir, 'POSCAR')
    with open(file_poscar, 'w') as poscar:
        poscar.write(header)

        for atom_type in range(1, N_element+1):    # List atoms element-by-element

            if fix:    # Obtain fixed atoms from LAMMPS DATA file, if any
                for line in log_lines[start:end]:

                    ID, Type = line[0], line[1]
                    xu, yu, zu = line[2], line[3], line[4]
                    ix, iy, iz = line[5], line[6], line[7]

                    if (Type == atom_type) and (fix_range[0] <= ID <= fix_range[1]):

                        # Shift by ix*a + iy*b + iz*c. The xz and yz terms keep the
                        # shift a whole lattice translation when the cell is tilted.
                        x = xu + ix*Lx + iy*xy + iz*xz
                        y = yu + iy*Ly + iz*yz
                        z = zu + iz*Lz
                        Rep = np.dot(np.array([x, y, z]), inv_lat)    # Direct coordinates

                        poscar.write('%f\t%f\t%f\n' % (Rep[0], Rep[1], Rep[2]))

            for atom in range(N_dump):    # Obtain mobile atoms from LAMMPS trajectory

                if frame['type'][atom] == atom_type:

                    r = np.array([frame['x'][atom], frame['y'][atom], frame['z'][atom]])    # Cartesian coordinates
                    Rep = np.dot(r, inv_lat)    # Direct coordinates

                    poscar.write('%f\t%f\t%f\n' % (Rep[0], Rep[1], Rep[2]))

print('POSCAR generated > ./validation/frm**/POSCAR')

## Extract force components after calculations completed: Force field vs. DFT

In [None]:
# Pair every force-field force component with its DFT counterpart.
# Reads ./frm<n>/xyzf.out (two header lines, then x y z fx fy fz per atom in
# POSCAR order) and writes one "force field <tab> DFT" line per component
# to ./f_validate-<atom type>.txt.
N_frm = int(input('Enter the number of frames extracted : '))
if not 1 <= N_frm <= size:
    raise ValueError('The number of frames must be between 1 and %d.' % size)

# Grab interval; must be the same rule used when the POSCAR files were written
d = (size - 1)//(N_frm - 1) if N_frm > 1 else 0

# Number of fixed atoms per atom type. They are listed first within each
# element in the POSCAR, and the count does not change between frames.
N_fixed = {}
for atom_type in range(1, N_element+1):
    if fix:
        N_fixed[atom_type] = sum(1 for line in log_lines[start:end]
                                 if (line[1] == atom_type) and (fix_range[0] <= line[0] <= fix_range[1]))
    else:
        N_fixed[atom_type] = 0

for n in range(N_frm):

    s = step[n*d]    # Time step
    frame = xyzf.finaldict[s][1]    # Per-atom data of this frame

    frm_dir = os.path.join(pwd, 'frm' + str(n+1))
    file_out = os.path.join(frm_dir, 'xyzf.out')    # Position & force data from DFT
    with open(file_out, 'r') as out:
        out_lines = [line.split() for line in out]

    index = 2    # First atom line, after the two header lines
    for atom_type in range(1, N_element+1):    # Atoms listed element-by-element

        # List of force components to be plotted for each element.
        # The first frame starts a fresh file, so re-running this cell does not
        # pile a second header and duplicate data onto an older result.
        file_force = os.path.join(pwd, 'f_validate-' + str(atom_type) + '.txt')
        with open(file_force, 'w' if n == 0 else 'a') as force:

            if n == 0:
                # Written directly: the global 'header' still holds the POSCAR header
                force.write('Force field\tDFT\n')

            index += N_fixed[atom_type]    # Skip fixed atoms listed first (zero forces in LAMMPS)

            for atom in range(N_dump):

                if frame['type'][atom] == atom_type:

                    # fx, fy, fz: force field value, then columns 3-5 of the DFT line
                    for component, column in (('fx', 3), ('fy', 4), ('fz', 5)):
                        f_ff = frame[component][atom]
                        f_dft = float(out_lines[index][column])
                        force.write('%f\t%f\n' % (f_ff, f_dft))

                    index += 1

print('Force field vs. DFT force components extracted for each element > ./f_validate-*.txt')

## Calculate errors

In [None]:
# Compute RMSE and MAE between force-field and DFT force components for every
# ./f_validate-<atom type>.txt file and write the summary to ./error.txt.
pwd = os.getcwd()

# File list of force components for each element, ordered by the element
# number in the file name (so that 10 comes after 2, not after 1).
_prefix, _suffix = 'f_validate-', '.txt'
_tagged = sorted(
    (int(entry[len(_prefix):-len(_suffix)]), entry)
    for entry in os.listdir(pwd)
    if entry.startswith(_prefix) and entry.endswith(_suffix)
    and entry[len(_prefix):-len(_suffix)].isdigit()
)
file_ls = [entry for _, entry in _tagged]

file_error = os.path.join(pwd, 'error.txt')
with open(file_error, 'w') as error:

    for Type, file_name in _tagged:    # Type = element number taken from the file name

        with open(os.path.join(pwd, file_name), 'r') as force:
            force_lines = [line.split() for line in force]

        # Skip the header line and any blank line; columns: force field, DFT
        pairs = np.array([line for line in force_lines[1:] if line], dtype=float)
        if pairs.size == 0:
            print('Warning: no force data in ' + file_name + ' - skipped.')
            continue

        diff = pairs[:, 0] - pairs[:, 1]
        RMSE = np.sqrt(np.mean(diff**2))    # Root mean square error
        MAE = np.mean(np.abs(diff))    # Mean absolute error

        error.write('Element #' + str(Type) + '\n')
        error.write('RMSE = %f eV/Ang\n' % RMSE)
        error.write('MAE = %f eV/Ang\n' % MAE)
        error.write('\n')

print('Error in force components calculated for each element > ./error.txt')