In [1]:
import os, shutil
from os import path
from pymatgen.io.lammps.data import LammpsData
from ase.io.lammpsrun import read_lammps_dump_text
from ase.io.lammpsdata import read_lammps_data
from pymatgen.io.ase import AseAtomsAdaptor
from pymatgen.io.vasp.outputs import Xdatcar
from io import StringIO, BytesIO
"""
Notebook containing functions to convert a LAMMPS trajectory to either an XDATCAR or numbered POSCARs, with or without sparsification.  
"""


'\nNotebook containing functions to convert a LAMMPS trajectory to either an XDATCAR or numbered POSCARs, with or without sparsification.  \n'

In [None]:
def sparsify_struc(in_traj,sparsify_factor=1,sparse_length=None,timestep_offset=500000):
    """
    Write a thinned copy of a LAMMPS trajectory to the file
    "sparsified_lammps_{sparsify_factor}" in the current working directory.

    A frame starts at every line containing "TIMESTEP"; the line after it is read
    as the integer timestep. The frame is kept when
    (timestep - timestep_offset) is a multiple of sparsify_factor.

    in_traj - Full path to the input LAMMPS trajectory.
    sparsify_factor - output every n structures
    sparse_length - output first n structures (None keeps every matching structure).
    timestep_offset - value subtracted from each timestep before the multiple-of test.
                      Defaults to 500000, the value that used to be hard-coded.

    Defaults perform no sparsification.

    The (offset-corrected) timestep of every kept frame and a final summary line
    are printed. Nothing is returned.
    """
    keep_frame = False
    struc_count = 0
    with open(in_traj, "r") as file:
        traj_lines = file.readlines()
    with open(f"sparsified_lammps_{sparsify_factor}","w") as file:
        for n, line in enumerate(traj_lines):
            if "TIMESTEP" in line:
                timestep = int(traj_lines[n+1]) - timestep_offset
                keep_frame = timestep % sparsify_factor == 0
                if keep_frame:
                    # Test the limit before counting, so struc_count is always the
                    # number of frames actually written, whether or not the limit
                    # was reached before the trajectory ran out.
                    if sparse_length is not None and struc_count >= sparse_length:
                        break
                    print(timestep)
                    struc_count += 1

            if keep_frame:
                file.write(line)
    print(f"Sparsified to {struc_count} structures.")

def top_struc(traj_lines):
    """
    Split the first structure off the front of a list of LAMMPS trajectory lines.
    Intended to be used within lammps_to_xdat or lammps_to_poscars.

    A structure starts at a line containing "TIMESTEP" and runs up to (not
    including) the next such line, or to the end of the list. Lines before the
    first "TIMESTEP" line are discarded.

    traj_lines - List of lines of LAMMPS trajectory, (eg. file.readlines())

    Returns (timestep, new_traj_lines, top_struc_lines):
        timestep - integer read from the line after the first "TIMESTEP" line.
        new_traj_lines - the lines from the second "TIMESTEP" line onwards
                         (empty list when there is no further structure).
        top_struc_lines - the lines of the first structure.

    Raises ValueError if there is no "TIMESTEP" line, if it is the last line,
    or if the line after it is not an integer.
    """
    # Lazy scan: only walks as far as the second header, never the whole list.
    header_positions = (i for i, line in enumerate(traj_lines) if "TIMESTEP" in line)

    start = next(header_positions, None)
    if start is None:
        raise ValueError("no TIMESTEP header found in trajectory lines")
    if start + 1 >= len(traj_lines):
        raise ValueError("TIMESTEP header is not followed by a timestep value")
    timestep = int(traj_lines[start + 1])

    # With no second header the structure runs to the end and nothing is left over.
    end = next(header_positions, len(traj_lines))
    return timestep, traj_lines[end:], traj_lines[start:end]

def traj_to_struc(top_struc_lines,atom_types,mass_file):
    """
    Converts the lines of one trajectory step to the text of a LAMMPS data file
    (header, Masses section, Atoms section) and returns it as a single string.
    Intended to be used within lammps_to_xdat or lammps_to_poscars. Accepts
    scaled coordinates (fractional) and writes absolute coordinates
    (fraction * box length + lower bound) for each axis.

    Only the sections introduced by "NUMBER OF ATOMS",
    "ITEM: BOX BOUNDS pp pp pp" and "ITEM: ATOMS id type xs ys zs" are read;
    other lines before the atoms section are ignored.

    top_struc_lines - List of lines of single step of LAMMPS trajectory.
    atom_types - number of species in structure.
    mass_file - text file of masses to append to structure. in format [ATOM TYPE]   [MASS]\n.

    Raises ValueError if the atoms section is missing, or if it appears before
    the atom count and all three box-bound lines have been read.
    """
    n_atoms = None
    bounds = []                 # (lo, hi) as the original strings, in x, y, z order
    origins = []                # float lower bound per axis
    lengths = []                # float box length per axis
    expect_atom_count = False
    reading_bounds = False
    in_atoms = False
    lammps_struc_lines = []
    for line in top_struc_lines:
        if "NUMBER OF ATOMS" in line:
            expect_atom_count = True
        elif expect_atom_count:
            n_atoms = line.split()[0]
            expect_atom_count = False
        elif "ITEM: BOX BOUNDS pp pp pp" in line:
            bounds = []
            reading_bounds = True
        elif reading_bounds:
            lo, hi = line.split()[:2]
            bounds.append((lo, hi))
            # The header is followed by exactly three lines: x, y, then z.
            reading_bounds = len(bounds) < 3
        elif "ITEM: ATOMS id type xs ys zs" in line:
            if n_atoms is None or len(bounds) != 3:
                raise ValueError("atoms section found before the atom count and box bounds")
            (x_lo, x_hi), (y_lo, y_hi), (z_lo, z_hi) = bounds
            origins = [float(lo) for lo, _ in bounds]
            lengths = [float(hi) - float(lo) for lo, hi in bounds]
            with open(mass_file,"r") as masses_file:
                mass_lines = masses_file.readlines()
            # Bounds are echoed as the original strings so no precision is lost.
            lammps_struc_lines.append(f"\n{n_atoms}  atoms\n\n{atom_types}  atom types\n\n{x_lo} {x_hi}  xlo xhi\n{y_lo} {y_hi}  ylo yhi\n{z_lo} {z_hi}  zlo zhi\n\nMasses\n\n")
            lammps_struc_lines.extend(mass_lines)
            lammps_struc_lines.append("\nAtoms\n\n")
            in_atoms = True
        elif in_atoms:
            chunks = line.split()
            if not chunks:
                # Blank line (e.g. at the end of the file): nothing to convert.
                continue
            abs_x, abs_y, abs_z = (
                (float(frac) * length) + origin
                for frac, length, origin in zip(chunks[2:5], lengths, origins)
            )
            lammps_struc_lines.append(f"{chunks[0]}     {chunks[1]}  {abs_x}  {abs_y}  {abs_z}\n")
    if not in_atoms:
        raise ValueError('no "ITEM: ATOMS id type xs ys zs" section found in structure lines')
    # Join once at the end; joining per atom line made the conversion quadratic.
    return ''.join(lammps_struc_lines)



def lammps_to_poscar(lammps_struc,species_dict):
    """
    Build a pymatgen structure from the text of a LAMMPS structure.

    The text is parsed through an in-memory file with ASE's read_lammps_data,
    and the result is handed to AseAtomsAdaptor.get_structure.

    lammps_struc - LAMMPS structure as a single string, such as the output of traj_to_struc.
    species_dict - Dictionary specifying which atoms types are which elements, eg. {1:3,2:15,3:16,4:17} for LiPSCl.
                   Passed to read_lammps_data as Z_of_type.

    Returns the structure produced by AseAtomsAdaptor.get_structure.
    """
    buffer = StringIO(lammps_struc)
    try:
        ase_atoms = read_lammps_data(buffer,Z_of_type=species_dict)
    finally:
        # Release the in-memory file whether or not parsing succeeded.
        buffer.close()
    return AseAtomsAdaptor.get_structure(ase_atoms)

In [None]:
def lammps_to_xdat(in_traj,out_file,mass_file,species_dict):
    """
    A function to convert an input LAMMPS trajectory (usually sparsified in some way) and output to XDATCAR.

    Each structure is peeled off the trajectory with top_struc, converted with
    traj_to_struc and lammps_to_poscar, written to a scratch POSCAR, and loaded
    into (first structure) or concatenated onto (later structures) an Xdatcar
    object, which is written to out_file at the end.

    in_traj - Input LAMMPS trajectory
    out_file - Output file, format VASP XDATCAR
    mass_file - text file of masses to append to structure. in format [ATOM TYPE]   [MASS]\n.
    species_dict - Dictionary specifying which atoms types are which elements, eg. {1:3,2:15,3:16,4:17} for LiPSCl.

    Raises ValueError if in_traj contains no lines.
    """
    import tempfile  # local import: tempfile is not among the file-level imports

    atom_types = len(species_dict)
    with open(in_traj, "r") as file:
        traj_lines = file.readlines()
    if not traj_lines:
        raise ValueError(f"{in_traj} contains no trajectory lines")

    data = None
    # The scratch POSCAR lives in a private temporary directory, so it cannot
    # overwrite a file called "temp_struc" in the working directory and is
    # removed even when a conversion step raises.
    with tempfile.TemporaryDirectory() as scratch_dir:
        temp_struc = os.path.join(scratch_dir, "temp_struc")
        while traj_lines:
            timestep,traj_lines,top_struc_lines = top_struc(traj_lines=traj_lines)
            lammps_struc = traj_to_struc(top_struc_lines,atom_types,mass_file)
            pymatgen_struc = lammps_to_poscar(lammps_struc,species_dict)
            pymatgen_struc.to(filename=temp_struc,fmt="poscar")
            if data is None:
                data = Xdatcar(temp_struc)
            else:
                data.concatenate(temp_struc)

    data.write_file(filename=out_file)

def lammps_to_poscars(in_traj,out_folder,mass_file,species_dict):
    """
    A function to convert an input LAMMPS trajectory (usually sparsified in some way) and output to a folder of POSCARs.

    Each structure is peeled off the trajectory with top_struc, converted with
    traj_to_struc and lammps_to_poscar, and written to
    "{out_folder}/{timestep}.vasp" in POSCAR format.

    in_traj - Input LAMMPS trajectory
    out_folder - Folder to contain output VASP POSCARs (created, with any missing parents, if absent)
    mass_file - text file of masses to append to structure. in format [ATOM TYPE]   [MASS]\n.
    species_dict - Dictionary specifying which atoms types are which elements, eg. {1:3,2:15,3:16,4:17} for LiPSCl.

    Raises ValueError if in_traj contains no lines.
    """
    atom_types = len(species_dict)
    os.makedirs(out_folder, exist_ok=True)
    with open(in_traj, "r") as file:
        traj_lines = file.readlines()
    if not traj_lines:
        raise ValueError(f"{in_traj} contains no trajectory lines")

    while traj_lines:
        timestep,traj_lines,top_struc_lines = top_struc(traj_lines=traj_lines)
        lammps_struc = traj_to_struc(top_struc_lines,atom_types,mass_file)
        pymatgen_struc = lammps_to_poscar(lammps_struc,species_dict)
        pymatgen_struc.to(filename=os.path.join(out_folder, f"{timestep}.vasp"),fmt="poscar")
    # No scratch file is created here, so there is nothing to remove afterwards
    # (removing "temp_struc" used to raise FileNotFoundError once all POSCARs
    # were written).