In [1]:
# import libraries

import MDAnalysis as mda

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def extract_atom_types(file_name, atomic_masses):
    '''
    Defines a mapping from LAMMPS atom types to chemical atom names from input LAMMPS .data file

    Each atom type listed in the 'Masses' section is assigned the name whose reference
    mass in atomic_masses is closest to the mass listed for that type.

    Inputs:
    file_name (str): path to the file
    atomic_masses (dict): dictionary containing mass information for each chemical atom
                          (keys are masses, values are chemical atom names)

    Returns:
    type_to_name (dict): dictionary mapping LAMMPS atom types to chemical atom name

    Raises:
    ValueError: if the file contains no 'Masses' section
    KeyError: if no reference mass lies within 100 mass units of a listed mass
              (including the case of an empty atomic_masses dictionary)
    '''
    # a listed mass must be strictly closer than this to a reference mass to be assigned
    max_mass_difference = 100

    # open LAMMPS data file and read lines
    with open(file_name, 'r') as file:
        lines = file.readlines()

    # find the 'Masses' section header; a trailing '# comment' on the header line is ignored
    # and the whole line must be the keyword, so a title line mentioning masses is not matched
    masses_header = None
    for i, line in enumerate(lines):
        if line.split('#')[0].strip() == 'Masses':
            masses_header = i
            break
    if masses_header is None:
        raise ValueError(f"No 'Masses' section found in {file_name}")

    # skip the blank line(s) separating the header from the first data row
    masses_start = masses_header + 1
    while masses_start < len(lines) and lines[masses_start].strip() == '':
        masses_start += 1

    # the section ends at the next blank line, or at end of file if there is none
    masses_end = len(lines)
    for i in range(masses_start, len(lines)):
        if lines[i].strip() == '':
            masses_end = i
            break

    type_to_name = {}
    for line in lines[masses_start:masses_end]:
        fields = line.split()
        atom_type = int(fields[0])  # LAMMPS atom type number
        mass = float(fields[1])  # LAMMPS mass

        # reference mass closest to the LAMMPS mass (first one wins on a tie)
        closest_mass = min(atomic_masses, key=lambda reference: abs(reference - mass), default=None)
        if closest_mass is None or abs(closest_mass - mass) >= max_mass_difference:
            raise KeyError(
                f"no entry in atomic_masses within {max_mass_difference} of mass {mass} "
                f"(LAMMPS atom type {atom_type})"
            )

        type_to_name[atom_type] = atomic_masses[closest_mass]  # update type_to_name dictionary

    return type_to_name

In [3]:
# reference atomic masses (mass -> element name); extend when working with additional elements
atomic_masses = {
    1.008: 'H',
    12.011: 'C',
    14.007: 'N',
    15.999: 'O',
    32.065: 'S',
    35.453: 'Cl',
}

# build the LAMMPS atom type -> atom name lookup with the 'extract_atom_types' function
type_to_name = extract_atom_types("lammps_files/lammps_input_data.data", atomic_masses)

# load the .data file and the .lammpstrj trajectory into an MDAnalysis universe
u = mda.Universe(
    "lammps_files/lammps_input_data.data",
    "lammps_files/lammps_output_lammpstrj.lammpstrj",
    format="LAMMPSDUMP",
    lammps_coordinate_convention="scaled",
    dt=1,
)

# number of atoms in the cage (the atoms selected by "resid 1")
no_atoms_in_cage = u.select_atoms("resid 1").n_atoms

In [4]:
# write every atom of every frame to an .xyz file, with molecules unwrapped

with open('data_storage/data_all_atoms_unwrapped.xyz', 'w') as xyz_file:
    all_atoms = u.atoms
    for frame in u.trajectory:  # visit each timestep in turn
        # make each residue (molecule) whole again for the current frame
        all_atoms.unwrap(compound="residues")

        # xyz frame header: atom count, then a comment line holding the frame number
        xyz_file.write(f"{all_atoms.n_atoms}\n")
        xyz_file.write(f"frame {frame.frame}\n")

        # one line per atom: atom name followed by its unwrapped x, y, z coordinates
        for lammps_type, (x, y, z) in zip(all_atoms.types, all_atoms.positions):
            atom_name = type_to_name[int(lammps_type)]
            xyz_file.write(f"{atom_name} {x:.3f} {y:.3f} {z:.3f}\n")

In [5]:
#code to extract only cage atoms to .xyz file, in an unwrapped format

# the selection does not change between frames, so it is made once, outside the frame loop;
# select_atoms already returns an atom group, so no index list needs to be rebuilt per frame
cage_atoms = u.select_atoms("resid 1")

with open('data_storage/data_cage_only_unwrapped.xyz', 'w') as f:
    for ts in u.trajectory:  # loop through all timesteps

        # unwrap coordinates at residue level (whole molecules) for the current frame
        cage_atoms.unwrap(compound="residues")

        f.write(f"{len(cage_atoms)}\n")  # write number of cage atoms
        f.write(f"frame {ts.frame}\n") # write comment line (frame number)

        # loop through each cage atom, writing atom name and unwrapped coordinates to file
        for atom in cage_atoms:
            atom_type = atom.type
            atom_name = type_to_name[int(atom_type)]
            x, y, z = atom.position
            f.write(f"{atom_name} {x:.3f} {y:.3f} {z:.3f}\n")

In [6]:
#code to extract only solvent atoms to .xyz file, in an unwrapped format

# the selection does not change between frames, so it is made once, outside the frame loop;
# select_atoms already returns an atom group, so no index list needs to be rebuilt per frame
solvent_atoms = u.select_atoms("not resid 1")

with open('data_storage/data_solvent_only_unwrapped.xyz', 'w') as f:
    for ts in u.trajectory:  # loop through all timesteps

        # unwrap coordinates at residue level (whole molecules) for the current frame
        solvent_atoms.unwrap(compound="residues")

        f.write(f"{len(solvent_atoms)}\n")  # write number of solvent atoms
        f.write(f"frame {ts.frame}\n") # write comment line (frame number)

        # loop through each solvent atom, writing atom name and unwrapped coordinates to file
        for atom in solvent_atoms:
            atom_type = atom.type
            atom_name = type_to_name[int(atom_type)]
            x, y, z = atom.position
            f.write(f"{atom_name} {x:.3f} {y:.3f} {z:.3f}\n")

The remaining cells write coordinates to .xyz files in wrapped format, i.e. exactly as written by LAMMPS. In this format the cage may appear split across a periodic boundary in any given frame.

In [7]:
# write every atom of every frame to an .xyz file, keeping the coordinates as read (wrapped)

with open('data_storage/data_all_atoms_wrapped.xyz', 'w') as xyz_file:
    all_atoms = u.atoms
    for frame in u.trajectory:  # visit each timestep in turn

        # xyz frame header: atom count, then a comment line holding the frame number
        xyz_file.write(f"{all_atoms.n_atoms}\n")
        xyz_file.write(f"frame {frame.frame}\n")

        # one line per atom: atom name followed by its wrapped x, y, z coordinates
        for lammps_type, (x, y, z) in zip(all_atoms.types, all_atoms.positions):
            atom_name = type_to_name[int(lammps_type)]
            xyz_file.write(f"{atom_name} {x:.3f} {y:.3f} {z:.3f}\n")

In [8]:
#code to extract only cage atoms to .xyz file, in a direct, wrapped format

# select the cage by residue (the same "resid 1" selection used to count the cage atoms)
# instead of assuming the cage occupies atom ids 1..no_atoms_in_cage; the selection does
# not change between frames, so it is made once, outside the frame loop
selected_atoms = u.select_atoms("resid 1")

with open('data_storage/data_cage_only_wrapped.xyz', 'w') as f:
    for ts in u.trajectory:  # loop through all timesteps

        f.write(f"{len(selected_atoms)}\n")  # write number of cage atoms
        f.write(f"frame {ts.frame}\n") # write comment line (frame number)

        # loop through each cage atom, writing atom name and wrapped coordinates to file
        for atom in selected_atoms:
            atom_type = atom.type
            atom_name = type_to_name[int(atom_type)]
            x, y, z = atom.position
            f.write(f"{atom_name} {x:.3f} {y:.3f} {z:.3f}\n")

In [9]:
#code to extract only solvent atoms to .xyz file, in a direct, wrapped format

# select the solvent as everything outside residue 1 (the complement of the "resid 1"
# cage selection) instead of assuming solvent atom ids start after no_atoms_in_cage;
# the selection does not change between frames, so it is made once, outside the frame loop
selected_atoms = u.select_atoms("not resid 1")

with open('data_storage/data_solvent_only_wrapped.xyz', 'w') as f:
    for ts in u.trajectory:  # loop through all timesteps

        f.write(f"{len(selected_atoms)}\n") # write number of solvent atoms
        f.write(f"frame {ts.frame}\n") # write comment line (frame number)

        # loop through each solvent atom, writing atom name and wrapped coordinates to file
        for atom in selected_atoms:
            atom_type = atom.type
            atom_name = type_to_name[int(atom_type)]
            x, y, z = atom.position
            f.write(f"{atom_name} {x:.3f} {y:.3f} {z:.3f}\n")