In [126]:
import os

# File paths: the two input data files read below (ligand 1 and ligand 2) and
# the output file that the merged, replicated data is written to.
input_file_path1 = './mnt/data/ligand_oplsaa.lmp'
input_file_path2 = './mnt/data/ligand2_oplsaa.lmp'
output_file_path = './mnt/data/replicated_ligand_topology_output_mix.lmp'

# Read the input files
def read_file(filepath):
    """Return every line of the text file at *filepath*, line endings kept."""
    with open(filepath, 'r') as handle:
        return list(handle)

# Load the raw lines of both ligand data files.
lines1, lines2 = (
    read_file(path) for path in (input_file_path1, input_file_path2)
)

# Function to read the types from the header
def read_header_types(lines):
    """Collect the "<N> ... types" counts from the header of a data file.

    Args:
        lines: Lines of the data file (as returned by ``readlines``).

    Returns:
        A dict with the keys ``atom_types``, ``bond_types``, ``angle_types``,
        ``dihedral_types`` and ``improper_types``. A count whose header line
        is absent is reported as 0 instead of being left out, so callers can
        index every key unconditionally.

    Raises:
        ValueError: If a matching line does not start with an integer.
    """
    labels = ('atom', 'bond', 'angle', 'dihedral', 'improper')
    # Start every count at 0: a file may omit the line for a kind it does not use.
    types = {f'{label}_types': 0 for label in labels}
    for line in lines:
        for label in labels:
            if f'{label} types' in line:
                types[f'{label}_types'] = int(line.split()[0])
                break  # a line describes at most one kind of type
    return types

# Per-file type counts taken from each header.
types1, types2 = (read_header_types(file_lines) for file_lines in (lines1, lines2))

# Sections of the LAMMPS data file
def parse_sections(lines):
    """Split the body of a data file into its named sections.

    Recognised sections are Masses, Atoms, Bonds, Angles, Dihedrals and
    Impropers. A section starts at a line whose text (ignoring a trailing
    ``# comment`` such as ``Atoms # full``) equals the section name; the line
    right after the header is skipped, and every following line up to the next
    blank line is collected, stripped of surrounding whitespace.

    Args:
        lines: Lines of the data file.

    Returns:
        A dict mapping each section name to its list of stripped data lines
        (empty if the section is absent).
    """
    sections = {
        'Masses': [],
        'Atoms': [],
        'Bonds': [],
        'Angles': [],
        'Dihedrals': [],
        'Impropers': []
    }
    n_lines = len(lines)

    i = 0
    while i < n_lines:
        # Drop any trailing comment so headers like "Atoms # full" still match.
        header = lines[i].split('#', 1)[0].strip()
        if header in sections:
            i += 2  # Skip the header line and the empty line after it
            while i < n_lines and lines[i].strip() != "":
                sections[header].append(lines[i].strip())
                i += 1
        i += 1

    return sections

# Section contents of each ligand file, keyed by section name.
sections1, sections2 = (parse_sections(file_lines) for file_lines in (lines1, lines2))

# Function to adjust IDs for Atoms section
def adjust_atom_ids(lines, atom_offset):
    """Return copies of *lines* with the first column increased by *atom_offset*.

    Lines that are empty or whose first field is not made of digits are
    dropped. Fields of the kept lines are re-joined with single spaces.
    """
    shifted = []
    for record in lines:
        fields = record.split()
        if not fields or not fields[0].isdigit():
            continue
        new_id = str(int(fields[0]) + atom_offset)
        shifted.append(' '.join([new_id] + fields[1:]))
    return shifted

# Function to adjust IDs for other sections (Bonds, Angles, Dihedrals, Impropers)
def adjust_other_ids(lines, id_offset, atom_offset):
    """Shift the entry ID and the atom references of topology lines.

    Column 1 is increased by *id_offset*, column 2 is kept as is, and every
    column from the third onward is increased by *atom_offset*. Lines that are
    empty or whose first field is not made of digits are dropped.
    """
    shifted = []
    for record in lines:
        fields = record.split()
        if not fields or not fields[0].isdigit():
            continue
        entry_id = str(int(fields[0]) + id_offset)
        atom_refs = [str(int(ref) + atom_offset) for ref in fields[2:]]
        shifted.append(' '.join([entry_id] + fields[1:2] + atom_refs))
    return shifted

# Function to replicate sections
def replicate_section(section, n_replicates, atom_offset_step):
    """Concatenate *n_replicates* copies of *section* with shifted first columns.

    Copy number k (starting at 0) has its first column increased by
    ``k * atom_offset_step`` via ``adjust_atom_ids``.
    """
    copies = []
    for copy_index in range(n_replicates):
        copies += adjust_atom_ids(section, copy_index * atom_offset_step)
    return copies

# Function to replicate sections for Bonds, Angles, Dihedrals, Impropers
def replicate_other_section(section, n_replicates, id_offset_step, atom_offset_step):
    """Concatenate *n_replicates* shifted copies of a topology section.

    Copy number k (starting at 0) is produced by ``adjust_other_ids`` with an
    ID offset of ``k * id_offset_step`` and an atom offset of
    ``k * atom_offset_step``.
    """
    copies = []
    for copy_index in range(n_replicates):
        copies += adjust_other_ids(
            section,
            copy_index * id_offset_step,
            copy_index * atom_offset_step,
        )
    return copies

# Function to get the max ID from a section
def get_max_id(lines):
    """Return the largest all-digit first field found in *lines*, or 0 if none."""
    split_lines = (record.split() for record in lines)
    ids = [
        int(fields[0])
        for fields in split_lines
        if fields and fields[0].isdigit()
    ]
    return max(ids, default=0)

# Function to get the max column 2 value from a section
def get_max_column2_value(lines):
    """Return the largest all-digit second field found in *lines*, or 0 if none."""
    split_lines = (record.split() for record in lines)
    values = [
        int(fields[1])
        for fields in split_lines
        if len(fields) > 1 and fields[1].isdigit()
    ]
    return max(values, default=0)

# Largest ID in each section of a single (not yet replicated) copy of ligand 1.
(
    max_atom_id1,
    max_bond_id1,
    max_angle_id1,
    max_dihedral_id1,
    max_improper_id1,
) = (
    get_max_id(sections1[name])
    for name in ('Atoms', 'Bonds', 'Angles', 'Dihedrals', 'Impropers')
)

# Adjust column 2 of ligand 2's Atoms section once before replication
def adjust_column2_atom_ids(lines, max_value):
    """Return copies of *lines* with the second column increased by *max_value*.

    Lines with fewer than two fields, or whose second field is not made of
    digits, are dropped. Fields of the kept lines are re-joined with single
    spaces.
    """
    shifted = []
    for record in lines:
        fields = record.split()
        if len(fields) < 2 or not fields[1].isdigit():
            continue
        fields[1] = str(int(fields[1]) + max_value)
        shifted.append(' '.join(fields))
    return shifted

# Largest column-2 value in ligand 1's Atoms section. This name was used below
# without ever being assigned in this script, which raises NameError unless an
# earlier interactive session happened to define it.
# NOTE(review): in LAMMPS atom_style "full" column 2 is the molecule ID and the
# atom type is column 3 -- confirm which column is meant to be shifted here.
max_column2_value1 = get_max_column2_value(sections1['Atoms'])
sections2['Atoms'] = adjust_column2_atom_ids(sections2['Atoms'], max_column2_value1)

# Replicate ligand 1: each copy's atom IDs advance by the number of Atoms lines,
# and each topology section's IDs advance by that section's own line count.
n_replicates_ligand1 = 180
atom_offset1 = len(sections1['Atoms'])

replicated_atoms1 = replicate_section(sections1['Atoms'], n_replicates_ligand1, atom_offset1)
(
    replicated_bonds1,
    replicated_angles1,
    replicated_dihedrals1,
    replicated_impropers1,
) = (
    replicate_other_section(
        sections1[name], n_replicates_ligand1, len(sections1[name]), atom_offset1
    )
    for name in ('Bonds', 'Angles', 'Dihedrals', 'Impropers')
)

# Adjust IDs for sections of ligand 2
def adjust_other_ids_ligand2(lines, id_offset, atom_offset, n_replicates):
    """Shift the ID, type and atom-reference columns of topology lines.

    For every line whose first field is made of digits:
      * column 1 is increased by ``id_offset * n_replicates``;
      * column 2 is increased by ``id_offset`` (not multiplied);
      * every later column is increased by ``atom_offset * n_replicates``.
    Other lines are dropped. Fields are re-joined with single spaces.
    """
    shifted = []
    for record in lines:
        fields = record.split()
        if not (fields and fields[0].isdigit()):
            continue
        new_id = str(int(fields[0]) + id_offset * n_replicates)
        new_type = str(int(fields[1]) + id_offset)  # shifted once, not per replica
        new_refs = [str(int(ref) + (atom_offset * n_replicates)) for ref in fields[2:]]
        shifted.append(' '.join([new_id, new_type] + new_refs))
    return shifted

# Replicate sections for ligand 2
n_replicates_ligand2 = 360
atom_offset2 = len(sections2['Atoms'])

# Ligand 2 has to be numbered after ALL replicated copies of ligand 1, so the
# base shift is (max ID of one ligand-1 copy) * (number of ligand-1 copies).
# Previously the atoms were shifted by a single copy (max_atom_id1), which
# collides with ligand-1 replicas 2..N, while the topology sections were shifted
# by max_*_id1 * n_replicates_ligand2, so their atom references did not match
# the shifted ligand-2 atom IDs.
ligand1_atom_total = max_atom_id1 * n_replicates_ligand1

adjusted_atoms2 = adjust_atom_ids(sections2['Atoms'], ligand1_atom_total)
# adjust_other_ids_ligand2 multiplies both offsets by its last argument, so the
# ligand-1 replica count is what yields the same base shift as for the atoms.
adjusted_bonds2 = adjust_other_ids_ligand2(sections2['Bonds'], max_bond_id1, max_atom_id1, n_replicates_ligand1)
adjusted_angles2 = adjust_other_ids_ligand2(sections2['Angles'], max_angle_id1, max_atom_id1, n_replicates_ligand1)
adjusted_dihedrals2 = adjust_other_ids_ligand2(sections2['Dihedrals'], max_dihedral_id1, max_atom_id1, n_replicates_ligand1)
adjusted_impropers2 = adjust_other_ids_ligand2(sections2['Impropers'], max_improper_id1, max_atom_id1, n_replicates_ligand1)

# Replicate adjusted sections for ligand 2: each copy advances atom IDs by the
# ligand-2 atom count and topology IDs by the section's own line count.
replicated_atoms2 = replicate_section(adjusted_atoms2, n_replicates_ligand2, atom_offset2)
replicated_bonds2 = replicate_other_section(adjusted_bonds2, n_replicates_ligand2, len(adjusted_bonds2), atom_offset2)
replicated_angles2 = replicate_other_section(adjusted_angles2, n_replicates_ligand2, len(adjusted_angles2), atom_offset2)
replicated_dihedrals2 = replicate_other_section(adjusted_dihedrals2, n_replicates_ligand2, len(adjusted_dihedrals2), atom_offset2)
replicated_impropers2 = replicate_other_section(adjusted_impropers2, n_replicates_ligand2, len(adjusted_impropers2), atom_offset2)

# Combine masses: ligand 1's lines are kept unchanged and ligand 2's lines
# follow with their first column shifted by the number of ligand-1 mass lines.
# No de-duplication is performed.
masses1, masses2 = sections1['Masses'], sections2['Masses']
mass_id_offset = len(masses1)
adjusted_masses2 = []
for entry in masses2:
    fields = entry.split()
    if not fields or not fields[0].isdigit():
        continue
    shifted_type = str(int(fields[0]) + mass_id_offset)
    adjusted_masses2.append(' '.join([shifted_type] + fields[1:]))

combined_masses = masses1 + adjusted_masses2

# Final content of every section: ligand 1's replicated lines followed by
# ligand 2's replicated lines (Masses were merged above).
combined_sections = dict(
    Masses=combined_masses,
    Atoms=replicated_atoms1 + replicated_atoms2,
    Bonds=replicated_bonds1 + replicated_bonds2,
    Angles=replicated_angles1 + replicated_angles2,
    Dihedrals=replicated_dihedrals1 + replicated_dihedrals2,
    Impropers=replicated_impropers1 + replicated_impropers2,
)

# Function to calculate counts
def calculate_counts(sections):
    """Return the number of lines in each topology section.

    The result maps ``atoms``, ``bonds``, ``angles``, ``dihedrals`` and
    ``impropers`` to the length of the matching capitalised section.
    """
    section_names = ('Atoms', 'Bonds', 'Angles', 'Dihedrals', 'Impropers')
    return {name.lower(): len(sections[name]) for name in section_names}

# Function to calculate types
def calculate_types(sections):
    """Count the distinct type IDs used in each section.

    Atom types are counted from the first column of ``Masses`` (the type ID;
    the second column is the mass value, and several types may share one mass).
    Bond, angle, dihedral and improper types are counted from the second
    column of their sections. Lines too short to hold the column are ignored.

    Args:
        sections: Dict mapping section names to lists of data lines.

    Returns:
        A dict with the keys ``atom_types``, ``bond_types``, ``angle_types``,
        ``dihedral_types`` and ``improper_types``.
    """
    def count_distinct(section_name, column):
        # Number of different values appearing in *column* of the section.
        rows = (line.split() for line in sections[section_name])
        return len({fields[column] for fields in rows if len(fields) > column})

    return {
        'atom_types': count_distinct('Masses', 0),
        'bond_types': count_distinct('Bonds', 1),
        'angle_types': count_distinct('Angles', 1),
        'dihedral_types': count_distinct('Dihedrals', 1),
        'improper_types': count_distinct('Impropers', 1),
    }

# Entry counts come from the combined sections; type counts are the sum of the
# two files' header values, keyed as in ligand 1's header.
counts = calculate_counts(combined_sections)
types1, types2 = read_header_types(lines1), read_header_types(lines2)
types = {key: n_types + types2[key] for key, n_types in types1.items()}

# Function to write the new data file
def write_data_file(output_file_path, counts, types, combined_sections):
    """Write a data file with header, fixed box bounds and all sections.

    Args:
        output_file_path: Destination path; the file is overwritten.
        counts: Dict with ``atoms``, ``bonds``, ``angles``, ``dihedrals`` and
            ``impropers`` entries for the header.
        types: Dict with ``atom_types``, ``bond_types``, ``angle_types``,
            ``dihedral_types`` and ``improper_types`` entries for the header.
        combined_sections: Dict mapping ``Masses``, ``Atoms``, ``Bonds``,
            ``Angles``, ``Dihedrals`` and ``Impropers`` to their data lines.
    """
    kinds = ('atom', 'bond', 'angle', 'dihedral', 'improper')
    box_lines = (
        "0.0 25.02 xlo xhi\n",
        "0.0 18.191811665287418 ylo yhi\n",
        "0.0 50.32022347141766 zlo zhi\n",
    )
    section_order = ('Masses', 'Atoms', 'Bonds', 'Angles', 'Dihedrals', 'Impropers')

    with open(output_file_path, 'w') as file:
        file.write('LAMMPS data file Created by LigParGen - (Written by Leela S. Dodda)\n\n')

        # Entry counts, then a blank line.
        for kind in kinds:
            plural = kind + 's'
            file.write(f"{counts[plural]} {plural}\n")
        file.write("\n")

        # Type counts, then a blank line.
        for kind in kinds:
            type_key = kind + '_types'
            file.write(f"{types[type_key]} {kind} types\n")
        file.write("\n")

        # Hard-coded simulation box, then a blank line.
        for bounds in box_lines:
            file.write(bounds)
        file.write("\n")

        # Every section after the first is preceded by a blank line.
        for position, name in enumerate(section_order):
            separator = "" if position == 0 else "\n"
            file.write(f"{separator}{name}\n\n")
            for entry in combined_sections[name]:
                file.write(f"{entry}\n")

# Write the combined header and sections to output_file_path, then report
# completion on standard output.
write_data_file(output_file_path, counts, types, combined_sections)

print("Replicated data file created successfully.")

Replicated data file created successfully.
