In [None]:
import MDAnalysis as mda
import numpy as np
import re
from vermouth.forcefield import ForceField
from vermouth.gmx.itp_read import read_itp
import networkx as nx

### **_Step 1:_** Generate topology

In [None]:
def itp_to_forcefield(itp):
    """
    Build a force field holding every molecule defined in one ITP file.

    Parameters
    ----------
    itp : str
        Path to the ITP file that should be parsed.

    Returns
    -------
    ForceField
        A new force field named 'custom', filled in by ``read_itp`` from
        the lines of the given file.
    """
    with open(itp, 'r') as itp_file:
        itp_lines = list(itp_file)
    custom_ff = ForceField(name='custom')
    read_itp(itp_lines, custom_ff)
    return custom_ff

In [None]:
# Parse every ITP this system depends on into its own force field.
ff_rna = itp_to_forcefield('ITP/RNA.itp')
ff_lipids = itp_to_forcefield('ITP/martini_v3.0.0_phospholipids_v1.itp')
ff_lisbeth = itp_to_forcefield('ITP/MC3_KC2_DP_DT_LI5_LI2_LI10_BMHB.itp')
ff_sterols = itp_to_forcefield('ITP/martini_v3.0_sterols_v1.0.itp')
ff_solvents = itp_to_forcefield('ITP/martini_v3.0.0_solvents_v1_BMHB.itp')
ff_ions = itp_to_forcefield('ITP/martini_v3.0.0_ions_v1.itp')
# Merge the blocks of all force fields into one lookup (a block is a molecule
#  in our case). If a name occurs in more than one force field, a warning is
#  printed and the definition from the later force field replaces the earlier.
all_blocks = {}
for forcefield in (ff_rna, ff_lipids, ff_lisbeth, ff_sterols, ff_solvents, ff_ions):
    for name in forcefield.blocks:
        if name in all_blocks:
            print(f'A double definition was found for {name}, this often results in issues!')
    all_blocks.update(forcefield.blocks)
# To inspect a single block, use e.g.: all_blocks['RNA'].nodes(data=True)

In [None]:
# For every molecule, list the (resname, atomname) pair of each of its nodes,
#  in the order in which the block yields them.
molecule_identifiers = {
    molecule: [
        (attributes['resname'], attributes['atomname'])
        for _, attributes in block.nodes(data=True)
    ]
    for molecule, block in all_blocks.items()
}

In [None]:
# Read in the structure file (a PDB in this case)
universe = mda.Universe('Hexagon.pdb') ## This PDB is the one you generate in step 3, using VMD or a similar tool, to manually select the desired core

In [None]:
# Match every residue of the structure to a molecule (block) name and collect
#  the names, in order, in `matches`.
# This is not a very generic approach, but it can be expanded. The issue is
#  that there is no true solution to this problem: certain molecules might be
#  subgroups of other molecules, meaning it is to a certain degree an
#  ambiguous problem. Here we map small segments over long segments for
#  simplicity; this might or might not be a problem for your system. It can
#  largely be circumvented by making sure that all resnames are unique for
#  every molecule type.
matches = []
active_molecule = []  # not used by the matching below; kept so the name stays defined
# Residue names that are treated as part of the RNA polymer. 'A' and 'U' are
#  always accepted; every residue name that occurs in the loaded RNA block (if
#  there is one) is accepted as well, so other nucleotides are recognised too.
rna_resnames = {'A', 'U'} | {resname for resname, _ in molecule_identifiers.get('RNA', [])}
# Number of atoms collected so far for the RNA molecule that is being
#  assembled; 0 means we are currently not inside an RNA molecule.
in_RNA = 0
for residue in universe.residues:
    resname = residue.resname
    # Same precedence as the branches below: a block name or 'ION' always wins
    #  over the RNA residue names.
    is_rna_residue = (
        resname not in all_blocks
        and resname != 'ION'
        and resname in rna_resnames
    )
    if in_RNA and not is_rna_residue:
        # A partially collected RNA molecule is dropped here; make that
        #  visible instead of only failing the atom count check later on.
        print(f'WARNING: incomplete RNA molecule ({in_RNA} atoms) discarded before residue {resname}')
        in_RNA = 0
    # Matching by residue name if the molecule is a residue.
    if resname in all_blocks:
        matches.append(resname)
    # The naming of IONS is weird as they are separate molecules which share
    #  the same resname; the name of their first atom is used instead.
    elif resname == 'ION':
        matches.append(residue.atoms.names[0])
    # The RNA, or any polymer, is an issue because its residues are not
    #  molecules. This is a basic detection for RNA which might need to be
    #  changed for other polymers. Matching is done by atom count only:
    #  residues are accumulated until the count equals the size of the RNA
    #  block.
    elif resname in rna_resnames:
        in_RNA += len(residue.atoms)
        rna_size = len(molecule_identifiers['RNA'])
        if in_RNA == rna_size:
            in_RNA = 0
            matches.append('RNA')
        elif in_RNA > rna_size:
            # The count can never equal the block size any more, so no
            #  further RNA molecule would ever be matched; stop like for an
            #  unknown residue.
            print(f'WARNING: RNA residues add up to {in_RNA} atoms, more than the {rna_size} atoms of the RNA block')
            in_RNA = 0
            break
    else:
        print(f'UNKNOWN RESIDUE {residue.resname}')
        break
if in_RNA:
    # The structure ended in the middle of an RNA molecule.
    print(f'WARNING: incomplete RNA molecule ({in_RNA} atoms) at the end of the structure')
#print(matches)

In [None]:
# Run-length encode `matches`: collapse every run of identical consecutive
#  names into one (name, count) tuple, keeping the original order.
# An empty `matches` yields an empty list, instead of failing on (or silently
#  reusing a stale value of) the loop variable after the loop.
successive_elements = []
for match in matches:
    if successive_elements and successive_elements[-1][0] == match:
        # Same molecule as the previous one: extend the current run.
        name, count = successive_elements[-1]
        successive_elements[-1] = (name, count + 1)
    else:
        # A different molecule starts a new run.
        successive_elements.append((match, 1))

In [None]:
# Quick sanity check: the number of atoms implied by the counted segments
#  (atoms per molecule times the number of molecules) must be equal to the
#  total number of atoms in the loaded structure (PDB/GRO).
amount_of_atoms = sum(
    len(molecule_identifiers[name]) * count
    for name, count in successive_elements
)
all_fine = amount_of_atoms == len(universe.atoms)
# "is correct" (not "in correct"), so that `True` cannot be misread as "incorrect".
print(f'The amount of atoms based on the segments is correct: {all_fine}')

In [None]:
# Write the mol_counting file, one "<molecule name>\t<count>" line per
#  segment, which can be copied or included in the top.
# Nothing is written when the atom count check failed; in that case a message
#  is printed (a bare string in the else branch would never be displayed).
if all_fine:
    with open('Hexagon_mol_counting.top', 'w') as f:
        f.writelines(f'{name}\t{count}\n' for name, count in successive_elements)
    print('The mol_counting.top has been written successfully.')
else:
    print('No output has been generated as the amount of atoms does not match.')

In [None]:
# Write the master topology file: the force field includes, the system name
#  and a [ molecules ] section that includes the molecule counts file.
# NOTE: this file is written regardless of the atom count check, so the
#  included 'Hexagon_mol_counting.top' is only present when that check passed.
topol_lines = [
    '#include "Martini_ITPs/martini_v3.0.0.itp"',
    '#include "M3-Ionizable-Lipids/Collection_of_itps/martini_v3.0_ffbonded.itp"',
    '#include "M3-Ionizable-Lipids/Notebooks/Inverse_Hexagonal/RNA.itp"',
    '#include "Martini_ITPs/martini_v3.0.0_phospholipids_v1.itp"',
    '#include "M3-Ionizable-Lipids/Collection_of_itps/MC3_KC2_DP_DT_LI5_LI2_LI10.itp"',
    '#include "M3-Sterol-Parameters/martini_v3.0_sterols_v1.0.itp"',
    '#include "Martini_ITPs/martini_v3.0.0_solvents_v1.itp"',
    '#include "Martini_ITPs/martini_v3.0.0_ions_v1.itp"',
    '[ system ]',
    'LNP',
    '',
    '[ molecules ]',
    '#include "Hexagon_mol_counting.top"',
]
# The context manager closes the file even when writing fails.
with open('hexagon.top', 'w') as topol:
    topol.write('\n'.join(topol_lines) + '\n')