In [2]:
import numpy as np
import matplotlib.pyplot as plt

# File names: the source topology, its tab-separated copy, and the file
# that receives the reordered topology further down.
infile = 'Chl_a_new_reorderedN-hybridization.itp'
ofile = 'Chl_a_tabs.itp'
sfile = 'CLA.itp'

# Copy the topology, collapsing every run of whitespace in ordinary lines
# to a single tab. Comment lines (leading ';') and directive lines
# (leading '[') are copied through untouched.
with open(infile, 'r') as src, open(ofile, 'w') as dst:
    for raw in src:
        if raw.startswith((';', '[')):
            dst.write(raw)
        else:
            dst.write("\t".join(raw.split()) + "\n")

# Everything below parses the tab-separated copy.
infile = ofile
    
    
# Atom names in PDB order, one per line of PDBNames.txt.
PDBNames = []
with open('PDBNames.txt', 'r') as fd:
    for line in fd:
        # strip() instead of line[:-1]: the last line may lack a trailing
        # newline, in which case slicing would cut off the final character
        # of the name. Blank lines are skipped because an empty name can
        # never match a (whitespace-free) label from the topology.
        name = line.strip()
        if name:
            PDBNames.append(name)

# Read [ atoms ] data from topology file.
# An atom record is a non-comment line inside the section that has at
# least 8 whitespace-separated fields. For each record we keep:
#   TopNDX      - field 0
#   TopName     - field 1
#   TopAtom     - field 4
#   TopName2PDB - first word of the text after the first ';' on the line
#   AtomLines   - the record itself minus its final character
TopNDX = []
TopName = []
TopAtom = []
TopName2PDB = []
AtomLines = []
with open(infile) as ifd:
    inside_atoms = False
    for line in ifd:

        # The directive line itself only switches recording on
        if line.startswith('[ atoms ]'):
            inside_atoms = True
            continue

        if not inside_atoms:
            continue

        # Any other directive ends the section
        if line.startswith('['):
            inside_atoms = False
            continue

        fields = line.split()
        if line.startswith(';') or len(fields) < 8:
            continue

        TopNDX.append(fields[0])
        TopName.append(fields[1])
        TopAtom.append(fields[4])
        TopName2PDB.append(line.split(";")[1].split()[0])
        AtomLines.append(line[:-1])

# Read [ bonds ] data from topology file.
# A bond record is a non-comment line inside the section that has at
# least 3 whitespace-separated fields. For each record we keep:
#   Bond1     - field 0
#   Bond2     - field 1
#   BondLines - the record itself minus its final character
Bond1 = []
Bond2 = []
BondLines = []
with open(infile) as ifd:
    inside_bonds = False
    for line in ifd:

        # The directive line itself only switches recording on
        if line.startswith('[ bonds ]'):
            inside_bonds = True
            continue

        if not inside_bonds:
            continue

        # Any other directive ends the section
        if line.startswith('['):
            inside_bonds = False
            continue

        fields = line.split()
        if line.startswith(';') or len(fields) < 3:
            continue

        Bond1.append(fields[0])
        Bond2.append(fields[1])
        BondLines.append(line[:-1])

# Every name from PDBNames must occur among the PDB labels read from the
# [ atoms ] comments; report all that do not.
missing = [name for name in PDBNames if name not in TopName2PDB]
for name in missing:
    print('Error! Could not locate PDB name ' + name)
error = bool(missing)

if error:
    # Stop here. Without a complete mapping NewTopNdx/NewNames cannot be
    # built, and the output stage below would otherwise truncate the
    # output file and then fail with a NameError.
    raise ValueError(
        '%d PDB atom name(s) not found in the topology; cannot reorder'
        % len(missing))

# Look-up table: first atom number of each bond record (as text) -> the
# second atom numbers (as text) of all records that start with it.
# BondLines are tab-separated because they come from the tab-normalised
# topology.
# NOTE(review): only bonds listing the atom FIRST are considered, as in
# the original scan; a hydrogen written before its heavy atom would be
# missed -- confirm the topology always lists the heavy atom first.
partners = {}
for line in BondLines:
    fields = line.split('\t')
    if len(fields) >= 2:
        partners.setdefault(fields[0], []).append(fields[1])

# BondList is a list of bond-lists: for each PDB name, the 0-based
# topology indices of its bonding partners.
BondList = []
for name in PDBNames:
    topndx = TopName2PDB.index(name)
    blist = [int(second) - 1 for second in partners.get(str(topndx + 1), ())]
    BondList.append(blist)

# NewTopNdx contains, in the new order, the 0-based atom index in the
# top file; NewNames contains the matching atom names.
NewTopNdx = []
NewNames = []

# Need to determine correct order for H atom entries
for name, blist in zip(PDBNames, BondList):

    # Append information for this atom
    NewTopNdx.append(TopName2PDB.index(name))
    NewNames.append(name)

    # And now check for bonded H atoms.
    # (These won't yet be included in the PDBNames list.)
    for part in blist:
        # If the bond partner is an H atom (atom-name field starts with 'H')
        if TopAtom[part][0] == 'H':
            NewNames.append('H')
            NewTopNdx.append(part)
                
# Now the arrays NewNames and NewTopNdx store names and top
# file indices for all Chl atoms in PDB standard order.
# What remains is to re-order the top file data to match.


def _old_to_new_numbers(order):
    """Map each 0-based topology index in *order* to its new 1-based number.

    If an index occurs more than once its first position wins, which is
    what ``list.index`` would return.
    """
    mapping = {}
    for position, top_index in enumerate(order):
        mapping.setdefault(top_index, position + 1)
    return mapping


def _renumber(token, mapping):
    """Return the new 1-based atom number for the old 1-based number *token*.

    Raises ValueError if *token* is not an integer or if that atom has no
    entry in *mapping*.
    """
    old_index = int(token) - 1
    if old_index not in mapping:
        raise ValueError(
            'topology atom %s has no position in the new atom order' % token)
    return mapping[old_index]


def _read_header(lines):
    """Return the lines that precede the first directive ('[' line)."""
    header = []
    for line in lines:
        if line.startswith('['):
            break
        header.append(line)
    return header


def _copy_sections(lines, directive):
    """Return, verbatim, every section opened by *directive*, header included."""
    copied = []
    inside = False
    for line in lines:
        if line.startswith('['):
            inside = line.startswith(directive)
        if inside:
            copied.append(line)
    return copied


def _section_body(lines, directive):
    """Return the lines inside every section opened by *directive*, headers excluded."""
    body = []
    inside = False
    for line in lines:
        if line.startswith('['):
            inside = line.startswith(directive)
        elif inside:
            body.append(line)
    return body


def _remap_records(lines, n_atoms, mapping):
    """Renumber the first *n_atoms* fields of every numeric record in *lines*.

    Lines whose first field is not all digits (comments, blanks) are
    dropped. Returns ``(key, text)`` pairs where *key* is the tuple of new
    atom numbers and *text* the tab-joined, newline-terminated record.
    """
    records = []
    for line in lines:
        terms = line.split()
        if not terms or not terms[0].isdigit():
            continue
        key = tuple(_renumber(terms[k], mapping) for k in range(n_atoms))
        terms[:n_atoms] = [str(number) for number in key]
        records.append((key, "\t".join(terms) + "\n"))
    return records


def _remap_atoms(lines, mapping, names):
    """Renumber [ atoms ] records; also reset atom name and charge group.

    Field 0 and field 5 both receive the new atom number; field 4
    receives the entry of *names* at the new position.
    """
    records = []
    for line in lines:
        terms = line.split()
        if not terms or not terms[0].isdigit():
            continue
        number = _renumber(terms[0], mapping)
        terms[0] = str(number)
        terms[4] = names[number - 1]
        terms[5] = str(number)
        records.append(((number,), "\t".join(terms) + "\n"))
    return records


def _sorted_text(records):
    """Return record texts ordered by their atom-number key.

    The sort is stable and lexicographic over the key columns (first
    column most significant), and it also works for an empty list.
    """
    return [text for _, text in sorted(records, key=lambda rec: rec[0])]


def _split_dihedrals(lines, mapping):
    """Collect the two regions of the dihedral data from the whole file.

    The first region starts after a '[ dihedrals ]' line. Once it holds at
    least one record, a comment line whose first two words are ';' and
    'ai' switches to the second region (that comment is kept with the
    second region). Any directive line ends both regions; a
    '[ dihedrals ]' line then (re)opens the first one.

    Returns ``(comments1, records1, comments2, records2)`` with records
    as produced by ``_remap_records``.
    """
    comments1, records1, comments2, records2 = [], [], [], []
    in_first = False
    in_second = False
    for line in lines:
        terms = line.split()

        # Slice comparison, so a lone ';' line cannot raise IndexError
        if in_first and records1 and terms[:2] == [';', 'ai']:
            in_first, in_second = False, True

        if line.startswith(';'):
            if in_first:
                comments1.append(line)
            if in_second:
                comments2.append(line)

        # A new directive ends whichever region is open, so records of a
        # following, unrelated section are never read as dihedrals.
        if line.startswith('['):
            in_first = False
            in_second = False

        if in_first:
            records1.extend(_remap_records([line], 4, mapping))
        if in_second:
            records2.extend(_remap_records([line], 4, mapping))

        if line.startswith('[ dihedrals ]'):
            in_first = True

    return comments1, records1, comments2, records2


# Read the topology once; every section is extracted from this list.
with open(infile) as ifd:
    top_lines = ifd.readlines()

new_number = _old_to_new_numbers(NewTopNdx)

# Header info, then the directives that need no modification
out_lines = _read_header(top_lines)
out_lines += _copy_sections(top_lines, '[ atomtypes ]')
out_lines += _copy_sections(top_lines, '[ moleculetype ]')

# In [ atoms ] section, we need to update indices and charge groups
out_lines.append('[ atoms ]\n')
out_lines += _sorted_text(
    _remap_atoms(_section_body(top_lines, '[ atoms ]'), new_number, NewNames))
out_lines.append("\n")

# [ bonds ], [ pairs ], [ angles ]: renumber the leading atom columns and
# sort the records by those columns
for directive, n_atoms in (('[ bonds ]', 2), ('[ pairs ]', 2), ('[ angles ]', 3)):
    out_lines.append(directive + '\n')
    out_lines += _sorted_text(
        _remap_records(_section_body(top_lines, directive), n_atoms, new_number))
    out_lines.append('\n')

# [ dihedrals ]
# This section has two distinct regions, one beginning after [ dihedrals ]
# directive and the other after a comment line
Comments1, Records1, Comments2, Records2 = _split_dihedrals(top_lines, new_number)
out_lines.append('[ dihedrals ]\n')
out_lines += Comments1
out_lines += _sorted_text(Records1)
out_lines.append("\n")
out_lines += Comments2
out_lines += _sorted_text(Records2)
out_lines.append('\n')

# Write only after everything has been computed, so a parsing or mapping
# error does not leave a truncated output file behind.
with open(sfile, 'w') as nfd:
    nfd.writelines(out_lines)

# Replace every occurrence of 'MOL' with 'CLA' in the reordered topology.
# Done in plain Python instead of the IPython shell escape
# `!sed -i 's/MOL/CLA/g' <file>`: no dependence on GNU sed's form of -i,
# no unquoted file name passed through a shell, and the code also runs
# outside IPython. newline='' keeps the file's line endings unchanged.
with open(sfile, 'r', newline='') as fd:
    text = fd.read()
with open(sfile, 'w', newline='') as fd:
    fd.write(text.replace('MOL', 'CLA'))