In [149]:
import re
import xml.etree.ElementTree as ET
from datetime import date
from xml.dom import minidom

# CGenFF stream (.str) file that the cells below convert to XML.
cgenfffile = "/home/miemie/Dropbox/Documents/BetaPose/example_lig.str"

def cgenff_reader(filename):
    """Collect the per-atom columns of the ATOM records in a stream file.

    Only lines that start with ``ATOM`` and contain a ``!`` are kept; every
    other line is ignored. Each kept line is split on whitespace and read
    positionally: token 1 is the atom name, token 2 the atom type, token 3
    the charge and the last token the penalty.

    Args:
        filename: Path of the text file to read.

    Returns:
        dict with the keys ``"name"`` and ``"type"`` (lists of str) and
        ``"charge"`` and ``"penalty"`` (lists of float), all in file order.
    """
    atom_record = re.compile(r"^ATOM.*!")
    with open(filename) as stream:
        # str.split() without arguments already discards the trailing newline.
        records = [line.split() for line in stream if atom_record.match(line)]

    names = [tokens[1] for tokens in records]
    types = [tokens[2] for tokens in records]
    charges = [float(tokens[3]) for tokens in records]
    penalties = [float(tokens[-1]) for tokens in records]
    return {"name": names, "type": types, "charge": charges, "penalty": penalties}
    
def lig_xml(dic, write_file=False, source=False):
    """Serialise an atom table into a pretty-printed ``ForceField`` XML string.

    The document has an ``Info`` element (today's date and, optionally, the
    source file name) and a single ``Residue`` named ``LIG`` holding one
    ``Atom`` element per entry of ``dic["name"]``.

    Args:
        dic: Mapping with the parallel lists ``"name"``, ``"type"``,
            ``"charge"`` and ``"penalty"``; entry ``i`` of each list describes
            atom ``i``.
        write_file: Path to write the XML to. ``False`` (default) or ``None``
            skips writing.
        source: Text stored in ``<source>``. ``False`` (default) or ``None``
            omits the element.

    Returns:
        The pretty-printed XML document as a str.

    Raises:
        IndexError: If ``"type"``, ``"charge"`` or ``"penalty"`` is shorter
            than ``"name"``.
    """
    root = ET.Element('ForceField')
    info = ET.SubElement(root, 'Info')
    ET.SubElement(info, "date").text = str(date.today())
    # None is accepted as "not supplied" in addition to the False default.
    if source is not None and source != False:
        # str() lets path-like objects be passed without breaking serialisation.
        ET.SubElement(info, 'source').text = str(source)

    residues_tag = ET.SubElement(root, 'Residues')
    data_lig = ET.SubElement(residues_tag, 'Residue', attrib={"name": "LIG"})
    for idx, atom_name in enumerate(dic["name"]):
        ET.SubElement(
            data_lig,
            'Atom',
            attrib={
                "name": atom_name,
                "type": dic["type"][idx],
                "charge": str(dic["charge"][idx]),
                'penalty': str(dic["penalty"][idx]),
            },
        )

    # Round-trip through minidom only to get indented output.
    ligxml_str = minidom.parseString(ET.tostring(root, encoding="unicode")).toprettyxml()

    if write_file is not None and write_file != False:
        # The XML declaration carries no encoding, which XML parsers read as
        # UTF-8, so the file must not be written in a locale-dependent encoding.
        with open(write_file, "w", encoding="utf-8") as file1:
            file1.write(ligxml_str)
    return ligxml_str
    
# Parse the stream file, convert it to XML (also written to ./example_lig.xml)
# and show the resulting document.
ligdic = cgenff_reader(cgenfffile)
ligxmlstr = lig_xml(
    ligdic,
    write_file="./example_lig.xml",
    source=cgenfffile,
)
print(ligxmlstr)

<?xml version="1.0" ?>
<ForceField>
	<Info>
		<date>2022-03-08</date>
		<source>/home/miemie/Dropbox/Documents/BetaPose/example_lig.str</source>
	</Info>
	<Residues>
		<Residue name="LIG">
			<Atom name="N1" type="NG3P3" charge="-0.291" penalty="0.0"/>
			<Atom name="C1" type="CG314" charge="0.219" penalty="0.0"/>
			<Atom name="C2" type="CG321" charge="-0.183" penalty="0.0"/>
			<Atom name="C3" type="CG321" charge="-0.129" penalty="0.0"/>
			<Atom name="S1" type="SG311" charge="-0.101" penalty="0.0"/>
			<Atom name="C4" type="CG2O3" charge="0.345" penalty="0.0"/>
			<Atom name="O1" type="OG2D2" charge="-0.67" penalty="0.0"/>
			<Atom name="O2" type="OG2D2" charge="-0.67" penalty="0.0"/>
			<Atom name="C5" type="CG321" charge="-0.127" penalty="0.0"/>
			<Atom name="C6" type="CG3C51" charge="0.158" penalty="0.0"/>
			<Atom name="O3" type="OG3C51" charge="-0.498" penalty="0.0"/>
			<Atom name="C7" type="CG3C51" charge="0.143" penalty="0.0"/>
			<Atom name="O4" type="OG311" charge="-0.649

In [197]:
class ffreader:
    """Look up atom types and charges in force-field XML files.

    The XML is held as a minidom document (``self.domff``). Every
    ``<Residue>`` whose ``name`` is a key of ``self.residuemap`` is renamed
    to the mapped value, and residue names passed to the lookup methods are
    upper-cased and mapped the same way before comparison.
    """

    def __init__(self, ff):
        """Parse ``ff`` (a file name or file object) and normalise residue names."""
        self.domff = minidom.parse(ff)
        self.residues = self.domff.getElementsByTagName("Residue")
        # Alias -> canonical residue name used inside the document.
        self.residuemap = {
            "TIP3":"T3P",
            "HOH":"T3P",
            "WAT":"T3P",
            'HID':"HSD",
            'HIE':"HSE",
            'HIP':"HSP", 
            "CYX":"CYS",
        }
        self.map_resnames()

    def map_resnames(self):
        """Rename every residue whose name is an alias in ``self.residuemap``.

        Each element is handled individually, so several residues sharing the
        same alias (for example two ``HOH`` entries) are all renamed.
        """
        for residue in self.domff.getElementsByTagName("Residue"):
            mapped = self.residuemap.get(residue.getAttribute("name"))
            if mapped is not None:
                residue.setAttribute("name", mapped)
        self.residues = self.domff.getElementsByTagName("Residue")

    def format_resname(self, resname):
        """Return ``resname`` upper-cased and translated through ``self.residuemap``."""
        resname = resname.upper()
        return self.residuemap.get(resname, resname)

    def format_atomname(self, retdic):
        """Return a shallow copy of ``retdic`` (hook for future atom-name mapping)."""
        return dict(retdic)

    def _find_residue(self, resname):
        """Return the first residue element named ``resname`` (already formatted), or None."""
        for residue in self.residues:
            if residue.getAttribute("name") == resname:
                return residue
        return None

    @staticmethod
    def _atom_values(residue, attribute):
        """Return ``(atom name, attribute value)`` pairs for the atoms of ``residue``."""
        return [
            (atom.attributes["name"].value, atom.attributes[attribute].value)
            for atom in residue.getElementsByTagName("Atom")
        ]

    def getTypes(self, resname):
        """Return ``{atom name: atom type}`` for ``resname``.

        An unknown residue prints a message and yields an empty dict.
        """
        # TODO: possible changes to map the residue name
        # TODO: map HIS/HIE/HID, CYX, etc
        resname = self.format_resname(resname)
        residue = self._find_residue(resname)
        if residue is None:
            print(f"Not found the residue {resname}")
            return {}
        return self.format_atomname(dict(self._atom_values(residue, "type")))

    def getCharges(self, resname):
        """Return ``{atom name: charge as float}`` for ``resname``.

        An unknown residue prints a message and yields an empty dict.
        """
        # TODO: possible changes to map the residue name
        # TODO: map HIS/HIE/HID, CYX, etc
        resname = self.format_resname(resname)
        residue = self._find_residue(resname)
        if residue is None:
            print(f"Not found the residue {resname}")
            return {}
        charges = {name: float(value) for name, value in self._atom_values(residue, "charge")}
        return self.format_atomname(charges)

    def getAtomCharge(self, resname, atomname):
        """Return the charge of ``atomname`` in ``resname`` as a float.

        If the residue or the atom is missing, a message is printed and ``0``
        is returned.
        """
        resname = self.format_resname(resname)
        residue = self._find_residue(resname)
        if residue is None:
            print(f"Not found the residue {resname}")
            return 0
        for name, value in self._atom_values(residue, "charge"):
            if name == atomname:
                return float(value)
        print(f"Found the residue {resname}, however, Not found the atom {atomname}")
        return 0

    def addFF(self, ff, waitlist=None):
        """Merge the residues of another force-field XML file into this one.

        Args:
            ff: File name or file object of the XML file to merge.
            waitlist: Residue names to take from ``ff``. ``None`` or an empty
                collection (the default) takes every residue. Names are
                compared as written in ``ff``, before alias mapping.
        """
        containers = self.domff.getElementsByTagName('Residues')
        if containers:
            target = containers[0]
        else:
            # The base document has no <Residues> yet: create one under the root.
            target = self.domff.createElement('Residues')
            self.domff.documentElement.appendChild(target)

        # getElementsByTagName returns a list snapshot, so re-parenting nodes
        # while looping over it is safe.
        for residue in minidom.parse(ff).getElementsByTagName('Residue'):
            if not waitlist or residue.getAttribute("name") in waitlist:
                target.appendChild(residue)
        # Also refreshes self.residues.
        self.map_resnames()
        
# Build one reader from the ligand XML, then merge the protein force field
# into it so both can be queried through the same object.
ligxml = '/home/miemie/Dropbox/Documents/BetaPose/example_lig.xml'
proteinxml = "/home/miemie/Dropbox/Documents/BetaPose/Forcefield/ff14SB.xml"
reader = ffreader(ligxml)
reader.addFF(proteinxml)
# addFF already ends with map_resnames(); this explicit call repeats it.
reader.map_resnames()
# Alternative load order (protein first, ligand merged in afterwards):
# reader = ffreader( proteinxml)
# reader.addFF(ligxml )

# Spot-check one ligand atom and one protein atom.
print(reader.getAtomCharge("LIG", "H10"))
print(reader.getAtomCharge("LEU", "C"))

0.09
0.5973


In [79]:
def load_multiple_ff(filelst):
    """Parse each force-field XML file with ElementTree and again with minidom.

    Every file is read with ``ET.parse``, serialised back to a string and
    re-parsed with ``minidom.parseString``. The resulting documents are
    discarded, so the only observable effect is an exception when a file
    cannot be read or parsed.

    Args:
        filelst: Iterable of file names (or file objects) to parse.

    Returns:
        None.
    """
    for ff_source in filelst:
        tree_root = ET.parse(ff_source).getroot()
        serialized = ET.tostring(tree_root, encoding="unicode")
        minidom.parseString(serialized)
# Smoke-test the loader on the ligand and protein force-field files.
load_multiple_ff(
    [
        "/home/miemie/Dropbox/Documents/BetaPose/example_lig.xml",
        "/home/miemie/Dropbox/Documents/BetaPose/Forcefield/ff14SB.xml",
    ]
)

# Obsolete code

In [None]:
    def addFF(self, ff):
        """Append every residue of the XML file ``ff`` to this force field.

        ElementTree-based variant: ``self.domff`` is serialised, the residues
        found in ``ff`` are appended to its ``<Residues>`` element, and
        ``self.domff`` / ``self.residues`` are rebuilt from the merged tree.
        Progress is reported with ``print``.

        Args:
            ff: File name or file object of the XML file to merge.

        Returns:
            None.
        """
        print(f"Adding the following xml file: {ff}")
        print(f"Residue number before insertion: {len(self.residues)}")
        # toxml() rather than toprettyxml(): pretty-printing before every
        # re-parse would add extra whitespace nodes on each call.
        oldff = ET.fromstring(self.domff.toxml())
        # Residues live inside the <Residues> container; looking up 'Residue'
        # directly under the root finds nothing in that layout.
        residues_oldff = oldff.find('Residues')
        if residues_oldff is None:
            residues_oldff = ET.SubElement(oldff, 'Residues')
        newff = ET.parse(ff)
        residues_newff = newff.findall('*//Residue')
        for i in residues_newff:
            print(i)
            residues_oldff.append(i)

        newff = ET.tostring(oldff, encoding="unicode")
        self.domff = minidom.parseString(newff)
        self.residues = self.domff.getElementsByTagName("Residue")
        print(f"Residue number after insertion: {len(self.residues)}")
        return