## Notebook for building long and short polymers using mbuild
#### TOSELF: Make sure to use mbuild-env when running!

In [25]:
import re, json, warnings
from pathlib import Path

from rdkit import Chem
import mbuild as mb
from mbuild import Compound
from mbuild.lib.recipes.polymer import Polymer

In [28]:
def SMILES_from_monomer_SMIRKS(smirks : str) -> str:
    '''Take a SMIRKS string for a residue in a monomer json file and convert it into a SMILES string that mbuild.load can handle
    Uses some explicit, home-made regex parsing for the conversion:
        1) strip atom map ids (":<digits>") and wildcard attachment points ("*-" / "-*")
        2) replace bracketed atomic numbers (e.g. "[#6]") with the matching element symbol
        3) drop explicit single-bonded hydrogens ("H-" / "-H")
        4) remove the empty parentheses left behind by the previous steps

    Only the elements listed in atom_nums are translated; any other bracketed atom is left as-is
    Returns the converted string (an empty input yields an empty string)'''
    atom_nums = { # atomic numbers for alphabetical replacement when converting to valid SMILES
        'H'  : 1,
        'C'  : 6,
        'N'  : 7,
        'O'  : 8,
        'S'  : 16,
        'Cl' : 17,
        'Br' : 35
    }
    to_remove = '|'.join([r':\d+', r'\*-', r'-\*']) # remove all atom ids and wild groups
    smiles = re.sub(to_remove, '', smirks)

    for letter, num in atom_nums.items(): # replace bracketed atomic numbers with letters
        smiles = re.sub(rf'\[#{num}\]', letter, smiles) # closing bracket in the pattern keeps "[#1]" from matching "[#16]"/"[#17]"
    smiles = re.sub(r'H-|-H', '', smiles) # remove hydrogens

    # A single substitution pass only removes the innermost "()" of a nested run such as "(())",
    # so keep stripping until no empty parentheses remain
    while '()' in smiles:
        smiles = smiles.replace('()', '')

    return smiles

def SMILES_from_monomer_SMIRKS_rdkit(smirks : str) -> str:
    '''Convert a monomer-residue SMIRKS string into a SMILES string by round-tripping it through rdkit
    The string is parsed as SMARTS and the resulting rdkit molecule is written back out as SMILES
    Should be much more robust than the regex approach going forward, but doesn't actually produce SMILES that mbuild can currently digest'''
    return Chem.rdmolfiles.MolToSmiles(Chem.rdmolfiles.MolFromSmarts(smirks))

def build_linear_polymer(mono_path : Path, n : int, outpath : Path=Path('mbuild_polymers'), extensions : list[str]=['pdb']) -> Compound:
    '''Takes the path to a monomer json file and a chain length and builds a new polymer of the specified length
    Saves the polymer to the molecular file extensions chosen and returns the resulting mbuild molecule
    Currently only really works for linear polymers

    Args:
        mono_path  -- path to a json file whose "monomers" entry maps residue names to SMIRKS strings
        n          -- chain length passed to Polymer.build
        outpath    -- directory the output files are written to (created here if it does not exist yet)
        extensions -- file extensions to save, one file per extension, named "<mono_path stem>-N=<n>.<ext>"
                      (the default list is only iterated over, never mutated)

    Raises IndexError if a residue has no hydrogens, since the first/last hydrogen are used as bonding sites

    TODO:
        -- Fix weirdness with terminal group (1 is always tangled for some reason)
        -- Find way to automatically determine hydrogen replacement indices for more complex polymer geometries'''
    with mono_path.open('r') as mono_file:
        monos_by_smirks = json.load(mono_file)['monomers']

    chain = Polymer()
    for res_name, smirks in monos_by_smirks.items():
        SMILES = SMILES_from_monomer_SMIRKS(smirks)
        print(SMILES)
        monomer = mb.load(SMILES, smiles=True) # create mbuild compounds from smirks
        monomer.name = res_name # assign name to make tracking easier

        h_ids = [i for i, atom in enumerate(monomer) if atom.name == 'H'] # ids of all hydrogens
        if re.search('TERM', res_name, flags=re.IGNORECASE): # consider terminal group to be any residue whose name contains "term/TERM" anywhere
            chain.add_end_groups(compound=monomer, index=h_ids[0], duplicate=False) # end group bonds through its first hydrogen
        else:
            chain.add_monomer(compound=monomer, indices=(h_ids[0], h_ids[-1])) # repeat unit bonds through its first and last hydrogens

    chain.build(n)

    outpath.mkdir(parents=True, exist_ok=True) # make sure the target directory exists, otherwise saving fails on a fresh checkout
    with warnings.catch_warnings():
        warnings.simplefilter('ignore') # suppress spammy warnings about particles not having charges when converting to parmed.Structure
        for ext in extensions:
            chain.save(str(outpath/f'{mono_path.stem}-N={n}.{ext}'), show_ports=True, overwrite=True)

    return chain

In [78]:
# Build a 15-unit chain from the chosen monomer file and save it in several formats
mol_name = 'peg_modified'
outpath = Path(f'mbuild_polymers/{mol_name}')
outpath.mkdir(parents=True, exist_ok=True) # parents=True so the enclosing 'mbuild_polymers' directory is created too if missing

mono_path = Path(f'compatible_pdbs/simple_polymers/{mol_name}.json')

chain = build_linear_polymer(mono_path=mono_path, n=15, outpath=outpath, extensions=['pdb', 'hoomdxml', 'lammps', 'xyz', 'gsd', 'gro'])

C-C(-O)
C-O-C-C
C(-O-C-C-O)
No urey bradley terms detected, will use angle_style harmonic


In [74]:
# Look up the signature and docstring of Polymer.save (the filename extension selects the output format)
help(Polymer.save)

Help on function save in module mbuild.compound:

save(self, filename, show_ports=False, forcefield_name=None, forcefield_files=None, forcefield_debug=False, box=None, overwrite=False, residues=None, combining_rule='lorentz', foyer_kwargs=None, **kwargs)
    Save the Compound to a file.
    
    Parameters
    ----------
    filename : str
        Filesystem path in which to save the trajectory. The extension or
        prefix will be parsed and control the format. Supported extensions:
        'hoomdxml', 'gsd', 'gro', 'top', 'lammps', 'lmp', 'mcf'
    show_ports : bool, optional, default=False
        Save ports contained within the compound.
    forcefield_files : str, optional, default=None
        Apply a forcefield to the output file using a forcefield provided
        by the `foyer` package.
    forcefield_name : str, optional, default=None
        Apply a named forcefield to the output file using the `foyer`
        package, e.g. 'oplsaa'. `Foyer forcefields
        <https://gi

In [62]:
# Write the structure to 'test.pdb' inside outpath
# NOTE(review): `parm` is not defined in any cell visible here - presumably a parmed Structure derived from `chain`; confirm before re-running
parm.write_pdb(str(outpath/'test.pdb'), use_hetatoms=False)

In [47]:
# NOTE(review): the recorded output of this cell is an AttributeError ('super' object has no attribute '_ipython_display_');
# `parm` is also not defined in any cell visible here
parm.visualize()

AttributeError: 'super' object has no attribute '_ipython_display_'

In [37]:
# List every particle in the built chain for inspection
list(chain.particles())

[<C pos=([ 0.0919 -0.0059  0.0122]), 4 bonds, id: 139955693831840>,
 <C pos=([-0.0571 -0.0342 -0.0029]), 4 bonds, id: 139955532592608>,
 <O pos=([-0.132   0.0792  0.0312]), 2 bonds, id: 139955532588240>,
 <H pos=([ 0.1225  0.0756 -0.0568]), 1 bonds, id: 139955532590160>,
 <H pos=([ 0.15   -0.0973 -0.0123]), 1 bonds, id: 139955532589392>,
 <H pos=([-0.0853 -0.1174  0.065 ]), 1 bonds, id: 139955532590544>,
 <H pos=([-0.0794 -0.0659 -0.1074]), 1 bonds, id: 139955532589056>,
 <C pos=([-0.1248  0.1455 -0.0502]), 4 bonds, id: 139955534542496>,
 <C pos=([-0.2626  0.2003 -0.0851]), 4 bonds, id: 139955514000912>,
 <O pos=([-0.3184  0.2661  0.0253]), 2 bonds, id: 139955514001056>,
 <H pos=([-0.0563  0.2288 -0.0235]), 1 bonds, id: 139955514003360>,
 <H pos=([-0.0829  0.0912 -0.1374]), 1 bonds, id: 139955514014448>,
 <H pos=([-0.3289  0.116  -0.1137]), 1 bonds, id: 139955514003456>,
 <H pos=([-0.2551  0.2694 -0.1721]), 1 bonds, id: 139955541445024>,
 <C pos=([-0.2705  0.3595  0.032 ]), 4 bonds, id

In [5]:
# Look up the signature and docstring of Polymer.save (the filename extension selects the output format)
help(Polymer.save)

Help on function save in module mbuild.compound:

save(self, filename, show_ports=False, forcefield_name=None, forcefield_files=None, forcefield_debug=False, box=None, overwrite=False, residues=None, combining_rule='lorentz', foyer_kwargs=None, **kwargs)
    Save the Compound to a file.
    
    Parameters
    ----------
    filename : str
        Filesystem path in which to save the trajectory. The extension or
        prefix will be parsed and control the format. Supported extensions:
        'hoomdxml', 'gsd', 'gro', 'top', 'lammps', 'lmp', 'mcf'
    show_ports : bool, optional, default=False
        Save ports contained within the compound.
    forcefield_files : str, optional, default=None
        Apply a forcefield to the output file using a forcefield provided
        by the `foyer` package.
    forcefield_name : str, optional, default=None
        Apply a named forcefield to the output file using the `foyer`
        package, e.g. 'oplsaa'. `Foyer forcefields
        <https://gi

In [20]:
# Compare the regex-based and rdkit-based SMIRKS -> SMILES conversions on every polythiophene residue
p = Path('compatible_pdbs/simple_polymers/polythiophene.json')
with p.open('r') as monofile:
    monomer_data = json.load(monofile)
mono_smirks = monomer_data['monomers']

# print(mono_smirks)
for res_name, smirks in mono_smirks.items():
    regex_smiles = SMILES_from_monomer_SMIRKS(smirks)
    rdkit_smiles = SMILES_from_monomer_SMIRKS_rdkit(smirks)
    print(res_name, regex_smiles, rdkit_smiles)

polythiophene C1=C(-C(=C(-S-1))-C(-C(-C(-C)))) *[C]1=[C]([H])[C]([C]([C]([C]([C]([H])([H])[H])([H])[H])([H])[H])([H])[H])=[C](*)[S]1 *[C:1]1=[C:2]([H:10])[C:3]([C:5]([C:7]([C:8]([C:9]([H:17])([H:18])[H:19])([H:15])[H:16])([H:13])[H:14])([H:11])[H:12])=[C:4](*)[S:6]1
polythiophene_TERM1 C1(=C(-C(-C(-C(-C))))-C(=C(-S-1)-Br)) *[C]1=[C]([C]([C]([C]([C]([H])([H])[H])([H])[H])([H])[H])([H])[H])[C]([H])=[C]([Br])[S]1 *[C:1]1=[C:2]([C:3]([C:7]([C:8]([C:9]([H:18])([H:19])[H:20])([H:16])[H:17])([H:14])[H:15])([H:11])[H:12])[C:4]([H:13])=[C:5]([Br:10])[S:6]1
polythiophene_TERM2 C1=C(-C(=C(-Br)-S-1)-C(-C(-C(-C)))) *[C]1=[C]([H])[C]([C]([C]([C]([C]([H])([H])[H])([H])[H])([H])[H])([H])[H])=[C]([Br])[S]1 *[C:1]1=[C:2]([H:11])[C:3]([C:6]([C:8]([C:9]([C:10]([H:18])([H:19])[H:20])([H:16])[H:17])([H:14])[H:15])([H:12])[H:13])=[C:4]([Br:5])[S:7]1


In [None]:
# Scratch cell: round-trip a SMARTS string through rdkit to get a SMILES string
from rdkit import Chem
# NOTE(review): `str` here is the builtin type, not a SMARTS string - this looks like a placeholder;
# substitute the actual SMARTS/SMIRKS text before running this cell
rdmol = Chem.rdmolfiles.MolFromSmarts(str)
output_smiles = Chem.rdmolfiles.MolToSmiles(rdmol)


In [None]:
# Scratch cell: build a small capped test polymer by hand, without going through a monomer json file
outpath = Path('mbuild_polymers')
outpath.mkdir(parents=True, exist_ok=True) # touch() below fails if the directory does not exist yet
poly_file = outpath/'test.pdb'
poly_file.touch()

# m1 = mb.load('CC', smiles=True) # mBuild compound of the monomer unit
# m2 = mb.load('COC', smiles=True) # mBuild compound of the monomer unit
m = mb.load('CC(=O)C', smiles=True) # mBuild compound of the monomer unit
cap = mb.load('N', smiles=True) # end-group compound

chain = Polymer()
chain.add_monomer(compound=m, indices=(4, -1)) # particles 4 and -1 of m are hydrogens according to the recorded particle listing of m
chain.add_end_groups(cap, index=-1, duplicate=True) # duplicate=True: same cap requested for both chain ends

chain.build(n=4)
# with warnings.catch_warnings():
#     warnings.simplefilter('ignore') # suppress spammy warnings about particles not having charges
#     chain.save(str(poly_file), show_ports=True, overwrite=True)

In [12]:
# List the particles of the monomer compound, e.g. to pick bonding-site indices
list(m)

[<C pos=([-0.1269 -0.0014  0.0323]), 4 bonds, id: 139625235953744>,
 <C pos=([ 0.0033  0.0567 -0.0138]), 3 bonds, id: 139625403972912>,
 <O pos=([ 0.0094  0.1752 -0.0426]), 1 bonds, id: 139625399705536>,
 <C pos=([ 0.1255 -0.0293 -0.0249]), 4 bonds, id: 139625403969600>,
 <H pos=([-0.2036  0.0115 -0.047 ]), 1 bonds, id: 139625232669216>,
 <H pos=([-0.1605  0.0508  0.1243]), 1 bonds, id: 139625232669888>,
 <H pos=([-0.1162 -0.1097  0.0545]), 1 bonds, id: 139625232674784>,
 <H pos=([ 0.2123  0.0302 -0.0604]), 1 bonds, id: 139625232671664>,
 <H pos=([ 0.1068 -0.1117 -0.0968]), 1 bonds, id: 139625232674880>,
 <H pos=([ 0.1499 -0.0723  0.0745]), 1 bonds, id: 139625232669984>]

In [11]:
# Render the monomer compound; per the recorded output this needs the "py3Dmol" package and raises DelayImportError without it
m.visualize()


[91m###########################################################[0m

The code at compound.py:1659 requires the "py3Dmol" package

py3Dmol can be installed using:

# conda install -c conda-forge py3Dmol

or

# pip install py3Dmol

[91m###########################################################[0m


DelayImportError: 
The code at compound.py:1659 requires the "py3Dmol" package

py3Dmol can be installed using:

# conda install -c conda-forge py3Dmol

or

# pip install py3Dmol
