## Notebook for building long and short polymers using mbuild
#### TOSELF: Make sure to use mbuild-env when running!

In [1]:
import re, json, warnings
from pathlib import Path

import mbuild as mb
from mbuild.lib.recipes.polymer import Polymer

In [9]:
outpath = Path('mbuild_polymers')
poly_file = outpath/'test.pdb'
poly_file.touch()

# m1 = mb.load('CC', smiles=True) # mBuild compound of the monomer unit
# m2 = mb.load('COC', smiles=True) # mBuild compound of the monomer unit
m = mb.load('CC(=O)C', smiles=True) # mBuild compound of the monomer unit
cap = mb.load('N', smiles=True)

chain = Polymer()
chain.add_monomer(compound=m, indices=(4, -1))
chain.add_end_groups(cap, index=-1, duplicate=True)

chain.build(n=4)
# with warnings.catch_warnings():
#     warnings.simplefilter('ignore') # suppress spammy warnings about particles not having charges
#     chain.save(str(poly_file), show_ports=True, overwrite=True)

In [15]:
def SMILES_from_monomer_SMIRKS(smirks : str) -> str:
    '''Take a SMIRKS string for a residue in a monomer json file and converts it into a SMILES string that mbuild.load can handle'''
    atom_nums = { # atom numbers for alphabetical replacement when converting to valid SMILES 
        'H'  : 1,
        'C'  : 6,
        'N'  : 7,
        'O'  : 8,
        'S'  : 16,
        'Cl' : 17
    }
    to_remove = '|'.join([r':\d+', r'\*-', r'-\*']) # remove all atom ids and wild groups
    smirks = re.sub(to_remove, '', smirks)

    for letter, num in atom_nums.items(): # replace bracketed atomic weights with letters
        smirks = re.sub(rf'\[#{num}\]', letter, smirks)
    smirks = re.sub(r'H-|-H', '', smirks)   # remove hydrogens
    smirks = re.sub(r'\(\)', '', smirks) # remove dangling empty parens - TOSELF: find better way to do this with regex (simple OR isn't greedy, leaves empty parens)

    return smirks

def build_linear_polymer(mono_path : Path, n : int, outpath : Path=Path('mbuild_polymers')) -> None:
    '''Takes the path to a monomer json file and a chain length and builds a new polymer PDB of the specified length
    Currently only really works for linear polymers
    
    TODO:
        -- Fix weirdness with terminal group (1 is always tangled for some reason)
        -- Find way to automatically determine hydrogen replacement indices for more complex polymer geometries'''
    with mono_path.open('r') as mono_file:
        monos_by_smirks = json.load(mono_file)['monomers']

    chain = Polymer() 
    for res_name, smirks in monos_by_smirks.items(): 
        SMILES = SMILES_from_monomer_SMIRKS(smirks)
        print(SMILES)
        monomer = mb.load(SMILES, smiles=True) # create mbuild compounds from smirks
        monomer.name = res_name # assign name to make tracking easier

        h_ids = [i for i, atom in enumerate(monomer) if atom.name == 'H'] # ids of all hydrogens
        if re.search('TERM', res_name, flags=re.IGNORECASE): # consider terminal group to be any residue whose name contains "term/TERM" anywhere
            chain.add_end_groups(compound=monomer, index=h_ids[0], duplicate=False)
        else:
            chain.add_monomer(compound=monomer, indices=(h_ids[0], h_ids[-1]))

    chain.build(n)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore') # suppress spammy warnings about particles not having charges
        chain.save(str(outpath/f'{mono_path.stem}-N={n}.pdb'), show_ports=True, overwrite=True) # save to appropriately-named pdb

In [14]:
build_linear_polymer(mono_path=Path('compatible_pdbs/simple_polymers/polythiophene.json'), n=10)

C1=C(-C(=C(-S-1))-C(-C(-C(-C))))
C1(=C(-C(-C(-C(-C))))-C(=C(-S-1)-[#35]))
C1=C(-C(=C(-[#35])-S-1)-C(-C(-C(-C))))


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [5]:
p = Path('compatible_pdbs/simple_polymers/naturalrubber.json')
with p.open('r') as monofile:
    mono_smirks = json.load(monofile)['monomers']

print(mono_smirks)
for res_name, smirks in mono_smirks.items():
    print(res_name, SMILES_from_monomer_SMIRKS(smirks))

{'natural_rubber': '*-[#6:7](-[#6:5](=[#6:4](-[#6:1](-[#1:2])(-[#1:3])-*)-[#6:10](-[#1:11])(-[#1:12])-[#1:13])-[#1:6])(-[#1:8])-[#1:9]', 'natural_rubber_TERM1': '[#6:1](-[#1:2])(-[#1:3])(-[#6:4](=[#6:5](-[#1:6])-[#6:7](-[#1:8])(-[#1:9])-[#1:14])-[#6:10](-[#1:11])(-[#1:12])-[#1:13])-*', 'natural_rubber_TERM2': '*-[#6:8](-[#6:6](=[#6:5](-[#6:1](-[#1:2])(-[#1:3])-[#1:4])-[#6:11](-[#1:12])(-[#1:13])-[#1:14])-[#1:7])(-[#1:9])-[#1:10]'}
natural_rubber C(-C(=C(-C)-C))
natural_rubber_TERM1 C(-C(=C-C)-C)
natural_rubber_TERM2 C(-C(=C(-C)-C))


In [None]:
from rdkit import Chem
rdmol = Chem.rdmolfiles.MolFromSmarts(str)
output_smiles = Chem.rdmolfiles.MolToSmiles(rdmol)


In [12]:
[i for i in m]

[<C pos=([-0.1269 -0.0014  0.0323]), 4 bonds, id: 139625235953744>,
 <C pos=([ 0.0033  0.0567 -0.0138]), 3 bonds, id: 139625403972912>,
 <O pos=([ 0.0094  0.1752 -0.0426]), 1 bonds, id: 139625399705536>,
 <C pos=([ 0.1255 -0.0293 -0.0249]), 4 bonds, id: 139625403969600>,
 <H pos=([-0.2036  0.0115 -0.047 ]), 1 bonds, id: 139625232669216>,
 <H pos=([-0.1605  0.0508  0.1243]), 1 bonds, id: 139625232669888>,
 <H pos=([-0.1162 -0.1097  0.0545]), 1 bonds, id: 139625232674784>,
 <H pos=([ 0.2123  0.0302 -0.0604]), 1 bonds, id: 139625232671664>,
 <H pos=([ 0.1068 -0.1117 -0.0968]), 1 bonds, id: 139625232674880>,
 <H pos=([ 0.1499 -0.0723  0.0745]), 1 bonds, id: 139625232669984>]

In [11]:
m.visualize()


[91m###########################################################[0m

The code at compound.py:1659 requires the "py3Dmol" package

py3Dmol can be installed using:

# conda install -c conda-forge py3Dmol

or

# pip install py3Dmol

[91m###########################################################[0m


DelayImportError: 
The code at compound.py:1659 requires the "py3Dmol" package

py3Dmol can be installed using:

# conda install -c conda-forge py3Dmol

or

# pip install py3Dmol
