## Notebook for building long and short polymers using mbuild
#### Note to self: activate the `mbuild-env` environment before running this notebook!

In [129]:
import re, json, warnings
from pathlib import Path

import mbuild as mb
from mbuild.lib.recipes.polymer import Polymer

In [54]:
# Scratch cell: build a short capped test chain and write it to mbuild_polymers/test.pdb
outpath = Path('mbuild_polymers')
outpath.mkdir(parents=True, exist_ok=True) # touch() below raises FileNotFoundError if the folder is missing
poly_file = outpath/'test.pdb'
poly_file.touch()

# alternative monomer units kept around for experimentation
# m1 = mb.load('CC', smiles=True) # mBuild compound of the monomer unit
# m2 = mb.load('COC', smiles=True) # mBuild compound of the monomer unit
m = mb.load('CC(=O)C', smiles=True) # mBuild compound of the monomer unit
cap = mb.load('N', smiles=True) # mBuild compound used as the end group

chain = Polymer()
chain.add_monomer(compound=m, indices=(4, -1)) # NOTE(review): indices presumably select the atoms replaced by bonds to neighbouring units - confirm against the Polymer docs
chain.add_end_groups(cap, index=-1, duplicate=True) # NOTE(review): duplicate=True presumably puts the same cap on both chain ends - confirm

chain.build(n=4)
with warnings.catch_warnings():
    warnings.simplefilter('ignore') # suppress spammy warnings about particles not having charges
    chain.save(str(poly_file), show_ports=True, overwrite=True)

In [155]:
def SMILES_from_monomer_SMIRKS(smirks : str) -> str:
    '''Take a SMIRKS string for a residue in a monomer json file and convert it into a SMILES string that mbuild.load can handle

    The conversion is purely textual and happens in this order:
        -- atom map ids (":<digits>") and bare wild groups ("*-" / "-*") are stripped
        -- bracketed atomic numbers ("[#6]") are replaced by element symbols; only H, C, N, O, S and Cl
           are known, any other "[#n]" token is left untouched
        -- explicit hydrogens ("H-" / "-H") are dropped
        -- parentheses left empty by the removals above are deleted, including nested ones such as "(())"

    Returns the converted string; no chemical validation is performed'''
    atom_nums = { # atom numbers for alphabetical replacement when converting to valid SMILES 
        'H'  : 1,
        'C'  : 6,
        'N'  : 7,
        'O'  : 8,
        'S'  : 16,
        'Cl' : 17
    }
    to_remove = '|'.join([r':\d+', r'\*-', r'-\*']) # remove all atom ids and wild groups
    smirks = re.sub(to_remove, '', smirks)

    for letter, num in atom_nums.items(): # replace bracketed atomic numbers with letters
        smirks = re.sub(rf'\[#{num}\]', letter, smirks) # closing bracket in the pattern keeps "[#1]" from matching inside "[#16]" / "[#17]"
    smirks = re.sub(r'H-|-H', '', smirks)   # remove hydrogens

    # remove dangling empty parens; repeated until none remain because deleting one "()"
    # can expose another (e.g. "(())" -> "()" -> ""), which a single pass would leave behind
    while '()' in smirks:
        smirks = smirks.replace('()', '')

    return smirks

def build_linear_polymer(mono_path : Path, n : int, outpath : Path=Path('mbuild_polymers')) -> None:
    '''Takes the path to a monomer json file and a chain length and builds a new polymer PDB of the specified length
    Currently only really works for linear polymers

    Args:
        mono_path : path to a json file whose "monomers" entry maps residue names to SMIRKS strings
        n : chain length, passed straight to Polymer.build
        outpath : folder the PDB is written to (created if it does not exist yet); the file is named "<json stem>-N=<n>.pdb"

    Residues whose name contains "term" (in any letter case) are added as end groups, all others as monomers.
    Raises IndexError if a residue yields a compound without any hydrogens
    
    TODO:
        -- Fix weirdness with terminal group (1 is always tangled for some reason)
        -- Find way to automatically determine hydrogen replacement indices for more complex polymer geometries'''
    with mono_path.open('r') as mono_file:
        monos_by_smirks = json.load(mono_file)['monomers']

    chain = Polymer() 
    for res_name, smirks in monos_by_smirks.items(): 
        SMILES = SMILES_from_monomer_SMIRKS(smirks)
        print(SMILES)
        monomer = mb.load(SMILES, smiles=True) # create mbuild compounds from smirks
        monomer.name = res_name # assign name to make tracking easier

        h_ids = [i for i, atom in enumerate(monomer) if atom.name == 'H'] # ids of all hydrogens
        if re.search('TERM', res_name, flags=re.IGNORECASE): # consider terminal group to be any residue whose name contains "term/TERM" anywhere
            # NOTE(review): no label is passed here, so with duplicate=False every terminal residue presumably targets
            # the same (default) chain end and a second one would replace the first - confirm against the
            # Polymer.add_end_groups docs; this may be related to the tangled-terminal TODO above
            chain.add_end_groups(compound=monomer, index=h_ids[0], duplicate=False)
        else:
            chain.add_monomer(compound=monomer, indices=(h_ids[0], h_ids[-1])) # first and last hydrogen serve as the two linkage sites

    chain.build(n)
    outpath.mkdir(parents=True, exist_ok=True) # saving into a missing folder would fail, so make sure it exists first
    with warnings.catch_warnings():
        warnings.simplefilter('ignore') # suppress spammy warnings about particles not having charges
        chain.save(str(outpath/f'{mono_path.stem}-N={n}.pdb'), show_ports=True, overwrite=True) # save to appropriately-named pdb

In [157]:
# build a 12-unit chain from the polyphenylenesulfone monomer definitions
monomer_json = Path('compatible_pdbs/simple_polymers/polyphenylenesulfone.json')
build_linear_polymer(mono_path=monomer_json, n=12)

C1=C(-C(=C(-S(=O)(=O))-C(=C-1)))
C1=C(-C(=C(-S(=O)(=O))-C(=C-1)))
S(=O)(-C1=C(-C(=C(-C(=C-1)))))=O


In [132]:
# go through the public visualize() entry point rather than the private nglview helper
# NOTE(review): the recorded AttributeError ('_ipython_display_') presumably stems from the installed
# nglview/ipywidgets versions rather than from this call - confirm the environment if it persists
m.visualize(backend='nglview')

AttributeError: 'super' object has no attribute '_ipython_display_'