## Notebook for building long and short polymers using mbuild
### TOSELF: Make sure to use polybuild-env when running!

In [None]:
import csv, re, json, warnings
from pathlib import Path

from rdkit import Chem
import mbuild as mb
from mbuild import Compound
from mbuild.lib.recipes.polymer import Polymer

## Functions for generating arbitrarily large (or small) linear polymers

In [None]:
def SMILES_from_monomer_SMIRKS(smirks : str) -> str:
    '''Convert the SMIRKS string of a residue in a monomer json file into a SMILES string that mbuild.load can handle

    The conversion is purely textual (hand-written regex substitutions), applied in this order:
        1) strip atom map ids (":<digits>") and wildcard attachment points ("*-" and "-*")
        2) swap bracketed atomic numbers (e.g. "[#6]") for the matching element symbol
        3) strip explicit hydrogens ("H-" and "-H")
        4) strip empty "()" pairs, e.g. the ones left behind by step 3 (single pass, nested empties are not collapsed)
    Only H, C, N, O, S, Cl, and Br are translated; any other bracketed atomic number is passed through unchanged'''
    symbols_by_atomic_num = ( # (atomic number, element symbol) pairs, in the order they get substituted
        (1 , 'H' ),
        (6 , 'C' ),
        (7 , 'N' ),
        (8 , 'O' ),
        (16, 'S' ),
        (17, 'Cl'),
        (35, 'Br'),
    )

    # atom ids and wild groups are dropped together in one pass
    id_pattern, leading_wild_pattern, trailing_wild_pattern = r':\d+', r'\*-', r'-\*'
    smiles = re.sub(f'{id_pattern}|{leading_wild_pattern}|{trailing_wild_pattern}', '', smirks)

    # the closing bracket in each pattern keeps e.g. "[#1]" from matching inside "[#16]"
    for atomic_num, symbol in symbols_by_atomic_num:
        smiles = re.sub(rf'\[#{atomic_num}\]', symbol, smiles)

    # hydrogens go first, then the empty parens their removal leaves dangling
    # TOSELF: find better way to do this with regex (folding both into one OR pattern leaves empty parens behind)
    for leftover_pattern in (r'H-|-H', r'\(\)'):
        smiles = re.sub(leftover_pattern, '', smiles)

    return smiles

def SMILES_from_monomer_SMIRKS_rdkit(smirks : str) -> str:
    '''Take a SMIRKS string for a residue in a monomer json file and convert it into a SMILES string via rdkit
    Uses rdkit functionality, which should be much more robust going forward but doesn't actually produce SMILES that mbuild can currently digest

    Raises ValueError if rdkit is unable to parse the string'''
    rdmol = Chem.rdmolfiles.MolFromSmarts(smirks)
    if rdmol is None: # rdkit reports a parse failure by returning None instead of raising
        raise ValueError(f'RDKit could not parse "{smirks}" as a SMARTS/SMIRKS string')

    return Chem.rdmolfiles.MolToSmiles(rdmol)

def build_linear_polymer(mono_path : Path, N : int) -> Compound:
    '''Takes path to a monomer json file and a chain length, builds a new polymer of the specified length
    Returns the resulting mbuild.Compound() object
    Currently only really works for linear polymers

    The json file must have a "monomers" entry mapping residue names to SMIRKS strings
    Any residue whose name contains "term" (case-insensitive) is treated as an end group:
    the first such residue caps the head of the chain and the second caps the tail
    Every other residue is registered as a repeat unit, bonded through its first and last hydrogens

    Raises ValueError if a residue has no hydrogens to replace, or if more than two terminal residues are present

    TODO:
        -- Confirm whether the tangled terminal group persists now that head and tail are capped separately
        -- Find way to automatically determine hydrogen replacement indices for more complex polymer geometries'''
    with mono_path.open('r') as mono_file:
        monos_by_smirks = json.load(mono_file)['monomers']

    chain = Polymer()
    # With duplicate=False every add_end_groups call fills the slot named by `label` (default "head"),
    # so each terminal residue needs its own label or a later one overwrites the earlier one
    end_labels = iter(('head', 'tail'))
    for res_name, smirks in monos_by_smirks.items():
        SMILES = SMILES_from_monomer_SMIRKS(smirks)
        monomer = mb.load(SMILES, smiles=True) # create mbuild compounds from smirks
        monomer.name = res_name # assign name to make tracking easier

        h_ids = [i for i, atom in enumerate(monomer.particles()) if atom.name == 'H'] # ids of all hydrogens
        if not h_ids: # bonding sites are made by replacing hydrogens, so at least one is required
            raise ValueError(f'Residue "{res_name}" has no hydrogens to replace with bonding sites')

        if re.search('TERM', res_name, flags=re.IGNORECASE): # consider terminal group to be any residue whose name contains "term/TERM" anywhere
            label = next(end_labels, None)
            if label is None: # a linear chain only has two ends
                raise ValueError(f'Residue "{res_name}" is a third terminal group; a linear polymer only has two ends')
            chain.add_end_groups(compound=monomer, index=h_ids[0], label=label, duplicate=False)
        else:
            chain.add_monomer(compound=monomer, indices=(h_ids[0], h_ids[-1]))
    chain.build(N)

    return chain

In [None]:
# Polymers that are excluded from chain building
blacklist = ['PAMAM', 'vulcanizedrubber']

# Load the table of available polymers (iterating it yields the polymer names)
poly_table_path = Path('compatible_pdbs/simple_polymers/Available Polymers.json')
with poly_table_path.open('r') as poly_file:
    inventory = json.load(poly_file)

# Keep every inventoried polymer that is not blacklisted, preserving inventory order
mols_to_use = [name for name in inventory if name not in blacklist]
print(mols_to_use)

In [68]:
#  extensions=['pdb', 'hoomdxml', 'gsd', 'gro', 'top', 'lmp']
from IPython.display import clear_output

N = 10 # number of monomer groups to build
sizes = {} # particle count of each chain that was built, keyed by polymer name
for mol_name in mols_to_use:
    print(f'Building {mol_name}...')
    # mol_name = 'vulcanizedrubber'#'polyethylmethacrylate'

    mono_path = Path(f'compatible_pdbs/simple_polymers/{mol_name}.json')
    # NOTE(review): a ValueError raised while converting or saving is also reported as an assembly failure below
    try:
        chain = build_linear_polymer(mono_path=mono_path, N=N)
        sizes[mol_name] = sum(1 for _ in chain.particles()) # must use in place of "len" for generator
        traj = chain.to_trajectory() # must convert to MDTraj trajectory to save to pdb (default save via mbuild/ParmEd omits bond info)

        outpath = Path(f'mbuild_polymers/{mol_name}')
        outpath.mkdir(parents=True, exist_ok=True) # parents=True so a missing top-level "mbuild_polymers" folder is created too
        outname = f'{mol_name}-N={N}'

        traj.save_pdb(outpath/f'{outname}.pdb')
        clear_output() # wipe the progress messages once this polymer has been written out
    except ValueError:
        print(f'{mol_name} cannot be assembled linearly')

print(sizes)

{'naturalrubber': 145, 'polyvinylchloride': 68, 'paam_modified': 112, 'syntactic_styrene': 178, 'polyphenylenesulfone': 145, 'polymethylketone': 640, 'peg_modified': 83, 'polyethylmethacrylate': 200, 'polyethylene': 68, 'polythiophene': 211, 'atactic_styrene': 178, 'bisphenolA': 364}


In [None]:
# Converting to a parmed.Structure warns about every particle that has no charge;
# mute those warnings for the duration of the rendering only
with warnings.catch_warnings():
    warnings.simplefilter(action='ignore')
    chain.visualize()

## Testing SMILES string conversion method(s) for formatting

In [None]:
# Compare both SMIRKS -> SMILES converters side by side on every residue of one monomer file
p = Path('compatible_pdbs/simple_polymers/polythiophene.json')
with p.open('r') as monofile:
    mono_smirks = json.load(monofile)['monomers']

for res_name, smirks in mono_smirks.items():
    custom_smiles = SMILES_from_monomer_SMIRKS(smirks)      # regex-based conversion
    rdkit_smiles = SMILES_from_monomer_SMIRKS_rdkit(smirks) # rdkit-based conversion
    print(f'{res_name}:\n\tCustom: {custom_smiles}\n\tRDKit: {rdkit_smiles}\n')

In [None]:
from rdkit import Chem

# MolFromSmarts needs an actual SMARTS string (handing it the builtin type `str` fails outright)
# Placeholder input: swap in whichever SMARTS/SMIRKS string is being tested
test_smarts = '[#6:1](-[#1:2])-[#8:3]'
rdmol = Chem.rdmolfiles.MolFromSmarts(test_smarts)
if rdmol is None: # rdkit returns None rather than raising when the string cannot be parsed
    raise ValueError(f'RDKit could not parse "{test_smarts}" as a SMARTS string')
output_smiles = Chem.rdmolfiles.MolToSmiles(rdmol)

## Following example code for Polymer recipe
### (https://mbuild.mosdef.org/en/stable/getting_started/quick_start/polymer_example.html?highlight=polymer)

In [None]:
outpath = Path('mbuild_polymers')
outpath.mkdir(parents=True, exist_ok=True) # touch() below raises FileNotFoundError if the folder is missing
poly_file = outpath/'test.pdb'
poly_file.touch() # placeholder file; only written to by the commented-out save at the bottom

# Alternative monomer units:
# m1 = mb.load('CC', smiles=True) # mBuild compound of the monomer unit
# m2 = mb.load('COC', smiles=True) # mBuild compound of the monomer unit
m = mb.load('CC(=O)C', smiles=True) # mBuild compound of the monomer unit
cap = mb.load('N', smiles=True) # mBuild compound of the end group

chain = Polymer()
# indices pick the particles that get swapped out for bonding sites
# (presumably hydrogens: 4 is the first particle after the four heavy atoms, -1 is the last particle - TODO confirm)
chain.add_monomer(compound=m, indices=(4, -1))
chain.add_end_groups(cap, index=-1, duplicate=True) # duplicate=True puts a copy of the cap on both ends

chain.build(n=4) # chain of 4 repeat units
# with warnings.catch_warnings():
#     warnings.simplefilter('ignore') # suppress spammy warnings about particles not having charges
#     chain.save(str(poly_file), show_ports=True, overwrite=True)