## Notebook for building long and short polymers using mbuild
### TOSELF: Make sure to use polybuild-env when running!

In [30]:
import csv, re, json, warnings
from pathlib import Path
from typing import Any, Callable, Optional

from rdkit import Chem
import mbuild as mb
from mbuild import Compound
from mbuild.lib.recipes.polymer import Polymer

## Functions for generating arbitrarily large (or small) linear polymers

In [2]:
def SMILES_from_monomer_SMIRKS(smirks : str) -> str:
    '''Take a SMIRKS string for a residue in a monomer json file and converts it into a SMILES string that mbuild.load can handle
    Uses some explicit, home-made regex parsing for the conversion'''
    atom_nums = { # atom numbers for alphabetical replacement when converting to valid SMILES 
        'H'  : 1,
        'C'  : 6,
        'N'  : 7,
        'O'  : 8,
        'S'  : 16,
        'Cl' : 17,
        'Br' : 35
    }
    to_remove = '|'.join([r':\d+', r'\*-', r'-\*']) # remove all atom ids and wild groups
    smirks = re.sub(to_remove, '', smirks)

    for letter, num in atom_nums.items(): # replace bracketed atomic weights with letters
        smirks = re.sub(rf'\[#{num}\]', letter, smirks)
    smirks = re.sub(r'H-|-H', '', smirks)   # remove hydrogens
    smiles = re.sub(r'\(\)', '', smirks) # remove dangling empty parens - TOSELF: find better way to do this with regex (simple OR isn't greedy, leaves empty parens)

    return smiles

def SMILES_from_monomer_SMIRKS_rdkit(smirks : str) -> str:
    '''Take a SMIRKS string for a residue in a monomer json file and converts it into a SMILES string that mbuild.load can handle
    Uses rdkit functionality, which should be much more robust going forward but doesn't actually produce SMILES that mbuild can currentyl digest'''
    rdmol = Chem.rdmolfiles.MolFromSmarts(smirks)
    smiles = Chem.rdmolfiles.MolToSmiles(rdmol)

    return smiles

def build_linear_polymer(mono_path : Path, N : int) -> Compound:
    '''Takes path to a monomer json file and a chain length, builds a new polymer of the specified length
    Returns the resulting mbuild.Compound() object
    Currently only really works for linear polymers
    
    TODO:
        -- Fix weirdness with terminal group (1 is always tangled for some reason)
        -- Find way to automatically determine hydrogen replacement indices for more complex polymer geometries'''        
    with mono_path.open('r') as mono_file:
        monos_by_smirks = json.load(mono_file)['monomers']

    chain = Polymer() 
    for res_name, smirks in monos_by_smirks.items(): 
        SMILES = SMILES_from_monomer_SMIRKS(smirks)
        # print(SMILES)
        monomer = mb.load(SMILES, smiles=True) # create mbuild compounds from smirks
        monomer.name = res_name # assign name to make tracking easier

        h_ids = [i for i, atom in enumerate(monomer) if atom.name == 'H'] # ids of all hydrogens
        if re.search('TERM', res_name, flags=re.IGNORECASE): # consider terminal group to be any residue whose name contains "term/TERM" anywhere
            chain.add_end_groups(compound=monomer, index=h_ids[0], duplicate=False)
        else:
            chain.add_monomer(compound=monomer, indices=(h_ids[0], h_ids[-1]))
    chain.build(N)

    return chain

## PDB save method testing

In [33]:
import MDAnalysis as mda
from MDAnalysis.topology.LAMMPSParser import DATAParser as LMPParser
from shutil import copyfile

In [31]:
import MDAnalysis as mda
from MDAnalysis.topology.LAMMPSParser import DATAParser as LMPParser

SaveMethod = Callable[[str, Polymer, Path], None]

def save_pdb_direct(mol_name : str, chain : Polymer, outpath : Path) -> None:
    '''Use mbuild native save method with ParmEd backend for pdb writing'''
    chain.save(str(outpath/f'mbuild_parmed_{mol_name}.pdb'))

def save_mda_lammps(mol_name : str, chain : Polymer, outpath : Path) -> None:
    '''Save via MDAnalysis using LAMMPS topology and incomplete pdb as trajectory'''
    pdb_path    = outpath/f'temp_{mol_name}.pdb'
    lammps_path = outpath/f'temp_{mol_name}.lmp'

    chain.save(str(pdb_path), overwrite=True)
    chain.save(str(lammps_path), overwrite=True)

    lammps_parser = LMPParser(str(lammps_path))
    topo = lammps_parser.parse()
    u = mda.Universe(topo, pdb_path) # use explicit topology but implicit trajectory (traj from pdb)

    polymer = u.select_atoms('all')
    with mda.Writer(outpath/f'mda_lammps_{mol_name}.pdb', multiframe=False) as pdb:
        pdb.write(polymer)

    pdb_path.unlink()
    lammps_path.unlink()

def save_mdtraj(mol_name : str, chain : Polymer, outpath : Path) -> None:
    '''Convert to mdtraj and save natively there'''
    traj = chain.to_trajectory() # must convert to MDTraj trajectory to save to pdb (default save via mbuild/ParmEd omits bond info)
    traj.save_pdb(outpath/f'mdtraj_{mol_name}.pdb')


In [35]:
mol_name = 'polyethylmethacrylate'
N = 6

mono_path = Path(f'compatible_pdbs/simple_polymers/{mol_name}.json')
orig_pdb_path = Path(f'compatible_pdbs/simple_polymers/{mol_name}.pdb')
chain = build_linear_polymer(mono_path=mono_path, N=N)

outpath = Path(f'mbuild_polymers/PDB save test/{mol_name}')
outpath.mkdir(exist_ok=True)

save_methods : list[SaveMethod] = [save_pdb_direct, save_mda_lammps, save_mdtraj]
with warnings.catch_warnings():
    warnings.simplefilter('ignore') # suppress spammy warnings about particles not having charges

    copyfile(orig_pdb_path, outpath/f'original_{mol_name}.pdb')
    for method in save_methods:
        method(mol_name, chain, outpath)

No urey bradley terms detected, will use angle_style harmonic


## Make chains for all available polymers

In [None]:
poly_table_path = Path('compatible_pdbs/simple_polymers/Available Polymers.json')
with poly_table_path.open('r') as poly_file:
    inventory = json.load(poly_file)

blacklist = ['PAMAM', 'vulcanizedrubber']
mols_to_use = [mol for mol in inventory if mol not in blacklist]
print(mols_to_use)

['polyphenyleneI', 'naturalrubber', 'PEO_PLGA', 'polyvinylchloride', 'paam_modified', 'syntactic_styrene', 'PolyphenyleneIII', 'polyphenylenesulfone', 'polymethylketone', 'peg_modified', 'polyethylmethacrylate', 'polyethylene', 'polyphenyleneII', 'polythiophene', 'atactic_styrene', 'bisphenolA']


In [13]:
#  extensions=['pdb', 'hoomdxml', 'gsd', 'gro', 'top', 'lmp']
from IPython.display import clear_output

# mols_to_use = ['polyvinylchloride']

N = 10 # number of monomer groups to build
sizes = {}
success, fail = [], []
for mol_name in mols_to_use:
    print(f'Building {mol_name}...')
    # mol_name = 'vulcanizedrubber'#'polyethylmethacrylate'

    mono_path = Path(f'compatible_pdbs/simple_polymers/{mol_name}.json')
    try:
        chain = build_linear_polymer(mono_path=mono_path, N=N)
        sizes[mol_name] = sum(1 for _ in chain.particles()) # must use in place of "len" for generator
        traj = chain.to_trajectory() # must convert to MDTraj trajectory to save to pdb (default save via mbuild/ParmEd omits bond info)
        
        outpath = Path(f'mbuild_polymers/{mol_name}')
        outpath.mkdir(exist_ok=True)
        outname = f'{mol_name}-N={N}'

        traj.save_pdb(outpath/f'{outname}.pdb')
        success.append(mol_name)
    except ValueError:
        print(f'{mol_name} cannot be assembled linearly')
        fail.append(mol_name)
    
    clear_output()

print(sizes, success, fail)

{'naturalrubber': 145, 'polyvinylchloride': 68, 'paam_modified': 112, 'syntactic_styrene': 178, 'polyphenylenesulfone': 145, 'polymethylketone': 640, 'peg_modified': 83, 'polyethylmethacrylate': 200, 'polyethylene': 68, 'polythiophene': 211, 'atactic_styrene': 178, 'bisphenolA': 364}


In [6]:
with warnings.catch_warnings():
    warnings.simplefilter('ignore') # suppress spammy warnings about particles not having charges when converting to parmed.Structure
    chain.visualize()   

## Testing SMILES string conversion method(s) for formatting

In [7]:
p = Path('compatible_pdbs/simple_polymers/polythiophene.json')
with p.open('r') as monofile:
    mono_smirks = json.load(monofile)['monomers']

# print(mono_smirks)
for res_name, smirks in mono_smirks.items():
    print(
        f'{res_name}:\n'
        f'\tCustom: {SMILES_from_monomer_SMIRKS(smirks)}\n'
        f'\tRDKit: {SMILES_from_monomer_SMIRKS_rdkit(smirks)}\n'
    )

polythiophene:
	Custom: C1=C(-C(=C(-S-1))-C(-C(-C(-C))))
	RDKit: *[C:1]1=[C:2]([H:10])[C:3]([C:5]([C:7]([C:8]([C:9]([H:17])([H:18])[H:19])([H:15])[H:16])([H:13])[H:14])([H:11])[H:12])=[C:4](*)[S:6]1

polythiophene_TERM1:
	Custom: C1(=C(-C(-C(-C(-C))))-C(=C(-S-1)-Br))
	RDKit: *[C:1]1=[C:2]([C:3]([C:7]([C:8]([C:9]([H:18])([H:19])[H:20])([H:16])[H:17])([H:14])[H:15])([H:11])[H:12])[C:4]([H:13])=[C:5]([Br:10])[S:6]1

polythiophene_TERM2:
	Custom: C1=C(-C(=C(-Br)-S-1)-C(-C(-C(-C))))
	RDKit: *[C:1]1=[C:2]([H:11])[C:3]([C:6]([C:8]([C:9]([C:10]([H:18])([H:19])[H:20])([H:16])[H:17])([H:14])[H:15])([H:12])[H:13])=[C:4]([Br:5])[S:7]1



In [8]:
from rdkit import Chem
rdmol = Chem.rdmolfiles.MolFromSmarts(str)
output_smiles = Chem.rdmolfiles.MolToSmiles(rdmol)

TypeError: No registered converter was able to produce a C++ rvalue of type std::__cxx11::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > from this Python object of type type

## Following example code for Polymer recipe
### (https://mbuild.mosdef.org/en/stable/getting_started/quick_start/polymer_example.html?highlight=polymer)

In [None]:
outpath = Path('mbuild_polymers')
poly_file = outpath/'test.pdb'
poly_file.touch()

# m1 = mb.load('CC', smiles=True) # mBuild compound of the monomer unit
# m2 = mb.load('COC', smiles=True) # mBuild compound of the monomer unit
m = mb.load('CC(=O)C', smiles=True) # mBuild compound of the monomer unit
cap = mb.load('N', smiles=True)

chain = Polymer()
chain.add_monomer(compound=m, indices=(4, -1))
chain.add_end_groups(cap, index=-1, duplicate=True)

chain.build(n=4)
# with warnings.catch_warnings():
#     warnings.simplefilter('ignore') # suppress spammy warnings about particles not having charges
#     chain.save(str(poly_file), show_ports=True, overwrite=True)