# Example 3: PEG-PLGA copolymers
A more complicated ensemble of copolymers, all of which react according to the same ester pathway

In [None]:
from rdkit.Chem.Draw import MolsToGridImage
from polymerist.rdutils.rdkdraw import set_rdkdraw_size, disable_substruct_highlights, enable_kekulized_drawing


# Pixel dimensions shared by every molecule grid image drawn below
MOL_IMG_SIZE : tuple[int, int] = (400, 300) # (width, height)
MOL_IMG_WIDTH, MOL_IMG_HEIGHT = MOL_IMG_SIZE

# Configure molecule rendering once, up front, before anything is displayed
enable_kekulized_drawing()
disable_substruct_highlights()
# NOTE(review): the second argument is the width/height ratio - presumably an aspect ratio; confirm against set_rdkdraw_size's signature
set_rdkdraw_size(MOL_IMG_WIDTH, MOL_IMG_WIDTH / MOL_IMG_HEIGHT)

## Define chemically-complete monomer "feedstocks"

In [None]:
from rdkit.Chem.rdchem import Mol
from rdkit.Chem.rdmolops import SANITIZE_ALL, AROMATICITY_MDL
from polymerist.rdutils.sanitization import explicit_mol_from_SMILES, Smiles


# Chemically-complete small-molecule feedstocks, keyed by abbreviation
monomer_smiles : dict[str, Smiles] = {
    'EG' : 'OCCO',        # ethylene glycol
    'LA' : 'OC(=O)C(C)O', # lactic acid
    'GA' : 'OC(=O)CO',    # glycolic acid
}
# Convert each SMILES string into an RDKit Mol, applying full sanitization and the MDL aromaticity model
monomers : dict[str, Mol] = {
    name : explicit_mol_from_SMILES(smiles, sanitize_ops=SANITIZE_ALL, aromaticity_model=AROMATICITY_MDL)
    for name, smiles in monomer_smiles.items()
}

# Hand the drawing routine concrete lists rather than dict views: molecules and legends are then
# plain, index-aligned sequences which remain subscriptable if the renderer needs to slice them
MolsToGridImage(list(monomers.values()), molsPerRow=len(monomers), subImgSize=MOL_IMG_SIZE, legends=list(monomers))

## Define polycondensation reaction template

### Assemble reaction from target functional groups (in this case a hydroxyl and a carboxylic acid)
In this case, one needs to be careful in how the hydroxyl moiety is specified, as the naive definition ("*-O-[H]") is _also_ a substructure of the carboxyl group   
The reactive functional group templates of Polymerist's ReactionAssembler are defined by SMARTS queries, allowing one to perform more complex substructure queries for cases like this

In [None]:
from rdkit.Chem.rdmolfiles import MolFromSmarts, MolToSmiles
from polymerist.rdutils.reactions import ReactionAssembler


# NOTE: the hydroxyl query below deliberately has no "*" linker site. A "*" matches ANY atom without restriction
# (which is not what we want here), so a "not a carbonyl carbon" query takes the place of the linker site instead
assem = ReactionAssembler(
    reactive_groups=[
        MolFromSmarts(smarts)
        for smarts in (
            '*-C(=O)-O-[H]',      # carboxyl group (defined as one would expect)
            '[!$([#6]=O)]-O-[H]', # hydroxyl group (defined as OH specifically NOT attached to a carbonyl carbon)
        )
    ],
    byproducts=[MolFromSmarts('[H]-[O]-[H]')], # water is ejected (and unmapped) as the byproduct
    rxn_name='polyester condensation',
)
display(assem.reactants)

In [None]:
# Each entry reads {moving atom : (atom it is currently bonded to, atom it should be re-bonded to)}
assem.bond_derangement = {
    # H:8, which was connected to O:7, should reconnect to O:4
    8 : (7, 4),
    # C:2, which was connected to O:4, should reconnect to O:7
    2 : (4, 7),
}
# These two moves form a closed bond cycle: atoms 2 and 8 each lose one bond and gain one, amounting to no net valence change.
# Also note that the seemingly-equivalent cycle {
#   7 : (6, 5),
#   4 : (5, 6),
# }
# is NOT valid, as one of its intermediate fragments consists of two linkers bonded to one another ("neutronium"),
# which acts as an "identity" element when splicing bonds

rxn = assem.assemble_rxn(show_steps=True)
display(rxn)

### Export reaction for re-use

In [None]:
from pathlib import Path

# Keep the reaction's SMARTS string in memory for re-use later in the notebook
rxn_smarts = rxn.to_smarts()

# Also write the reaction out to an .rxn file named after the reaction itself
rxn_dir = Path('rxn_demo_files') / 'rxns' # plain path segments - no string interpolation is needed here
rxn_dir.mkdir(parents=True, exist_ok=True) # create the output directory (and any missing parents); no error if it already exists
rxn.to_rxnfile(rxn_dir / f'{rxn.rxnname}.rxn')

## Enumerate all possible repeat unit fragments from monomers and reaction template

In [None]:
from polymerist.rdutils.reactions import AnnotatedReaction
from polymerist.rdutils.reactions import PolymerizationReactor, CutMinimumCostBondsStrategy


# The reactor is driven by the reaction template, which is re-parsed here from its SMARTS string
reactor = PolymerizationReactor(
    rxn_schema=AnnotatedReaction.from_smarts(rxn_smarts),
    fragment_strategy=CutMinimumCostBondsStrategy(),
)
# Pool all feedstock monomers together and collect the fragments produced; the result maps SMILES strings to fragment Mols
fragments = reactor.propagate_pooled(
    monomers.values(),
    allow_resampling=True, # allow resampling of monomers to produce more fragments
    sanitize_ops=SANITIZE_ALL,
    aromaticity_model=AROMATICITY_MDL,
    clear_dummy_labels=True,
)
# Hand the drawing routine concrete lists rather than dict views: molecules and legends are then
# plain, index-aligned sequences which remain subscriptable if the renderer needs to slice them
MolsToGridImage(list(fragments.values()), molsPerRow=3, subImgSize=MOL_IMG_SIZE, legends=list(fragments))

In [None]:
from string import ascii_lowercase
from collections import defaultdict, Counter

from polymerist.polymers.monomers import MonomerGroup
from polymerist.rdutils.bonding.portlib import get_num_ports
from polymerist.genutils.iteration import sort_dict_by_keys


# Assign each polymerizable fragment a name of the form "<parent monomer(s)>-<number of ports><letter>"
named_fragments : dict[str, Smiles] = {}
# per parent name, counts how many fragments of each functionality have been seen so far (used to pick distinguishing letter suffixes)
fragment_name_modifiers = defaultdict(Counter)
for canon_smiles, fragment_mol in fragments.items():
    functionality = get_num_ports(fragment_mol)
    if not functionality:
        continue # fragments without any ports can't take part in polymerization, so leave them out

    # The canonical SMILES lacks much of the chemical info needed for an explicit substructure match, so re-export with all Hs and bonds explicit...
    query_smiles = MolToSmiles(fragment_mol, allHsExplicit=True, allBondsExplicit=True)
    # ...and (importantly!) parse that as SMARTS: ports on SMILES-based mols are not treated correctly as wild-type atoms
    query_mol = MolFromSmarts(query_smiles)

    # A fragment is named after every feedstock monomer which contains it as a substructure
    parent_names = [name for name, monomer in monomers.items() if monomer.HasSubstructMatch(query_mol)]
    parent_mol_name = '-'.join(parent_names)

    seen_counts = fragment_name_modifiers[parent_mol_name]
    suffix = ascii_lowercase[seen_counts[functionality]] # 'a' for the first such fragment, 'b' for the second, and so on
    named_fragments[f'{parent_mol_name}-{functionality}{suffix}'] = canon_smiles
    seen_counts[functionality] += 1

# sorting is not necessary for any functionality, but groups like fragments together for viewing
monogrp = MonomerGroup(sort_dict_by_keys(named_fragments, reverse=True))
MolsToGridImage(
    [mol for _, mol in monogrp.iter_rdmols()],
    molsPerRow=3,
    subImgSize=MOL_IMG_SIZE,
    legends=list(monogrp.monomers.keys()),
)

### Generate monomer specification-compliant SMARTS with informative names for each non-monomer fragment

## Use fragments to assemble a polymer

In [None]:

from polymerist.polymers.building import build_linear_polymer, mbmol_to_rdkit_pdb

# User-configurable build settings
n_monomers : int = 20
do_energy_min : bool = False # if enabled, will yield a more physically-plausible structure, at the cost of slower structure output

# NOTE: unless you explicitly provide terminal group orientations, the builder will automatically choose the first two
# terminal groups as head and tail, respectively, in whatever order they appear, or the only terminal group twice if just one is provided
monogrp.term_orient = dict(head='LA-1a', tail='EG-1a')

chain = build_linear_polymer(
    monomers=monogrp,
    n_monomers=n_monomers,
    # Block-copolymer alternative (requires defining LA_block_size, GA_block_size, and EG_block_size first);
    # will cycle through LA, GA, and EG middle monomers (as set in the order above):
    # sequence=f'{"A"*LA_block_size}{"B"*GA_block_size}{"C"*EG_block_size}',
    sequence='ABCBB',
    allow_partial_sequences=True, # relax requirement of having a whole number of repetitions of the sequence, to allow any number of monomers
    energy_minimize=do_energy_min,
)
chain.visualize()
# Uncomment to export the chain (presumably to a PDB file, going by the function name):
# mbmol_to_rdkit_pdb(f'PEG_PLGA_{n_monomers}-mer.pdb', chain)