# Imports

In [1]:
## Logging and Shell
import logging
logging.basicConfig(
    level=logging.ERROR,
    force=True
)

## Numeric imports
import pandas as pd

## File I/O
from pathlib import Path
import json

# Cheminformatics
from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole

DIM    = 300
ASPECT = 3/2
IPythonConsole.molSize = (int(ASPECT*DIM), DIM)   # Change image size

# Custom imports
from polymerist.monomers import specification
from polymerist.rdutils.reactions import reactions

# Static Paths
RAW_DATA_DIR  = Path('monomer_data_raw')
FMT_DATA_DIR  = Path('monomer_data_formatted')
PROC_DATA_DIR = Path('monomer_data_processed')
RXN_FILES_DIR = Path('poly_rxns')

PDB_OUT_DIR   = Path('pdb_files')
PDB_OUT_DIR.mkdir(exist_ok=True)

MONO_INFO_DIR = Path('monomer_fragments')
MONO_INFO_DIR.mkdir(exist_ok=True)



# Load monomer and rxn data 

In [4]:
input_data_path = PROC_DATA_DIR / '20231114_polyid_data_density_DP2-6 - 1,2 monomers_FILTERED.csv'
# input_data_path = PROC_DATA_DIR / 'nipu_urethanes_FILTERED.csv'
df = pd.read_csv(input_data_path, index_col=[0,1])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,smiles_monomer_0,fn_group_name_monomer_0,IUPAC_name_monomer_0,smiles_monomer_1,fn_group_name_monomer_1,IUPAC_name_monomer_1,smiles_monomer,smiles_polymer_DP2,smiles_polymer_DP3,smiles_polymer_DP6,...,log10_ElongBreak,YoungMod,Tensile_Strength,Density,log10_Permeability_CH4,log10_Permeability_CO2,log10_Permeability_N2,log10_Permeability_O2,log10_Permeability_H2,log10_Permeability_H2O
mechanism,polymer_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
polycarbonate_phosgene,"poly(4-[1,1,1,3,3,3-hexafluoro-2-(4-hydroxyphenyl)propan-2-yl]phenol-co-carbonyl dichloride)",[H]-[O]-[C]1=[C](-[H])-[C](-[H])=[C](-[C](-[C]...,hydroxyl,"4-[1,1,1,3,3,3-hexafluoro-2-(4-hydroxyphenyl)p...",[O]=[C](-[Cl])-[Cl],acyl_chloride,carbonyl dichloride,O=C(Cl)Cl.Oc1ccc(C(c2ccc(O)cc2)(C(F)(F)F)C(F)(...,O=C(Cl)Oc1ccc(C(c2ccc(O)cc2)(C(F)(F)F)C(F)(F)F...,O=C(Oc1ccc(C(c2ccc(O)cc2)(C(F)(F)F)C(F)(F)F)cc...,O=C(Cl)Oc1ccc(C(c2ccc(OC(=O)Oc3ccc(C(c4ccc(OC(...,...,,,,1.479,0.017759,1.380211,0.225687,0.838849,,
polycarbonate_phosgene,"poly(4-[2-(4-hydroxy-3,5-dimethylphenyl)propan-2-yl]-2,6-dimethylphenol-co-carbonyl dichloride)",[H]-[O]-[C]1=[C](-[C](-[H])(-[H])-[H])-[C](-[H...,hydroxyl,"4-[2-(4-hydroxy-3,5-dimethylphenyl)propan-2-yl...",[O]=[C](-[Cl])-[Cl],acyl_chloride,carbonyl dichloride,O=C(Cl)Cl.Cc1cc(C(C)(C)c2cc(C)c(O)c(C)c2)cc(C)c1O,Cc1cc(C(C)(C)c2cc(C)c(OC(=O)Cl)c(C)c2)cc(C)c1O,Cc1cc(C(C)(C)c2cc(C)c(OC(=O)Cl)c(C)c2)cc(C)c1O...,Cc1cc(C(C)(C)c2cc(C)c(OC(=O)Oc3c(C)cc(C(C)(C)c...,...,,,,1.083,-0.096910,1.245019,0.037426,0.747412,,
polyimide,"poly(4-[9-(4-aminophenyl)fluoren-9-yl]aniline-co-5-[2-(1,3-dioxo-2-benzofuran-5-yl)-1,1,1,3,3,3-hexafluoropropan-2-yl]-2-benzofuran-1,3-dione)",[H]-[C]1=[C](-[H])-[C](-[H])=[C]2-[C](=[C]-1-[...,amine,4-[9-(4-aminophenyl)fluoren-9-yl]aniline,[H]-[C]1=[C]2-[C](=[O])-[O]-[C](=[O])-[C]-2=[C...,anhydride,"5-[2-(1,3-dioxo-2-benzofuran-5-yl)-1,1,1,3,3,3...",Nc1ccc(C2(c3ccc(N)cc3)c3ccccc3-c3ccccc32)cc1.O...,Nc1ccc(C2(c3ccc(N4C(=O)c5ccc(C(c6ccc7c(c6)C(=O...,Nc1ccc(C2(c3ccc(N4C(=O)c5ccc(C(c6ccc7c(c6)C(=O...,Nc1ccc(C2(c3ccc(N4C(=O)c5ccc(C(c6ccc7c(c6)C(=O...,...,,,,1.318,,1.991226,0.518514,,,
polyimide,"poly(benzene-1,3-diamine-co-5-[3-tert-butyl-5-(1,3-dioxo-2-benzofuran-5-carbonyl)benzoyl]-2-benzofuran-1,3-dione)",[H]-[C]1=[C](-[H])-[C](-[N](-[H])-[H])=[C](-[H...,amine,"benzene-1,3-diamine",[H]-[C]1=[C](-[C](=[O])-[C]2=[C](-[H])-[C](-[C...,anhydride,"5-[3-tert-butyl-5-(1,3-dioxo-2-benzofuran-5-ca...",Nc1cccc(N)c1.CC(C)(C)c1cc(C(=O)c2ccc3c(c2)C(=O...,CC(C)(C)c1cc(C(=O)c2ccc3c(c2)C(=O)OC3=O)cc(C(=...,CC(C)(C)c1cc(C(=O)c2ccc3c(c2)C(=O)N(c2cccc(N)c...,CC(C)(C)c1cc(C(=O)c2ccc3c(c2)C(=O)OC3=O)cc(C(=...,...,,1350.0,,1.278,-1.000000,0.568202,-0.886057,0.000000,,
polyimide,"poly(4-[(4-aminophenyl)methyl]aniline-co-5-[2-(1,3-dioxo-2-benzofuran-5-yl)propan-2-yl]-2-benzofuran-1,3-dione)",[H]-[C]1=[C](-[N](-[H])-[H])-[C](-[H])=[C](-[H...,amine,4-[(4-aminophenyl)methyl]aniline,[H]-[C]1=[C]2-[C](=[O])-[O]-[C](=[O])-[C]-2=[C...,anhydride,"5-[2-(1,3-dioxo-2-benzofuran-5-yl)propan-2-yl]...",Nc1ccc(Cc2ccc(N)cc2)cc1.CC(C)(c1ccc2c(c1)C(=O)...,CC(C)(c1ccc2c(c1)C(=O)OC2=O)c1ccc2c(c1)C(=O)N(...,CC(C)(c1ccc2c(c1)C(=O)N(c1ccc(Cc3ccc(N)cc3)cc1...,CC(C)(c1ccc2c(c1)C(=O)OC2=O)c1ccc2c(c1)C(=O)N(...,...,,,,1.417,,,,,,
polyimide,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
polyimide,"poly(4-[4-[2-[4-(4-aminophenoxy)phenyl]propan-2-yl]phenoxy]aniline-co-5-[2-(1,3-dioxo-2-benzofuran-5-yl)-1,1,1,3,3,3-hexafluoropropan-2-yl]-2-benzofuran-1,3-dione)",[H]-[C]1=[C](-[O]-[C]2=[C](-[H])-[C](-[H])=[C]...,amine,4-[4-[2-[4-(4-aminophenoxy)phenyl]propan-2-yl]...,[H]-[C]1=[C]2-[C](=[O])-[O]-[C](=[O])-[C]-2=[C...,anhydride,"5-[2-(1,3-dioxo-2-benzofuran-5-yl)-1,1,1,3,3,3...",CC(C)(c1ccc(Oc2ccc(N)cc2)cc1)c1ccc(Oc2ccc(N)cc...,CC(C)(c1ccc(Oc2ccc(N)cc2)cc1)c1ccc(Oc2ccc(N3C(...,CC(C)(c1ccc(Oc2ccc(N)cc2)cc1)c1ccc(Oc2ccc(N3C(...,CC(C)(c1ccc(Oc2ccc(N)cc2)cc1)c1ccc(Oc2ccc(N3C(...,...,,,,1.333,-0.570248,1.012837,,,,
polyimide,"poly(4-[2-(4-amino-3-methylphenyl)-1,1,1,3,3,3-hexafluoropropan-2-yl]-2-methylaniline-co-5-[2-(1,3-dioxo-2-benzofuran-5-yl)-1,1,1,3,3,3-hexafluoropropan-2-yl]-2-benzofuran-1,3-dione)",[H]-[C]1=[C](-[N](-[H])-[H])-[C](-[C](-[H])(-[...,amine,"4-[2-(4-amino-3-methylphenyl)-1,1,1,3,3,3-hexa...",[H]-[C]1=[C]2-[C](=[O])-[O]-[C](=[O])-[C]-2=[C...,anhydride,"5-[2-(1,3-dioxo-2-benzofuran-5-yl)-1,1,1,3,3,3...",Cc1cc(C(c2ccc(N)c(C)c2)(C(F)(F)F)C(F)(F)F)ccc1...,Cc1cc(C(c2ccc(N3C(=O)c4ccc(C(c5ccc6c(c5)C(=O)O...,Cc1cc(C(c2ccc(N3C(=O)c4ccc(C(c5ccc6c(c5)C(=O)N...,Cc1cc(C(c2ccc(N3C(=O)c4ccc(C(c5ccc6c(c5)C(=O)N...,...,,,,1.422,-0.308919,1.466868,,,,
polyimide,"poly(2,4,6-trimethylbenzene-1,3-diamine-co-5-(1,3-dioxo2-benzofuran-5-carbonyl)-2-benzofuran-1,3-dione)",[H]-[C]1=[C](-[C](-[H])(-[H])-[H])-[C](-[N](-[...,amine,"2,4,6-trimethylbenzene-1,3-diamine",[H]-[C]1=[C](-[C](=[O])-[C]2=[C](-[H])-[C](-[H...,anhydride,"5-(1,3-dioxo2-benzofuran-5-carbonyl)-2-benzofu...",Cc1cc(C)c(N)c(C)c1N.O=C(c1ccc2c(c1)C(=O)OC2=O)...,Cc1cc(C)c(N2C(=O)c3ccc(C(=O)c4ccc5c(c4)C(=O)OC...,Cc1cc(C)c(N2C(=O)c3ccc(C(=O)c4ccc5c(c4)C(=O)N(...,Cc1cc(C)c(N2C(=O)c3ccc(C(=O)c4ccc5c(c4)C(=O)N(...,...,,,,1.293,,,,,,
polycarbonate_phosgene,poly(4-[1-(4-hydroxyphenyl)cycloheptyl]phenol-co-carbonyl dichloride),[H]-[O]-[C]1=[C](-[H])-[C](-[H])=[C](-[C]2(-[C...,hydroxyl,4-[1-(4-hydroxyphenyl)cycloheptyl]phenol,[O]=[C](-[Cl])-[Cl],acyl_chloride,carbonyl dichloride,O=C(Cl)Cl.Oc1ccc(C2(c3ccc(O)cc3)CCCCCC2)cc1,O=C(Cl)Oc1ccc(C2(c3ccc(O)cc3)CCCCCC2)cc1,O=C(Cl)Oc1ccc(C2(c3ccc(OC(=O)Cl)cc3)CCCCCC2)cc1,O=C(Cl)Oc1ccc(C2(c3ccc(OC(=O)Oc4ccc(C5(c6ccc(O...,...,,,,1.203,,,,-0.119186,,


In [3]:
blacklisted_rxns = ['imide']#, 'vinyl']
df = df[df.mechanism.map(lambda s : s not in blacklisted_rxns)]

AttributeError: 'DataFrame' object has no attribute 'mechanism'

In [None]:
keys = ['rxn_name']

df_grouper = df.groupby(keys)
frames = {
    mech : df_grouper.get_group(mech)
        for mech in df_grouper.groups
}

## Load pre-defined reactions with functional group and name backmap

In [None]:
with (RXN_FILES_DIR / 'rxn_groups.json').open('r') as file: # load table of functional group for each reaction
    rxn_groups = json.load(file)

rxns = {
    rxnname : reactions.AnnotatedReaction.from_rxnfile(RXN_FILES_DIR / f'{rxnname}.rxn')
        for rxnname in rxn_groups.keys()
}

# Polymerizing fragments

## Example for polymerization of individual molecule

In [None]:
mechname = 'polyurethane_isocyanate'
mechname = 'polyamide'
frame = frames[mechname]
row = frame.loc[frame.index[0]]
rxn_pathway = rxns[mechname]

named_reactants = {}
for i in range(2):
    reactant = Chem.MolFromSmiles(row[f'smiles_monomer_{i}'], sanitize=False)
    Chem.SanitizeMol(reactant, sanitizeOps=specification.SANITIZE_AS_KEKULE)
    display(reactant)
    named_reactants[ row[f'IUPAC_name_monomer_{i}'] ] = reactant

reactants = [i for i in named_reactants.values()]

In [None]:
from polymerist.rdutils.bonding import portlib
from polymerist.rdutils.reactions import reactors

display(rxn_pathway)
reactor = reactors.PolymerizationReactor(rxn_pathway)
for (dimer, pair) in reactor.propagate(reactants):
    display(dimer)
    for frag in pair:
        display(frag)

    print('='*50)