# Core Imports

In [1]:
# Custom Imports
from polysaccharide import general
from polysaccharide.general import optional_in_place
from polysaccharide.extratypes import ResidueSmarts

from polysaccharide.molutils import reactions
from polysaccharide.molutils.rdmol.rdtypes import *
from polysaccharide.molutils.rdmol import rdcompare, rdconvert, rdkdraw, rdcompare, rdprops, rdbond, rdlabels

from polysaccharide.polymer import monomer as monoutils
from polysaccharide.polymer.monomer import MonomerInfo

# Generic Imports
import re
from functools import partial, cached_property
from collections import defaultdict
from itertools import combinations, chain
from ast import literal_eval

# Numeric imports
import pandas as pd
import numpy as np

# File I/O
from pathlib import Path
import csv, json, openpyxl

# Typing and Subclassing
from typing import Any, Callable, ClassVar, Generator, Iterable, Optional, Union
from dataclasses import dataclass, field
from abc import ABC, abstractmethod, abstractproperty
from openmm.unit import Unit, Quantity

# Cheminformatics
from rdkit import Chem
from rdkit.Chem import rdChemReactions

# Static Paths (relative to the working directory of the notebook)
# PROC_DATA_PATH holds the cleaned monomer table, RXN_FILES_PATH the reaction definitions
RAW_DATA_PATH, PROC_DATA_PATH, RXN_FILES_PATH = (
    Path(dir_name)
        for dir_name in ('raw_monomer_data', 'processed_monomer_data', 'rxn_smarts')
)

  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)


# Inspecting NREL Urethanes

## Loading pre-processed data

In [2]:
# Load the cleaned monomer table, then split it into one sub-table per value of the 'Chemistry' column
digroup_table = pd.read_csv(PROC_DATA_PATH / 'clean_smarts_digroup.csv', index_col=[0])

# groupby partitions the table in a single pass, yields the chemistries in sorted (and therefore
# reproducible) order instead of arbitrary set order, and skips rows with no 'Chemistry' entry
tables_by_chem = {
    chemistry : chem_table.dropna(axis=1).reset_index(drop=True) # discard columns with missing values for this chemistry, renumber rows from 0
        for chemistry, chem_table in digroup_table.groupby('Chemistry')
}

## Loading reaction mechanisms

In [3]:
# names of the pair of functional groups which react with one another, keyed by chemistry
reaction_pairs = dict(
    NIPU=('cyclocarbonate', 'amine'),
    urethane=('isocyanate', 'hydroxyl'),
)

In [4]:
from_smarts = True # switch to False to read each mechanism from its own .rxn file instead

if not from_smarts:
    # from files : one '<chemistry>.rxn' file for each chemistry named in reaction_pairs
    rxns = dict(
        (chem_name, reactions.AnnotatedReaction.from_rxnfile(RXN_FILES_PATH / f'{chem_name}.rxn'))
            for chem_name in reaction_pairs
    )
else:
    # from SMARTS : a single JSON file whose items pair a chemistry name with a reaction SMARTS string
    with (RXN_FILES_PATH / 'rxn_smarts.json').open('r') as rxn_file:
        rxns = dict(
            (chem_name, reactions.AnnotatedReaction.from_smarts(smarts_str))
                for chem_name, smarts_str in json.load(rxn_file).items()
        )

In [5]:
# display the loaded urethane mechanism as a reaction SMARTS string for visual inspection
rdChemReactions.ReactionToSmarts(rxns['urethane'])

'[#8:1]=[#6:2]=[#7&D2:3]-[*:4].[#8:5](-[#1:6])-[*:7]>>[#8:1]=[#6:2](-[#7&D2:3](-[*:4])-[#1:6])-[#8:5]-[*:7]'

# Polymerizing and generating fragments

In [6]:
# For every sample of every chemistry: react the pair of monomers once, collect the resulting
# dimer and fragments as SMARTS, number their atoms, and write them to a per-sample JSON file
show = False #True
mono_info_dir = Path('monomer_files')
mono_info_dir.mkdir(exist_ok=True)

rdkdraw.set_rdkdraw_size(400, 3/2)

for chemistry, smarts_table in tables_by_chem.items():
    chem_dir = mono_info_dir / chemistry
    chem_dir.mkdir(exist_ok=True)

    # reactive groups and pathway depend only on the chemistry, so look them up once per table
    rxn_group_names = reaction_pairs[chemistry]
    rxn = rxns[chemistry]

    for i, sample in smarts_table.iterrows():
        # read reactant monomers from the columns of this row named after the reactive groups
        initial_reactants = []
        for group_name in rxn_group_names:
            reactant = Chem.MolFromSmarts(sample[group_name])
            if reactant is None: # RDKit signals an unparseable SMARTS by returning None; fail here with context
                raise ValueError(f'Could not parse "{group_name}" SMARTS of {chemistry} sample {i}: {sample[group_name]!r}')

            # tag every atom with the group it came from
            for atom in reactant.GetAtoms():
                atom.SetProp('reactant_group', group_name)
            initial_reactants.append(reactant)
        mono_info = MonomerInfo()

        # first round of polymerization (initiation)
        reactor = reactions.PolymerizationReactor(rxn)
        for dimer, frags in reactor.propagate(initial_reactants):
            dimer = rdconvert.SMILESConverter().convert(dimer) # hacky workaround for RDKit nitrogen bond order SMARTS bug
            num_dimer_ports = monoutils.count_ports_in_rdmol(dimer)
            if num_dimer_ports > 0: # only dimers which still have ports are recorded
                mono_info.monomers[f'DIMER_{num_dimer_ports}_PORT'] = Chem.MolToSmarts(dimer)

            # fragments are paired up with the reactive group names in order
            for assoc_group_name, rdfragment in zip(rxn_group_names, frags):
                rdfragment = rdconvert.SMILESConverter().convert(rdfragment) # hacky workaround for RDKit nitrogen bond order SMARTS bug
                rdlabels.clear_atom_isotopes(rdfragment, in_place=True)

                affix = 'TERM' if monoutils.is_term_by_rdmol(rdfragment) else 'MID'
                mono_info.monomers[f'{assoc_group_name}_{affix}'] = Chem.MolToSmarts(rdfragment)

        # add serial map numbers to monomer fragment SMARTS and save to file
        # (iterate over a snapshot of the items, since the entries are overwritten inside the loop)
        for monomer_tag, smarts in list(mono_info.monomers.items()):
            monomer = Chem.MolFromSmarts(smarts)
            Chem.Kekulize(monomer, clearAromaticFlags=True) # ensures that aromatic rings are handled correctly
            rdlabels.assign_ordered_atom_map_nums(monomer, in_place=True)
            mono_info.monomers[monomer_tag] = Chem.MolToSmarts(monomer).replace('#0', '*') # ensure wild atoms are marked correctly (rather than as undefined atoms)
        mono_info.to_file(chem_dir / f'{chemistry}_{i}.json')