# Core Imports

In [1]:
# Custom Imports
from polysaccharide import general
from polysaccharide.general import optional_in_place
from polysaccharide.extratypes import ResidueSmarts

from polysaccharide.molutils import reactions
from polysaccharide.molutils.rdmol.rdtypes import *
from polysaccharide.molutils.rdmol import rdcompare, rdconvert, rdkdraw, rdcompare, rdprops, rdbond, rdlabels

from polysaccharide.polymer import monomer
from polysaccharide.polymer.monomer import MonomerInfo

# Generic Imports
import re
from functools import partial, cached_property
from collections import defaultdict
from itertools import combinations, chain
from ast import literal_eval

# Numeric imports
import pandas as pd
import numpy as np

# File I/O
from pathlib import Path
import csv, json, openpyxl

# Typing and Subclassing
from typing import Any, Callable, ClassVar, Generator, Iterable, Optional, Union
from dataclasses import dataclass, field
from abc import ABC, abstractmethod, abstractproperty
from openmm.unit import Unit, Quantity

# Cheminformatics
from rdkit import Chem
from rdkit.Chem import rdChemReactions

# Static Paths -- all relative to the directory the notebook is run from
RAW_DATA_PATH  = Path('raw_monomer_data')       # raw spreadsheet inputs (e.g. nipu_urethanes.xlsx)
PROC_DATA_PATH = Path('processed_monomer_data') # cleaned tables (e.g. clean_smarts_digroup.csv)
RXN_FILES_PATH = Path('rxn_smarts')             # reaction definitions (rxn_smarts.json, <chemistry>.rxn)

  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)


# Inspecting NREL Urethanes

## Loading raw data

In [2]:
# Read the raw spreadsheet; its 'Monomers' column holds the *text* of a Python tuple,
# so each entry is parsed back into a real tuple with literal_eval
p = RAW_DATA_PATH / 'nipu_urethanes.xlsx'
raw_table = pd.read_excel(p)
raw_table['Monomers'] = raw_table['Monomers'].apply(literal_eval)

# Row masks selecting each of the two chemistries present in the table
is_nipu     = raw_table['Chemistry'].eq('NIPU')
is_urethane = raw_table['Chemistry'].eq('urethane')

nipus     = raw_table.loc[is_nipu]
urethanes = raw_table.loc[is_urethane]

# Monomer tuples per chemistry, re-indexed from 0 in their original row order
nipus_mono     = nipus['Monomers'].reset_index(drop=True)
urethanes_mono = urethanes['Monomers'].reset_index(drop=True)

## Loading pre-processed data

In [3]:
# Load the pre-processed table; the first CSV column is used as the row index.
# Besides 'Chemistry', it carries one column per reactive functional group
# (those columns are read by name when reactants are built further down).
digroup_table = pd.read_csv(PROC_DATA_PATH / 'clean_smarts_digroup.csv', index_col=[0])

# Split the table into one sub-table per chemistry:
#   - dropna(axis=1) discards every column that has a missing value within that subset
#     (presumably the functional-group columns belonging to the other chemistry -- TODO confirm)
#   - reset_index renumbers the remaining rows from 0
# Series.unique() yields the labels in first-appearance order, so the dict (and every loop
# over it) is ordered identically on each run; iterating a set() of strings is not.
tables_by_chem = {
    chemistry : digroup_table[digroup_table['Chemistry'] == chemistry].dropna(axis=1).reset_index(drop=True)
        for chemistry in digroup_table['Chemistry'].unique()
}

## Loading reaction mechanisms

In [4]:
# Pair of reacting functional groups for each polymerization chemistry.
# The group names double as column labels when reactants are looked up per sample.
reaction_pairs = dict(
    NIPU=('cyclocarbonate', 'amine'),
    urethane=('isocyanate', 'hydroxyl'),
)

In [5]:
# Toggle for where the reaction mechanisms come from:
#   False -> one '<chemistry>.rxn' file per entry of reaction_pairs
#   True  -> a single JSON file mapping chemistry name -> reaction SMARTS
from_smarts = False

if not from_smarts:
    # from files
    rxns = {
        chemistry : reactions.AnnotatedReaction.from_rxnfile(RXN_FILES_PATH / f'{chemistry}.rxn')
            for chemistry in reaction_pairs
    }
else:
    # from SMARTS
    with (RXN_FILES_PATH / 'rxn_smarts.json').open('r') as rxn_file:
        smarts_by_chemistry = json.load(rxn_file)

    rxns = {
        chemistry : reactions.AnnotatedReaction.from_smarts(rxn_SMARTS)
            for chemistry, rxn_SMARTS in smarts_by_chemistry.items()
    }

## Polymerizing and generating fragments

In [8]:
# For every sample of every chemistry: react the two monomers twice and write the
# resulting dimer / terminal / middle fragments to 'monomer_files/<chemistry>/<chemistry>_<i>.json'
show = False # set to True to display the fragments produced by each reaction round
mono_info_dir = Path('monomer_files')
mono_info_dir.mkdir(exist_ok=True)

rdkdraw.set_rdkdraw_size(400, 3/2)
for chemistry, smarts_table in tables_by_chem.items():
    chem_dir = mono_info_dir / chemistry
    chem_dir.mkdir(exist_ok=True)

    # reactive groups and pathway depend only on the chemistry, so look them up once per table
    rxn_grp_names = reaction_pairs[chemistry]
    rxn = rxns[chemistry]

    for i, sample in smarts_table.iterrows():
        # read reactant monomers from this row, tagging every atom with the group it came from
        reactants = []
        for group_name in rxn_grp_names:
            reactant = Chem.MolFromSmarts(sample[group_name])
            if reactant is None: # RDKit signals an unparseable SMARTS by returning None
                raise ValueError(
                    f'Could not parse {group_name} SMARTS for {chemistry} sample {i}: {sample[group_name]!r}'
                )
            for atom in reactant.GetAtoms():
                atom.SetProp('reactant_group', group_name)

            reactants.append(reactant)
        mono_info = MonomerInfo()

        # first round of polymerization (initiation)
        reactor_1 = reactions.PolymerizationReactor(rxn, reactants=reactants)
        reactor_1.react()
        if show:
            display(reactor_1.polymerized_fragments(separate=False))

        # the whole product is stored as the dimer, with atom map numbers removed from a copy
        mono_info.monomers[f'{"_".join(rxn_grp_names)}_DIMER'] = Chem.MolToSmarts(rdlabels.clear_atom_map_nums(reactor_1.product, in_place=False))
        for assoc_group_name, rdfragment in zip(rxn_grp_names, reactor_1.polymerized_fragments(separate=True)):
            rdlabels.clear_atom_isotopes(rdfragment, in_place=True)
            mono_info.monomers[f'{assoc_group_name}_TERM'] = Chem.MolToSmarts(rdfragment)

        # second round of polymerization (propagation)
        # NOTE(review): polymerized_fragments() is deliberately called again rather than reusing the
        # fragments from the loop above -- whether it returns fresh objects is not visible here, so
        # caching could change what the second reactor receives
        reactor_2 = reactions.PolymerizationReactor(rxn, reactor_1.polymerized_fragments(separate=True))
        reactor_2.react()
        if show:
            display(reactor_2.polymerized_fragments(separate=False))

        for assoc_group_name, rdfragment in zip(rxn_grp_names, reactor_2.polymerized_fragments(separate=True)):
            rdlabels.clear_atom_isotopes(rdfragment, in_place=True)
            mono_info.monomers[f'{assoc_group_name}_MID'] = Chem.MolToSmarts(rdfragment)

        # add serial map numbers to monomer fragment SMARTS
        # (only values of existing keys are replaced, so the dict keeps its size while being iterated)
        for monomer_tag, smarts in mono_info.monomers.items():
            monomer = Chem.MolFromSmarts(smarts)
            rdlabels.assign_ordered_atom_map_nums(monomer, in_place=True)
            mono_info.monomers[monomer_tag] = Chem.MolToSmarts(monomer)

        # save to file
        mono_info.to_file(chem_dir / f'{chemistry}_{i}.json')