# Assigning chemical info and partial charges to PDB structures (results are saved as chemically well-defined SDF files)

## Imports

In [1]:
# Generic Imports
from collections import defaultdict
import json

# Logging
import logging
from polymerist.genutils.logutils.IOHandlers import LOG_FORMATTER

# Configure the root logger to emit WARNING-and-above records, borrowing the message and date
# formats from polymerist's LOG_FORMATTER; force=True makes this configuration take effect even
# if the root logger already has handlers attached (e.g. when the cell is re-run)
logging.basicConfig(
    force=True,
    level=logging.WARNING,
    datefmt=LOG_FORMATTER.datefmt,
    format=LOG_FORMATTER._fmt,
)

# File I/O
from pathlib import Path
from tqdm.notebook import tqdm
from rich.progress import Progress, track # TODO : convert from tqdm

# Cheminformatics
from openff.toolkit.topology import Topology, Molecule

# Static Paths
# Inputs : monomer fragment definitions and PDB structures (both relative to the working directory)
MONO_DIR = Path('monomer_fragments')
PDB_DIR = Path('pdb_files', 'trimers')
# PDB_DIR  = Path('pdb_files_new/pentamers')

# Output : directory which receives the generated files; created up front if it is missing
TOPO_DIR = Path('Topologies')
TOPO_DIR.mkdir(exist_ok=True)

  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)




## Obtaining complete PDB-monomer pairs for iteration

In [2]:
# Nested lookup of matched input files: {chemistry: {mol_name: (pdb_path, mono_path)}}
# (inner mappings are plain dicts; the previous factory-less inner defaultdict behaved the same way)
chem_paths = defaultdict(dict)

# iterdir() yields entries in arbitrary order; sorting makes the processing order reproducible between runs
for chem_mono in sorted(MONO_DIR.iterdir()):
    if not chem_mono.is_dir():
        continue # only directories can hold per-chemistry monomer files; skip stray files

    chemistry = chem_mono.stem
    chem_pdb  = PDB_DIR / chemistry

    has_pdb  = {path.stem for path in chem_pdb.glob( '*.pdb')}
    has_mono = {path.stem for path in chem_mono.glob('*.json')}

    # keep only molecules for which BOTH a PDB structure and a monomer JSON file exist
    for valid_mol_name in sorted(has_pdb & has_mono):
        pdb_path  = chem_pdb  / f'{valid_mol_name}.pdb'
        mono_path = chem_mono / f'{valid_mol_name}.json'

        chem_paths[chemistry][valid_mol_name] = (pdb_path, mono_path)

In [3]:
# Optional filter for debugging: uncomment to restrict processing to a single chemistry
# chem_paths = {
#     k : paths
#         for k, paths in chem_paths.items()
#             if k == 'polyurethane_isocyanate'
# }
# chem_paths

## Parameterizing and saving to SDF, recording mols where match fails

In [4]:
from polymerist.openfftools import topology
from polymerist.openfftools.pcharge import MolCharger

from polymerist.monomers import MonomerGroup
from polymerist.residues.partition import partition

# catch annoying warnings
import warnings 
from openff.toolkit.utils.exceptions import IncorrectNumConformersWarning
# Ignore OpenFF's conformer-count warnings from here on.
# NOTE: a bare `warnings.catch_warnings(record=True)` call used to precede this line; without a
# `with` statement that context manager is never entered, so it had no effect and was removed
warnings.filterwarnings('ignore', category=IncorrectNumConformersWarning)

# Any molecule whose name contains one of these fragments is skipped (substring match on the name)
group_blacklist = [
    'poly(4-N-(4-aminophenyl)-4-N-[4-(2,4,6-tritert-butylphenoxy)phenyl]benzene-1,4-diamine' # for some reason, these compounds are unable to be parameterized in a reasonable amount of time
]

# Name of the partial-charge method; used as the key into MolCharger's registry of subclasses,
# from which the charger instance is created
charge_method = 'Espaloma-AM1-BCC'
charger = MolCharger.subclass_registry[charge_method]()

# Failure log: {error message: {chemistry: [mol_name, ...]}}
# Every failing molecule is appended; a plain assignment here previously kept only the LAST
# molecule seen for a given (error, chemistry) pair and silently dropped the others
unmatched_pdb_mols = defaultdict(lambda: defaultdict(list))
num_successful = 0             # number of molecules which were charged AND written to SDF
cancelled = defaultdict(list)  # names of blacklisted (skipped) molecules, grouped by chemistry

for chemistry, path_dict in chem_paths.items():
    # one output sub-directory per chemistry
    chem_dir = TOPO_DIR / chemistry
    chem_dir.mkdir(parents=True, exist_ok=True)

    progress = tqdm(path_dict.items())
    for mol_name, (pdb_path, mono_path) in progress:
        progress.set_description_str(f'{chemistry} : {mol_name}')
        if any(group_name in mol_name for group_name in group_blacklist):
            progress.set_postfix_str('Cancelled')
            cancelled[chemistry].append(mol_name)
            continue

        try:
            progress.set_postfix_str('Loading files')
            monogrp = MonomerGroup.from_file(mono_path)
            offtop = Topology.from_pdb(pdb_path, _custom_substructures=monogrp.monomers)

            progress.set_postfix_str('Partitioning')
            # explicit raise rather than a bare assert: an assert carries no message (so the failure
            # would be logged under an empty key) and is skipped entirely when Python runs with -O
            if not partition(offtop):
                raise RuntimeError('partition() was unsuccessful for the Topology loaded from PDB')

            progress.set_postfix_str('Assigning Charges')
            offmol = topology.get_largest_offmol(offtop)
            offmol.name = mol_name
            cmol = charger.charge_molecule(offmol)

            progress.set_postfix_str('Saving SDF')
            charged_top = cmol.to_topology() # distinct name: this is no longer the Topology read from the PDB
            sdf_path = chem_dir / f'{mol_name}.sdf'
            topology.topology_to_sdf(sdf_path, charged_top)

            num_successful += 1
        except Exception as e: # deliberately broad: one bad molecule must not halt the whole batch
            error_msg = str(e) or type(e).__name__ # fall back to the exception type if the message is empty
            print(f'{mol_name} : {error_msg}')
            unmatched_pdb_mols[error_msg][chemistry].append(mol_name)

  0%|          | 0/36 [00:00<?, ?it/s]



  0%|          | 0/46 [00:00<?, ?it/s]



  0%|          | 0/32 [00:00<?, ?it/s]



  0%|          | 0/26 [00:00<?, ?it/s]



In [5]:
# Report, one per line: the count of successfully processed molecules, then the molecules which
# were skipped (shown as a plain dict for a cleaner repr)
for summary_item in (num_successful, dict(cancelled)):
    print(summary_item)

135
{'polyamide': ['poly(4-N-(4-aminophenyl)-4-N-[4-(2,4,6-tritert-butylphenoxy)phenyl]benzene-1,4-diamine-co-4-[2-(4-carboxyphenyl)-1,1,1,3,3,3-hexafluoropropan-2-yl]benzoic acid)', 'poly(4-N-(4-aminophenyl)-4-N-[4-(2,4,6-tritert-butylphenoxy)phenyl]benzene-1,4-diamine-co-naphthalene-2,6-dicarboxylic acid)', 'poly(4-N-(4-aminophenyl)-4-N-[4-(2,4,6-tritert-butylphenoxy)phenyl]benzene-1,4-diamine-co-5-propan-2-ylbenzene-1,3-dicarboxylic acid)', 'poly(4-N-(4-aminophenyl)-4-N-[4-(2,4,6-tritert-butylphenoxy)phenyl]benzene-1,4-diamine-co-Benzene-1,3-dicarboxylic acid)', 'poly(4-N-(4-aminophenyl)-4-N-[4-(2,4,6-tritert-butylphenoxy)phenyl]benzene-1,4-diamine-co-Terephthalic acid)']}
