# Development of miscellaneous new features for polymerist

In [None]:
# Sanity check: confirm that polymerist imports and report which version is installed
import polymerist as ps

print(ps.__version__)
print(ps.pascal(5)) # NOTE(review): pascal() is not defined in this notebook; presumably a helper exposed at the top level of polymerist - confirm

In [None]:
# Display the registered_toolkits attribute of the toolkit registry exposed by polymerist.mdtools.openfftools
from polymerist.mdtools import openfftools

openfftools.POLYMERIST_TOOLKIT_REGISTRY.registered_toolkits

# Testing kekulization flag in expanded_SMILES and rdkdraw

In [3]:
from rdkit import Chem

from polymerist.polymers.monomers.specification import expanded_SMILES
from polymerist.rdutils import set_rdkdraw_size, enable_kekulized_drawing, disable_kekulized_drawing

In [None]:
# call the disable hook and then the enable hook, so that the enable call is the last one made before drawing
disable_kekulized_drawing()
enable_kekulized_drawing()

smi = 'c1ccccc1C(=O)O'
exp_smi = expanded_SMILES(smi, kekulize=True)
mol = Chem.MolFromSmiles(exp_smi, sanitize=False) # sanitization is skipped -- presumably so RDKit does not re-perceive aromaticity while parsing; confirm

display(mol)
for bond in mol.GetBonds(): # print the type of every bond to inspect the effect of kekulize=True
    print(bond.GetBondType())

## Robustness improvements to chemdbqueries

In [1]:
from polymerist.smileslib import chemdbqueries

In [2]:
# request the 'iupac_name' property for a SMILES string through polymerist's chemdbqueries module
smi = 'C1=CSC=C1C2=NNN=N2'
chemdbqueries.get_property_from_smiles(smi, prop_name='iupac_name')

'5-thiophen-3-yl-2H-tetrazole'

In [73]:
from typing import Any, ClassVar, Container, Optional
from abc import ABC, abstractmethod, abstractproperty

import requests
import logging

import cirpy
import pubchempy as pcp

# from polymerist.genutils.decorators.classmod import register_abstract_class_attrs


# @register_abstract_class_attrs('SERVICE_NAME')
class ChemDBQueryStrategy(ABC):
    '''Implementation of queries from a particular chemical database

    Concrete strategies provide SERVICE_NAME, VALID_PROPERTIES, and _get_property();
    get_property() wraps those with property-name validation, logging, and unpacking of results'''
    @property
    @abstractmethod
    def SERVICE_NAME(self) -> str:
        '''Name of the queried service, as shown in log and error messages'''
        ...

    @property
    @abstractmethod
    def VALID_PROPERTIES(self) -> Container[str]:
        '''Names of the properties which may be requested from the service'''
        ...
        
    @property
    def available_properties(self) -> str:
        '''The valid property names in sorted order, one per line'''
        return '\n'.join(sorted(self.VALID_PROPERTIES))
        
    @abstractmethod
    def _get_property(self, prop_name : str, representation : str) -> Optional[Any]:
        '''Service-specific query; only called by get_property() once prop_name has been validated'''
        ...
        
    def get_property(self, prop_name : str, representation : str, keep_first_only : bool=True) -> Optional[Any]:
        '''Fetch a particular property of a molecule from the chemical database service

        Parameters
        ----------
        prop_name : str
            Name of the property to fetch; must be one of VALID_PROPERTIES
        representation : str
            Identifier of the molecule which is handed to the service (e.g. a SMILES string)
        keep_first_only : bool, default True
            If True and the service returns a list or tuple of results, only the first is returned

        Returns
        -------
        prop_val : Optional[Any]
            The value returned by the service; None if the service returned nothing,
            returned an empty list/tuple (with keep_first_only=True), or if an HTTP error occurred

        Raises
        ------
        ValueError
            If prop_name is not among VALID_PROPERTIES
        '''
        logging.info(f'Attempting query of property "{prop_name}" from {self.SERVICE_NAME}')
        if prop_name not in self.VALID_PROPERTIES:
            raise ValueError(
                f'Cannot query property "{prop_name}" from {self.SERVICE_NAME};\n' \
                f'Choose from one of the following property names:\n{self.available_properties}'
            )
        
        try: # only the remote call is guarded, so that unrelated errors are not mistaken for outages
            prop_val = self._get_property(prop_name=prop_name, representation=representation)
        except requests.HTTPError:
            logging.error('Website down')
            return None

        # Only genuine multi-result sequences are unpacked; a plain str is also a Container (and indexable),
        # so testing against Container would truncate a result like 'ethanol' to its first character
        if keep_first_only and isinstance(prop_val, (list, tuple)):
            return prop_val[0] if prop_val else None # an empty result list means nothing was found
        return prop_val
        
        
class NIHCACTUSQuery(ChemDBQueryStrategy):
    '''Query strategy which resolves properties through the NIH CACTUS Chemical Identifier Resolver (CIR) via cirpy'''
    @property
    def SERVICE_NAME(self) -> str:
        '''Name of the service, as shown in log and error messages'''
        return 'NIH CACTUS CIR'
    
    # property names accepted in addition to the file formats listed by cirpy
    _CIR_PROPS : ClassVar[set[str]] = {
        'stdinchikey',
        'stdinchi',
        'smiles',
        'ficts',
        'ficus',
        'uuuuu',
        'hashisy',
        'names',
        'iupac_name',
        'cas',
        'chemspider_id',
        'image',
        'twirl',
        'mw',
        'formula',
        'h_bond_donor_count',
        'h_bond_acceptor_count',
        'h_bond_center_count',
        'rule_of_5_violation_count',
        'rotor_count',
        'effective_rotor_count',
        'ring_count',
        'ringsys_count',
    }
    @property
    def VALID_PROPERTIES(self) -> set[str]: # see official docs for more info: https://cactus.nci.nih.gov/chemical/structure_documentation
        '''All requestable names: the CIR properties above together with cirpy's file formats'''
        return self._CIR_PROPS.union(cirpy.FILE_FORMATS)
    
    def _get_property(self, prop_name : str, representation : str) -> Optional[Any]:
        '''Resolve the molecule identifier into the requested property via cirpy'''
        # cirpy takes the molecule identifier first and the desired output second,
        # i.e. the reverse of this method's own argument order
        return cirpy.resolve(representation, prop_name)

In [76]:
# query through the strategy class; keep_first_only=False returns the service's result without indexing into it
strat = NIHCACTUSQuery()
strat.get_property('iupac_name', 'CCO', keep_first_only=False)

'ethanol'

In [78]:
# same molecule looked up through pubchempy, treating the input string as SMILES
pcp.get_compounds('CCO', namespace='smiles')

[Compound(702)]

In [36]:
# call cirpy directly, bypassing the strategy class, asking for the 'SMILES' representation of 'CCO'
cirpy.resolve('CCO', 'SMILES')

'CCO'

In [37]:
# show the API URL which cirpy constructs for the same query
cirpy.construct_api_url('CCO', 'SMILES')

'https://cactus.nci.nih.gov/chemical/structure/CCO/SMILES/xml'

In [6]:
# send a plain GET request to cirpy's base API URL
r = requests.get(cirpy.API_BASE)

In [10]:
# HTTP status code of the response stored in r
r.status_code

200

## Testing polymerist importability within environment

In [None]:
import numpy as np
from openff.toolkit import Molecule, Topology, ForceField

In [None]:
# print whatever module_hierarchy() reports for the top-level polymerist package
import polymerist as ps
from polymerist.genutils.importutils import pyimports, module_hierarchy

import pandas as pd
print(module_hierarchy(ps))

In [None]:
# confirm that nglview imports, report its version, and run its demo() function
import nglview

print(nglview.__version__)
nglview.demo()

In [None]:
# Draw a molecule parsed from plain SMILES next to one parsed from polymerist's expanded, atom-mapped form
from polymerist.polymers.monomers import specification
from rdkit import Chem

smi = 'CCO-c1ccccc1-N=C=C'
mol1 = Chem.MolFromSmiles(smi)
display(mol1)

sma = specification.expanded_SMILES(smi, assign_map_nums=True)
exp_sma = specification.compliant_mol_SMARTS(sma)
# NOTE(review): exp_sma is never used -- mol2 is parsed from sma rather than exp_sma; confirm which of the two was intended
mol2 = Chem.MolFromSmarts(sma)
display(mol2)


In [None]:
# build an OpenFF Molecule from smi (not defined in this cell; it must come from an earlier one),
# generate a single conformer for it, and visualize it with the nglview backend
from openff.toolkit import Molecule

offmol = Molecule.from_smiles(smi)
offmol.generate_conformers(n_conformers=1)
offmol.visualize(backend='nglview')

## Experimenting with Protocols

In [None]:
from typing import Protocol, runtime_checkable
from dataclasses import dataclass


@runtime_checkable
class Nominative(Protocol):
    '''Structural interface for objects which provide a name() method;
    runtime_checkable allows it to be used with isinstance() and issubclass()'''
    def name(self) -> str:
        '''Return the name of the object as a string'''
        ...
        
class Foo(Nominative):
    '''Class which inherits from Nominative explicitly and stores a single string'''
    def __init__(self, letter : str):
        self.letter = letter

    def name(self) -> str:
        '''Return the stored letter repeated three times'''
        tripled = self.letter * 3
        return tripled

@dataclass
class Bar:
    '''Dataclass which does not inherit from any protocol but defines a name() method of its own'''
    id : int

    def name(self) -> str:
        '''Return the name of the instance's class immediately followed by its id'''
        cls_name = type(self).__name__
        return f'{cls_name}{self.id}'
    
@dataclass
class Baz:
    '''Dataclass with no name() method; it defines nombre() instead'''
    id : int
    
    def nombre(self) -> str:
        '''Return the fixed string 'name', regardless of the instance's id'''
        return 'name'

In [None]:
# collect the three example classes, and one instance of each, for the protocol checks in the following cells
classes = (Foo, Bar, Baz)
a = Foo('a')
b = Bar(2)
c = Baz(3)
instances = (a, b, c)

In [None]:
# class-level check: report whether each class counts as a subclass of the Nominative protocol
for typ in classes:
    print(typ, issubclass(typ, Nominative))

In [None]:
# instance-level check: report whether each instance counts as an instance of the Nominative protocol
for inst in instances:
    print(inst, isinstance(inst, Nominative))

# Parsing lines from a PDB file

In [None]:
# sample ATOM / HETATM records used to exercise the tokenizer; only l3 extends past the temperature factor columns
l1 = 'ATOM    189  C99 OCT     5      39.590  30.100  38.320  1.00  0.00'           
l2 = 'ATOM    190 C100 OCT     5      38.850  31.110  37.700  1.00  0.00'
l3 = 'HETATM   47  H21 UNL     1       0.000   0.000   0.000  1.00  0.00           H '

In [None]:
from typing import Any

# Column ranges (1-indexed, both ends inclusive) of each field in an ATOM/HETATM record
# NOTE: the first field holds the record name itself (e.g. 'ATOM  ' or 'HETATM'), not a boolean
PDB_ATOM_TOKEN_COLUMNS : dict[str, tuple[int, int]] = {
    'Is Heteratom' : (1, 6),
    'Atom serial number' : (7, 11),
    'Atom name' : (13, 16),
    'Alternate location indicator' : (17, 17),
    'Residue name' : (18, 20),
    'Chain identifier' : (22, 22),
    'Residue sequence number' : (23, 26),
    'Code for insertions of residues' : (27, 27),
    'X (angstrom)' : (31, 38),
    'Y (angstrom)' : (39, 46),
    'Z (angstrom)' : (47, 54),
    'Occupancy' : (55, 60),
    'Temperature factor' : (61, 66),
    'Segment identifier' : (73, 76),
    'Element symbol' : (77, 78),
    'Charge' : (79, 80),
} # taken from PDB spec doc (https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html)
def tokenize_pdb_atom_line(line : str) -> dict[str, str]:
    '''Split a single ATOM/HETATM record of a PDB file into its fixed-width fields

    Parameters
    ----------
    line : str
        One record line; may be shorter than 80 characters and may end with a line terminator

    Returns
    -------
    tokens : dict[str, str]
        The raw text of each field, keyed by the field names in PDB_ATOM_TOKEN_COLUMNS
        Fields are returned exactly as sliced: no whitespace is stripped and no values are converted
    '''
    # drop any trailing line terminator BEFORE padding, otherwise it is padded in place and can end up inside a field
    padded = line.rstrip('\r\n').ljust(80, ' ') # pad line to 80 characters with spaces
    return {
        prop_name : padded[i_start-1:i_end] # columns are 1-indexed and inclusive, slices are 0-indexed and half-open
            for prop_name, (i_start, i_end) in PDB_ATOM_TOKEN_COLUMNS.items()
    }


# print the tokenized form of each sample record
for line in (l1, l2, l3):
    print(tokenize_pdb_atom_line(line))

# Another thing