# OpenEye Structural Featurizer

In [1]:
from importlib import resources
import inspect

from appdirs import user_cache_dir

from kinoml.core.ligands import Ligand
from kinoml.core.proteins import Protein
from kinoml.core.systems import ProteinSystem
from kinoml.features.protein import OEProteinStructureFeaturizer

  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  setattr(self, word, getattr(machar, word).flat[0])
  return self._float_to_str(self.smallest_subnormal)
  class NCDFPicklable(scipy.io.netcdf.netcdf_file):


## OEProteinStructureFeaturizer

In [2]:
# Print the cleaned-up class docstring so the notebook documents which protein
# attributes the featurizer understands.
featurizer_doc = inspect.getdoc(OEProteinStructureFeaturizer)
print(featurizer_doc)

Given systems with exactly one protein, prepare the protein structure by:

 - modeling missing loops
 - building missing side chains
 - mutations, if `uniprot_id` or `sequence` attribute is provided for the protein component
   (see below)
 - removing everything but protein and water
 - protonation at pH 7.4

The protein component of each system must have a `pdb_id` or a `path` attribute specifying
the protein structure to prepare.

 - `pdb_id`: A string specifying the PDB entry of interest, required if `path` not given.
 - `path`: The path to the structure file, required if `pdb_id` not given.

Additionally, the protein component can have the following optional attributes to customize
the protein modeling:

 - `name`: A string specifying the name of the protein, will be used for generating the
   output file name.
 - `chain_id`: A string specifying which chain should be used.
 - `alternate_location`: A string specifying which alternate location should be used.
 - `expo_id`: A string s

In [3]:
# System 1: minimal specification, i.e. only the PDB entry and a protein name.
# The list is (re)created here, so re-running from this cell starts clean.
systems = [
    ProteinSystem(components=[Protein(pdb_id="4f8o", name="PsaA")]),
]

In [4]:
# System 2: same PDB entry, created via `Protein.from_pdb`, with optional
# attributes that customize the protein modeling.
protein = Protein.from_pdb(pdb_id="4f8o", name="PsaA")
modeling_options = {
    "uniprot_id": "P31522",       # enables mutations (see featurizer docstring)
    "chain_id": "A",              # which chain should be used
    "alternate_location": "B",    # which alternate location should be used
    "expo_id": "AES",             # presumably a ligand identifier; TODO confirm
}
# Assign in the listed order; equivalent to `protein.<name> = <value>` per entry.
for option_name, option_value in modeling_options.items():
    setattr(protein, option_name, option_value)
systems.append(ProteinSystem(components=[protein]))

In [5]:
# System 3: read the structure from a PDB file shipped inside the
# `kinoml.data.proteins` package instead of referring to a PDB entry.
#
# `resources.path` may have to extract the resource to a temporary file, which
# is removed again as soon as the context manager exits. The structure is
# therefore loaded while the context is still open, rather than using
# `structure_path` after the `with` block.
# NOTE(review): assumes `Protein.from_file` reads the file eagerly; confirm.
with resources.path("kinoml.data.proteins", "4f8o_edit.pdb") as structure_path:
    protein = Protein.from_file(file_path=structure_path, name="PsaA")
# Providing a UniProt ID enables mutations during preparation (see featurizer docstring).
protein.uniprot_id = "P31522"
system = ProteinSystem(components=[protein])
systems.append(system)

In [6]:
# Locate the loop database shipped inside the `kinoml.data.proteins` package.
# NOTE(review): `loop_db` is used after the context manager has exited. This is
# only safe when the resource already exists as a regular file on disk (no
# temporary extraction); confirm for the installation in use.
loop_db_resource = resources.path(
    "kinoml.data.proteins", "kinoml_tests_4f8o_spruce.loop_db"
)
with loop_db_resource as loop_db:
    pass

# Configure the featurizer: loop database plus the appdirs user cache directory
# as `output_dir`.
featurizer_options = {
    "loop_db": loop_db,
    "output_dir": user_cache_dir(),
}
featurizer = OEProteinStructureFeaturizer(**featurizer_options)

In [7]:
# Run the structure preparation on all three systems and display the result.
# NOTE(review): `systems` is overwritten with the featurizer's return value, so
# re-running this cell alone passes already-featurized systems back in.
systems = featurizer.featurize(systems)
systems



[<ProteinSystem with 1 components (<Protein name=PsaA>)>,
 <ProteinSystem with 1 components (<Protein name=PsaA>)>,
 <ProteinSystem with 1 components (<Protein name=PsaA>)>]

In [8]:
# Inspect the first featurized system.
systems[0]

<ProteinSystem with 1 components (<Protein name=PsaA>)>

In [9]:
# The featurization stored under the "last" key; it is displayed as a
# `Universe` (presumably an MDAnalysis Universe; TODO confirm).
systems[0].featurizations["last"]

<Universe with 2381 atoms>

In [10]:
# Sanity checks on the prepared structures, one expected value per system
# (in the order the systems were added).

# Number of residues in each prepared structure.
expected_residue_counts = (239, 216, 109)
for system_index, expected_count in enumerate(expected_residue_counts):
    assert len(systems[system_index].featurizations["last"].residues) == expected_count

# Residue number (resid) of the first residue in each prepared structure.
expected_first_resids = (1, 44, 47)
for system_index, expected_resid in enumerate(expected_first_resids):
    assert systems[system_index].featurizations["last"].residues[0].resid == expected_resid