Skip to content

Commit

Permalink
Get entries api (#220)
Browse files Browse the repository at this point in the history
* Add value iteration example.

* Add NotImplemented base method.

* Minor.

* Implement get_entries for Molecule class.

* Only yield molecule.

* Implement get_entries for ConstructedMolecule class.

* Handle KeyError in example.

* Add tests.

* Update examples.

* Update constructed_molecule.py

* Update molecule.py

* Update constructed_molecule.py

* Update molecule.py

* Update value.py

* Update test_get_entries.py

* Update docs.

* Fix tests. Minor.

Co-authored-by: Lukas Turcani <lukasturcani93@gmail.com>
  • Loading branch information
andrewtarzia and lukasturcani committed Aug 7, 2020
1 parent 77d063f commit c2f95e4
Show file tree
Hide file tree
Showing 7 changed files with 285 additions and 2 deletions.
13 changes: 13 additions & 0 deletions src/stk/databases/constructed_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,16 @@ def get(self, key):
"""

raise NotImplementedError()

def get_all(self):
    """
    Iterate over every entry held in the database.

    This base implementation does no work and always raises.

    Yields
    ------
    :class:`.ConstructedMolecule`
        A molecule in the database.

    Raises
    ------
    :class:`NotImplementedError`
        Always, when this base implementation is called.

    """

    raise NotImplementedError
13 changes: 13 additions & 0 deletions src/stk/databases/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,16 @@ def get(self, key):
"""

raise NotImplementedError()

def get_all(self):
    """
    Iterate over every molecule held in the database.

    This base implementation does no work and always raises.

    Yields
    ------
    :class:`.Molecule`
        A molecule in the database.

    Raises
    ------
    :class:`NotImplementedError`
        Always, when this base implementation is called.

    """

    raise NotImplementedError
62 changes: 62 additions & 0 deletions src/stk/databases/mongo_db/constructed_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ class ConstructedMoleculeMongoDb(ConstructedMoleculeDatabase):
ordering, which allows position matrices to be used across
different atom id orderings.
All entries in a database can be iterated over very simply
.. code-block:: python
for entry in db.get_all():
# Do something to entry.
By default, the only molecular key the database stores is the
InChIKey. However, additional keys can be added to the JSON stored
in the database by using a different
Expand Down Expand Up @@ -539,3 +546,58 @@ def _get_building_block(self, key):
'matrix':
self._building_block_position_matrices.find_one(key),
}

def _get_molecule_keys(self, entry):
# Ignore keys reserved by constructed molecule collections.
reserved_keys = ('_id', 'BB', 'nBB', 'aI', 'bI')

for key, value in entry.items():
if key not in reserved_keys:
yield key, value

def get_all(self):
    """
    Get all constructed molecules in the database.

    Yields
    ------
    :class:`.ConstructedMolecule`
        A constructed molecule in the database.

    Raises
    ------
    :class:`KeyError`
        If a constructed molecule entry has no matching entry in
        the molecule collection or in the position matrix
        collection.

    """

    for entry in self._constructed_molecules.find():
        # Do 'or' query over all key value pairs, so that a match
        # on any one molecular key is sufficient.
        query = {'$or': [
            {key: value}
            for key, value in self._get_molecule_keys(entry)
        ]}

        molecule_json = self._molecules.find_one(query)
        if molecule_json is None:
            raise KeyError(
                'No molecule found in the database associated '
                f'with a constructed molecule with query: {query}. '
                'This suggests your database is corrupted.'
            )

        position_matrix = self._position_matrices.find_one(query)
        if position_matrix is None:
            raise KeyError(
                'No position matrix found in the database '
                'associated with a constructed molecule with '
                f'query: {query}. This suggests your database is '
                'corrupted.'
            )

        yield self._dejsonizer.from_json(
            json={
                'molecule': molecule_json,
                'constructedMolecule': entry,
                'matrix': position_matrix,
                # One building block JSON per key stored under 'BB'.
                'buildingBlocks': tuple(map(
                    self._get_building_block,
                    entry['BB'],
                )),
            },
        )
51 changes: 49 additions & 2 deletions src/stk/databases/mongo_db/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ class MoleculeMongoDb(MoleculeDatabase):
ordering, which allows position matrices to be used across
different atom id orderings.
All entries in a database can be iterated over very simply
.. code-block:: python
for entry in db.get_all():
# Do something to entry.
By default, the only molecular key the database stores is the
InChIKey. However, additional keys can be added to the JSON stored
in the database by using a different :class:`.MoleculeJsonizer`
Expand Down Expand Up @@ -166,7 +173,7 @@ def get_key(self, molecule):
db = stk.MoleculeMongoDb(
mongo_client=client,
jsonizer=stk.MoleculeJsonizer(
key_makers=(stk.InchiKey(), smiles),
key_makers=(stk.InchiKey(), smiles),
),
)
Expand Down Expand Up @@ -327,7 +334,7 @@ def _get(self, key):
Parameters
----------
key : :class:`.HashableDict`
The key of a a molecule, which is to be returned from the
The key of a molecule, which is to be returned from the
database.
Returns
Expand All @@ -354,3 +361,43 @@ def _get(self, key):
'molecule': json,
'matrix': position_matrix,
})

def _get_molecule_keys(self, entry):

# Ignore keys reserved by position matrix collections.
reserved_keys = ('m', '_id')

for key, value in entry.items():
if key not in reserved_keys:
yield key, value

def get_all(self):
    """
    Iterate over every molecule stored in the database.

    Each entry of the position matrix collection is paired with
    the molecule entry sharing at least one of its molecular keys.

    Yields
    ------
    :class:`.Molecule`
        A molecule in the database.

    Raises
    ------
    :class:`KeyError`
        If a position matrix entry has no matching entry in the
        molecule collection.

    """

    for matrix_entry in self._position_matrices.find():
        # A match on any single molecular key is sufficient.
        key_filters = [
            {name: value}
            for name, value in self._get_molecule_keys(matrix_entry)
        ]
        query = {'$or': key_filters}

        molecule_json = self._molecules.find_one(query)
        if molecule_json is None:
            raise KeyError(
                'No molecule found in the database associated '
                f'with a position matrix with query: {query}. '
                'This suggests your database is corrupted.'
            )

        combined = {
            'molecule': molecule_json,
            'matrix': matrix_entry,
        }
        yield self._dejsonizer.from_json(combined)
23 changes: 23 additions & 0 deletions src/stk/databases/value.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,29 @@ class ValueDatabase:
:mod:`molecule_value_database <.databases.value>`,
can serve as good examples.
*Iterating Through Entries in Database*
The :meth:`.MoleculeDatabase.get_all` method of a molecule database
instance can be used to iterate through its molecules, which can
then be used to look up their values in a value database.
.. code-block:: python
client = pymongo.MongoClient()
molecule_db = stk.MoleculeMongoDb(client)
value_db = stk.ValueMongoDb(
mongo_client=client,
collection='atom_counts',
)
for molecule in molecule_db.get_all():
try:
value = value_db.get(molecule)
except KeyError:
# In case molecule is not in value_db.
pass
"""

def put(self, molecule, value):
Expand Down
75 changes: 75 additions & 0 deletions tests/databases/constructed_molecule/mongo_db/test_get_all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import stk
import pymongo

from tests.utilities import is_equivalent_constructed_molecule


def test_get_all():
    """
    Test iteration over all constructed molecules.

    Every molecule placed into the database must be yielded by
    ``get_all()`` exactly once, and must be equivalent to the
    molecule which was originally stored. The test connects to
    MongoDB through a default-constructed
    :class:`pymongo.MongoClient`.

    """

    database_name = '_test_get_entries_constructed_molecule'
    client = pymongo.MongoClient()
    # Start from a clean slate, so that entries left over from
    # previous runs do not affect the count.
    client.drop_database(database_name)

    key_maker = stk.Inchi()
    jsonizer = stk.ConstructedMoleculeJsonizer(
        key_makers=(key_maker, )
    )

    database = stk.ConstructedMoleculeMongoDb(
        mongo_client=client,
        database=database_name,
        jsonizer=jsonizer,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
    )

    molecules = [
        stk.ConstructedMolecule(
            topology_graph=stk.polymer.Linear(
                building_blocks=(
                    stk.BuildingBlock(
                        smiles='BrCCCBr',
                        functional_groups=[stk.BromoFactory()],
                    ),
                ),
                repeating_unit='A',
                num_repeating_units=3,
            ),
        ),
        stk.ConstructedMolecule(
            topology_graph=stk.polymer.Linear(
                building_blocks=(
                    stk.BuildingBlock(
                        smiles='BrCCBr',
                        functional_groups=[stk.BromoFactory()],
                    ),
                    stk.BuildingBlock(
                        smiles='BrCNCBr',
                        functional_groups=[stk.BromoFactory()],
                    ),
                ),
                repeating_unit='AB',
                num_repeating_units=2,
            ),
        ),
    ]
    molecules_by_key = {
        key_maker.get_key(molecule): molecule
        for molecule in molecules
    }

    for molecule in molecules:
        database.put(molecule)

    # Record the key of every retrieved molecule, rather than relying
    # on a loop index, which would be unbound if nothing is yielded.
    retrieved_keys = []
    for retrieved in database.get_all():
        key = key_maker.get_key(retrieved)
        retrieved_keys.append(key)
        molecule = molecules_by_key[key]
        is_equivalent_constructed_molecule(
            molecule.with_canonical_atom_ordering(),
            retrieved.with_canonical_atom_ordering(),
        )

    # Check number of molecules, and that each one was retrieved
    # exactly once.
    assert len(retrieved_keys) == len(molecules)
    assert set(retrieved_keys) == set(molecules_by_key)
50 changes: 50 additions & 0 deletions tests/databases/molecule/mongo_db/test_get_all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import stk
import pymongo

from tests.utilities import is_equivalent_molecule


def test_get_all():
    """
    Test iteration over all molecules.

    Every molecule placed into the database must be yielded by
    ``get_all()`` exactly once, and must be equivalent to the
    molecule which was originally stored. The test connects to
    MongoDB through a default-constructed
    :class:`pymongo.MongoClient`.

    """

    database_name = '_test_get_entries_molecule'
    client = pymongo.MongoClient()
    # Start from a clean slate, so that entries left over from
    # previous runs do not affect the count.
    client.drop_database(database_name)

    key_maker = stk.Inchi()
    jsonizer = stk.MoleculeJsonizer(key_makers=(key_maker, ))

    database = stk.MoleculeMongoDb(
        mongo_client=client,
        database=database_name,
        jsonizer=jsonizer,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
    )

    molecules = (
        stk.BuildingBlock('CCC').with_canonical_atom_ordering(),
        stk.BuildingBlock('BrCCCBr').with_canonical_atom_ordering(),
        stk.BuildingBlock('NCCN').with_canonical_atom_ordering(),
    )
    molecules_by_key = {
        key_maker.get_key(molecule): molecule
        for molecule in molecules
    }

    for molecule in molecules:
        database.put(molecule)

    # Record the key of every retrieved molecule, rather than relying
    # on a loop index, which would be unbound if nothing is yielded.
    retrieved_keys = []
    for retrieved in database.get_all():
        key = key_maker.get_key(retrieved)
        retrieved_keys.append(key)
        molecule = molecules_by_key[key]
        is_equivalent_molecule(
            molecule1=molecule.with_canonical_atom_ordering(),
            molecule2=retrieved.with_canonical_atom_ordering(),
        )

    # Check number of molecules, and that each one was retrieved
    # exactly once.
    assert len(retrieved_keys) == len(molecules)
    assert set(retrieved_keys) == set(molecules_by_key)

0 comments on commit c2f95e4

Please sign in to comment.