Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions polaris/dataset/zarr/codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@ def encode(self, buf: np.ndarray):
"""
Encode a chunk of RDKit Mols to byte strings
"""
to_encode = np.empty(shape=len(buf), dtype=object)
# NOTE (cwognum): I ran into a Cython issue because we could pass None to the VLenBytes codec.
# Using np.full() ensures all elements are initialized as empty byte strings instead.
to_encode = np.full(fill_value=b"", shape=len(buf), dtype=object)
for idx, mol in enumerate(buf):
if mol is None or (isinstance(mol, bytes) and len(mol) == 0):
continue
if not isinstance(mol, Chem.Mol):
raise ValueError(f"Expected an RDKitMol, but got {type(buf)} instead.")
raise ValueError(f"Expected an RDKitMol, but got {type(mol)} instead.")
props = Chem.PropertyPickleOptions.AllProps
to_encode[idx] = mol.ToBinary(props)

Expand Down
9 changes: 8 additions & 1 deletion tests/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@

def test_rdkit_mol_codec():
mol = dm.to_mol("C1=CC=CC=C1")
arr = zarr.array([mol, mol], chunks=(2,), dtype=object, object_codec=RDKitMolCodec())

arr = zarr.empty(shape=10, chunks=2, dtype=object, object_codec=RDKitMolCodec())

arr[0] = mol
arr[1] = mol
arr[2] = mol

assert dm.same_mol(arr[0], mol)
assert dm.same_mol(arr[1], mol)
assert dm.same_mol(arr[2], mol)