Skip to content

Commit

Permalink
Formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
Jorge authored and lkuligin committed Apr 9, 2024
1 parent 2db5d3c commit 85d1bd8
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@
"VectorSearchVectorStoreDatastore",
"VectorSearchVectorStoreGCS",
"DataStoreDocumentStorage",
"GCSDocumentStorage"
"GCSDocumentStorage",
]
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
class DocumentStorage(BaseStore[str, Document]):
"""Abstract interface of a key, text storage for retrieving documents."""


class GCSDocumentStorage(DocumentStorage):
"""Stores documents in Google Cloud Storage.
For each pair id, document_text the name of the blob will be {prefix}/{id} stored
Expand All @@ -31,9 +32,9 @@ def __init__(
super().__init__()
self._bucket = bucket
self._prefix = prefix

def mset(self, key_value_pairs: Sequence[Tuple[str, Document]]) -> None:
""" Stores a series of documents using each keys
"""Stores a series of documents using each keys
Args:
key_value_pairs (Sequence[Tuple[K, V]]): A sequence of key-value pairs.
Expand All @@ -53,9 +54,9 @@ def mget(self, keys: Sequence[str]) -> List[Optional[Document]]:
None instead.
"""
return [self._get_one(key) for key in keys]

def mdelete(self, keys: Sequence[str]) -> None:
""" Deletes a batch of documents by id.
"""Deletes a batch of documents by id.
Args:
keys: List of ids for the text.
Expand All @@ -64,14 +65,13 @@ def mdelete(self, keys: Sequence[str]) -> None:
self._delete_one(key)

def yield_keys(self, *, prefix: str | None = None) -> Iterator[str]:
""" Yields the keys present in the storage.
"""Yields the keys present in the storage.
Args:
prefix: Ignored. Uses the prefix provided in the constructor.
"""
for blob in self._bucket.list_blobs(prefix=self._prefix):
yield blob.name.split("/")[-1]


def _get_one(self, key: str) -> Document | None:
"""Gets the text of a document by its id. If not found, returns None.
Expand Down Expand Up @@ -105,7 +105,7 @@ def _set_one(self, key: str, value: Document) -> None:
new_blow.upload_from_string(document_text)

def _delete_one(self, key: str) -> None:
""" Deletes one document by its key.
"""Deletes one document by its key.
Args:
key (str): Id of the document to delete.
Expand Down Expand Up @@ -168,12 +168,14 @@ def mget(self, keys: Sequence[str]) -> List[Optional[Document]]:
metadata=self._convert_entity_to_dict(
entity[self._metadata_property_name]
),
) if entity is not None else None
)
if entity is not None
else None
for entity in entities
]

def mset(self, key_value_pairs: Sequence[Tuple[str, Document]]) -> None:
""" Stores a series of documents using each keys
"""Stores a series of documents using each keys
Args:
key_value_pairs (Sequence[Tuple[K, V]]): A sequence of key-value pairs.
Expand All @@ -194,7 +196,7 @@ def mset(self, key_value_pairs: Sequence[Tuple[str, Document]]) -> None:
self._client.put_multi(entities)

def mdelete(self, keys: Sequence[str]) -> None:
""" Deletes a sequence of documents by key.
"""Deletes a sequence of documents by key.
Args:
keys (Sequence[str]): A sequence of keys to delete.
Expand All @@ -204,7 +206,7 @@ def mdelete(self, keys: Sequence[str]) -> None:
self._client.delete_multi(keys)

def yield_keys(self, *, prefix: str | None = None) -> Iterator[str]:
""" Yields the keys of all documents in the storage.
"""Yields the keys of all documents in the storage.
Args:
prefix: Ignored
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ def similarity_search_by_vector_with_score(
# Ignore typing because mypy doesn't seem to be able to identify that
# in documents there is no possibility to have None values with the
# check above.
return list(zip(documents, distances)) # type: ignore
else:
return list(zip(documents, distances)) # type: ignore
else:
missing_docs = [key for key, doc in zip(keys, documents) if doc is None]
message = f"Documents with ids: {missing_docs} not found in the storage"
raise ValueError(message)
Expand Down

0 comments on commit 85d1bd8

Please sign in to comment.