diff --git a/pinecone/db_data/index.py b/pinecone/db_data/index.py
index 4c6b35382..460e9b37f 100644
--- a/pinecone/db_data/index.py
+++ b/pinecone/db_data/index.py
@@ -276,6 +276,163 @@ def upsert(
show_progress: bool = True,
**kwargs,
) -> UpsertResponse | ApplyResult:
+ """Upsert vectors into a namespace of your index.
+
+ The upsert operation writes vectors into a namespace of your index.
+ If a new value is upserted for an existing vector id, it will overwrite the previous value.
+
+ Args:
+ vectors: A list of vectors to upsert. Can be a list of Vector objects, tuples, or dictionaries.
+ namespace: The namespace to write to. If not specified, the default namespace is used. [optional]
+ batch_size: The number of vectors to upsert in each batch.
+ If not specified, all vectors will be upserted in a single batch. [optional]
+ show_progress: Whether to show a progress bar using tqdm.
+ Applied only if batch_size is provided. Default is True.
+ **kwargs: Additional keyword arguments for the API call.
+
+ Returns:
+ UpsertResponse: Includes the number of vectors upserted. If async_req=True, returns ApplyResult instead.
+
+ **Upserting dense vectors**
+
+ When working with dense vectors, the dimension of each vector must match the dimension configured for the
+ index. A vector can be represented in a variety of ways.
+
+ .. code-block:: python
+ :caption: Upserting a dense vector using the Vector object
+
+ from pinecone import Pinecone, Vector
+
+ pc = Pinecone()
+ idx = pc.Index(host="example-index-host")
+
+ idx.upsert(
+ namespace='my-namespace',
+ vectors=[
+ Vector(
+ id='id1',
+ values=[0.1, 0.2, 0.3, 0.4],
+ metadata={'metadata_key': 'metadata_value'}
+ ),
+ ]
+ )
+
+ .. code-block:: python
+ :caption: Upserting a dense vector as a two-element tuple (no metadata)
+
+ idx.upsert(
+ namespace='my-namespace',
+ vectors=[
+ ('id1', [0.1, 0.2, 0.3, 0.4]),
+ ]
+ )
+
+ .. code-block:: python
+ :caption: Upserting a dense vector as a three-element tuple with metadata
+
+ idx.upsert(
+ namespace='my-namespace',
+ vectors=[
+ (
+ 'id1',
+ [0.1, 0.2, 0.3, 0.4],
+ {'metadata_key': 'metadata_value'}
+ ),
+ ]
+ )
+
+ .. code-block:: python
+ :caption: Upserting a dense vector using a vector dictionary
+
+ idx.upsert(
+ namespace='my-namespace',
+ vectors=[
+ {
+ "id": "id1",
+ "values": [0.1, 0.2, 0.3, 0.4],
+ "metadata": {"metadata_key": "metadata_value"}
+ },
+ ]
+ )
+
+ **Upserting sparse vectors**
+
+ .. code-block:: python
+ :caption: Upserting a sparse vector
+
+ from pinecone import (
+ Pinecone,
+ Vector,
+ SparseValues,
+ )
+
+ pc = Pinecone()
+ idx = pc.Index(host="example-index-host")
+
+ idx.upsert(
+ namespace='my-namespace',
+ vectors=[
+ Vector(
+ id='id1',
+ sparse_values=SparseValues(
+ indices=[1, 2],
+ values=[0.2, 0.4]
+ )
+ ),
+ ]
+ )
+
+ .. code-block:: python
+ :caption: Upserting a sparse vector using a dictionary
+
+ idx.upsert(
+ namespace='my-namespace',
+ vectors=[
+ {
+ "id": "id1",
+ "sparse_values": {
+ "indices": [1, 2],
+ "values": [0.2, 0.4]
+ }
+ },
+ ]
+ )
+
+ **Batch upsert**
+
+ If you have a large number of vectors, you can upsert them in batches.
+
+ .. code-block:: python
+ :caption: Upserting in batches
+
+ from pinecone import Pinecone, Vector
+ import random
+
+ pc = Pinecone()
+ idx = pc.Index(host="example-index-host")
+
+ num_vectors = 100000
+ vectors = [
+ Vector(
+ id=f'id{i}',
+ values=[random.random() for _ in range(1536)])
+ for i in range(num_vectors)
+ ]
+
+ idx.upsert(
+ namespace='my-namespace',
+ vectors=vectors,
+ batch_size=50
+ )
+
+ **Visual progress bar with tqdm**
+
+    To see a progress bar when upserting in batches, you will need to separately install `tqdm <https://github.com/tqdm/tqdm>`_.
+ If ``tqdm`` is present, the client will detect and use it to display progress when ``show_progress=True``.
+
+    To upsert in parallel, follow `this link <https://docs.pinecone.io/guides/index-data/upsert-data#upsert-in-parallel>`_.
+
+ """
_check_type = kwargs.pop("_check_type", True)
if kwargs.get("async_req", False) and batch_size is not None:
@@ -368,6 +525,51 @@ def _iter_dataframe(df, batch_size):
def upsert_from_dataframe(
self, df, namespace: str | None = None, batch_size: int = 500, show_progress: bool = True
) -> UpsertResponse:
+ """Upsert vectors from a pandas DataFrame into the index.
+
+ Args:
+ df: A pandas DataFrame with the following columns: id, values, sparse_values, and metadata.
+ namespace: The namespace to upsert into. If not specified, the default namespace is used. [optional]
+ batch_size: The number of rows to upsert in a single batch. Defaults to 500.
+ show_progress: Whether to show a progress bar. Defaults to True.
+
+ Returns:
+ UpsertResponse: Object containing the number of vectors upserted.
+
+ Examples:
+
+ .. code-block:: python
+
+ import pandas as pd
+ from pinecone import Pinecone
+
+ pc = Pinecone()
+ idx = pc.Index(host="example-index-host")
+
+ # Create a DataFrame with vector data
+ df = pd.DataFrame({
+ 'id': ['id1', 'id2', 'id3'],
+ 'values': [
+ [0.1, 0.2, 0.3],
+ [0.4, 0.5, 0.6],
+ [0.7, 0.8, 0.9]
+ ],
+ 'metadata': [
+ {'key1': 'value1'},
+ {'key2': 'value2'},
+ {'key3': 'value3'}
+ ]
+ })
+
+ # Upsert from DataFrame
+ response = idx.upsert_from_dataframe(
+ df=df,
+ namespace='my-namespace',
+ batch_size=100,
+ show_progress=True
+ )
+
+ """
try:
import pandas as pd
except ImportError:
@@ -407,6 +609,73 @@ def upsert_from_dataframe(
return UpsertResponse(upserted_count=upserted_count, _response_info=response_info)
def upsert_records(self, namespace: str, records: list[dict]) -> UpsertResponse:
+ """Upsert records to a namespace.
+
+ A record is a dictionary that contains either an ``id`` or ``_id``
+ field along with other fields that will be stored as metadata. The ``id`` or ``_id`` field is used
+ as the unique identifier for the record. At least one field in the record should correspond to
+ a field mapping in the index's embed configuration.
+
+ When records are upserted, Pinecone converts mapped fields into embeddings and upserts them into
+ the specified namespace of the index.
+
+ Args:
+ namespace: The namespace of the index to upsert records to.
+ records: The records to upsert into the index. Each record should contain an ``id`` or ``_id``
+ field and fields that match the index's embed configuration field mappings.
+
+ Returns:
+ UpsertResponse: Object which contains the number of records upserted.
+
+ Examples:
+
+ .. code-block:: python
+ :caption: Upserting records to be embedded with Pinecone's integrated inference models
+
+ from pinecone import (
+ Pinecone,
+ CloudProvider,
+ AwsRegion,
+ EmbedModel,
+ IndexEmbed
+ )
+
+ pc = Pinecone(api_key="<>")
+
+ # Create an index configured for the multilingual-e5-large model
+ index_model = pc.create_index_for_model(
+ name="my-model-index",
+ cloud=CloudProvider.AWS,
+ region=AwsRegion.US_WEST_2,
+ embed=IndexEmbed(
+ model=EmbedModel.Multilingual_E5_Large,
+ field_map={"text": "my_text_field"}
+ )
+ )
+
+ # Instantiate the index client
+ idx = pc.Index(host=index_model.host)
+
+ # Upsert records
+ idx.upsert_records(
+ namespace="my-namespace",
+ records=[
+ {
+ "_id": "test1",
+ "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.",
+ },
+ {
+ "_id": "test2",
+ "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.",
+ },
+ {
+ "_id": "test3",
+ "my_text_field": "Many people enjoy eating apples as a healthy snack.",
+ },
+ ],
+ )
+
+ """
args = IndexRequestFactory.upsert_records_args(namespace=namespace, records=records)
# Use _return_http_data_only=False to get headers for LSN extraction
result = self._vector_api.upsert_records_namespace(_return_http_data_only=False, **args)
@@ -438,6 +707,74 @@ def search(
rerank: SearchRerankTypedDict | SearchRerank | None = None,
fields: list[str] | None = ["*"], # Default to returning all fields
) -> SearchRecordsResponse:
+ """Search for records in a namespace.
+
+ This operation converts a query to a vector embedding and then searches a namespace. You
+ can optionally provide a reranking operation as part of the search.
+
+ Args:
+ namespace: The namespace in the index to search.
+ query: The SearchQuery to use for the search. The query can include a ``match_terms`` field
+ to specify which terms must be present in the text of each search hit. The match_terms
+ should be a dict with ``strategy`` (str) and ``terms`` (list[str]) keys, e.g.
+ ``{"strategy": "all", "terms": ["term1", "term2"]}``. Currently only "all" strategy
+ is supported, which means all specified terms must be present.
+ **Note:** match_terms is only supported for sparse indexes with integrated embedding
+ configured to use the pinecone-sparse-english-v0 model.
+ rerank: The SearchRerank to use with the search request. [optional]
+ fields: List of fields to return in the response. Defaults to ["*"] to return all fields. [optional]
+
+ Returns:
+ SearchRecordsResponse: The records that match the search.
+
+ Examples:
+
+ .. code-block:: python
+
+ from pinecone import (
+ Pinecone,
+ CloudProvider,
+ AwsRegion,
+ EmbedModel,
+ IndexEmbed,
+ SearchQuery,
+ SearchRerank,
+ RerankModel
+ )
+
+ pc = Pinecone(api_key="<>")
+
+ # Create an index for your embedding model
+ index_model = pc.create_index_for_model(
+ name="my-model-index",
+ cloud=CloudProvider.AWS,
+ region=AwsRegion.US_WEST_2,
+ embed=IndexEmbed(
+ model=EmbedModel.Multilingual_E5_Large,
+ field_map={"text": "my_text_field"}
+ )
+ )
+
+ # Instantiate the index client
+ idx = pc.Index(host=index_model.host)
+
+ # Search for similar records
+ response = idx.search(
+ namespace="my-namespace",
+ query=SearchQuery(
+ inputs={
+ "text": "Apple corporation",
+ },
+ top_k=3,
+ ),
+ rerank=SearchRerank(
+ model=RerankModel.Bge_Reranker_V2_M3,
+ rank_fields=["my_text_field"],
+ top_n=3,
+ ),
+ )
+
+ """
if namespace is None:
raise Exception("Namespace is required when searching records")
@@ -456,6 +793,11 @@ def search_records(
rerank: SearchRerankTypedDict | SearchRerank | None = None,
fields: list[str] | None = ["*"], # Default to returning all fields
) -> SearchRecordsResponse:
+ """Alias of the search() method.
+
+ See :meth:`search` for full documentation and examples.
+
+ """
return self.search(namespace, query=query, rerank=rerank, fields=fields)
@validate_and_convert_errors
@@ -467,6 +809,52 @@ def delete(
filter: FilterTypedDict | None = None,
**kwargs,
) -> dict[str, Any]:
+ """Delete vectors from the index, from a single namespace.
+
+ The Delete operation deletes vectors from the index, from a single namespace.
+ No error is raised if the vector id does not exist.
+
+ Note: For any delete call, if namespace is not specified, the default namespace ``""`` is used.
+ Since the delete operation does not error when ids are not present, this means you may not receive
+ an error if you delete from the wrong namespace.
+
+ Delete can occur in the following mutually exclusive ways:
+
+ 1. Delete by ids from a single namespace
+ 2. Delete all vectors from a single namespace by setting delete_all to True
+ 3. Delete all vectors from a single namespace by specifying a metadata filter
+ (note that for this option delete_all must be set to False)
+
+ Args:
+ ids: Vector ids to delete. [optional]
+ delete_all: This indicates that all vectors in the index namespace should be deleted.
+ Default is False. [optional]
+ namespace: The namespace to delete vectors from. If not specified, the default namespace is used. [optional]
+ filter: If specified, the metadata filter here will be used to select the vectors to delete.
+ This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True.
+            See `metadata filtering <https://docs.pinecone.io/guides/index-data/indexing-overview#metadata>`_ [optional]
+ **kwargs: Additional keyword arguments for the API call.
+
+ Returns:
+ dict[str, Any]: An empty dictionary if the delete operation was successful.
+
+ Examples:
+
+ .. code-block:: python
+
+ >>> # Delete specific vectors by ID
+ >>> index.delete(ids=['id1', 'id2'], namespace='my_namespace')
+ {}
+
+ >>> # Delete all vectors from a namespace
+ >>> index.delete(delete_all=True, namespace='my_namespace')
+ {}
+
+ >>> # Delete vectors matching a metadata filter
+ >>> index.delete(filter={'key': 'value'}, namespace='my_namespace')
+ {}
+
+ """
from typing import cast
result = self._vector_api.delete_vectors(
@@ -479,6 +867,32 @@ def delete(
@validate_and_convert_errors
def fetch(self, ids: list[str], namespace: str | None = None, **kwargs) -> FetchResponse:
+ """Fetch vectors by ID from a single namespace.
+
+ The fetch operation looks up and returns vectors, by ID, from a single namespace.
+ The returned vectors include the vector data and/or metadata.
+
+ Args:
+ ids: The vector IDs to fetch.
+ namespace: The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
+ **kwargs: Additional keyword arguments for the API call.
+
+ Returns:
+ FetchResponse: Object which contains the list of Vector objects, and namespace name.
+
+ Examples:
+
+ .. code-block:: python
+
+ >>> # Fetch vectors from a specific namespace
+ >>> response = index.fetch(ids=['id1', 'id2'], namespace='my_namespace')
+ >>> for vector_id, vector in response.vectors.items():
+ ... print(f"{vector_id}: {vector.values}")
+
+ >>> # Fetch vectors from the default namespace
+ >>> response = index.fetch(ids=['id1', 'id2'])
+
+ """
args_dict = parse_non_empty_args([("namespace", namespace)])
result = self._vector_api.fetch_vectors(ids=ids, **args_dict, **kwargs)
# Copy response info from OpenAPI response if present
@@ -512,31 +926,38 @@ def fetch_by_metadata(
Look up and return vectors by metadata filter from a single namespace.
The returned vectors include the vector data and/or metadata.
+ Args:
+ filter: Metadata filter expression to select vectors.
+            See `metadata filtering <https://docs.pinecone.io/guides/index-data/indexing-overview#metadata>`_
+ namespace: The namespace to fetch vectors from.
+ If not specified, the default namespace is used. [optional]
+ limit: Max number of vectors to return. Defaults to 100. [optional]
+ pagination_token: Pagination token to continue a previous listing operation. [optional]
+ **kwargs: Additional keyword arguments for the API call.
+
+ Returns:
+ FetchByMetadataResponse: Object containing the fetched vectors, namespace, usage, and pagination token.
+
Examples:
.. code-block:: python
- >>> index.fetch_by_metadata(
+ >>> # Fetch vectors matching a complex filter
+ >>> response = index.fetch_by_metadata(
... filter={'genre': {'$in': ['comedy', 'drama']}, 'year': {'$eq': 2019}},
... namespace='my_namespace',
... limit=50
... )
- >>> index.fetch_by_metadata(
+ >>> print(f"Found {len(response.vectors)} vectors")
+
+ >>> # Fetch vectors with pagination
+ >>> response = index.fetch_by_metadata(
... filter={'status': 'active'},
... pagination_token='token123'
... )
+ >>> if response.pagination:
+ ... print(f"Next page token: {response.pagination.next}")
- Args:
- filter (dict[str, str | float | int | bool | List | dict]):
- Metadata filter expression to select vectors.
- See `metadata filtering _`
- namespace (str): The namespace to fetch vectors from.
- If not specified, the default namespace is used. [optional]
- limit (int): Max number of vectors to return. Defaults to 100. [optional]
- pagination_token (str): Pagination token to continue a previous listing operation. [optional]
-
- Returns:
- FetchByMetadataResponse: Object containing the fetched vectors, namespace, usage, and pagination token.
"""
request = IndexRequestFactory.fetch_by_metadata_request(
filter=filter,
@@ -583,6 +1004,82 @@ def query(
sparse_vector: SparseValues | SparseVectorTypedDict | None = None,
**kwargs,
) -> QueryResponse | ApplyResult:
+ """Query a namespace using a query vector.
+
+ The Query operation searches a namespace, using a query vector.
+ It retrieves the ids of the most similar items in a namespace, along with their similarity scores.
+
+ Args:
+            top_k: The number of results to return for each query. Must be an integer greater than 0.
+ vector: The query vector. This should be the same length as the dimension of the index
+ being queried. Each ``query()`` request can contain only one of the parameters
+ ``id`` or ``vector``. [optional]
+ id: The unique ID of the vector to be used as a query vector.
+ Each ``query()`` request can contain only one of the parameters
+ ``vector`` or ``id``. [optional]
+ namespace: The namespace to query vectors from. If not specified, the default namespace is used. [optional]
+ filter: The filter to apply. You can use vector metadata to limit your search.
+            See `metadata filtering <https://docs.pinecone.io/guides/index-data/indexing-overview#metadata>`_ [optional]
+ include_values: Indicates whether vector values are included in the response.
+ If omitted the server will use the default value of False [optional]
+ include_metadata: Indicates whether metadata is included in the response as well as the ids.
+ If omitted the server will use the default value of False [optional]
+ sparse_vector: Sparse values of the query vector. Expected to be either a SparseValues object or a dict
+ of the form: ``{'indices': list[int], 'values': list[float]}``, where the lists each have
+ the same length. [optional]
+ **kwargs: Additional keyword arguments for the API call.
+
+ Returns:
+ QueryResponse: Object which contains the list of the closest vectors as ScoredVector objects,
+ and namespace name. If async_req=True, returns ApplyResult instead.
+
+ Examples:
+
+ .. code-block:: python
+
+ >>> # Query with a vector
+ >>> response = index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace')
+ >>> for match in response.matches:
+ ... print(f"ID: {match.id}, Score: {match.score}")
+
+ >>> # Query using an existing vector ID
+ >>> response = index.query(id='id1', top_k=10, namespace='my_namespace')
+
+ >>> # Query with metadata filter
+ >>> response = index.query(
+ ... vector=[1, 2, 3],
+ ... top_k=10,
+ ... namespace='my_namespace',
+ ... filter={'key': 'value'}
+ ... )
+
+ >>> # Query with include_values and include_metadata
+ >>> response = index.query(
+ ... id='id1',
+ ... top_k=10,
+ ... namespace='my_namespace',
+ ... include_metadata=True,
+ ... include_values=True
+ ... )
+
+ >>> # Query with sparse vector (hybrid search)
+ >>> response = index.query(
+ ... vector=[1, 2, 3],
+ ... sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]},
+ ... top_k=10,
+ ... namespace='my_namespace'
+ ... )
+
+ >>> # Query with sparse vector using SparseValues object
+ >>> from pinecone import SparseValues
+ >>> response = index.query(
+ ... vector=[1, 2, 3],
+ ... sparse_vector=SparseValues(indices=[1, 2], values=[0.2, 0.4]),
+ ... top_k=10,
+ ... namespace='my_namespace'
+ ... )
+
+ """
response = self._query(
*args,
top_k=top_k,
@@ -654,6 +1151,64 @@ def query_namespaces(
sparse_vector: SparseValues | SparseVectorTypedDict | None = None,
**kwargs,
) -> QueryNamespacesResults:
+ """Query multiple namespaces in parallel and combine the results.
+
+ The ``query_namespaces()`` method is used to make a query to multiple namespaces in parallel and combine
+ the results into one result set.
+
+ .. admonition:: Note
+
+ Since several asynchronous calls are made on your behalf when calling this method, you will need to tune
+ the **pool_threads** and **connection_pool_maxsize** parameter of the Index constructor to suit your workload.
+ If these values are too small in relation to your workload, you will experience performance issues as
+ requests queue up while waiting for a request thread to become available.
+
+ Args:
+ vector: The query vector, must be the same length as the dimension of the index being queried.
+ namespaces: The list of namespaces to query.
+ metric: Must be one of 'cosine', 'euclidean', 'dotproduct'. This is needed in order to merge results
+ across namespaces, since the interpretation of score depends on the index metric type.
+ top_k: The number of results you would like to request from each namespace. Defaults to 10. [optional]
+ filter: Pass an optional filter to filter results based on metadata. Defaults to None. [optional]
+ include_values: Boolean field indicating whether vector values should be included with results. Defaults to None. [optional]
+ include_metadata: Boolean field indicating whether vector metadata should be included with results. Defaults to None. [optional]
+ sparse_vector: If you are working with a dotproduct index, you can pass a sparse vector as part of your hybrid search. Defaults to None. [optional]
+ **kwargs: Additional keyword arguments for the API call.
+
+ Returns:
+ QueryNamespacesResults: A QueryNamespacesResults object containing the combined results from all namespaces,
+ as well as the combined usage cost in read units.
+
+ Examples:
+
+ .. code-block:: python
+
+ from pinecone import Pinecone
+
+ pc = Pinecone()
+
+ index = pc.Index(
+ host="index-name",
+ pool_threads=32,
+ connection_pool_maxsize=32
+ )
+
+ query_vec = [0.1, 0.2, 0.3] # An embedding that matches the index dimension
+ combined_results = index.query_namespaces(
+ vector=query_vec,
+ namespaces=['ns1', 'ns2', 'ns3', 'ns4'],
+ metric="cosine",
+ top_k=10,
+ filter={'genre': {"$eq": "drama"}},
+ include_values=True,
+ include_metadata=True
+ )
+
+ for vec in combined_results.matches:
+ print(vec.id, vec.score)
+ print(combined_results.usage)
+
+ """
if namespaces is None or len(namespaces) == 0:
raise ValueError("At least one namespace must be specified")
if sparse_vector is None and vector is not None and len(vector) == 0:
@@ -705,6 +1260,102 @@ def update(
dry_run: bool | None = None,
**kwargs,
) -> UpdateResponse:
+ """Update vectors in a namespace.
+
+ The Update operation updates vectors in a namespace.
+
+ This method supports two update modes:
+
+ 1. **Single vector update by ID**: Provide ``id`` to update a specific vector.
+ - Updates the vector with the given ID
+ - If ``values`` is included, it will overwrite the previous vector values
+ - If ``set_metadata`` is included, the metadata will be merged with existing metadata on the vector.
+ Fields specified in ``set_metadata`` will overwrite existing fields with the same key, while
+ fields not in ``set_metadata`` will remain unchanged.
+
+ 2. **Bulk update by metadata filter**: Provide ``filter`` to update all vectors matching the filter criteria.
+ - Updates all vectors in the namespace that match the filter expression
+ - Useful for updating metadata across multiple vectors at once
+ - If ``set_metadata`` is included, the metadata will be merged with existing metadata on each vector.
+ Fields specified in ``set_metadata`` will overwrite existing fields with the same key, while
+ fields not in ``set_metadata`` will remain unchanged.
+ - The response includes ``matched_records`` indicating how many vectors were updated
+
+ Either ``id`` or ``filter`` must be provided (but not both in the same call).
+
+ Args:
+ id: Vector's unique id. Required for single vector updates. Must not be provided when using filter. [optional]
+ values: Vector values to set. [optional]
+ set_metadata: Metadata to merge with existing metadata on the vector(s). Fields specified will overwrite
+ existing fields with the same key, while fields not specified will remain unchanged. [optional]
+ namespace: Namespace name where to update the vector(s). [optional]
+ sparse_values: Sparse values to update for the vector. Expected to be either a SparseValues object or a dict
+ of the form: ``{'indices': list[int], 'values': list[float]}`` where the lists each have
+ the same length. [optional]
+ filter: A metadata filter expression. When provided, updates all vectors in the namespace that match
+            the filter criteria. See `metadata filtering <https://docs.pinecone.io/guides/index-data/indexing-overview#metadata>`_.
+ Must not be provided when using id. Either ``id`` or ``filter`` must be provided. [optional]
+ dry_run: If ``True``, return the number of records that match the ``filter`` without executing
+ the update. Only meaningful when using ``filter`` (not with ``id``). Useful for previewing
+ the impact of a bulk update before applying changes. Defaults to ``False``. [optional]
+ **kwargs: Additional keyword arguments for the API call.
+
+ Returns:
+ UpdateResponse: An UpdateResponse object. When using filter-based updates, the response includes
+ ``matched_records`` indicating the number of vectors that were updated (or would be updated if
+ ``dry_run=True``).
+
+ Examples:
+
+ **Single vector update by ID:**
+
+ .. code-block:: python
+
+ >>> # Update vector values
+ >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace')
+
+ >>> # Update vector metadata
+ >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace')
+
+ >>> # Update vector values and sparse values
+ >>> index.update(
+ ... id='id1',
+ ... values=[1, 2, 3],
+ ... sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]},
+ ... namespace='my_namespace'
+ ... )
+
+ >>> # Update with SparseValues object
+ >>> from pinecone import SparseValues
+ >>> index.update(
+ ... id='id1',
+ ... values=[1, 2, 3],
+ ... sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]),
+ ... namespace='my_namespace'
+ ... )
+
+ **Bulk update by metadata filter:**
+
+ .. code-block:: python
+
+ >>> # Update metadata for all vectors matching the filter
+ >>> response = index.update(
+ ... set_metadata={'status': 'active'},
+ ... filter={'genre': {'$eq': 'drama'}},
+ ... namespace='my_namespace'
+ ... )
+ >>> print(f"Updated {response.matched_records} vectors")
+
+ >>> # Preview how many vectors would be updated (dry run)
+ >>> response = index.update(
+ ... set_metadata={'status': 'active'},
+ ... filter={'genre': {'$eq': 'drama'}},
+ ... namespace='my_namespace',
+ ... dry_run=True
+ ... )
+ >>> print(f"Would update {response.matched_records} vectors")
+
+ """
# Validate that exactly one of id or filter is provided
if id is None and filter is None:
raise ValueError("Either 'id' or 'filter' must be provided to update vectors.")
@@ -752,6 +1403,36 @@ def update(
def describe_index_stats(
self, filter: FilterTypedDict | None = None, **kwargs
) -> DescribeIndexStatsResponse:
+ """Get statistics about the index's contents.
+
+ The DescribeIndexStats operation returns statistics about the index's contents.
+ For example: The vector count per namespace and the number of dimensions.
+
+ Args:
+ filter: If this parameter is present, the operation only returns statistics for vectors that satisfy the filter.
+            See `metadata filtering <https://docs.pinecone.io/guides/index-data/indexing-overview#metadata>`_ [optional]
+ **kwargs: Additional keyword arguments for the API call.
+
+ Returns:
+ DescribeIndexStatsResponse: Object which contains stats about the index.
+
+ Examples:
+
+ .. code-block:: python
+
+ >>> pc = Pinecone()
+ >>> index = pc.Index(host="example-index-host")
+ >>> stats = index.describe_index_stats()
+ >>> print(f"Total vectors: {stats.total_vector_count}")
+ >>> print(f"Dimension: {stats.dimension}")
+ >>> print(f"Namespaces: {list(stats.namespaces.keys())}")
+
+ >>> # Get stats for vectors matching a filter
+ >>> filtered_stats = index.describe_index_stats(
+ ... filter={'genre': {'$eq': 'drama'}}
+ ... )
+
+ """
from typing import cast
result = self._vector_api.describe_index_stats(
@@ -770,6 +1451,45 @@ def list_paginated(
namespace: str | None = None,
**kwargs,
) -> ListResponse:
+ """List vector IDs based on an id prefix within a single namespace (paginated).
+
+ The list_paginated operation finds vectors based on an id prefix within a single namespace.
+ It returns matching ids in a paginated form, with a pagination token to fetch the next page of results.
+ This id list can then be passed to fetch or delete operations, depending on your use case.
+
+ Consider using the ``list`` method to avoid having to handle pagination tokens manually.
+
+ Args:
+ prefix: The id prefix to match. If unspecified, an empty string prefix will
+ be used with the effect of listing all ids in a namespace [optional]
+ limit: The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
+ pagination_token: A token needed to fetch the next page of results. This token is returned
+ in the response if additional results are available. [optional]
+ namespace: The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
+ **kwargs: Additional keyword arguments for the API call.
+
+ Returns:
+ ListResponse: Object which contains the list of ids, the namespace name, pagination information,
+ and usage showing the number of read_units consumed.
+
+ Examples:
+
+ .. code-block:: python
+
+ >>> # List vectors with a prefix
+ >>> results = index.list_paginated(prefix='99', limit=5, namespace='my_namespace')
+ >>> [v.id for v in results.vectors]
+ ['99', '990', '991', '992', '993']
+ >>> # Get next page
+ >>> if results.pagination and results.pagination.next:
+ ... next_results = index.list_paginated(
+ ... prefix='99',
+ ... limit=5,
+ ... namespace='my_namespace',
+ ... pagination_token=results.pagination.next
+ ... )
+
+ """
args_dict = IndexRequestFactory.list_paginated_args(
prefix=prefix,
limit=limit,
@@ -785,6 +1505,41 @@ def list_paginated(
@validate_and_convert_errors
def list(self, **kwargs):
+ """List vector IDs based on an id prefix within a single namespace (generator).
+
+ The list operation accepts all of the same arguments as list_paginated, and returns a generator that yields
+ a list of the matching vector ids in each page of results. It automatically handles pagination tokens on your
+ behalf.
+
+ Args:
+ prefix: The id prefix to match. If unspecified, an empty string prefix will
+ be used with the effect of listing all ids in a namespace [optional]
+ limit: The maximum number of ids to return. If unspecified, the server will use a default value. [optional]
+ pagination_token: A token needed to fetch the next page of results. This token is returned
+ in the response if additional results are available. [optional]
+ namespace: The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
+ **kwargs: Additional keyword arguments for the API call.
+
+ Yields:
+ list[str]: A list of vector IDs for each page of results.
+
+ Examples:
+
+ .. code-block:: python
+
+ >>> # Iterate over all vector IDs with a prefix
+ >>> for ids in index.list(prefix='99', limit=5, namespace='my_namespace'):
+ ... print(ids)
+ ['99', '990', '991', '992', '993']
+ ['994', '995', '996', '997', '998']
+ ['999']
+
+ >>> # Convert generator to list (be cautious with large datasets)
+ >>> all_ids = []
+ >>> for ids in index.list(prefix='99', namespace='my_namespace'):
+ ... all_ids.extend(ids)
+
+ """
done = False
while not done:
results = self.list_paginated(**kwargs)
@@ -915,7 +1670,18 @@ def cancel_import(self, id: str):
"""Cancel an import operation.
Args:
- id (str): The id of the import operation to cancel.
+ id: The id of the import operation to cancel.
+
+ Returns:
+ The response from the cancel operation.
+
+ Examples:
+
+ .. code-block:: python
+
+ >>> # Cancel an import operation
+ >>> index.cancel_import(id="import-123")
+
"""
return self.bulk_import.cancel(id=id)
@@ -924,16 +1690,80 @@ def cancel_import(self, id: str):
def create_namespace(
self, name: str, schema: dict[str, Any] | None = None, **kwargs
) -> "NamespaceDescription":
+ """Create a namespace in a serverless index.
+
+ Create a namespace in a serverless index. For guidance and examples, see
+        `Manage namespaces <https://docs.pinecone.io/guides/manage-data/manage-namespaces>`_.
+
+ **Note:** This operation is not supported for pod-based indexes.
+
+ Args:
+ name: The name of the namespace to create.
+ schema: Optional schema configuration for the namespace as a dictionary. [optional]
+ **kwargs: Additional keyword arguments for the API call.
+
+ Returns:
+ NamespaceDescription: Information about the created namespace including vector count.
+
+ Examples:
+
+ .. code-block:: python
+
+ >>> # Create a namespace with just a name
+ >>> namespace = index.create_namespace(name="my-namespace")
+ >>> print(f"Created namespace: {namespace.name}, Vector count: {namespace.vector_count}")
+
+ >>> # Create a namespace with schema configuration
+ >>> from pinecone.core.openapi.db_data.model.create_namespace_request_schema import CreateNamespaceRequestSchema
+ >>> schema = CreateNamespaceRequestSchema(fields={...})
+ >>> namespace = index.create_namespace(name="my-namespace", schema=schema)
+
+ """
return self.namespace.create(name=name, schema=schema, **kwargs)
@validate_and_convert_errors
@require_kwargs
def describe_namespace(self, namespace: str, **kwargs) -> "NamespaceDescription":
+ """Describe a namespace within an index, showing the vector count within the namespace.
+
+ Args:
+ namespace: The namespace to describe.
+ **kwargs: Additional keyword arguments for the API call.
+
+ Returns:
+ NamespaceDescription: Information about the namespace including vector count.
+
+ Examples:
+
+ .. code-block:: python
+
+ >>> namespace_info = index.describe_namespace(namespace="my-namespace")
+ >>> print(f"Namespace: {namespace_info.name}")
+ >>> print(f"Vector count: {namespace_info.vector_count}")
+
+ """
return self.namespace.describe(namespace=namespace, **kwargs)
@validate_and_convert_errors
@require_kwargs
def delete_namespace(self, namespace: str, **kwargs) -> dict[str, Any]:
+ """Delete a namespace from an index.
+
+ Args:
+ namespace: The namespace to delete.
+ **kwargs: Additional keyword arguments for the API call.
+
+ Returns:
+ dict[str, Any]: Response from the delete operation.
+
+ Examples:
+
+ .. code-block:: python
+
+ >>> result = index.delete_namespace(namespace="my-namespace")
+ >>> print("Namespace deleted successfully")
+
+ """
from typing import cast
result = self.namespace.delete(namespace=namespace, **kwargs)
@@ -944,6 +1774,33 @@ def delete_namespace(self, namespace: str, **kwargs) -> dict[str, Any]:
def list_namespaces(
self, limit: int | None = None, **kwargs
) -> Iterator[ListNamespacesResponse]:
+ """List all namespaces in an index.
+
+ This method automatically handles pagination to return all results.
+
+ Args:
+ limit: The maximum number of namespaces to return. If unspecified, the server will use a default value. [optional]
+ **kwargs: Additional keyword arguments for the API call.
+
+ Returns:
+ Iterator[ListNamespacesResponse]: An iterator that yields ListNamespacesResponse objects containing the list of namespaces.
+
+ Examples:
+
+ .. code-block:: python
+
+ >>> # Iterate over all namespaces
+ >>> for namespace_response in index.list_namespaces(limit=5):
+ ... for namespace in namespace_response.namespaces:
+ ... print(f"Namespace: {namespace.name}, Vector count: {namespace.vector_count}")
+
+ >>> # Convert to list (be cautious with large datasets)
+ >>> results = list(index.list_namespaces(limit=5))
+ >>> for namespace_response in results:
+ ... for namespace in namespace_response.namespaces:
+ ... print(f"Namespace: {namespace.name}, Vector count: {namespace.vector_count}")
+
+ """
return self.namespace.list(limit=limit, **kwargs)
@validate_and_convert_errors
@@ -951,6 +1808,38 @@ def list_namespaces(
def list_namespaces_paginated(
self, limit: int | None = None, pagination_token: str | None = None, **kwargs
) -> ListNamespacesResponse:
+ """List all namespaces in an index with pagination support.
+
+ The response includes pagination information if there are more results available.
+
+ Consider using the ``list_namespaces`` method to avoid having to handle pagination tokens manually.
+
+ Args:
+ limit: The maximum number of namespaces to return. If unspecified, the server will use a default value. [optional]
+ pagination_token: A token needed to fetch the next page of results. This token is returned
+ in the response if additional results are available. [optional]
+ **kwargs: Additional keyword arguments for the API call.
+
+ Returns:
+ ListNamespacesResponse: Object containing the list of namespaces and pagination information.
+
+ Examples:
+
+ .. code-block:: python
+
+ >>> # Get first page of namespaces
+ >>> results = index.list_namespaces_paginated(limit=5)
+ >>> for namespace in results.namespaces:
+ ... print(f"Namespace: {namespace.name}, Vector count: {namespace.vector_count}")
+
+ >>> # Get next page if available
+ >>> if results.pagination and results.pagination.next:
+ ... next_results = index.list_namespaces_paginated(
+ ... limit=5,
+ ... pagination_token=results.pagination.next
+ ... )
+
+ """
return self.namespace.list_paginated(
limit=limit, pagination_token=pagination_token, **kwargs
)
diff --git a/pinecone/db_data/interfaces.py b/pinecone/db_data/interfaces.py
index f8f8bda73..a8224de1d 100644
--- a/pinecone/db_data/interfaces.py
+++ b/pinecone/db_data/interfaces.py
@@ -242,16 +242,18 @@ def upsert(
def upsert_from_dataframe(
self, df, namespace: str | None = None, batch_size: int = 500, show_progress: bool = True
):
- """Upserts a dataframe into the index.
+ """Upsert vectors from a pandas DataFrame into the index.
- :param df: A pandas dataframe with the following columns: id, values, sparse_values, and metadata.
- :type df: pandas.DataFrame
- :param namespace: The namespace to upsert into.
- :type namespace: str, optional
- :param batch_size: The number of rows to upsert in a single batch.
- :type batch_size: int, optional
- :param show_progress: Whether to show a progress bar.
- :type show_progress: bool, optional
+ Args:
+ df: A pandas DataFrame with the following columns: id, values, sparse_values, and metadata.
+ namespace: The namespace to upsert into. If not specified, the default namespace is used. [optional]
+ batch_size: The number of rows to upsert in a single batch. Defaults to 500.
+ show_progress: Whether to show a progress bar. Defaults to True.
+
+ Returns:
+ UpsertResponse: Object containing the number of vectors upserted.
+
+ Examples:
.. code-block:: python
@@ -259,19 +261,27 @@ def upsert_from_dataframe(
from pinecone import Pinecone
pc = Pinecone()
- idx = pc.Index(host="your-index-host")
+ idx = pc.Index(host="example-index-host")
- # Create a dataframe with vector data
+ # Create a DataFrame with vector data
df = pd.DataFrame({
'id': ['id1', 'id2', 'id3'],
- 'values': [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]],
- 'metadata': [{'key': 'value1'}, {'key': 'value2'}, {'key': 'value3'}]
+ 'values': [
+ [0.1, 0.2, 0.3],
+ [0.4, 0.5, 0.6],
+ [0.7, 0.8, 0.9]
+ ],
+ 'metadata': [
+ {'key1': 'value1'},
+ {'key2': 'value2'},
+ {'key3': 'value3'}
+ ]
})
- # Upsert the dataframe
- idx.upsert_from_dataframe(
+ # Upsert from DataFrame
+ response = idx.upsert_from_dataframe(
df=df,
- namespace="my-namespace",
+ namespace='my-namespace',
batch_size=100,
show_progress=True
)
@@ -281,20 +291,25 @@ def upsert_from_dataframe(
@abstractmethod
def upsert_records(self, namespace: str, records: list[dict]) -> UpsertResponse:
- """
- :param namespace: The namespace of the index to upsert records to.
- :type namespace: str, required
- :param records: The records to upsert into the index.
- :type records: list[dict], required
- :return: UpsertResponse object which contains the number of records upserted.
-
- Upsert records to a namespace. A record is a dictionary that contains eitiher an `id` or `_id`
- field along with other fields that will be stored as metadata. The `id` or `_id` field is used
+ """Upsert records to a namespace.
+
+ A record is a dictionary that contains either an ``id`` or ``_id``
+ field along with other fields that will be stored as metadata. The ``id`` or ``_id`` field is used
as the unique identifier for the record. At least one field in the record should correspond to
a field mapping in the index's embed configuration.
When records are upserted, Pinecone converts mapped fields into embeddings and upserts them into
- the specified namespacce of the index.
+ the specified namespace of the index.
+
+ Args:
+ namespace: The namespace of the index to upsert records to.
+ records: The records to upsert into the index. Each record should contain an ``id`` or ``_id``
+ field and fields that match the index's embed configuration field mappings.
+
+ Returns:
+ UpsertResponse: Object which contains the number of records upserted.
+
+ Examples:
.. code-block:: python
:caption: Upserting records to be embedded with Pinecone's integrated inference models
@@ -323,7 +338,7 @@ def upsert_records(self, namespace: str, records: list[dict]) -> UpsertResponse:
# Instantiate the index client
idx = pc.Index(host=index_model.host)
- # upsert records
+ # Upsert records
idx.upsert_records(
namespace="my-namespace",
records=[
@@ -339,39 +354,9 @@ def upsert_records(self, namespace: str, records: list[dict]) -> UpsertResponse:
"_id": "test3",
"my_text_field": "Many people enjoy eating apples as a healthy snack.",
},
- {
- "_id": "test4",
- "my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
- },
- {
- "_id": "test5",
- "my_text_field": "An apple a day keeps the doctor away, as the saying goes.",
- },
- {
- "_id": "test6",
- "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.",
- },
],
)
- from pinecone import SearchQuery, SearchRerank, RerankModel
-
- # Search for similar records
- response = idx.search_records(
- namespace="my-namespace",
- query=SearchQuery(
- inputs={
- "text": "Apple corporation",
- },
- top_k=3,
- ),
- rerank=SearchRerank(
- model=RerankModel.Bge_Reranker_V2_M3,
- rank_fields=["my_text_field"],
- top_n=3,
- ),
- )
-
"""
pass
@@ -383,26 +368,28 @@ def search(
rerank: (SearchRerankTypedDict | SearchRerank) | None = None,
fields: list[str] | None = ["*"], # Default to returning all fields
) -> SearchRecordsResponse:
- """
- :param namespace: The namespace in the index to search.
- :type namespace: str, required
- :param query: The SearchQuery to use for the search. The query can include a ``match_terms`` field
- to specify which terms must be present in the text of each search hit. The match_terms
- should be a dict with ``strategy`` (str) and ``terms`` (list[str]) keys, e.g.
- ``{"strategy": "all", "terms": ["term1", "term2"]}``. Currently only "all" strategy
- is supported, which means all specified terms must be present.
- **Note:** match_terms is only supported for sparse indexes with integrated embedding
- configured to use the pinecone-sparse-english-v0 model.
- :type query: Union[dict, SearchQuery], required
- :param rerank: The SearchRerank to use with the search request.
- :type rerank: Union[dict, SearchRerank], optional
- :return: The records that match the search.
-
- Search for records.
+ """Search for records in a namespace.
This operation converts a query to a vector embedding and then searches a namespace. You
can optionally provide a reranking operation as part of the search.
+ Args:
+ namespace: The namespace in the index to search.
+ query: The SearchQuery to use for the search. The query can include a ``match_terms`` field
+ to specify which terms must be present in the text of each search hit. The match_terms
+ should be a dict with ``strategy`` (str) and ``terms`` (list[str]) keys, e.g.
+ ``{"strategy": "all", "terms": ["term1", "term2"]}``. Currently only "all" strategy
+ is supported, which means all specified terms must be present.
+ **Note:** match_terms is only supported for sparse indexes with integrated embedding
+ configured to use the pinecone-sparse-english-v0 model.
+ rerank: The SearchRerank to use with the search request. [optional]
+ fields: List of fields to return in the response. Defaults to ["*"] to return all fields. [optional]
+
+ Returns:
+ SearchRecordsResponse: The records that match the search.
+
+ Examples:
+
.. code-block:: python
from pinecone import (
@@ -410,7 +397,10 @@ def search(
CloudProvider,
AwsRegion,
EmbedModel,
- IndexEmbed
+ IndexEmbed,
+ SearchQuery,
+ SearchRerank,
+ RerankModel
)
pc = Pinecone(api_key="<>")
@@ -429,41 +419,8 @@ def search(
# Instantiate the index client
idx = pc.Index(host=index_model.host)
- # upsert records
- idx.upsert_records(
- namespace="my-namespace",
- records=[
- {
- "_id": "test1",
- "my_text_field": "Apple is a popular fruit known for its sweetness and crisp texture.",
- },
- {
- "_id": "test2",
- "my_text_field": "The tech company Apple is known for its innovative products like the iPhone.",
- },
- {
- "_id": "test3",
- "my_text_field": "Many people enjoy eating apples as a healthy snack.",
- },
- {
- "_id": "test4",
- "my_text_field": "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
- },
- {
- "_id": "test5",
- "my_text_field": "An apple a day keeps the doctor away, as the saying goes.",
- },
- {
- "_id": "test6",
- "my_text_field": "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.",
- },
- ],
- )
-
- from pinecone import SearchQuery, SearchRerank, RerankModel
-
- # search for similar records
- response = idx.search_records(
+ # Search for similar records
+ response = idx.search(
namespace="my-namespace",
query=SearchQuery(
inputs={
@@ -489,7 +446,11 @@ def search_records(
rerank: (SearchRerankTypedDict | SearchRerank) | None = None,
fields: list[str] | None = ["*"], # Default to returning all fields
) -> SearchRecordsResponse:
- """Alias of the search() method."""
+ """Alias of the search() method.
+
+ See :meth:`search` for full documentation and examples.
+
+ """
pass
@abstractmethod
@@ -501,66 +462,81 @@ def delete(
filter: FilterTypedDict | None = None,
**kwargs,
) -> dict[str, Any]:
- """
- Args:
- ids (list[str]): Vector ids to delete [optional]
- delete_all (bool): This indicates that all vectors in the index namespace should be deleted.. [optional]
- Default is False.
- namespace (str): The namespace to delete vectors from [optional]
- If not specified, the default namespace is used.
- filter (dict[str, Union[str, float, int, bool, List, dict]]):
- If specified, the metadata filter here will be used to select the vectors to delete.
- This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True.
- See `metadata filtering _` [optional]
-
+ """Delete vectors from the index, from a single namespace.
The Delete operation deletes vectors from the index, from a single namespace.
-
No error is raised if the vector id does not exist.
Note: For any delete call, if namespace is not specified, the default namespace ``""`` is used.
Since the delete operation does not error when ids are not present, this means you may not receive
an error if you delete from the wrong namespace.
- Delete can occur in the following mutual exclusive ways:
+ Delete can occur in the following mutually exclusive ways:
1. Delete by ids from a single namespace
2. Delete all vectors from a single namespace by setting delete_all to True
3. Delete all vectors from a single namespace by specifying a metadata filter
- (note that for this option delete all must be set to False)
+ (note that for this option delete_all must be set to False)
+
+ Args:
+ ids: Vector ids to delete. [optional]
+ delete_all: This indicates that all vectors in the index namespace should be deleted.
+ Default is False. [optional]
+ namespace: The namespace to delete vectors from. If not specified, the default namespace is used. [optional]
+ filter: If specified, the metadata filter here will be used to select the vectors to delete.
+ This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True.
+                See `metadata filtering <https://docs.pinecone.io/guides/data/filter-with-metadata>`_ [optional]
+ **kwargs: Additional keyword arguments for the API call.
+
+ Returns:
+ dict[str, Any]: An empty dictionary if the delete operation was successful.
Examples:
.. code-block:: python
+ >>> # Delete specific vectors by ID
>>> index.delete(ids=['id1', 'id2'], namespace='my_namespace')
+ {}
+
+ >>> # Delete all vectors from a namespace
>>> index.delete(delete_all=True, namespace='my_namespace')
- >>> index.delete(filter={'key': 'value'}, namespace='my_namespace')
+ {}
+ >>> # Delete vectors matching a metadata filter
+ >>> index.delete(filter={'key': 'value'}, namespace='my_namespace')
+ {}
- Returns: An empty dictionary if the delete operation was successful.
"""
pass
@abstractmethod
def fetch(self, ids: list[str], namespace: str | None = None, **kwargs) -> FetchResponse:
- """
+ """Fetch vectors by ID from a single namespace.
+
The fetch operation looks up and returns vectors, by ID, from a single namespace.
The returned vectors include the vector data and/or metadata.
+ Args:
+ ids: The vector IDs to fetch.
+ namespace: The namespace to fetch vectors from. If not specified, the default namespace is used. [optional]
+ **kwargs: Additional keyword arguments for the API call.
+
+ Returns:
+ FetchResponse: Object which contains the list of Vector objects, and namespace name.
+
Examples:
.. code-block:: python
- >>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace')
- >>> index.fetch(ids=['id1', 'id2'])
+ >>> # Fetch vectors from a specific namespace
+ >>> response = index.fetch(ids=['id1', 'id2'], namespace='my_namespace')
+ >>> for vector_id, vector in response.vectors.items():
+ ... print(f"{vector_id}: {vector.values}")
- Args:
- ids (list[str]): The vector IDs to fetch.
- namespace (str): The namespace to fetch vectors from.
- If not specified, the default namespace is used. [optional]
+ >>> # Fetch vectors from the default namespace
+ >>> response = index.fetch(ids=['id1', 'id2'])
- Returns: FetchResponse object which contains the list of Vector objects, and namespace name.
"""
pass
@@ -648,9 +624,8 @@ def query(
top_k (int): The number of results to return for each query. Must be an integer greater than 1.
namespace (str): The namespace to query vectors from.
If not specified, the default namespace is used. [optional]
- filter (dict[str, Union[str, float, int, bool, List, dict]):
- The filter to apply. You can use vector metadata to limit your search.
- See `metadata filtering _` [optional]
+ filter: The filter to apply. You can use vector metadata to limit your search.
+                See `metadata filtering <https://docs.pinecone.io/guides/data/filter-with-metadata>`_ [optional]
include_values (bool): Indicates whether vector values are included in the response.
If omitted the server will use the default value of False [optional]
include_metadata (bool): Indicates whether metadata is included in the response as well as the ids.
@@ -801,22 +776,20 @@ def update(
>>> print(f"Would update {response.matched_records} vectors")
Args:
- id (str): Vector's unique id. Required for single vector updates. Must not be provided when using filter. [optional]
- values (list[float]): Vector values to set. [optional]
- set_metadata (dict[str, Union[str, float, int, bool, list[int], list[float], list[str]]]]):
- Metadata to merge with existing metadata on the vector(s). Fields specified will overwrite
- existing fields with the same key, while fields not specified will remain unchanged. [optional]
- namespace (str): Namespace name where to update the vector(s). [optional]
- sparse_values: (dict[str, Union[list[float], list[int]]]): Sparse values to update for the vector.
- Expected to be either a SparseValues object or a dict of the form:
- {'indices': list[int], 'values': list[float]} where the lists each have the same length. [optional]
- filter (dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression.
- When provided, updates all vectors in the namespace that match the filter criteria.
- See `metadata filtering _`.
- Must not be provided when using id. Either `id` or `filter` must be provided. [optional]
- dry_run (bool): If `True`, return the number of records that match the `filter` without executing
- the update. Only meaningful when using `filter` (not with `id`). Useful for previewing
- the impact of a bulk update before applying changes. Defaults to `False`. [optional]
+ id: Vector unique id. Required for single vector updates. Must not be provided when using filter. [optional]
+ values: Vector values to set. [optional]
+ set_metadata: Metadata to merge with existing metadata on the vector(s). Fields specified will overwrite
+ existing fields with the same key, while fields not specified will remain unchanged. [optional]
+ namespace: Namespace name where to update the vector(s). [optional]
+ sparse_values: Sparse values to update for the vector. Expected to be either a SparseValues object or a dict
+ of the form: ``{'indices': list[int], 'values': list[float]}`` where the lists each have
+ the same length. [optional]
+ filter: A metadata filter expression. When provided, updates all vectors in the namespace that match
+                the filter criteria. See `metadata filtering <https://docs.pinecone.io/guides/data/filter-with-metadata>`_.
+ Must not be provided when using id. Either ``id`` or ``filter`` must be provided. [optional]
+ dry_run: If ``True``, return the number of records that match the ``filter`` without executing
+ the update. Only meaningful when using ``filter`` (not with ``id``). Useful for previewing
+ the impact of a bulk update before applying changes. Defaults to ``False``. [optional]
Returns:
UpdateResponse: An UpdateResponse object. When using filter-based updates, the response includes
@@ -830,7 +803,7 @@ def describe_index_stats(
self, filter: FilterTypedDict | None = None, **kwargs
) -> DescribeIndexStatsResponse:
"""
- The DescribeIndexStats operation returns statistics about the index's contents.
+ The DescribeIndexStats operation returns statistics about the index contents.
For example: The vector count per namespace and the number of dimensions.
Args:
@@ -977,10 +950,10 @@ def delete_namespace(self, namespace: str, **kwargs) -> dict[str, Any]:
"""Delete a namespace from an index.
Args:
- namespace (str): The namespace to delete
+ namespace: The namespace to delete.
Returns:
- dict[str, Any]: Response from the delete operation
+ dict[str, Any]: Response from the delete operation.
"""
pass
@@ -992,18 +965,26 @@ def list_namespaces(
"""List all namespaces in an index. This method automatically handles pagination to return all results.
Args:
- limit (Optional[int]): The maximum number of namespaces to return. If unspecified, the server will use a default value. [optional]
+ limit: The maximum number of namespaces to return. If unspecified, the server will use a default value. [optional]
Returns:
- ``ListNamespacesResponse``: Object containing the list of namespaces.
+ Iterator[ListNamespacesResponse]: An iterator that yields ListNamespacesResponse objects containing the list of namespaces.
Examples:
- .. code-block:: python
- >>> results = list(index.list_namespaces(limit=5))
- >>> for namespace in results:
- ... print(f"Namespace: {namespace.name}, Vector count: {namespace.vector_count}")
- Namespace: namespace1, Vector count: 1000
- Namespace: namespace2, Vector count: 2000
+
+ .. code-block:: python
+
+ >>> # Iterate over all namespaces
+ >>> for namespace_response in index.list_namespaces(limit=5):
+ ... for namespace in namespace_response.namespaces:
+ ... print(f"Namespace: {namespace.name}, Vector count: {namespace.vector_count}")
+
+ >>> # Convert to list (be cautious with large datasets)
+ >>> results = list(index.list_namespaces(limit=5))
+ >>> for namespace_response in results:
+ ... for namespace in namespace_response.namespaces:
+ ... print(f"Namespace: {namespace.name}, Vector count: {namespace.vector_count}")
+
"""
pass
@@ -1017,18 +998,29 @@ def list_namespaces_paginated(
Consider using the ``list_namespaces`` method to avoid having to handle pagination tokens manually.
Args:
- limit (Optional[int]): The maximum number of namespaces to return. If unspecified, the server will use a default value. [optional]
- pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned
- in the response if additional results are available. [optional]
+ limit: The maximum number of namespaces to return. If unspecified, the server will use a default value. [optional]
+ pagination_token: A token needed to fetch the next page of results. This token is returned
+ in the response if additional results are available. [optional]
+ **kwargs: Additional keyword arguments for the API call.
Returns:
- ``ListNamespacesResponse``: Object containing the list of namespaces and pagination information.
+ ListNamespacesResponse: Object containing the list of namespaces and pagination information.
Examples:
- .. code-block:: python
- >>> results = index.list_namespaces_paginated(limit=5)
- >>> results.pagination.next
- eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9
- >>> next_results = index.list_namespaces_paginated(limit=5, pagination_token=results.pagination.next)
+
+ .. code-block:: python
+
+ >>> # Get first page of namespaces
+ >>> results = index.list_namespaces_paginated(limit=5)
+ >>> for namespace in results.namespaces:
+ ... print(f"Namespace: {namespace.name}, Vector count: {namespace.vector_count}")
+
+ >>> # Get next page if available
+ >>> if results.pagination and results.pagination.next:
+ ... next_results = index.list_namespaces_paginated(
+ ... limit=5,
+ ... pagination_token=results.pagination.next
+ ... )
+
"""
pass