diff --git a/dspy/retrieve/vectara_rm.py b/dspy/retrieve/vectara_rm.py index 047c70d6c9..d1f642b375 100644 --- a/dspy/retrieve/vectara_rm.py +++ b/dspy/retrieve/vectara_rm.py @@ -18,7 +18,7 @@ class VectaraRM(dspy.Retrieve): """ A retrieval module that uses Vectara to return the top passages for a given query. - Assumes that a Vectara corpus has been created and populated with the following payload: + Assumes that one or more Vectara corpora have been created and populated with the following payload: - document: The text of the passage Args: @@ -67,17 +67,21 @@ def __init__( def _vectara_query( self, query: str, - limit: int = 3, + limit: int = 5, ) -> List[str]: """Query Vectara index to get for top k matching passages. Args: query: query string """ - corpus_key = { - "customerId": self._vectara_customer_id, - "corpusId": self._vectara_corpus_id, - "lexicalInterpolationConfig": {"lambda": 0.025 }, - } + # If multiple corpus ids are provided (comma-separated), create a list of corpus keys + # otherwise `split(',')` yields a one-element list, so the single corpus id is retained + corpus_key = [ + { + "customerId": self._vectara_customer_id, + "corpusId": corpus_id, + "lexicalInterpolationConfig": {"lambda": 0.025 }, + } for corpus_id in self._vectara_corpus_id.split(',') + ] data = { "query": [ @@ -91,7 +95,7 @@ def _vectara_query( "startTag": START_SNIPPET, "endTag": END_SNIPPET, }, - "corpusKey": [corpus_key], + "corpusKey": corpus_key, }, ], }