From 71e90dc85b368f2623d91c7eddb023211dabda83 Mon Sep 17 00:00:00 2001
From: Ofer Mendelevitch <ofermend@gmail.com>
Date: Tue, 14 May 2024 11:04:54 -0700
Subject: [PATCH 1/2] support multiple corpora in vectara retriever

---
 dspy/retrieve/vectara_rm.py | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/dspy/retrieve/vectara_rm.py b/dspy/retrieve/vectara_rm.py
index 047c70d6c9..d0efa0ba16 100644
--- a/dspy/retrieve/vectara_rm.py
+++ b/dspy/retrieve/vectara_rm.py
@@ -18,7 +18,7 @@ class VectaraRM(dspy.Retrieve):
     """
     A retrieval module that uses Vectara to return the top passages for a given query.
 
-    Assumes that a Vectara corpus has been created and populated with the following payload:
+    Assumes that one or more Vectara corpora have been created and populated with the following payload:
         - document: The text of the passage
 
     Args:
@@ -67,17 +67,27 @@ def __init__(
     def _vectara_query(
         self,
         query: str,
-        limit: int = 3,
+        limit: int = 5,
     ) -> List[str]:
         """Query Vectara index to get for top k matching passages.
         Args:
             query: query string
         """
-        corpus_key = {
-            "customerId": self._vectara_customer_id,
-            "corpusId": self._vectara_corpus_id,
-            "lexicalInterpolationConfig": {"lambda": 0.025 },
-        }
+        # If multiple corpus ids are provided (comma-separated), create a list of corpus keys
+        if ',' in self._vectara_corpus_id:
+            corpus_key = [
+                {
+                    "customerId": self._vectara_customer_id,
+                    "corpusId": corpus_id,
+                    "lexicalInterpolationConfig": {"lambda": 0.025 },
+                } for corpus_id in self._vectara_corpus_id.split(',')
+            ]
+        else:
+            corpus_key = [{
+                "customerId": self._vectara_customer_id,
+                "corpusId": self._vectara_corpus_id,
+                "lexicalInterpolationConfig": {"lambda": 0.025 },
+            }]
 
         data = {
             "query": [
@@ -91,7 +101,7 @@ def _vectara_query(
                         "startTag": START_SNIPPET,
                         "endTag": END_SNIPPET,
                     },
-                    "corpusKey": [corpus_key],
+                    "corpusKey": corpus_key,
                 },
             ],
         }

From c74b45e54936ae8804ab13a28a60c3b05c003438 Mon Sep 17 00:00:00 2001
From: Ofer Mendelevitch <ofermend@gmail.com>
Date: Tue, 14 May 2024 14:25:51 -0700
Subject: [PATCH 2/2] simplification

---
 dspy/retrieve/vectara_rm.py | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/dspy/retrieve/vectara_rm.py b/dspy/retrieve/vectara_rm.py
index d0efa0ba16..d1f642b375 100644
--- a/dspy/retrieve/vectara_rm.py
+++ b/dspy/retrieve/vectara_rm.py
@@ -74,20 +74,14 @@ def _vectara_query(
             query: query string
         """
         # If multiple corpus ids are provided (comma-separated), create a list of corpus keys
-        if ',' in self._vectara_corpus_id:
-            corpus_key = [
-                {
-                    "customerId": self._vectara_customer_id,
-                    "corpusId": corpus_id,
-                    "lexicalInterpolationConfig": {"lambda": 0.025 },
-                } for corpus_id in self._vectara_corpus_id.split(',')
-            ]
-        else:
-            corpus_key = [{
+        # otherwise `split(',')` returns a one-element list, so a single corpus id is handled the same way
+        corpus_key = [
+            {
                 "customerId": self._vectara_customer_id,
-                "corpusId": self._vectara_corpus_id,
+                "corpusId": corpus_id,
                 "lexicalInterpolationConfig": {"lambda": 0.025 },
-            }]
+            } for corpus_id in self._vectara_corpus_id.split(',')
+        ]
 
         data = {
             "query": [