Skip to content

Commit

Permalink
fmt
Browse files Browse the repository at this point in the history
  • Loading branch information
baskaryan committed Aug 18, 2024
1 parent 1431575 commit f1a8808
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 10 deletions.
44 changes: 38 additions & 6 deletions python/langsmith/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3413,8 +3413,41 @@ def list_examples(
if limit is not None and i + 1 >= limit:
break

# dataset_name arg explicitly not supported to avoid extra API calls.
# TODO: Update note on enabling indexing when there's an enable_indexing method.
@warn_beta
def index_dataset(
    self,
    *,
    dataset_id: ID_TYPE,
    tag: str = "latest",
    **kwargs: Any,
) -> None:
    """Enable indexing for a dataset. Examples are indexed by their inputs.

    Once indexing is enabled, examples similar to a given input can be
    retrieved with ``client.similar_examples()``.

    Args:
        dataset_id (UUID): The ID of the dataset to index.
        tag (str, optional): The version of the dataset to index. If 'latest'
            then any updates to the dataset (additions, updates, deletions of
            examples) will be reflected in the index.
        kwargs (Any): Additional fields to include in the request body.

    Returns:
        None

    Raises:
        requests.HTTPError
    """  # noqa: E501
    uuid_id = _as_uuid(dataset_id, "dataset_id")
    payload = dict(tag=tag, **kwargs)
    response = self.request_with_retries(
        "POST",
        f"/datasets/{uuid_id}/index",
        headers=self._headers,
        data=json.dumps(payload),
    )
    ls_utils.raise_for_status_with_text(response)

# NOTE: dataset_name arg explicitly not supported to avoid extra API calls.
@warn_beta
def similar_examples(
self,
Expand All @@ -3427,15 +3460,14 @@ def similar_examples(
) -> List[ls_schemas.ExampleSearch]:
r"""Retrieve the dataset examples whose inputs best match the current inputs.
**Note**: Must have few-shot indexing enabled for the dataset. You can do this
in the LangSmith UI:
https://docs.smith.langchain.com/how_to_guides/datasets/index_datasets_for_dynamic_few_shot_example_selection
**Note**: Must have few-shot indexing enabled for the dataset. See
``client.index_dataset()``.
Args:
inputs (dict): The inputs to use as a search query. Must match the dataset
input schema. Must be JSON serializable.
limit (int): The maximum number of examples to return.
dataset_id (UUID, optional): The ID of the dataset to filter by.
dataset_id (str or UUID): The ID of the dataset to search over.
kwargs (Any): Additional keyword args to pass as part of request body.
Returns:
Expand Down
1 change: 1 addition & 0 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,4 @@ disallow_untyped_defs = "True"

[tool.pytest.ini_options]
asyncio_mode = "auto"
markers = [ "slow: long-running tests",]
21 changes: 21 additions & 0 deletions python/tests/integration_tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pytest


def pytest_addoption(parser):
    """Register the ``--runslow`` command-line flag with pytest."""
    parser.addoption(
        "--runslow",
        action="store_true",
        default=False,
        help="run slow tests",
    )


def pytest_configure(config):
    """Declare the ``slow`` marker so pytest does not warn about it."""
    marker_description = "slow: mark test as slow to run"
    config.addinivalue_line("markers", marker_description)


def pytest_collection_modifyitems(config, items):
    """Skip tests marked ``slow`` unless ``--runslow`` was given on the CLI."""
    if config.getoption("--runslow"):
        # Explicit opt-in: run everything, including slow tests.
        return
    skip_marker = pytest.mark.skip(reason="need --runslow option to run")
    slow_items = (item for item in items if "slow" in item.keywords)
    for item in slow_items:
        item.add_marker(skip_marker)
45 changes: 41 additions & 4 deletions python/tests/integration_tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def wait_for(

@pytest.fixture
def langchain_client() -> Client:
    """Return a Client authenticated with the org API key from the environment."""
    api_key = os.environ["LANGCHAIN_ORG_API_KEY"]
    return Client(api_key=api_key)


def test_datasets(langchain_client: Client) -> None:
Expand Down Expand Up @@ -268,10 +268,47 @@ def test_list_examples(langchain_client: Client) -> None:

langchain_client.delete_dataset(dataset_id=dataset.id)

example_list = langchain_client.similar_examples(
{"text": "hey there"}, k=1, dataset_id=dataset.id

@pytest.mark.slow
def test_similar_examples(langchain_client: Client) -> None:
    """Index a small dataset and verify similar_examples finds matches.

    Creates a schema-constrained dataset, populates it, enables indexing,
    then searches for examples similar to a new input.
    """
    inputs = [{"text": "how are you"}, {"text": "good bye"}, {"text": "see ya later"}]
    outputs = [
        {"response": "good how are you"},
        {"response": "ta ta"},
        {"response": "tootles"},
    ]
    # Random suffix so concurrent test runs don't collide on dataset name.
    dataset_name = "__test_similar_examples" + uuid4().hex[:4]
    dataset = langchain_client.create_dataset(
        dataset_name=dataset_name,
        inputs_schema={
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "properties": {
                "text": {"type": "string"},
            },
            "required": ["text"],
            "additionalProperties": False,
        },
        outputs_schema={
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "properties": {
                "response": {"type": "string"},
            },
            "required": ["response"],
            "additionalProperties": False,
        },
    )
    langchain_client.create_examples(
        inputs=inputs, outputs=outputs, dataset_id=dataset.id
    )
    langchain_client.index_dataset(dataset_id=dataset.id)
    # Indexing is asynchronous server-side; give it time to finish.
    time.sleep(5)
    similar_list = langchain_client.similar_examples(
        {"text": "howdy"}, limit=2, dataset_id=dataset.id
    )
    assert len(similar_list) == 2

    # Clean up so repeated runs don't accumulate test datasets.
    langchain_client.delete_dataset(dataset_id=dataset.id)


@pytest.mark.skip(reason="This test is flaky")
Expand Down

0 comments on commit f1a8808

Please sign in to comment.