Skip to content

Commit

Permalink
fmt
Browse files Browse the repository at this point in the history
  • Loading branch information
baskaryan committed Aug 18, 2024
1 parent 1431575 commit f1a8808
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 10 deletions.
44 changes: 38 additions & 6 deletions python/langsmith/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3413,8 +3413,41 @@ def list_examples(
if limit is not None and i + 1 >= limit:
break

# dataset_name arg explicitly not supported to avoid extra API calls.
# TODO: Update note on enabling indexing when there's an enable_indexing method.
@warn_beta
def index_dataset(
    self,
    *,
    dataset_id: ID_TYPE,
    tag: str = "latest",
    **kwargs: Any,
) -> None:
    """Enable indexing for a dataset. Examples are indexed by their inputs.

    Once indexing is enabled, examples similar to a given input can be
    retrieved with ``client.similar_examples()``.

    Args:
        dataset_id (UUID): The ID of the dataset to index.
        tag (str, optional): The version of the dataset to index. If 'latest'
            then any updates to the dataset (additions, updates, deletions of
            examples) will be reflected in the index.
        kwargs (Any): Additional fields to include in the request body.

    Returns:
        None

    Raises:
        requests.HTTPError
    """  # noqa: E501
    uuid_id = _as_uuid(dataset_id, "dataset_id")
    payload = dict(tag=tag, **kwargs)
    response = self.request_with_retries(
        "POST",
        f"/datasets/{uuid_id}/index",
        headers=self._headers,
        data=json.dumps(payload),
    )
    ls_utils.raise_for_status_with_text(response)

# NOTE: dataset_name arg explicitly not supported to avoid extra API calls.
@warn_beta
def similar_examples(
self,
Expand All @@ -3427,15 +3460,14 @@ def similar_examples(
) -> List[ls_schemas.ExampleSearch]:
r"""Retrieve the dataset examples whose inputs best match the current inputs.
**Note**: Must have few-shot indexing enabled for the dataset. You can do this
in the LangSmith UI:
https://docs.smith.langchain.com/how_to_guides/datasets/index_datasets_for_dynamic_few_shot_example_selection
**Note**: Must have few-shot indexing enabled for the dataset. See
``client.index_dataset()``.
Args:
inputs (dict): The inputs to use as a search query. Must match the dataset
input schema. Must be JSON serializable.
limit (int): The maximum number of examples to return.
dataset_id (UUID, optional): The ID of the dataset to filter by.
dataset_id (str or UUID): The ID of the dataset to search over.
kwargs (Any): Additional keyword args to pass as part of request body.
Returns:
Expand Down
1 change: 1 addition & 0 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,4 @@ disallow_untyped_defs = "True"

[tool.pytest.ini_options]
asyncio_mode = "auto"
markers = [ "slow: long-running tests",]
21 changes: 21 additions & 0 deletions python/tests/integration_tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pytest


def pytest_addoption(parser):
    """Register the ``--runslow`` command-line flag with pytest."""
    parser.addoption(
        "--runslow",
        action="store_true",
        default=False,
        help="run slow tests",
    )


def pytest_configure(config):
    """Declare the ``slow`` marker so pytest does not warn about it."""
    marker_description = "slow: mark test as slow to run"
    config.addinivalue_line("markers", marker_description)


def pytest_collection_modifyitems(config, items):
    """Skip tests marked ``slow`` unless ``--runslow`` was given on the CLI."""
    if config.getoption("--runslow"):
        # Explicit opt-in: run everything, including slow tests.
        return
    skip_marker = pytest.mark.skip(reason="need --runslow option to run")
    slow_items = (item for item in items if "slow" in item.keywords)
    for item in slow_items:
        item.add_marker(skip_marker)
45 changes: 41 additions & 4 deletions python/tests/integration_tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def wait_for(

@pytest.fixture
def langchain_client() -> Client:
    """Return a Client authenticated with the org API key from the environment."""
    api_key = os.environ["LANGCHAIN_ORG_API_KEY"]
    return Client(api_key=api_key)


def test_datasets(langchain_client: Client) -> None:
Expand Down Expand Up @@ -268,10 +268,47 @@ def test_list_examples(langchain_client: Client) -> None:

langchain_client.delete_dataset(dataset_id=dataset.id)

example_list = langchain_client.similar_examples(
{"text": "hey there"}, k=1, dataset_id=dataset.id

@pytest.mark.slow
def test_similar_examples(langchain_client: Client) -> None:
    """Index a small dataset and verify similar_examples finds matches.

    Creates a schema-constrained dataset, populates it, enables indexing,
    then searches for examples similar to a new input.
    """
    inputs = [{"text": "how are you"}, {"text": "good bye"}, {"text": "see ya later"}]
    outputs = [
        {"response": "good how are you"},
        {"response": "ta ta"},
        {"response": "tootles"},
    ]
    # Random suffix so concurrent test runs don't collide on dataset name.
    dataset_name = "__test_similar_examples" + uuid4().hex[:4]
    dataset = langchain_client.create_dataset(
        dataset_name=dataset_name,
        inputs_schema={
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "properties": {
                "text": {"type": "string"},
            },
            "required": ["text"],
            "additionalProperties": False,
        },
        outputs_schema={
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "properties": {
                "response": {"type": "string"},
            },
            "required": ["response"],
            "additionalProperties": False,
        },
    )
    langchain_client.create_examples(
        inputs=inputs, outputs=outputs, dataset_id=dataset.id
    )
    langchain_client.index_dataset(dataset_id=dataset.id)
    # Indexing is asynchronous server-side; give it time to finish.
    time.sleep(5)
    similar_list = langchain_client.similar_examples(
        {"text": "howdy"}, limit=2, dataset_id=dataset.id
    )
    assert len(similar_list) == 2

    # Clean up so repeated runs don't accumulate test datasets.
    langchain_client.delete_dataset(dataset_id=dataset.id)


@pytest.mark.skip(reason="This test is flaky")
Expand Down

0 comments on commit f1a8808

Please sign in to comment.