diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 4d86dac6a59cf7..8d776064019a73 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,19 +1,24 @@ Thank you for contributing to LangChain! -Checklist: - -- [ ] PR title: Please title your PR "package: description", where "package" is whichever of langchain, community, core, experimental, etc. is being modified. Use "docs: ..." for purely docs changes, "templates: ..." for template changes, "infra: ..." for CI changes. +- [ ] **PR title**: "package: description" + - Where "package" is whichever of langchain, community, core, experimental, etc. is being modified. Use "docs: ..." for purely docs changes, "templates: ..." for template changes, "infra: ..." for CI changes. - Example: "community: add foobar LLM" + + +- [ ] **PR message**: ***Delete this entire checklist*** and replace with - **Description:** a description of the change - **Issue:** the issue # it fixes, if applicable - **Dependencies:** any dependencies required for this change - **Twitter handle:** if your PR gets announced, and you'd like a mention, we'll gladly shout you out! -- [ ] Pass lint and test: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified to check that you're passing lint and testing. See contribution guidelines for more information on how to write/run tests, lint, etc: https://python.langchain.com/docs/contributing/ -- [ ] Add tests and docs: If you're adding a new integration, please include + + +- [ ] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in the `docs/docs/integrations` directory. + +- [ ] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more information: https://python.langchain.com/docs/contributing/ + Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. 
diff --git a/.github/workflows/_integration_test.yml b/.github/workflows/_integration_test.yml index 18a1e5eb89f185..1189907e96695f 100644 --- a/.github/workflows/_integration_test.yml +++ b/.github/workflows/_integration_test.yml @@ -52,6 +52,7 @@ jobs: - name: Run integration tests shell: bash env: + AI21_API_KEY: ${{ secrets.AI21_API_KEY }} GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} @@ -66,6 +67,9 @@ jobs: WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }} PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} PINECONE_ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }} + ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }} + ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }} + ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }} run: | make integration_tests diff --git a/.github/workflows/_release.yml b/.github/workflows/_release.yml index 0b18925680194d..1221d6a6d6ae89 100644 --- a/.github/workflows/_release.yml +++ b/.github/workflows/_release.yml @@ -166,6 +166,7 @@ jobs: - name: Run integration tests if: ${{ startsWith(inputs.working-directory, 'libs/partners/') }} env: + AI21_API_KEY: ${{ secrets.AI21_API_KEY }} GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} @@ -186,6 +187,9 @@ jobs: WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }} PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} PINECONE_ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }} + ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }} + ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }} + ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }} run: make integration_tests working-directory: ${{ inputs.working-directory }} diff --git a/.gitignore b/.gitignore index 407a65571adcda..db21b911098a8d 100644 --- a/.gitignore +++ b/.gitignore @@ -177,4 +177,6 @@ docs/docs/build docs/docs/node_modules docs/docs/yarn.lock _dist -docs/docs/templates \ No newline at end of file +docs/docs/templates + +prof diff --git a/cookbook/amazon_personalize_how_to.ipynb b/cookbook/amazon_personalize_how_to.ipynb new file mode 100644 index 00000000000000..7555e39d89494f --- /dev/null +++ b/cookbook/amazon_personalize_how_to.ipynb @@ -0,0 +1,284 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Amazon Personalize\n", + "\n", + "[Amazon Personalize](https://docs.aws.amazon.com/personalize/latest/dg/what-is-personalize.html) is a fully managed machine learning service that uses your data to generate item recommendations for your users. It can also generate user segments based on the users' affinity for certain items or item metadata.\n", + "\n", + "This notebook goes through how to use the Amazon Personalize Chain. You need an Amazon Personalize campaign_arn or a recommender_arn before you get started with this notebook.\n", + "\n", + "The following is a [tutorial](https://github.com/aws-samples/retail-demo-store/blob/master/workshop/1-Personalization/Lab-1-Introduction-and-data-preparation.ipynb) to set up a campaign_arn/recommender_arn on Amazon Personalize. Once the campaign_arn/recommender_arn is set up, you can use it in the LangChain ecosystem. \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. 
Install Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "!pip install boto3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Sample Use-cases" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.1 [Use-case-1] Setup Amazon Personalize Client and retrieve recommendations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_experimental.recommenders import AmazonPersonalize\n", + "\n", + "recommender_arn = \"\"\n", + "\n", + "client = AmazonPersonalize(\n", + " credentials_profile_name=\"default\",\n", + " region_name=\"us-west-2\",\n", + " recommender_arn=recommender_arn,\n", + ")\n", + "client.get_recommendations(user_id=\"1\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "### 2.2 [Use-case-2] Invoke Personalize Chain for summarizing results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "from langchain.llms.bedrock import Bedrock\n", + "from langchain_experimental.recommenders import AmazonPersonalizeChain\n", + "\n", + "bedrock_llm = Bedrock(model_id=\"anthropic.claude-v2\", region_name=\"us-west-2\")\n", + "\n", + "# Create personalize chain\n", + "# Use return_direct=True if you do not want summary\n", + "chain = AmazonPersonalizeChain.from_llm(\n", + " llm=bedrock_llm, client=client, return_direct=False\n", + ")\n", + "response = chain({\"user_id\": \"1\"})\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.3 [Use-Case-3] Invoke Amazon Personalize Chain using your own prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.prompts.prompt import PromptTemplate\n", + "\n", + "RANDOM_PROMPT_QUERY = \"\"\"\n", + "You are a skilled publicist. Write a high-converting marketing email advertising several movies available in a video-on-demand streaming platform next week, \n", + " given the movie and user information below. Your email will leverage the power of storytelling and persuasive language. \n", + " The movies to recommend and their information is contained in the tag. \n", + " All movies in the tag must be recommended. Give a summary of the movies and why the human should watch them. \n", + " Put the email between tags.\n", + "\n", + " \n", + " {result} \n", + " \n", + "\n", + " Assistant:\n", + " \"\"\"\n", + "\n", + "RANDOM_PROMPT = PromptTemplate(input_variables=[\"result\"], template=RANDOM_PROMPT_QUERY)\n", + "\n", + "chain = AmazonPersonalizeChain.from_llm(\n", + " llm=bedrock_llm, client=client, return_direct=False, prompt_template=RANDOM_PROMPT\n", + ")\n", + "chain.run({\"user_id\": \"1\", \"item_id\": \"234\"})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.4 [Use-case-4] Invoke Amazon Personalize in a Sequential Chain " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import LLMChain, SequentialChain\n", + "\n", + "RANDOM_PROMPT_QUERY_2 = \"\"\"\n", + "You are a skilled publicist. 
Write a high-converting marketing email advertising several movies available in a video-on-demand streaming platform next week, \n", + " given the movie and user information below. Your email will leverage the power of storytelling and persuasive language. \n", + " You want the email to impress the user, so make it appealing to them.\n", + " The movies to recommend and their information is contained in the tag. \n", + " All movies in the tag must be recommended. Give a summary of the movies and why the human should watch them. \n", + " Put the email between tags.\n", + "\n", + " \n", + " {result}\n", + " \n", + "\n", + " Assistant:\n", + " \"\"\"\n", + "\n", + "RANDOM_PROMPT_2 = PromptTemplate(\n", + " input_variables=[\"result\"], template=RANDOM_PROMPT_QUERY_2\n", + ")\n", + "personalize_chain_instance = AmazonPersonalizeChain.from_llm(\n", + " llm=bedrock_llm, client=client, return_direct=True\n", + ")\n", + "random_chain_instance = LLMChain(llm=bedrock_llm, prompt=RANDOM_PROMPT_2)\n", + "overall_chain = SequentialChain(\n", + " chains=[personalize_chain_instance, random_chain_instance],\n", + " input_variables=[\"user_id\"],\n", + " verbose=True,\n", + ")\n", + "overall_chain.run({\"user_id\": \"1\", \"item_id\": \"234\"})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "### 2.5 [Use-case-5] Invoke Amazon Personalize and retrieve metadata " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "recommender_arn = \"\"\n", + "metadata_column_names = [\n", + " \"\",\n", + " \"\",\n", + "]\n", + "metadataMap = {\"ITEMS\": metadata_column_names}\n", + "\n", + "client = AmazonPersonalize(\n", + " credentials_profile_name=\"default\",\n", + " region_name=\"us-west-2\",\n", + " recommender_arn=recommender_arn,\n", + ")\n", + "client.get_recommendations(user_id=\"1\", metadataColumns=metadataMap)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "### 2.6 [Use-Case 6] Invoke Personalize Chain with returned metadata for summarizing results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "bedrock_llm = Bedrock(model_id=\"anthropic.claude-v2\", region_name=\"us-west-2\")\n", + "\n", + "# Create personalize chain\n", + "# Use return_direct=True if you do not want summary\n", + "chain = AmazonPersonalizeChain.from_llm(\n", + " llm=bedrock_llm, client=client, return_direct=False\n", + ")\n", + "response = chain({\"user_id\": \"1\", \"metadata_columns\": metadataMap})\n", + "print(response)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + }, + "vscode": { + "interpreter": { + "hash": "15e58ce194949b77a891bd4339ce3d86a9bd138e905926019517993f97db9e6c" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docker/Makefile b/docker/Makefile new file mode 100644 index 00000000000000..d578580c323964 --- 
/dev/null +++ b/docker/Makefile @@ -0,0 +1,12 @@ +# Makefile + +build_graphdb: + docker build --tag graphdb ./graphdb + +start_graphdb: + docker-compose up -d graphdb + +down: + docker-compose down -v --remove-orphans + +.PHONY: build_graphdb start_graphdb down diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index ce680ccafda51e..968e32469a34a5 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -15,3 +15,7 @@ services: - "6020:6379" volumes: - ./redis-volume:/data + graphdb: + image: graphdb + ports: + - "6021:7200" diff --git a/docker/graphdb/Dockerfile b/docker/graphdb/Dockerfile new file mode 100644 index 00000000000000..dfcbe7e622d7a1 --- /dev/null +++ b/docker/graphdb/Dockerfile @@ -0,0 +1,5 @@ +FROM ontotext/graphdb:10.5.1 +RUN mkdir -p /opt/graphdb/dist/data/repositories/langchain +COPY config.ttl /opt/graphdb/dist/data/repositories/langchain/ +COPY graphdb_create.sh /run.sh +ENTRYPOINT bash /run.sh diff --git a/docker/graphdb/config.ttl b/docker/graphdb/config.ttl new file mode 100644 index 00000000000000..dcbdeeebe12832 --- /dev/null +++ b/docker/graphdb/config.ttl @@ -0,0 +1,46 @@ +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . +@prefix rep: <http://www.openrdf.org/config/repository#> . +@prefix sr: <http://www.openrdf.org/config/repository/sail#> . +@prefix sail: <http://www.openrdf.org/config/sail#> . +@prefix graphdb: <http://www.ontotext.com/config/graphdb#> . + +[] a rep:Repository ; + rep:repositoryID "langchain" ; + rdfs:label "" ; + rep:repositoryImpl [ + rep:repositoryType "graphdb:SailRepository" ; + sr:sailImpl [ + sail:sailType "graphdb:Sail" ; + + graphdb:read-only "false" ; + + # Inference and Validation + graphdb:ruleset "empty" ; + graphdb:disable-sameAs "true" ; + graphdb:check-for-inconsistencies "false" ; + + # Indexing + graphdb:entity-id-size "32" ; + graphdb:enable-context-index "false" ; + graphdb:enablePredicateList "true" ; + graphdb:enable-fts-index "false" ; + graphdb:fts-indexes ("default" "iri") ; + graphdb:fts-string-literals-index "default" ; + graphdb:fts-iris-index "none" ; + + # Queries and Updates + graphdb:query-timeout "0" ; + graphdb:throw-QueryEvaluationException-on-timeout "false" ; + graphdb:query-limit-results "0" ; + + # Settable in the file but otherwise hidden in the UI and in the RDF4J console + graphdb:base-URL "http://example.org/owlim#" ; + graphdb:defaultNS "" ; + graphdb:imports "" ; + graphdb:repository-type "file-repository" ; + graphdb:storage-folder "storage" ; + graphdb:entity-index-size "10000000" ; + graphdb:in-memory-literal-properties "true" ; + graphdb:enable-literal-index "true" ; + ] + ]. diff --git a/docker/graphdb/graphdb_create.sh b/docker/graphdb/graphdb_create.sh new file mode 100644 index 00000000000000..52ffe8ad74a066 --- /dev/null +++ b/docker/graphdb/graphdb_create.sh @@ -0,0 +1,28 @@ +#! /bin/bash +REPOSITORY_ID="langchain" +GRAPHDB_URI="http://localhost:7200/" + +echo -e "\nUsing GraphDB: ${GRAPHDB_URI}" + +function startGraphDB { + echo -e "\nStarting GraphDB..." + exec /opt/graphdb/dist/bin/graphdb +} + +function waitGraphDBStart { + echo -e "\nWaiting for GraphDB to start..." 
+ for _ in $(seq 1 5); do + CHECK_RES=$(curl --silent --write-out '%{http_code}' --output /dev/null ${GRAPHDB_URI}/rest/repositories) + if [ "${CHECK_RES}" = '200' ]; then + echo -e "\nUp and running" + break + fi + sleep 30s + echo "CHECK_RES: ${CHECK_RES}" + done +} + + +startGraphDB & +waitGraphDBStart +wait diff --git a/docs/docs/expression_language/how_to/message_history.ipynb b/docs/docs/expression_language/how_to/message_history.ipynb index 6d93423a96455c..aac305d7ac94c6 100644 --- a/docs/docs/expression_language/how_to/message_history.ipynb +++ b/docs/docs/expression_language/how_to/message_history.ipynb @@ -7,7 +7,7 @@ "source": [ "# Add message history (memory)\n", "\n", - "The `RunnableWithMessageHistory` let us add message history to certain types of chains.\n", + "The `RunnableWithMessageHistory` lets us add message history to certain types of chains. It wraps another Runnable and manages the chat message history for it.\n", "\n", "Specifically, it can be used for any Runnable that takes as input one of\n", "\n", @@ -21,195 +21,264 @@ "* a sequence of `BaseMessage`\n", "* a dict with a key that contains a sequence of `BaseMessage`\n", "\n", - "Let's take a look at some examples to see how it works." + "Let's take a look at some examples to see how it works. First we construct a runnable (which here accepts a dict as input and returns a message as output):" ] }, { - "cell_type": "markdown", - "id": "6bca45e5-35d9-4603-9ca9-6ac0ce0e35cd", + "cell_type": "code", + "execution_count": 1, + "id": "2ed413b4-33a1-48ee-89b0-2d4917ec101a", "metadata": {}, + "outputs": [], "source": [ - "## Setup\n", + "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n", + "from langchain_openai.chat_models import ChatOpenAI\n", "\n", - "We'll use Redis to store our chat message histories and Anthropic's claude-2 model so we'll need to install the following dependencies:" + "model = ChatOpenAI()\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " \"You're an assistant who's good at {ability}. Respond in 20 words or fewer\",\n", + " ),\n", + " MessagesPlaceholder(variable_name=\"history\"),\n", + " (\"human\", \"{input}\"),\n", + " ]\n", + ")\n", + "runnable = prompt | model" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "477d04b3-c2b6-4ba5-962f-492c0d625cd5", + "cell_type": "markdown", + "id": "9fd175e1-c7b8-4929-a57e-3331865fe7aa", "metadata": {}, - "outputs": [], "source": [ - "%pip install --upgrade --quiet langchain redis anthropic" + "To manage the message history, we will need:\n", + "1. This runnable;\n", + "2. A callable that returns an instance of `BaseChatMessageHistory`.\n", + "\n", + "Check out the [memory integrations](https://integrations.langchain.com/memory) page for implementations of chat message histories using Redis and other providers. Here we demonstrate using an in-memory `ChatMessageHistory` as well as more persistent storage using `RedisChatMessageHistory`." ] }, { "cell_type": "markdown", - "id": "93776323-d6b8-4912-bb6a-867c5e655f46", + "id": "3d83adad-9672-496d-9f25-5747e7b8c8bb", "metadata": {}, "source": [ - "Set your [Anthropic API key](https://console.anthropic.com/):" + "## In-memory\n", + "\n", + "Below we show a simple example in which the chat history lives in memory, in this case via a global Python dict.\n", + "\n", + "We construct a callable `get_session_history` that references this dict to return an instance of `ChatMessageHistory`. 
The arguments to the callable can be specified by passing a configuration to the `RunnableWithMessageHistory` at runtime. By default, the configuration parameter is expected to be a single string `session_id`. This can be adjusted via the `history_factory_config` kwarg.\n", + "\n", + "Using the single-parameter default:" ] }, { "cell_type": "code", - "execution_count": null, - "id": "c7f56f69-d2f1-4a21-990c-b5551eb012fa", + "execution_count": 2, + "id": "54348d02-d8ee-440c-bbf9-41bc0fbbc46c", "metadata": {}, "outputs": [], "source": [ - "import getpass\n", - "import os\n", + "from langchain_community.chat_message_histories import ChatMessageHistory\n", + "from langchain_core.chat_history import BaseChatMessageHistory\n", + "from langchain_core.runnables.history import RunnableWithMessageHistory\n", + "\n", + "store = {}\n", "\n", - "os.environ[\"ANTHROPIC_API_KEY\"] = getpass.getpass()" + "\n", + "def get_session_history(session_id: str) -> BaseChatMessageHistory:\n", + " if session_id not in store:\n", + " store[session_id] = ChatMessageHistory()\n", + " return store[session_id]\n", + "\n", + "\n", + "with_message_history = RunnableWithMessageHistory(\n", + " runnable,\n", + " get_session_history,\n", + " input_messages_key=\"input\",\n", + " history_messages_key=\"history\",\n", + ")" ] }, { "cell_type": "markdown", - "id": "6a0ec9e0-7b1c-4c6f-b570-e61d520b47c6", + "id": "01acb505-3fd3-4ab4-9f04-5ea07e81542e", "metadata": {}, "source": [ - "Start a local Redis Stack server if we don't have an existing Redis deployment to connect to:\n", - "```bash\n", - "docker run -d -p 6379:6379 -p 8001:8001 redis/redis-stack:latest\n", - "```" + "Note that we've specified `input_messages_key` (the key to be treated as the latest input message) and `history_messages_key` (the key to add historical messages to).\n", + "\n", + "When invoking this new runnable, we specify the corresponding chat history via a configuration parameter:" ] }, { "cell_type": "code", - "execution_count": 1, - "id": "cd6a250e-17fe-4368-a39d-1fe6b2cbde68", + "execution_count": 3, + "id": "01384412-f08e-4634-9edb-3f46f475b582", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content='Cosine is a trigonometric function that calculates the ratio of the adjacent side to the hypotenuse of a right triangle.')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "REDIS_URL = \"redis://localhost:6379/0\"" + "with_message_history.invoke(\n", + " {\"ability\": \"math\", \"input\": \"What does cosine mean?\"},\n", + " config={\"configurable\": {\"session_id\": \"abc123\"}},\n", + ")" ] }, { - "cell_type": "markdown", - "id": "36f43b87-655c-4f64-aa7b-bd8c1955d8e5", + "cell_type": "code", + "execution_count": 4, + "id": "954688a2-9a3f-47ee-a9e8-fa0c83e69477", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content='Cosine is a mathematical function used to calculate the length of a side in a right triangle.')" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "### [LangSmith](/docs/langsmith)\n", - "\n", - "LangSmith is especially useful for something like message history injection, where it can be hard to otherwise understand what the inputs are to various parts of the chain.\n", - "\n", - "Note that LangSmith is not needed, but it is helpful.\n", - "If you do want to use LangSmith, after you sign up at the link above, make sure to uncoment the below and 
set your environment variables to start logging traces:" + "# Remembers\n", + "with_message_history.invoke(\n", + " {\"ability\": \"math\", \"input\": \"What?\"},\n", + " config={\"configurable\": {\"session_id\": \"abc123\"}},\n", + ")" ] }, { "cell_type": "code", - "execution_count": 2, - "id": "2afc1556-8da1-4499-ba11-983b66c58b18", + "execution_count": 5, + "id": "39350d7c-2641-4744-bc2a-fd6a57c4ea90", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content='I can help with math problems. What do you need assistance with?')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", - "# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()" + "# New session_id --> does not remember.\n", + "with_message_history.invoke(\n", + " {\"ability\": \"math\", \"input\": \"What?\"},\n", + " config={\"configurable\": {\"session_id\": \"def234\"}},\n", + ")" ] }, { "cell_type": "markdown", - "id": "1a5a632e-ba9e-4488-b586-640ad5494f62", + "id": "d29497be-3366-408d-bbb9-d4a8bf4ef37c", "metadata": {}, "source": [ - "## Example: Dict input, message output\n", - "\n", - "Let's create a simple chain that takes a dict as input and returns a BaseMessage.\n", - "\n", - "In this case the `\"question\"` key in the input represents our input message, and the `\"history\"` key is where our historical messages will be injected." + "The configuration parameters by which we track message histories can be customized by passing in a list of ``ConfigurableFieldSpec`` objects to the ``history_factory_config`` parameter. Below, we use two parameters: a `user_id` and `conversation_id`." ] }, { "cell_type": "code", - "execution_count": 2, - "id": "2a150d6f-8878-4950-8634-a608c5faad56", + "execution_count": 6, + "id": "1c89daee-deff-4fdf-86a3-178f7d8ef536", "metadata": {}, "outputs": [], "source": [ - "from typing import Optional\n", + "from langchain_core.runnables import ConfigurableFieldSpec\n", "\n", - "from langchain_community.chat_message_histories import RedisChatMessageHistory\n", - "from langchain_community.chat_models import ChatAnthropic\n", - "from langchain_core.chat_history import BaseChatMessageHistory\n", - "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n", - "from langchain_core.runnables.history import RunnableWithMessageHistory" + "store = {}\n", + "\n", + "\n", + "def get_session_history(user_id: str, conversation_id: str) -> BaseChatMessageHistory:\n", + " if (user_id, conversation_id) not in store:\n", + " store[(user_id, conversation_id)] = ChatMessageHistory()\n", + " return store[(user_id, conversation_id)]\n", + "\n", + "\n", + "with_message_history = RunnableWithMessageHistory(\n", + " runnable,\n", + " get_session_history,\n", + " input_messages_key=\"input\",\n", + " history_messages_key=\"history\",\n", + " history_factory_config=[\n", + " ConfigurableFieldSpec(\n", + " id=\"user_id\",\n", + " annotation=str,\n", + " name=\"User ID\",\n", + " description=\"Unique identifier for the user.\",\n", + " default=\"\",\n", + " is_shared=True,\n", + " ),\n", + " ConfigurableFieldSpec(\n", + " id=\"conversation_id\",\n", + " annotation=str,\n", + " name=\"Conversation ID\",\n", + " description=\"Unique identifier for the conversation.\",\n", + " default=\"\",\n", + " is_shared=True,\n", + " ),\n", + " ],\n", + ")" ] }, { "cell_type": "code", - "execution_count": 3, - "id": "3185edba-4eb6-4b32-80c6-577c0d19af97", + 
"execution_count": null, + "id": "65c5622e-09b8-4f2f-8c8a-2dab0fd040fa", "metadata": {}, "outputs": [], "source": [ - "prompt = ChatPromptTemplate.from_messages(\n", - " [\n", - " (\"system\", \"You're an assistant who's good at {ability}\"),\n", - " MessagesPlaceholder(variable_name=\"history\"),\n", - " (\"human\", \"{question}\"),\n", - " ]\n", - ")\n", - "\n", - "chain = prompt | ChatAnthropic(model=\"claude-2\")" + "with_message_history.invoke(\n", + " {\"ability\": \"math\", \"input\": \"Hello\"},\n", + " config={\"configurable\": {\"user_id\": \"123\", \"conversation_id\": \"1\"}},\n", + ")" ] }, { "cell_type": "markdown", - "id": "f9d81796-ce61-484c-89e2-6c567d5e54ef", + "id": "18f1a459-3f88-4ee6-8542-76a907070dd6", "metadata": {}, "source": [ - "### Adding message history\n", + "### Examples with runnables of different signatures\n", "\n", - "To add message history to our original chain we wrap it in the `RunnableWithMessageHistory` class.\n", - "\n", - "Crucially, we also need to define a method that takes a session_id string and based on it returns a `BaseChatMessageHistory`. Given the same input, this method should return an equivalent output.\n", - "\n", - "In this case we'll also want to specify `input_messages_key` (the key to be treated as the latest input message) and `history_messages_key` (the key to add historical messages to)." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "ca7c64d8-e138-4ef8-9734-f82076c47d80", - "metadata": {}, - "outputs": [], - "source": [ - "chain_with_history = RunnableWithMessageHistory(\n", - " chain,\n", - " lambda session_id: RedisChatMessageHistory(session_id, url=REDIS_URL),\n", - " input_messages_key=\"question\",\n", - " history_messages_key=\"history\",\n", - ")" + "The above runnable takes a dict as input and returns a BaseMessage. Below we show some alternatives." ] }, { "cell_type": "markdown", - "id": "37eefdec-9901-4650-b64c-d3c097ed5f4d", + "id": "48eae1bf-b59d-4a61-8e62-b6dbf667e866", "metadata": {}, "source": [ - "## Invoking with config\n", - "\n", - "Whenever we call our chain with message history, we need to include a config that contains the `session_id`\n", - "```python\n", - "config={\"configurable\": {\"session_id\": \"\"}}\n", - "```\n", - "\n", - "Given the same configuration, our chain should be pulling from the same chat message history." + "#### Messages input, dict output" ] }, { "cell_type": "code", "execution_count": 7, - "id": "a85bcc22-ca4c-4ad5-9440-f94be7318f3e", + "id": "17733d4f-3a32-4055-9d44-5d58b9446a26", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "AIMessage(content=' Cosine is one of the basic trigonometric functions in mathematics. It is defined as the ratio of the adjacent side to the hypotenuse in a right triangle.\\n\\nSome key properties and facts about cosine:\\n\\n- It is denoted by cos(θ), where θ is the angle in a right triangle. \\n\\n- The cosine of an acute angle is always positive. For angles greater than 90 degrees, cosine can be negative.\\n\\n- Cosine is one of the three main trig functions along with sine and tangent.\\n\\n- The cosine of 0 degrees is 1. 
As the angle increases towards 90 degrees, the cosine value decreases towards 0.\\n\\n- The range of values for cosine is -1 to 1.\\n\\n- The cosine function maps angles in a circle to the x-coordinate on the unit circle.\\n\\n- Cosine is used to find adjacent side lengths in right triangles, and has many other applications in mathematics, physics, engineering and more.\\n\\n- Key cosine identities include: cos(A+B) = cosAcosB − sinAsinB and cos(2A) = cos^2(A) − sin^2(A)\\n\\nSo in summary, cosine is a fundamental trig')" + "{'output_message': AIMessage(content=\"Simone de Beauvoir believed in the existence of free will. She argued that individuals have the ability to make choices and determine their own actions, even in the face of social and cultural constraints. She rejected the idea that individuals are purely products of their environment or predetermined by biology or destiny. Instead, she emphasized the importance of personal responsibility and the need for individuals to actively engage in creating their own lives and defining their own existence. De Beauvoir believed that freedom and agency come from recognizing one's own freedom and actively exercising it in the pursuit of personal and collective liberation.\")}" ] }, "execution_count": 7, @@ -218,22 +287,40 @@ } ], "source": [ - "chain_with_history.invoke(\n", - " {\"ability\": \"math\", \"question\": \"What does cosine mean?\"},\n", - " config={\"configurable\": {\"session_id\": \"foobar\"}},\n", + "from langchain_core.messages import HumanMessage\n", + "from langchain_core.runnables import RunnableParallel\n", + "\n", + "chain = RunnableParallel({\"output_message\": ChatOpenAI()})\n", + "\n", + "\n", + "def get_session_history(session_id: str) -> BaseChatMessageHistory:\n", + " if session_id not in store:\n", + " store[session_id] = ChatMessageHistory()\n", + " return store[session_id]\n", + "\n", + "\n", + "with_message_history = RunnableWithMessageHistory(\n", + " chain,\n", + " get_session_history,\n", + " output_messages_key=\"output_message\",\n", + ")\n", + "\n", + "with_message_history.invoke(\n", + " [HumanMessage(content=\"What did Simone de Beauvoir believe about free will\")],\n", + " config={\"configurable\": {\"session_id\": \"baz\"}},\n", ")" ] }, { "cell_type": "code", "execution_count": 8, - "id": "ab29abd3-751f-41ce-a1b0-53f6b565e79d", + "id": "efb57ef5-91f9-426b-84b9-b77f071a9dd7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "AIMessage(content=' The inverse of the cosine function is called the arccosine or inverse cosine, often denoted as cos-1(x) or arccos(x).\\n\\nThe key properties and facts about arccosine:\\n\\n- It is defined as the angle θ between 0 and π radians whose cosine is x. So arccos(x) = θ such that cos(θ) = x.\\n\\n- The range of arccosine is 0 to π radians (0 to 180 degrees).\\n\\n- The domain of arccosine is -1 to 1. \\n\\n- arccos(cos(θ)) = θ for values of θ from 0 to π radians.\\n\\n- arccos(x) is the angle in a right triangle whose adjacent side is x and hypotenuse is 1.\\n\\n- arccos(0) = 90 degrees. As x increases from 0 to 1, arccos(x) decreases from 90 to 0 degrees.\\n\\n- arccos(1) = 0 degrees. arccos(-1) = 180 degrees.\\n\\n- The graph of y = arccos(x) is part of the unit circle, restricted to x')" + "{'output_message': AIMessage(content='Simone de Beauvoir\\'s views on free will were closely aligned with those of her contemporary and partner Jean-Paul Sartre. 
Both de Beauvoir and Sartre were existentialist philosophers who emphasized the importance of individual freedom and the rejection of determinism. They believed that human beings have the capacity to transcend their circumstances and create their own meaning and values.\\n\\nSartre, in his famous work \"Being and Nothingness,\" argued that human beings are condemned to be free, meaning that we are burdened with the responsibility of making choices and defining ourselves in a world that lacks inherent meaning. Like de Beauvoir, Sartre believed that individuals have the ability to exercise their freedom and make choices in the face of external and internal constraints.\\n\\nWhile there may be some nuanced differences in their philosophical writings, overall, de Beauvoir and Sartre shared a similar belief in the existence of free will and the importance of individual agency in shaping one\\'s own life.')}" ] }, "execution_count": 8, @@ -242,149 +329,250 @@ } ], "source": [ - "chain_with_history.invoke(\n", - " {\"ability\": \"math\", \"question\": \"What's its inverse\"},\n", - " config={\"configurable\": {\"session_id\": \"foobar\"}},\n", + "with_message_history.invoke(\n", + " [HumanMessage(content=\"How did this compare to Sartre\")],\n", + " config={\"configurable\": {\"session_id\": \"baz\"}},\n", ")" ] }, { "cell_type": "markdown", - "id": "da3d1feb-b4bb-4624-961c-7db2e1180df7", + "id": "a39eac5f-a9d8-4729-be06-5e7faf0c424d", "metadata": {}, "source": [ - ":::tip\n", + "#### Messages input, messages output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e45bcd95-e31f-4a9a-967a-78f96e8da881", + "metadata": {}, + "outputs": [], + "source": [ + "RunnableWithMessageHistory(\n", + " ChatOpenAI(),\n", + " get_session_history,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "04daa921-a2d1-40f9-8cd1-ae4e9a4163a7", + "metadata": {}, + "source": [ + "#### Dict with single key for all messages input, messages output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27157f15-9fb0-4167-9870-f4d7f234b3cb", + "metadata": {}, + "outputs": [], + "source": [ + "from operator import itemgetter\n", "\n", - "[Langsmith trace](https://smith.langchain.com/public/863a003b-7ca8-4b24-be9e-d63ec13c106e/r)\n", + "RunnableWithMessageHistory(\n", + " itemgetter(\"input_messages\") | ChatOpenAI(),\n", + " get_session_history,\n", + " input_messages_key=\"input_messages\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "418ca7af-9ed9-478c-8bca-cba0de2ca61e", + "metadata": {}, + "source": [ + "## Persistent storage" + ] + }, + { + "cell_type": "markdown", + "id": "76799a13-d99a-4c4f-91f2-db699e40b8df", + "metadata": {}, + "source": [ + "In many cases it is preferable to persist conversation histories. `RunnableWithMessageHistory` is agnostic as to how the `get_session_history` callable retrieves its chat message histories. See [here](https://github.com/langchain-ai/langserve/blob/main/examples/chat_with_persistence_and_user/server.py) for an example using a local filesystem. Below we demonstrate how one could use Redis. Check out the [memory integrations](https://integrations.langchain.com/memory) page for implementations of chat message histories using other providers." 
+ ] + }, + { + "cell_type": "markdown", + "id": "6bca45e5-35d9-4603-9ca9-6ac0ce0e35cd", + "metadata": {}, + "source": [ + "### Setup\n", "\n", - ":::" + "We'll need to install Redis if it's not installed already:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "477d04b3-c2b6-4ba5-962f-492c0d625cd5", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet redis" ] }, { "cell_type": "markdown", - "id": "61d5115e-64a1-4ad5-b676-8afd4ef6093e", + "id": "6a0ec9e0-7b1c-4c6f-b570-e61d520b47c6", "metadata": {}, "source": [ - "Looking at the Langsmith trace for the second call, we can see that when constructing the prompt, a \"history\" variable has been injected which is a list of two messages (our first input and first output)." + "Start a local Redis Stack server if we don't have an existing Redis deployment to connect to:\n", + "```bash\n", + "docker run -d -p 6379:6379 -p 8001:8001 redis/redis-stack:latest\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cd6a250e-17fe-4368-a39d-1fe6b2cbde68", + "metadata": {}, + "outputs": [], + "source": [ + "REDIS_URL = \"redis://localhost:6379/0\"" ] }, { "cell_type": "markdown", - "id": "028cf151-6cd5-4533-b3cf-c8d735554647", + "id": "36f43b87-655c-4f64-aa7b-bd8c1955d8e5", "metadata": {}, "source": [ - "## Example: messages input, dict output" + "### [LangSmith](/docs/langsmith)\n", + "\n", + "LangSmith is especially useful for something like message history injection, where it can be hard to otherwise understand what the inputs are to various parts of the chain.\n", + "\n", + "Note that LangSmith is not needed, but it is helpful.\n", + "If you do want to use LangSmith, after you sign up at the link above, make sure to uncoment the below and set your environment variables to start logging traces:" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "0bb446b5-6251-45fe-a92a-4c6171473c53", + "execution_count": 2, + "id": "2afc1556-8da1-4499-ba11-983b66c58b18", + "metadata": {}, + "outputs": [], + "source": [ + "# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", + "# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()" + ] + }, + { + "cell_type": "markdown", + "id": "f9d81796-ce61-484c-89e2-6c567d5e54ef", + "metadata": {}, + "source": [ + "Updating the message history implementation just requires us to define a new callable, this time returning an instance of `RedisChatMessageHistory`:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ca7c64d8-e138-4ef8-9734-f82076c47d80", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.chat_message_histories import RedisChatMessageHistory\n", + "\n", + "\n", + "def get_message_history(session_id: str) -> RedisChatMessageHistory:\n", + " return RedisChatMessageHistory(session_id, url=REDIS_URL)\n", + "\n", + "\n", + "with_message_history = RunnableWithMessageHistory(\n", + " runnable,\n", + " get_message_history,\n", + " input_messages_key=\"input\",\n", + " history_messages_key=\"history\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "37eefdec-9901-4650-b64c-d3c097ed5f4d", + "metadata": {}, + "source": [ + "We can invoke as before:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "a85bcc22-ca4c-4ad5-9440-f94be7318f3e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'output_message': AIMessage(content=' Here is a summary of Simone de Beauvoir\\'s views on free will:\\n\\n- De Beauvoir was an existentialist philosopher and believed 
strongly in the concept of free will. She rejected the idea that human nature or instincts determine behavior.\\n\\n- Instead, de Beauvoir argued that human beings define their own essence or nature through their actions and choices. As she famously wrote, \"One is not born, but rather becomes, a woman.\"\\n\\n- De Beauvoir believed that while individuals are situated in certain cultural contexts and social conditions, they still have agency and the ability to transcend these situations. Freedom comes from choosing one\\'s attitude toward these constraints.\\n\\n- She emphasized the radical freedom and responsibility of the individual. We are \"condemned to be free\" because we cannot escape making choices and taking responsibility for our choices. \\n\\n- De Beauvoir felt that many people evade their freedom and responsibility by adopting rigid mindsets, ideologies, or conforming uncritically to social roles.\\n\\n- She advocated for the recognition of ambiguity in the human condition and warned against the quest for absolute rules that deny freedom and responsibility. Authentic living involves embracing ambiguity.\\n\\nIn summary, de Beauvoir promoted an existential ethics')}" + "AIMessage(content='Cosine is a trigonometric function that represents the ratio of the adjacent side to the hypotenuse in a right triangle.')" ] }, - "execution_count": 14, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from langchain_core.messages import HumanMessage\n", - "from langchain_core.runnables import RunnableParallel\n", - "\n", - "chain = RunnableParallel({\"output_message\": ChatAnthropic(model=\"claude-2\")})\n", - "chain_with_history = RunnableWithMessageHistory(\n", - " chain,\n", - " lambda session_id: RedisChatMessageHistory(session_id, url=REDIS_URL),\n", - " output_messages_key=\"output_message\",\n", - ")\n", - "\n", - "chain_with_history.invoke(\n", - " [HumanMessage(content=\"What did Simone de Beauvoir believe about free will\")],\n", - " config={\"configurable\": {\"session_id\": \"baz\"}},\n", + "with_message_history.invoke(\n", + " {\"ability\": \"math\", \"input\": \"What does cosine mean?\"},\n", + " config={\"configurable\": {\"session_id\": \"foobar\"}},\n", ")" ] }, { "cell_type": "code", - "execution_count": 16, - "id": "601ce3ff-aea8-424d-8e54-fd614256af4f", + "execution_count": 12, + "id": "ab29abd3-751f-41ce-a1b0-53f6b565e79d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'output_message': AIMessage(content=\" There are many similarities between Simone de Beauvoir's views on free will and those of Jean-Paul Sartre, though some key differences emerge as well:\\n\\nSimilarities with Sartre:\\n\\n- Both were existentialist thinkers who rejected determinism and emphasized human freedom and responsibility.\\n\\n- They agreed that existence precedes essence - there is no predefined human nature that determines who we are.\\n\\n- Individuals must define themselves through their choices and actions. This leads to anxiety but also freedom.\\n\\n- The human condition is characterized by ambiguity and uncertainty, rather than fixed meanings/values.\\n\\n- Both felt that most people evade their freedom through self-deception, conformity, or adopting collective identities/values uncritically.\\n\\nDifferences from Sartre: \\n\\n- Sartre placed more emphasis on the burden and anguish of radical freedom. 
De Beauvoir focused more on its positive potential.\\n\\n- De Beauvoir critiqued Sartre's premise that human relations are necessarily conflictual. She saw more potential for mutual recognition.\\n\\n- Sartre saw the Other's gaze as a threat to freedom. De Beauvoir put more stress on how the Other's gaze can confirm\")}" + "AIMessage(content='The inverse of cosine is the arccosine function, denoted as acos or cos^-1, which gives the angle corresponding to a given cosine value.')" ] }, - "execution_count": 16, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "chain_with_history.invoke(\n", - " [HumanMessage(content=\"How did this compare to Sartre\")],\n", - " config={\"configurable\": {\"session_id\": \"baz\"}},\n", + "with_message_history.invoke(\n", + " {\"ability\": \"math\", \"input\": \"What's its inverse\"},\n", + " config={\"configurable\": {\"session_id\": \"foobar\"}},\n", ")" ] }, { "cell_type": "markdown", - "id": "b898d1b1-11e6-4d30-a8dd-cc5e45533611", + "id": "da3d1feb-b4bb-4624-961c-7db2e1180df7", "metadata": {}, "source": [ ":::tip\n", "\n", - "[LangSmith trace](https://smith.langchain.com/public/f6c3e1d1-a49d-4955-a9fa-c6519df74fa7/r)\n", + "[Langsmith trace](https://smith.langchain.com/public/bd73e122-6ec1-48b2-82df-e6483dc9cb63/r)\n", "\n", ":::" ] }, { "cell_type": "markdown", - "id": "1724292c-01c6-44bb-83e8-9cdb6bf01483", - "metadata": {}, - "source": [ - "## More examples\n", - "\n", - "We could also do any of the below:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fd89240b-5a25-48f8-9568-5c1127f9ffad", + "id": "61d5115e-64a1-4ad5-b676-8afd4ef6093e", "metadata": {}, - "outputs": [], "source": [ - "from operator import itemgetter\n", - "\n", - "# messages in, messages out\n", - "RunnableWithMessageHistory(\n", - " ChatAnthropic(model=\"claude-2\"),\n", - " lambda session_id: RedisChatMessageHistory(session_id, url=REDIS_URL),\n", - ")\n", - "\n", - "# dict with single key for all messages in, messages out\n", - "RunnableWithMessageHistory(\n", - " itemgetter(\"input_messages\") | ChatAnthropic(model=\"claude-2\"),\n", - " lambda session_id: RedisChatMessageHistory(session_id, url=REDIS_URL),\n", - " input_messages_key=\"input_messages\",\n", - ")" + "Looking at the Langsmith trace for the second call, we can see that when constructing the prompt, a \"history\" variable has been injected which is a list of two messages (our first input and first output)." ] } ], "metadata": { "kernelspec": { - "display_name": "poetry-venv", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "poetry-venv" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -396,7 +584,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/docs/docs/get_started/quickstart.mdx b/docs/docs/get_started/quickstart.mdx index f5d43e02a225aa..d8a9f1e732e140 100644 --- a/docs/docs/get_started/quickstart.mdx +++ b/docs/docs/get_started/quickstart.mdx @@ -193,7 +193,7 @@ After that, we can import and use WebBaseLoader. 
```python from langchain_community.document_loaders import WebBaseLoader -loader = WebBaseLoader("https://docs.smith.langchain.com/overview") +loader = WebBaseLoader("https://docs.smith.langchain.com") docs = loader.load() ``` diff --git a/docs/docs/integrations/chat/ai21.ipynb b/docs/docs/integrations/chat/ai21.ipynb new file mode 100644 index 00000000000000..45f4a969bf508b --- /dev/null +++ b/docs/docs/integrations/chat/ai21.ipynb @@ -0,0 +1,141 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "4cebeec0", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: AI21 Labs\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# ChatAI21\n", + "\n", + "This notebook covers how to get started with AI21 chat models.\n", + "\n", + "## Installation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c3bef91", + "metadata": { + "ExecuteTime": { + "end_time": "2024-02-15T06:50:44.929635Z", + "start_time": "2024-02-15T06:50:41.209704Z" + } + }, + "outputs": [], + "source": [ + "!pip install -qU langchain-ai21" + ] + }, + { + "cell_type": "markdown", + "id": "2b4f3e15", + "metadata": {}, + "source": [ + "## Environment Setup\n", + "\n", + "We'll need to get an [AI21 API key](https://docs.ai21.com/) and set the `AI21_API_KEY` environment variable:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62e0dbc3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "os.environ[\"AI21_API_KEY\"] = getpass()" + ] + }, + { + "cell_type": "markdown", + "id": "4828829d3da430ce", + "metadata": { + "collapsed": false + }, + "source": [ + "## Usage" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "39353473fce5dd2e", + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content='Bonjour, comment vas-tu?')" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain_ai21 import ChatAI21\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "\n", + "chat = ChatAI21(model=\"j2-ultra\")\n", + "\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\"system\", \"You are a helpful assistant that translates English to French.\"),\n", + " (\"human\", \"Translate this sentence from English to French. 
{english_text}.\"),\n", + " ]\n", + ")\n", + "\n", + "chain = prompt | chat\n", + "chain.invoke({\"english_text\": \"Hello, how are you?\"})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c159a79f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/document_loaders/cassandra.ipynb b/docs/docs/integrations/document_loaders/cassandra.ipynb index 49f261a18a84b4..b69b1135a2bbe9 100644 --- a/docs/docs/integrations/document_loaders/cassandra.ipynb +++ b/docs/docs/integrations/document_loaders/cassandra.ipynb @@ -72,57 +72,72 @@ }, { "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "### Init from a cassandra driver Session\n", "\n", "You need to create a `cassandra.cluster.Session` object, as described in the [Cassandra driver documentation](https://docs.datastax.com/en/developer/python-driver/latest/api/cassandra/cluster/#module-cassandra.cluster). The details vary (e.g. with network settings and authentication), but this might be something like:" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [], "source": [ "from cassandra.cluster import Cluster\n", "\n", "cluster = Cluster()\n", "session = cluster.connect()" - ], - "metadata": { - "collapsed": false - }, - "execution_count": null + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "You need to provide the name of an existing keyspace of the Cassandra instance:" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [], "source": [ "CASSANDRA_KEYSPACE = input(\"CASSANDRA_KEYSPACE = \")" - ], - "metadata": { - "collapsed": false - }, - "execution_count": null + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "Creating the document loader:" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", @@ -144,18 +159,21 @@ }, { "cell_type": "code", - "outputs": [], - "source": [ - "docs = loader.load()" - ], + "execution_count": 17, "metadata": { - "collapsed": false, "ExecuteTime": { "end_time": "2024-01-19T15:47:26.399472Z", "start_time": "2024-01-19T15:47:26.389145Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false } }, - "execution_count": 17 + "outputs": [], + "source": [ + "docs = loader.load()" + ] }, { "cell_type": "code", @@ -169,7 +187,9 @@ "outputs": [ { "data": { - "text/plain": "Document(page_content='Row(_id=\\'659bdffa16cbc4586b11a423\\', title=\\'Dangerous Men\\', reviewtext=\\'\"Dangerous Men,\" the picture\\\\\\'s production notes inform, took 26 years to reach the big screen. 
After having seen it, I wonder: What was the rush?\\')', metadata={'table': 'movie_reviews', 'keyspace': 'default_keyspace'})" + "text/plain": [ + "Document(page_content='Row(_id=\\'659bdffa16cbc4586b11a423\\', title=\\'Dangerous Men\\', reviewtext=\\'\"Dangerous Men,\" the picture\\\\\\'s production notes inform, took 26 years to reach the big screen. After having seen it, I wonder: What was the rush?\\')', metadata={'table': 'movie_reviews', 'keyspace': 'default_keyspace'})" + ] }, "execution_count": 19, "metadata": {}, @@ -182,17 +202,27 @@ }, { "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "source": [ "### Init from cassio\n", "\n", "It's also possible to use cassio to configure the session and keyspace." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, "outputs": [], "source": [ "import cassio\n", @@ -204,11 +234,16 @@ ")\n", "\n", "docs = loader.load()" - ], - "metadata": { - "collapsed": false - }, - "execution_count": null + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Attribution statement\n", + "\n", + "> Apache Cassandra, Cassandra and Apache are either registered trademarks or trademarks of the [Apache Software Foundation](http://www.apache.org/) in the United States and/or other countries." + ] } ], "metadata": { @@ -233,7 +268,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/docs/integrations/llms/ai21.ipynb b/docs/docs/integrations/llms/ai21.ipynb index 2e22f85f11d59a..29e698e7d8fe16 100644 --- a/docs/docs/integrations/llms/ai21.ipynb +++ b/docs/docs/integrations/llms/ai21.ipynb @@ -1,137 +1,114 @@ { "cells": [ { - "cell_type": "markdown", - "id": "9597802c", + "cell_type": "raw", + "id": "602a52a4", "metadata": {}, "source": [ - "# AI21\n", - "\n", - "[AI21 Studio](https://docs.ai21.com/) provides API access to `Jurassic-2` large language models.\n", - "\n", - "This example goes over how to use LangChain to interact with [AI21 models](https://docs.ai21.com/docs/jurassic-2-models)." + "---\n", + "sidebar_label: AI21 Labs\n", + "---" ] }, { - "cell_type": "code", - "execution_count": 1, - "id": "02be122d-04e8-4ec6-84d1-f1d8961d6828", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33mWARNING: There was an error checking the latest version of pip.\u001b[0m\u001b[33m\n", - "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" - ] - } - ], - "source": [ - "# install the package:\n", - "%pip install --upgrade --quiet ai21" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "4229227e-6ca2-41ad-a3c3-5f29e3559091", - "metadata": { - "tags": [] - }, - "outputs": [], + "cell_type": "markdown", + "id": "9597802c", + "metadata": {}, "source": [ - "# get AI21_API_KEY. 
Use https://studio.ai21.com/account/account\n", + "# AI21LLM\n", "\n", - "from getpass import getpass\n", + "This example goes over how to use LangChain to interact with `AI21` models.\n", "\n", - "AI21_API_KEY = getpass()" + "## Installation" ] }, { "cell_type": "code", - "execution_count": 7, - "id": "6fb585dd", - "metadata": { - "tags": [] - }, + "execution_count": null, + "id": "59c710c4", + "metadata": {}, "outputs": [], "source": [ - "from langchain_community.llms import AI21\n", - "from langchain_core.prompts import PromptTemplate" + "!pip install -qU langchain-ai21" ] }, { - "cell_type": "code", - "execution_count": 12, - "id": "035dea0f", + "cell_type": "markdown", + "id": "560a2f9254963fd7", "metadata": { - "tags": [] + "collapsed": false }, - "outputs": [], "source": [ - "template = \"\"\"Question: {question}\n", - "\n", - "Answer: Let's think step by step.\"\"\"\n", + "## Environment Setup\n", "\n", - "prompt = PromptTemplate.from_template(template)" + "We'll need to get a [AI21 API key](https://docs.ai21.com/) and set the `AI21_API_KEY` environment variable:" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "3f3458d9", + "execution_count": 4, + "id": "035dea0f", "metadata": { "tags": [] }, "outputs": [], "source": [ - "llm = AI21(ai21_api_key=AI21_API_KEY)" + "import os\n", + "from getpass import getpass\n", + "\n", + "os.environ[\"AI21_API_KEY\"] = getpass()" ] }, { - "cell_type": "code", - "execution_count": 10, - "id": "a641dbd9", + "cell_type": "markdown", + "id": "1891df96eb076e1a", "metadata": { - "tags": [] + "collapsed": false }, - "outputs": [], "source": [ - "llm_chain = prompt | llm" + "## Usage" ] }, { "cell_type": "code", - "execution_count": 13, - "id": "9f0b1960", + "execution_count": 6, + "id": "98f70927a87e4745", "metadata": { - "tags": [] + "collapsed": false }, "outputs": [ { "data": { "text/plain": [ - "'\\nThe Super Bowl in the year Justin Beiber was born was in the year 1991.\\nThe Super Bowl in 1991 was won by the Washington Redskins.\\nFinal answer: Washington Redskins'" + "'\\nLangChain is a decentralized blockchain network that leverages AI and machine learning to provide language translation services.'" ] }, - "execution_count": 13, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "question = \"What NFL team won the Super Bowl in the year Justin Beiber was born?\"\n", + "from langchain_ai21 import AI21LLM\n", + "from langchain_core.prompts import PromptTemplate\n", + "\n", + "template = \"\"\"Question: {question}\n", "\n", - "llm_chain.invoke({\"question\": question})" + "Answer: Let's think step by step.\"\"\"\n", + "\n", + "prompt = PromptTemplate.from_template(template)\n", + "\n", + "model = AI21LLM(model=\"j2-ultra\")\n", + "\n", + "chain = prompt | model\n", + "\n", + "chain.invoke({\"question\": \"What is LangChain?\"})" ] }, { "cell_type": "code", "execution_count": null, - "id": "22bce013", + "id": "a52f765c", "metadata": {}, "outputs": [], "source": [] @@ -139,7 +116,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3.11.1 64-bit", "language": "python", "name": "python3" }, @@ -153,7 +130,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.11.4" + }, + "vscode": { + "interpreter": { + "hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1" + } } }, "nbformat": 4, diff --git a/docs/docs/integrations/llms/huggingface_endpoint.ipynb 
b/docs/docs/integrations/llms/huggingface_endpoint.ipynb new file mode 100644 index 00000000000000..a71a987bac101e --- /dev/null +++ b/docs/docs/integrations/llms/huggingface_endpoint.ipynb @@ -0,0 +1,238 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Huggingface Endpoints\n", + "\n", + ">The [Hugging Face Hub](https://huggingface.co/docs/hub/index) is a platform with over 120k models, 20k datasets, and 50k demo apps (Spaces), all open source and publicly available, in an online platform where people can easily collaborate and build ML together.\n", + "\n", + "The `Hugging Face Hub` also offers various endpoints to build ML applications.\n", + "This example showcases how to connect to the different Endpoints types.\n", + "\n", + "In particular, text generation inference is powered by [Text Generation Inference](https://github.com/huggingface/text-generation-inference): a custom-built Rust, Python and gRPC server for blazing-faset text generation inference." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.llms import HuggingFaceEndpoint" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation and Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use, you should have the ``huggingface_hub`` python [package installed](https://huggingface.co/docs/huggingface_hub/installation)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet huggingface_hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get a token: https://huggingface.co/docs/api-inference/quicktour#get-your-api-token\n", + "\n", + "from getpass import getpass\n", + "\n", + "HUGGINGFACEHUB_API_TOKEN = getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = HUGGINGFACEHUB_API_TOKEN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare Examples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.llms import HuggingFaceEndpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import LLMChain\n", + "from langchain.prompts import PromptTemplate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "question = \"Who won the FIFA World Cup in the year 1994? \"\n", + "\n", + "template = \"\"\"Question: {question}\n", + "\n", + "Answer: Let's think step by step.\"\"\"\n", + "\n", + "prompt = PromptTemplate.from_template(template)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Examples\n", + "\n", + "Here is an example of how you can access `HuggingFaceEndpoint` integration of the free [Serverless Endpoints](https://huggingface.co/inference-endpoints/serverless) API." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "repo_id = \"mistralai/Mistral-7B-Instruct-v0.2\"\n", + "\n", + "llm = HuggingFaceEndpoint(\n", + " repo_id=repo_id, max_length=128, temperature=0.5, token=HUGGINGFACEHUB_API_TOKEN\n", + ")\n", + "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", + "print(llm_chain.run(question))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dedicated Endpoint\n", + "\n", + "\n", + "The free serverless API lets you implement solutions and iterate in no time, but it may be rate limited for heavy use cases, since the loads are shared with other requests.\n", + "\n", + "For enterprise workloads, the best is to use [Inference Endpoints - Dedicated](https://huggingface.co/inference-endpoints/dedicated).\n", + "This gives access to a fully managed infrastructure that offer more flexibility and speed. These resoucres come with continuous support and uptime guarantees, as well as options like AutoScaling\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set the url to your Inference Endpoint below\n", + "your_endpoint_url = \"https://fayjubiy2xqn36z0.us-east-1.aws.endpoints.huggingface.cloud\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm = HuggingFaceEndpoint(\n", + " endpoint_url=f\"{your_endpoint_url}\",\n", + " max_new_tokens=512,\n", + " top_k=10,\n", + " top_p=0.95,\n", + " typical_p=0.95,\n", + " temperature=0.01,\n", + " repetition_penalty=1.03,\n", + ")\n", + "llm(\"What did foo say about bar?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Streaming" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", + "from langchain_community.llms import HuggingFaceEndpoint\n", + "\n", + "llm = HuggingFaceEndpoint(\n", + " endpoint_url=f\"{your_endpoint_url}\",\n", + " max_new_tokens=512,\n", + " top_k=10,\n", + " top_p=0.95,\n", + " typical_p=0.95,\n", + " temperature=0.01,\n", + " repetition_penalty=1.03,\n", + " streaming=True,\n", + ")\n", + "llm(\"What did foo say about bar?\", callbacks=[StreamingStdOutCallbackHandler()])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "agents", + "language": "python", + "name": "agents" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "vscode": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/docs/integrations/llms/huggingface_hub.ipynb b/docs/docs/integrations/llms/huggingface_hub.ipynb deleted file mode 100644 index 67dbe3c41f3855..00000000000000 --- a/docs/docs/integrations/llms/huggingface_hub.ipynb +++ /dev/null @@ -1,466 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "959300d4", - "metadata": {}, - "source": [ - "# Hugging Face Hub\n", - "\n", - ">The [Hugging Face Hub](https://huggingface.co/docs/hub/index) is a platform with over 120k models, 20k datasets, and 50k demo apps (Spaces), all open source and publicly 
available, in an online platform where people can easily collaborate and build ML together.\n", - "\n", - "This example showcases how to connect to the `Hugging Face Hub` and use different models." - ] - }, - { - "cell_type": "markdown", - "id": "1ddafc6d-7d7c-48fa-838f-0e7f50895ce3", - "metadata": {}, - "source": [ - "## Installation and Setup" - ] - }, - { - "cell_type": "markdown", - "id": "4c1b8450-5eaf-4d34-8341-2d785448a1ff", - "metadata": { - "tags": [] - }, - "source": [ - "To use, you should have the ``huggingface_hub`` python [package installed](https://huggingface.co/docs/huggingface_hub/installation)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d772b637-de00-4663-bd77-9bc96d798db2", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "%pip install --upgrade --quiet huggingface_hub" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "d597a792-354c-4ca5-b483-5965eec5d63d", - "metadata": {}, - "outputs": [ - { - "name": "stdin", - "output_type": "stream", - "text": [ - " ········\n" - ] - } - ], - "source": [ - "# get a token: https://huggingface.co/docs/api-inference/quicktour#get-your-api-token\n", - "\n", - "from getpass import getpass\n", - "\n", - "HUGGINGFACEHUB_API_TOKEN = getpass()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "b8c5b88c-e4b8-4d0d-9a35-6e8f106452c2", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = HUGGINGFACEHUB_API_TOKEN" - ] - }, - { - "cell_type": "markdown", - "id": "84dd44c1-c428-41f3-a911-520281386c94", - "metadata": {}, - "source": [ - "## Prepare Examples" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3fe7d1d1-241d-426a-acff-e208f1088871", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_community.llms import HuggingFaceHub" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "6620f39b-3d32-4840-8931-ff7d2c3e47e8", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.chains import LLMChain\n", - "from langchain.prompts import PromptTemplate" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "44adc1a0-9c0a-4f1e-af5a-fe04222e78d7", - "metadata": {}, - "outputs": [], - "source": [ - "question = \"Who won the FIFA World Cup in the year 1994? \"\n", - "\n", - "template = \"\"\"Question: {question}\n", - "\n", - "Answer: Let's think step by step.\"\"\"\n", - "\n", - "prompt = PromptTemplate.from_template(template)" - ] - }, - { - "cell_type": "markdown", - "id": "ddaa06cf-95ec-48ce-b0ab-d892a7909693", - "metadata": {}, - "source": [ - "## Examples\n", - "\n", - "Below are some examples of models you can access through the `Hugging Face Hub` integration." - ] - }, - { - "cell_type": "markdown", - "id": "4c16fded-70d1-42af-8bfa-6ddda9f0bc63", - "metadata": {}, - "source": [ - "### `Flan`, by `Google`" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "39c7eeac-01c4-486b-9480-e828a9e73e78", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "repo_id = \"google/flan-t5-xxl\" # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "3acf0069", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The FIFA World Cup was held in the year 1994. 
West Germany won the FIFA World Cup in 1994\n" - ] - } - ], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "1a5c97af-89bc-4e59-95c1-223742a9160b", - "metadata": {}, - "source": [ - "### `Dolly`, by `Databricks`\n", - "\n", - "See [Databricks](https://huggingface.co/databricks) organization page for a list of available models." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "521fcd2b-8e38-4920-b407-5c7d330411c9", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"databricks/dolly-v2-3b\"" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "9907ec3a-fe0c-4543-81c4-d42f9453f16c", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " First of all, the world cup was won by the Germany. Then the Argentina won the world cup in 2022. So, the Argentina won the world cup in 1994.\n", - "\n", - "\n", - "Question: Who\n" - ] - } - ], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "03f6ae52-b5f9-4de6-832c-551cb3fa11ae", - "metadata": {}, - "source": [ - "### `Camel`, by `Writer`\n", - "\n", - "See [Writer's](https://huggingface.co/Writer) organization page for a list of available models." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "257a091d-750b-4910-ac08-fe1c7b3fd98b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "repo_id = \"Writer/camel-5b-hf\" # See https://huggingface.co/Writer for other options" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b06f6838-a11a-4d6a-88e3-91fa1747a2b3", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "2bf838eb-1083-402f-b099-b07c452418c8", - "metadata": {}, - "source": [ - "### `XGen`, by `Salesforce`\n", - "\n", - "See [more information](https://github.com/salesforce/xgen)." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "18c78880-65d7-41d0-9722-18090efb60e9", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"Salesforce/xgen-7b-8k-base\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1b1150b4-ec30-4674-849e-6a41b085aa2b", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "0aca9f9e-f333-449c-97b2-10d1dbf17e75", - "metadata": {}, - "source": [ - "### `Falcon`, by `Technology Innovation Institute (TII)`\n", - "\n", - "See [more information](https://huggingface.co/tiiuae/falcon-40b)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "496b35ac-5ee2-4b68-a6ce-232608f56c03", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"tiiuae/falcon-40b\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ff2541ad-e394-4179-93c2-7ae9c4ca2a25", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "7e15849b-5561-4bb9-86ec-6412ca10196a", - "metadata": {}, - "source": [ - "### `InternLM-Chat`, by `Shanghai AI Laboratory`\n", - "\n", - "See [more information](https://huggingface.co/internlm/internlm-7b)." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "3b533461-59f8-406e-907b-000841fa60a7", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"internlm/internlm-chat-7b\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c71210b9-5895-41a2-889a-f430d22fa1aa", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"max_length\": 128, \"temperature\": 0.8}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "4f2e5132-1713-42d7-919a-8c313744ce95", - "metadata": {}, - "source": [ - "### `Qwen`, by `Alibaba Cloud`\n", - "\n", - ">`Tongyi Qianwen-7B` (`Qwen-7B`) is a model with a scale of 7 billion parameters in the `Tongyi Qianwen` large model series developed by `Alibaba Cloud`. `Qwen-7B` is a large language model based on Transformer, which is trained on ultra-large-scale pre-training data.\n", - "\n", - "See [more information on HuggingFace](https://huggingface.co/Qwen/Qwen-7B) of on [GitHub](https://github.com/QwenLM/Qwen-7B).\n", - "\n", - "See here a [big example for LangChain integration and Qwen](https://github.com/QwenLM/Qwen-7B/blob/main/examples/langchain_tooluse.ipynb)." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "f598b1ca-77c7-40f1-a83f-c21ea9910c88", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"Qwen/Qwen-7B\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2c97f4e2-d401-44fb-9da7-b60b2e2cc663", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"max_length\": 128, \"temperature\": 0.5}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "e3871376-ed0e-49a8-8d9b-7e60dbbd2b35", - "metadata": {}, - "source": [ - "### `Yi` series models, by `01.ai`\n", - "\n", - ">The `Yi` series models are large language models trained from scratch by developers at [01.ai](https://01.ai/). The first public release contains two bilingual(English/Chinese) base models with the parameter sizes of 6B(`Yi-6B`) and 34B(`Yi-34B`). Both of them are trained with 4K sequence length and can be extended to 32K during inference time. The `Yi-6B-200K` and `Yi-34B-200K` are base model with 200K context length.\n", - "\n", - "Here we test the [Yi-34B](https://huggingface.co/01-ai/Yi-34B) model." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "1c9d3125-3f50-48b8-93b6-b50847207afa", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"01-ai/Yi-34B\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8b661069-8229-4850-9f13-c4ca28c0c96b", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"max_length\": 128, \"temperature\": 0.5}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dd6f3edc-9f97-47a6-ab2c-116756babbe6", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/docs/integrations/llms/huggingface_textgen_inference.ipynb b/docs/docs/integrations/llms/huggingface_textgen_inference.ipynb deleted file mode 100644 index e9b5e31c38600f..00000000000000 --- a/docs/docs/integrations/llms/huggingface_textgen_inference.ipynb +++ /dev/null @@ -1,108 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Huggingface TextGen Inference\n", - "\n", - "[Text Generation Inference](https://github.com/huggingface/text-generation-inference) is a Rust, Python and gRPC server for text generation inference. Used in production at [HuggingFace](https://huggingface.co/) to power LLMs api-inference widgets.\n", - "\n", - "This notebooks goes over how to use a self hosted LLM using `Text Generation Inference`." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To use, you should have the `text_generation` python package installed." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# !pip3 install text_generation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_community.llms import HuggingFaceTextGenInference\n", - "\n", - "llm = HuggingFaceTextGenInference(\n", - " inference_server_url=\"http://localhost:8010/\",\n", - " max_new_tokens=512,\n", - " top_k=10,\n", - " top_p=0.95,\n", - " typical_p=0.95,\n", - " temperature=0.01,\n", - " repetition_penalty=1.03,\n", - ")\n", - "llm(\"What did foo say about bar?\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Streaming" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", - "from langchain_community.llms import HuggingFaceTextGenInference\n", - "\n", - "llm = HuggingFaceTextGenInference(\n", - " inference_server_url=\"http://localhost:8010/\",\n", - " max_new_tokens=512,\n", - " top_k=10,\n", - " top_p=0.95,\n", - " typical_p=0.95,\n", - " temperature=0.01,\n", - " repetition_penalty=1.03,\n", - " streaming=True,\n", - ")\n", - "llm(\"What did foo say about bar?\", callbacks=[StreamingStdOutCallbackHandler()])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.3" - }, - "vscode": { - "interpreter": { - "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/docs/integrations/llms/llm_caching.ipynb b/docs/docs/integrations/llms/llm_caching.ipynb index 791ff870b0fda7..f428939a2a8c81 100644 --- a/docs/docs/integrations/llms/llm_caching.ipynb +++ b/docs/docs/integrations/llms/llm_caching.ipynb @@ -1131,6 +1131,16 @@ "print(llm(\"How come we always see one face of the moon?\"))" ] }, + { + "cell_type": "markdown", + "id": "55dc84b3-37cb-4f19-b175-40e18e06f83f", + "metadata": {}, + "source": [ + "#### Attribution statement\n", + "\n", + ">Apache Cassandra, Cassandra and Apache are either registered trademarks or trademarks of the [Apache Software Foundation](http://www.apache.org/) in the United States and/or other countries." 
+ ] + }, { "cell_type": "markdown", "id": "8712f8fc-bb89-4164-beb9-c672778bbd91", @@ -1588,7 +1598,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.1" + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/docs/integrations/memory/astradb_chat_message_history.ipynb b/docs/docs/integrations/memory/astradb_chat_message_history.ipynb index abe6b1b2876f6e..068b804f00b45b 100644 --- a/docs/docs/integrations/memory/astradb_chat_message_history.ipynb +++ b/docs/docs/integrations/memory/astradb_chat_message_history.ipynb @@ -32,7 +32,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install --upgrade --quiet \"astrapy>=0.6.2\"" + "%pip install --upgrade --quiet \"astrapy>=0.7.1\"" ] }, { diff --git a/docs/docs/integrations/memory/cassandra_chat_message_history.ipynb b/docs/docs/integrations/memory/cassandra_chat_message_history.ipynb index d802bc785da356..64ead129f51c05 100644 --- a/docs/docs/integrations/memory/cassandra_chat_message_history.ipynb +++ b/docs/docs/integrations/memory/cassandra_chat_message_history.ipynb @@ -145,6 +145,24 @@ "source": [ "message_history.messages" ] + }, + { + "cell_type": "markdown", + "id": "59902d0f-e9ba-4e3d-a7e0-ce202b9d3c43", + "metadata": {}, + "source": [ + "#### Attribution statement\n", + "\n", + "> Apache Cassandra, Cassandra and Apache are either registered trademarks or trademarks of the [Apache Software Foundation](http://www.apache.org/) in the United States and/or other countries." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7efaa51c-e9ee-4dce-80a4-eb9280a0dbe5", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -163,7 +181,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/docs/integrations/providers/apache_doris.mdx b/docs/docs/integrations/providers/apache_doris.mdx new file mode 100644 index 00000000000000..93db9330309867 --- /dev/null +++ b/docs/docs/integrations/providers/apache_doris.mdx @@ -0,0 +1,21 @@ +# Apache Doris + +>[Apache Doris](https://doris.apache.org/) is a modern data warehouse for real-time analytics. +It delivers lightning-fast analytics on real-time data at scale. + +>Usually `Apache Doris` is categorized into OLAP, and it has showed excellent performance in [ClickBench — a Benchmark For Analytical DBMS](https://benchmark.clickhouse.com/). Since it has a super-fast vectorized execution engine, it could also be used as a fast vectordb. + +## Installation and Setup + + +```bash +pip install pymysql +``` + +## Vector Store + +See a [usage example](/docs/integrations/vectorstores/apache_doris). + +```python +from langchain_community.vectorstores import ApacheDoris +``` diff --git a/docs/docs/integrations/providers/astradb.mdx b/docs/docs/integrations/providers/astradb.mdx index 20a94d736b361f..8cc4788cc28fee 100644 --- a/docs/docs/integrations/providers/astradb.mdx +++ b/docs/docs/integrations/providers/astradb.mdx @@ -1,25 +1,21 @@ # Astra DB -This page lists the integrations available with [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) and [Apache Cassandra®](https://cassandra.apache.org/). +> DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Apache Cassandra® and made conveniently available +> through an easy-to-use JSON API. 
### Setup Install the following Python package: ```bash -pip install "astrapy>=0.5.3" +pip install "astrapy>=0.7.1" ``` -## Astra DB - -> DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Cassandra and made conveniently available -> through an easy-to-use JSON API. - -### Vector Store +## Vector Store ```python -from langchain_community.vectorstores import AstraDB -vector_store = AstraDB( +from langchain_astradb import AstraDBVectorStore +vector_store = AstraDBVectorStore( embedding=my_embedding, collection_name="my_store", api_endpoint="...", @@ -29,11 +25,22 @@ vector_store = AstraDB( Learn more in the [example notebook](/docs/integrations/vectorstores/astradb). -### LLM Cache +## Chat message history + +```python +from langchain_community.chat_message_histories import AstraDBChatMessageHistory +message_history = AstraDBChatMessageHistory( + session_id="test-session", + api_endpoint="...", + token="...", +) +``` + +## LLM Cache ```python from langchain.globals import set_llm_cache -from langchain.cache import AstraDBCache +from langchain_community.cache import AstraDBCache set_llm_cache(AstraDBCache( api_endpoint="...", token="...", @@ -43,11 +50,11 @@ set_llm_cache(AstraDBCache( Learn more in the [example notebook](/docs/integrations/llms/llm_caching#astra-db-caches) (scroll to the Astra DB section). -### Semantic LLM Cache +## Semantic LLM Cache ```python from langchain.globals import set_llm_cache -from langchain.cache import AstraDBSemanticCache +from langchain_community.cache import AstraDBSemanticCache set_llm_cache(AstraDBSemanticCache( embedding=my_embedding, api_endpoint="...", @@ -57,20 +64,9 @@ set_llm_cache(AstraDBSemanticCache( Learn more in the [example notebook](/docs/integrations/llms/llm_caching#astra-db-caches) (scroll to the appropriate section). -### Chat message history - -```python -from langchain.memory import AstraDBChatMessageHistory -message_history = AstraDBChatMessageHistory( - session_id="test-session", - api_endpoint="...", - token="...", -) -``` - Learn more in the [example notebook](/docs/integrations/memory/astradb_chat_message_history). -### Document loader +## Document loader ```python from langchain_community.document_loaders import AstraDBLoader @@ -83,13 +79,13 @@ loader = AstraDBLoader( Learn more in the [example notebook](/docs/integrations/document_loaders/astradb). -### Self-querying retriever +## Self-querying retriever ```python -from langchain_community.vectorstores import AstraDB +from langchain_astradb import AstraDBVectorStore from langchain.retrievers.self_query.base import SelfQueryRetriever -vector_store = AstraDB( +vector_store = AstraDBVectorStore( embedding=my_embedding, collection_name="my_store", api_endpoint="...", @@ -106,7 +102,7 @@ retriever = SelfQueryRetriever.from_llm( Learn more in the [example notebook](/docs/integrations/retrievers/self_query/astradb). -### Store +## Store ```python from langchain_community.storage import AstraDBStore @@ -119,7 +115,7 @@ store = AstraDBStore( Learn more in the [example notebook](/docs/integrations/stores/astradb#astradbstore). -### Byte Store +## Byte Store ```python from langchain_community.storage import AstraDBByteStore @@ -131,57 +127,3 @@ store = AstraDBByteStore( ``` Learn more in the [example notebook](/docs/integrations/stores/astradb#astradbbytestore). 
- -## Apache Cassandra and Astra DB through CQL - -> [Cassandra](https://cassandra.apache.org/) is a NoSQL, row-oriented, highly scalable and highly available database. -> Starting with version 5.0, the database ships with [vector search capabilities](https://cassandra.apache.org/doc/trunk/cassandra/vector-search/overview.html). -> DataStax [Astra DB through CQL](https://docs.datastax.com/en/astra-serverless/docs/vector-search/quickstart.html) is a managed serverless database built on Cassandra, offering the same interface and strengths. - -These databases use the CQL protocol (Cassandra Query Language). -Hence, a different set of connectors, outlined below, shall be used. - -### Vector Store - -```python -from langchain_community.vectorstores import Cassandra -vector_store = Cassandra( - embedding=my_embedding, - table_name="my_store", -) -``` - -Learn more in the [example notebook](/docs/integrations/vectorstores/astradb#apache-cassandra-and-astra-db-through-cql) (scroll down to the CQL-specific section). - - -### Memory - -```python -from langchain.memory import CassandraChatMessageHistory -message_history = CassandraChatMessageHistory(session_id="my-session") -``` - -Learn more in the [example notebook](/docs/integrations/memory/cassandra_chat_message_history). - - -### LLM Cache - -```python -from langchain.cache import CassandraCache -langchain.llm_cache = CassandraCache() -``` - -Learn more in the [example notebook](/docs/integrations/llms/llm_caching#cassandra-caches) (scroll to the Cassandra section). - - -### Semantic LLM Cache - -```python -from langchain.cache import CassandraSemanticCache -cassSemanticCache = CassandraSemanticCache( - embedding=my_embedding, - table_name="my_store", -) -``` - -Learn more in the [example notebook](/docs/integrations/llms/llm_caching#cassandra-caches) (scroll to the appropriate section). diff --git a/docs/docs/integrations/providers/cassandra.mdx b/docs/docs/integrations/providers/cassandra.mdx new file mode 100644 index 00000000000000..392f7a1767e3ea --- /dev/null +++ b/docs/docs/integrations/providers/cassandra.mdx @@ -0,0 +1,76 @@ +# Apache Cassandra + +> [Apache Cassandra®](https://cassandra.apache.org/) is a NoSQL, row-oriented, highly scalable and highly available database. +> Starting with version 5.0, the database ships with [vector search capabilities](https://cassandra.apache.org/doc/trunk/cassandra/vector-search/overview.html). + +The integrations outlined in this page can be used with Cassandra as well as other CQL-compatible databases, i.e. those using the Cassandra Query Language protocol. + + +### Setup + +Install the following Python package: + +```bash +pip install "cassio>=0.1.4" +``` + + +## Vector Store + +```python +from langchain_community.vectorstores import Cassandra +vector_store = Cassandra( + embedding=my_embedding, + table_name="my_store", +) +``` + +Learn more in the [example notebook](/docs/integrations/vectorstores/cassandra). + +## Chat message history + +```python +from langchain_community.chat_message_histories import CassandraChatMessageHistory +message_history = CassandraChatMessageHistory(session_id="my-session") +``` + +Learn more in the [example notebook](/docs/integrations/memory/cassandra_chat_message_history). + + +## LLM Cache + +```python +from langchain.globals import set_llm_cache +from langchain_community.cache import CassandraCache +set_llm_cache(CassandraCache()) +``` + +Learn more in the [example notebook](/docs/integrations/llms/llm_caching#cassandra-caches) (scroll to the Cassandra section). 
+ + +## Semantic LLM Cache + +```python +from langchain.globals import set_llm_cache +from langchain_community.cache import CassandraSemanticCache +set_llm_cache(CassandraSemanticCache( + embedding=my_embedding, + table_name="my_store", +)) +``` + +Learn more in the [example notebook](/docs/integrations/llms/llm_caching#cassandra-caches) (scroll to the appropriate section). + +## Document loader + +```python +from langchain_community.document_loaders import CassandraLoader +loader = CassandraLoader(table="my_table") +docs = loader.load() +``` + +Learn more in the [example notebook](/docs/integrations/document_loaders/cassandra). + +#### Attribution statement + +> Apache Cassandra, Cassandra and Apache are either registered trademarks or trademarks of the [Apache Software Foundation](http://www.apache.org/) in the United States and/or other countries. diff --git a/docs/docs/integrations/text_embedding/ai21.ipynb b/docs/docs/integrations/text_embedding/ai21.ipynb new file mode 100644 index 00000000000000..c44e55f2b497d1 --- /dev/null +++ b/docs/docs/integrations/text_embedding/ai21.ipynb @@ -0,0 +1,138 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "c2923bd1", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: AI21 Labs\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "cc3c6ef6bbd57ce9", + "metadata": { + "collapsed": false + }, + "source": [ + "# AI21Embeddings\n", + "\n", + "This notebook covers how to get started with AI21 embedding models.\n", + "\n", + "## Installation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c3bef91", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -qU langchain-ai21" + ] + }, + { + "cell_type": "markdown", + "id": "2b4f3e15", + "metadata": {}, + "source": [ + "## Environment Setup\n", + "\n", + "We'll need to get a [AI21 API key](https://docs.ai21.com/) and set the `AI21_API_KEY` environment variable:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62e0dbc3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "os.environ[\"AI21_API_KEY\"] = getpass()" + ] + }, + { + "cell_type": "markdown", + "id": "74ef9d8b40a1319e", + "metadata": { + "collapsed": false + }, + "source": [ + "## Usage" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "12fcfb4b", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_ai21 import AI21Embeddings\n", + "\n", + "embeddings = AI21Embeddings()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f2e6104", + "metadata": {}, + "outputs": [], + "source": [ + "embeddings.embed_query(\"My query to look up\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3465d7e63bfb3d1", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "embeddings.embed_documents(\n", + " [\"This is a content of the document\", \"This is another document\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d60af6d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 
4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/toolkits/cogniswitch.ipynb b/docs/docs/integrations/toolkits/cogniswitch.ipynb new file mode 100644 index 00000000000000..836f425cf6055f --- /dev/null +++ b/docs/docs/integrations/toolkits/cogniswitch.ipynb @@ -0,0 +1,326 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "19062701", + "metadata": {}, + "source": [ + "## Cogniswitch Tools\n", + "\n", + "**Use CogniSwitch to build production ready applications that can consume, organize and retrieve knowledge flawlessly. Using the framework of your choice, in this case Langchain CogniSwitch helps alleviate the stress of decision making when it comes to, choosing the right storage and retrieval formats. It also eradicates reliability issues and hallucinations when it comes to responses that are generated. Get started by interacting with your knowledge in just two simple steps.**\n", + "\n", + "visit [https://www.cogniswitch.ai/developer to register](https://www.cogniswitch.ai/developer?utm_source=langchain&utm_medium=langchainbuild&utm_id=dev).\n\n", + "**Registration:** \n\n", + "- Signup with your email and verify your registration \n\n", + "- You will get a mail with a platform token and oauth token for using the services.\n\n\n", + "\n", + "**step 1: Instantiate the toolkit and get the tools:**\n\n", + "- Instantiate the cogniswitch toolkit with the cogniswitch token, openAI API key and OAuth token and get the tools. \n", + "\n", + "**step 2: Instantiate the agent with the tools and llm:**\n", + "- Instantiate the agent with the list of cogniswitch tools and the llm, into the agent executor.\n", + "\n", + "**step 3: CogniSwitch Store Tool:** \n", + "\n", + "***CogniSwitch knowledge source file tool***\n", + "- Use the agent to upload a file by giving the file path.(formats that are currently supported are .pdf, .docx, .doc, .txt, .html) \n", + "- The content from the file will be processed by the cogniswitch and stored in your knowledge store. \n", + "\n", + "***CogniSwitch knowledge source url tool***\n", + "- Use the agent to upload a URL. \n", + "- The content from the url will be processed by the cogniswitch and stored in your knowledge store. \n", + "\n", + "**step 4: CogniSwitch Status Tool:**\n", + "- Use the agent to know the status of the document uploaded with a document name.\n", + "- You can also check the status of document processing in cogniswitch console. \n", + "\n", + "**step 5: CogniSwitch Answer Tool:**\n", + "- Use the agent to ask your question.\n", + "- You will get the answer from your knowledge as the response. 
\n" + ] + }, + { + "cell_type": "markdown", + "id": "1435b193", + "metadata": {}, + "source": [ + "### Import necessary libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "8d86323b", + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "\n", + "warnings.filterwarnings(\"ignore\")\n", + "\n", + "import os\n", + "\n", + "from langchain.agents.agent_toolkits import create_conversational_retrieval_agent\n", + "from langchain.chat_models import ChatOpenAI\n", + "from langchain_community.agent_toolkits import CogniswitchToolkit" + ] + }, + { + "cell_type": "markdown", + "id": "6e6acf0e", + "metadata": {}, + "source": [ + "### Cogniswitch platform token, OAuth token and OpenAI API key" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3d2dfc9f", + "metadata": {}, + "outputs": [], + "source": [ + "cs_token = \"Your CogniSwitch token\"\n", + "OAI_token = \"Your OpenAI API token\"\n", + "oauth_token = \"Your CogniSwitch authentication token\"\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = OAI_token" + ] + }, + { + "cell_type": "markdown", + "id": "320e02fc", + "metadata": {}, + "source": [ + "### Instantiate the cogniswitch toolkit with the credentials" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "89f58167", + "metadata": {}, + "outputs": [], + "source": [ + "cogniswitch_toolkit = CogniswitchToolkit(\n", + " cs_token=cs_token, OAI_token=OAI_token, apiKey=oauth_token\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "16901682", + "metadata": {}, + "source": [ + "### Get the list of cogniswitch tools" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "288d07f6", + "metadata": {}, + "outputs": [], + "source": [ + "tool_lst = cogniswitch_toolkit.get_tools()" + ] + }, + { + "cell_type": "markdown", + "id": "4aae43a3", + "metadata": {}, + "source": [ + "### Instantiate the llm" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4d67e5bb", + "metadata": {}, + "outputs": [], + "source": [ + "llm = ChatOpenAI(\n", + " temperature=0,\n", + " openai_api_key=OAI_token,\n", + " max_tokens=1500,\n", + " model_name=\"gpt-3.5-turbo-0613\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "04179282", + "metadata": {}, + "source": [ + "### Create a agent executor" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2153e758", + "metadata": {}, + "outputs": [], + "source": [ + "agent_executor = create_conversational_retrieval_agent(llm, tool_lst, verbose=False)" + ] + }, + { + "cell_type": "markdown", + "id": "42c9890e", + "metadata": {}, + "source": [ + "### Invoke the agent to upload a URL" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "794b4fba", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The URL https://cogniswitch.ai/developer has been uploaded successfully. The status of the document is currently being processed. 
You will receive an email notification once the processing is complete.\n" + ] + } + ], + "source": [ + "response = agent_executor.invoke(\"upload this url https://cogniswitch.ai/developer\")\n", + "\n", + "print(response[\"output\"])" + ] + }, + { + "cell_type": "markdown", + "id": "544fe8f9", + "metadata": {}, + "source": [ + "### Invoke the agent to upload a File" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "fd0addfc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file example_file.txt has been uploaded successfully. The status of the document is currently being processed. You will receive an email notification once the processing is complete.\n" + ] + } + ], + "source": [ + "response = agent_executor.invoke(\"upload this file example_file.txt\")\n", + "\n", + "print(response[\"output\"])" + ] + }, + { + "cell_type": "markdown", + "id": "02827e1b", + "metadata": {}, + "source": [ + "### Invoke the agent to get the status of a document" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f424e6c5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The status of the document example_file.txt is as follows:\n", + "\n", + "- Created On: 2024-01-22T19:07:42.000+00:00\n", + "- Modified On: 2024-01-22T19:07:42.000+00:00\n", + "- Document Entry ID: 153\n", + "- Status: 0 (Processing)\n", + "- Original File Name: example_file.txt\n", + "- Saved File Name: 1705950460069example_file29393011.txt\n", + "\n", + "The document is currently being processed.\n" + ] + } + ], + "source": [ + "response = agent_executor.invoke(\"Tell me the status of this document example_file.txt\")\n", + "\n", + "print(response[\"output\"])" + ] + }, + { + "cell_type": "markdown", + "id": "0ba9aca9", + "metadata": {}, + "source": [ + "### Invoke the agent with query and get the answer" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "e73e963f", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CogniSwitch can help develop GenAI applications in several ways:\n", + "\n", + "1. Knowledge Extraction: CogniSwitch can extract knowledge from various sources such as documents, websites, and databases. It can analyze and store data from these sources, making it easier to access and utilize the information for GenAI applications.\n", + "\n", + "2. Natural Language Processing: CogniSwitch has advanced natural language processing capabilities. It can understand and interpret human language, allowing GenAI applications to interact with users in a more conversational and intuitive manner.\n", + "\n", + "3. Sentiment Analysis: CogniSwitch can analyze the sentiment of text data, such as customer reviews or social media posts. This can be useful in developing GenAI applications that can understand and respond to the emotions and opinions of users.\n", + "\n", + "4. Knowledge Base Integration: CogniSwitch can integrate with existing knowledge bases or create new ones. This allows GenAI applications to access a vast amount of information and provide accurate and relevant responses to user queries.\n", + "\n", + "5. Document Analysis: CogniSwitch can analyze documents and extract key information such as entities, relationships, and concepts. 
This can be valuable in developing GenAI applications that can understand and process large amounts of textual data.\n", + "\n", + "Overall, CogniSwitch provides a range of AI-powered capabilities that can enhance the development of GenAI applications by enabling knowledge extraction, natural language processing, sentiment analysis, knowledge base integration, and document analysis.\n" + ] + } + ], + "source": [ + "response = agent_executor.invoke(\"How can cogniswitch help develop GenAI applications?\")\n", + "\n", + "print(response[\"output\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "langchain_repo", + "language": "python", + "name": "langchain_repo" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/vectorstores/apache_doris.ipynb b/docs/docs/integrations/vectorstores/apache_doris.ipynb new file mode 100644 index 00000000000000..1cee7b8f5370d5 --- /dev/null +++ b/docs/docs/integrations/vectorstores/apache_doris.ipynb @@ -0,0 +1,322 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "84180ad0-66cd-43e5-b0b8-2067a29e16ba", + "metadata": { + "collapsed": false + }, + "source": [ + "# Apache Doris\n", + "\n", + ">[Apache Doris](https://doris.apache.org/) is a modern data warehouse for real-time analytics.\n", + "It delivers lightning-fast analytics on real-time data at scale.\n", + "\n", + ">Usually `Apache Doris` is categorized into OLAP, and it has showed excellent performance in [ClickBench — a Benchmark For Analytical DBMS](https://benchmark.clickhouse.com/). Since it has a super-fast vectorized execution engine, it could also be used as a fast vectordb.\n", + "\n", + "Here we'll show how to use the Apache Doris Vector Store." + ] + }, + { + "cell_type": "markdown", + "id": "1685854f", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "311d44bb-4aca-4f3b-8f97-5e1f29238e40", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet pymysql" + ] + }, + { + "cell_type": "markdown", + "id": "2c891bba", + "metadata": {}, + "source": [ + "Set `update_vectordb = False` at the beginning. 
If there is no docs updated, then we don't need to rebuild the embeddings of docs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f4e6ca20-79dd-482a-8f68-af9d7dd59c7c", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "!pip install sqlalchemy\n", + "!pip install langchain" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "96f7c7a2-4811-4fdf-87f5-c60772f51fe1", + "metadata": { + "ExecuteTime": { + "end_time": "2024-02-14T12:54:01.392500Z", + "start_time": "2024-02-14T12:53:58.866615Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "from langchain.chains import RetrievalQA\n", + "from langchain.text_splitter import TokenTextSplitter\n", + "from langchain_community.document_loaders import (\n", + " DirectoryLoader,\n", + " UnstructuredMarkdownLoader,\n", + ")\n", + "from langchain_community.vectorstores.apache_doris import (\n", + " ApacheDoris,\n", + " ApacheDorisSettings,\n", + ")\n", + "from langchain_openai import OpenAI, OpenAIEmbeddings\n", + "\n", + "update_vectordb = False" + ] + }, + { + "cell_type": "markdown", + "id": "ee821c00", + "metadata": {}, + "source": [ + "## Load docs and split them into tokens" + ] + }, + { + "cell_type": "markdown", + "id": "34ba0cfd", + "metadata": {}, + "source": [ + "Load all markdown files under the `docs` directory\n", + "\n", + "for Apache Doris documents, you can clone repo from https://github.com/apache/doris, and there is `docs` directory in it." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "799edf20-bcf4-4a65-bff7-b907f6bdba20", + "metadata": { + "ExecuteTime": { + "end_time": "2024-02-14T12:55:24.128917Z", + "start_time": "2024-02-14T12:55:19.463831Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "loader = DirectoryLoader(\n", + " \"./docs\", glob=\"**/*.md\", loader_cls=UnstructuredMarkdownLoader\n", + ")\n", + "documents = loader.load()" + ] + }, + { + "cell_type": "markdown", + "id": "b415fe2a", + "metadata": {}, + "source": [ + "Split docs into tokens, and set `update_vectordb = True` because there are new docs/tokens." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "0dc5ba83-62ef-4f61-a443-e872f251e7da", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# load text splitter and split docs into snippets of text\n", + "text_splitter = TokenTextSplitter(chunk_size=400, chunk_overlap=50)\n", + "split_docs = text_splitter.split_documents(documents)\n", + "\n", + "# tell vectordb to update text embeddings\n", + "update_vectordb = True" + ] + }, + { + "cell_type": "markdown", + "id": "46966e25-9449-4a36-87d1-c0b25dce2994", + "metadata": { + "collapsed": false + }, + "source": [ + "split_docs[-20]" + ] + }, + { + "cell_type": "markdown", + "id": "99422e95-b407-43eb-aa68-9a62363fc82f", + "metadata": { + "collapsed": false + }, + "source": [ + "print(\"# docs = %d, # splits = %d\" % (len(documents), len(split_docs)))" + ] + }, + { + "cell_type": "markdown", + "id": "e780d77f-3f96-4690-a10f-f87566f7ccc6", + "metadata": { + "collapsed": false + }, + "source": [ + "## Create vectordb instance" + ] + }, + { + "cell_type": "markdown", + "id": "15702d9c", + "metadata": {}, + "source": [ + "### Use Apache Doris as vectordb" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ced7dbe1", + "metadata": { + "ExecuteTime": { + "end_time": "2024-02-14T12:55:39.508287Z", + "start_time": "2024-02-14T12:55:39.500370Z" + } + }, + "outputs": [], + "source": [ + "def gen_apache_doris(update_vectordb, embeddings, settings):\n", + " if update_vectordb:\n", + " docsearch = ApacheDoris.from_documents(split_docs, embeddings, config=settings)\n", + " else:\n", + " docsearch = ApacheDoris(embeddings, settings)\n", + " return docsearch" + ] + }, + { + "cell_type": "markdown", + "id": "15d86fda", + "metadata": {}, + "source": [ + "## Convert tokens into embeddings and put them into vectordb" + ] + }, + { + "cell_type": "markdown", + "id": "ff1322ea", + "metadata": {}, + "source": [ + "Here we use Apache Doris as vectordb, you can configure Apache Doris instance via `ApacheDorisSettings`.\n", + "\n", + "Configuring Apache Doris instance is pretty much like configuring mysql instance. You need to specify:\n", + "1. host/port\n", + "2. username(default: 'root')\n", + "3. password(default: '')\n", + "4. database(default: 'default')\n", + "5. 
table(default: 'langchain')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "b34f8c31-c173-4902-8168-2e838ddfb9e9", + "metadata": { + "ExecuteTime": { + "end_time": "2024-02-14T12:56:02.671291Z", + "start_time": "2024-02-14T12:55:48.350294Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c53ab3f2-9e34-4424-8b07-6292bde67e14", + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "update_vectordb = True\n", + "\n", + "embeddings = OpenAIEmbeddings()\n", + "\n", + "# configure Apache Doris settings(host/port/user/pw/db)\n", + "settings = ApacheDorisSettings()\n", + "settings.port = 9030\n", + "settings.host = \"172.30.34.130\"\n", + "settings.username = \"root\"\n", + "settings.password = \"\"\n", + "settings.database = \"langchain\"\n", + "docsearch = gen_apache_doris(update_vectordb, embeddings, settings)\n", + "\n", + "print(docsearch)\n", + "\n", + "update_vectordb = False" + ] + }, + { + "cell_type": "markdown", + "id": "bde66626", + "metadata": {}, + "source": [ + "## Build QA and ask question to it" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84921814", + "metadata": {}, + "outputs": [], + "source": [ + "llm = OpenAI()\n", + "qa = RetrievalQA.from_chain_type(\n", + " llm=llm, chain_type=\"stuff\", retriever=docsearch.as_retriever()\n", + ")\n", + "query = \"what is apache doris\"\n", + "resp = qa.run(query)\n", + "print(resp)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/vectorstores/astradb.ipynb b/docs/docs/integrations/vectorstores/astradb.ipynb index c4b354225d88b8..fe980dd1377628 100644 --- a/docs/docs/integrations/vectorstores/astradb.ipynb +++ b/docs/docs/integrations/vectorstores/astradb.ipynb @@ -2,13 +2,27 @@ "cells": [ { "cell_type": "markdown", - "id": "d2d6ca14-fb7e-4172-9aa0-a3119a064b96", + "id": "66d0270a-b74f-4110-901e-7960b00297af", "metadata": {}, "source": [ "# Astra DB\n", "\n", - "This page provides a quickstart for using [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) and [Apache Cassandra®](https://cassandra.apache.org/) as a Vector Store.\n", - "\n", + "This page provides a quickstart for using [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) as a Vector Store." + ] + }, + { + "cell_type": "markdown", + "id": "ab8cd64f-3bb2-4f16-a0a9-12d7b1789bf6", + "metadata": {}, + "source": [ + "> DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Apache Cassandra® and made conveniently available through an easy-to-use JSON API." 
+ ] + }, + { + "cell_type": "markdown", + "id": "d2d6ca14-fb7e-4172-9aa0-a3119a064b96", + "metadata": {}, + "source": [ "_Note: in addition to access to the database, an OpenAI API Key is required to run the full example._" ] }, @@ -17,7 +31,7 @@ "id": "bb9be7ce-8c70-4d46-9f11-71c42a36e928", "metadata": {}, "source": [ - "### Setup and general dependencies" + "## Setup and general dependencies" ] }, { @@ -25,7 +39,7 @@ "id": "dbe7c156-0413-47e3-9237-4769c4248869", "metadata": {}, "source": [ - "Use of the integration requires the following Python package." + "Use of the integration requires the corresponding Python package:" ] }, { @@ -35,7 +49,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install --upgrade --quiet \"astrapy>=0.5.3\"" + "pip install --upgrade langchain-astradb" ] }, { @@ -43,8 +57,25 @@ "id": "2453d83a-bc8f-41e1-a692-befe4dd90156", "metadata": {}, "source": [ - "_Note: depending on your LangChain setup, you may need to install/upgrade other dependencies needed for this demo_\n", - "_(specifically, recent versions of `datasets`, `openai`, `pypdf` and `tiktoken` are required)._" + "_**Note.** the following are all packages required to run the full demo on this page. Depending on your LangChain setup, some of them may need to be installed:_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "56c1f86e-5921-4976-ac8f-1d62e5a512b0", + "metadata": {}, + "outputs": [], + "source": [ + "pip install langchain langchain-openai datasets pypdf" + ] + }, + { + "cell_type": "markdown", + "id": "c2910035-e61f-48d9-a110-d68c401b62aa", + "metadata": {}, + "source": [ + "### Import dependencies" ] }, { @@ -89,28 +120,12 @@ "embe = OpenAIEmbeddings()" ] }, - { - "cell_type": "markdown", - "id": "dd8caa76-bc41-429e-a93b-989ba13aff01", - "metadata": {}, - "source": [ - "_Keep reading to connect with Astra DB. For usage with Apache Cassandra and Astra DB through CQL, scroll to the section below._" - ] - }, { "cell_type": "markdown", "id": "22866f09-e10d-4f05-a24b-b9420129462e", "metadata": {}, "source": [ - "## Astra DB" - ] - }, - { - "cell_type": "markdown", - "id": "5fba47cc-3533-42fc-84b7-9dc14cd68b2b", - "metadata": {}, - "source": [ - "DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Cassandra and made conveniently available through an easy-to-use JSON API." 
+ "## Import the Vector Store" ] }, { @@ -120,7 +135,7 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_community.vectorstores import AstraDB" + "from langchain_astradb import AstraDBVectorStore" ] }, { @@ -128,10 +143,13 @@ "id": "68f61b01-3e09-47c1-9d67-5d6915c86626", "metadata": {}, "source": [ - "### Astra DB connection parameters\n", + "## Connection parameters\n", + "\n", + "These are found on your Astra DB dashboard:\n", "\n", "- the API Endpoint looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n", - "- the Token looks like `AstraCS:6gBhNmsk135....`" + "- the Token looks like `AstraCS:6gBhNmsk135....`\n", + "- you may optionally provide a _Namespace_ such as `my_namespace`" ] }, { @@ -142,7 +160,21 @@ "outputs": [], "source": [ "ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \")\n", - "ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")" + "ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")\n", + "\n", + "desired_namespace = input(\"(optional) Namespace = \")\n", + "if desired_namespace:\n", + " ASTRA_DB_KEYSPACE = desired_namespace\n", + "else:\n", + " ASTRA_DB_KEYSPACE = None" + ] + }, + { + "cell_type": "markdown", + "id": "196268bd-a950-41c3-bede-f5b55f6a0804", + "metadata": {}, + "source": [ + "Now you can create the vector store:" ] }, { @@ -152,11 +184,12 @@ "metadata": {}, "outputs": [], "source": [ - "vstore = AstraDB(\n", + "vstore = AstraDBVectorStore(\n", " embedding=embe,\n", " collection_name=\"astra_vector_demo\",\n", " api_endpoint=ASTRA_DB_API_ENDPOINT,\n", " token=ASTRA_DB_APPLICATION_TOKEN,\n", + " namespace=ASTRA_DB_KEYSPACE,\n", ")" ] }, @@ -165,7 +198,7 @@ "id": "9a348678-b2f6-46ca-9a0d-2eb4cc6b66b1", "metadata": {}, "source": [ - "### Load a dataset" + "## Load a dataset" ] }, { @@ -243,7 +276,7 @@ "id": "c031760a-1fc5-4855-adf2-02ed52fe2181", "metadata": {}, "source": [ - "### Run simple searches" + "## Run searches" ] }, { @@ -318,12 +351,22 @@ " print(f\"* {res.page_content} [{res.metadata}]\")" ] }, + { + "cell_type": "markdown", + "id": "60fda5df-14e4-4fb0-bd17-65a393fab8a9", + "metadata": {}, + "source": [ + "### Async\n", + "\n", + "Note that the Astra DB vector store supports all fully async methods (`asimilarity_search`, `afrom_texts`, `adelete` and so on) natively, i.e. without thread wrapping involved." 
+ ] + }, { "cell_type": "markdown", "id": "1cc86edd-692b-4495-906c-ccfd13b03c23", "metadata": {}, "source": [ - "### Deleting stored documents" + "## Deleting stored documents" ] }, { @@ -353,7 +396,7 @@ "id": "847181ba-77d1-4a17-b7f9-9e2c3d8efd13", "metadata": {}, "source": [ - "### A minimal RAG chain" + "## A minimal RAG chain" ] }, { @@ -452,7 +495,7 @@ "id": "177610c7-50d0-4b7b-8634-b03338054c8e", "metadata": {}, "source": [ - "### Cleanup" + "## Cleanup" ] }, { @@ -474,290 +517,6 @@ "source": [ "vstore.delete_collection()" ] - }, - { - "cell_type": "markdown", - "id": "94ebaab1-7cbf-4144-a147-7b0e32c43069", - "metadata": {}, - "source": [ - "## Apache Cassandra and Astra DB through CQL" - ] - }, - { - "cell_type": "markdown", - "id": "bc3931b4-211d-4f84-bcc0-51c127e3027c", - "metadata": {}, - "source": [ - "[Cassandra](https://cassandra.apache.org/) is a NoSQL, row-oriented, highly scalable and highly available database.Starting with version 5.0, the database ships with [vector search capabilities](https://cassandra.apache.org/doc/trunk/cassandra/vector-search/overview.html).\n", - "\n", - "DataStax [Astra DB through CQL](https://docs.datastax.com/en/astra-serverless/docs/vector-search/quickstart.html) is a managed serverless database built on Cassandra, offering the same interface and strengths." - ] - }, - { - "cell_type": "markdown", - "id": "a0055fbf-448d-4e46-9c40-28d43df25ca3", - "metadata": {}, - "source": [ - "#### What sets this case apart from \"Astra DB\" above?\n", - "\n", - "Thanks to LangChain having a standardized `VectorStore` interface, most of the \"Astra DB\" section above applies to this case as well. However, this time the database uses the CQL protocol, which means you'll use a _different_ class this time and instantiate it in another way.\n", - "\n", - "The cells below show how you should get your `vstore` object in this case and how you can clean up the database resources at the end: for the rest, i.e. the actual usage of the vector store, you will be able to run the very code that was shown above.\n", - "\n", - "In other words, running this demo in full with Cassandra or Astra DB through CQL means:\n", - "\n", - "- **initialization as shown below**\n", - "- \"Load a dataset\", _see above section_\n", - "- \"Run simple searches\", _see above section_\n", - "- \"MMR search\", _see above section_\n", - "- \"Deleting stored documents\", _see above section_\n", - "- \"A minimal RAG chain\", _see above section_\n", - "- **cleanup as shown below**" - ] - }, - { - "cell_type": "markdown", - "id": "23d12be2-745f-4e72-a82c-334a887bc7cd", - "metadata": {}, - "source": [ - "### Initialization" - ] - }, - { - "cell_type": "markdown", - "id": "e3212542-79be-423e-8e1f-b8d725e3cda8", - "metadata": {}, - "source": [ - "The class to use is the following:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "941af73e-a090-4fba-b23c-595757d470eb", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_community.vectorstores import Cassandra" - ] - }, - { - "cell_type": "markdown", - "id": "414d1e72-f7c9-4b6d-bf6f-16075712c7e3", - "metadata": {}, - "source": [ - "Now, depending on whether you connect to a Cassandra cluster or to Astra DB through CQL, you will provide different parameters when creating the vector store object." 
- ] - }, - { - "cell_type": "markdown", - "id": "48ecca56-71a4-4a91-b198-29384c44ce27", - "metadata": {}, - "source": [ - "#### Initialization (Cassandra cluster)" - ] - }, - { - "cell_type": "markdown", - "id": "55ebe958-5654-43e0-9aed-d607ffd3fa48", - "metadata": {}, - "source": [ - "In this case, you first need to create a `cassandra.cluster.Session` object, as described in the [Cassandra driver documentation](https://docs.datastax.com/en/developer/python-driver/latest/api/cassandra/cluster/#module-cassandra.cluster). The details vary (e.g. with network settings and authentication), but this might be something like:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4642dafb-a065-4063-b58c-3d276f5ad07e", - "metadata": {}, - "outputs": [], - "source": [ - "from cassandra.cluster import Cluster\n", - "\n", - "cluster = Cluster([\"127.0.0.1\"])\n", - "session = cluster.connect()" - ] - }, - { - "cell_type": "markdown", - "id": "624c93bf-fb46-4350-bcfa-09ca09dc068f", - "metadata": {}, - "source": [ - "You can now set the session, along with your desired keyspace name, as a global CassIO parameter:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92a4ab28-1c4f-4dad-9671-d47e0b1dde7b", - "metadata": {}, - "outputs": [], - "source": [ - "import cassio\n", - "\n", - "CASSANDRA_KEYSPACE = input(\"CASSANDRA_KEYSPACE = \")\n", - "\n", - "cassio.init(session=session, keyspace=CASSANDRA_KEYSPACE)" - ] - }, - { - "cell_type": "markdown", - "id": "3b87a824-36f1-45b4-b54c-efec2a2de216", - "metadata": {}, - "source": [ - "Now you can create the vector store:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "853a2a88-a565-4e24-8789-d78c213954a6", - "metadata": {}, - "outputs": [], - "source": [ - "vstore = Cassandra(\n", - " embedding=embe,\n", - " table_name=\"cassandra_vector_demo\",\n", - " # session=None, keyspace=None # Uncomment on older versions of LangChain\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "768ddf7a-0c3e-4134-ad38-25ac53c3da7a", - "metadata": {}, - "source": [ - "#### Initialization (Astra DB through CQL)" - ] - }, - { - "cell_type": "markdown", - "id": "4ed4269a-b7e7-4503-9e66-5a11335c7681", - "metadata": {}, - "source": [ - "In this case you initialize CassIO with the following connection parameters:\n", - "\n", - "- the Database ID, e.g. `01234567-89ab-cdef-0123-456789abcdef`\n", - "- the Token, e.g. 
`AstraCS:6gBhNmsk135....` (it must be a \"Database Administrator\" token)\n", - "- Optionally a Keyspace name (if omitted, the default one for the database will be used)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5fa6bd74-d4b2-45c5-9757-96dddc6242fb", - "metadata": {}, - "outputs": [], - "source": [ - "ASTRA_DB_ID = input(\"ASTRA_DB_ID = \")\n", - "ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")\n", - "\n", - "desired_keyspace = input(\"ASTRA_DB_KEYSPACE (optional, can be left empty) = \")\n", - "if desired_keyspace:\n", - " ASTRA_DB_KEYSPACE = desired_keyspace\n", - "else:\n", - " ASTRA_DB_KEYSPACE = None" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "add6e585-17ff-452e-8ef6-7e485ead0b06", - "metadata": {}, - "outputs": [], - "source": [ - "import cassio\n", - "\n", - "cassio.init(\n", - " database_id=ASTRA_DB_ID,\n", - " token=ASTRA_DB_APPLICATION_TOKEN,\n", - " keyspace=ASTRA_DB_KEYSPACE,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "b305823c-bc98-4f3d-aabb-d7eb663ea421", - "metadata": {}, - "source": [ - "Now you can create the vector store:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f45f3038-9d59-41cc-8b43-774c6aa80295", - "metadata": {}, - "outputs": [], - "source": [ - "vstore = Cassandra(\n", - " embedding=embe,\n", - " table_name=\"cassandra_vector_demo\",\n", - " # session=None, keyspace=None # Uncomment on older versions of LangChain\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "39284918-cf8a-49bb-a2d3-aef285bb2ffa", - "metadata": {}, - "source": [ - "### Usage of the vector store" - ] - }, - { - "cell_type": "markdown", - "id": "3cc1aead-d6ec-48a3-affe-1d0cffa955a9", - "metadata": {}, - "source": [ - "_See the sections \"Load a dataset\" through \"A minimal RAG chain\" above._\n", - "\n", - "Speaking of the latter, you can check out a full RAG template for Astra DB through CQL [here](https://github.com/langchain-ai/langchain/tree/master/templates/cassandra-entomology-rag)." - ] - }, - { - "cell_type": "markdown", - "id": "096397d8-6622-4685-9f9d-7e238beca467", - "metadata": {}, - "source": [ - "### Cleanup" - ] - }, - { - "cell_type": "markdown", - "id": "cc1e74f9-5500-41aa-836f-235b1ed5f20c", - "metadata": {}, - "source": [ - "the following essentially retrieves the `Session` object from CassIO and runs a CQL `DROP TABLE` statement with it:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b5b82c33-0e77-4a37-852c-8d50edbdd991", - "metadata": {}, - "outputs": [], - "source": [ - "cassio.config.resolve_session().execute(\n", - " f\"DROP TABLE {cassio.config.resolve_keyspace()}.cassandra_vector_demo;\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "c10ece4d-ae06-42ab-baf4-4d0ac2051743", - "metadata": {}, - "source": [ - "### Learn more" - ] - }, - { - "cell_type": "markdown", - "id": "51ea8b69-7e15-458f-85aa-9fa199f95f9c", - "metadata": {}, - "source": [ - "For more information, extended quickstarts and additional usage examples, please visit the [CassIO documentation](https://cassio.org/frameworks/langchain/about/) for more on using the LangChain `Cassandra` vector store." 
- ] - } ], "metadata": { @@ -776,7 +535,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/docs/docs/integrations/vectorstores/cassandra.ipynb b/docs/docs/integrations/vectorstores/cassandra.ipynb new file mode 100644 index 00000000000000..524f76a1052c92 --- /dev/null +++ b/docs/docs/integrations/vectorstores/cassandra.ipynb @@ -0,0 +1,651 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d2d6ca14-fb7e-4172-9aa0-a3119a064b96", + "metadata": {}, + "source": [ + "# Apache Cassandra\n", + "\n", + "This page provides a quickstart for using [Apache Cassandra®](https://cassandra.apache.org/) as a Vector Store." + ] + }, + { + "cell_type": "markdown", + "id": "6a1a562e-3d1a-4693-b55d-08bf90943a9a", + "metadata": {}, + "source": [ + "> [Cassandra](https://cassandra.apache.org/) is a NoSQL, row-oriented, highly scalable and highly available database. Starting with version 5.0, the database ships with [vector search capabilities](https://cassandra.apache.org/doc/trunk/cassandra/vector-search/overview.html)." + ] + }, + { + "cell_type": "markdown", + "id": "9cf37d7f-c18e-4e63-adea-138e5e981475", + "metadata": {}, + "source": [ + "_Note: in addition to access to the database, an OpenAI API Key is required to run the full example._" + ] + }, + { + "cell_type": "markdown", + "id": "bb9be7ce-8c70-4d46-9f11-71c42a36e928", + "metadata": {}, + "source": [ + "### Setup and general dependencies" + ] + }, + { + "cell_type": "markdown", + "id": "dbe7c156-0413-47e3-9237-4769c4248869", + "metadata": {}, + "source": [ + "Use of the integration requires the following Python package." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d00fcf4-9798-4289-9214-d9734690adfc", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet \"cassio>=0.1.4\"" + ] + }, + { + "cell_type": "markdown", + "id": "2453d83a-bc8f-41e1-a692-befe4dd90156", + "metadata": {}, + "source": [ + "_Note: depending on your LangChain setup, you may need to install/upgrade other dependencies needed for this demo_\n", + "_(specifically, recent versions of `datasets`, `openai`, `pypdf` and `tiktoken` are required, along with `langchain-community`)._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b06619af-fea2-4863-8149-7f239a8c9c82", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "from datasets import (\n", + " load_dataset,\n", + ")\n", + "from langchain.schema import Document\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain_community.document_loaders import PyPDFLoader\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.runnables import RunnablePassthrough\n", + "from langchain_openai import ChatOpenAI, OpenAIEmbeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1983f1da-0ae7-4a9b-bf4c-4ade328f7a3a", + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"OPENAI_API_KEY\"] = getpass(\"OPENAI_API_KEY = \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c656df06-e938-4bc5-b570-440b8b7a0189", + "metadata": {}, + "outputs": [], + "source": [ + "embe = OpenAIEmbeddings()" + ] + }, + { + "cell_type": "markdown", + "id": "22866f09-e10d-4f05-a24b-b9420129462e", + "metadata": {}, + "source": [ + 
"## Import the Vector Store" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b32730d-176e-414c-9d91-fd3644c54211", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.vectorstores import Cassandra" + ] + }, + { + "cell_type": "markdown", + "id": "68f61b01-3e09-47c1-9d67-5d6915c86626", + "metadata": {}, + "source": [ + "## Connection parameters\n", + "\n", + "The Vector Store integration shown in this page can be used with Cassandra as well as other derived databases, such as Astra DB, which use the CQL (Cassandra Query Language) protocol.\n", + "\n", + "> DataStax [Astra DB](https://docs.datastax.com/en/astra-serverless/docs/vector-search/quickstart.html) is a managed serverless database built on Cassandra, offering the same interface and strengths.\n", + "\n", + "Depending on whether you connect to a Cassandra cluster or to Astra DB through CQL, you will provide different parameters when creating the vector store object." + ] + }, + { + "cell_type": "markdown", + "id": "36bbb3d9-4d07-4f63-b23d-c52be03f8938", + "metadata": {}, + "source": [ + "### Connecting to a Cassandra cluster\n", + "\n", + "You first need to create a `cassandra.cluster.Session` object, as described in the [Cassandra driver documentation](https://docs.datastax.com/en/developer/python-driver/latest/api/cassandra/cluster/#module-cassandra.cluster). The details vary (e.g. with network settings and authentication), but this might be something like:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d95bb1d4-d8a6-4e66-89bc-776f9c6f962b", + "metadata": {}, + "outputs": [], + "source": [ + "from cassandra.cluster import Cluster\n", + "\n", + "cluster = Cluster([\"127.0.0.1\"])\n", + "session = cluster.connect()" + ] + }, + { + "cell_type": "markdown", + "id": "8279aa78-96d6-43ad-aa21-79fd798d895d", + "metadata": {}, + "source": [ + "You can now set the session, along with your desired keyspace name, as a global CassIO parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29ececc4-e50b-4428-967f-4b6bbde12a14", + "metadata": {}, + "outputs": [], + "source": [ + "import cassio\n", + "\n", + "CASSANDRA_KEYSPACE = input(\"CASSANDRA_KEYSPACE = \")\n", + "\n", + "cassio.init(session=session, keyspace=CASSANDRA_KEYSPACE)" + ] + }, + { + "cell_type": "markdown", + "id": "0bd035a2-f0af-418f-94e5-0fbb4d51ac3c", + "metadata": {}, + "source": [ + "Now you can create the vector store:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eeb62cde-89fc-44d7-ba76-91e19cbc5898", + "metadata": {}, + "outputs": [], + "source": [ + "vstore = Cassandra(\n", + " embedding=embe,\n", + " table_name=\"cassandra_vector_demo\",\n", + " # session=None, keyspace=None # Uncomment on older versions of LangChain\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "ce240555-e5fc-431d-ac0f-bcf2f6e6a5fb", + "metadata": {}, + "source": [ + "_Note: you can also pass your session and keyspace directly as parameters when creating the vector store. 
Using the global `cassio.init` setting, however, comes in handy if your application uses Cassandra in several ways (for instance, for the vector store, chat memory and LLM response caching), as it allows you to centralize credential and DB connection management in one place. A sketch of the explicit-parameter alternative is shown right after the dataset-loading step below._" + ] + }, + { + "cell_type": "markdown", + "id": "b598e5fa-eb62-4939-9734-091628e84db4", + "metadata": {}, + "source": [ + "### Connecting to Astra DB through CQL" + ] + }, + { + "cell_type": "markdown", + "id": "2feec7c3-7092-4252-9a3f-05eda4babe74", + "metadata": {}, + "source": [ + "In this case you initialize CassIO with the following connection parameters:\n", + "\n", + "- the Database ID, e.g. `01234567-89ab-cdef-0123-456789abcdef`\n", + "- the Token, e.g. `AstraCS:6gBhNmsk135....` (it must be a \"Database Administrator\" token)\n", + "- Optionally a Keyspace name (if omitted, the default one for the database will be used)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f96147d-6d76-4101-bbb0-4a7f215c3d2d", + "metadata": {}, + "outputs": [], + "source": [ + "ASTRA_DB_ID = input(\"ASTRA_DB_ID = \")\n", + "ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")\n", + "\n", + "desired_keyspace = input(\"ASTRA_DB_KEYSPACE (optional, can be left empty) = \")\n", + "if desired_keyspace:\n", + " ASTRA_DB_KEYSPACE = desired_keyspace\n", + "else:\n", + " ASTRA_DB_KEYSPACE = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d653df1d-9dad-4980-ba52-76a47b4c5c1a", + "metadata": {}, + "outputs": [], + "source": [ + "import cassio\n", + "\n", + "cassio.init(\n", + " database_id=ASTRA_DB_ID,\n", + " token=ASTRA_DB_APPLICATION_TOKEN,\n", + " keyspace=ASTRA_DB_KEYSPACE,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e606b58b-d390-4fed-a2fc-65036c44860f", + "metadata": {}, + "source": [ + "Now you can create the vector store:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9cb552d1-e888-4550-a350-6df06b1f5aae", + "metadata": {}, + "outputs": [], + "source": [ + "vstore = Cassandra(\n", + " embedding=embe,\n", + " table_name=\"cassandra_vector_demo\",\n", + " # session=None, keyspace=None # Uncomment on older versions of LangChain\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "9a348678-b2f6-46ca-9a0d-2eb4cc6b66b1", + "metadata": {}, + "source": [ + "## Load a dataset" + ] + }, + { + "cell_type": "markdown", + "id": "552e56b0-301a-4b06-99c7-57ba6faa966f", + "metadata": {}, + "source": [ + "Convert each entry in the source dataset into a `Document`, then write them into the vector store:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a1f532f-ad63-4256-9730-a183841bd8e9", + "metadata": {}, + "outputs": [], + "source": [ + "philo_dataset = load_dataset(\"datastax/philosopher-quotes\")[\"train\"]\n", + "\n", + "docs = []\n", + "for entry in philo_dataset:\n", + " metadata = {\"author\": entry[\"author\"]}\n", + " doc = Document(page_content=entry[\"quote\"], metadata=metadata)\n", + " docs.append(doc)\n", + "\n", + "inserted_ids = vstore.add_documents(docs)\n", + "print(f\"\\nInserted {len(inserted_ids)} documents.\")" + ] + }, + { + "cell_type": "markdown", + "id": "79d4f436-ef04-4288-8f79-97c9abb983ed", + "metadata": {}, + "source": [ + "In the above, `metadata` dictionaries are created from the source data and are part of the `Document`.\n",
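+ "\n",
+ "As anticipated in the connection note earlier, here is a minimal sketch of the explicit-parameter alternative to the global `cassio.init` setting (it assumes the `session` and `CASSANDRA_KEYSPACE` from the cluster-connection step above):\n",
+ "\n",
+ "```python\n",
+ "vstore = Cassandra(\n",
+ "    embedding=embe,\n",
+ "    table_name=\"cassandra_vector_demo\",\n",
+ "    session=session,\n",
+ "    keyspace=CASSANDRA_KEYSPACE,\n",
+ ")\n",
+ "```"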
+ ] + }, + { + "cell_type": "markdown", + "id": "084d8802-ab39-4262-9a87-42eafb746f92", + "metadata": {}, + "source": [ + "Add some more entries, this time with `add_texts`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6b157f5-eb31-4907-a78e-2e2b06893936", + "metadata": {}, + "outputs": [], + "source": [ + "texts = [\"I think, therefore I am.\", \"To the things themselves!\"]\n", + "metadatas = [{\"author\": \"descartes\"}, {\"author\": \"husserl\"}]\n", + "ids = [\"desc_01\", \"huss_xy\"]\n", + "\n", + "inserted_ids_2 = vstore.add_texts(texts=texts, metadatas=metadatas, ids=ids)\n", + "print(f\"\\nInserted {len(inserted_ids_2)} documents.\")" + ] + }, + { + "cell_type": "markdown", + "id": "63840eb3-8b29-4017-bc2f-301bf5001f28", + "metadata": {}, + "source": [ + "_Note: you may want to speed up the execution of `add_texts` and `add_documents` by increasing the concurrency level for_\n", + "_these bulk operations - check out the methods' `batch_size` parameter_\n", + "_for more details. Depending on the network and the client machine specifications, your best-performing choice of parameters may vary._" + ] + }, + { + "cell_type": "markdown", + "id": "c031760a-1fc5-4855-adf2-02ed52fe2181", + "metadata": {}, + "source": [ + "## Run searches" + ] + }, + { + "cell_type": "markdown", + "id": "02a77d8e-1aae-4054-8805-01c77947c49f", + "metadata": {}, + "source": [ + "This section demonstrates metadata filtering and getting the similarity scores back:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1761806a-1afd-4491-867c-25a80d92b9fe", + "metadata": {}, + "outputs": [], + "source": [ + "results = vstore.similarity_search(\"Our life is what we make of it\", k=3)\n", + "for res in results:\n", + " print(f\"* {res.page_content} [{res.metadata}]\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eebc4f7c-f61a-438e-b3c8-17e6888d8a0b", + "metadata": {}, + "outputs": [], + "source": [ + "results_filtered = vstore.similarity_search(\n", + " \"Our life is what we make of it\",\n", + " k=3,\n", + " filter={\"author\": \"plato\"},\n", + ")\n", + "for res in results_filtered:\n", + " print(f\"* {res.page_content} [{res.metadata}]\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11bbfe64-c0cd-40c6-866a-a5786538450e", + "metadata": {}, + "outputs": [], + "source": [ + "results = vstore.similarity_search_with_score(\"Our life is what we make of it\", k=3)\n", + "for res, score in results:\n", + " print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "b14ea558-bfbe-41ce-807e-d70670060ada", + "metadata": {}, + "source": [ + "### MMR (Maximal-marginal-relevance) search" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76381ce8-780a-4e3b-97b1-056d6782d7d5", + "metadata": {}, + "outputs": [], + "source": [ + "results = vstore.max_marginal_relevance_search(\n", + " \"Our life is what we make of it\",\n", + " k=3,\n", + " filter={\"author\": \"aristotle\"},\n", + ")\n", + "for res in results:\n", + " print(f\"* {res.page_content} [{res.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "1cc86edd-692b-4495-906c-ccfd13b03c23", + "metadata": {}, + "source": [ + "## Deleting stored documents" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38a70ec4-b522-4d32-9ead-c642864fca37", + "metadata": {}, + "outputs": [], + "source": [ + "delete_1 = vstore.delete(inserted_ids[:3])\n", + 
"print(f\"all_succeed={delete_1}\") # True, all documents deleted" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4cf49ed-9d29-4ed9-bdab-51a308c41b8e", + "metadata": {}, + "outputs": [], + "source": [ + "delete_2 = vstore.delete(inserted_ids[2:5])\n", + "print(f\"some_succeeds={delete_2}\") # True, though some IDs were gone already" + ] + }, + { + "cell_type": "markdown", + "id": "847181ba-77d1-4a17-b7f9-9e2c3d8efd13", + "metadata": {}, + "source": [ + "## A minimal RAG chain" + ] + }, + { + "cell_type": "markdown", + "id": "cd64b844-846f-43c5-a7dd-c26b9ed417d0", + "metadata": {}, + "source": [ + "The next cells will implement a simple RAG pipeline:\n", + "- download a sample PDF file and load it onto the store;\n", + "- create a RAG chain with LCEL (LangChain Expression Language), with the vector store at its heart;\n", + "- run the question-answering chain." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5cbc4dba-0d5e-4038-8fc5-de6cadd1c2a9", + "metadata": {}, + "outputs": [], + "source": [ + "!curl -L \\\n", + " \"https://github.com/awesome-astra/datasets/blob/main/demo-resources/what-is-philosophy/what-is-philosophy.pdf?raw=true\" \\\n", + " -o \"what-is-philosophy.pdf\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "459385be-5e9c-47ff-ba53-2b7ae6166b09", + "metadata": {}, + "outputs": [], + "source": [ + "pdf_loader = PyPDFLoader(\"what-is-philosophy.pdf\")\n", + "splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=64)\n", + "docs_from_pdf = pdf_loader.load_and_split(text_splitter=splitter)\n", + "\n", + "print(f\"Documents from PDF: {len(docs_from_pdf)}.\")\n", + "inserted_ids_from_pdf = vstore.add_documents(docs_from_pdf)\n", + "print(f\"Inserted {len(inserted_ids_from_pdf)} documents.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5010a66c-4298-4e32-82b5-2da0d36a5c70", + "metadata": {}, + "outputs": [], + "source": [ + "retriever = vstore.as_retriever(search_kwargs={\"k\": 3})\n", + "\n", + "philo_template = \"\"\"\n", + "You are a philosopher that draws inspiration from great thinkers of the past\n", + "to craft well-thought answers to user questions. Use the provided context as the basis\n", + "for your answers and do not make up new reasoning paths - just mix-and-match what you are given.\n", + "Your answers must be concise and to the point, and refrain from answering about other topics than philosophy.\n", + "\n", + "CONTEXT:\n", + "{context}\n", + "\n", + "QUESTION: {question}\n", + "\n", + "YOUR ANSWER:\"\"\"\n", + "\n", + "philo_prompt = ChatPromptTemplate.from_template(philo_template)\n", + "\n", + "llm = ChatOpenAI()\n", + "\n", + "chain = (\n", + " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", + " | philo_prompt\n", + " | llm\n", + " | StrOutputParser()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcbc1296-6c7c-478b-b55b-533ba4e54ddb", + "metadata": {}, + "outputs": [], + "source": [ + "chain.invoke(\"How does Russel elaborate on Peirce's idea of the security blanket?\")" + ] + }, + { + "cell_type": "markdown", + "id": "869ab448-a029-4692-aefc-26b85513314d", + "metadata": {}, + "source": [ + "For more, check out a complete RAG template using Astra DB through CQL [here](https://github.com/langchain-ai/langchain/tree/master/templates/cassandra-entomology-rag)." 
+ ] + }, + { + "cell_type": "markdown", + "id": "177610c7-50d0-4b7b-8634-b03338054c8e", + "metadata": {}, + "source": [ + "## Cleanup" + ] + }, + { + "cell_type": "markdown", + "id": "0da4d19f-9878-4d3d-82c9-09cafca20322", + "metadata": {}, + "source": [ + "The following essentially retrieves the `Session` object from CassIO and runs a CQL `DROP TABLE` statement with it:\n", + "\n", + "_(You will lose the data you stored in it.)_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd405a13-6f71-46fa-87e6-167238e9c25e", + "metadata": {}, + "outputs": [], + "source": [ + "cassio.config.resolve_session().execute(\n", + " f\"DROP TABLE {cassio.config.resolve_keyspace()}.cassandra_vector_demo;\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "c10ece4d-ae06-42ab-baf4-4d0ac2051743", + "metadata": {}, + "source": [ + "### Learn more" + ] + }, + { + "cell_type": "markdown", + "id": "51ea8b69-7e15-458f-85aa-9fa199f95f9c", + "metadata": {}, + "source": [ + "For more information, extended quickstarts and additional usage examples of the LangChain `Cassandra` vector store, please visit the [CassIO documentation](https://cassio.org/frameworks/langchain/about/)." + ] + }, + { + "cell_type": "markdown", + "id": "3b8ee30c-2c84-42f3-9cff-e80dbc590490", + "metadata": {}, + "source": [ + "#### Attribution statement\n", + "\n", + "> Apache Cassandra, Cassandra and Apache are either registered trademarks or trademarks of the [Apache Software Foundation](http://www.apache.org/) in the United States and/or other countries.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/vectorstores/lancedb.ipynb b/docs/docs/integrations/vectorstores/lancedb.ipynb index ab5c56eb8f3cdc..18eb519eecd3b5 100644 --- a/docs/docs/integrations/vectorstores/lancedb.ipynb +++ b/docs/docs/integrations/vectorstores/lancedb.ipynb @@ -14,14 +14,50 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "bfcf346a", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: lancedb in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (0.4.4)\n", + "Requirement already satisfied: deprecation in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (2.1.0)\n", + "Requirement already satisfied: pylance==0.9.6 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (0.9.6)\n", + "Requirement already satisfied: ratelimiter~=1.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (1.2.0.post0)\n", + "Requirement already satisfied: retry>=0.9.2 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (0.9.2)\n", + "Requirement already satisfied: tqdm>=4.27.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (4.66.1)\n", + "Requirement already satisfied: pydantic>=1.10 in 
/Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (2.4.2)\n", + "Requirement already satisfied: attrs>=21.3.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (23.1.0)\n", + "Requirement already satisfied: semver>=3.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (3.0.2)\n", + "Requirement already satisfied: cachetools in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (5.3.2)\n", + "Requirement already satisfied: pyyaml>=6.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (6.0.1)\n", + "Requirement already satisfied: click>=8.1.7 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (8.1.7)\n", + "Requirement already satisfied: requests>=2.31.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (2.31.0)\n", + "Requirement already satisfied: overrides>=0.7 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (7.4.0)\n", + "Requirement already satisfied: pyarrow>=12 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from pylance==0.9.6->lancedb) (14.0.2)\n", + "Requirement already satisfied: numpy>=1.22 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from pylance==0.9.6->lancedb) (1.24.4)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from pydantic>=1.10->lancedb) (0.5.0)\n", + "Requirement already satisfied: pydantic-core==2.10.1 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from pydantic>=1.10->lancedb) (2.10.1)\n", + "Requirement already satisfied: typing-extensions>=4.6.1 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from pydantic>=1.10->lancedb) (4.8.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from requests>=2.31.0->lancedb) (3.3.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from requests>=2.31.0->lancedb) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from requests>=2.31.0->lancedb) (2.0.6)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from requests>=2.31.0->lancedb) (2023.7.22)\n", + "Requirement already satisfied: decorator>=3.4.2 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from retry>=0.9.2->lancedb) (5.1.1)\n", + "Requirement already satisfied: py<2.0.0,>=1.4.26 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from retry>=0.9.2->lancedb) (1.11.0)\n", + "Requirement already satisfied: packaging in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from deprecation->lancedb) (23.2)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m 
-> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], "source": [ - "%pip install --upgrade --quiet lancedb" + "! pip install lancedb" ] }, { @@ -34,20 +70,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "id": "a0361f5c-e6f4-45f4-b829-11680cf03cec", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "OpenAI API Key: ········\n" - ] - } - ], + "outputs": [], "source": [ "import getpass\n", "import os\n", @@ -57,15 +85,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "aac9563e", "metadata": { "tags": [] }, "outputs": [], "source": [ - "from langchain_community.vectorstores import LanceDB\n", - "from langchain_openai import OpenAIEmbeddings" + "from langchain.embeddings import OpenAIEmbeddings\n", + "from langchain.vectorstores import LanceDB" ] }, { @@ -75,14 +103,13 @@ "metadata": {}, "outputs": [], "source": [ + "from langchain.document_loaders import TextLoader\n", "from langchain.text_splitter import CharacterTextSplitter\n", - "from langchain_community.document_loaders import TextLoader\n", "\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "\n", "documents = CharacterTextSplitter().split_documents(documents)\n", - "\n", "embeddings = OpenAIEmbeddings()" ] }, @@ -93,22 +120,7 @@ "metadata": {}, "outputs": [], "source": [ - "import lancedb\n", - "\n", - "db = lancedb.connect(\"/tmp/lancedb\")\n", - "table = db.create_table(\n", - " \"my_table\",\n", - " data=[\n", - " {\n", - " \"vector\": embeddings.embed_query(\"Hello World\"),\n", - " \"text\": \"Hello World\",\n", - " \"id\": \"1\",\n", - " }\n", - " ],\n", - " mode=\"overwrite\",\n", - ")\n", - "\n", - "docsearch = LanceDB.from_documents(documents, embeddings, connection=table)\n", + "docsearch = LanceDB.from_documents(documents, embeddings)\n", "\n", "query = \"What did the president say about Ketanji Brown Jackson\"\n", "docs = docsearch.similarity_search(query)" @@ -116,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "9c608226", "metadata": {}, "outputs": [ @@ -136,7 +148,7 @@ "\n", "I’ve worked on these issues a long time. \n", "\n", - "I know what works: Investing in crime preventionand community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \n", + "I know what works: Investing in crime prevention and community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \n", "\n", "So let’s not abandon our streets. Or choose between safety and equal justice. \n", "\n", @@ -192,11 +204,97 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "a359ed74", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "They were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \n", + "\n", + "Officer Mora was 27 years old. \n", + "\n", + "Officer Rivera was 22. \n", + "\n", + "Both Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. 
\n", + "\n", + "I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \n", + "\n", + "I’ve worked on these issues a long time. \n", + "\n", + "I know what works: Investing in crime prevention and community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \n", + "\n", + "So let’s not abandon our streets. Or choose between safety and equal justice. \n", + "\n", + "Let’s come together to protect our communities, restore trust, and hold law enforcement accountable. \n", + "\n", + "That’s why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers. \n", + "\n", + "That’s why the American Rescue Plan provided $350 Billion that cities, states, and counties can use to hire more police and invest in proven strategies like community violence interruption—trusted messengers breaking the cycle of violence and trauma and giving young people hope. \n", + "\n", + "We should all agree: The answer is not to Defund the police. The answer is to FUND the police with the resources and training they need to protect our communities. \n", + "\n", + "I ask Democrats and Republicans alike: Pass my budget and keep our neighborhoods safe. \n", + "\n", + "And I will keep doing everything in my power to crack down on gun trafficking and ghost guns you can buy online and make at home—they have no serial numbers and can’t be traced. \n", + "\n", + "And I ask Congress to pass proven measures to reduce gun violence. Pass universal background checks. Why should anyone on a terrorist list be able to purchase a weapon? \n", + "\n", + "Ban assault weapons and high-capacity magazines. \n", + "\n", + "Repeal the liability shield that makes gun manufacturers the only industry in America that can’t be sued. \n", + "\n", + "These laws don’t infringe on the Second Amendment. They save lives. \n", + "\n", + "The most fundamental right in America is the right to vote – and to have it counted. And it’s under assault. \n", + "\n", + "In state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections. \n", + "\n", + "We cannot let this happen. \n", + "\n", + "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", + "\n", + "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", + "\n", + "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", + "\n", + "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \n", + "\n", + "A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. 
\n", + "\n", + "And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n", + "\n", + "We can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \n", + "\n", + "We’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n", + "\n", + "We’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster.\n" + ] + } + ], + "source": [ + "print(docs[0].page_content)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "12ca9ea8-3d09-49fb-922e-47c64ba90f28", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'vector': [-0.005863776430487633, -0.0019847142975777388, -0.004525014664977789, -0.002664136001840234, -0.0007940530776977539, 0.01969318464398384, 0.01712276227772236, 0.008474362082779408, -0.01931833289563656, -0.016988886520266533, 0.01086405199021101, 0.010763644240796566, -0.0004455566522665322, -0.007537228986620903, -0.003405475290492177, -0.0009003172744996846, 0.03338871896266937, -0.009672553278505802, 0.007657717447727919, -0.03087184764444828, -0.014016835950314999, 0.003234783187508583, 0.014552340842783451, 0.0068009099923074245, 0.0008007469004951417, 0.010261609219014645, 0.03170187771320343, -0.010013937950134277, 0.011004622094333172, -0.018608788028359413, -0.01729680225253105, 0.0061917733401060104, -0.036789171397686005, -0.018448136746883392, -0.02779269404709339, -0.0061415694653987885, 0.0002734002482611686, -0.011084947735071182, 0.018943479284644127, -0.014217650517821312, 0.036173343658447266, -0.02574438974261284, 0.002319404622539878, -0.01838119886815548, -0.019104130566120148, 0.017952794209122658, -0.00919059943407774, -0.020764194428920746, -0.026052303612232208, 0.025610512122511864, 0.044580765068531036, 0.0020282240584492683, -0.029211781919002533, -0.024994682520627975, 0.011586982756853104, -0.013735695742070675, -0.013327373191714287, 0.009378026239573956, -0.01097115222364664, -0.011607064865529537, 0.013882959261536598, 0.0014149037888273597, -0.02219666913151741, 0.01697549782693386, -0.009411495178937912, -0.01838119886815548, 0.0012860479764640331, 0.02172810398042202, -0.003882409306243062, 0.015797387808561325, 0.054246626794338226, 0.0028314811643213034, 0.026186181232333183, -0.0068678478710353374, 0.031621553003787994, -0.019719960168004036, -0.005365087650716305, -0.004725828766822815, -0.0011948448373004794, -0.017725205048918724, 0.022451035678386688, -0.01289896946400404, -0.02246442250907421, 0.015917876735329628, 0.013206885196268559, -0.014579115435481071, -0.002242425922304392, -0.0010567849967628717, 0.002655768534168601, 0.0006116467993706465, 0.013006070628762245, 0.024378851056098938, -0.003266578773036599, 0.006626870948821306, -0.009639084339141846, 0.015261884778738022, -0.02694927528500557, 0.02162100188434124, 0.008112896233797073, -0.026386994868516922, 0.016881786286830902, -0.02089807018637657, -0.026453932747244835, -0.011473188176751137, -0.028970805928111076, -0.02961341105401516, -0.006188426166772842, 0.002182181691750884, 0.004344281740486622, 0.011011315509676933, -0.006827685050666332, 0.009029948152601719, 0.0015763919800519943, 0.0075706979259848595, -0.011533432640135288, -0.02203601785004139, -0.018314260989427567, -0.025583738461136818, 0.022330546751618385, -0.03890441730618477, 0.019037192687392235, 0.014445239678025246, 
0.0022390789818018675, -0.027953345328569412, 0.01969318464398384, -0.019974324852228165, -0.014164099469780922, 0.008199915289878845, 0.0008442566613666713, 0.003725104732438922, -0.011553513817489147, -0.011473188176751137, 0.023334616795182228, -0.008400729857385159, 0.011406250298023224, 0.007885306142270565, -0.02093823440372944, 0.01755116693675518, -0.01376247126609087, -0.01838119886815548, 0.01917106844484806, -0.01279856264591217, -0.02579793892800808, -0.01538237277418375, 0.01271823700517416, 0.021272923797369003, 0.0005706471856683493, 0.005903939250856638, 0.014552340842783451, 0.015810776501893997, 0.014766542240977287, -0.01603836566209793, -0.0003526800428517163, -0.007845143787562847, 0.004970152862370014, -0.002126957755535841, -0.024539504200220108, 0.0015303720720112324, 0.008969703689217567, 0.0027461349964141846, 0.006509729195386171, -0.01994754932820797, -0.009331169538199902, 0.03649464622139931, 0.02314719185233116, 0.016426606103777885, -0.014498789794743061, 0.02684217318892479, -0.0007497065817005932, 0.02554357424378395, 0.01915767975151539, 0.017899245023727417, -0.015288659371435642, 0.02773914486169815, 0.00015939632430672646, 0.007778205908834934, 0.018407974392175674, -0.008748807944357395, -0.02694927528500557, 0.01713615097105503, 0.01801973208785057, 0.0008266853983514011, 0.012222895398736, 0.04380428418517113, -0.023120416328310966, -0.009337862953543663, 0.017939407378435135, 0.0074836784042418, -0.023334616795182228, -0.007443515583872795, -0.0010659890249371529, 0.020871296525001526, 0.011138497851788998, -0.012832031585276127, -0.6456044912338257, -0.014552340842783451, 0.017484229058027267, -0.012115794233977795, -0.0034573522862046957, 0.010121039114892483, -0.0011714164866134524, 0.01785908080637455, -0.016426606103777885, 0.01538237277418375, -0.013534881174564362, 0.012805256061255932, 0.0006769114406779408, -0.022852662950754166, -0.026092467829585075, -0.027926571667194366, -0.013039539568126202, -0.00830701645463705, 0.031139599159359932, -0.006164997816085815, -0.02611924149096012, 0.004387791734188795, -0.006108100526034832, 0.0072493948973715305, 0.008353873156011105, 0.015676898881793022, 0.020509829744696617, -0.016105303540825844, -0.015650125220417976, 0.010515973903238773, -0.030175691470503807, 0.03204995393753052, -0.0017805531388148665, 0.0056227995082736015, 0.040136076509952545, -0.0022223445121198893, 0.0030105405021458864, 0.022866051644086838, 0.013668757863342762, 0.021808428689837456, -0.012336689978837967, 0.024378851056098938, 0.03954702243208885, -0.0028113997541368008, 0.025664063170552254, -0.00548222940415144, 0.021768266335129738, -0.010094263590872288, 0.0003871950029861182, 0.0241780374199152, -0.005867123603820801, 0.019559308886528015, -0.000377781834686175, 0.001261782948859036, -0.015730449929833412, -0.002237405627965927, -0.007162375375628471, -0.02146035060286522, 0.0009747859439812601, 0.0026674827095121145, -0.0057165129110217094, 0.008655094541609287, -0.022544747218489647, -0.011131804436445236, -0.01958608441054821, 0.02856917679309845, 0.012336689978837967, 0.011801185086369514, 0.018916703760623932, -0.0066201770678162575, 0.014659442007541656, 0.004689013119786978, -0.01013442687690258, -0.03515588492155075, 0.010054100304841995, -0.004340935032814741, 0.026025528088212013, -0.013019458390772343, -0.005673002917319536, 0.011312536895275116, 0.0013747409684583545, -0.00547218881547451, 7.080794603098184e-05, -0.0010944376699626446, 0.01607852801680565, 0.008929540403187275, -0.02172810398042202, 
0.00571985961869359, 0.003490821458399296, 0.012283138930797577, 0.025463249534368515, 0.0025536881294101477, 0.011185354553163052, -0.017992958426475525, 0.010930989868938923, 0.02230377122759819, -0.023321229964494705, 0.0025202189572155476, 0.012390240095555782, -0.03170187771320343, -0.003520943457260728, -0.011841347441077232, 0.02370947040617466, 0.007282864302396774, 0.01650693267583847, 0.013193497434258461, -0.013949898071587086, -0.010355322621762753, 0.036066241562366486, -0.03818148374557495, -0.015275271609425545, 0.005187701899558306, -0.018889928236603737, -0.017618104815483093, 0.006600095424801111, -0.01665419526398182, 0.00783175602555275, 0.018622176721692085, -0.015061070211231709, -0.019974324852228165, 0.005164273548871279, -2.9782220735796727e-05, 0.013012764044106007, -0.03906506672501564, 0.015502861700952053, 0.005204436369240284, 0.006499688606709242, -0.003090866142883897, -0.0010735195828601718, -0.01049589179456234, 0.0033569452352821827, -0.0045149740763008595, 0.020978396758437157, 0.009210680611431599, 0.014846867881715298, -0.005047131795436144, 0.013802633620798588, -0.010904214344918728, 0.016774684190750122, -0.011325924657285213, -0.0029034395702183247, -0.001386455143801868, -0.006041162647306919, -0.003771961433812976, -0.02480725571513176, -0.02579793892800808, -0.0007149824523366988, -0.002642381004989147, -0.030041813850402832, -0.027498167008161545, 0.009779654443264008, -0.0185418501496315, -0.021607615053653717, -0.005960837006568909, 0.0074836784042418, -0.0010919275227934122, -0.01571706309914589, 0.01543592382222414, -0.004866398870944977, -0.02208956889808178, 0.01602497696876526, 0.0035744940396398306, -0.02779269404709339, -0.01475315447896719, 0.009833205491304398, -0.010268302634358406, 0.04099288582801819, 0.013461249880492687, 0.006600095424801111, -0.027873020619153976, 0.0012266404228284955, -0.013949898071587086, -0.0015337190125137568, -0.0008810725994408131, 0.03740500286221504, 0.017015662044286728, -0.02878337912261486, 0.01376247126609087, 0.016627419739961624, 0.011607064865529537, -0.007389965001493692, -0.013166721910238266, -0.02532937191426754, -0.007021805737167597, 0.018394585698843002, 0.016105303540825844, 0.004120039287954569, 0.014994132332503796, -0.02423158846795559, 0.020871296525001526, -0.0006279629305936396, -0.007784899789839983, -0.01801973208785057, 0.009813123382627964, 0.012597748078405857, 0.030148915946483612, 0.0088559091091156, 0.00596753042191267, 0.0033619655296206474, 0.02862272784113884, 0.011265680193901062, 0.011138497851788998, 0.02214311994612217, -0.010455729439854622, -0.01828748546540737, -0.03842246159911156, 0.009752878919243813, -0.031621553003787994, 0.0212996993213892, 0.0025720959529280663, -0.005709819030016661, -0.027350902557373047, -0.02632005698978901, -0.03234448283910751, 0.009712716564536095, 0.018394585698843002, -0.009732797741889954, 0.030363118276000023, -0.010054100304841995, -0.016828235238790512, 0.011613758280873299, 0.016855010762810707, 0.017216475680470467, -0.008273547515273094, 0.004893174394965172, 0.0032967007718980312, -0.0019311638316139579, 0.011680696159601212, 0.010857357643544674, -0.0015220048371702433, 8.377720223506913e-05, 0.01875605247914791, 0.015368985012173653, 0.031353797763586044, -0.01013442687690258, -0.02167455293238163, 0.0024649950210005045, -0.0015939632430672646, 0.04184969142079353, 0.004638809245079756, 0.02615940570831299, 0.020228689536452293, 0.016373055055737495, -0.001106151845306158, 0.02574438974261284, -0.031675104051828384, 
0.0442059151828289, 0.00973949208855629, 0.030416667461395264, 0.013695533387362957, 0.00031586410477757454, 0.002749481936916709, -0.0013362516183406115, 0.008153058588504791, 0.01760471612215042, -0.03510233387351036, -0.0022072833962738514, 0.02083113230764866, 0.014659442007541656, 0.02575777657330036, 0.033549368381500244, 0.03060409426689148, 0.01654709503054619, -0.017511002719402313, -0.007543922867625952, 0.0015379026299342513, -0.010462422855198383, 0.007677799090743065, -0.0044681173749268055, -0.01812683418393135, 0.0018374505452811718, -0.017926020547747612, 0.009993856772780418, 0.00771796191111207, 0.031675104051828384, 0.022892825305461884, -0.004879786632955074, 0.015181559138000011, 0.0022223445121198893, 0.003467393107712269, -0.00917051825672388, -0.03413842245936394, 0.02721702679991722, 0.0240307729691267, -0.014900418929755688, -0.003497515106573701, -0.010462422855198383, -0.021594226360321045, -0.021085496991872787, 0.019452208653092384, -0.01739051565527916, -0.007624248508363962, -0.008688563480973244, 0.029800837859511375, -0.004983540624380112, -0.016051752492785454, 0.030684420838952065, -0.01376247126609087, 0.017899245023727417, -0.0014584135496988893, 0.005458801053464413, -0.001113682403229177, -0.022999927401542664, -0.0038388995453715324, 0.008782276883721352, -0.0030590705573558807, 0.012624523602426052, -0.011807878501713276, 0.023200741037726402, -0.017939407378435135, 0.01827409863471985, -0.009839898906648159, -0.013461249880492687, 0.010382097214460373, 0.002767889993265271, -0.003795389784500003, -0.02741784043610096, -0.014378301799297333, 0.004387791734188795, -0.012082325294613838, -0.002431526081636548, -0.024419015273451805, -0.04466109350323677, -0.016573870554566383, 0.13719630241394043, 0.02590504102408886, -0.00403301976621151, 0.007021805737167597, -0.006486300844699144, 0.0037083702627569437, -0.003395434468984604, -0.004461423493921757, 0.011332618072628975, -0.018461523577570915, 0.002367934910580516, 0.009324475191533566, -0.0032833132427185774, -0.003731798380613327, 0.012517422437667847, 0.003226415952667594, 0.018822990357875824, -0.025677450001239777, -0.010060794651508331, -0.013990060426294804, -0.01472637988626957, -0.005027050152420998, 0.021821817383170128, 0.032826438546180725, -0.02428513765335083, -0.01634628139436245, 0.031246699392795563, 0.026306668296456337, 0.012691461481153965, 0.003889102954417467, -0.002913480391725898, 0.014980744570493698, 0.02241087146103382, -0.0004777706053573638, -0.02302670292556286, -0.002781277522444725, 0.017162924632430077, -0.033817119896411896, 0.023227516561746597, 0.016413219273090363, 0.013153334148228168, 9.360873082187027e-05, 0.004320853389799595, -0.01154012605547905, 0.029907938092947006, -0.01634628139436245, 0.009157130494713783, 0.0020901416428387165, 0.01021475251764059, 0.0007053600857034326, 0.016948724165558815, -0.006663686595857143, -0.0106498496606946, -0.012939132750034332, 0.0024951172526925802, 0.012544197961688042, -0.0002017555816564709, -0.005006968975067139, -0.019238006323575974, 0.02329445444047451, -0.026909111067652702, -0.03411164879798889, 0.002063366584479809, -0.01650693267583847, 0.005686390679329634, -0.019666410982608795, -0.0056930845603346825, 0.003350251354277134, -0.0167612973600626, -0.013220272958278656, -0.006221895571798086, -0.008420811034739017, -0.03834213688969612, 0.02459305338561535, 0.009444964118301868, 0.011004622094333172, 0.03293353691697121, 0.0016491871792823076, 0.005070560146123171, -0.0012902315938845277, 
... (several hundred embedding-vector floats truncated) ..., 0.004391138441860676], 'id': '0c906ab3-3786-477f-b13a-5a98367ceee6', '_distance': 0.4137815535068512}\n" + ] + } + ], + "source": [ + "print(docs[0].metadata)" + ] } ], "metadata": { @@ -215,7 +313,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": 
"ipython3", - "version": "3.10.6" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/docs/docs/modules/data_connection/vectorstores/index.mdx b/docs/docs/modules/data_connection/vectorstores/index.mdx index b1242021a27ca2..3b6d12699b8c5c 100644 --- a/docs/docs/modules/data_connection/vectorstores/index.mdx +++ b/docs/docs/modules/data_connection/vectorstores/index.mdx @@ -131,7 +131,7 @@ table = db.create_table( raw_documents = TextLoader('../../../state_of_the_union.txt').load() text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) documents = text_splitter.split_documents(raw_documents) -db = LanceDB.from_documents(documents, OpenAIEmbeddings(), connection=table) +db = LanceDB.from_documents(documents, OpenAIEmbeddings()) ``` diff --git a/docs/docs/modules/model_io/chat/custom_chat_model.ipynb b/docs/docs/modules/model_io/chat/custom_chat_model.ipynb new file mode 100644 index 00000000000000..b91ca4cfd4333f --- /dev/null +++ b/docs/docs/modules/model_io/chat/custom_chat_model.ipynb @@ -0,0 +1,644 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "e3da9a3f-f583-4ba6-994e-0e8c1158f5eb", + "metadata": {}, + "source": [ + "# Custom Chat Model\n", + "\n", + "In this guide, we'll learn how to create a custom chat model using LangChain abstractions.\n", + "\n", + "Wrapping your LLM with the standard `ChatModel` interface allow you to use your LLM in existing LangChain programs with minimal code modifications!\n", + "\n", + "As an bonus, your LLM will automatically become a LangChain `Runnable` and will benefit from some optimizations out of the box (e.g., batch via a threadpool), async support, the `astream_events` API, etc.\n", + "\n", + "## Inputs and outputs\n", + "\n", + "First, we need to talk about messages which are the inputs and outputs of chat models.\n", + "\n", + "### Messages\n", + "\n", + "Chat models take messages as inputs and return a message as output. \n", + "\n", + "LangChain has a few built-in message types:\n", + "\n", + "- `SystemMessage`: Used for priming AI behavior, usually passed in as the first of a sequence of input messages.\n", + "- `HumanMessage`: Represents a message from a person interacting with the chat model.\n", + "- `AIMessage`: Represents a message from the chat model. This can be either text or a request to invoke a tool.\n", + "- `FunctionMessage` / `ToolMessage`: Message for passing the results of tool invocation back to the model.\n", + "\n", + "::: {.callout-note}\n", + "`ToolMessage` and `FunctionMessage` closely follow OpenAIs `function` and `tool` arguments.\n", + "\n", + "This is a rapidly developing field and as more models add function calling capabilities, expect that there will be additions to this schema.\n", + ":::" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "c5046e6a-8b09-4a99-b6e6-7a605aac5738", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.messages import (\n", + " AIMessage,\n", + " BaseMessage,\n", + " FunctionMessage,\n", + " HumanMessage,\n", + " SystemMessage,\n", + " ToolMessage,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "53033447-8260-4f53-bd6f-b2f744e04e75", + "metadata": {}, + "source": [ + "### Streaming Variant\n", + "\n", + "All the chat messages have a streaming variant that contains `Chunk` in the name." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d4656e9d-bfa1-4703-8f79-762fe6421294", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.messages import (\n", + " AIMessageChunk,\n", + " FunctionMessageChunk,\n", + " HumanMessageChunk,\n", + " SystemMessageChunk,\n", + " ToolMessageChunk,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "81ebf3f4-c760-4898-b921-fdb469453d4a", + "metadata": {}, + "source": [ + "These chunks are used when streaming output from chat models, and they all define an additive property!" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "9c15c299-6f8a-49cf-a072-09924fd44396", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessageChunk(content='Hello World!')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "AIMessageChunk(content=\"Hello\") + AIMessageChunk(content=\" World!\")" + ] + }, + { + "cell_type": "markdown", + "id": "8e952d64-6d38-4a2b-b996-8812c204a12c", + "metadata": {}, + "source": [ + "## Simple Chat Model\n", + "\n", + "Inheriting from `SimpleChatModel` is great for prototyping!\n", + "\n", + "It won't allow you to implement all the features you might want out of a chat model, but it's quick to implement, and if you need more you can transition to `BaseChatModel`, shown below.\n", + "\n", + "Let's implement a chat model that echoes back the last `n` characters of the prompt!\n", + "\n", + "You need to implement the following:\n", + "\n", + "* The method `_call` - Used to generate a chat result from a prompt.\n", + "\n", + "In addition, you have the option to specify the following:\n", + "\n", + "* The property `_identifying_params` - Represents model parameterization for logging purposes.\n", + "\n", + "Optional:\n", + "\n", + "* `_stream` - Used to implement streaming.\n" + ] + }, + { + "cell_type": "markdown", + "id": "bbfebea1", + "metadata": {}, + "source": [ + "## Base Chat Model\n", + "\n", + "Let's implement a chat model that echoes back the first `n` characters of the last message in the prompt!\n", + "\n", + "To do so, we will inherit from `BaseChatModel`, a lower-level class, and implement the following methods/properties:\n", + "\n", + "* `_generate` - Used to generate a chat result from a prompt\n", + "* The property `_llm_type` - Used to uniquely identify the type of the model.
Used for logging.\n", + "\n", + "Optional:\n", + "\n", + "* `_stream` - Used to implement streaming.\n", + "* `_agenerate` - Used to implement a native async method.\n", + "* `_astream` - Used to implement an async version of `_stream`.\n", + "* The property `_identifying_params` - Represents model parameterization for logging purposes.\n", + "\n", + "\n", + ":::{.callout-caution}\n", + "\n", + "Currently, to get async streaming to work (via `astream`), you must provide an implementation of `_astream`.\n", + "\n", + "By default, if `_astream` is not provided, async streaming falls back on `_agenerate`, which does not support\n", + "token-by-token streaming.\n", + ":::" + ] + }, + { + "cell_type": "markdown", + "id": "8e7047bd-c235-46f6-85e1-d6d7e0868eb1", + "metadata": {}, + "source": [ + "### Implementation" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "25ba32e5-5a6d-49f4-bb68-911827b84d61", + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Any, AsyncIterator, Dict, Iterator, List, Optional\n", + "\n", + "from langchain_core.callbacks import (\n", + " AsyncCallbackManagerForLLMRun,\n", + " CallbackManagerForLLMRun,\n", + ")\n", + "from langchain_core.language_models import BaseChatModel, SimpleChatModel\n", + "from langchain_core.messages import AIMessageChunk, BaseMessage, HumanMessage\n", + "from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult\n", + "from langchain_core.runnables import run_in_executor\n", + "\n", + "\n", + "class CustomChatModelAdvanced(BaseChatModel):\n", + " \"\"\"A custom chat model that echoes the first `n` characters of the input.\n", + "\n", + " When contributing an implementation to LangChain, carefully document\n", + " the model, including the initialization parameters, an example\n", + " of how to initialize the model, and any relevant links to the\n", + " underlying model's documentation or API.\n", + "\n", + " Example:\n", + "\n", + " .. code-block:: python\n", + "\n", + " model = CustomChatModel(n=2)\n", + " result = model.invoke([HumanMessage(content=\"hello\")])\n", + " result = model.batch([[HumanMessage(content=\"hello\")],\n", + " [HumanMessage(content=\"world\")]])\n", + " \"\"\"\n", + "\n", + " n: int\n", + " \"\"\"The number of characters from the last message of the prompt to be echoed.\"\"\"\n", + "\n", + " def _generate(\n", + "        self,\n", + " messages: List[BaseMessage],\n", + " stop: Optional[List[str]] = None,\n", + " run_manager: Optional[CallbackManagerForLLMRun] = None,\n", + " **kwargs: Any,\n", + " ) -> ChatResult:\n", + " \"\"\"Override the _generate method to implement the chat model logic.\n", + "\n", + " This can be a call to an API, a call to a local model, or any other\n", + " implementation that generates a response to the input prompt.\n", + "\n", + " Args:\n", + " messages: the prompt composed of a list of messages.\n", + " stop: a list of strings on which the model should stop generating.\n", + " If generation stops due to a stop token, the stop token itself\n", + " SHOULD BE INCLUDED as part of the output.
This is not enforced\n", + " across models right now, but it's a good practice to follow since\n", + " it makes it much easier to parse the output of the model\n", + " downstream and understand why generation stopped.\n", + " run_manager: A run manager with callbacks for the LLM.\n", + " \"\"\"\n", + " last_message = messages[-1]\n", + " tokens = last_message.content[: self.n]\n", + " message = AIMessage(content=tokens)\n", + " generation = ChatGeneration(message=message)\n", + " return ChatResult(generations=[generation])\n", + "\n", + " def _stream(\n", + " self,\n", + " messages: List[BaseMessage],\n", + " stop: Optional[List[str]] = None,\n", + " run_manager: Optional[CallbackManagerForLLMRun] = None,\n", + " **kwargs: Any,\n", + " ) -> Iterator[ChatGenerationChunk]:\n", + " \"\"\"Stream the output of the model.\n", + "\n", + " This method should be implemented if the model can generate output\n", + " in a streaming fashion. If the model does not support streaming,\n", + " do not implement it. In that case streaming requests will be automatically\n", + " handled by the _generate method.\n", + "\n", + " Args:\n", + " messages: the prompt composed of a list of messages.\n", + " stop: a list of strings on which the model should stop generating.\n", + " If generation stops due to a stop token, the stop token itself\n", + " SHOULD BE INCLUDED as part of the output. This is not enforced\n", + " across models right now, but it's a good practice to follow since\n", + " it makes it much easier to parse the output of the model\n", + " downstream and understand why generation stopped.\n", + " run_manager: A run manager with callbacks for the LLM.\n", + " \"\"\"\n", + " last_message = messages[-1]\n", + " tokens = last_message.content[: self.n]\n", + "\n", + " for token in tokens:\n", + " chunk = ChatGenerationChunk(message=AIMessageChunk(content=token))\n", + "\n", + " if run_manager:\n", + " run_manager.on_llm_new_token(token, chunk=chunk)\n", + "\n", + " yield chunk\n", + "\n", + " async def _astream(\n", + " self,\n", + " messages: List[BaseMessage],\n", + " stop: Optional[List[str]] = None,\n", + " run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,\n", + " **kwargs: Any,\n", + " ) -> AsyncIterator[ChatGenerationChunk]:\n", + " \"\"\"An async variant of astream.\n", + "\n", + " If not provided, the default behavior is to delegate to the _generate method.\n", + "\n", + " The implementation below instead will delegate to `_stream` and will\n", + " kick it off in a separate thread.\n", + "\n", + " If you're able to natively support async, then by all means do so!\n", + " \"\"\"\n", + " result = await run_in_executor(\n", + " None,\n", + " self._stream,\n", + " messages,\n", + " stop=stop,\n", + " run_manager=run_manager.get_sync() if run_manager else None,\n", + " **kwargs,\n", + " )\n", + " for chunk in result:\n", + " yield chunk\n", + "\n", + " @property\n", + " def _llm_type(self) -> str:\n", + " \"\"\"Get the type of language model used by this chat model.\"\"\"\n", + " return \"echoing-chat-model-advanced\"\n", + "\n", + " @property\n", + " def _identifying_params(self) -> Dict[str, Any]:\n", + " \"\"\"Return a dictionary of identifying parameters.\"\"\"\n", + " return {\"n\": self.n}" + ] + }, + { + "cell_type": "markdown", + "id": "b3c3d030-8d8b-4891-962d-a2d39b331883", + "metadata": {}, + "source": [ + ":::{.callout-tip}\n", + "The `_astream` implementation uses `run_in_executor` to launch the sync `_stream` in a separate thread.\n", + "\n", + "You can use this trick if you 
want to reuse the `_stream` implementation, but if you're able to implement code\n", + "that's natively async, that's a better solution since that code will run with less overhead.\n", + ":::" + ] + }, + { + "cell_type": "markdown", + "id": "1e9af284-f2d3-44e2-ac6a-09b73d89ada3", + "metadata": {}, + "source": [ + "### Let's test it 🧪\n", + "\n", + "The chat model will implement the standard `Runnable` interface of LangChain, which many LangChain abstractions support!" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "34bf2d48-556a-48be-aee7-496fb02332f3", + "metadata": {}, + "outputs": [], + "source": [ + "model = CustomChatModelAdvanced(n=3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "27689f30-dcd2-466b-ba9d-f60b7d434110", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content='Meo')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.invoke(\n", + " [\n", + " HumanMessage(content=\"hello!\"),\n", + " AIMessage(content=\"Hi there human!\"),\n", + " HumanMessage(content=\"Meow!\"),\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "406436df-31bf-466b-9c3d-39db9d6b6407", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content='hel')" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.invoke(\"hello\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a72ffa46-6004-41ef-bbe4-56fa17a029e2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[AIMessage(content='hel'), AIMessage(content='goo')]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.batch([\"hello\", \"goodbye\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3633be2c-2ea0-42f9-a72f-3b5240690b55", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "c|a|t|" + ] + } + ], + "source": [ + "for chunk in model.stream(\"cat\"):\n", + " print(chunk.content, end=\"|\")" + ] + }, + { + "cell_type": "markdown", + "id": "3f8a7c42-aec4-4116-adf3-93133d409827", + "metadata": {}, + "source": [ + "Please see the implementation of `_astream` in the model! If you do not implement it, then no output will stream!" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b7d73995-eeab-48c6-a7d8-32c98ba29fc2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "c|a|t|" + ] + } + ], + "source": [ + "async for chunk in model.astream(\"cat\"):\n", + " print(chunk.content, end=\"|\")" + ] + }, + { + "cell_type": "markdown", + "id": "f80dc55b-d159-4527-9191-407a7c6d6042", + "metadata": {}, + "source": [ + "Let's try the `astream_events` API, which will also help double-check that all the callbacks were implemented!"
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "17840eba-8ff4-4e73-8e4f-85f16eb1c9d0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'event': 'on_chat_model_start', 'run_id': 'e03c0b21-521f-4cb4-a837-02fed65cf1cf', 'name': 'CustomChatModelAdvanced', 'tags': [], 'metadata': {}, 'data': {'input': 'cat'}}\n", + "{'event': 'on_chat_model_stream', 'run_id': 'e03c0b21-521f-4cb4-a837-02fed65cf1cf', 'tags': [], 'metadata': {}, 'name': 'CustomChatModelAdvanced', 'data': {'chunk': AIMessageChunk(content='c')}}\n", + "{'event': 'on_chat_model_stream', 'run_id': 'e03c0b21-521f-4cb4-a837-02fed65cf1cf', 'tags': [], 'metadata': {}, 'name': 'CustomChatModelAdvanced', 'data': {'chunk': AIMessageChunk(content='a')}}\n", + "{'event': 'on_chat_model_stream', 'run_id': 'e03c0b21-521f-4cb4-a837-02fed65cf1cf', 'tags': [], 'metadata': {}, 'name': 'CustomChatModelAdvanced', 'data': {'chunk': AIMessageChunk(content='t')}}\n", + "{'event': 'on_chat_model_end', 'name': 'CustomChatModelAdvanced', 'run_id': 'e03c0b21-521f-4cb4-a837-02fed65cf1cf', 'tags': [], 'metadata': {}, 'data': {'output': AIMessageChunk(content='cat')}}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/eugene/src/langchain/libs/core/langchain_core/_api/beta_decorator.py:86: LangChainBetaWarning: This API is in beta and may change in the future.\n", + " warn_beta(\n" + ] + } + ], + "source": [ + "async for event in model.astream_events(\"cat\", version=\"v1\"):\n", + " print(event)" + ] + }, + { + "cell_type": "markdown", + "id": "42f9553f-7d8c-4277-aeb4-d80d77839d90", + "metadata": {}, + "source": [ + "## Identifying Params\n", + "\n", + "LangChain has a callback system that allows implementing loggers to monitor the behavior of LLM applications.\n", + "\n", + "Remember the `_identifying_params` property from earlier? \n", + "\n", + "It's passed to the callback system and is accessible for user-specified loggers.\n", + "\n", + "Below we'll implement a handler with just a single `on_chat_model_start` event to see where `_identifying_params` appears."
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "cc7e6b5f-711b-48aa-9ebe-92a13e230c37", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---\n", + "On chat model start.\n", + "{'invocation_params': {'n': 3, '_type': 'echoing-chat-model-advanced', 'stop': ['woof']}, 'options': {'stop': ['woof']}, 'name': None, 'batch_size': 1}\n" + ] + }, + { + "data": { + "text/plain": [ + "AIMessage(content='meo')" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from typing import Union\n", + "from uuid import UUID\n", + "\n", + "from langchain_core.callbacks import AsyncCallbackHandler\n", + "from langchain_core.outputs import (\n", + " ChatGenerationChunk,\n", + " ChatResult,\n", + " GenerationChunk,\n", + " LLMResult,\n", + ")\n", + "\n", + "\n", + "class SampleCallbackHandler(AsyncCallbackHandler):\n", + " \"\"\"Async callback handler that handles callbacks from LangChain.\"\"\"\n", + "\n", + " async def on_chat_model_start(\n", + " self,\n", + " serialized: Dict[str, Any],\n", + " messages: List[List[BaseMessage]],\n", + " *,\n", + " run_id: UUID,\n", + " parent_run_id: Optional[UUID] = None,\n", + " tags: Optional[List[str]] = None,\n", + " metadata: Optional[Dict[str, Any]] = None,\n", + " **kwargs: Any,\n", + " ) -> Any:\n", + " \"\"\"Run when a chat model starts running.\"\"\"\n", + " print(\"---\")\n", + " print(\"On chat model start.\")\n", + " print(kwargs)\n", + "\n", + "\n", + "model.invoke(\"meow\", stop=[\"woof\"], config={\"callbacks\": [SampleCallbackHandler()]})" + ] + }, + { + "cell_type": "markdown", + "id": "44ee559b-b1da-4851-8c97-420ab394aff9", + "metadata": {}, + "source": [ + "## Contributing\n", + "\n", + "We appreciate all chat model integration contributions. \n", + "\n", + "Here's a checklist to help make sure your contribution gets added to LangChain:\n", + "\n", + "Documentation:\n", + "\n", + "* The model contains doc-strings for all initialization arguments, as these will be surfaced in the [API Reference](https://api.python.langchain.com/en/stable/langchain_api_reference.html).\n", + "* The class doc-string for the model contains a link to the model API if the model is powered by a service.\n", + "\n", + "Tests:\n", + "\n", + "* [ ] Add unit or integration tests to the overridden methods. Verify that `invoke`, `ainvoke`, `batch`, `stream` work if you've overridden the corresponding code.\n", + "\n", + "Streaming (if you're implementing it):\n", + "\n", + "* [ ] Provide an async implementation via `_astream`\n", + "* [ ] Make sure to invoke the `on_llm_new_token` callback\n", + "* [ ] `on_llm_new_token` is invoked BEFORE yielding the chunk\n", + "\n", + "Stop Token Behavior:\n", + "\n", + "* [ ] Stop token should be respected\n", + "* [ ] Stop token should be INCLUDED as part of the response\n", + "\n", + "Secret API Keys:\n", + "\n", + "* [ ] If your model connects to an API, it will likely accept API keys as part of its initialization. Use Pydantic's `SecretStr` type for secrets, so they don't get accidentally printed out when folks print the model."
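As a minimal sketch of the `SecretStr` pattern the last checklist item refers to (the config class and field below are illustrative, not from this PR):

```python
from langchain_core.pydantic_v1 import BaseModel, SecretStr


class MyChatModelConfig(BaseModel):
    """Hypothetical model config; demonstrates that SecretStr masks the key."""

    api_key: SecretStr


config = MyChatModelConfig(api_key="sk-very-secret")
print(config)                             # api_key=SecretStr('**********')
print(config.api_key.get_secret_value())  # raw key, only on explicit request
```

Because `SecretStr` masks its value in `repr` and `str`, printing or logging the model object will not leak the key.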
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/modules/model_io/chat/index.mdx b/docs/docs/modules/model_io/chat/index.mdx index 2ebb19c93d671b..058192951d8da8 100644 --- a/docs/docs/modules/model_io/chat/index.mdx +++ b/docs/docs/modules/model_io/chat/index.mdx @@ -4,11 +4,13 @@ sidebar_position: 3 # Chat Models -ChatModels are a core component of LangChain. -LangChain does not serve its own ChatModels, but rather provides a standard interface for interacting with many different models. To be specific, this interface is one that takes as input a list of messages and returns a message. +Chat Models are a core component of LangChain. +A chat model is a language model that uses chat messages as inputs and returns chat messages as outputs (as opposed to using plain text). -There are lots of model providers (OpenAI, Cohere, Hugging Face, etc) - the `ChatModel` class is designed to provide a standard interface for all of them. +LangChain has integrations with many model providers (OpenAI, Cohere, Hugging Face, etc.) and exposes a standard interface to interact with all of these models. + +LangChain allows you to use models in sync, async, batching and streaming modes, and provides additional features (e.g., caching). ## [Quick Start](./quick_start) @@ -27,3 +29,4 @@ This includes: - [How to use ChatModels that support function calling](./function_calling) - [How to stream responses from a ChatModel](./streaming) - [How to track token usage in a ChatModel call](./token_usage_tracking) +- [How to create a custom ChatModel](./custom_chat_model) diff --git a/docs/vercel.json b/docs/vercel.json index 796d3d16b5e622..9a1ab9d8abdc4c 100644 --- a/docs/vercel.json +++ b/docs/vercel.json @@ -1,5 +1,13 @@ { "redirects": [ + { + "source": "/docs/integrations/llms/huggingface_textgen_inference", + "destination": "/docs/integrations/llms/huggingface_endpoint" + }, + { + "source": "/docs/integrations/llms/huggingface_hub", + "destination": "/docs/integrations/llms/huggingface_endpoint" + }, { "source": "/docs/integrations/llms/watsonxllm", "destination": "/docs/integrations/llms/ibm_watsonx" @@ -594,11 +602,7 @@ }, { "source": "/docs/integrations/cassandra", - "destination": "/docs/integrations/providers/astradb" - }, - { - "source": "/docs/integrations/providers/cassandra", - "destination": "/docs/integrations/providers/astradb" + "destination": "/docs/integrations/providers/cassandra" }, { "source": "/docs/integrations/providers/providers/semadb", "destination": "/docs/integrations/providers/semadb" }, @@ -608,10 +612,6 @@ "source": "/docs/integrations/vectorstores/vectorstores/semadb", "destination": "/docs/integrations/vectorstores/semadb" }, - { - "source": "/docs/integrations/vectorstores/cassandra", - "destination": "/docs/integrations/vectorstores/astradb" - }, { "source": "/docs/integrations/vectorstores/async_faiss", "destination": "/docs/integrations/vectorstores/faiss_async" diff --git a/libs/community/langchain_community/agent_toolkits/__init__.py b/libs/community/langchain_community/agent_toolkits/__init__.py index 3f6bf3033190d9..bbb3820cb3fac9 100644 --- a/libs/community/langchain_community/agent_toolkits/__init__.py +++ 
b/libs/community/langchain_community/agent_toolkits/__init__.py @@ -18,6 +18,7 @@ from langchain_community.agent_toolkits.azure_cognitive_services import ( AzureCognitiveServicesToolkit, ) +from langchain_community.agent_toolkits.cogniswitch.toolkit import CogniswitchToolkit from langchain_community.agent_toolkits.connery import ConneryToolkit from langchain_community.agent_toolkits.file_management.toolkit import ( FileManagementToolkit, @@ -51,6 +52,7 @@ "AINetworkToolkit", "AmadeusToolkit", "AzureCognitiveServicesToolkit", + "CogniswitchToolkit", "ConneryToolkit", "FileManagementToolkit", "GmailToolkit", diff --git a/libs/community/langchain_community/agent_toolkits/cogniswitch/__init__.py b/libs/community/langchain_community/agent_toolkits/cogniswitch/__init__.py new file mode 100644 index 00000000000000..df1d84976c49a8 --- /dev/null +++ b/libs/community/langchain_community/agent_toolkits/cogniswitch/__init__.py @@ -0,0 +1 @@ +"""CogniSwitch Toolkit""" diff --git a/libs/community/langchain_community/agent_toolkits/cogniswitch/toolkit.py b/libs/community/langchain_community/agent_toolkits/cogniswitch/toolkit.py new file mode 100644 index 00000000000000..36ec5ae0f36095 --- /dev/null +++ b/libs/community/langchain_community/agent_toolkits/cogniswitch/toolkit.py @@ -0,0 +1,40 @@ +from typing import List + +from langchain_community.agent_toolkits.base import BaseToolkit +from langchain_community.tools import BaseTool +from langchain_community.tools.cogniswitch.tool import ( + CogniswitchKnowledgeRequest, + CogniswitchKnowledgeSourceFile, + CogniswitchKnowledgeSourceURL, + CogniswitchKnowledgeStatus, +) + + +class CogniswitchToolkit(BaseToolkit): + """ + Toolkit for CogniSwitch. + + Use the toolkit to get all the tools present in CogniSwitch and + use them to interact with your knowledge base. + """ + + cs_token: str # CogniSwitch token + OAI_token: str # OpenAI API token + apiKey: str # CogniSwitch OAuth token + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + return [ + CogniswitchKnowledgeStatus( + cs_token=self.cs_token, OAI_token=self.OAI_token, apiKey=self.apiKey + ), + CogniswitchKnowledgeRequest( + cs_token=self.cs_token, OAI_token=self.OAI_token, apiKey=self.apiKey + ), + CogniswitchKnowledgeSourceFile( + cs_token=self.cs_token, OAI_token=self.OAI_token, apiKey=self.apiKey + ), + CogniswitchKnowledgeSourceURL( + cs_token=self.cs_token, OAI_token=self.OAI_token, apiKey=self.apiKey + ), + ] diff --git a/libs/community/langchain_community/chat_message_histories/astradb.py b/libs/community/langchain_community/chat_message_histories/astradb.py index f820480ff26b76..5b118a0ac9f3ca 100644 --- a/libs/community/langchain_community/chat_message_histories/astradb.py +++ b/libs/community/langchain_community/chat_message_histories/astradb.py @@ -26,16 +26,16 @@ class AstraDBChatMessageHistory(BaseChatMessageHistory): """Chat message history that stores history in Astra DB. - Args (only keyword-arguments accepted): + Args: session_id: arbitrary key that is used to store the messages of a single chat session. - collection_name (str): name of the Astra DB collection to create/use. - token (Optional[str]): API token for Astra DB usage. - api_endpoint (Optional[str]): full URL to the API endpoint, + collection_name: name of the Astra DB collection to create/use. + token: API token for Astra DB usage. + api_endpoint: full URL to the API endpoint, such as "https://-us-east1.apps.astra.datastax.com".
- astra_db_client (Optional[Any]): *alternative to token+api_endpoint*, + astra_db_client: *alternative to token+api_endpoint*, you can pass an already-created 'astrapy.db.AstraDB' instance. - namespace (Optional[str]): namespace (aka keyspace) where the + namespace: namespace (aka keyspace) where the collection is created. Defaults to the database's "default namespace". """ @@ -51,7 +51,6 @@ def __init__( setup_mode: SetupMode = SetupMode.SYNC, pre_delete_collection: bool = False, ) -> None: - """Create an Astra DB chat message history.""" self.astra_env = _AstraDBCollectionEnvironment( collection_name=collection_name, token=token, @@ -96,7 +95,6 @@ def messages(self, messages: List[BaseMessage]) -> None: raise NotImplementedError("Use add_messages instead") async def aget_messages(self) -> List[BaseMessage]: - """Retrieve all session messages from DB""" await self.astra_env.aensure_db_setup() docs = self.async_collection.paginated_find( filter={ @@ -117,7 +115,6 @@ async def aget_messages(self) -> List[BaseMessage]: return messages def add_messages(self, messages: Sequence[BaseMessage]) -> None: - """Write a message to the table""" self.astra_env.ensure_db_setup() docs = [ { @@ -130,7 +127,6 @@ def add_messages(self, messages: Sequence[BaseMessage]) -> None: self.collection.chunked_insert_many(docs) async def aadd_messages(self, messages: Sequence[BaseMessage]) -> None: - """Write a message to the table""" await self.astra_env.aensure_db_setup() docs = [ { @@ -143,11 +139,9 @@ async def aadd_messages(self, messages: Sequence[BaseMessage]) -> None: await self.async_collection.chunked_insert_many(docs) def clear(self) -> None: - """Clear session memory from DB""" self.astra_env.ensure_db_setup() self.collection.delete_many(filter={"session_id": self.session_id}) async def aclear(self) -> None: - """Clear session memory from DB""" await self.astra_env.aensure_db_setup() await self.async_collection.delete_many(filter={"session_id": self.session_id}) diff --git a/libs/community/langchain_community/chat_models/huggingface.py b/libs/community/langchain_community/chat_models/huggingface.py index 0af34a8cf04b23..143aff07172d1c 100644 --- a/libs/community/langchain_community/chat_models/huggingface.py +++ b/libs/community/langchain_community/chat_models/huggingface.py @@ -1,4 +1,5 @@ """Hugging Face Chat Wrapper.""" + from typing import Any, List, Optional, Union from langchain_core.callbacks.manager import ( @@ -52,6 +53,7 @@ def __init__(self, **kwargs: Any): from transformers import AutoTokenizer self._resolve_model_id() + self.tokenizer = ( AutoTokenizer.from_pretrained(self.model_id) if self.tokenizer is None @@ -90,10 +92,10 @@ def _to_chat_prompt( ) -> str: """Convert a list of messages into a prompt format expected by wrapped LLM.""" if not messages: - raise ValueError("at least one HumanMessage must be provided") + raise ValueError("At least one HumanMessage must be provided!") if not isinstance(messages[-1], HumanMessage): - raise ValueError("last message must be a HumanMessage") + raise ValueError("Last message must be a HumanMessage!") messages_dicts = [self._to_chatml_format(m) for m in messages] @@ -135,20 +137,15 @@ def _resolve_model_id(self) -> None: from huggingface_hub import list_inference_endpoints available_endpoints = list_inference_endpoints("*") - - if isinstance(self.llm, HuggingFaceTextGenInference): - endpoint_url = self.llm.inference_server_url - - elif isinstance(self.llm, HuggingFaceEndpoint): - endpoint_url = self.llm.endpoint_url - - elif isinstance(self.llm, 
HuggingFaceHub): - # no need to look up model_id for HuggingFaceHub LLM + if isinstance(self.llm, HuggingFaceHub) or ( + hasattr(self.llm, "repo_id") and self.llm.repo_id + ): self.model_id = self.llm.repo_id return - + elif isinstance(self.llm, HuggingFaceTextGenInference): + endpoint_url: Optional[str] = self.llm.inference_server_url else: - raise ValueError(f"Unknown LLM type: {type(self.llm)}") + endpoint_url = self.llm.endpoint_url for endpoint in available_endpoints: if endpoint.url == endpoint_url: @@ -156,8 +153,8 @@ def _resolve_model_id(self) -> None: if not self.model_id: raise ValueError( - "Failed to resolve model_id" - f"Could not find model id for inference server provided: {endpoint_url}" + "Failed to resolve model_id: " + f"Could not find model id for inference server: {endpoint_url}. " "Make sure that your Hugging Face token has access to the endpoint." ) diff --git a/libs/community/langchain_community/document_loaders/directory.py b/libs/community/langchain_community/document_loaders/directory.py index 7cfa456487cc54..3cb2ad1309a0ad 100644 --- a/libs/community/langchain_community/document_loaders/directory.py +++ b/libs/community/langchain_community/document_loaders/directory.py @@ -2,7 +2,7 @@ import logging import random from pathlib import Path -from typing import Any, List, Optional, Type, Union +from typing import Any, List, Optional, Sequence, Type, Union from langchain_core.documents import Document @@ -41,6 +41,7 @@ def __init__( use_multithreading: bool = False, max_concurrency: int = 4, *, + exclude: Union[Sequence[str], str] = (), sample_size: int = 0, randomize_sample: bool = False, sample_seed: Union[int, None] = None, @@ -51,6 +52,8 @@ path: Path to directory. glob: Glob pattern to use to find files. Defaults to "**/[!.]*" (all files except hidden). + exclude: A pattern or list of patterns to exclude from results. + Use glob syntax. silent_errors: Whether to silently ignore errors. Defaults to False. load_hidden: Whether to load hidden files. Defaults to False. loader_cls: Loader class to use for loading files. @@ -64,11 +67,38 @@ directory. randomize_sample: Shuffle the files to get a random sample. sample_seed: set the seed of the random shuffle for reproducibility. + + Examples: + + .. code-block:: python + from langchain_community.document_loaders import DirectoryLoader + + # Load all non-hidden files in a directory. + loader = DirectoryLoader("/path/to/directory") + + # Load all text files in a directory without recursion. + loader = DirectoryLoader("/path/to/directory", glob="*.txt") + + # Recursively load all text files in a directory. + loader = DirectoryLoader( + "/path/to/directory", glob="*.txt", recursive=True + ) + + # Load all files in a directory, except for py files. + loader = DirectoryLoader("/path/to/directory", exclude="*.py") + + # Load all files in a directory, except for py or pyc files.
+ loader = DirectoryLoader( + "/path/to/directory", exclude=["*.py", "*.pyc"] + ) """ if loader_kwargs is None: loader_kwargs = {} + if isinstance(exclude, str): + exclude = (exclude,) self.path = path self.glob = glob + self.exclude = exclude self.load_hidden = load_hidden self.loader_cls = loader_cls self.loader_kwargs = loader_kwargs @@ -118,7 +148,13 @@ def load(self) -> List[Document]: raise ValueError(f"Expected directory, got file: '{self.path}'") docs: List[Document] = [] - items = list(p.rglob(self.glob) if self.recursive else p.glob(self.glob)) + + paths = p.rglob(self.glob) if self.recursive else p.glob(self.glob) + items = [ + path + for path in paths + if not (self.exclude and any(path.match(glob) for glob in self.exclude)) + ] if self.sample_size > 0: if self.randomize_sample: diff --git a/libs/community/langchain_community/embeddings/voyageai.py b/libs/community/langchain_community/embeddings/voyageai.py index 93109d45c65b6a..f8b1a4059e6d5e 100644 --- a/libs/community/langchain_community/embeddings/voyageai.py +++ b/libs/community/langchain_community/embeddings/voyageai.py @@ -86,6 +86,15 @@ class VoyageEmbeddings(BaseModel, Embeddings): show_progress_bar: bool = False """Whether to show a progress bar when embedding. Must have tqdm installed if set to True.""" + truncation: Optional[bool] = None + """Whether to truncate the input texts to fit within the context length. + + If True, over-length input texts will be truncated to fit within the context + length, before being vectorized by the embedding model. If False, an error will be + raised if any given text exceeds the context length. If not specified + (defaults to None), we will truncate the input text before sending it to the + embedding model if it slightly exceeds the context window length.
If it + significantly exceeds the context window length, an error will be raised.""" class Config: """Configuration for this pydantic object.""" @@ -104,12 +113,14 @@ def _invocation_params( self, input: List[str], input_type: Optional[str] = None ) -> Dict: api_key = cast(SecretStr, self.voyage_api_key).get_secret_value() - params = { + params: Dict = { "url": self.voyage_api_base, "headers": {"Authorization": f"Bearer {api_key}"}, "json": {"model": self.model, "input": input, "input_type": input_type}, "timeout": self.request_timeout, } + if self.truncation is not None: + params["json"]["truncation"] = self.truncation return params def _get_embeddings( diff --git a/libs/community/langchain_community/llms/huggingface_endpoint.py b/libs/community/langchain_community/llms/huggingface_endpoint.py index c14b2e24a8050b..df25bf367e8b4d 100644 --- a/libs/community/langchain_community/llms/huggingface_endpoint.py +++ b/libs/community/langchain_community/llms/huggingface_endpoint.py @@ -1,12 +1,17 @@ -from typing import Any, Dict, List, Mapping, Optional +import json +import logging +from typing import Any, AsyncIterator, Dict, Iterator, List, Mapping, Optional -import requests -from langchain_core.callbacks import CallbackManagerForLLMRun +from langchain_core.callbacks import ( + AsyncCallbackManagerForLLMRun, + CallbackManagerForLLMRun, +) from langchain_core.language_models.llms import LLM -from langchain_core.pydantic_v1 import Extra, root_validator -from langchain_core.utils import get_from_dict_or_env +from langchain_core.outputs import GenerationChunk +from langchain_core.pydantic_v1 import Extra, Field, root_validator +from langchain_core.utils import get_from_dict_or_env, get_pydantic_field_names -from langchain_community.llms.utils import enforce_stop_tokens +logger = logging.getLogger(__name__) VALID_TASKS = ( "text2text-generation", @@ -17,70 +22,198 @@ class HuggingFaceEndpoint(LLM): - """HuggingFace Endpoint models. - - To use, you should have the ``huggingface_hub`` python package installed, and the - environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, or pass - it as a named parameter to the constructor. + """ + HuggingFace Endpoint. - Only supports `text-generation` and `text2text-generation` for now. + To use this class, you should have installed the ``huggingface_hub`` package, and + the environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, + or given as a named parameter to the constructor. Example: .. 
code-block:: python - from langchain_community.llms import HuggingFaceEndpoint - endpoint_url = ( - "https://abcdefghijklmnop.us-east-1.aws.endpoints.huggingface.cloud" + # Basic Example (no streaming) + llm = HuggingFaceEndpoint( + endpoint_url="http://localhost:8010/", + max_new_tokens=512, + top_k=10, + top_p=0.95, + typical_p=0.95, + temperature=0.01, + repetition_penalty=1.03, + huggingfacehub_api_token="my-api-key" ) - hf = HuggingFaceEndpoint( - endpoint_url=endpoint_url, + print(llm("What is Deep Learning?")) + + # Streaming response example + from langchain_community.callbacks import streaming_stdout + + callbacks = [streaming_stdout.StreamingStdOutCallbackHandler()] + llm = HuggingFaceEndpoint( + endpoint_url="http://localhost:8010/", + max_new_tokens=512, + top_k=10, + top_p=0.95, + typical_p=0.95, + temperature=0.01, + repetition_penalty=1.03, + callbacks=callbacks, + streaming=True, huggingfacehub_api_token="my-api-key" ) + print(llm("What is Deep Learning?")) + """ - endpoint_url: str = "" + endpoint_url: Optional[str] = None """Endpoint URL to use.""" + repo_id: Optional[str] = None + """Repo to use.""" + huggingfacehub_api_token: Optional[str] = None + max_new_tokens: int = 512 + """Maximum number of generated tokens""" + top_k: Optional[int] = None + """The number of highest probability vocabulary tokens to keep for + top-k-filtering.""" + top_p: Optional[float] = 0.95 + """If set to < 1, only the smallest set of most probable tokens with probabilities + that add up to `top_p` or higher are kept for generation.""" + typical_p: Optional[float] = 0.95 + """Typical Decoding mass. See [Typical Decoding for Natural Language + Generation](https://arxiv.org/abs/2202.00666) for more information.""" + temperature: Optional[float] = 0.8 + """The value used to modulate the logits distribution.""" + repetition_penalty: Optional[float] = None + """The parameter for repetition penalty. 1.0 means no penalty. + See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.""" + return_full_text: bool = False + """Whether to prepend the prompt to the generated text""" + truncate: Optional[int] = None + """Truncate input tokens to the given size""" + stop_sequences: List[str] = Field(default_factory=list) + """Stop generating tokens if a member of `stop_sequences` is generated""" + seed: Optional[int] = None + """Random sampling seed""" + inference_server_url: str = "" + """text-generation-inference instance base url""" + timeout: int = 120 + """Timeout in seconds""" + streaming: bool = False + """Whether to generate a stream of tokens asynchronously""" + do_sample: bool = False + """Activate logits sampling""" + watermark: bool = False + """Watermarking with [A Watermark for Large Language Models] + (https://arxiv.org/abs/2301.10226)""" + server_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any text-generation-inference server parameters not explicitly specified""" + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `call` not explicitly specified""" + model: str + client: Any + async_client: Any task: Optional[str] = None """Task to call the model with.
Should be a task that returns `generated_text` or `summary_text`.""" - model_kwargs: Optional[dict] = None - """Keyword arguments to pass to the model.""" - - huggingfacehub_api_token: Optional[str] = None class Config: """Configuration for this pydantic object.""" extra = Extra.forbid + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = get_pydantic_field_names(cls) + extra = values.get("model_kwargs", {}) + for field_name in list(values): + if field_name in extra: + raise ValueError(f"Found {field_name} supplied twice.") + if field_name not in all_required_field_names: + logger.warning( + f"""WARNING! {field_name} is not a default parameter. + {field_name} was transferred to model_kwargs. + Please make sure that {field_name} is what you intended.""" + ) + extra[field_name] = values.pop(field_name) + + invalid_model_kwargs = all_required_field_names.intersection(extra.keys()) + if invalid_model_kwargs: + raise ValueError( + f"Parameters {invalid_model_kwargs} should be specified explicitly. " + f"Instead they were passed in as part of the `model_kwargs` parameter." + ) + + values["model_kwargs"] = extra + if "endpoint_url" not in values and "repo_id" not in values: + raise ValueError( + "Please specify an `endpoint_url` or `repo_id` for the model." + ) + if "endpoint_url" in values and "repo_id" in values: + raise ValueError( + "Please specify either an `endpoint_url` OR a `repo_id`, not both." + ) + values["model"] = values.get("endpoint_url") or values.get("repo_id") + return values + @root_validator() def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - huggingfacehub_api_token = get_from_dict_or_env( - values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN" - ) + """Validate that the package is installed and that the API token is valid.""" try: - from huggingface_hub.hf_api import HfApi - - try: - HfApi( - endpoint="https://huggingface.co", # Can be a Private Hub endpoint. - token=huggingfacehub_api_token, - ).whoami() - except Exception as e: - raise ValueError( - "Could not authenticate with huggingface_hub. " - "Please check your API token." - ) from e + from huggingface_hub import login except ImportError: raise ImportError( "Could not import huggingface_hub python package. " "Please install it with `pip install huggingface_hub`." ) - values["huggingfacehub_api_token"] = huggingfacehub_api_token + try: + huggingfacehub_api_token = get_from_dict_or_env( + values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN" + ) + login(token=huggingfacehub_api_token) + except Exception as e: + raise ValueError( + "Could not authenticate with huggingface_hub. " + "Please check your API token." 
+ ) from e + + from huggingface_hub import AsyncInferenceClient, InferenceClient + + values["client"] = InferenceClient( + model=values["model"], + timeout=values["timeout"], + token=huggingfacehub_api_token, + **values["server_kwargs"], + ) + values["async_client"] = AsyncInferenceClient( + model=values["model"], + timeout=values["timeout"], + token=huggingfacehub_api_token, + **values["server_kwargs"], + ) + return values + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling text generation inference API.""" + return { + "max_new_tokens": self.max_new_tokens, + "top_k": self.top_k, + "top_p": self.top_p, + "typical_p": self.typical_p, + "temperature": self.temperature, + "repetition_penalty": self.repetition_penalty, + "return_full_text": self.return_full_text, + "truncate": self.truncate, + "stop_sequences": self.stop_sequences, + "seed": self.seed, + "do_sample": self.do_sample, + "watermark": self.watermark, + **self.model_kwargs, + } + @property def _identifying_params(self) -> Mapping[str, Any]: """Get the identifying parameters.""" @@ -95,6 +228,13 @@ def _llm_type(self) -> str: """Return type of llm.""" return "huggingface_endpoint" + def _invocation_params( + self, runtime_stop: Optional[List[str]], **kwargs: Any + ) -> Dict[str, Any]: + params = {**self._default_params, **kwargs} + params["stop_sequences"] = params["stop_sequences"] + (runtime_stop or []) + return params + def _call( self, prompt: str, @@ -102,62 +242,129 @@ def _call( run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any, ) -> str: - """Call out to HuggingFace Hub's inference endpoint. + """Call out to HuggingFace Hub's inference endpoint.""" + invocation_params = self._invocation_params(stop, **kwargs) + if self.streaming: + completion = "" + for chunk in self._stream(prompt, stop, run_manager, **invocation_params): + completion += chunk.text + return completion + else: + invocation_params["stop"] = invocation_params[ + "stop_sequences" + ] # porting 'stop_sequences' into the 'stop' argument + response = self.client.post( + json={"inputs": prompt, "parameters": invocation_params}, + stream=False, + task=self.task, + ) + response_text = json.loads(response.decode())[0]["generated_text"] - Args: - prompt: The prompt to pass into the model. - stop: Optional list of stop words to use when generating. + # Maybe the generation has stopped at one of the stop sequences: + # then we remove this stop sequence from the end of the generated text + for stop_seq in invocation_params["stop_sequences"]: + if response_text[-len(stop_seq) :] == stop_seq: + response_text = response_text[: -len(stop_seq)] + return response_text - Returns: - The string generated by the model. + async def _acall( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + invocation_params = self._invocation_params(stop, **kwargs) + if self.streaming: + completion = "" + async for chunk in self._astream( + prompt, stop, run_manager, **invocation_params + ): + completion += chunk.text + return completion + else: + invocation_params["stop"] = invocation_params["stop_sequences"] + response = await self.async_client.post( + json={"inputs": prompt, "parameters": invocation_params}, + stream=False, + task=self.task, + ) + response_text = json.loads(response.decode())[0]["generated_text"] - Example: - .. 
code-block:: python + # Maybe the generation has stopped at one of the stop sequences: + # then remove this stop sequence from the end of the generated text + for stop_seq in invocation_params["stop_sequences"]: + if response_text[-len(stop_seq) :] == stop_seq: + response_text = response_text[: -len(stop_seq)] + return response_text - response = hf("Tell me a joke.") - """ - _model_kwargs = self.model_kwargs or {} + def _stream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[GenerationChunk]: + invocation_params = self._invocation_params(stop, **kwargs) - # payload samples - params = {**_model_kwargs, **kwargs} - parameter_payload = {"inputs": prompt, "parameters": params} + for response in self.client.text_generation( + prompt, **invocation_params, stream=True + ): + # identify stop sequence in generated text, if any + stop_seq_found: Optional[str] = None + for stop_seq in invocation_params["stop_sequences"]: + if stop_seq in response: + stop_seq_found = stop_seq - # HTTP headers for authorization - headers = { - "Authorization": f"Bearer {self.huggingfacehub_api_token}", - "Content-Type": "application/json", - } + # identify text to yield + text: Optional[str] = None + if stop_seq_found: + text = response[: response.index(stop_seq_found)] + else: + text = response - # send request - try: - response = requests.post( - self.endpoint_url, headers=headers, json=parameter_payload - ) - except requests.exceptions.RequestException as e: # This is the correct syntax - raise ValueError(f"Error raised by inference endpoint: {e}") - generated_text = response.json() - if "error" in generated_text: - raise ValueError( - f"Error raised by inference API: {generated_text['error']}" - ) - if self.task == "text-generation": - text = generated_text[0]["generated_text"] - # Remove prompt if included in generated text. - if text.startswith(prompt): - text = text[len(prompt) :] - elif self.task == "text2text-generation": - text = generated_text[0]["generated_text"] - elif self.task == "summarization": - text = generated_text[0]["summary_text"] - elif self.task == "conversational": - text = generated_text["response"][1] - else: - raise ValueError( - f"Got invalid task {self.task}, " - f"currently only {VALID_TASKS} are supported" - ) - if stop is not None: - # This is a bit hacky, but I can't figure out a better way to enforce - # stop tokens when making calls to huggingface_hub. 
- text = enforce_stop_tokens(text, stop) - return text + # yield text, if any + if text: + chunk = GenerationChunk(text=text) + yield chunk + if run_manager: + run_manager.on_llm_new_token(chunk.text) + + # break if stop sequence found + if stop_seq_found: + break + + async def _astream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> AsyncIterator[GenerationChunk]: + invocation_params = self._invocation_params(stop, **kwargs) + async for response in await self.async_client.text_generation( + prompt, **invocation_params, stream=True + ): + # identify stop sequence in generated text, if any + stop_seq_found: Optional[str] = None + for stop_seq in invocation_params["stop_sequences"]: + if stop_seq in response: + stop_seq_found = stop_seq + + # identify text to yield + text: Optional[str] = None + if stop_seq_found: + text = response[: response.index(stop_seq_found)] + else: + text = response + + # yield text, if any + if text: + chunk = GenerationChunk(text=text) + yield chunk + if run_manager: + await run_manager.on_llm_new_token(chunk.text) + + # break if stop sequence found + if stop_seq_found: + break diff --git a/libs/community/langchain_community/llms/huggingface_hub.py b/libs/community/langchain_community/llms/huggingface_hub.py index 2d912317752315..2a5deaf766d61a 100644 --- a/libs/community/langchain_community/llms/huggingface_hub.py +++ b/libs/community/langchain_community/llms/huggingface_hub.py @@ -1,6 +1,7 @@ import json from typing import Any, Dict, List, Mapping, Optional +from langchain_core._api.deprecation import deprecated from langchain_core.callbacks import CallbackManagerForLLMRun from langchain_core.language_models.llms import LLM from langchain_core.pydantic_v1 import Extra, root_validator @@ -19,8 +20,10 @@ } +@deprecated("0.0.21", removal="0.2.0", alternative="HuggingFaceEndpoint") class HuggingFaceHub(LLM): """HuggingFaceHub models. + ! This class is deprecated, you should use HuggingFaceEndpoint instead. 
To use, you should have the ``huggingface_hub`` python package installed, and the environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, or pass diff --git a/libs/community/langchain_community/llms/huggingface_pipeline.py b/libs/community/langchain_community/llms/huggingface_pipeline.py index 388ba117c25ecd..7a2b915054fb5a 100644 --- a/libs/community/langchain_community/llms/huggingface_pipeline.py +++ b/libs/community/langchain_community/llms/huggingface_pipeline.py @@ -9,8 +9,6 @@ from langchain_core.outputs import Generation, LLMResult from langchain_core.pydantic_v1 import Extra -from langchain_community.llms.utils import enforce_stop_tokens - DEFAULT_MODEL_ID = "gpt2" DEFAULT_TASK = "text-generation" VALID_TASKS = ("text2text-generation", "text-generation", "summarization") @@ -201,7 +199,12 @@ def _generate( batch_prompts = prompts[i : i + self.batch_size] # Process batch of prompts - responses = self.pipeline(batch_prompts, **pipeline_kwargs) + responses = self.pipeline( + batch_prompts, + stop_sequence=stop, + return_full_text=False, + **pipeline_kwargs, + ) # Process each response in the batch for j, response in enumerate(responses): @@ -210,23 +213,7 @@ def _generate( response = response[0] if self.pipeline.task == "text-generation": - try: - from transformers.pipelines.text_generation import ReturnType - - remove_prompt = ( - self.pipeline._postprocess_params.get("return_type") - != ReturnType.NEW_TEXT - ) - except Exception as e: - logger.warning( - f"Unable to extract pipeline return_type. " - f"Received error:\n\n{e}" - ) - remove_prompt = True - if remove_prompt: - text = response["generated_text"][len(batch_prompts[j]) :] - else: - text = response["generated_text"] + text = response["generated_text"] elif self.pipeline.task == "text2text-generation": text = response["generated_text"] elif self.pipeline.task == "summarization": @@ -236,9 +223,6 @@ def _generate( f"Got invalid task {self.pipeline.task}, " f"currently only {VALID_TASKS} are supported" ) - if stop: - # Enforce stop tokens - text = enforce_stop_tokens(text, stop) # Append the processed text to results text_generations.append(text) diff --git a/libs/community/langchain_community/llms/huggingface_text_gen_inference.py b/libs/community/langchain_community/llms/huggingface_text_gen_inference.py index e03b6f7adcf834..9f56a949c6b94f 100644 --- a/libs/community/langchain_community/llms/huggingface_text_gen_inference.py +++ b/libs/community/langchain_community/llms/huggingface_text_gen_inference.py @@ -1,6 +1,7 @@ import logging from typing import Any, AsyncIterator, Dict, Iterator, List, Optional +from langchain_core._api.deprecation import deprecated from langchain_core.callbacks import ( AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun, @@ -13,9 +14,11 @@ logger = logging.getLogger(__name__) +@deprecated("0.0.21", removal="0.2.0", alternative="HuggingFaceEndpoint") class HuggingFaceTextGenInference(LLM): """ HuggingFace text generation API. + ! This class is deprecated, you should use HuggingFaceEndpoint instead ! To use, you should have the `text-generation` python package installed and a text-generation server running. 
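For reviewers, here is a minimal usage sketch of the reworked ``HuggingFaceEndpoint`` (assuming a text-generation-inference server at ``http://localhost:8010/``; the token is a placeholder). Exactly one of ``endpoint_url`` or ``repo_id`` must be supplied, as enforced by the ``build_extra`` validator above:

.. code-block:: python

    from langchain_community.llms import HuggingFaceEndpoint

    llm = HuggingFaceEndpoint(
        endpoint_url="http://localhost:8010/",  # assumed local TGI server
        max_new_tokens=512,
        temperature=0.01,
        streaming=True,
        huggingfacehub_api_token="my-api-key",  # placeholder
    )

    # For LLM classes, stream() yields plain-text chunks; per the _stream
    # implementation above, generation halts early once a stop sequence
    # appears in the generated text.
    for chunk in llm.stream("What is Deep Learning?", stop=["."]):
        print(chunk, end="", flush=True)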
diff --git a/libs/community/langchain_community/llms/openllm.py b/libs/community/langchain_community/llms/openllm.py index afb5a18f9ba458..fa3b03e1f98d52 100644 --- a/libs/community/langchain_community/llms/openllm.py +++ b/libs/community/langchain_community/llms/openllm.py @@ -72,7 +72,7 @@ class OpenLLM(LLM): from langchain_community.llms import OpenLLM llm = OpenLLM(server_url='http://localhost:3000') - llm("What is the difference between a duck and a goose?") + llm.invoke("What is the difference between a duck and a goose?") """ model_name: Optional[str] = None @@ -82,6 +82,8 @@ class OpenLLM(LLM): See 'openllm models' for all available model variants.""" server_url: Optional[str] = None """Optional server URL that currently runs a LLMServer with 'openllm start'.""" + timeout: int = 30 + """Timeout for the openllm client""" server_type: ServerType = "http" """Optional server type. Either 'http' or 'grpc'.""" embedded: bool = True @@ -125,6 +127,7 @@ def __init__( *, model_id: Optional[str] = None, server_url: Optional[str] = None, + timeout: int = 30, server_type: Literal["grpc", "http"] = "http", embedded: bool = True, **llm_kwargs: Any, @@ -149,11 +152,12 @@ def __init__( if server_type == "http" else openllm.client.GrpcClient ) - client = client_cls(server_url) + client = client_cls(server_url, timeout) super().__init__( **{ "server_url": server_url, + "timeout": timeout, "server_type": server_type, "llm_kwargs": llm_kwargs, } @@ -217,9 +221,9 @@ def chat(input_text: str): def _identifying_params(self) -> IdentifyingParams: """Get the identifying parameters.""" if self._client is not None: - self.llm_kwargs.update(self._client._config()) - model_name = self._client._metadata()["model_name"] - model_id = self._client._metadata()["model_id"] + self.llm_kwargs.update(self._client._config) + model_name = self._client._metadata.model_dump()["model_name"] + model_id = self._client._metadata.model_dump()["model_id"] else: if self._runner is None: raise ValueError("Runner must be initialized.") @@ -265,9 +269,11 @@ def _call( self._identifying_params["model_name"], **copied ) if self._client: - res = self._client.generate( - prompt, **config.model_dump(flatten=True) - ).responses[0] + res = ( + self._client.generate(prompt, **config.model_dump(flatten=True)) + .outputs[0] + .text + ) else: assert self._runner is not None res = self._runner(prompt, **config.model_dump(flatten=True)) diff --git a/libs/community/langchain_community/storage/astradb.py b/libs/community/langchain_community/storage/astradb.py index 0cb2ea310aad2d..959ef374124c76 100644 --- a/libs/community/langchain_community/storage/astradb.py +++ b/libs/community/langchain_community/storage/astradb.py @@ -5,6 +5,7 @@ from typing import ( TYPE_CHECKING, Any, + AsyncIterator, Generic, Iterator, List, @@ -16,10 +17,13 @@ from langchain_core.stores import BaseStore, ByteStore -from langchain_community.utilities.astradb import _AstraDBEnvironment +from langchain_community.utilities.astradb import ( + SetupMode, + _AstraDBCollectionEnvironment, +) if TYPE_CHECKING: - from astrapy.db import AstraDB + from astrapy.db import AstraDB, AsyncAstraDB V = TypeVar("V") @@ -34,17 +38,23 @@ def __init__( self, api_endpoint: Optional[str] = None, astra_db_client: Optional[AstraDB] = None, namespace: Optional[str] = None, + *, + async_astra_db_client: Optional[AsyncAstraDB] = None, + pre_delete_collection: bool = False, + setup_mode: SetupMode = SetupMode.SYNC, ) -> None: - astra_env = _AstraDBEnvironment( + self.astra_env = 
_AstraDBCollectionEnvironment( + collection_name=collection_name, token=token, api_endpoint=api_endpoint, astra_db_client=astra_db_client, + async_astra_db_client=async_astra_db_client, namespace=namespace, + setup_mode=setup_mode, + pre_delete_collection=pre_delete_collection, ) - self.astra_db = astra_env.astra_db - self.collection = self.astra_db.create_collection( - collection_name=collection_name, - ) + self.collection = self.astra_env.collection + self.async_collection = self.astra_env.async_collection @abstractmethod def decode_value(self, value: Any) -> Optional[V]: @@ -56,28 +66,63 @@ def encode_value(self, value: Optional[V]) -> Any: def mget(self, keys: Sequence[str]) -> List[Optional[V]]: """Get the values associated with the given keys.""" + self.astra_env.ensure_db_setup() docs_dict = {} for doc in self.collection.paginated_find(filter={"_id": {"$in": list(keys)}}): docs_dict[doc["_id"]] = doc.get("value") return [self.decode_value(docs_dict.get(key)) for key in keys] + async def amget(self, keys: Sequence[str]) -> List[Optional[V]]: + """Get the values associated with the given keys.""" + await self.astra_env.aensure_db_setup() + docs_dict = {} + async for doc in self.async_collection.paginated_find( + filter={"_id": {"$in": list(keys)}} + ): + docs_dict[doc["_id"]] = doc.get("value") + return [self.decode_value(docs_dict.get(key)) for key in keys] + def mset(self, key_value_pairs: Sequence[Tuple[str, V]]) -> None: """Set the given key-value pairs.""" + self.astra_env.ensure_db_setup() for k, v in key_value_pairs: self.collection.upsert({"_id": k, "value": self.encode_value(v)}) + async def amset(self, key_value_pairs: Sequence[Tuple[str, V]]) -> None: + """Set the given key-value pairs.""" + await self.astra_env.aensure_db_setup() + for k, v in key_value_pairs: + await self.async_collection.upsert( + {"_id": k, "value": self.encode_value(v)} + ) + def mdelete(self, keys: Sequence[str]) -> None: """Delete the given keys.""" + self.astra_env.ensure_db_setup() self.collection.delete_many(filter={"_id": {"$in": list(keys)}}) + async def amdelete(self, keys: Sequence[str]) -> None: + """Delete the given keys.""" + await self.astra_env.aensure_db_setup() + await self.async_collection.delete_many(filter={"_id": {"$in": list(keys)}}) + def yield_keys(self, *, prefix: Optional[str] = None) -> Iterator[str]: """Yield keys in the store.""" + self.astra_env.ensure_db_setup() docs = self.collection.paginated_find() for doc in docs: key = doc["_id"] if not prefix or key.startswith(prefix): yield key + async def ayield_keys(self, *, prefix: Optional[str] = None) -> AsyncIterator[str]: + """Yield keys in the store.""" + await self.astra_env.aensure_db_setup() + async for doc in self.async_collection.paginated_find(): + key = doc["_id"] + if not prefix or key.startswith(prefix): + yield key + class AstraDBStore(AstraDBBaseStore[Any]): """BaseStore implementation using DataStax AstraDB as the underlying store. 
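A minimal sketch of the new async surface of the Astra DB store (the endpoint and token are placeholders; this assumes ``AstraDBStore`` forwards these constructor arguments to the base class shown above):

.. code-block:: python

    import asyncio

    from langchain_community.storage.astradb import AstraDBStore
    from langchain_community.utilities.astradb import SetupMode

    store = AstraDBStore(
        collection_name="my_kv_store",
        token="AstraCS:...",  # placeholder application token
        api_endpoint="https://my-db-id-my-region.apps.astra.datastax.com",  # placeholder
        setup_mode=SetupMode.ASYNC,  # defer collection creation to the first awaited call
    )

    async def main() -> None:
        await store.amset([("k1", "v1"), ("k2", "v2")])
        print(await store.amget(["k1", "missing"]))  # expected: ['v1', None]
        async for key in store.ayield_keys(prefix="k"):
            print(key)
        await store.amdelete(["k1", "k2"])

    asyncio.run(main())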
diff --git a/libs/community/langchain_community/tools/__init__.py b/libs/community/langchain_community/tools/__init__.py index 3456ef10bc1e62..59ad157de5bf83 100644 --- a/libs/community/langchain_community/tools/__init__.py +++ b/libs/community/langchain_community/tools/__init__.py @@ -118,6 +118,32 @@ def _import_brave_search_tool() -> Any: return BraveSearch +def _import_cogniswitch_store_file_tool() -> Any: + from langchain_community.tools.cogniswitch.tool import ( + CogniswitchKnowledgeSourceFile, + ) + + return CogniswitchKnowledgeSourceFile + + +def _import_cogniswitch_store_url_tool() -> Any: + from langchain_community.tools.cogniswitch.tool import CogniswitchKnowledgeSourceURL + + return CogniswitchKnowledgeSourceURL + + +def _import_cogniswitch_answer_tool() -> Any: + from langchain_community.tools.cogniswitch.tool import CogniswitchKnowledgeRequest + + return CogniswitchKnowledgeRequest + + +def _import_cogniswitch_knowledge_status_tool() -> Any: + from langchain_community.tools.cogniswitch.tool import CogniswitchKnowledgeStatus + + return CogniswitchKnowledgeStatus + + def _import_connery_tool() -> Any: from langchain_community.tools.connery import ConneryAction @@ -803,6 +829,14 @@ def __getattr__(name: str) -> Any: return _import_bing_search_tool_BingSearchRun() elif name == "BraveSearch": return _import_brave_search_tool() + elif name == "CogniswitchKnowledgeSourceFile": + return _import_cogniswitch_store_file_tool() + elif name == "CogniswitchKnowledgeSourceURL": + return _import_cogniswitch_store_url_tool() + elif name == "CogniswitchKnowledgeRequest": + return _import_cogniswitch_answer_tool() + elif name == "CogniswitchKnowledgeStatus": + return _import_cogniswitch_knowledge_status_tool() elif name == "ConneryAction": return _import_connery_tool() elif name == "DuckDuckGoSearchResults": @@ -1043,6 +1077,10 @@ def __getattr__(name: str) -> Any: "BingSearchRun", "BraveSearch", "ClickTool", + "CogniswitchKnowledgeSourceFile", + "CogniswitchKnowledgeSourceURL", + "CogniswitchKnowledgeRequest", + "CogniswitchKnowledgeStatus", "ConneryAction", "CopyFileTool", "CurrentWebPageTool", diff --git a/libs/community/langchain_community/tools/cogniswitch/__init__.py b/libs/community/langchain_community/tools/cogniswitch/__init__.py new file mode 100644 index 00000000000000..3a89a8d7d3a9ad --- /dev/null +++ b/libs/community/langchain_community/tools/cogniswitch/__init__.py @@ -0,0 +1 @@ +"Cogniswitch Tools" diff --git a/libs/community/langchain_community/tools/cogniswitch/tool.py b/libs/community/langchain_community/tools/cogniswitch/tool.py new file mode 100644 index 00000000000000..e2878e6ed544ec --- /dev/null +++ b/libs/community/langchain_community/tools/cogniswitch/tool.py @@ -0,0 +1,399 @@ +from __future__ import annotations + +from typing import Any, Dict, Optional + +import requests +from langchain_core.callbacks import CallbackManagerForToolRun +from langchain_core.tools import BaseTool + + +class CogniswitchKnowledgeRequest(BaseTool): + """ + A tool for interacting with the Cogniswitch service to answer questions. + name: str = "cogniswitch_knowledge_request" + description: str = ( + "A wrapper around cogniswitch service to answer the question + from the knowledge base." + "Input should be a search query." 
+ ) + """ + + name: str = "cogniswitch_knowledge_request" + description: str = """A wrapper around cogniswitch service to + answer the question from the knowledge base.""" + cs_token: str + OAI_token: str + apiKey: str + api_url = "https://api.cogniswitch.ai:8243/cs-api/0.0.1/cs/knowledgeRequest" + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> Dict[str, Any]: + """ + Use the tool to answer a query. + + Args: + query (str): Natural language query, + that you would like to ask to your knowledge graph. + run_manager (Optional[CallbackManagerForChainRun]): + Manager for chain run callbacks. + + Returns: + Dict[str, Any]: Output dictionary containing + the 'response' from the service. + """ + response = self.answer_cs(self.cs_token, self.OAI_token, query, self.apiKey) + return response + + def answer_cs(self, cs_token: str, OAI_token: str, query: str, apiKey: str) -> dict: + """ + Send a query to the Cogniswitch service and retrieve the response. + + Args: + cs_token (str): Cogniswitch token. + OAI_token (str): OpenAI token. + apiKey (str): OAuth token. + query (str): Query to be answered. + + Returns: + dict: Response JSON from the Cogniswitch service. + """ + if not cs_token: + raise ValueError("Missing cs_token") + if not OAI_token: + raise ValueError("Missing OpenAI token") + if not apiKey: + raise ValueError("Missing cogniswitch OAuth token") + if not query: + raise ValueError("Missing input query") + + headers = { + "apiKey": apiKey, + "platformToken": cs_token, + "openAIToken": OAI_token, + } + + data = {"query": query} + response = requests.post(self.api_url, headers=headers, verify=False, data=data) + return response.json() + + +class CogniswitchKnowledgeStatus(BaseTool): + """ + A cogniswitch tool for interacting with the Cogniswitch services to know the + status of the document or url uploaded. + name: str = "cogniswitch_knowledge_status" + description: str = ( + "A wrapper around cogniswitch services to know the status of + the document uploaded from a url or a file. " + "Input should be a file name or the url link" + ) + """ + + name: str = "cogniswitch_knowledge_status" + description: str = """A wrapper around cogniswitch services to know + the status of the document uploaded from a url or a file.""" + cs_token: str + OAI_token: str + apiKey: str + knowledge_status_url = ( + "https://api.cogniswitch.ai:8243/cs-api/0.0.1/cs/knowledgeSource/status" + ) + + def _run( + self, + document_name: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> Dict[str, Any]: + """ + Use the tool to know the status of the document uploaded. + + Args: + document_name (str): name of the document or + the url uploaded + run_manager (Optional[CallbackManagerForChainRun]): + Manager for chain run callbacks. + + Returns: + Dict[str, Any]: Output dictionary containing + the 'response' from the service. + """ + response = self.knowledge_status(document_name) + return response + + def knowledge_status(self, document_name: str) -> dict: + """ + Use this function to know the status of the document or the URL uploaded + Args: + document_name (str): The document name or the url that is uploaded. + + Returns: + dict: Response JSON from the Cogniswitch service. 
+ """ + + params = {"docName": document_name, "platformToken": self.cs_token} + headers = { + "apiKey": self.apiKey, + "openAIToken": self.OAI_token, + "platformToken": self.cs_token, + } + response = requests.get( + self.knowledge_status_url, + headers=headers, + params=params, + verify=False, + ) + if response.status_code == 200: + source_info = response.json() + source_data = dict(source_info[-1]) + status = source_data.get("status") + if status == 0: + source_data["status"] = "SUCCESS" + elif status == 1: + source_data["status"] = "PROCESSING" + elif status == 2: + source_data["status"] = "UPLOADED" + elif status == 3: + source_data["status"] = "FAILURE" + elif status == 4: + source_data["status"] = "UPLOAD_FAILURE" + elif status == 5: + source_data["status"] = "REJECTED" + + if "filePath" in source_data.keys(): + source_data.pop("filePath") + if "savedFileName" in source_data.keys(): + source_data.pop("savedFileName") + if "integrationConfigId" in source_data.keys(): + source_data.pop("integrationConfigId") + if "metaData" in source_data.keys(): + source_data.pop("metaData") + if "docEntryId" in source_data.keys(): + source_data.pop("docEntryId") + return source_data + else: + # error_message = response.json()["message"] + return { + "message": response.status_code, + } + + +class CogniswitchKnowledgeSourceFile(BaseTool): + """ + A cogniswitch tool for interacting with the Cogniswitch services to store data. + name: str = "cogniswitch_knowledge_source_file" + description: str = ( + "This calls the CogniSwitch services to analyze & store data from a file. + If the input looks like a file path, assign that string value to file key. + Assign document name & description only if provided in input." + ) + """ + + name: str = "cogniswitch_knowledge_source_file" + description: str = """ + This calls the CogniSwitch services to analyze & store data from a file. + If the input looks like a file path, assign that string value to file key. + Assign document name & description only if provided in input. + """ + cs_token: str + OAI_token: str + apiKey: str + knowledgesource_file = ( + "https://api.cogniswitch.ai:8243/cs-api/0.0.1/cs/knowledgeSource/file" + ) + + def _run( + self, + file: Optional[str] = None, + document_name: Optional[str] = None, + document_description: Optional[str] = None, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> Dict[str, Any]: + """ + Execute the tool to store the data given from a file. + This calls the CogniSwitch services to analyze & store data from a file. + If the input looks like a file path, assign that string value to file key. + Assign document name & description only if provided in input. + + Args: + file Optional[str]: The file path of your knowledge + document_name Optional[str]: Name of your knowledge document + document_description Optional[str]: Description of your knowledge document + run_manager (Optional[CallbackManagerForChainRun]): + Manager for chain run callbacks. + + Returns: + Dict[str, Any]: Output dictionary containing + the 'response' from the service. + """ + if not file: + return { + "message": "No input provided", + } + else: + response = self.store_data( + file=file, + document_name=document_name, + document_description=document_description, + ) + return response + + def store_data( + self, + file: Optional[str], + document_name: Optional[str], + document_description: Optional[str], + ) -> dict: + """ + Store data using the Cogniswitch service. + This calls the CogniSwitch services to analyze & store data from a file. 
+ If the input looks like a file path, assign that string value to file key. + Assign document name & description only if provided in input. + + Args: + file (Optional[str]): file path of your file. + the current files supported by the files are + .txt, .pdf, .docx, .doc, .html + document_name (Optional[str]): Name of the document you are uploading. + document_description (Optional[str]): Description of the document. + + Returns: + dict: Response JSON from the Cogniswitch service. + """ + headers = { + "apiKey": self.apiKey, + "openAIToken": self.OAI_token, + "platformToken": self.cs_token, + } + data: Dict[str, Any] + if not document_name: + document_name = "" + if not document_description: + document_description = "" + + if file is not None: + files = {"file": open(file, "rb")} + + data = { + "documentName": document_name, + "documentDescription": document_description, + } + response = requests.post( + self.knowledgesource_file, + headers=headers, + verify=False, + data=data, + files=files, + ) + if response.status_code == 200: + return response.json() + else: + return {"message": "Bad Request"} + + +class CogniswitchKnowledgeSourceURL(BaseTool): + """ + A cogniswitch tool for interacting with the Cogniswitch services to store data. + name: str = "cogniswitch_knowledge_source_url" + description: str = ( + "This calls the CogniSwitch services to analyze & store data from a url. + the URL is provided in input, assign that value to the url key. + Assign document name & description only if provided in input" + ) + """ + + name: str = "cogniswitch_knowledge_source_url" + description: str = """ + This calls the CogniSwitch services to analyze & store data from a url. + the URL is provided in input, assign that value to the url key. + Assign document name & description only if provided in input""" + cs_token: str + OAI_token: str + apiKey: str + knowledgesource_url = ( + "https://api.cogniswitch.ai:8243/cs-api/0.0.1/cs/knowledgeSource/url" + ) + + def _run( + self, + url: Optional[str] = None, + document_name: Optional[str] = None, + document_description: Optional[str] = None, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> Dict[str, Any]: + """ + Execute the tool to store the data given from a url. + This calls the CogniSwitch services to analyze & store data from a url. + the URL is provided in input, assign that value to the url key. + Assign document name & description only if provided in input. + + Args: + url Optional[str]: The website/url link of your knowledge + document_name Optional[str]: Name of your knowledge document + document_description Optional[str]: Description of your knowledge document + run_manager (Optional[CallbackManagerForChainRun]): + Manager for chain run callbacks. + + Returns: + Dict[str, Any]: Output dictionary containing + the 'response' from the service. + """ + if not url: + return { + "message": "No input provided", + } + response = self.store_data( + url=url, + document_name=document_name, + document_description=document_description, + ) + return response + + def store_data( + self, + url: Optional[str], + document_name: Optional[str], + document_description: Optional[str], + ) -> dict: + """ + Store data using the Cogniswitch service. + This calls the CogniSwitch services to analyze & store data from a url. + the URL is provided in input, assign that value to the url key. + Assign document name & description only if provided in input. + + Args: + url (Optional[str]): URL link. 
+ document_name (Optional[str]): Name of the document you are uploading. + document_description (Optional[str]): Description of the document. + + Returns: + dict: Response JSON from the Cogniswitch service. + """ + headers = { + "apiKey": self.apiKey, + "openAIToken": self.OAI_token, + "platformToken": self.cs_token, + } + data: Dict[str, Any] + if not document_name: + document_name = "" + if not document_description: + document_description = "" + if not url: + return { + "message": "No input provided", + } + else: + data = {"url": url} + response = requests.post( + self.knowledgesource_url, + headers=headers, + verify=False, + data=data, + ) + if response.status_code == 200: + return response.json() + else: + return {"message": "Bad Request"} diff --git a/libs/community/langchain_community/utilities/sql_database.py b/libs/community/langchain_community/utilities/sql_database.py index 087ac44bc112f8..fb542cfa883927 100644 --- a/libs/community/langchain_community/utilities/sql_database.py +++ b/libs/community/langchain_community/utilities/sql_database.py @@ -7,18 +7,17 @@ from langchain_core._api import deprecated from langchain_core.utils import get_from_env from sqlalchemy import ( - Executable, MetaData, - Result, Table, create_engine, inspect, select, text, ) -from sqlalchemy.engine import Engine +from sqlalchemy.engine import Engine, Result from sqlalchemy.exc import ProgrammingError, SQLAlchemyError from sqlalchemy.schema import CreateTable +from sqlalchemy.sql.expression import Executable from sqlalchemy.types import NullType diff --git a/libs/community/langchain_community/vectorstores/__init__.py b/libs/community/langchain_community/vectorstores/__init__.py index 61b573bb64952f..da412557f3753c 100644 --- a/libs/community/langchain_community/vectorstores/__init__.py +++ b/libs/community/langchain_community/vectorstores/__init__.py @@ -74,6 +74,12 @@ def _import_annoy() -> Any: return Annoy +def _import_apache_doris() -> Any: + from langchain_community.vectorstores.apache_doris import ApacheDoris + + return ApacheDoris + + def _import_atlas() -> Any: from langchain_community.vectorstores.atlas import AtlasDB @@ -497,6 +503,8 @@ def __getattr__(name: str) -> Any: return _import_elastic_vector_search() elif name == "Annoy": return _import_annoy() + elif name == "ApacheDoris": + return _import_apache_doris() elif name == "AtlasDB": return _import_atlas() elif name == "AwaDB": @@ -640,6 +648,7 @@ def __getattr__(name: str) -> Any: "AlibabaCloudOpenSearchSettings", "AnalyticDB", "Annoy", + "ApacheDoris", "AtlasDB", "AwaDB", "AzureSearch", diff --git a/libs/community/langchain_community/vectorstores/apache_doris.py b/libs/community/langchain_community/vectorstores/apache_doris.py new file mode 100644 index 00000000000000..12e9b58304f297 --- /dev/null +++ b/libs/community/langchain_community/vectorstores/apache_doris.py @@ -0,0 +1,480 @@ +from __future__ import annotations + +import json +import logging +from hashlib import sha1 +from threading import Thread +from typing import Any, Dict, Iterable, List, Optional, Tuple + +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_core.pydantic_v1 import BaseSettings +from langchain_core.vectorstores import VectorStore + +logger = logging.getLogger() +DEBUG = False + + +class ApacheDorisSettings(BaseSettings): + """Apache Doris client configuration. + + Attributes: + apache_doris_host (str) : An URL to connect to frontend. + Defaults to 'localhost'. 
+ apache_doris_port (int) : URL port to connect with HTTP. Defaults to 9030. + username (str) : Username to login. Defaults to 'root'. + password (str) : Password to login. Defaults to None. + database (str) : Database name to find the table. Defaults to 'default'. + table (str) : Table name to operate on. + Defaults to 'langchain'. + + column_map (Dict) : Column type map to project column name onto langchain + semantics. Must have keys: `text`, `id`, `vector`, + must be same size to number of columns. For example: + .. code-block:: python + + { + 'id': 'text_id', + 'embedding': 'text_embedding', + 'document': 'text_plain', + 'metadata': 'metadata_dictionary_in_json', + } + + Defaults to identity map. + """ + + host: str = "localhost" + port: int = 9030 + username: str = "root" + password: str = "" + + column_map: Dict[str, str] = { + "id": "id", + "document": "document", + "embedding": "embedding", + "metadata": "metadata", + } + + database: str = "default" + table: str = "langchain" + + def __getitem__(self, item: str) -> Any: + return getattr(self, item) + + class Config: + env_file = ".env" + env_prefix = "apache_doris_" + env_file_encoding = "utf-8" + + +class ApacheDoris(VectorStore): + """`Apache Doris` vector store. + + You need a `pymysql` python package, and a valid account + to connect to Apache Doris. + + For more information, please visit + [Apache Doris official site](https://doris.apache.org/) + [Apache Doris github](https://github.com/apache/doris) + """ + + def __init__( + self, + embedding: Embeddings, + *, + config: Optional[ApacheDorisSettings] = None, + **kwargs: Any, + ) -> None: + """Constructor for Apache Doris. + + Args: + embedding (Embeddings): Text embedding model. + config (ApacheDorisSettings): Apache Doris client configuration information. + """ + try: + import pymysql # type: ignore[import] + except ImportError: + raise ImportError( + "Could not import pymysql python package. " + "Please install it with `pip install pymysql`." 
+ ) + try: + from tqdm import tqdm + + self.pgbar = tqdm + except ImportError: + # Just in case if tqdm is not installed + self.pgbar = lambda x, **kwargs: x + super().__init__() + if config is not None: + self.config = config + else: + self.config = ApacheDorisSettings() + assert self.config + assert self.config.host and self.config.port + assert self.config.column_map and self.config.database and self.config.table + for k in ["id", "embedding", "document", "metadata"]: + assert k in self.config.column_map + + # initialize the schema + dim = len(embedding.embed_query("test")) + + self.schema = f"""\ +CREATE TABLE IF NOT EXISTS {self.config.database}.{self.config.table}( + {self.config.column_map['id']} varchar(50), + {self.config.column_map['document']} string, + {self.config.column_map['embedding']} array, + {self.config.column_map['metadata']} string +) ENGINE = OLAP UNIQUE KEY(id) DISTRIBUTED BY HASH(id) \ + PROPERTIES ("replication_allocation" = "tag.location.default: 1")\ +""" + self.dim = dim + self.BS = "\\" + self.must_escape = ("\\", "'") + self._embedding = embedding + self.dist_order = "DESC" + _debug_output(self.config) + + # Create a connection to Apache Doris + self.connection = pymysql.connect( + host=self.config.host, + port=self.config.port, + user=self.config.username, + password=self.config.password, + database=self.config.database, + **kwargs, + ) + + _debug_output(self.schema) + _get_named_result(self.connection, self.schema) + + def escape_str(self, value: str) -> str: + return "".join(f"{self.BS}{c}" if c in self.must_escape else c for c in value) + + @property + def embeddings(self) -> Embeddings: + return self._embedding + + def _build_insert_sql(self, transac: Iterable, column_names: Iterable[str]) -> str: + ks = ",".join(column_names) + embed_tuple_index = tuple(column_names).index( + self.config.column_map["embedding"] + ) + _data = [] + for n in transac: + n = ",".join( + [ + ( + f"'{self.escape_str(str(_n))}'" + if idx != embed_tuple_index + else f"array{str(_n)}" + ) + for (idx, _n) in enumerate(n) + ] + ) + _data.append(f"({n})") + i_str = f""" + INSERT INTO + {self.config.database}.{self.config.table}({ks}) + VALUES + {','.join(_data)} + """ + return i_str + + def _insert(self, transac: Iterable, column_names: Iterable[str]) -> None: + _insert_query = self._build_insert_sql(transac, column_names) + _debug_output(_insert_query) + _get_named_result(self.connection, _insert_query) + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + batch_size: int = 32, + ids: Optional[Iterable[str]] = None, + **kwargs: Any, + ) -> List[str]: + """Insert more texts through the embeddings and add to the VectorStore. + + Args: + texts: Iterable of strings to add to the VectorStore. + ids: Optional list of ids to associate with the texts. + batch_size: Batch size of insertion + metadata: Optional column data to be inserted + + Returns: + List of ids from adding the texts into the VectorStore. 
+ + """ + # Embed and create the documents + ids = ids or [sha1(t.encode("utf-8")).hexdigest() for t in texts] + colmap_ = self.config.column_map + transac = [] + column_names = { + colmap_["id"]: ids, + colmap_["document"]: texts, + colmap_["embedding"]: self._embedding.embed_documents(list(texts)), + } + metadatas = metadatas or [{} for _ in texts] + column_names[colmap_["metadata"]] = map(json.dumps, metadatas) + assert len(set(colmap_) - set(column_names)) >= 0 + keys, values = zip(*column_names.items()) + try: + t = None + for v in self.pgbar( + zip(*values), desc="Inserting data...", total=len(metadatas) + ): + assert ( + len(v[keys.index(self.config.column_map["embedding"])]) == self.dim + ) + transac.append(v) + if len(transac) == batch_size: + if t: + t.join() + t = Thread(target=self._insert, args=[transac, keys]) + t.start() + transac = [] + if len(transac) > 0: + if t: + t.join() + self._insert(transac, keys) + return [i for i in ids] + except Exception as e: + logger.error(f"\033[91m\033[1m{type(e)}\033[0m \033[95m{str(e)}\033[0m") + return [] + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[Dict[Any, Any]]] = None, + config: Optional[ApacheDorisSettings] = None, + text_ids: Optional[Iterable[str]] = None, + batch_size: int = 32, + **kwargs: Any, + ) -> ApacheDoris: + """Create Apache Doris wrapper with existing texts + + Args: + embedding_function (Embeddings): Function to extract text embedding + texts (Iterable[str]): List or tuple of strings to be added + config (ApacheDorisSettings, Optional): Apache Doris configuration + text_ids (Optional[Iterable], optional): IDs for the texts. + Defaults to None. + batch_size (int, optional): BatchSize when transmitting data to Apache + Doris. Defaults to 32. + metadata (List[dict], optional): metadata to texts. Defaults to None. + Returns: + Apache Doris Index + """ + ctx = cls(embedding, config=config, **kwargs) + ctx.add_texts(texts, ids=text_ids, batch_size=batch_size, metadatas=metadatas) + return ctx + + def __repr__(self) -> str: + """Text representation for Apache Doris Vector Store, prints frontends, username + and schemas. 
Easy to use with `str(ApacheDoris())` + + Returns: + repr: string to show connection info and data schema + """ + _repr = f"\033[92m\033[1m{self.config.database}.{self.config.table} @ " + _repr += f"{self.config.host}:{self.config.port}\033[0m\n\n" + _repr += f"\033[1musername: {self.config.username}\033[0m\n\nTable Schema:\n" + width = 25 + fields = 3 + _repr += "-" * (width * fields + 1) + "\n" + columns = ["name", "type", "key"] + _repr += f"|\033[94m{columns[0]:24s}\033[0m|\033[96m{columns[1]:24s}" + _repr += f"\033[0m|\033[96m{columns[2]:24s}\033[0m|\n" + _repr += "-" * (width * fields + 1) + "\n" + q_str = f"DESC {self.config.database}.{self.config.table}" + _debug_output(q_str) + rs = _get_named_result(self.connection, q_str) + for r in rs: + _repr += f"|\033[94m{r['Field']:24s}\033[0m|\033[96m{r['Type']:24s}" + _repr += f"\033[0m|\033[96m{r['Key']:24s}\033[0m|\n" + _repr += "-" * (width * fields + 1) + "\n" + return _repr + + def _build_query_sql( + self, q_emb: List[float], topk: int, where_str: Optional[str] = None + ) -> str: + q_emb_str = ",".join(map(str, q_emb)) + if where_str: + where_str = f"WHERE {where_str}" + else: + where_str = "" + + q_str = f""" + SELECT {self.config.column_map['document']}, + {self.config.column_map['metadata']}, + cosine_distance(array[{q_emb_str}], + {self.config.column_map['embedding']}) as dist + FROM {self.config.database}.{self.config.table} + {where_str} + ORDER BY dist {self.dist_order} + LIMIT {topk} + """ + + _debug_output(q_str) + return q_str + + def similarity_search( + self, query: str, k: int = 4, where_str: Optional[str] = None, **kwargs: Any + ) -> List[Document]: + """Perform a similarity search with Apache Doris + + Args: + query (str): query string + k (int, optional): Top K neighbors to retrieve. Defaults to 4. + where_str (Optional[str], optional): where condition string. + Defaults to None. + + NOTE: Please do not let end-user to fill this and always be aware + of SQL injection. When dealing with metadatas, remember to + use `{self.metadata_column}.attribute` instead of `attribute` + alone. The default name for it is `metadata`. + + Returns: + List[Document]: List of Documents + """ + return self.similarity_search_by_vector( + self._embedding.embed_query(query), k, where_str, **kwargs + ) + + def similarity_search_by_vector( + self, + embedding: List[float], + k: int = 4, + where_str: Optional[str] = None, + **kwargs: Any, + ) -> List[Document]: + """Perform a similarity search with Apache Doris by vectors + + Args: + query (str): query string + k (int, optional): Top K neighbors to retrieve. Defaults to 4. + where_str (Optional[str], optional): where condition string. + Defaults to None. + + NOTE: Please do not let end-user to fill this and always be aware + of SQL injection. When dealing with metadatas, remember to + use `{self.metadata_column}.attribute` instead of `attribute` + alone. The default name for it is `metadata`. 
+ + Returns: + List[Document]: List of (Document, similarity) + """ + q_str = self._build_query_sql(embedding, k, where_str) + try: + return [ + Document( + page_content=r[self.config.column_map["document"]], + metadata=json.loads(r[self.config.column_map["metadata"]]), + ) + for r in _get_named_result(self.connection, q_str) + ] + except Exception as e: + logger.error(f"\033[91m\033[1m{type(e)}\033[0m \033[95m{str(e)}\033[0m") + return [] + + def similarity_search_with_relevance_scores( + self, query: str, k: int = 4, where_str: Optional[str] = None, **kwargs: Any + ) -> List[Tuple[Document, float]]: + """Perform a similarity search with Apache Doris + + Args: + query (str): query string + k (int, optional): Top K neighbors to retrieve. Defaults to 4. + where_str (Optional[str], optional): where condition string. + Defaults to None. + + NOTE: Please do not let end-user to fill this and always be aware + of SQL injection. When dealing with metadatas, remember to + use `{self.metadata_column}.attribute` instead of `attribute` + alone. The default name for it is `metadata`. + + Returns: + List[Document]: List of documents + """ + q_str = self._build_query_sql(self._embedding.embed_query(query), k, where_str) + try: + return [ + ( + Document( + page_content=r[self.config.column_map["document"]], + metadata=json.loads(r[self.config.column_map["metadata"]]), + ), + r["dist"], + ) + for r in _get_named_result(self.connection, q_str) + ] + except Exception as e: + logger.error(f"\033[91m\033[1m{type(e)}\033[0m \033[95m{str(e)}\033[0m") + return [] + + def drop(self) -> None: + """ + Helper function: Drop data + """ + _get_named_result( + self.connection, + f"DROP TABLE IF EXISTS {self.config.database}.{self.config.table}", + ) + + @property + def metadata_column(self) -> str: + return self.config.column_map["metadata"] + + +def _has_mul_sub_str(s: str, *args: Any) -> bool: + """Check if a string has multiple substrings. + + Args: + s: The string to check + *args: The substrings to check for in the string + + Returns: + bool: True if all substrings are present in the string, False otherwise + """ + for a in args: + if a not in s: + return False + return True + + +def _debug_output(s: Any) -> None: + """Print a debug message if DEBUG is True. + + Args: + s: The message to print + """ + if DEBUG: + print(s) # noqa: T201 + + +def _get_named_result(connection: Any, query: str) -> List[dict[str, Any]]: + """Get a named result from a query. 
+ + Args: + connection: The connection to the database + query: The query to execute + + Returns: + List[dict[str, Any]]: The result of the query + """ + cursor = connection.cursor() + cursor.execute(query) + columns = cursor.description + result = [] + for value in cursor.fetchall(): + r = {} + for idx, datum in enumerate(value): + k = columns[idx][0] + r[k] = datum + result.append(r) + _debug_output(result) + cursor.close() + return result diff --git a/libs/community/langchain_community/vectorstores/astradb.py b/libs/community/langchain_community/vectorstores/astradb.py index e6d1a5e010072f..67751e4410c544 100644 --- a/libs/community/langchain_community/vectorstores/astradb.py +++ b/libs/community/langchain_community/vectorstores/astradb.py @@ -1,13 +1,12 @@ from __future__ import annotations -import asyncio import uuid import warnings -from asyncio import Task from concurrent.futures import ThreadPoolExecutor from typing import ( TYPE_CHECKING, Any, + Awaitable, Callable, Dict, Iterable, @@ -17,16 +16,21 @@ Tuple, Type, TypeVar, + Union, ) import numpy as np +from langchain_core._api.deprecation import deprecated from langchain_core.documents import Document from langchain_core.embeddings import Embeddings -from langchain_core.runnables import run_in_executor from langchain_core.runnables.utils import gather_with_concurrency from langchain_core.utils.iter import batch_iterate from langchain_core.vectorstores import VectorStore +from langchain_community.utilities.astradb import ( + SetupMode, + _AstraDBCollectionEnvironment, +) from langchain_community.vectorstores.utils import maximal_marginal_relevance if TYPE_CHECKING: @@ -61,6 +65,11 @@ def _unique_list(lst: List[T], key: Callable[[T], U]) -> List[T]: return new_lst +@deprecated( + since="0.1.23", + removal="0.2.0", + alternative_import="langchain_astradb.AstraDBVectorStore", +) class AstraDB(VectorStore): """Wrapper around DataStax Astra DB for vector-store workloads. @@ -161,28 +170,12 @@ def __init__( bulk_insert_batch_concurrency: Optional[int] = None, bulk_insert_overwrite_concurrency: Optional[int] = None, bulk_delete_concurrency: Optional[int] = None, + setup_mode: SetupMode = SetupMode.SYNC, pre_delete_collection: bool = False, ) -> None: """ Create an AstraDB vector store object. See class docstring for help. """ - try: - from astrapy.db import AstraDB as LibAstraDB - from astrapy.db import AstraDBCollection - except (ImportError, ModuleNotFoundError): - raise ImportError( - "Could not import a recent astrapy python package. " - "Please install it with `pip install --upgrade astrapy`." - ) - - # Conflicting-arg checks: - if astra_db_client is not None or async_astra_db_client is not None: - if token is not None or api_endpoint is not None: - raise ValueError( - "You cannot pass 'astra_db_client' or 'async_astra_db_client' to " - "AstraDB if passing 'token' and 'api_endpoint'." 
- ) - self.embedding = embedding self.collection_name = collection_name self.token = token @@ -201,105 +194,35 @@ def __init__( bulk_delete_concurrency or DEFAULT_BULK_DELETE_CONCURRENCY ) # "vector-related" settings - self._embedding_dimension: Optional[int] = None self.metric = metric + embedding_dimension: Union[int, Awaitable[int], None] = None + if setup_mode == SetupMode.ASYNC: + embedding_dimension = self._aget_embedding_dimension() + elif setup_mode == SetupMode.SYNC: + embedding_dimension = self._get_embedding_dimension() - self.astra_db = astra_db_client - self.async_astra_db = async_astra_db_client - self.collection = None - self.async_collection = None - - if token and api_endpoint: - self.astra_db = LibAstraDB( - token=self.token, - api_endpoint=self.api_endpoint, - namespace=self.namespace, - ) - try: - from astrapy.db import AsyncAstraDB - - self.async_astra_db = AsyncAstraDB( - token=self.token, - api_endpoint=self.api_endpoint, - namespace=self.namespace, - ) - except (ImportError, ModuleNotFoundError): - pass - - if self.astra_db is not None: - self.collection = AstraDBCollection( - collection_name=self.collection_name, - astra_db=self.astra_db, - ) - - self.async_setup_db_task: Optional[Task] = None - if self.async_astra_db is not None: - from astrapy.db import AsyncAstraDBCollection - - self.async_collection = AsyncAstraDBCollection( - collection_name=self.collection_name, - astra_db=self.async_astra_db, - ) - try: - self.async_setup_db_task = asyncio.create_task( - self._setup_db(pre_delete_collection) - ) - except RuntimeError: - pass - - if self.async_setup_db_task is None: - if not pre_delete_collection: - self._provision_collection() - else: - self.clear() - - def _ensure_astra_db_client(self): # type: ignore[no-untyped-def] - if not self.astra_db: - raise ValueError("Missing AstraDB client") - - async def _setup_db(self, pre_delete_collection: bool) -> None: - if pre_delete_collection: - await self.async_astra_db.delete_collection( # type: ignore[union-attr] - collection_name=self.collection_name, - ) - await self._aprovision_collection() - - async def _ensure_db_setup(self) -> None: - if self.async_setup_db_task: - await self.async_setup_db_task - - def _get_embedding_dimension(self) -> int: - if self._embedding_dimension is None: - self._embedding_dimension = len( - self.embedding.embed_query("This is a sample sentence.") - ) - return self._embedding_dimension - - def _provision_collection(self) -> None: - """ - Run the API invocation to create the collection on the backend. - - Internal-usage method, no object members are set, - other than working on the underlying actual storage. - """ - self.astra_db.create_collection( # type: ignore[union-attr] - dimension=self._get_embedding_dimension(), - collection_name=self.collection_name, - metric=self.metric, + self.astra_env = _AstraDBCollectionEnvironment( + collection_name=collection_name, + token=token, + api_endpoint=api_endpoint, + astra_db_client=astra_db_client, + async_astra_db_client=async_astra_db_client, + namespace=namespace, + setup_mode=setup_mode, + pre_delete_collection=pre_delete_collection, + embedding_dimension=embedding_dimension, + metric=metric, ) + self.astra_db = self.astra_env.astra_db + self.async_astra_db = self.astra_env.async_astra_db + self.collection = self.astra_env.collection + self.async_collection = self.astra_env.async_collection - async def _aprovision_collection(self) -> None: - """ - Run the API invocation to create the collection on the backend. 
+ def _get_embedding_dimension(self) -> int: + return len(self.embedding.embed_query(text="This is a sample sentence.")) - Internal-usage method, no object members are set, - other than working on the underlying actual storage. - """ - await self.async_astra_db.create_collection( # type: ignore[union-attr] - dimension=self._get_embedding_dimension(), - collection_name=self.collection_name, - metric=self.metric, - ) + async def _aget_embedding_dimension(self) -> int: + return len(await self.embedding.aembed_query(text="This is a sample sentence.")) @property def embeddings(self) -> Embeddings: @@ -320,14 +243,12 @@ def _select_relevance_score_fn(self) -> Callable[[float], float]: def clear(self) -> None: """Empty the collection of all its stored entries.""" - self.delete_collection() - self._provision_collection() + self.astra_env.ensure_db_setup() + self.collection.delete_many({}) async def aclear(self) -> None: """Empty the collection of all its stored entries.""" - await self._ensure_db_setup() - if not self.async_astra_db: - await run_in_executor(None, self.clear) + await self.astra_env.aensure_db_setup() await self.async_collection.delete_many({}) # type: ignore[union-attr] def delete_by_document_id(self, document_id: str) -> bool: @@ -335,7 +256,7 @@ def delete_by_document_id(self, document_id: str) -> bool: Remove a single document from the store, given its document_id (str). Return True if a document has indeed been deleted, False if ID not found. """ - self._ensure_astra_db_client() + self.astra_env.ensure_db_setup() deletion_response = self.collection.delete_one(document_id) # type: ignore[union-attr] return ((deletion_response or {}).get("status") or {}).get( "deletedCount", 0 @@ -346,9 +267,7 @@ async def adelete_by_document_id(self, document_id: str) -> bool: Remove a single document from the store, given its document_id (str). Return True if a document has indeed been deleted, False if ID not found. """ - await self._ensure_db_setup() - if not self.async_collection: - return await run_in_executor(None, self.delete_by_document_id, document_id) + await self.astra_env.aensure_db_setup() deletion_response = await self.async_collection.delete_one(document_id) return ((deletion_response or {}).get("status") or {}).get( "deletedCount", 0 @@ -433,8 +352,8 @@ def delete_collection(self) -> None: Stored data is lost and unrecoverable, resources are freed. Use with caution. """ - self._ensure_astra_db_client() - self.astra_db.delete_collection( # type: ignore[union-attr] + self.astra_env.ensure_db_setup() + self.astra_db.delete_collection( collection_name=self.collection_name, ) @@ -445,10 +364,8 @@ async def adelete_collection(self) -> None: Stored data is lost and unrecoverable, resources are freed. Use with caution. """ - await self._ensure_db_setup() - if not self.async_astra_db: - await run_in_executor(None, self.delete_collection) - await self.async_astra_db.delete_collection( # type: ignore[union-attr] + await self.astra_env.aensure_db_setup() + await self.async_astra_db.delete_collection( collection_name=self.collection_name, ) @@ -563,7 +480,7 @@ def add_texts( f"unsupported arguments ({', '.join(sorted(kwargs.keys()))}), " "which will be ignored." ) - self._ensure_astra_db_client() + self.astra_env.ensure_db_setup() embedding_vectors = self.embedding.embed_documents(list(texts)) documents_to_insert = self._get_documents_to_insert( @@ -649,22 +566,13 @@ async def aadd_texts( Returns: List[str]: List of ids of the added texts. 
""" - await self._ensure_db_setup() - if not self.async_collection: - await super().aadd_texts( - texts, - metadatas, - ids=ids, - batch_size=batch_size, - batch_concurrency=batch_concurrency, - overwrite_concurrency=overwrite_concurrency, - ) if kwargs: warnings.warn( "Method 'aadd_texts' of AstraDB vector store invoked with " f"unsupported arguments ({', '.join(sorted(kwargs.keys()))}), " "which will be ignored." ) + await self.astra_env.aensure_db_setup() embedding_vectors = await self.embedding.aembed_documents(list(texts)) documents_to_insert = self._get_documents_to_insert( @@ -725,7 +633,7 @@ def similarity_search_with_score_id_by_vector( Returns: List of (Document, score, id), the most similar to the query vector. """ - self._ensure_astra_db_client() + self.astra_env.ensure_db_setup() metadata_parameter = self._filter_to_metadata(filter) # hits = list( @@ -767,15 +675,7 @@ async def asimilarity_search_with_score_id_by_vector( Returns: List of (Document, score, id), the most similar to the query vector. """ - await self._ensure_db_setup() - if not self.async_collection: - return await run_in_executor( - None, - self.asimilarity_search_with_score_id_by_vector, # type: ignore[arg-type] - embedding, - k, - filter, - ) + await self.astra_env.aensure_db_setup() metadata_parameter = self._filter_to_metadata(filter) # return [ @@ -1004,7 +904,7 @@ def max_marginal_relevance_search_by_vector( Returns: List of Documents selected by maximal marginal relevance. """ - self._ensure_astra_db_client() + self.astra_env.ensure_db_setup() metadata_parameter = self._filter_to_metadata(filter) prefetch_hits = list( @@ -1045,18 +945,7 @@ async def amax_marginal_relevance_search_by_vector( Returns: List of Documents selected by maximal marginal relevance. """ - await self._ensure_db_setup() - if not self.async_collection: - return await run_in_executor( - None, - self.max_marginal_relevance_search_by_vector, - embedding, - k, - fetch_k, - lambda_mult, - filter, - **kwargs, - ) + await self.astra_env.aensure_db_setup() metadata_parameter = self._filter_to_metadata(filter) prefetch_hits = [ diff --git a/libs/community/langchain_community/vectorstores/azuresearch.py b/libs/community/langchain_community/vectorstores/azuresearch.py index 6e4aed8d22c19e..44455199961f59 100644 --- a/libs/community/langchain_community/vectorstores/azuresearch.py +++ b/libs/community/langchain_community/vectorstores/azuresearch.py @@ -91,6 +91,7 @@ def _get_search_client( SemanticField, SemanticPrioritizedFields, SemanticSearch, + VectorSearch, VectorSearchAlgorithmKind, VectorSearchAlgorithmMetric, VectorSearchProfile, @@ -255,7 +256,7 @@ def __init__( type=SearchFieldDataType.Collection(SearchFieldDataType.Single), searchable=True, vector_search_dimensions=len(self.embed_query("Text")), - vector_search_configuration="default", + vector_search_profile_name="myHnswProfile", ), SearchableField( name=FIELDS_METADATA, diff --git a/libs/community/langchain_community/vectorstores/lancedb.py b/libs/community/langchain_community/vectorstores/lancedb.py index 4ca68c92ca66b0..414517793ee44d 100644 --- a/libs/community/langchain_community/vectorstores/lancedb.py +++ b/libs/community/langchain_community/vectorstores/lancedb.py @@ -12,6 +12,18 @@ class LanceDB(VectorStore): """`LanceDB` vector store. To use, you should have ``lancedb`` python package installed. + You can install it with ``pip install lancedb``. + + Args: + connection: LanceDB connection to use. If not provided, a new connection + will be created. 
+ embedding: Embedding to use for the vectorstore. + vector_key: Key to use for the vector in the database. Defaults to ``vector``. + id_key: Key to use for the id in the database. Defaults to ``id``. + text_key: Key to use for the text in the database. Defaults to ``text``. + table_name: Name of the table to use. Defaults to ``vectorstore``. + + Example: .. code-block:: python @@ -25,13 +37,14 @@ class LanceDB(VectorStore): def __init__( self, - connection: Any, - embedding: Embeddings, + connection: Optional[Any] = None, + embedding: Optional[Embeddings] = None, vector_key: Optional[str] = "vector", id_key: Optional[str] = "id", text_key: Optional[str] = "text", + table_name: Optional[str] = "vectorstore", ): - """Initialize with Lance DB connection""" + """Initialize with Lance DB vectorstore""" try: import lancedb except ImportError: @@ -39,19 +52,28 @@ def __init__( "Could not import lancedb python package. " "Please install it with `pip install lancedb`." ) - if not isinstance(connection, lancedb.db.LanceTable): - raise ValueError( - "connection should be an instance of lancedb.db.LanceTable, ", - f"got {type(connection)}", - ) - self._connection = connection + self.lancedb = lancedb self._embedding = embedding self._vector_key = vector_key self._id_key = id_key self._text_key = text_key + self._table_name = table_name + + if self._embedding is None: + raise ValueError("embedding should be provided") + + if connection is not None: + if not isinstance(connection, lancedb.db.LanceTable): + raise ValueError( + "connection should be an instance of lancedb.db.LanceTable, ", + f"got {type(connection)}", + ) + self._connection = connection + else: + self._connection = self._init_table() @property - def embeddings(self) -> Embeddings: + def embeddings(self) -> Optional[Embeddings]: return self._embedding def add_texts( @@ -74,7 +96,7 @@ def add_texts( # Embed texts and create documents docs = [] ids = ids or [str(uuid.uuid4()) for _ in texts] - embeddings = self._embedding.embed_documents(list(texts)) + embeddings = self._embedding.embed_documents(list(texts)) # type: ignore for idx, text in enumerate(texts): embedding = embeddings[idx] metadata = metadatas[idx] if metadatas else {} @@ -86,7 +108,6 @@ def add_texts( **metadata, } ) - self._connection.add(docs) return ids @@ -102,14 +123,23 @@ def similarity_search( Returns: List of documents most similar to the query. 
""" - embedding = self._embedding.embed_query(query) - docs = self._connection.search(embedding).limit(k).to_df() + embedding = self._embedding.embed_query(query) # type: ignore + docs = ( + self._connection.search(embedding, vector_column_name=self._vector_key) + .limit(k) + .to_arrow() + ) + columns = docs.schema.names return [ Document( - page_content=row[self._text_key], - metadata=row[docs.columns != self._text_key], + page_content=docs[self._text_key][idx].as_py(), + metadata={ + col: docs[col][idx].as_py() + for col in columns + if col != self._text_key + }, ) - for _, row in docs.iterrows() + for idx in range(len(docs)) ] @classmethod @@ -134,3 +164,23 @@ def from_texts( instance.add_texts(texts, metadatas=metadatas, **kwargs) return instance + + def _init_table(self) -> Any: + import pyarrow as pa + + schema = pa.schema( + [ + pa.field( + self._vector_key, + pa.list_( + pa.float32(), + len(self.embeddings.embed_query("test")), # type: ignore + ), + ), + pa.field(self._id_key, pa.string()), + pa.field(self._text_key, pa.string()), + ] + ) + db = self.lancedb.connect("/tmp/lancedb") + tbl = db.create_table(self._table_name, schema=schema, mode="overwrite") + return tbl diff --git a/libs/community/langchain_community/vectorstores/rocksetdb.py b/libs/community/langchain_community/vectorstores/rocksetdb.py index 0e94089a0cddfd..ffb9f7f7f20068 100644 --- a/libs/community/langchain_community/vectorstores/rocksetdb.py +++ b/libs/community/langchain_community/vectorstores/rocksetdb.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +from copy import deepcopy from enum import Enum from typing import Any, Iterable, List, Optional, Tuple @@ -123,7 +124,7 @@ def add_texts( batch = [] doc = {} if metadatas and len(metadatas) > i: - doc = metadatas[i] + doc = deepcopy(metadatas[i]) if ids and len(ids) > i: doc["_id"] = ids[i] doc[self._text_key] = text diff --git a/libs/community/langchain_community/vectorstores/surrealdb.py b/libs/community/langchain_community/vectorstores/surrealdb.py index ef65c5ec6b0021..34f002305e1fdb 100644 --- a/libs/community/langchain_community/vectorstores/surrealdb.py +++ b/libs/community/langchain_community/vectorstores/surrealdb.py @@ -116,6 +116,8 @@ async def aadd_texts( data = {"text": text, "embedding": embeddings[idx]} if metadatas is not None and idx < len(metadatas): data["metadata"] = metadatas[idx] # type: ignore[assignment] + else: + data["metadata"] = [] record = await self.sdb.create( self.collection, data, diff --git a/libs/community/poetry.lock b/libs/community/poetry.lock index b4b4c21e8fd6d3..be5c69eebb2adf 100644 --- a/libs/community/poetry.lock +++ b/libs/community/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
[[package]] name = "aenum" @@ -3140,7 +3140,6 @@ files = [ {file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:227b178b22a7f91ae88525810441791b1ca1fc71c86f03190911793be15cec3d"}, {file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:780eb6383fbae12afa819ef676fc93e1548ae4b076c004a393af26a04b460742"}, {file = "jq-1.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:08ded6467f4ef89fec35b2bf310f210f8cd13fbd9d80e521500889edf8d22441"}, - {file = "jq-1.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:49e44ed677713f4115bd5bf2dbae23baa4cd503be350e12a1c1f506b0687848f"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:984f33862af285ad3e41e23179ac4795f1701822473e1a26bf87ff023e5a89ea"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f42264fafc6166efb5611b5d4cb01058887d050a6c19334f6a3f8a13bb369df5"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a67154f150aaf76cc1294032ed588436eb002097dd4fd1e283824bf753a05080"}, @@ -3651,7 +3650,7 @@ files = [ [[package]] name = "langchain-core" -version = "0.1.22" +version = "0.1.24" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -3661,7 +3660,7 @@ develop = true [package.dependencies] anyio = ">=3,<5" jsonpatch = "^1.33" -langsmith = "^0.0.87" +langsmith = "^0.1.0" packaging = "^23.2" pydantic = ">=1,<3" PyYAML = ">=5.3" @@ -3677,13 +3676,13 @@ url = "../core" [[package]] name = "langsmith" -version = "0.0.87" +version = "0.1.1" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langsmith-0.0.87-py3-none-any.whl", hash = "sha256:8903d3811b9fc89eb18f5961c8e6935fbd2d0f119884fbf30dc70b8f8f4121fc"}, - {file = "langsmith-0.0.87.tar.gz", hash = "sha256:36c4cc47e5b54be57d038036a30fb19ce6e4c73048cd7a464b8f25b459694d34"}, + {file = "langsmith-0.1.1-py3-none-any.whl", hash = "sha256:10ff2b977a41e3f6351d1a4239d9bd57af0547aa909e839d2791e16cc197a6f9"}, + {file = "langsmith-0.1.1.tar.gz", hash = "sha256:09df0c2ca9085105f97a4e4f281b083e312c99d162f3fe2b2d5eefd5c3692e60"}, ] [package.dependencies] @@ -5969,6 +5968,7 @@ files = [ {file = "pymongo-4.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b8729dbf25eb32ad0dc0b9bd5e6a0d0b7e5c2dc8ec06ad171088e1896b522a74"}, {file = "pymongo-4.6.1-cp312-cp312-win32.whl", hash = "sha256:3177f783ae7e08aaf7b2802e0df4e4b13903520e8380915e6337cdc7a6ff01d8"}, {file = "pymongo-4.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:00c199e1c593e2c8b033136d7a08f0c376452bac8a896c923fcd6f419e07bdd2"}, + {file = "pymongo-4.6.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6dcc95f4bb9ed793714b43f4f23a7b0c57e4ef47414162297d6f650213512c19"}, {file = "pymongo-4.6.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:13552ca505366df74e3e2f0a4f27c363928f3dff0eef9f281eb81af7f29bc3c5"}, {file = "pymongo-4.6.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:77e0df59b1a4994ad30c6d746992ae887f9756a43fc25dec2db515d94cf0222d"}, {file = "pymongo-4.6.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:3a7f02a58a0c2912734105e05dedbee4f7507e6f1bd132ebad520be0b11d46fd"}, @@ -6508,6 +6508,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = 
"sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -9161,4 +9162,4 @@ extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "as [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "e98000541a4991b1d41c9e995a4153ca24745e880afe75af6516574e3fb8b4a2" +content-hash = "5fdd9b2eb766411463fa27e19433daf5d5325f2af01ddd93b6a594e3e02a31de" diff --git a/libs/community/pyproject.toml b/libs/community/pyproject.toml index 44600db0934869..abf8dc89b6384d 100644 --- a/libs/community/pyproject.toml +++ b/libs/community/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain-community" -version = "0.0.20" +version = "0.0.21" description = "Community contributed LangChain integrations." authors = [] license = "MIT" @@ -9,7 +9,7 @@ repository = "https://github.com/langchain-ai/langchain" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = ">=0.1.21,<0.2" +langchain-core = ">=0.1.24,<0.2" SQLAlchemy = ">=1.4,<3" requests = "^2" PyYAML = ">=5.3" @@ -17,7 +17,7 @@ numpy = "^1" aiohttp = "^3.8.3" tenacity = "^8.1.0" dataclasses-json = ">= 0.5.7, < 0.7" -langsmith = ">=0.0.83,<0.1" +langsmith = "^0.1.0" tqdm = {version = ">=4.48.0", optional = true} openapi-pydantic = {version = "^0.3.2", optional = true} faiss-cpu = {version = "^1", optional = true} diff --git a/libs/community/tests/integration_tests/llms/test_huggingface_endpoint.py b/libs/community/tests/integration_tests/llms/test_huggingface_endpoint.py index ca89d54cde7759..11af7df3742690 100644 --- a/libs/community/tests/integration_tests/llms/test_huggingface_endpoint.py +++ b/libs/community/tests/integration_tests/llms/test_huggingface_endpoint.py @@ -1,6 +1,5 @@ -"""Test HuggingFace API wrapper.""" +"""Test HuggingFace Endpoints.""" -import unittest from pathlib import Path import pytest @@ -10,51 +9,73 @@ from tests.integration_tests.llms.utils import assert_llm_equality -@unittest.skip( - "This test requires an inference endpoint. 
Tested with Hugging Face endpoints" -) -def test_huggingface_endpoint_text_generation() -> None: - """Test valid call to HuggingFace text generation model.""" +def test_huggingface_endpoint_call_error() -> None: + """Test valid call to HuggingFace that errors.""" + llm = HuggingFaceEndpoint(endpoint_url="", model_kwargs={"max_new_tokens": -1}) + with pytest.raises(ValueError): + llm("Say foo:") + + +def test_saving_loading_endpoint_llm(tmp_path: Path) -> None: + """Test saving/loading a HuggingFaceHub LLM.""" llm = HuggingFaceEndpoint( endpoint_url="", task="text-generation", model_kwargs={"max_new_tokens": 10} ) + llm.save(file_path=tmp_path / "hf.yaml") + loaded_llm = load_llm(tmp_path / "hf.yaml") + assert_llm_equality(llm, loaded_llm) + + +def test_huggingface_text_generation() -> None: + """Test valid call to HuggingFace text generation model.""" + llm = HuggingFaceEndpoint(repo_id="gpt2", model_kwargs={"max_new_tokens": 10}) output = llm("Say foo:") print(output) # noqa: T201 assert isinstance(output, str) -@unittest.skip( - "This test requires an inference endpoint. Tested with Hugging Face endpoints" -) -def test_huggingface_endpoint_text2text_generation() -> None: +def test_huggingface_text2text_generation() -> None: """Test valid call to HuggingFace text2text model.""" - llm = HuggingFaceEndpoint(endpoint_url="", task="text2text-generation") + llm = HuggingFaceEndpoint(repo_id="google/flan-t5-xl") output = llm("The capital of New York is") assert output == "Albany" -@unittest.skip( - "This test requires an inference endpoint. Tested with Hugging Face endpoints" -) -def test_huggingface_endpoint_summarization() -> None: +def test_huggingface_summarization() -> None: """Test valid call to HuggingFace summarization model.""" - llm = HuggingFaceEndpoint(endpoint_url="", task="summarization") + llm = HuggingFaceEndpoint(repo_id="facebook/bart-large-cnn") output = llm("Say foo:") assert isinstance(output, str) -def test_huggingface_endpoint_call_error() -> None: +def test_huggingface_call_error() -> None: """Test valid call to HuggingFace that errors.""" - llm = HuggingFaceEndpoint(model_kwargs={"max_new_tokens": -1}) + llm = HuggingFaceEndpoint(repo_id="gpt2", model_kwargs={"max_new_tokens": -1}) with pytest.raises(ValueError): llm("Say foo:") -def test_saving_loading_endpoint_llm(tmp_path: Path) -> None: - """Test saving/loading an HuggingFaceHub LLM.""" - llm = HuggingFaceEndpoint( - endpoint_url="", task="text-generation", model_kwargs={"max_new_tokens": 10} - ) +def test_saving_loading_llm(tmp_path: Path) -> None: + """Test saving/loading a HuggingFaceEndpoint LLM.""" + llm = HuggingFaceEndpoint(repo_id="gpt2", model_kwargs={"max_new_tokens": 10}) llm.save(file_path=tmp_path / "hf.yaml") loaded_llm = load_llm(tmp_path / "hf.yaml") assert_llm_equality(llm, loaded_llm) + + +def test_invocation_params_stop_sequences() -> None: + llm = HuggingFaceEndpoint() + assert llm._default_params["stop_sequences"] == [] + + runtime_stop = None + assert llm._invocation_params(runtime_stop)["stop_sequences"] == [] + assert llm._default_params["stop_sequences"] == [] + + runtime_stop = ["stop"] + assert llm._invocation_params(runtime_stop)["stop_sequences"] == ["stop"] + assert llm._default_params["stop_sequences"] == [] + + llm = HuggingFaceEndpoint(stop_sequences=["."]) + runtime_stop = ["stop"] + assert llm._invocation_params(runtime_stop)["stop_sequences"] == [".", "stop"] + assert llm._default_params["stop_sequences"] == ["."] diff --git
a/libs/community/tests/integration_tests/storage/test_astradb.py b/libs/community/tests/integration_tests/storage/test_astradb.py index 643b4e93a3185e..63108ef0c84a7e 100644 --- a/libs/community/tests/integration_tests/storage/test_astradb.py +++ b/libs/community/tests/integration_tests/storage/test_astradb.py @@ -1,9 +1,16 @@ """Implement integration tests for AstraDB storage.""" +from __future__ import annotations + import os +from typing import TYPE_CHECKING import pytest from langchain_community.storage.astradb import AstraDBByteStore, AstraDBStore +from langchain_community.utilities.astradb import SetupMode + +if TYPE_CHECKING: + from astrapy.db import AstraDB, AsyncAstraDB def _has_env_vars() -> bool: @@ -16,7 +23,7 @@ def _has_env_vars() -> bool: @pytest.fixture -def astra_db(): # type: ignore[no-untyped-def] +def astra_db() -> AstraDB: from astrapy.db import AstraDB return AstraDB( @@ -26,24 +33,45 @@ def astra_db(): # type: ignore[no-untyped-def] ) -def init_store(astra_db, collection_name: str): # type: ignore[no-untyped-def, no-untyped-def] - astra_db.create_collection(collection_name) +@pytest.fixture +def async_astra_db() -> AsyncAstraDB: + from astrapy.db import AsyncAstraDB + + return AsyncAstraDB( + token=os.environ["ASTRA_DB_APPLICATION_TOKEN"], + api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"], + namespace=os.environ.get("ASTRA_DB_KEYSPACE"), + ) + + +def init_store(astra_db: AstraDB, collection_name: str) -> AstraDBStore: store = AstraDBStore(collection_name=collection_name, astra_db_client=astra_db) store.mset([("key1", [0.1, 0.2]), ("key2", "value2")]) return store -def init_bytestore(astra_db, collection_name: str): # type: ignore[no-untyped-def, no-untyped-def] - astra_db.create_collection(collection_name) +def init_bytestore(astra_db: AstraDB, collection_name: str) -> AstraDBByteStore: store = AstraDBByteStore(collection_name=collection_name, astra_db_client=astra_db) store.mset([("key1", b"value1"), ("key2", b"value2")]) return store +async def init_async_store( + async_astra_db: AsyncAstraDB, collection_name: str +) -> AstraDBStore: + store = AstraDBStore( + collection_name=collection_name, + async_astra_db_client=async_astra_db, + setup_mode=SetupMode.ASYNC, + ) + await store.amset([("key1", [0.1, 0.2]), ("key2", "value2")]) + return store + + @pytest.mark.requires("astrapy") @pytest.mark.skipif(not _has_env_vars(), reason="Missing Astra DB env. 
vars") class TestAstraDBStore: - def test_mget(self, astra_db) -> None: # type: ignore[no-untyped-def] + def test_mget(self, astra_db: AstraDB) -> None: """Test AstraDBStore mget method.""" collection_name = "lc_test_store_mget" try: @@ -52,7 +80,16 @@ def test_mget(self, astra_db) -> None: # type: ignore[no-untyped-def] finally: astra_db.delete_collection(collection_name) - def test_mset(self, astra_db) -> None: # type: ignore[no-untyped-def] + async def test_amget(self, async_astra_db: AsyncAstraDB) -> None: + """Test AstraDBStore amget method.""" + collection_name = "lc_test_store_mget" + try: + store = await init_async_store(async_astra_db, collection_name) + assert await store.amget(["key1", "key2"]) == [[0.1, 0.2], "value2"] + finally: + await async_astra_db.delete_collection(collection_name) + + def test_mset(self, astra_db: AstraDB) -> None: """Test that multiple keys can be set with AstraDBStore.""" collection_name = "lc_test_store_mset" try: @@ -64,7 +101,19 @@ def test_mset(self, astra_db) -> None: # type: ignore[no-untyped-def] finally: astra_db.delete_collection(collection_name) - def test_mdelete(self, astra_db) -> None: # type: ignore[no-untyped-def] + async def test_amset(self, async_astra_db: AsyncAstraDB) -> None: + """Test that multiple keys can be set with AstraDBStore.""" + collection_name = "lc_test_store_mset" + try: + store = await init_async_store(async_astra_db, collection_name) + result = await store.async_collection.find_one({"_id": "key1"}) + assert result["data"]["document"]["value"] == [0.1, 0.2] + result = await store.async_collection.find_one({"_id": "key2"}) + assert result["data"]["document"]["value"] == "value2" + finally: + await async_astra_db.delete_collection(collection_name) + + def test_mdelete(self, astra_db: AstraDB) -> None: """Test that deletion works as expected.""" collection_name = "lc_test_store_mdelete" try: @@ -75,7 +124,18 @@ def test_mdelete(self, astra_db) -> None: # type: ignore[no-untyped-def] finally: astra_db.delete_collection(collection_name) - def test_yield_keys(self, astra_db) -> None: # type: ignore[no-untyped-def] + async def test_amdelete(self, async_astra_db: AsyncAstraDB) -> None: + """Test that deletion works as expected.""" + collection_name = "lc_test_store_mdelete" + try: + store = await init_async_store(async_astra_db, collection_name) + await store.amdelete(["key1", "key2"]) + result = await store.amget(["key1", "key2"]) + assert result == [None, None] + finally: + await async_astra_db.delete_collection(collection_name) + + def test_yield_keys(self, astra_db: AstraDB) -> None: collection_name = "lc_test_store_yield_keys" try: store = init_store(astra_db, collection_name) @@ -85,7 +145,20 @@ def test_yield_keys(self, astra_db) -> None: # type: ignore[no-untyped-def] finally: astra_db.delete_collection(collection_name) - def test_bytestore_mget(self, astra_db) -> None: # type: ignore[no-untyped-def] + async def test_ayield_keys(self, async_astra_db: AsyncAstraDB) -> None: + collection_name = "lc_test_store_yield_keys" + try: + store = await init_async_store(async_astra_db, collection_name) + assert {key async for key in store.ayield_keys()} == {"key1", "key2"} + assert {key async for key in store.ayield_keys(prefix="key")} == { + "key1", + "key2", + } + assert {key async for key in store.ayield_keys(prefix="lang")} == set() + finally: + await async_astra_db.delete_collection(collection_name) + + def test_bytestore_mget(self, astra_db: AstraDB) -> None: """Test AstraDBByteStore mget method.""" collection_name = 
"lc_test_bytestore_mget" try: @@ -94,7 +167,7 @@ def test_bytestore_mget(self, astra_db) -> None: # type: ignore[no-untyped-def] finally: astra_db.delete_collection(collection_name) - def test_bytestore_mset(self, astra_db) -> None: # type: ignore[no-untyped-def] + def test_bytestore_mset(self, astra_db: AstraDB) -> None: """Test that multiple keys can be set with AstraDBByteStore.""" collection_name = "lc_test_bytestore_mset" try: diff --git a/libs/community/tests/integration_tests/vectorstores/test_lancedb.py b/libs/community/tests/integration_tests/vectorstores/test_lancedb.py index 37098e221141d1..bde46e800e1166 100644 --- a/libs/community/tests/integration_tests/vectorstores/test_lancedb.py +++ b/libs/community/tests/integration_tests/vectorstores/test_lancedb.py @@ -1,8 +1,11 @@ +import pytest + from langchain_community.vectorstores import LanceDB from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings -def test_lancedb() -> None: +@pytest.mark.requires("lancedb") +def test_lancedb_with_connection() -> None: import lancedb embeddings = FakeEmbeddings() @@ -23,22 +26,23 @@ def test_lancedb() -> None: assert "text 1" in result_texts -def test_lancedb_add_texts() -> None: - import lancedb +@pytest.mark.requires("lancedb") +def test_lancedb_without_connection() -> None: + embeddings = FakeEmbeddings() + texts = ["text 1", "text 2", "item 3"] + + store = LanceDB(embedding=embeddings) + store.add_texts(texts) + result = store.similarity_search("text 1") + result_texts = [doc.page_content for doc in result] + assert "text 1" in result_texts + +@pytest.mark.requires("lancedb") +def test_lancedb_add_texts() -> None: embeddings = FakeEmbeddings() - db = lancedb.connect("/tmp/lancedb") - texts = ["text 1"] - vectors = embeddings.embed_documents(texts) - table = db.create_table( - "my_table", - data=[ - {"vector": vectors[idx], "id": text, "text": text} - for idx, text in enumerate(texts) - ], - mode="overwrite", - ) - store = LanceDB(table, embeddings) + + store = LanceDB(embedding=embeddings) store.add_texts(["text 2"]) result = store.similarity_search("text 2") result_texts = [doc.page_content for doc in result] diff --git a/libs/community/tests/integration_tests/vectorstores/test_rocksetdb.py b/libs/community/tests/integration_tests/vectorstores/test_rocksetdb.py index b4c79b610718f9..56950ce8c4623d 100644 --- a/libs/community/tests/integration_tests/vectorstores/test_rocksetdb.py +++ b/libs/community/tests/integration_tests/vectorstores/test_rocksetdb.py @@ -1,5 +1,6 @@ import logging import os +import uuid from langchain_core.documents import Document @@ -31,10 +32,10 @@ # # See https://rockset.com/blog/introducing-vector-search-on-rockset/ for more details. -workspace = "langchain_tests" -collection_name = "langchain_demo" -text_key = "description" -embedding_key = "description_embedding" +WORKSPACE = "morgana" +COLLECTION_NAME = "langchain_demo" +TEXT_KEY = "description" +EMBEDDING_KEY = "description_embedding" class TestRockset: @@ -59,7 +60,7 @@ def setup_class(cls) -> None: elif region == "dev": host = rockset.DevRegions.usw2a1 else: - logger.warn( + logger.warning( "Using ROCKSET_REGION:%s as it is.. 
\ You should know what you're doing...", region, @@ -71,9 +72,9 @@ def setup_class(cls) -> None: if os.environ.get("ROCKSET_DELETE_DOCS_ON_START") == "1": logger.info( "Deleting all existing documents from the Rockset collection %s", - collection_name, + COLLECTION_NAME, ) - query = f"select _id from {workspace}.{collection_name}" + query = f"select _id from {WORKSPACE}.{COLLECTION_NAME}" query_response = client.Queries.query(sql={"query": query}) ids = [ @@ -84,15 +85,15 @@ def setup_class(cls) -> None: ] logger.info("Existing ids in collection: %s", ids) client.Documents.delete_documents( - collection=collection_name, + collection=COLLECTION_NAME, data=[rockset.models.DeleteDocumentsRequestData(id=i) for i in ids], - workspace=workspace, + workspace=WORKSPACE, ) embeddings = ConsistentFakeEmbeddings() embeddings.embed_documents(fake_texts) cls.rockset_vectorstore = Rockset( - client, embeddings, collection_name, text_key, embedding_key, workspace + client, embeddings, COLLECTION_NAME, TEXT_KEY, EMBEDDING_KEY, WORKSPACE ) def test_rockset_insert_and_search(self) -> None: @@ -120,6 +121,36 @@ def test_rockset_insert_and_search(self) -> None: ) assert output == [Document(page_content="bar", metadata={"metadata_index": 1})] + def test_add_documents_and_delete(self) -> None: + """ "add_documents" and "delete" are requirements to support use + with RecordManager.""" + + texts = ["foo", "bar", "baz"] + metadatas = [{"metadata_index": i} for i in range(len(texts))] + + _docs = zip(texts, metadatas) + docs = [Document(page_content=pc, metadata=i) for pc, i in _docs] + + ids = self.rockset_vectorstore.add_documents(docs) + assert len(ids) == len(texts) + + deleted = self.rockset_vectorstore.delete(ids) + assert deleted + + def test_add_texts_does_not_modify_metadata(self) -> None: + """If metadata changes, it will inhibit the langchain RecordManager + functionality.""" + + texts = ["kitty", "doggy"] + metadatas = [{"source": "kitty.txt"}, {"source": "doggy.txt"}] + ids = [str(uuid.uuid4()), str(uuid.uuid4())] + + self.rockset_vectorstore.add_texts(texts=texts, metadatas=metadatas, ids=ids) + + for metadata in metadatas: + assert len(metadata) == 1 + assert list(metadata.keys())[0] == "source" + def test_build_query_sql(self) -> None: vector = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0] q_str = self.rockset_vectorstore._build_query_sql( @@ -129,9 +160,9 @@ def test_build_query_sql(self) -> None: ) vector_str = ",".join(map(str, vector)) expected = f"""\ -SELECT * EXCEPT({embedding_key}), \ -COSINE_SIM({embedding_key}, [{vector_str}]) as dist -FROM {workspace}.{collection_name} +SELECT * EXCEPT({EMBEDDING_KEY}), \ +COSINE_SIM({EMBEDDING_KEY}, [{vector_str}]) as dist +FROM {WORKSPACE}.{COLLECTION_NAME} ORDER BY dist DESC LIMIT 4 """ @@ -147,27 +178,11 @@ def test_build_query_sql_with_where(self) -> None: ) vector_str = ",".join(map(str, vector)) expected = f"""\ -SELECT * EXCEPT({embedding_key}), \ -COSINE_SIM({embedding_key}, [{vector_str}]) as dist -FROM {workspace}.{collection_name} +SELECT * EXCEPT({EMBEDDING_KEY}), \ +COSINE_SIM({EMBEDDING_KEY}, [{vector_str}]) as dist +FROM {WORKSPACE}.{COLLECTION_NAME} WHERE age >= 10 ORDER BY dist DESC LIMIT 4 """ assert q_str == expected - - def test_add_documents_and_delete(self) -> None: - """ "add_documents" and "delete" are requirements to support use - with RecordManager""" - - texts = ["foo", "bar", "baz"] - metadatas = [{"metadata_index": i} for i in range(len(texts))] - - _docs = zip(texts, metadatas) - docs = [Document(page_content=pc,
metadata=i) for pc, i in _docs] - - ids = self.rockset_vectorstore.add_documents(docs) - assert len(ids) == len(texts) - - deleted = self.rockset_vectorstore.delete(ids) - assert deleted diff --git a/libs/community/tests/unit_tests/agent_toolkits/test_imports.py b/libs/community/tests/unit_tests/agent_toolkits/test_imports.py index 3a7ca10efdf26a..c2dbdd38333999 100644 --- a/libs/community/tests/unit_tests/agent_toolkits/test_imports.py +++ b/libs/community/tests/unit_tests/agent_toolkits/test_imports.py @@ -28,6 +28,7 @@ "create_pbi_chat_agent", "create_spark_sql_agent", "create_sql_agent", + "CogniswitchToolkit", ] diff --git a/libs/community/tests/unit_tests/document_loaders/parsers/language/test_lua.py b/libs/community/tests/unit_tests/document_loaders/parsers/language/test_lua.py index 56df8f3310a669..afb50b8345bfec 100644 --- a/libs/community/tests/unit_tests/document_loaders/parsers/language/test_lua.py +++ b/libs/community/tests/unit_tests/document_loaders/parsers/language/test_lua.py @@ -29,11 +29,26 @@ def test_is_valid(self) -> None: self.assertTrue(LuaSegmenter("local a").is_valid()) self.assertFalse(LuaSegmenter("a b c 1 2 3").is_valid()) + # TODO: Investigate flakiness. + @pytest.mark.skip( + reason=( + "Flaky. To be investigated. See " + "https://github.com/langchain-ai/langchain/actions/runs/7907779756/job/21585580650." # noqa: E501 + ) + ) def test_extract_functions_classes(self) -> None: segmenter = LuaSegmenter(self.example_code) extracted_code = segmenter.extract_functions_classes() self.assertEqual(extracted_code, self.expected_extracted_code) + # TODO: Investigate flakiness. + @pytest.mark.skip( + reason=( + "Flaky. To be investigated. See " + "https://github.com/langchain-ai/langchain/actions/runs/7923203031/job/21632416298?pr=17599 " # noqa: E501 + "and https://github.com/langchain-ai/langchain/actions/runs/7923784089/job/2163420864." # noqa: E501 + ) + ) def test_simplify_code(self) -> None: segmenter = LuaSegmenter(self.example_code) simplified_code = segmenter.simplify_code() diff --git a/libs/community/tests/unit_tests/document_loaders/test_directory.py b/libs/community/tests/unit_tests/document_loaders/test_directory.py index dc028e4814c704..f83e4bc2dfe1f4 100644 --- a/libs/community/tests/unit_tests/document_loaders/test_directory.py +++ b/libs/community/tests/unit_tests/document_loaders/test_directory.py @@ -1,4 +1,8 @@ +from pathlib import Path +from typing import Any, List + import pytest +from langchain_core.documents import Document from langchain_community.document_loaders import DirectoryLoader @@ -17,3 +21,35 @@ def test_raise_error_if_path_is_not_directory() -> None: loader.load() assert str(e.value) == f"Expected directory, got file: '{__file__}'" + + +class CustomLoader: + """Test loader.
Mimics interface of existing file loader.""" + + def __init__(self, path: Path, **kwargs: Any) -> None: + """Initialize the loader.""" + self.path = path + + def load(self) -> List[Document]: + """Load documents.""" + with open(self.path, "r") as f: + return [Document(page_content=f.read())] + + +def test_exclude_ignores_matching_files(tmp_path: Path) -> None: + txt_file = tmp_path / "test.txt" + py_file = tmp_path / "test.py" + txt_file.touch() + py_file.touch() + loader = DirectoryLoader( + str(tmp_path), + exclude=["*.py"], + loader_cls=CustomLoader, # type: ignore + ) + data = loader.load() + assert len(data) == 1 + + +def test_exclude_as_string_converts_to_sequence() -> None: + loader = DirectoryLoader("./some_directory", exclude="*.py") + assert loader.exclude == ("*.py",) diff --git a/libs/community/tests/unit_tests/tools/test_imports.py b/libs/community/tests/unit_tests/tools/test_imports.py index 4bf70aa0842f98..95fd4315575a5f 100644 --- a/libs/community/tests/unit_tests/tools/test_imports.py +++ b/libs/community/tests/unit_tests/tools/test_imports.py @@ -24,6 +24,10 @@ "BingSearchRun", "BraveSearch", "ClickTool", + "CogniswitchKnowledgeSourceFile", + "CogniswitchKnowledgeSourceURL", + "CogniswitchKnowledgeRequest", + "CogniswitchKnowledgeStatus", "ConneryAction", "CopyFileTool", "CurrentWebPageTool", diff --git a/libs/community/tests/unit_tests/tools/test_public_api.py b/libs/community/tests/unit_tests/tools/test_public_api.py index 31ea8327022e18..1595dd47109176 100644 --- a/libs/community/tests/unit_tests/tools/test_public_api.py +++ b/libs/community/tests/unit_tests/tools/test_public_api.py @@ -25,6 +25,10 @@ "BingSearchRun", "BraveSearch", "ClickTool", + "CogniswitchKnowledgeSourceFile", + "CogniswitchKnowledgeStatus", + "CogniswitchKnowledgeSourceURL", + "CogniswitchKnowledgeRequest", "ConneryAction", "CopyFileTool", "CurrentWebPageTool", diff --git a/libs/community/tests/unit_tests/vectorstores/test_public_api.py b/libs/community/tests/unit_tests/vectorstores/test_public_api.py index 53713f3e1bebf4..48b51accdda87f 100644 --- a/libs/community/tests/unit_tests/vectorstores/test_public_api.py +++ b/libs/community/tests/unit_tests/vectorstores/test_public_api.py @@ -6,6 +6,7 @@ "AlibabaCloudOpenSearchSettings", "AnalyticDB", "Annoy", + "ApacheDoris", "AtlasDB", "AwaDB", "AzureSearch", diff --git a/libs/core/Makefile b/libs/core/Makefile index ab8e9cadf03b32..d577f66cc7541b 100644 --- a/libs/core/Makefile +++ b/libs/core/Makefile @@ -15,6 +15,9 @@ tests: test_watch: poetry run ptw --snapshot-update --now . -- -vv -x tests/unit_tests +test_profile: + poetry run pytest -vv tests/unit_tests/ --profile-svg + check_imports: $(shell find langchain_core -name '*.py') poetry run python ./scripts/check_imports.py $^ diff --git a/libs/core/langchain_core/_api/beta_decorator.py b/libs/core/langchain_core/_api/beta_decorator.py index 7326dbb5ef4a0f..19f5db11df5e5d 100644 --- a/libs/core/langchain_core/_api/beta_decorator.py +++ b/libs/core/langchain_core/_api/beta_decorator.py @@ -206,10 +206,9 @@ def finalize( # type: ignore old_doc = inspect.cleandoc(old_doc or "").strip("\n") + # old_doc can be None if not old_doc: - new_doc = "[*Beta*]" - else: - new_doc = f"[*Beta*] {old_doc}" + old_doc = "" # Modify the docstring to include a beta notice. 
notes_header = "\nNotes\n-----" @@ -218,7 +217,7 @@ def finalize( # type: ignore addendum, ] details = " ".join([component.strip() for component in components if component]) - new_doc += ( + new_doc = ( f"[*Beta*] {old_doc}\n" f"{notes_header if notes_header not in old_doc else ''}\n" f".. beta::\n" diff --git a/libs/core/langchain_core/_api/deprecation.py b/libs/core/langchain_core/_api/deprecation.py index b4d56b70cb4946..970cfa569463b5 100644 --- a/libs/core/langchain_core/_api/deprecation.py +++ b/libs/core/langchain_core/_api/deprecation.py @@ -245,10 +245,9 @@ def finalize( # type: ignore old_doc = inspect.cleandoc(old_doc or "").strip("\n") + # old_doc can be None if not old_doc: - new_doc = "[*Deprecated*]" - else: - new_doc = f"[*Deprecated*] {old_doc}" + old_doc = "" # Modify the docstring to include a deprecation notice. notes_header = "\nNotes\n-----" @@ -258,7 +257,7 @@ def finalize( # type: ignore addendum, ] details = " ".join([component.strip() for component in components if component]) - new_doc += ( + new_doc = ( f"[*Deprecated*] {old_doc}\n" f"{notes_header if notes_header not in old_doc else ''}\n" f".. deprecated:: {since}\n" diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index 6279116093e5f8..fe622f61b02950 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -794,7 +794,7 @@ def dict(self, **kwargs: Any) -> Dict: class SimpleChatModel(BaseChatModel): - """Simple Chat Model.""" + """A simplified implementation for a chat model to inherit from.""" def _generate( self, diff --git a/libs/core/langchain_core/prompts/base.py b/libs/core/langchain_core/prompts/base.py index 96cfbf63740e1d..9f8ae5082f9f70 100644 --- a/libs/core/langchain_core/prompts/base.py +++ b/libs/core/langchain_core/prompts/base.py @@ -25,9 +25,10 @@ PromptValue, StringPromptValue, ) -from langchain_core.pydantic_v1 import BaseModel, Field, create_model, root_validator +from langchain_core.pydantic_v1 import BaseModel, Field, root_validator from langchain_core.runnables import RunnableConfig, RunnableSerializable from langchain_core.runnables.config import ensure_config +from langchain_core.runnables.utils import create_model if TYPE_CHECKING: from langchain_core.documents import Document diff --git a/libs/core/langchain_core/runnables/base.py b/libs/core/langchain_core/runnables/base.py index 4e4a79a50cc4ea..8c9f0b8b023788 100644 --- a/libs/core/langchain_core/runnables/base.py +++ b/libs/core/langchain_core/runnables/base.py @@ -38,7 +38,7 @@ from langchain_core._api import beta_decorator from langchain_core.load.dump import dumpd from langchain_core.load.serializable import Serializable -from langchain_core.pydantic_v1 import BaseConfig, BaseModel, Field, create_model +from langchain_core.pydantic_v1 import BaseModel, Field from langchain_core.runnables.config import ( RunnableConfig, acall_func_with_variable_args, @@ -65,6 +65,7 @@ accepts_config, accepts_context, accepts_run_manager, + create_model, gather_with_concurrency, get_function_first_arg_dict_keys, get_function_nonlocals, @@ -95,10 +96,6 @@ Other = TypeVar("Other") -class _SchemaConfig(BaseConfig): - arbitrary_types_allowed = True - - class Runnable(Generic[Input, Output], ABC): """A unit of work that can be invoked, batched, streamed, transformed and composed. 
@@ -301,7 +298,6 @@ def get_input_schema( return create_model( self.get_name("Input"), __root__=(root_type, None), - __config__=_SchemaConfig, ) @property @@ -334,7 +330,6 @@ def get_output_schema( return create_model( self.get_name("Output"), __root__=(root_type, None), - __config__=_SchemaConfig, ) @property @@ -371,7 +366,6 @@ def config_schema( ) for spec in config_specs }, - __config__=_SchemaConfig, ) if config_specs else None @@ -379,7 +373,6 @@ def config_schema( return create_model( # type: ignore[call-overload] self.get_name("Config"), - __config__=_SchemaConfig, **({"configurable": (configurable, None)} if configurable else {}), **{ field_name: (field_type, None) @@ -1691,7 +1684,6 @@ def _seq_input_schema( for k, v in next_input_schema.__fields__.items() if k not in first.mapper.steps }, - __config__=_SchemaConfig, ) elif isinstance(first, RunnablePick): return _seq_input_schema(steps[1:], config) @@ -1724,7 +1716,6 @@ def _seq_output_schema( for k, v in mapper_output_schema.__fields__.items() }, }, - __config__=_SchemaConfig, ) elif isinstance(last, RunnablePick): prev_output_schema = _seq_output_schema(steps[:-1], config) @@ -1738,14 +1729,12 @@ def _seq_output_schema( for k, v in prev_output_schema.__fields__.items() if k in last.keys }, - __config__=_SchemaConfig, ) else: field = prev_output_schema.__fields__[last.keys] return create_model( # type: ignore[call-overload] "RunnableSequenceOutput", __root__=(field.annotation, field.default), - __config__=_SchemaConfig, ) return last.get_output_schema(config) @@ -2598,7 +2587,6 @@ def get_input_schema( for k, v in step.get_input_schema(config).__fields__.items() if k != "__root__" }, - __config__=_SchemaConfig, ) return super().get_input_schema(config) @@ -2610,7 +2598,6 @@ def get_output_schema( return create_model( # type: ignore[call-overload] self.get_name("Output"), **{k: (v.OutputType, None) for k, v in self.steps.items()}, - __config__=_SchemaConfig, ) @property @@ -3250,13 +3237,11 @@ def get_input_schema( return create_model( self.get_name("Input"), **{item[1:-1]: (Any, None) for item in items}, # type: ignore - __config__=_SchemaConfig, ) else: return create_model( self.get_name("Input"), __root__=(List[Any], None), - __config__=_SchemaConfig, ) if self.InputType != Any: @@ -3266,7 +3251,6 @@ def get_input_schema( return create_model( self.get_name("Input"), **{key: (Any, None) for key in dict_keys}, # type: ignore - __config__=_SchemaConfig, ) return super().get_input_schema(config) @@ -3756,7 +3740,6 @@ def get_input_schema( List[self.bound.get_input_schema(config)], # type: ignore None, ), - __config__=_SchemaConfig, ) @property @@ -3773,7 +3756,6 @@ def get_output_schema( List[schema], # type: ignore None, ), - __config__=_SchemaConfig, ) @property diff --git a/libs/core/langchain_core/runnables/history.py b/libs/core/langchain_core/runnables/history.py index f9b68cd68e8b82..5037d2d9f55238 100644 --- a/libs/core/langchain_core/runnables/history.py +++ b/libs/core/langchain_core/runnables/history.py @@ -15,12 +15,13 @@ from langchain_core.chat_history import BaseChatMessageHistory from langchain_core.load.load import load -from langchain_core.pydantic_v1 import BaseModel, create_model +from langchain_core.pydantic_v1 import BaseModel from langchain_core.runnables.base import Runnable, RunnableBindingBase, RunnableLambda from langchain_core.runnables.config import run_in_executor from langchain_core.runnables.passthrough import RunnablePassthrough from langchain_core.runnables.utils import ( ConfigurableFieldSpec, + 
create_model, get_unique_config_specs, ) diff --git a/libs/core/langchain_core/runnables/passthrough.py b/libs/core/langchain_core/runnables/passthrough.py index 3e9277a7a2d88d..17ddda9584c7bc 100644 --- a/libs/core/langchain_core/runnables/passthrough.py +++ b/libs/core/langchain_core/runnables/passthrough.py @@ -20,7 +20,7 @@ cast, ) -from langchain_core.pydantic_v1 import BaseModel, create_model +from langchain_core.pydantic_v1 import BaseModel from langchain_core.runnables.base import ( Other, Runnable, @@ -36,7 +36,11 @@ patch_config, ) from langchain_core.runnables.graph import Graph -from langchain_core.runnables.utils import AddableDict, ConfigurableFieldSpec +from langchain_core.runnables.utils import ( + AddableDict, + ConfigurableFieldSpec, + create_model, +) from langchain_core.utils.aiter import atee, py_anext from langchain_core.utils.iter import safetee diff --git a/libs/core/langchain_core/runnables/utils.py b/libs/core/langchain_core/runnables/utils.py index 59d2c862037016..8ff332173cc693 100644 --- a/libs/core/langchain_core/runnables/utils.py +++ b/libs/core/langchain_core/runnables/utils.py @@ -5,6 +5,7 @@ import asyncio import inspect import textwrap +from functools import lru_cache from inspect import signature from itertools import groupby from typing import ( @@ -21,10 +22,13 @@ Protocol, Sequence, Set, + Type, TypeVar, Union, ) +from langchain_core.pydantic_v1 import BaseConfig, BaseModel +from langchain_core.pydantic_v1 import create_model as _create_model_base from langchain_core.runnables.schema import StreamEvent Input = TypeVar("Input", contravariant=True) @@ -489,3 +493,31 @@ def include_event(self, event: StreamEvent, root_type: str) -> bool: ) return include + + +class _SchemaConfig(BaseConfig): + arbitrary_types_allowed = True + frozen = True + + +def create_model( + __model_name: str, + **field_definitions: Any, +) -> Type[BaseModel]: + try: + return _create_model_cached(__model_name, **field_definitions) + except TypeError: + # something in field definitions is not hashable + return _create_model_base( + __model_name, __config__=_SchemaConfig, **field_definitions + ) + + +@lru_cache(maxsize=256) +def _create_model_cached( + __model_name: str, + **field_definitions: Any, +) -> Type[BaseModel]: + return _create_model_base( + __model_name, __config__=_SchemaConfig, **field_definitions + ) diff --git a/libs/core/poetry.lock b/libs/core/poetry.lock index b8b0e512b26eab..3078c2ae55e0bd 100644 --- a/libs/core/poetry.lock +++ b/libs/core/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. [[package]] name = "annotated-types" @@ -555,6 +555,17 @@ files = [ [package.dependencies] python-dateutil = ">=2.7" +[[package]] +name = "gprof2dot" +version = "2022.7.29" +description = "Generate a dot graph from the output of several profilers." +optional = false +python-versions = ">=2.7" +files = [ + {file = "gprof2dot-2022.7.29-py2.py3-none-any.whl", hash = "sha256:f165b3851d3c52ee4915eb1bd6cca571e5759823c2cd0f71a79bda93c2dc85d6"}, + {file = "gprof2dot-2022.7.29.tar.gz", hash = "sha256:45b4d298bd36608fccf9511c3fd88a773f7a1abc04d6cd39445b11ba43133ec5"}, +] + [[package]] name = "grandalf" version = "0.8" @@ -1124,13 +1135,13 @@ files = [ [[package]] name = "langsmith" -version = "0.0.87" +version = "0.1.1" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." 
optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langsmith-0.0.87-py3-none-any.whl", hash = "sha256:8903d3811b9fc89eb18f5961c8e6935fbd2d0f119884fbf30dc70b8f8f4121fc"}, - {file = "langsmith-0.0.87.tar.gz", hash = "sha256:36c4cc47e5b54be57d038036a30fb19ce6e4c73048cd7a464b8f25b459694d34"}, + {file = "langsmith-0.1.1-py3-none-any.whl", hash = "sha256:10ff2b977a41e3f6351d1a4239d9bd57af0547aa909e839d2791e16cc197a6f9"}, + {file = "langsmith-0.1.1.tar.gz", hash = "sha256:09df0c2ca9085105f97a4e4f281b083e312c99d162f3fe2b2d5eefd5c3692e60"}, ] [package.dependencies] @@ -1852,6 +1863,25 @@ pytest = ">=5.0" [package.extras] dev = ["pre-commit", "pytest-asyncio", "tox"] +[[package]] +name = "pytest-profiling" +version = "1.7.0" +description = "Profiling plugin for py.test" +optional = false +python-versions = "*" +files = [ + {file = "pytest-profiling-1.7.0.tar.gz", hash = "sha256:93938f147662225d2b8bd5af89587b979652426a8a6ffd7e73ec4a23e24b7f29"}, + {file = "pytest_profiling-1.7.0-py2.py3-none-any.whl", hash = "sha256:999cc9ac94f2e528e3f5d43465da277429984a1c237ae9818f8cfd0b06acb019"}, +] + +[package.dependencies] +gprof2dot = "*" +pytest = "*" +six = "*" + +[package.extras] +tests = ["pytest-virtualenv"] + [[package]] name = "pytest-watcher" version = "0.3.4" @@ -1966,7 +1996,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -2766,4 +2795,4 @@ extended-testing = ["jinja2"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "fec5bca68972fa7f7eea00a3cf3e158424bafdf028573611c575c3859ab289c5" +content-hash = "8fe07123109b62d7210542d8aff20df6df00819e5b0f36bc12f02206c5161c43" diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index 29dfa307678b16..fba2679af64cd2 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain-core" -version = "0.1.23" +version = "0.1.24" description = "Building applications with LLMs through composability" authors = [] license = "MIT" @@ -11,7 +11,7 @@ repository = "https://github.com/langchain-ai/langchain" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" pydantic = ">=1,<3" -langsmith = "^0.0.87" +langsmith = "^0.1.0" tenacity = "^8.1.0" jsonpatch = "^1.33" anyio = ">=3,<5" @@ -57,6 +57,7 @@ syrupy = "^4.0.2" pytest-watcher = "^0.3.4" pytest-asyncio = "^0.21.1" grandalf = "^0.8" +pytest-profiling = "^1.7.0" [tool.poetry.group.test_integration] diff --git a/libs/core/tests/unit_tests/_api/test_beta_decorator.py b/libs/core/tests/unit_tests/_api/test_beta_decorator.py index 
499e63745f995b..caef30c251c347 100644 --- a/libs/core/tests/unit_tests/_api/test_beta_decorator.py +++ b/libs/core/tests/unit_tests/_api/test_beta_decorator.py @@ -112,7 +112,7 @@ def test_beta_function() -> None: doc = beta_function.__doc__ assert isinstance(doc, str) - assert doc.startswith("[*Beta*] original doc") + assert doc.startswith("[*Beta*] original doc") assert not inspect.iscoroutinefunction(beta_function) @@ -132,7 +132,7 @@ async def test_beta_async_function() -> None: doc = beta_function.__doc__ assert isinstance(doc, str) - assert doc.startswith("[*Beta*] original doc") + assert doc.startswith("[*Beta*] original doc") assert inspect.iscoroutinefunction(beta_async_function) @@ -152,7 +152,7 @@ def test_beta_method() -> None: doc = obj.beta_method.__doc__ assert isinstance(doc, str) - assert doc.startswith("[*Beta*] original doc") + assert doc.startswith("[*Beta*] original doc") assert not inspect.iscoroutinefunction(obj.beta_method) @@ -173,7 +173,7 @@ async def test_beta_async_method() -> None: doc = obj.beta_method.__doc__ assert isinstance(doc, str) - assert doc.startswith("[*Beta*] original doc") + assert doc.startswith("[*Beta*] original doc") assert inspect.iscoroutinefunction(obj.beta_async_method) @@ -192,7 +192,7 @@ def test_beta_classmethod() -> None: doc = ClassWithBetaMethods.beta_classmethod.__doc__ assert isinstance(doc, str) - assert doc.startswith("[*Beta*] original doc") + assert doc.startswith("[*Beta*] original doc") def test_beta_staticmethod() -> None: @@ -211,7 +211,7 @@ def test_beta_staticmethod() -> None: ) doc = ClassWithBetaMethods.beta_staticmethod.__doc__ assert isinstance(doc, str) - assert doc.startswith("[*Beta*] original doc") + assert doc.startswith("[*Beta*] original doc") def test_beta_property() -> None: @@ -231,13 +231,12 @@ def test_beta_property() -> None: ) doc = ClassWithBetaMethods.beta_property.__doc__ assert isinstance(doc, str) - assert doc.startswith("[*Beta*] original doc") + assert doc.startswith("[*Beta*] original doc") -def test_whole_class_deprecation() -> None: - """Test whole class deprecation.""" +def test_whole_class_beta() -> None: + """Test whole class beta status.""" - # Test whole class deprecation @beta() class BetaClass: def __init__(self) -> None: @@ -269,6 +268,73 @@ def beta_method(self) -> str: ) +def test_whole_class_inherited_beta() -> None: + """Test whole class beta status for inherited class. + + The original version of the beta decorator created duplicates with + '[*Beta*]'. + """ + + # Test whole class beta status + @beta() + class BetaClass: + @beta() + def beta_method(self) -> str: + """original doc""" + return "This is a beta method." + + @beta() + class InheritedBetaClass(BetaClass): + @beta() + def beta_method(self) -> str: + """original doc""" + return "This is a beta method 2." + + with warnings.catch_warnings(record=True) as warning_list: + warnings.simplefilter("always") + + obj = BetaClass() + assert obj.beta_method() == "This is a beta method." + + assert len(warning_list) == 2 + warning = warning_list[0].message + assert str(warning) == ( + "The class `BetaClass` is in beta. It is actively being worked on, so the " + "API may change." + ) + + warning = warning_list[1].message + assert str(warning) == ( + "The function `beta_method` is in beta. It is actively being worked on, so " + "the API may change." + ) + + with warnings.catch_warnings(record=True) as warning_list: + warnings.simplefilter("always") + + obj = InheritedBetaClass() + assert obj.beta_method() == "This is a beta method 2."
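+ # instantiating the inherited class and calling its method should each emit one beta warning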
+ + assert len(warning_list) == 2 + warning = warning_list[0].message + assert str(warning) == ( + "The class `InheritedBetaClass` is in beta. " + "It is actively being worked on, so the " + "API may change." + ) + + warning = warning_list[1].message + assert str(warning) == ( + "The function `beta_method` is in beta. " + "It is actively being worked on, so " + "the API may change." + ) + + # check that [*Beta*] was inserted only once: + if obj.__doc__ is not None: + assert obj.__doc__.count("[*Beta*]") == 1 + + # Tests with pydantic models class MyModel(BaseModel): @beta() @@ -292,4 +358,4 @@ def test_beta_method_pydantic() -> None: doc = obj.beta_method.__doc__ assert isinstance(doc, str) - assert doc.startswith("[*Beta*] original doc") + assert doc.startswith("[*Beta*] original doc") diff --git a/libs/core/tests/unit_tests/_api/test_deprecation.py b/libs/core/tests/unit_tests/_api/test_deprecation.py index d26b18c3ad448e..8573d64b379060 100644 --- a/libs/core/tests/unit_tests/_api/test_deprecation.py +++ b/libs/core/tests/unit_tests/_api/test_deprecation.py @@ -129,7 +129,7 @@ def test_deprecated_function() -> None: doc = deprecated_function.__doc__ assert isinstance(doc, str) - assert doc.startswith("[*Deprecated*] original doc") + assert doc.startswith("[*Deprecated*] original doc") assert not inspect.iscoroutinefunction(deprecated_function) @@ -151,7 +151,7 @@ async def test_deprecated_async_function() -> None: doc = deprecated_function.__doc__ assert isinstance(doc, str) - assert doc.startswith("[*Deprecated*] original doc") + assert doc.startswith("[*Deprecated*] original doc") assert inspect.iscoroutinefunction(deprecated_async_function) @@ -171,7 +171,7 @@ def test_deprecated_method() -> None: doc = obj.deprecated_method.__doc__ assert isinstance(doc, str) - assert doc.startswith("[*Deprecated*] original doc") + assert doc.startswith("[*Deprecated*] original doc") assert not inspect.iscoroutinefunction(obj.deprecated_method) @@ -194,7 +194,7 @@ async def test_deprecated_async_method() -> None: doc = obj.deprecated_method.__doc__ assert isinstance(doc, str) - assert doc.startswith("[*Deprecated*] original doc") + assert doc.startswith("[*Deprecated*] original doc") assert inspect.iscoroutinefunction(obj.deprecated_async_method) @@ -213,7 +213,7 @@ def test_deprecated_classmethod() -> None: doc = ClassWithDeprecatedMethods.deprecated_classmethod.__doc__ assert isinstance(doc, str) - assert doc.startswith("[*Deprecated*] original doc") + assert doc.startswith("[*Deprecated*] original doc") def test_deprecated_staticmethod() -> None: @@ -233,7 +233,7 @@ def test_deprecated_staticmethod() -> None: ) doc = ClassWithDeprecatedMethods.deprecated_staticmethod.__doc__ assert isinstance(doc, str) - assert doc.startswith("[*Deprecated*] original doc") + assert doc.startswith("[*Deprecated*] original doc") def test_deprecated_property() -> None: @@ -253,7 +253,7 @@ def test_deprecated_property() -> None: ) doc = ClassWithDeprecatedMethods.deprecated_property.__doc__ assert isinstance(doc, str) - assert doc.startswith("[*Deprecated*] original doc") + assert doc.startswith("[*Deprecated*] original doc") def test_whole_class_deprecation() -> None: @@ -289,6 +289,88 @@ def deprecated_method(self) -> str: "The function `deprecated_method` was deprecated in " "LangChain 2.0.0 and will be removed in 3.0.0" ) + # [*Deprecated*] should be inserted only once: + if obj.__doc__ is not None: + assert obj.__doc__.count("[*Deprecated*]") == 1 + + +def test_whole_class_inherited_deprecation() -> None: + """Test
whole class deprecation for inherited class. + + The original version of deprecation decorator created duplicates with + '[*Deprecated*]'. + """ + + # Test whole class deprecation + @deprecated(since="2.0.0", removal="3.0.0") + class DeprecatedClass: + def __init__(self) -> None: + """original doc""" + pass + + @deprecated(since="2.0.0", removal="3.0.0") + def deprecated_method(self) -> str: + """original doc""" + return "This is a deprecated method." + + @deprecated(since="2.2.0", removal="3.2.0") + class InheritedDeprecatedClass(DeprecatedClass): + """Inherited deprecated class.""" + + def __init__(self) -> None: + """original doc""" + pass + + @deprecated(since="2.2.0", removal="3.2.0") + def deprecated_method(self) -> str: + """original doc""" + return "This is a deprecated method." + + with warnings.catch_warnings(record=True) as warning_list: + warnings.simplefilter("always") + + obj = DeprecatedClass() + assert obj.deprecated_method() == "This is a deprecated method." + + assert len(warning_list) == 2 + warning = warning_list[0].message + assert str(warning) == ( + "The class `tests.unit_tests._api.test_deprecation.DeprecatedClass` was " + "deprecated in tests 2.0.0 and will be removed in 3.0.0" + ) + + warning = warning_list[1].message + assert str(warning) == ( + "The function `deprecated_method` was deprecated in " + "LangChain 2.0.0 and will be removed in 3.0.0" + ) + # if [*Deprecated*] was inserted only once: + if obj.__doc__ is not None: + assert obj.__doc__.count("[*Deprecated*]") == 1 + + with warnings.catch_warnings(record=True) as warning_list: + warnings.simplefilter("always") + + obj = InheritedDeprecatedClass() + assert obj.deprecated_method() == "This is a deprecated method." + + assert len(warning_list) == 2 + warning = warning_list[0].message + assert str(warning) == ( + "The class " + "`tests.unit_tests._api.test_deprecation.InheritedDeprecatedClass` " + "was deprecated in tests 2.2.0 and will be removed in 3.2.0" + ) + + warning = warning_list[1].message + assert str(warning) == ( + "The function `deprecated_method` was deprecated in " + "LangChain 2.2.0 and will be removed in 3.2.0" + ) + # if [*Deprecated*] was inserted only once: + if obj.__doc__ is not None: + assert obj.__doc__.count("[*Deprecated*]") == 1 + assert "[*Deprecated*] Inherited deprecated class." 
in obj.__doc__ # Tests with pydantic models @@ -314,4 +396,4 @@ def test_deprecated_method_pydantic() -> None: doc = obj.deprecated_method.__doc__ assert isinstance(doc, str) - assert doc.startswith("[*Deprecated*] original doc") + assert doc.startswith("[*Deprecated*] original doc") diff --git a/libs/experimental/langchain_experimental/recommenders/__init__.py b/libs/experimental/langchain_experimental/recommenders/__init__.py new file mode 100644 index 00000000000000..ec06f5541894df --- /dev/null +++ b/libs/experimental/langchain_experimental/recommenders/__init__.py @@ -0,0 +1,7 @@ +"""Amazon Personalize primitives.""" +from langchain_experimental.recommenders.amazon_personalize import AmazonPersonalize +from langchain_experimental.recommenders.amazon_personalize_chain import ( + AmazonPersonalizeChain, +) + +__all__ = ["AmazonPersonalize", "AmazonPersonalizeChain"] diff --git a/libs/experimental/langchain_experimental/recommenders/amazon_personalize.py b/libs/experimental/langchain_experimental/recommenders/amazon_personalize.py new file mode 100644 index 00000000000000..b2300f0a19c3a8 --- /dev/null +++ b/libs/experimental/langchain_experimental/recommenders/amazon_personalize.py @@ -0,0 +1,195 @@ +from typing import Any, List, Mapping, Optional, Sequence + + +class AmazonPersonalize: + """Amazon Personalize Runtime wrapper for executing real-time operations: + https://docs.aws.amazon.com/personalize/latest/dg/API_Operations_Amazon_Personalize_Runtime.html + + Args: + campaign_arn: str, Optional: The Amazon Resource Name (ARN) of the campaign + to use for getting recommendations. + recommender_arn: str, Optional: The Amazon Resource Name (ARN) of the + recommender to use to get recommendations. + client: Optional: boto3 client + credentials_profile_name: str, Optional: AWS profile name + region_name: str, Optional: AWS region, e.g., us-west-2 + + Example: + .. code-block:: python + + personalize_client = AmazonPersonalize( + campaign_arn='' ) + """ + + def __init__( + self, + campaign_arn: Optional[str] = None, + recommender_arn: Optional[str] = None, + client: Optional[Any] = None, + credentials_profile_name: Optional[str] = None, + region_name: Optional[str] = None, + ): + self.campaign_arn = campaign_arn + self.recommender_arn = recommender_arn + + if campaign_arn and recommender_arn: + raise ValueError( + "Cannot initialize AmazonPersonalize with both " + "campaign_arn and recommender_arn." + ) + + if not campaign_arn and not recommender_arn: + raise ValueError( + "Cannot initialize AmazonPersonalize. Provide one of " + "campaign_arn or recommender_arn." + ) + + try: + if client is not None: + self.client = client + else: + import boto3 + import botocore.config + + if credentials_profile_name is not None: + session = boto3.Session(profile_name=credentials_profile_name) + else: + # use default credentials + session = boto3.Session() + + client_params = {} + if region_name: + client_params["region_name"] = region_name + + service = "personalize-runtime" + session_config = botocore.config.Config(user_agent_extra="langchain") + client_params["config"] = session_config + self.client = session.client(service, **client_params) + + except ImportError: + raise ModuleNotFoundError( + "Could not import boto3 python package. " + "Please install it with `pip install boto3`."
+ ) + + def get_recommendations( + self, + user_id: Optional[str] = None, + item_id: Optional[str] = None, + filter_arn: Optional[str] = None, + filter_values: Optional[Mapping[str, str]] = None, + num_results: Optional[int] = 10, + context: Optional[Mapping[str, str]] = None, + promotions: Optional[Sequence[Mapping[str, Any]]] = None, + metadata_columns: Optional[Mapping[str, Sequence[str]]] = None, + **kwargs: Any, + ) -> Mapping[str, Any]: + """Get recommendations from Amazon Personalize: + https://docs.aws.amazon.com/personalize/latest/dg/API_RS_GetRecommendations.html + + Args: + user_id: str, Optional: The user identifier + for which to retrieve recommendations + item_id: str, Optional: The item identifier + for which to retrieve recommendations + filter_arn: str, Optional: The ARN of the filter + to apply to the returned recommendations + filter_values: Mapping, Optional: The values + to use when filtering recommendations. + num_results: int, Optional: Default=10: The number of results to return + context: Mapping, Optional: The contextual metadata + to use when getting recommendations + promotions: Sequence, Optional: The promotions + to apply to the recommendation request. + metadata_columns: Mapping, Optional: The metadata columns to be returned + as part of the response. + + Returns: + response: Mapping[str, Any]: Returns an itemList and recommendationId. + + Example: + .. code-block:: python + + personalize_client = AmazonPersonalize(campaign_arn='')\n + response = personalize_client.get_recommendations(user_id="1") + + """ + if not user_id and not item_id: + raise ValueError("One of user_id or item_id is required") + + if filter_arn: + kwargs["filterArn"] = filter_arn + if filter_values: + kwargs["filterValues"] = filter_values + if user_id: + kwargs["userId"] = user_id + if num_results: + kwargs["numResults"] = num_results + if context: + kwargs["context"] = context + if promotions: + kwargs["promotions"] = promotions + if item_id: + kwargs["itemId"] = item_id + if metadata_columns: + kwargs["metadataColumns"] = metadata_columns + if self.campaign_arn: + kwargs["campaignArn"] = self.campaign_arn + if self.recommender_arn: + kwargs["recommenderArn"] = self.recommender_arn + + return self.client.get_recommendations(**kwargs) + + def get_personalized_ranking( + self, + user_id: str, + input_list: List[str], + filter_arn: Optional[str] = None, + filter_values: Optional[Mapping[str, str]] = None, + context: Optional[Mapping[str, str]] = None, + metadata_columns: Optional[Mapping[str, Sequence[str]]] = None, + **kwargs: Any, + ) -> Mapping[str, Any]: + """Re-ranks a list of recommended items for the given user. + https://docs.aws.amazon.com/personalize/latest/dg/API_RS_GetPersonalizedRanking.html + + Args: + user_id: str, Required: The user identifier + for which to retrieve recommendations + input_list: List[str], Required: A list of items (by itemId) to rank + filter_arn: str, Optional: The ARN of the filter to apply + filter_values: Mapping, Optional: The values to use + when filtering recommendations. + context: Mapping, Optional: The contextual metadata + to use when getting recommendations + metadata_columns: Mapping, Optional: The metadata columns to be returned + as part of the response. + + Returns: + response: Mapping[str, Any]: Returns personalizedRanking + and recommendationId. + + Example: + .. 
code-block:: python + + personalize_client = AmazonPersonalize(campaign_arn='')\n + response = personalize_client.get_personalized_ranking(user_id="1", + input_list=["123", "256"]) + + """ + + if filter_arn: + kwargs["filterArn"] = filter_arn + if filter_values: + kwargs["filterValues"] = filter_values + if user_id: + kwargs["userId"] = user_id + if input_list: + kwargs["inputList"] = input_list + if context: + kwargs["context"] = context + if metadata_columns: + kwargs["metadataColumns"] = metadata_columns + kwargs["campaignArn"] = self.campaign_arn + + return self.client.get_personalized_ranking(**kwargs) diff --git a/libs/experimental/langchain_experimental/recommenders/amazon_personalize_chain.py b/libs/experimental/langchain_experimental/recommenders/amazon_personalize_chain.py new file mode 100644 index 00000000000000..4c187a8006463f --- /dev/null +++ b/libs/experimental/langchain_experimental/recommenders/amazon_personalize_chain.py @@ -0,0 +1,192 @@ +from __future__ import annotations + +from typing import Any, Dict, List, Mapping, Optional, cast + +from langchain.callbacks.manager import ( + CallbackManagerForChainRun, +) +from langchain.chains import LLMChain +from langchain.chains.base import Chain +from langchain.prompts.prompt import PromptTemplate +from langchain.schema.language_model import BaseLanguageModel + +from langchain_experimental.recommenders.amazon_personalize import AmazonPersonalize + +SUMMARIZE_PROMPT_QUERY = """ +Summarize the recommended items for a user from the items list in the tag below. +Find correlations among the items in the list and provide a summary. + + {result} + +""" + +SUMMARIZE_PROMPT = PromptTemplate( + input_variables=["result"], template=SUMMARIZE_PROMPT_QUERY +) + +INTERMEDIATE_STEPS_KEY = "intermediate_steps" + +# Input Key Names to be used +USER_ID_INPUT_KEY = "user_id" +ITEM_ID_INPUT_KEY = "item_id" +INPUT_LIST_INPUT_KEY = "input_list" +FILTER_ARN_INPUT_KEY = "filter_arn" +FILTER_VALUES_INPUT_KEY = "filter_values" +CONTEXT_INPUT_KEY = "context" +PROMOTIONS_INPUT_KEY = "promotions" +METADATA_COLUMNS_INPUT_KEY = "metadata_columns" +RESULT_OUTPUT_KEY = "result" + + +class AmazonPersonalizeChain(Chain): + """Amazon Personalize Chain for retrieving recommendations + from Amazon Personalize and summarizing them + in natural language. + With return_direct=True, the raw recommendations are + returned without summarization. + Can also be used in sequential chains for working with + the output of Amazon Personalize. + + Example: + .. code-block:: python + + chain = AmazonPersonalizeChain.from_llm(llm=agent_llm, client=personalize_lg, + return_direct=True)\n + response = chain.run({'user_id':'1'})\n + response = chain.run({'user_id':'1', 'item_id':'234'}) + """ + + client: AmazonPersonalize + summarization_chain: LLMChain + return_direct: bool = False + return_intermediate_steps: bool = False + is_ranking_recipe: bool = False + + @property + def input_keys(self) -> List[str]: + """This returns an empty list since all + input keys are optional and none is required. + + :meta private: + """ + return [] + + @property + def output_keys(self) -> List[str]: + """Will always return the result key. 
+ + :meta private: + """ + return [RESULT_OUTPUT_KEY] + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + client: AmazonPersonalize, + prompt_template: PromptTemplate = SUMMARIZE_PROMPT, + is_ranking_recipe: bool = False, + **kwargs: Any, + ) -> AmazonPersonalizeChain: + """Initializes the AmazonPersonalizeChain with the LLM, the + AmazonPersonalize client, and the prompt template to be used + + Args: + llm: BaseLanguageModel: The LLM to be used in the Chain + client: AmazonPersonalize: The client created to support + invoking AmazonPersonalize + prompt_template: PromptTemplate: The prompt template which can be + invoked with the output from Amazon Personalize + is_ranking_recipe: bool: default False: set to True + if the trained recipe is USER_PERSONALIZED_RANKING + + Example: + .. code-block:: python + + chain = AmazonPersonalizeChain.from_llm(llm=agent_llm, + client=personalize_lg, return_direct=True)\n + response = chain.run({'user_id':'1'})\n + response = chain.run({'user_id':'1', 'item_id':'234'}) + + RANDOM_PROMPT_QUERY = "Summarize recommendations in {result}" + random_prompt = PromptTemplate(input_variables=["result"], + template=RANDOM_PROMPT_QUERY) + chain = AmazonPersonalizeChain.from_llm(llm=agent_llm, + client=personalize_lg, prompt_template=random_prompt)\n + """ + summarization_chain = LLMChain(llm=llm, prompt=prompt_template) + + return cls( + summarization_chain=summarization_chain, + client=client, + is_ranking_recipe=is_ranking_recipe, + **kwargs, + ) + + def _call( + self, + inputs: Mapping[str, Any], + run_manager: Optional[CallbackManagerForChainRun] = None, + ) -> Dict[str, Any]: + """Retrieves recommendations by invoking Amazon Personalize, + then invokes an LLM using the default/overridden + prompt template with the output from Amazon Personalize. + + Args: + inputs: Mapping[str, Any]: Provide input identifiers in a map. + For example, {'user_id': '1'} or + {'user_id': '1', 'item_id': '123'}. You can also pass + filter_arn and filter_values as + inputs. 
+ """ + _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager() + callbacks = _run_manager.get_child() + + user_id = inputs.get(USER_ID_INPUT_KEY) + item_id = inputs.get(ITEM_ID_INPUT_KEY) + input_list = inputs.get(INPUT_LIST_INPUT_KEY) + filter_arn = inputs.get(FILTER_ARN_INPUT_KEY) + filter_values = inputs.get(FILTER_VALUES_INPUT_KEY) + promotions = inputs.get(PROMOTIONS_INPUT_KEY) + context = inputs.get(CONTEXT_INPUT_KEY) + metadata_columns = inputs.get(METADATA_COLUMNS_INPUT_KEY) + + intermediate_steps: List = [] + intermediate_steps.append({"Calling Amazon Personalize"}) + + if self.is_ranking_recipe: + response = self.client.get_personalized_ranking( + user_id=str(user_id), + input_list=cast(List[str], input_list), + filter_arn=filter_arn, + filter_values=filter_values, + context=context, + metadata_columns=metadata_columns, + ) + else: + response = self.client.get_recommendations( + user_id=user_id, + item_id=item_id, + filter_arn=filter_arn, + filter_values=filter_values, + context=context, + promotions=promotions, + metadata_columns=metadata_columns, + ) + + _run_manager.on_text("Call to Amazon Personalize complete \n") + + if self.return_direct: + final_result = response + else: + result = self.summarization_chain( + {RESULT_OUTPUT_KEY: response}, callbacks=callbacks + ) + final_result = result[self.summarization_chain.output_key] + + intermediate_steps.append({"context": response}) + chain_result: Dict[str, Any] = {RESULT_OUTPUT_KEY: final_result} + if self.return_intermediate_steps: + chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps + return chain_result + + @property + def _chain_type(self) -> str: + return "amazon_personalize_chain" diff --git a/libs/experimental/poetry.lock b/libs/experimental/poetry.lock index 65cd4f58b0aa3a..7975be85b940de 100644 --- a/libs/experimental/poetry.lock +++ b/libs/experimental/poetry.lock @@ -1642,7 +1642,7 @@ files = [ [[package]] name = "langchain" -version = "0.1.6" +version = "0.1.8" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -1654,9 +1654,9 @@ aiohttp = "^3.8.3" async-timeout = {version = "^4.0.0", markers = "python_version < \"3.11\""} dataclasses-json = ">= 0.5.7, < 0.7" jsonpatch = "^1.33" -langchain-community = ">=0.0.18,<0.1" -langchain-core = ">=0.1.22,<0.2" -langsmith = ">=0.0.83,<0.1" +langchain-community = ">=0.0.21,<0.1" +langchain-core = ">=0.1.24,<0.2" +langsmith = "^0.1.0" numpy = "^1" pydantic = ">=1,<3" PyYAML = ">=5.3" @@ -1685,7 +1685,7 @@ url = "../langchain" [[package]] name = "langchain-community" -version = "0.0.19" +version = "0.0.21" description = "Community contributed LangChain integrations." 
optional = false python-versions = ">=3.8.1,<4.0" @@ -1695,8 +1695,8 @@ develop = true [package.dependencies] aiohttp = "^3.8.3" dataclasses-json = ">= 0.5.7, < 0.7" -langchain-core = ">=0.1.21,<0.2" -langsmith = ">=0.0.83,<0.1" +langchain-core = ">=0.1.24,<0.2" +langsmith = "^0.1.0" numpy = "^1" PyYAML = ">=5.3" requests = "^2" @@ -1705,7 +1705,7 @@ tenacity = "^8.1.0" [package.extras] cli = ["typer (>=0.9.0,<0.10.0)"] -extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "azure-ai-documentintelligence (>=1.0.0b1,<2.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<5)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "elasticsearch (>=8.12.0,<9.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "gradientai (>=1.4.0,<2.0.0)", "hdbcli (>=2.19.21,<3.0.0)", "hologres-vector (>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "httpx (>=0.24.1,<0.25.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "nvidia-riva-client (>=2.14.0,<3.0.0)", "oci (>=2.119.1,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "oracle-ads (>=2.9.1,<3.0.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "rdflib (==7.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)", "zhipuai (>=1.0.7,<2.0.0)"] +extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.3.11,<0.4.0)", "arxiv (>=1.4,<2.0)", "assemblyai (>=0.17.0,<0.18.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "azure-ai-documentintelligence (>=1.0.0b1,<2.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "cassio (>=0.1.0,<0.2.0)", "chardet (>=5.1.0,<6.0.0)", "cohere (>=4,<5)", "databricks-vectorsearch (>=0.21,<0.22)", "datasets (>=2.15.0,<3.0.0)", "dgml-utils (>=0.3.0,<0.4.0)", "elasticsearch (>=8.12.0,<9.0.0)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "feedparser (>=6.0.10,<7.0.0)", "fireworks-ai (>=0.9.0,<0.10.0)", "geopandas (>=0.13.1,<0.14.0)", "gitpython (>=3.1.32,<4.0.0)", "google-cloud-documentai (>=2.20.1,<3.0.0)", "gql (>=3.4.1,<4.0.0)", "gradientai (>=1.4.0,<2.0.0)", "hdbcli (>=2.19.21,<3.0.0)", "hologres-vector 
(>=0.0.6,<0.0.7)", "html2text (>=2020.1.16,<2021.0.0)", "httpx (>=0.24.1,<0.25.0)", "javelin-sdk (>=0.1.8,<0.2.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "jsonschema (>1)", "lxml (>=4.9.2,<5.0.0)", "markdownify (>=0.11.6,<0.12.0)", "motor (>=3.3.1,<4.0.0)", "msal (>=1.25.0,<2.0.0)", "mwparserfromhell (>=0.6.4,<0.7.0)", "mwxml (>=0.3.3,<0.4.0)", "newspaper3k (>=0.2.8,<0.3.0)", "numexpr (>=2.8.6,<3.0.0)", "nvidia-riva-client (>=2.14.0,<3.0.0)", "oci (>=2.119.1,<3.0.0)", "openai (<2)", "openapi-pydantic (>=0.3.2,<0.4.0)", "oracle-ads (>=2.9.1,<3.0.0)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "praw (>=7.7.1,<8.0.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "rapidfuzz (>=3.1.1,<4.0.0)", "rapidocr-onnxruntime (>=1.3.2,<2.0.0)", "rdflib (==7.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "rspace_client (>=2.5.0,<3.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "sqlite-vss (>=0.1.2,<0.2.0)", "streamlit (>=1.18.0,<2.0.0)", "sympy (>=1.12,<2.0)", "telethon (>=1.28.5,<2.0.0)", "timescale-vector (>=0.0.1,<0.0.2)", "tqdm (>=4.48.0)", "tree-sitter (>=0.20.2,<0.21.0)", "tree-sitter-languages (>=1.8.0,<2.0.0)", "upstash-redis (>=0.15.0,<0.16.0)", "xata (>=1.0.0a7,<2.0.0)", "xmltodict (>=0.13.0,<0.14.0)", "zhipuai (>=1.0.7,<2.0.0)"] [package.source] type = "directory" @@ -1713,7 +1713,7 @@ url = "../community" [[package]] name = "langchain-core" -version = "0.1.22" +version = "0.1.24" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -1723,7 +1723,7 @@ develop = true [package.dependencies] anyio = ">=3,<5" jsonpatch = "^1.33" -langsmith = "^0.0.87" +langsmith = "^0.1.0" packaging = "^23.2" pydantic = ">=1,<3" PyYAML = ">=5.3" @@ -1753,13 +1753,13 @@ data = ["language-data (>=1.1,<2.0)"] [[package]] name = "langsmith" -version = "0.0.87" +version = "0.1.1" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." 
optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langsmith-0.0.87-py3-none-any.whl", hash = "sha256:8903d3811b9fc89eb18f5961c8e6935fbd2d0f119884fbf30dc70b8f8f4121fc"}, - {file = "langsmith-0.0.87.tar.gz", hash = "sha256:36c4cc47e5b54be57d038036a30fb19ce6e4c73048cd7a464b8f25b459694d34"}, + {file = "langsmith-0.1.1-py3-none-any.whl", hash = "sha256:10ff2b977a41e3f6351d1a4239d9bd57af0547aa909e839d2791e16cc197a6f9"}, + {file = "langsmith-0.1.1.tar.gz", hash = "sha256:09df0c2ca9085105f97a4e4f281b083e312c99d162f3fe2b2d5eefd5c3692e60"}, ] [package.dependencies] @@ -5060,4 +5060,4 @@ extended-testing = ["faker", "jinja2", "pandas", "presidio-analyzer", "presidio- [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "0e4b297b0a8c595fbfe1e8a00d5a13057b1bdd4a0ce08d415ca4c4a7712cee88" +content-hash = "577689c4eebd644296ea46af03ec0eead2b2877b739c0989b59dc633f904099f" diff --git a/libs/experimental/pyproject.toml b/libs/experimental/pyproject.toml index be3246167aac87..307dd276f100a6 100644 --- a/libs/experimental/pyproject.toml +++ b/libs/experimental/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain-experimental" -version = "0.0.51" +version = "0.0.52" description = "Building applications with LLMs through composability" authors = [] license = "MIT" @@ -10,8 +10,8 @@ repository = "https://github.com/langchain-ai/langchain" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = "^0.1.16" -langchain = "^0.1.5" +langchain-core = "^0.1.24" +langchain = "^0.1.8" presidio-anonymizer = {version = "^2.2.352", optional = true} presidio-analyzer = {version = "^2.2.352", optional = true} faker = {version = "^19.3.1", optional = true} diff --git a/libs/langchain/langchain/chains/base.py b/libs/langchain/langchain/chains/base.py index 5c5bd1aadfa0c9..2f15d1fcc87a7a 100644 --- a/libs/langchain/langchain/chains/base.py +++ b/libs/langchain/langchain/chains/base.py @@ -20,19 +20,14 @@ from langchain_core.load.dump import dumpd from langchain_core.memory import BaseMemory from langchain_core.outputs import RunInfo -from langchain_core.pydantic_v1 import ( - BaseModel, - Field, - create_model, - root_validator, - validator, -) +from langchain_core.pydantic_v1 import BaseModel, Field, root_validator, validator from langchain_core.runnables import ( RunnableConfig, RunnableSerializable, ensure_config, run_in_executor, ) +from langchain_core.runnables.utils import create_model from langchain.schema import RUN_KEY diff --git a/libs/langchain/langchain/chains/combine_documents/base.py b/libs/langchain/langchain/chains/combine_documents/base.py index 6fbd22f3f12d4a..89ad181dace77b 100644 --- a/libs/langchain/langchain/chains/combine_documents/base.py +++ b/libs/langchain/langchain/chains/combine_documents/base.py @@ -9,8 +9,9 @@ ) from langchain_core.documents import Document from langchain_core.prompts import BasePromptTemplate, PromptTemplate -from langchain_core.pydantic_v1 import BaseModel, Field, create_model +from langchain_core.pydantic_v1 import BaseModel, Field from langchain_core.runnables.config import RunnableConfig +from langchain_core.runnables.utils import create_model from langchain.chains.base import Chain from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter diff --git a/libs/langchain/langchain/chains/combine_documents/map_reduce.py b/libs/langchain/langchain/chains/combine_documents/map_reduce.py index 18be6d4cf279bc..cdeeda3302a9f9 100644 --- 
a/libs/langchain/langchain/chains/combine_documents/map_reduce.py +++ b/libs/langchain/langchain/chains/combine_documents/map_reduce.py @@ -6,8 +6,9 @@ from langchain_core.callbacks import Callbacks from langchain_core.documents import Document -from langchain_core.pydantic_v1 import BaseModel, Extra, create_model, root_validator +from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator from langchain_core.runnables.config import RunnableConfig +from langchain_core.runnables.utils import create_model from langchain.chains.combine_documents.base import BaseCombineDocumentsChain from langchain.chains.combine_documents.reduce import ReduceDocumentsChain diff --git a/libs/langchain/langchain/chains/combine_documents/map_rerank.py b/libs/langchain/langchain/chains/combine_documents/map_rerank.py index 0466aac56b941f..8650828f9b64f2 100644 --- a/libs/langchain/langchain/chains/combine_documents/map_rerank.py +++ b/libs/langchain/langchain/chains/combine_documents/map_rerank.py @@ -6,8 +6,9 @@ from langchain_core.callbacks import Callbacks from langchain_core.documents import Document -from langchain_core.pydantic_v1 import BaseModel, Extra, create_model, root_validator +from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator from langchain_core.runnables.config import RunnableConfig +from langchain_core.runnables.utils import create_model from langchain.chains.combine_documents.base import BaseCombineDocumentsChain from langchain.chains.llm import LLMChain diff --git a/libs/langchain/poetry.lock b/libs/langchain/poetry.lock index 58c7128e8ee106..1028d8b813dc41 100644 --- a/libs/langchain/poetry.lock +++ b/libs/langchain/poetry.lock @@ -3446,7 +3446,7 @@ files = [ [[package]] name = "langchain-community" -version = "0.0.20" +version = "0.0.21" description = "Community contributed LangChain integrations." optional = false python-versions = ">=3.8.1,<4.0" @@ -3456,8 +3456,8 @@ develop = true [package.dependencies] aiohttp = "^3.8.3" dataclasses-json = ">= 0.5.7, < 0.7" -langchain-core = ">=0.1.21,<0.2" -langsmith = ">=0.0.83,<0.1" +langchain-core = ">=0.1.24,<0.2" +langsmith = "^0.1.0" numpy = "^1" PyYAML = ">=5.3" requests = "^2" @@ -3474,7 +3474,7 @@ url = "../community" [[package]] name = "langchain-core" -version = "0.1.23" +version = "0.1.24" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -3484,7 +3484,7 @@ develop = true [package.dependencies] anyio = ">=3,<5" jsonpatch = "^1.33" -langsmith = "^0.0.87" +langsmith = "^0.1.0" packaging = "^23.2" pydantic = ">=1,<3" PyYAML = ">=5.3" @@ -3517,13 +3517,13 @@ tiktoken = ">=0.5.2,<0.6.0" [[package]] name = "langsmith" -version = "0.0.87" +version = "0.1.1" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." 
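A side note on the `chains` edits above, since the same one-line change repeats across `base.py`, `combine_documents/base.py`, `map_reduce.py`, and `map_rerank.py`: the only functional change is where `create_model` is imported from. A minimal sketch of the updated import, mirroring the diff:

```python
# Previously: from langchain_core.pydantic_v1 import create_model
from langchain_core.runnables.utils import create_model
```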
optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langsmith-0.0.87-py3-none-any.whl", hash = "sha256:8903d3811b9fc89eb18f5961c8e6935fbd2d0f119884fbf30dc70b8f8f4121fc"}, - {file = "langsmith-0.0.87.tar.gz", hash = "sha256:36c4cc47e5b54be57d038036a30fb19ce6e4c73048cd7a464b8f25b459694d34"}, + {file = "langsmith-0.1.1-py3-none-any.whl", hash = "sha256:10ff2b977a41e3f6351d1a4239d9bd57af0547aa909e839d2791e16cc197a6f9"}, + {file = "langsmith-0.1.1.tar.gz", hash = "sha256:09df0c2ca9085105f97a4e4f281b083e312c99d162f3fe2b2d5eefd5c3692e60"}, ] [package.dependencies] @@ -9150,4 +9150,4 @@ text-helpers = ["chardet"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "0a2b1df8c27614d1f20d6f71f68c618144fd353aa75b6af36302ff45660b7541" +content-hash = "417ecc70e983739852f1556da647cf6c97ae6236b670d37227a19d1be92cb66d" diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index f811b13a5d8269..17f7e50f76d559 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain" -version = "0.1.7" +version = "0.1.8" description = "Building applications with LLMs through composability" authors = [] license = "MIT" @@ -12,9 +12,9 @@ langchain-server = "langchain.server:main" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = ">=0.1.22,<0.2" -langchain-community = ">=0.0.20,<0.1" -langsmith = ">=0.0.83,<0.1" +langchain-core = ">=0.1.24,<0.2" +langchain-community = ">=0.0.21,<0.1" +langsmith = "^0.1.0" pydantic = ">=1,<3" SQLAlchemy = ">=1.4,<3" requests = "^2" diff --git a/libs/partners/ai21/.gitignore b/libs/partners/ai21/.gitignore new file mode 100644 index 00000000000000..bee8a64b79a995 --- /dev/null +++ b/libs/partners/ai21/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/libs/partners/ai21/LICENSE b/libs/partners/ai21/LICENSE new file mode 100644 index 00000000000000..426b65090341f3 --- /dev/null +++ b/libs/partners/ai21/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 LangChain, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/libs/partners/ai21/Makefile b/libs/partners/ai21/Makefile new file mode 100644 index 00000000000000..1e9cd1ea594b27 --- /dev/null +++ b/libs/partners/ai21/Makefile @@ -0,0 +1,56 @@ +.PHONY: all format lint test tests integration_tests docker_tests help extended_tests + +# Default target executed when no arguments are given to make. +all: help + +# Define a variable for the test file path. 
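+# TEST_FILE defaults to the unit-test suite; the integration_test/integration_tests targets below override it per target.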
+TEST_FILE ?= tests/unit_tests/ +integration_test integration_tests: TEST_FILE = tests/integration_tests/ +test tests integration_test integration_tests: + poetry run pytest $(TEST_FILE) + + +###################### +# LINTING AND FORMATTING +###################### + +# Define a variable for Python and notebook files. +PYTHON_FILES=. +MYPY_CACHE=.mypy_cache +lint format: PYTHON_FILES=. +lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/ai21 --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$') +lint_package: PYTHON_FILES=langchain_ai21 +lint_tests: PYTHON_FILES=tests +lint_tests: MYPY_CACHE=.mypy_cache_test + +lint lint_diff lint_package lint_tests: + poetry run ruff . + poetry run ruff format $(PYTHON_FILES) --diff + poetry run ruff --select I $(PYTHON_FILES) + mkdir $(MYPY_CACHE); poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE) + +format format_diff: + poetry run ruff format $(PYTHON_FILES) + poetry run ruff --select I --fix $(PYTHON_FILES) + +spell_check: + poetry run codespell --toml pyproject.toml + +spell_fix: + poetry run codespell --toml pyproject.toml -w + +check_imports: $(shell find langchain_ai21 -name '*.py') + poetry run python ./scripts/check_imports.py $^ + +###################### +# HELP +###################### + +help: + @echo '----' + @echo 'check_imports - check imports' + @echo 'format - run code formatters' + @echo 'lint - run linters' + @echo 'test - run unit tests' + @echo 'tests - run unit tests' + @echo 'test TEST_FILE= - run all tests in file' diff --git a/libs/partners/ai21/README.md b/libs/partners/ai21/README.md new file mode 100644 index 00000000000000..7b563969e94df6 --- /dev/null +++ b/libs/partners/ai21/README.md @@ -0,0 +1,75 @@ +# langchain-ai21 + +This package contains the LangChain integrations for [AI21](https://docs.ai21.com/) through their [AI21](https://pypi.org/project/ai21/) SDK. + +## Installation and Setup + +- Install the AI21 partner package +```bash +pip install langchain-ai21 +``` +- Get an AI21 API key and set it as an environment variable (`AI21_API_KEY`) + + +## Chat Models + +This package contains the `ChatAI21` class, which is the recommended way to interface with AI21 Chat models. + +To use it, install the requirements and configure your environment: + +```bash +export AI21_API_KEY=your-api-key +``` + +Then initialize the model: + +```python +from langchain_core.messages import HumanMessage +from langchain_ai21.chat_models import ChatAI21 + +chat = ChatAI21(model="j2-ultra") +messages = [HumanMessage(content="Hello from AI21")] +chat.invoke(messages) +``` + +## LLMs +You can use AI21's generative AI models as LangChain LLMs: + +```python +from langchain.prompts import PromptTemplate +from langchain_ai21 import AI21LLM + +llm = AI21LLM(model="j2-ultra") + +template = """Question: {question} + +Answer: Let's think step by step.""" +prompt = PromptTemplate.from_template(template) + +chain = prompt | llm + +question = "Which scientist discovered relativity?" +print(chain.invoke({"question": question})) +``` + +## Embeddings + +You can use AI21's embedding models as follows: + +### Query + +```python +from langchain_ai21 import AI21Embeddings + +embeddings = AI21Embeddings() +embeddings.embed_query("Hello! This is some query") +``` + +### Document + +```python +from langchain_ai21 import AI21Embeddings + +embeddings = AI21Embeddings() +embeddings.embed_documents(["Hello! 
This is document 1", "And this is document 2!"]) +``` diff --git a/libs/partners/ai21/langchain_ai21/__init__.py b/libs/partners/ai21/langchain_ai21/__init__.py new file mode 100644 index 00000000000000..253de3778c91e5 --- /dev/null +++ b/libs/partners/ai21/langchain_ai21/__init__.py @@ -0,0 +1,9 @@ +from langchain_ai21.chat_models import ChatAI21 +from langchain_ai21.embeddings import AI21Embeddings +from langchain_ai21.llms import AI21LLM + +__all__ = [ + "AI21LLM", + "ChatAI21", + "AI21Embeddings", +] diff --git a/libs/partners/ai21/langchain_ai21/ai21_base.py b/libs/partners/ai21/langchain_ai21/ai21_base.py new file mode 100644 index 00000000000000..39c5ffbf1f054a --- /dev/null +++ b/libs/partners/ai21/langchain_ai21/ai21_base.py @@ -0,0 +1,48 @@ +import os +from typing import Dict, Optional + +from ai21 import AI21Client +from langchain_core.pydantic_v1 import BaseModel, Field, SecretStr, root_validator +from langchain_core.utils import convert_to_secret_str + +_DEFAULT_TIMEOUT_SEC = 300 + + +class AI21Base(BaseModel): + class Config: + arbitrary_types_allowed = True + + client: AI21Client = Field(default=None) + api_key: Optional[SecretStr] = None + api_host: Optional[str] = None + timeout_sec: Optional[float] = None + num_retries: Optional[int] = None + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + api_key = convert_to_secret_str( + values.get("api_key") or os.getenv("AI21_API_KEY") or "" + ) + values["api_key"] = api_key + + api_host = ( + values.get("api_host") + or os.getenv("AI21_API_URL") + or "https://api.ai21.com" + ) + values["api_host"] = api_host + + timeout_sec = values.get("timeout_sec") or float( + os.getenv("AI21_TIMEOUT_SEC", _DEFAULT_TIMEOUT_SEC) + ) + values["timeout_sec"] = timeout_sec + + if values.get("client") is None: + values["client"] = AI21Client( + api_key=api_key.get_secret_value(), + api_host=api_host, + timeout_sec=None if timeout_sec is None else float(timeout_sec), + via="langchain", + ) + + return values diff --git a/libs/partners/ai21/langchain_ai21/chat_models.py b/libs/partners/ai21/langchain_ai21/chat_models.py new file mode 100644 index 00000000000000..0839a493862e2a --- /dev/null +++ b/libs/partners/ai21/langchain_ai21/chat_models.py @@ -0,0 +1,171 @@ +import asyncio +from functools import partial +from typing import Any, List, Optional, Tuple, cast + +from ai21.models import ChatMessage, Penalty, RoleType +from langchain_core.callbacks import ( + AsyncCallbackManagerForLLMRun, + CallbackManagerForLLMRun, +) +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.messages import ( + AIMessage, + BaseMessage, + HumanMessage, + SystemMessage, +) +from langchain_core.outputs import ChatGeneration, ChatResult + +from langchain_ai21.ai21_base import AI21Base + + +def _get_system_message_from_message(message: BaseMessage) -> str: + if not isinstance(message.content, str): + raise ValueError( + f"System Message must be of type str. 
Got {type(message.content)}" + ) + + return message.content + + +def _convert_messages_to_ai21_messages( + messages: List[BaseMessage], +) -> Tuple[Optional[str], List[ChatMessage]]: + system_message = None + converted_messages: List[ChatMessage] = [] + + for i, message in enumerate(messages): + if message.type == "system": + if i != 0: + raise ValueError("System message must be at beginning of message list.") + else: + system_message = _get_system_message_from_message(message) + else: + converted_message = _convert_message_to_ai21_message(message) + converted_messages.append(converted_message) + + return system_message, converted_messages + + +def _convert_message_to_ai21_message( + message: BaseMessage, +) -> ChatMessage: + content = cast(str, message.content) + + role = None + + if isinstance(message, HumanMessage): + role = RoleType.USER + elif isinstance(message, AIMessage): + role = RoleType.ASSISTANT + + if not role: + raise ValueError( + f"Could not resolve role type from message {message}. " + f"Only support {HumanMessage.__name__} and {AIMessage.__name__}." + ) + + return ChatMessage(role=role, text=content) + + +def _pop_system_messages(messages: List[BaseMessage]) -> List[SystemMessage]: + system_message_indexes = [ + i for i, message in enumerate(messages) if isinstance(message, SystemMessage) + ] + + # Pop from the highest index first so the remaining indexes stay valid, + # then restore the original order. + popped = [ + cast(SystemMessage, messages.pop(i)) + for i in reversed(system_message_indexes) + ] + return list(reversed(popped)) + + +class ChatAI21(BaseChatModel, AI21Base): + """ChatAI21 chat model. + + Example: + .. code-block:: python + + from langchain_ai21 import ChatAI21 + + + model = ChatAI21(model="j2-ultra") + """ + + model: str + """Model type you wish to interact with. + You can view the options at https://github.com/AI21Labs/ai21-python?tab=readme-ov-file#model-types""" + num_results: int = 1 + """The number of responses to generate for a given prompt.""" + + max_tokens: int = 16 + """The maximum number of tokens to generate for each response.""" + + min_tokens: int = 0 + """The minimum number of tokens to generate for each response.""" + + temperature: float = 0.7 + """A value controlling the "creativity" of the model's responses.""" + + top_p: float = 1 + """A value controlling the diversity of the model's responses.""" + + top_k_return: int = 0 + """The number of top-scoring tokens to consider for each generation step.""" + + frequency_penalty: Optional[Penalty] = None + """A penalty applied to tokens that are frequently generated.""" + + presence_penalty: Optional[Penalty] = None + """A penalty applied to tokens that are already present in the prompt.""" + + count_penalty: Optional[Penalty] = None + """A penalty applied to tokens based on their frequency + in the generated responses.""" + + class Config: + """Configuration for this pydantic object.""" + + arbitrary_types_allowed = True + + @property + def _llm_type(self) -> str: + """Return type of chat model.""" + return "chat-ai21" + + def _generate( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> ChatResult: + system, ai21_messages = _convert_messages_to_ai21_messages(messages) + + response = self.client.chat.create( + model=self.model, + messages=ai21_messages, + system=system or "", + num_results=self.num_results, + temperature=self.temperature, + max_tokens=self.max_tokens, + min_tokens=self.min_tokens, + top_p=self.top_p, + top_k_return=self.top_k_return, + stop_sequences=stop, + frequency_penalty=self.frequency_penalty, + presence_penalty=self.presence_penalty, 
count_penalty=self.count_penalty, + **kwargs, + ) + + outputs = response.outputs + message = AIMessage(content=outputs[0].text) + return ChatResult(generations=[ChatGeneration(message=message)]) + + async def _agenerate( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> ChatResult: + return await asyncio.get_running_loop().run_in_executor( + None, partial(self._generate, **kwargs), messages, stop, run_manager + ) diff --git a/libs/partners/ai21/langchain_ai21/embeddings.py b/libs/partners/ai21/langchain_ai21/embeddings.py new file mode 100644 index 00000000000000..59fad67b5c8766 --- /dev/null +++ b/libs/partners/ai21/langchain_ai21/embeddings.py @@ -0,0 +1,41 @@ +from typing import Any, List + +from ai21.models import EmbedType +from langchain_core.embeddings import Embeddings + +from langchain_ai21.ai21_base import AI21Base + + +class AI21Embeddings(Embeddings, AI21Base): + """AI21 embeddings model. + To use, you should have the 'AI21_API_KEY' environment variable set + or pass it as a named parameter to the constructor. + + Example: + .. code-block:: python + + from langchain_ai21 import AI21Embeddings + + embeddings = AI21Embeddings() + query_result = embeddings.embed_query("Hello embeddings world!") + """ + + def embed_documents(self, texts: List[str], **kwargs: Any) -> List[List[float]]: + """Embed search docs.""" + response = self.client.embed.create( + texts=texts, + type=EmbedType.SEGMENT, + **kwargs, + ) + + return [result.embedding for result in response.results] + + def embed_query(self, text: str, **kwargs: Any) -> List[float]: + """Embed query text.""" + response = self.client.embed.create( + texts=[text], + type=EmbedType.QUERY, + **kwargs, + ) + + return response.results[0].embedding diff --git a/libs/partners/ai21/langchain_ai21/llms.py b/libs/partners/ai21/langchain_ai21/llms.py new file mode 100644 index 00000000000000..27a8121bbe154b --- /dev/null +++ b/libs/partners/ai21/langchain_ai21/llms.py @@ -0,0 +1,142 @@ +import asyncio +from functools import partial +from typing import ( + Any, + List, + Optional, +) + +from ai21.models import CompletionsResponse, Penalty +from langchain_core.callbacks import ( + AsyncCallbackManagerForLLMRun, + CallbackManagerForLLMRun, +) +from langchain_core.language_models import BaseLLM +from langchain_core.outputs import Generation, LLMResult + +from langchain_ai21.ai21_base import AI21Base + + +class AI21LLM(BaseLLM, AI21Base): + """AI21LLM large language models. + + Example: + .. code-block:: python + + from langchain_ai21 import AI21LLM + + model = AI21LLM(model="j2-ultra") + """ + + model: str + """Model type you wish to interact with. 
+ You can view the options at https://github.com/AI21Labs/ai21-python?tab=readme-ov-file#model-types""" + + num_results: int = 1 + """The number of responses to generate for a given prompt.""" + + max_tokens: int = 16 + """The maximum number of tokens to generate for each response.""" + + min_tokens: int = 0 + """The minimum number of tokens to generate for each response.""" + + temperature: float = 0.7 + """A value controlling the "creativity" of the model's responses.""" + + top_p: float = 1 + """A value controlling the diversity of the model's responses.""" + + top_k_returns: int = 0 + """The number of top-scoring tokens to consider for each generation step.""" + + frequency_penalty: Optional[Penalty] = None + """A penalty applied to tokens that are frequently generated.""" + + presence_penalty: Optional[Penalty] = None + """ A penalty applied to tokens that are already present in the prompt.""" + + count_penalty: Optional[Penalty] = None + """A penalty applied to tokens based on their frequency + in the generated responses.""" + + custom_model: Optional[str] = None + epoch: Optional[int] = None + + class Config: + """Configuration for this pydantic object.""" + + allow_population_by_field_name = True + + @property + def _llm_type(self) -> str: + """Return type of LLM.""" + return "ai21-llm" + + def _generate( + self, + prompts: List[str], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> LLMResult: + generations: List[List[Generation]] = [] + token_count = 0 + + for prompt in prompts: + response = self._invoke_completion( + prompt=prompt, model=self.model, stop_sequences=stop, **kwargs + ) + generation = self._response_to_generation(response) + generations.append(generation) + token_count += self.client.count_tokens(prompt) + + llm_output = {"token_count": token_count, "model_name": self.model} + return LLMResult(generations=generations, llm_output=llm_output) + + async def _agenerate( + self, + prompts: List[str], + stop: Optional[List[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> LLMResult: + # Change implementation if integration natively supports async generation. 
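+        # The underlying AI21 SDK client is synchronous, so the async path simply delegates the sync _generate call to the default executor thread.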
+ return await asyncio.get_running_loop().run_in_executor( + None, partial(self._generate, **kwargs), prompts, stop, run_manager + ) + + def _invoke_completion( + self, + prompt: str, + model: str, + stop_sequences: Optional[List[str]] = None, + **kwargs: Any, + ) -> CompletionsResponse: + return self.client.completion.create( + prompt=prompt, + model=model, + max_tokens=self.max_tokens, + num_results=self.num_results, + min_tokens=self.min_tokens, + temperature=self.temperature, + top_p=self.top_p, + top_k_return=self.top_k_returns, + custom_model=self.custom_model, + stop_sequences=stop_sequences, + frequency_penalty=self.frequency_penalty, + presence_penalty=self.presence_penalty, + count_penalty=self.count_penalty, + epoch=self.epoch, + ) + + def _response_to_generation( + self, response: CompletionsResponse + ) -> List[Generation]: + return [ + Generation( + text=completion.data.text, + generation_info=completion.to_dict(), + ) + for completion in response.completions + ] diff --git a/libs/partners/ai21/langchain_ai21/py.typed b/libs/partners/ai21/langchain_ai21/py.typed new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/libs/partners/ai21/poetry.lock b/libs/partners/ai21/poetry.lock new file mode 100644 index 00000000000000..49a59858f739b3 --- /dev/null +++ b/libs/partners/ai21/poetry.lock @@ -0,0 +1,975 @@ +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. + +[[package]] +name = "ai21" +version = "2.0.0" +description = "" +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "ai21-2.0.0-py3-none-any.whl", hash = "sha256:0f40b0fe9254b1ddface81681aa18b74635dbf318ca1aeab38c31b500ecd1fe5"}, + {file = "ai21-2.0.0.tar.gz", hash = "sha256:37ac1bda7c2584aafc05cfc69f3e43f7c46571aea29ff292a1f40950f4d9aa70"}, +] + +[package.dependencies] +ai21-tokenizer = ">=0.3.9,<0.4.0" +dataclasses-json = ">=0.6.3,<0.7.0" +requests = ">=2.31.0,<3.0.0" + +[package.extras] +aws = ["boto3 (>=1.28.82,<2.0.0)"] + +[[package]] +name = "ai21-tokenizer" +version = "0.3.11" +description = "" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "ai21_tokenizer-0.3.11-py3-none-any.whl", hash = "sha256:80d332c51cab3fa88f0fea7493240a6a5bc38fd24a3d0806d28731d8fc97691f"}, + {file = "ai21_tokenizer-0.3.11.tar.gz", hash = "sha256:ec11ce4e46d24f71f1c2756ad0de34e0adfd51b5bcd81b544aea13d6935ec905"}, +] + +[package.dependencies] +sentencepiece = ">=0.1.96,<0.2.0" + +[[package]] +name = "annotated-types" +version = "0.6.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +files = [ + {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"}, + {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""} + +[[package]] +name = "anyio" +version = "4.2.0" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +optional = false +python-versions = ">=3.8" +files = [ + {file = "anyio-4.2.0-py3-none-any.whl", hash = "sha256:745843b39e829e108e518c489b31dc757de7d2131d53fac32bd8df268227bfee"}, + {file = "anyio-4.2.0.tar.gz", hash = "sha256:e1875bb4b4e2de1669f4bc7869b6d3f54231cdced71605e6e64c9be77e3be50f"}, +] + +[package.dependencies] +exceptiongroup = {version = ">=1.0.2", markers = 
"python_version < \"3.11\""} +idna = ">=2.8" +sniffio = ">=1.1" +typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} + +[package.extras] +doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] +trio = ["trio (>=0.23)"] + +[[package]] +name = "certifi" +version = "2023.11.17" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2023.11.17-py3-none-any.whl", hash = "sha256:e036ab49d5b79556f99cfc2d9320b34cfbe5be05c5871b51de9329f0603b0474"}, + {file = "certifi-2023.11.17.tar.gz", hash = "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = 
"sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = 
"charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + +[[package]] +name = "codespell" +version = "2.2.6" +description = "Codespell" +optional = false +python-versions = ">=3.8" +files = [ + {file = "codespell-2.2.6-py3-none-any.whl", hash = "sha256:9ee9a3e5df0990604013ac2a9f22fa8e57669c827124a2e961fe8a1da4cacc07"}, + {file = "codespell-2.2.6.tar.gz", hash = "sha256:a8c65d8eb3faa03deabab6b3bbe798bea72e1799c7e9e955d57eca4096abcff9"}, +] + +[package.extras] +dev = ["Pygments", "build", "chardet", "pre-commit", "pytest", "pytest-cov", "pytest-dependency", "ruff", "tomli", "twine"] +hard-encoding-detection = ["chardet"] +toml = ["tomli"] +types = ["chardet (>=5.1.0)", "mypy", "pytest", "pytest-cov", "pytest-dependency"] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "dataclasses-json" +version = "0.6.3" +description = "Easily serialize dataclasses to and from JSON." 
+optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "dataclasses_json-0.6.3-py3-none-any.whl", hash = "sha256:4aeb343357997396f6bca1acae64e486c3a723d8f5c76301888abeccf0c45176"}, + {file = "dataclasses_json-0.6.3.tar.gz", hash = "sha256:35cb40aae824736fdf959801356641836365219cfe14caeb115c39136f775d2a"}, +] + +[package.dependencies] +marshmallow = ">=3.18.0,<4.0.0" +typing-inspect = ">=0.4.0,<1" + +[[package]] +name = "exceptiongroup" +version = "1.2.0" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, + {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "freezegun" +version = "1.4.0" +description = "Let your Python tests travel through time" +optional = false +python-versions = ">=3.7" +files = [ + {file = "freezegun-1.4.0-py3-none-any.whl", hash = "sha256:55e0fc3c84ebf0a96a5aa23ff8b53d70246479e9a68863f1fcac5a3e52f19dd6"}, + {file = "freezegun-1.4.0.tar.gz", hash = "sha256:10939b0ba0ff5adaecf3b06a5c2f73071d9678e507c5eaedb23c761d56ac774b"}, +] + +[package.dependencies] +python-dateutil = ">=2.7" + +[[package]] +name = "idna" +version = "3.6" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"}, + {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "jsonpatch" +version = "1.33" +description = "Apply JSON-Patches (RFC 6902)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, + {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, +] + +[package.dependencies] +jsonpointer = ">=1.9" + +[[package]] +name = "jsonpointer" +version = "2.4" +description = "Identify specific nodes in a JSON document (RFC 6901)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, +] + +[[package]] +name = "langchain-core" +version = "0.1.22" +description = "Building applications with LLMs through composability" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [] +develop = true + +[package.dependencies] +anyio = ">=3,<5" +jsonpatch = "^1.33" +langsmith = "^0.0.87" +packaging = "^23.2" +pydantic = ">=1,<3" 
+PyYAML = ">=5.3" +requests = "^2" +tenacity = "^8.1.0" + +[package.extras] +extended-testing = ["jinja2 (>=3,<4)"] + +[package.source] +type = "directory" +url = "../../core" + +[[package]] +name = "langsmith" +version = "0.0.87" +description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "langsmith-0.0.87-py3-none-any.whl", hash = "sha256:8903d3811b9fc89eb18f5961c8e6935fbd2d0f119884fbf30dc70b8f8f4121fc"}, + {file = "langsmith-0.0.87.tar.gz", hash = "sha256:36c4cc47e5b54be57d038036a30fb19ce6e4c73048cd7a464b8f25b459694d34"}, +] + +[package.dependencies] +pydantic = ">=1,<3" +requests = ">=2,<3" + +[[package]] +name = "marshmallow" +version = "3.20.2" +description = "A lightweight library for converting complex datatypes to and from native Python datatypes." +optional = false +python-versions = ">=3.8" +files = [ + {file = "marshmallow-3.20.2-py3-none-any.whl", hash = "sha256:c21d4b98fee747c130e6bc8f45c4b3199ea66bc00c12ee1f639f0aeca034d5e9"}, + {file = "marshmallow-3.20.2.tar.gz", hash = "sha256:4c1daff273513dc5eb24b219a8035559dc573c8f322558ef85f5438ddd1236dd"}, +] + +[package.dependencies] +packaging = ">=17.0" + +[package.extras] +dev = ["pre-commit (>=2.4,<4.0)", "pytest", "pytz", "simplejson", "tox"] +docs = ["alabaster (==0.7.15)", "autodocsumm (==0.2.12)", "sphinx (==7.2.6)", "sphinx-issues (==3.0.1)", "sphinx-version-warning (==1.1.2)"] +lint = ["pre-commit (>=2.4,<4.0)"] +tests = ["pytest", "pytz", "simplejson"] + +[[package]] +name = "mypy" +version = "0.991" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mypy-0.991-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7d17e0a9707d0772f4a7b878f04b4fd11f6f5bcb9b3813975a9b13c9332153ab"}, + {file = "mypy-0.991-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0714258640194d75677e86c786e80ccf294972cc76885d3ebbb560f11db0003d"}, + {file = "mypy-0.991-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0c8f3be99e8a8bd403caa8c03be619544bc2c77a7093685dcf308c6b109426c6"}, + {file = "mypy-0.991-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9ec663ed6c8f15f4ae9d3c04c989b744436c16d26580eaa760ae9dd5d662eb"}, + {file = "mypy-0.991-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4307270436fd7694b41f913eb09210faff27ea4979ecbcd849e57d2da2f65305"}, + {file = "mypy-0.991-cp310-cp310-win_amd64.whl", hash = "sha256:901c2c269c616e6cb0998b33d4adbb4a6af0ac4ce5cd078afd7bc95830e62c1c"}, + {file = "mypy-0.991-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d13674f3fb73805ba0c45eb6c0c3053d218aa1f7abead6e446d474529aafc372"}, + {file = "mypy-0.991-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1c8cd4fb70e8584ca1ed5805cbc7c017a3d1a29fb450621089ffed3e99d1857f"}, + {file = "mypy-0.991-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:209ee89fbb0deed518605edddd234af80506aec932ad28d73c08f1400ef80a33"}, + {file = "mypy-0.991-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37bd02ebf9d10e05b00d71302d2c2e6ca333e6c2a8584a98c00e038db8121f05"}, + {file = "mypy-0.991-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:26efb2fcc6b67e4d5a55561f39176821d2adf88f2745ddc72751b7890f3194ad"}, + {file = "mypy-0.991-cp311-cp311-win_amd64.whl", hash = "sha256:3a700330b567114b673cf8ee7388e949f843b356a73b5ab22dd7cff4742a5297"}, + {file = "mypy-0.991-cp37-cp37m-macosx_10_9_x86_64.whl", hash = 
"sha256:1f7d1a520373e2272b10796c3ff721ea1a0712288cafaa95931e66aa15798813"}, + {file = "mypy-0.991-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:641411733b127c3e0dab94c45af15fea99e4468f99ac88b39efb1ad677da5711"}, + {file = "mypy-0.991-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3d80e36b7d7a9259b740be6d8d906221789b0d836201af4234093cae89ced0cd"}, + {file = "mypy-0.991-cp37-cp37m-win_amd64.whl", hash = "sha256:e62ebaad93be3ad1a828a11e90f0e76f15449371ffeecca4a0a0b9adc99abcef"}, + {file = "mypy-0.991-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b86ce2c1866a748c0f6faca5232059f881cda6dda2a893b9a8373353cfe3715a"}, + {file = "mypy-0.991-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ac6e503823143464538efda0e8e356d871557ef60ccd38f8824a4257acc18d93"}, + {file = "mypy-0.991-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0cca5adf694af539aeaa6ac633a7afe9bbd760df9d31be55ab780b77ab5ae8bf"}, + {file = "mypy-0.991-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12c56bf73cdab116df96e4ff39610b92a348cc99a1307e1da3c3768bbb5b135"}, + {file = "mypy-0.991-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:652b651d42f155033a1967739788c436491b577b6a44e4c39fb340d0ee7f0d70"}, + {file = "mypy-0.991-cp38-cp38-win_amd64.whl", hash = "sha256:4175593dc25d9da12f7de8de873a33f9b2b8bdb4e827a7cae952e5b1a342e243"}, + {file = "mypy-0.991-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:98e781cd35c0acf33eb0295e8b9c55cdbef64fcb35f6d3aa2186f289bed6e80d"}, + {file = "mypy-0.991-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6d7464bac72a85cb3491c7e92b5b62f3dcccb8af26826257760a552a5e244aa5"}, + {file = "mypy-0.991-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c9166b3f81a10cdf9b49f2d594b21b31adadb3d5e9db9b834866c3258b695be3"}, + {file = "mypy-0.991-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8472f736a5bfb159a5e36740847808f6f5b659960115ff29c7cecec1741c648"}, + {file = "mypy-0.991-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5e80e758243b97b618cdf22004beb09e8a2de1af481382e4d84bc52152d1c476"}, + {file = "mypy-0.991-cp39-cp39-win_amd64.whl", hash = "sha256:74e259b5c19f70d35fcc1ad3d56499065c601dfe94ff67ae48b85596b9ec1461"}, + {file = "mypy-0.991-py3-none-any.whl", hash = "sha256:de32edc9b0a7e67c2775e574cb061a537660e51210fbf6006b0b36ea695ae9bb"}, + {file = "mypy-0.991.tar.gz", hash = "sha256:3c0165ba8f354a6d9881809ef29f1a9318a236a6d81c690094c5df32107bde06"}, +] + +[package.dependencies] +mypy-extensions = ">=0.4.3" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = ">=3.10" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +python2 = ["typed-ast (>=1.4.0,<2)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." 
+optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "packaging" +version = "23.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, + {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, +] + +[[package]] +name = "pluggy" +version = "1.3.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, + {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pydantic" +version = "2.5.3" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydantic-2.5.3-py3-none-any.whl", hash = "sha256:d0caf5954bee831b6bfe7e338c32b9e30c85dfe080c843680783ac2b631673b4"}, + {file = "pydantic-2.5.3.tar.gz", hash = "sha256:b3ef57c62535b0941697cce638c08900d87fcb67e29cfa99e8a68f747f393f7a"}, +] + +[package.dependencies] +annotated-types = ">=0.4.0" +pydantic-core = "2.14.6" +typing-extensions = ">=4.6.1" + +[package.extras] +email = ["email-validator (>=2.0.0)"] + +[[package]] +name = "pydantic-core" +version = "2.14.6" +description = "" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydantic_core-2.14.6-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:72f9a942d739f09cd42fffe5dc759928217649f070056f03c70df14f5770acf9"}, + {file = "pydantic_core-2.14.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6a31d98c0d69776c2576dda4b77b8e0c69ad08e8b539c25c7d0ca0dc19a50d6c"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5aa90562bc079c6c290f0512b21768967f9968e4cfea84ea4ff5af5d917016e4"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:370ffecb5316ed23b667d99ce4debe53ea664b99cc37bfa2af47bc769056d534"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f85f3843bdb1fe80e8c206fe6eed7a1caeae897e496542cee499c374a85c6e08"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9862bf828112e19685b76ca499b379338fd4c5c269d897e218b2ae8fcb80139d"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036137b5ad0cb0004c75b579445a1efccd072387a36c7f217bb8efd1afbe5245"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92879bce89f91f4b2416eba4429c7b5ca22c45ef4a499c39f0c5c69257522c7c"}, + {file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0c08de15d50fa190d577e8591f0329a643eeaed696d7771760295998aca6bc66"}, + {file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:36099c69f6b14fc2c49d7996cbf4f87ec4f0e66d1c74aa05228583225a07b590"}, + {file = "pydantic_core-2.14.6-cp310-none-win32.whl", hash = "sha256:7be719e4d2ae6c314f72844ba9d69e38dff342bc360379f7c8537c48e23034b7"}, + {file = "pydantic_core-2.14.6-cp310-none-win_amd64.whl", hash = "sha256:36fa402dcdc8ea7f1b0ddcf0df4254cc6b2e08f8cd80e7010d4c4ae6e86b2a87"}, + {file = "pydantic_core-2.14.6-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:dea7fcd62915fb150cdc373212141a30037e11b761fbced340e9db3379b892d4"}, + {file = "pydantic_core-2.14.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ffff855100bc066ff2cd3aa4a60bc9534661816b110f0243e59503ec2df38421"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b027c86c66b8627eb90e57aee1f526df77dc6d8b354ec498be9a757d513b92b"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:00b1087dabcee0b0ffd104f9f53d7d3eaddfaa314cdd6726143af6bc713aa27e"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:75ec284328b60a4e91010c1acade0c30584f28a1f345bc8f72fe8b9e46ec6a96"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e1f4744eea1501404b20b0ac059ff7e3f96a97d3e3f48ce27a139e053bb370b"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2602177668f89b38b9f84b7b3435d0a72511ddef45dc14446811759b82235a1"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6c8edaea3089bf908dd27da8f5d9e395c5b4dc092dbcce9b65e7156099b4b937"}, + {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:478e9e7b360dfec451daafe286998d4a1eeaecf6d69c427b834ae771cad4b622"}, + {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b6ca36c12a5120bad343eef193cc0122928c5c7466121da7c20f41160ba00ba2"}, + {file = "pydantic_core-2.14.6-cp311-none-win32.whl", hash = "sha256:2b8719037e570639e6b665a4050add43134d80b687288ba3ade18b22bbb29dd2"}, + {file = "pydantic_core-2.14.6-cp311-none-win_amd64.whl", hash = "sha256:78ee52ecc088c61cce32b2d30a826f929e1708f7b9247dc3b921aec367dc1b23"}, + {file = "pydantic_core-2.14.6-cp311-none-win_arm64.whl", hash = "sha256:a19b794f8fe6569472ff77602437ec4430f9b2b9ec7a1105cfd2232f9ba355e6"}, + {file = "pydantic_core-2.14.6-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:667aa2eac9cd0700af1ddb38b7b1ef246d8cf94c85637cbb03d7757ca4c3fdec"}, + {file = "pydantic_core-2.14.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cdee837710ef6b56ebd20245b83799fce40b265b3b406e51e8ccc5b85b9099b7"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c5bcf3414367e29f83fd66f7de64509a8fd2368b1edf4351e862910727d3e51"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:26a92ae76f75d1915806b77cf459811e772d8f71fd1e4339c99750f0e7f6324f"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a983cca5ed1dd9a35e9e42ebf9f278d344603bfcb174ff99a5815f953925140a"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cb92f9061657287eded380d7dc455bbf115430b3aa4741bdc662d02977e7d0af"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e4ace1e220b078c8e48e82c081e35002038657e4b37d403ce940fa679e57113b"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ef633add81832f4b56d3b4c9408b43d530dfca29e68fb1b797dcb861a2c734cd"}, + {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7e90d6cc4aad2cc1f5e16ed56e46cebf4877c62403a311af20459c15da76fd91"}, + {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e8a5ac97ea521d7bde7621d86c30e86b798cdecd985723c4ed737a2aa9e77d0c"}, + {file = "pydantic_core-2.14.6-cp312-none-win32.whl", hash = "sha256:f27207e8ca3e5e021e2402ba942e5b4c629718e665c81b8b306f3c8b1ddbb786"}, + {file = "pydantic_core-2.14.6-cp312-none-win_amd64.whl", hash = "sha256:b3e5fe4538001bb82e2295b8d2a39356a84694c97cb73a566dc36328b9f83b40"}, + {file = "pydantic_core-2.14.6-cp312-none-win_arm64.whl", hash = "sha256:64634ccf9d671c6be242a664a33c4acf12882670b09b3f163cd00a24cffbd74e"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:24368e31be2c88bd69340fbfe741b405302993242ccb476c5c3ff48aeee1afe0"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:e33b0834f1cf779aa839975f9d8755a7c2420510c0fa1e9fa0497de77cd35d2c"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6af4b3f52cc65f8a0bc8b1cd9676f8c21ef3e9132f21fed250f6958bd7223bed"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d15687d7d7f40333bd8266f3814c591c2e2cd263fa2116e314f60d82086e353a"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:095b707bb287bfd534044166ab767bec70a9bba3175dcdc3371782175c14e43c"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94fc0e6621e07d1e91c44e016cc0b189b48db053061cc22d6298a611de8071bb"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ce830e480f6774608dedfd4a90c42aac4a7af0a711f1b52f807130c2e434c06"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a306cdd2ad3a7d795d8e617a58c3a2ed0f76c8496fb7621b6cd514eb1532cae8"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2f5fa187bde8524b1e37ba894db13aadd64faa884657473b03a019f625cee9a8"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:438027a975cc213a47c5d70672e0d29776082155cfae540c4e225716586be75e"}, + {file = "pydantic_core-2.14.6-cp37-none-win32.whl", hash = "sha256:f96ae96a060a8072ceff4cfde89d261837b4294a4f28b84a28765470d502ccc6"}, + {file = "pydantic_core-2.14.6-cp37-none-win_amd64.whl", hash = "sha256:e646c0e282e960345314f42f2cea5e0b5f56938c093541ea6dbf11aec2862391"}, + {file = "pydantic_core-2.14.6-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:db453f2da3f59a348f514cfbfeb042393b68720787bbef2b4c6068ea362c8149"}, + {file = "pydantic_core-2.14.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3860c62057acd95cc84044e758e47b18dcd8871a328ebc8ccdefd18b0d26a21b"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36026d8f99c58d7044413e1b819a67ca0e0b8ebe0f25e775e6c3d1fabb3c38fb"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8ed1af8692bd8d2a29d702f1a2e6065416d76897d726e45a1775b1444f5928a7"}, 
+ {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:314ccc4264ce7d854941231cf71b592e30d8d368a71e50197c905874feacc8a8"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:982487f8931067a32e72d40ab6b47b1628a9c5d344be7f1a4e668fb462d2da42"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dbe357bc4ddda078f79d2a36fc1dd0494a7f2fad83a0a684465b6f24b46fe80"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2f6ffc6701a0eb28648c845f4945a194dc7ab3c651f535b81793251e1185ac3d"}, + {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7f5025db12fc6de7bc1104d826d5aee1d172f9ba6ca936bf6474c2148ac336c1"}, + {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dab03ed811ed1c71d700ed08bde8431cf429bbe59e423394f0f4055f1ca0ea60"}, + {file = "pydantic_core-2.14.6-cp38-none-win32.whl", hash = "sha256:dfcbebdb3c4b6f739a91769aea5ed615023f3c88cb70df812849aef634c25fbe"}, + {file = "pydantic_core-2.14.6-cp38-none-win_amd64.whl", hash = "sha256:99b14dbea2fdb563d8b5a57c9badfcd72083f6006caf8e126b491519c7d64ca8"}, + {file = "pydantic_core-2.14.6-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:4ce8299b481bcb68e5c82002b96e411796b844d72b3e92a3fbedfe8e19813eab"}, + {file = "pydantic_core-2.14.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b9a9d92f10772d2a181b5ca339dee066ab7d1c9a34ae2421b2a52556e719756f"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd9e98b408384989ea4ab60206b8e100d8687da18b5c813c11e92fd8212a98e0"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4f86f1f318e56f5cbb282fe61eb84767aee743ebe32c7c0834690ebea50c0a6b"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86ce5fcfc3accf3a07a729779d0b86c5d0309a4764c897d86c11089be61da160"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dcf1978be02153c6a31692d4fbcc2a3f1db9da36039ead23173bc256ee3b91b"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eedf97be7bc3dbc8addcef4142f4b4164066df0c6f36397ae4aaed3eb187d8ab"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5f916acf8afbcab6bacbb376ba7dc61f845367901ecd5e328fc4d4aef2fcab0"}, + {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8a14c192c1d724c3acbfb3f10a958c55a2638391319ce8078cb36c02283959b9"}, + {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0348b1dc6b76041516e8a854ff95b21c55f5a411c3297d2ca52f5528e49d8411"}, + {file = "pydantic_core-2.14.6-cp39-none-win32.whl", hash = "sha256:de2a0645a923ba57c5527497daf8ec5df69c6eadf869e9cd46e86349146e5975"}, + {file = "pydantic_core-2.14.6-cp39-none-win_amd64.whl", hash = "sha256:aca48506a9c20f68ee61c87f2008f81f8ee99f8d7f0104bff3c47e2d148f89d9"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d5c28525c19f5bb1e09511669bb57353d22b94cf8b65f3a8d141c389a55dec95"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:78d0768ee59baa3de0f4adac9e3748b4b1fffc52143caebddfd5ea2961595277"}, + {file = 
"pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b93785eadaef932e4fe9c6e12ba67beb1b3f1e5495631419c784ab87e975670"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a874f21f87c485310944b2b2734cd6d318765bcbb7515eead33af9641816506e"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89f4477d915ea43b4ceea6756f63f0288941b6443a2b28c69004fe07fde0d0d"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:172de779e2a153d36ee690dbc49c6db568d7b33b18dc56b69a7514aecbcf380d"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:dfcebb950aa7e667ec226a442722134539e77c575f6cfaa423f24371bb8d2e94"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:55a23dcd98c858c0db44fc5c04fc7ed81c4b4d33c653a7c45ddaebf6563a2f66"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:4241204e4b36ab5ae466ecec5c4c16527a054c69f99bba20f6f75232a6a534e2"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e574de99d735b3fc8364cba9912c2bec2da78775eba95cbb225ef7dda6acea24"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1302a54f87b5cd8528e4d6d1bf2133b6aa7c6122ff8e9dc5220fbc1e07bffebd"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8e81e4b55930e5ffab4a68db1af431629cf2e4066dbdbfef65348b8ab804ea8"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c99462ffc538717b3e60151dfaf91125f637e801f5ab008f81c402f1dff0cd0f"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e4cf2d5829f6963a5483ec01578ee76d329eb5caf330ecd05b3edd697e7d768a"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:cf10b7d58ae4a1f07fccbf4a0a956d705356fea05fb4c70608bb6fa81d103cda"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:399ac0891c284fa8eb998bcfa323f2234858f5d2efca3950ae58c8f88830f145"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c6a5c79b28003543db3ba67d1df336f253a87d3112dac3a51b94f7d48e4c0e1"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:599c87d79cab2a6a2a9df4aefe0455e61e7d2aeede2f8577c1b7c0aec643ee8e"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43e166ad47ba900f2542a80d83f9fc65fe99eb63ceec4debec160ae729824052"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3a0b5db001b98e1c649dd55afa928e75aa4087e587b9524a4992316fa23c9fba"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:747265448cb57a9f37572a488a57d873fd96bf51e5bb7edb52cfb37124516da4"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:7ebe3416785f65c28f4f9441e916bfc8a54179c8dea73c23023f7086fa601c5d"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:86c963186ca5e50d5c8287b1d1c9d3f8f024cbe343d048c5bd282aec2d8641f2"}, + {file = 
"pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e0641b506486f0b4cd1500a2a65740243e8670a2549bb02bc4556a83af84ae03"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71d72ca5eaaa8d38c8df16b7deb1a2da4f650c41b58bb142f3fb75d5ad4a611f"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27e524624eace5c59af499cd97dc18bb201dc6a7a2da24bfc66ef151c69a5f2a"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a3dde6cac75e0b0902778978d3b1646ca9f438654395a362cb21d9ad34b24acf"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:00646784f6cd993b1e1c0e7b0fdcbccc375d539db95555477771c27555e3c556"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:23598acb8ccaa3d1d875ef3b35cb6376535095e9405d91a3d57a8c7db5d29341"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7f41533d7e3cf9520065f610b41ac1c76bc2161415955fbcead4981b22c7611e"}, + {file = "pydantic_core-2.14.6.tar.gz", hash = "sha256:1fd0c1d395372843fba13a51c28e3bb9d59bd7aebfeb17358ffaaa1e4dbbe948"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + +[[package]] +name = "pytest" +version = "7.4.4" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, + {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-asyncio" +version = "0.21.1" +description = "Pytest support for asyncio" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-asyncio-0.21.1.tar.gz", hash = "sha256:40a7eae6dded22c7b604986855ea48400ab15b069ae38116e8c01238e9eeb64d"}, + {file = "pytest_asyncio-0.21.1-py3-none-any.whl", hash = "sha256:8666c1c8ac02631d7c51ba282e0c69a8a452b211ffedf2599099845da5c5c37b"}, +] + +[package.dependencies] +pytest = ">=7.0.0" + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] +testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy (>=0.931)", "pytest-trio (>=0.7.0)"] + +[[package]] +name = "pytest-mock" +version = "3.12.0" +description = "Thin-wrapper around the mock package for easier use with pytest" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-mock-3.12.0.tar.gz", hash = "sha256:31a40f038c22cad32287bb43932054451ff5583ff094bca6f675df2f8bc1a6e9"}, + {file = "pytest_mock-3.12.0-py3-none-any.whl", hash = "sha256:0972719a7263072da3a21c7f4773069bcc7486027d7e8e1f81d98a47e701bc4f"}, +] + +[package.dependencies] +pytest = ">=5.0" + +[package.extras] +dev = ["pre-commit", "pytest-asyncio", "tox"] + +[[package]] +name = "pytest-watcher" +version = "0.3.4" +description = "Automatically rerun 
your tests on file modifications" +optional = false +python-versions = ">=3.7.0,<4.0.0" +files = [ + {file = "pytest_watcher-0.3.4-py3-none-any.whl", hash = "sha256:edd2bd9c8a1fb14d48c9f4947234065eb9b4c1acedc0bf213b1f12501dfcffd3"}, + {file = "pytest_watcher-0.3.4.tar.gz", hash = "sha256:d39491ba15b589221bb9a78ef4bed3d5d1503aed08209b1a138aeb95b9117a18"}, +] + +[package.dependencies] +tomli = {version = ">=2.0.1,<3.0.0", markers = "python_version < \"3.11\""} +watchdog = ">=2.0.0" + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = 
"sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, + {file = 
"PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.7" +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "ruff" +version = "0.1.9" +description = "An extremely fast Python linter and code formatter, written in Rust." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.1.9-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:e6a212f436122ac73df851f0cf006e0c6612fe6f9c864ed17ebefce0eff6a5fd"}, + {file = "ruff-0.1.9-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:28d920e319783d5303333630dae46ecc80b7ba294aeffedf946a02ac0b7cc3db"}, + {file = "ruff-0.1.9-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:104aa9b5e12cb755d9dce698ab1b97726b83012487af415a4512fedd38b1459e"}, + {file = "ruff-0.1.9-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1e63bf5a4a91971082a4768a0aba9383c12392d0d6f1e2be2248c1f9054a20da"}, + {file = "ruff-0.1.9-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4d0738917c203246f3e275b37006faa3aa96c828b284ebfe3e99a8cb413c8c4b"}, + {file = "ruff-0.1.9-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:69dac82d63a50df2ab0906d97a01549f814b16bc806deeac4f064ff95c47ddf5"}, + {file = "ruff-0.1.9-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2aec598fb65084e41a9c5d4b95726173768a62055aafb07b4eff976bac72a592"}, + {file = "ruff-0.1.9-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:744dfe4b35470fa3820d5fe45758aace6269c578f7ddc43d447868cfe5078bcb"}, + {file = "ruff-0.1.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:479ca4250cab30f9218b2e563adc362bd6ae6343df7c7b5a7865300a5156d5a6"}, + {file = "ruff-0.1.9-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:aa8344310f1ae79af9ccd6e4b32749e93cddc078f9b5ccd0e45bd76a6d2e8bb6"}, + {file = "ruff-0.1.9-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:837c739729394df98f342319f5136f33c65286b28b6b70a87c28f59354ec939b"}, + {file = "ruff-0.1.9-py3-none-musllinux_1_2_i686.whl", hash = "sha256:e6837202c2859b9f22e43cb01992373c2dbfeae5c0c91ad691a4a2e725392464"}, + {file = "ruff-0.1.9-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:331aae2cd4a0554667ac683243b151c74bd60e78fb08c3c2a4ac05ee1e606a39"}, + {file = "ruff-0.1.9-py3-none-win32.whl", hash = "sha256:8151425a60878e66f23ad47da39265fc2fad42aed06fb0a01130e967a7a064f4"}, + {file = "ruff-0.1.9-py3-none-win_amd64.whl", hash = "sha256:c497d769164df522fdaf54c6eba93f397342fe4ca2123a2e014a5b8fc7df81c7"}, + {file = "ruff-0.1.9-py3-none-win_arm64.whl", hash = "sha256:0e17f53bcbb4fff8292dfd84cf72d767b5e146f009cccd40c2fad27641f8a7a9"}, + {file = "ruff-0.1.9.tar.gz", hash = "sha256:b041dee2734719ddbb4518f762c982f2e912e7f28b8ee4fe1dee0b15d1b6e800"}, +] + +[[package]] +name = "sentencepiece" +version = "0.1.99" +description = "SentencePiece python wrapper" +optional = false +python-versions = "*" +files = [ + {file = "sentencepiece-0.1.99-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0eb528e70571b7c02723e5804322469b82fe7ea418c96051d0286c0fa028db73"}, + {file = "sentencepiece-0.1.99-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:77d7fafb2c4e4659cbdf303929503f37a26eabc4ff31d3a79bf1c5a1b338caa7"}, + {file = "sentencepiece-0.1.99-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:be9cf5b9e404c245aeb3d3723c737ba7a8f5d4ba262ef233a431fa6c45f732a0"}, + {file = "sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baed1a26464998f9710d20e52607c29ffd4293e7c71c6a1f83f51ad0911ec12c"}, + {file = "sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9832f08bb372d4c8b567612f8eab9e36e268dff645f1c28f9f8e851be705f6d1"}, + {file 
= "sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:019e7535108e309dae2b253a75834fc3128240aa87c00eb80732078cdc182588"}, + {file = "sentencepiece-0.1.99-cp310-cp310-win32.whl", hash = "sha256:fa16a830416bb823fa2a52cbdd474d1f7f3bba527fd2304fb4b140dad31bb9bc"}, + {file = "sentencepiece-0.1.99-cp310-cp310-win_amd64.whl", hash = "sha256:14b0eccb7b641d4591c3e12ae44cab537d68352e4d3b6424944f0c447d2348d5"}, + {file = "sentencepiece-0.1.99-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6d3c56f24183a1e8bd61043ff2c58dfecdc68a5dd8955dc13bab83afd5f76b81"}, + {file = "sentencepiece-0.1.99-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ed6ea1819fd612c989999e44a51bf556d0ef6abfb553080b9be3d347e18bcfb7"}, + {file = "sentencepiece-0.1.99-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a2a0260cd1fb7bd8b4d4f39dc2444a8d5fd4e0a0c4d5c899810ef1abf99b2d45"}, + {file = "sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a1abff4d1ff81c77cac3cc6fefa34fa4b8b371e5ee51cb7e8d1ebc996d05983"}, + {file = "sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:004e6a621d4bc88978eecb6ea7959264239a17b70f2cbc348033d8195c9808ec"}, + {file = "sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db361e03342c41680afae5807590bc88aa0e17cfd1a42696a160e4005fcda03b"}, + {file = "sentencepiece-0.1.99-cp311-cp311-win32.whl", hash = "sha256:2d95e19168875b70df62916eb55428a0cbcb834ac51d5a7e664eda74def9e1e0"}, + {file = "sentencepiece-0.1.99-cp311-cp311-win_amd64.whl", hash = "sha256:f90d73a6f81248a909f55d8e6ef56fec32d559e1e9af045f0b0322637cb8e5c7"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:62e24c81e74bd87a6e0d63c51beb6527e4c0add67e1a17bac18bcd2076afcfeb"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57efcc2d51caff20d9573567d9fd3f854d9efe613ed58a439c78c9f93101384a"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6a904c46197993bd1e95b93a6e373dca2f170379d64441041e2e628ad4afb16f"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d89adf59854741c0d465f0e1525b388c0d174f611cc04af54153c5c4f36088c4"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-win32.whl", hash = "sha256:47c378146928690d1bc106fdf0da768cebd03b65dd8405aa3dd88f9c81e35dba"}, + {file = "sentencepiece-0.1.99-cp36-cp36m-win_amd64.whl", hash = "sha256:9ba142e7a90dd6d823c44f9870abdad45e6c63958eb60fe44cca6828d3b69da2"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b7b1a9ae4d7c6f1f867e63370cca25cc17b6f4886729595b885ee07a58d3cec3"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0f644c9d4d35c096a538507b2163e6191512460035bf51358794a78515b74f7"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c8843d23a0f686d85e569bd6dcd0dd0e0cbc03731e63497ca6d5bacd18df8b85"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33e6f690a1caebb4867a2e367afa1918ad35be257ecdb3455d2bbd787936f155"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-win32.whl", hash = "sha256:8a321866c2f85da7beac74a824b4ad6ddc2a4c9bccd9382529506d48f744a12c"}, + {file = "sentencepiece-0.1.99-cp37-cp37m-win_amd64.whl", hash = 
"sha256:c42f753bcfb7661c122a15b20be7f684b61fc8592c89c870adf52382ea72262d"}, + {file = "sentencepiece-0.1.99-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:85b476406da69c70586f0bb682fcca4c9b40e5059814f2db92303ea4585c650c"}, + {file = "sentencepiece-0.1.99-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cfbcfe13c69d3f87b7fcd5da168df7290a6d006329be71f90ba4f56bc77f8561"}, + {file = "sentencepiece-0.1.99-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:445b0ec381af1cd4eef95243e7180c63d9c384443c16c4c47a28196bd1cda937"}, + {file = "sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6890ea0f2b4703f62d0bf27932e35808b1f679bdb05c7eeb3812b935ba02001"}, + {file = "sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb71af492b0eefbf9f2501bec97bcd043b6812ab000d119eaf4bd33f9e283d03"}, + {file = "sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27b866b5bd3ddd54166bbcbf5c8d7dd2e0b397fac8537991c7f544220b1f67bc"}, + {file = "sentencepiece-0.1.99-cp38-cp38-win32.whl", hash = "sha256:b133e8a499eac49c581c3c76e9bdd08c338cc1939e441fee6f92c0ccb5f1f8be"}, + {file = "sentencepiece-0.1.99-cp38-cp38-win_amd64.whl", hash = "sha256:0eaf3591dd0690a87f44f4df129cf8d05d8a4029b5b6709b489b8e27f9a9bcff"}, + {file = "sentencepiece-0.1.99-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38efeda9bbfb55052d482a009c6a37e52f42ebffcea9d3a98a61de7aee356a28"}, + {file = "sentencepiece-0.1.99-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6c030b081dc1e1bcc9fadc314b19b740715d3d566ad73a482da20d7d46fd444c"}, + {file = "sentencepiece-0.1.99-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:84dbe53e02e4f8a2e45d2ac3e430d5c83182142658e25edd76539b7648928727"}, + {file = "sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b0f55d0a0ee1719b4b04221fe0c9f0c3461dc3dabd77a035fa2f4788eb3ef9a"}, + {file = "sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18e800f206cd235dc27dc749299e05853a4e4332e8d3dfd81bf13d0e5b9007d9"}, + {file = "sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ae1c40cda8f9d5b0423cfa98542735c0235e7597d79caf318855cdf971b2280"}, + {file = "sentencepiece-0.1.99-cp39-cp39-win32.whl", hash = "sha256:c84ce33af12ca222d14a1cdd37bd76a69401e32bc68fe61c67ef6b59402f4ab8"}, + {file = "sentencepiece-0.1.99-cp39-cp39-win_amd64.whl", hash = "sha256:350e5c74d739973f1c9643edb80f7cc904dc948578bcb1d43c6f2b173e5d18dd"}, + {file = "sentencepiece-0.1.99.tar.gz", hash = "sha256:189c48f5cb2949288f97ccdb97f0473098d9c3dcf5a3d99d4eabe719ec27297f"}, +] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "sniffio" +version = "1.3.0" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, + {file = "sniffio-1.3.0.tar.gz", hash = 
"sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, +] + +[[package]] +name = "syrupy" +version = "4.6.0" +description = "Pytest Snapshot Test Utility" +optional = false +python-versions = ">=3.8.1,<4" +files = [ + {file = "syrupy-4.6.0-py3-none-any.whl", hash = "sha256:747aae1bcf3cb3249e33b1e6d81097874d23615982d5686ebe637875b0775a1b"}, + {file = "syrupy-4.6.0.tar.gz", hash = "sha256:231b1f5d00f1f85048ba81676c79448076189c4aef4d33f21ae32f3b4c565a54"}, +] + +[package.dependencies] +pytest = ">=7.0.0,<8.0.0" + +[[package]] +name = "tenacity" +version = "8.2.3" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, + {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, +] + +[package.extras] +doc = ["reno", "sphinx", "tornado (>=4.5)"] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "typing-extensions" +version = "4.9.0" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, + {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, +] + +[[package]] +name = "typing-inspect" +version = "0.9.0" +description = "Runtime inspection utilities for typing module." +optional = false +python-versions = "*" +files = [ + {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, + {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, +] + +[package.dependencies] +mypy-extensions = ">=0.3.0" +typing-extensions = ">=3.7.4" + +[[package]] +name = "urllib3" +version = "2.1.0" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.1.0-py3-none-any.whl", hash = "sha256:55901e917a5896a349ff771be919f8bd99aff50b79fe58fec595eb37bbc56bb3"}, + {file = "urllib3-2.1.0.tar.gz", hash = "sha256:df7aa8afb0148fa78488e7899b2c59b5f4ffcfa82e6c54ccb9dd37c1d7b52d54"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "watchdog" +version = "3.0.0" +description = "Filesystem events monitoring" +optional = false +python-versions = ">=3.7" +files = [ + {file = "watchdog-3.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:336adfc6f5cc4e037d52db31194f7581ff744b67382eb6021c868322e32eef41"}, + {file = "watchdog-3.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a70a8dcde91be523c35b2bf96196edc5730edb347e374c7de7cd20c43ed95397"}, + {file = "watchdog-3.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:adfdeab2da79ea2f76f87eb42a3ab1966a5313e5a69a0213a3cc06ef692b0e96"}, + {file = "watchdog-3.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2b57a1e730af3156d13b7fdddfc23dea6487fceca29fc75c5a868beed29177ae"}, + {file = "watchdog-3.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7ade88d0d778b1b222adebcc0927428f883db07017618a5e684fd03b83342bd9"}, + {file = "watchdog-3.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7e447d172af52ad204d19982739aa2346245cc5ba6f579d16dac4bfec226d2e7"}, + {file = "watchdog-3.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9fac43a7466eb73e64a9940ac9ed6369baa39b3bf221ae23493a9ec4d0022674"}, + {file = "watchdog-3.0.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:8ae9cda41fa114e28faf86cb137d751a17ffd0316d1c34ccf2235e8a84365c7f"}, + {file = "watchdog-3.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:25f70b4aa53bd743729c7475d7ec41093a580528b100e9a8c5b5efe8899592fc"}, + {file = "watchdog-3.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4f94069eb16657d2c6faada4624c39464f65c05606af50bb7902e036e3219be3"}, + {file = "watchdog-3.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7c5f84b5194c24dd573fa6472685b2a27cc5a17fe5f7b6fd40345378ca6812e3"}, + {file = "watchdog-3.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3aa7f6a12e831ddfe78cdd4f8996af9cf334fd6346531b16cec61c3b3c0d8da0"}, + {file = "watchdog-3.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:233b5817932685d39a7896b1090353fc8efc1ef99c9c054e46c8002561252fb8"}, + {file = "watchdog-3.0.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:13bbbb462ee42ec3c5723e1205be8ced776f05b100e4737518c67c8325cf6100"}, + {file = "watchdog-3.0.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:8f3ceecd20d71067c7fd4c9e832d4e22584318983cabc013dbf3f70ea95de346"}, + {file = "watchdog-3.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c9d8c8ec7efb887333cf71e328e39cffbf771d8f8f95d308ea4125bf5f90ba64"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0e06ab8858a76e1219e68c7573dfeba9dd1c0219476c5a44d5333b01d7e1743a"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:d00e6be486affb5781468457b21a6cbe848c33ef43f9ea4a73b4882e5f188a44"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:c07253088265c363d1ddf4b3cdb808d59a0468ecd017770ed716991620b8f77a"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:5113334cf8cf0ac8cd45e1f8309a603291b614191c9add34d33075727a967709"}, + {file = 
"watchdog-3.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:51f90f73b4697bac9c9a78394c3acbbd331ccd3655c11be1a15ae6fe289a8c83"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:ba07e92756c97e3aca0912b5cbc4e5ad802f4557212788e72a72a47ff376950d"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d429c2430c93b7903914e4db9a966c7f2b068dd2ebdd2fa9b9ce094c7d459f33"}, + {file = "watchdog-3.0.0-py3-none-win32.whl", hash = "sha256:3ed7c71a9dccfe838c2f0b6314ed0d9b22e77d268c67e015450a29036a81f60f"}, + {file = "watchdog-3.0.0-py3-none-win_amd64.whl", hash = "sha256:4c9956d27be0bb08fc5f30d9d0179a855436e655f046d288e2bcc11adfae893c"}, + {file = "watchdog-3.0.0-py3-none-win_ia64.whl", hash = "sha256:5d9f3a10e02d7371cd929b5d8f11e87d4bad890212ed3901f9b4d68767bee759"}, + {file = "watchdog-3.0.0.tar.gz", hash = "sha256:4d98a320595da7a7c5a18fc48cb633c2e73cda78f93cac2ef42d42bf609a33f9"}, +] + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + +[metadata] +lock-version = "2.0" +python-versions = ">=3.8.1,<4.0" +content-hash = "95343e4ae1dd67887432aa1acd09e5f49cbc33c2ddcfdbad9552a90a92adda79" diff --git a/libs/partners/ai21/pyproject.toml b/libs/partners/ai21/pyproject.toml new file mode 100644 index 00000000000000..9274ceb0335569 --- /dev/null +++ b/libs/partners/ai21/pyproject.toml @@ -0,0 +1,90 @@ +[tool.poetry] +name = "langchain-ai21" +version = "0.0.1" +description = "An integration package connecting AI21 and LangChain" +authors = [] +readme = "README.md" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +langchain-core = "^0.1.22" +ai21 = "^2.0.0" + +[tool.poetry.group.test] +optional = true + +[tool.poetry.group.test.dependencies] +pytest = "^7.3.0" +freezegun = "^1.2.2" +pytest-mock = "^3.10.0" +syrupy = "^4.0.2" +pytest-watcher = "^0.3.4" +pytest-asyncio = "^0.21.1" +langchain-core = {path = "../../core", develop = true} + +[tool.poetry.group.codespell] +optional = true + +[tool.poetry.group.codespell.dependencies] +codespell = "^2.2.0" + +[tool.poetry.group.test_integration] +optional = true + +[tool.poetry.group.test_integration.dependencies] + +[tool.poetry.group.lint] +optional = true + +[tool.poetry.group.lint.dependencies] +ruff = "^0.1.5" + +[tool.poetry.group.typing.dependencies] +mypy = "^0.991" +langchain-core = {path = "../../core", develop = true} + +[tool.poetry.group.dev] +optional = true + +[tool.poetry.group.dev.dependencies] +langchain-core = {path = "../../core", develop = true} + +[tool.ruff] +select = [ + "E", # pycodestyle + "F", # pyflakes + "I", # isort +] + +[tool.mypy] +disallow_untyped_defs = "True" + +[tool.coverage.run] +omit = [ + "tests/*", +] + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +# --strict-markers will raise errors on unknown marks. +# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks +# +# https://docs.pytest.org/en/7.1.x/reference/reference.html +# --strict-config any warnings encountered while parsing the `pytest` +# section of the configuration file raise errors. +# +# https://github.com/tophat/syrupy +# --snapshot-warn-unused Prints a warning on unused snapshots rather than fail the test suite. +addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5" +# Registering custom markers. 
+# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers +markers = [ + "requires: mark tests as requiring a specific library", + "asyncio: mark tests as requiring asyncio", + "compile: mark placeholder test used to compile integration tests without running them", + "scheduled: mark tests to run in scheduled testing", +] +asyncio_mode = "auto" diff --git a/libs/partners/ai21/scripts/check_imports.py b/libs/partners/ai21/scripts/check_imports.py new file mode 100644 index 00000000000000..fd21a4975b7f0b --- /dev/null +++ b/libs/partners/ai21/scripts/check_imports.py @@ -0,0 +1,17 @@ +import sys +import traceback +from importlib.machinery import SourceFileLoader + +if __name__ == "__main__": + files = sys.argv[1:] + has_failure = False + for file in files: + try: + SourceFileLoader("x", file).load_module() + except Exception: + has_failure = True + print(file) + traceback.print_exc() + print() + + sys.exit(1 if has_failure else 0) diff --git a/libs/partners/ai21/scripts/check_pydantic.sh b/libs/partners/ai21/scripts/check_pydantic.sh new file mode 100755 index 00000000000000..06b5bb81ae2361 --- /dev/null +++ b/libs/partners/ai21/scripts/check_pydantic.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# +# This script searches for lines starting with "import pydantic" or "from pydantic" +# in tracked files within a Git repository. +# +# Usage: ./scripts/check_pydantic.sh /path/to/repository + +# Check if a path argument is provided +if [ $# -ne 1 ]; then + echo "Usage: $0 /path/to/repository" + exit 1 +fi + +repository_path="$1" + +# Search for lines matching the pattern within the specified repository +result=$(git -C "$repository_path" grep -E '^import pydantic|^from pydantic') + +# Check if any matching lines were found +if [ -n "$result" ]; then + echo "ERROR: The following lines need to be updated:" + echo "$result" + echo "Please replace the code with an import from langchain_core.pydantic_v1." + echo "For example, replace 'from pydantic import BaseModel'" + echo "with 'from langchain_core.pydantic_v1 import BaseModel'" + exit 1 +fi diff --git a/libs/partners/ai21/scripts/lint_imports.sh b/libs/partners/ai21/scripts/lint_imports.sh new file mode 100755 index 00000000000000..695613c7ba8fd6 --- /dev/null +++ b/libs/partners/ai21/scripts/lint_imports.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -eu + +# Initialize a variable to keep track of errors +errors=0 + +# make sure not importing from langchain or langchain_experimental +git --no-pager grep '^from langchain\.' . && errors=$((errors+1)) +git --no-pager grep '^from langchain_experimental\.' .
&& errors=$((errors+1)) + +# Decide on an exit status based on the errors +if [ "$errors" -gt 0 ]; then + exit 1 +else + exit 0 +fi diff --git a/libs/partners/ai21/tests/__init__.py b/libs/partners/ai21/tests/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/libs/partners/ai21/tests/integration_tests/__init__.py b/libs/partners/ai21/tests/integration_tests/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/libs/partners/ai21/tests/integration_tests/test_chat_models.py b/libs/partners/ai21/tests/integration_tests/test_chat_models.py new file mode 100644 index 00000000000000..37efd41e520c43 --- /dev/null +++ b/libs/partners/ai21/tests/integration_tests/test_chat_models.py @@ -0,0 +1,43 @@ +"""Test ChatAI21 chat model.""" +from langchain_core.messages import HumanMessage +from langchain_core.outputs import ChatGeneration + +from langchain_ai21.chat_models import ChatAI21 + + +def test_invoke() -> None: + """Test invoke tokens from AI21.""" + llm = ChatAI21(model="j2-ultra") + + result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"])) + assert isinstance(result.content, str) + + +def test_generation() -> None: + """Test sync generation from AI21.""" + llm = ChatAI21(model="j2-ultra") + message = HumanMessage(content="Hello") + + result = llm.generate([[message], [message]], config=dict(tags=["foo"])) + + for generations in result.generations: + assert len(generations) == 1 + for generation in generations: + assert isinstance(generation, ChatGeneration) + assert isinstance(generation.text, str) + assert generation.text == generation.message.content + + +async def test_ageneration() -> None: + """Test async generation from AI21.""" + llm = ChatAI21(model="j2-ultra") + message = HumanMessage(content="Hello") + + result = await llm.agenerate([[message], [message]], config=dict(tags=["foo"])) + + for generations in result.generations: + assert len(generations) == 1 + for generation in generations: + assert isinstance(generation, ChatGeneration) + assert isinstance(generation.text, str) + assert generation.text == generation.message.content diff --git a/libs/partners/ai21/tests/integration_tests/test_compile.py b/libs/partners/ai21/tests/integration_tests/test_compile.py new file mode 100644 index 00000000000000..33ecccdfa0fbda --- /dev/null +++ b/libs/partners/ai21/tests/integration_tests/test_compile.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.mark.compile +def test_placeholder() -> None: + """Used for compiling integration tests without running any real tests.""" + pass diff --git a/libs/partners/ai21/tests/integration_tests/test_embeddings.py b/libs/partners/ai21/tests/integration_tests/test_embeddings.py new file mode 100644 index 00000000000000..8434234e56a4be --- /dev/null +++ b/libs/partners/ai21/tests/integration_tests/test_embeddings.py @@ -0,0 +1,19 @@ +"""Test AI21 embeddings.""" +from langchain_ai21.embeddings import AI21Embeddings + + +def test_langchain_ai21_embedding_documents() -> None: + """Test AI21 embeddings.""" + documents = ["foo bar"] + embedding = AI21Embeddings() + output = embedding.embed_documents(documents) + assert len(output) == 1 + assert len(output[0]) > 0 + + +def test_langchain_ai21_embedding_query() -> None: + """Test AI21 embeddings.""" + document = "foo bar" + embedding = AI21Embeddings() + output = embedding.embed_query(document) + assert len(output) > 0 diff --git a/libs/partners/ai21/tests/integration_tests/test_llms.py b/libs/partners/ai21/tests/integration_tests/test_llms.py new file
mode 100644 index 00000000000000..fe4a812552723c --- /dev/null +++ b/libs/partners/ai21/tests/integration_tests/test_llms.py @@ -0,0 +1,103 @@ +"""Test AI21LLM llm.""" + + +from langchain_ai21.llms import AI21LLM + + +def _generate_llm() -> AI21LLM: + """ + Create an AI21LLM configured with non-default parameters for fast, deterministic tests + """ + return AI21LLM( + model="j2-ultra", + max_tokens=2, # Use fewer tokens for a faster response + temperature=0, # for a consistent response + epoch=1, + ) + + +def test_stream() -> None: + """Test streaming tokens from AI21.""" + llm = AI21LLM( + model="j2-ultra", + ) + + for token in llm.stream("I'm Pickle Rick"): + assert isinstance(token, str) + + +async def test_abatch() -> None: + """Test batch tokens from AI21LLM.""" + llm = AI21LLM( + model="j2-ultra", + ) + + result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"]) + for token in result: + assert isinstance(token, str) + + +async def test_abatch_tags() -> None: + """Test batch tokens from AI21LLM.""" + llm = AI21LLM( + model="j2-ultra", + ) + + result = await llm.abatch( + ["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]} + ) + for token in result: + assert isinstance(token, str) + + +def test_batch() -> None: + """Test batch tokens from AI21LLM.""" + llm = AI21LLM( + model="j2-ultra", + ) + + result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"]) + for token in result: + assert isinstance(token, str) + + +async def test_ainvoke() -> None: + """Test invoke tokens from AI21LLM.""" + llm = AI21LLM( + model="j2-ultra", + ) + + result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]}) + assert isinstance(result, str) + + +def test_invoke() -> None: + """Test invoke tokens from AI21LLM.""" + llm = AI21LLM( + model="j2-ultra", + ) + + result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"])) + assert isinstance(result, str) + + +def test__generate() -> None: + llm = _generate_llm() + llm_result = llm.generate( + prompts=["Hey there, my name is Pickle Rick. What is your name?"], + stop=["##"], + ) + + assert len(llm_result.generations) > 0 + assert llm_result.llm_output["token_count"] != 0 # type: ignore + + +async def test__agenerate() -> None: + llm = _generate_llm() + llm_result = await llm.agenerate( + prompts=["Hey there, my name is Pickle Rick.
What is your name?"], + stop=["##"], + ) + + assert len(llm_result.generations) > 0 + assert llm_result.llm_output["token_count"] != 0 # type: ignore diff --git a/libs/partners/ai21/tests/unit_tests/__init__.py b/libs/partners/ai21/tests/unit_tests/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/libs/partners/ai21/tests/unit_tests/conftest.py b/libs/partners/ai21/tests/unit_tests/conftest.py new file mode 100644 index 00000000000000..43545eaf02e382 --- /dev/null +++ b/libs/partners/ai21/tests/unit_tests/conftest.py @@ -0,0 +1,91 @@ +import os +from contextlib import contextmanager +from typing import Generator +from unittest.mock import Mock + +import pytest +from ai21 import AI21Client +from ai21.models import ( + ChatOutput, + ChatResponse, + Completion, + CompletionData, + CompletionFinishReason, + CompletionsResponse, + FinishReason, + Penalty, + RoleType, +) +from pytest_mock import MockerFixture + +DUMMY_API_KEY = "test_api_key" + + +BASIC_EXAMPLE_LLM_PARAMETERS = { + "num_results": 3, + "max_tokens": 20, + "min_tokens": 10, + "temperature": 0.5, + "top_p": 0.5, + "top_k_return": 0, + "frequency_penalty": Penalty(scale=0.2, apply_to_numbers=True), + "presence_penalty": Penalty(scale=0.2, apply_to_stopwords=True), + "count_penalty": Penalty( + scale=0.2, apply_to_punctuation=True, apply_to_emojis=True + ), +} + + +@pytest.fixture +def mocked_completion_response(mocker: MockerFixture) -> Mock: + mocked_response = mocker.MagicMock(spec=CompletionsResponse) + mocked_response.prompt = "this is a test prompt" + mocked_response.completions = [ + Completion( + data=CompletionData(text="test", tokens=[]), + finish_reason=CompletionFinishReason(reason=None, length=None), + ) + ] + return mocked_response + + +@pytest.fixture +def mock_client_with_completion( + mocker: MockerFixture, mocked_completion_response: Mock +) -> Mock: + mock_client = mocker.MagicMock(spec=AI21Client) + mock_client.completion = mocker.MagicMock() + mock_client.completion.create.side_effect = [ + mocked_completion_response, + mocked_completion_response, + ] + mock_client.count_tokens.side_effect = [10, 20] + + return mock_client + + +@pytest.fixture +def mock_client_with_chat(mocker: MockerFixture) -> Mock: + mock_client = mocker.MagicMock(spec=AI21Client) + mock_client.chat = mocker.MagicMock() + + output = ChatOutput( + text="Hello Pickle Rick!", + role=RoleType.ASSISTANT, + finish_reason=FinishReason(reason="testing"), + ) + mock_client.chat.create.return_value = ChatResponse(outputs=[output]) + + return mock_client + + +@contextmanager +def temporarily_unset_api_key() -> Generator: + """ + Unset and set environment key for testing purpose for when an API KEY is not set + """ + api_key = os.environ.pop("API_KEY", None) + yield + + if api_key is not None: + os.environ["API_KEY"] = api_key diff --git a/libs/partners/ai21/tests/unit_tests/test_chat_models.py b/libs/partners/ai21/tests/unit_tests/test_chat_models.py new file mode 100644 index 00000000000000..83eb06bc45793c --- /dev/null +++ b/libs/partners/ai21/tests/unit_tests/test_chat_models.py @@ -0,0 +1,239 @@ +"""Test chat model integration.""" +from typing import List, Optional +from unittest.mock import Mock, call + +import pytest +from ai21 import MissingApiKeyError +from ai21.models import ChatMessage, Penalty, RoleType +from langchain_core.messages import ( + AIMessage, + BaseMessage, + HumanMessage, + SystemMessage, +) +from langchain_core.messages import ( + ChatMessage as LangChainChatMessage, +) + +from 
langchain_ai21.chat_models import ( + ChatAI21, + _convert_message_to_ai21_message, + _convert_messages_to_ai21_messages, +) +from tests.unit_tests.conftest import ( + BASIC_EXAMPLE_LLM_PARAMETERS, + DUMMY_API_KEY, + temporarily_unset_api_key, +) + + +def test_initialization__when_no_api_key__should_raise_exception() -> None: + """Test integration initialization.""" + with temporarily_unset_api_key(): + with pytest.raises(MissingApiKeyError): + ChatAI21(model="j2-ultra") + + +def test_initialization__when_default_parameters_in_init() -> None: + """Test chat model initialization.""" + ChatAI21(api_key=DUMMY_API_KEY, model="j2-ultra") + + +def test_initialization__when_custom_parameters_in_init() -> None: + model = "j2-mid" + num_results = 1 + max_tokens = 10 + min_tokens = 20 + temperature = 0.1 + top_p = 0.1 + top_k_returns = 0 + frequency_penalty = Penalty(scale=0.2, apply_to_numbers=True) + presence_penalty = Penalty(scale=0.2, apply_to_stopwords=True) + count_penalty = Penalty(scale=0.2, apply_to_punctuation=True, apply_to_emojis=True) + + llm = ChatAI21( + api_key=DUMMY_API_KEY, + model=model, + num_results=num_results, + max_tokens=max_tokens, + min_tokens=min_tokens, + temperature=temperature, + top_p=top_p, + top_k_returns=top_k_returns, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + count_penalty=count_penalty, + ) + assert llm.model == model + assert llm.num_results == num_results + assert llm.max_tokens == max_tokens + assert llm.min_tokens == min_tokens + assert llm.temperature == temperature + assert llm.top_p == top_p + assert llm.top_k_return == top_k_returns + assert llm.frequency_penalty == frequency_penalty + assert llm.presence_penalty == presence_penalty + assert llm.count_penalty == count_penalty + + +@pytest.mark.parametrize( + ids=[ + "when_human_message", + "when_ai_message", + ], + argnames=["message", "expected_ai21_message"], + argvalues=[ + ( + HumanMessage(content="Human Message Content"), + ChatMessage(role=RoleType.USER, text="Human Message Content"), + ), + ( + AIMessage(content="AI Message Content"), + ChatMessage(role=RoleType.ASSISTANT, text="AI Message Content"), + ), + ], +) +def test_convert_message_to_ai21_message( + message: BaseMessage, expected_ai21_message: ChatMessage +) -> None: + ai21_message = _convert_message_to_ai21_message(message) + assert ai21_message == expected_ai21_message + + +@pytest.mark.parametrize( + ids=[ + "when_system_message", + "when_langchain_chat_message", + ], + argnames=["message"], + argvalues=[ + (SystemMessage(content="System Message Content"),), + (LangChainChatMessage(content="Chat Message Content", role="human"),), + ], +) +def test_convert_message_to_ai21_message__when_invalid_role__should_raise_exception( + message: BaseMessage, +) -> None: + with pytest.raises(ValueError) as e: + _convert_message_to_ai21_message(message) + assert e.value.args[0] == ( + f"Could not resolve role type from message {message}. " + f"Only support {HumanMessage.__name__} and {AIMessage.__name__}."
+ ) + + +@pytest.mark.parametrize( + ids=[ + "when_all_messages_are_human_messages__should_return_system_none", + "when_first_message_is_system__should_return_system", + ], + argnames=["messages", "expected_system", "expected_messages"], + argvalues=[ + ( + [ + HumanMessage(content="Human Message Content 1"), + HumanMessage(content="Human Message Content 2"), + ], + None, + [ + ChatMessage(role=RoleType.USER, text="Human Message Content 1"), + ChatMessage(role=RoleType.USER, text="Human Message Content 2"), + ], + ), + ( + [ + SystemMessage(content="System Message Content 1"), + HumanMessage(content="Human Message Content 1"), + ], + "System Message Content 1", + [ + ChatMessage(role=RoleType.USER, text="Human Message Content 1"), + ], + ), + ], +) +def test_convert_messages( + messages: List[BaseMessage], + expected_system: Optional[str], + expected_messages: List[ChatMessage], +) -> None: + system, ai21_messages = _convert_messages_to_ai21_messages(messages) + assert ai21_messages == expected_messages + assert system == expected_system + + +def test_convert_messages_when_system_is_not_first__should_raise_value_error() -> None: + messages = [ + HumanMessage(content="Human Message Content 1"), + SystemMessage(content="System Message Content 1"), + ] + with pytest.raises(ValueError): + _convert_messages_to_ai21_messages(messages) + + +def test_invoke(mock_client_with_chat: Mock) -> None: + chat_input = "I'm Pickle Rick" + + llm = ChatAI21( + model="j2-ultra", + api_key=DUMMY_API_KEY, + client=mock_client_with_chat, + **BASIC_EXAMPLE_LLM_PARAMETERS, + ) + llm.invoke(input=chat_input, config=dict(tags=["foo"])) + + mock_client_with_chat.chat.create.assert_called_once_with( + model="j2-ultra", + messages=[ChatMessage(role=RoleType.USER, text=chat_input)], + system="", + stop_sequences=None, + **BASIC_EXAMPLE_LLM_PARAMETERS, + ) + + +def test_generate(mock_client_with_chat: Mock) -> None: + messages0 = [ + HumanMessage(content="I'm Pickle Rick"), + AIMessage(content="Hello Pickle Rick! 
I am your AI Assistant"), + HumanMessage(content="Nice to meet you."), + ] + messages1 = [ + SystemMessage(content="system message"), + HumanMessage(content="What is 1 + 1"), + ] + llm = ChatAI21( + model="j2-ultra", + client=mock_client_with_chat, + **BASIC_EXAMPLE_LLM_PARAMETERS, + ) + + llm.generate(messages=[messages0, messages1]) + mock_client_with_chat.chat.create.assert_has_calls( + [ + call( + model="j2-ultra", + messages=[ + ChatMessage( + role=RoleType.USER, + text=str(messages0[0].content), + ), + ChatMessage( + role=RoleType.ASSISTANT, text=str(messages0[1].content) + ), + ChatMessage(role=RoleType.USER, text=str(messages0[2].content)), + ], + system="", + stop_sequences=None, + **BASIC_EXAMPLE_LLM_PARAMETERS, + ), + call( + model="j2-ultra", + messages=[ + ChatMessage(role=RoleType.USER, text=str(messages1[1].content)), + ], + system="system message", + stop_sequences=None, + **BASIC_EXAMPLE_LLM_PARAMETERS, + ), + ] + ) diff --git a/libs/partners/ai21/tests/unit_tests/test_embeddings.py b/libs/partners/ai21/tests/unit_tests/test_embeddings.py new file mode 100644 index 00000000000000..a366b32dd331f9 --- /dev/null +++ b/libs/partners/ai21/tests/unit_tests/test_embeddings.py @@ -0,0 +1,67 @@ +"""Test embedding model integration.""" +from unittest.mock import Mock + +import pytest +from ai21 import AI21Client, MissingApiKeyError +from ai21.models import EmbedResponse, EmbedResult, EmbedType +from pytest_mock import MockerFixture + +from langchain_ai21.embeddings import AI21Embeddings +from tests.unit_tests.conftest import DUMMY_API_KEY, temporarily_unset_api_key + +_EXAMPLE_EMBEDDING_0 = [1.0, 2.0, 3.0] +_EXAMPLE_EMBEDDING_1 = [4.0, 5.0, 6.0] +_EXAMPLE_EMBEDDING_2 = [7.0, 8.0, 9.0] + +_EXAMPLE_EMBEDDING_RESPONSE = EmbedResponse( + results=[ + EmbedResult(_EXAMPLE_EMBEDDING_0), + EmbedResult(_EXAMPLE_EMBEDDING_1), + EmbedResult(_EXAMPLE_EMBEDDING_2), + ], + id="test_id", +) + + +def test_initialization__when_no_api_key__should_raise_exception() -> None: + """Test integration initialization.""" + with temporarily_unset_api_key(): + with pytest.raises(MissingApiKeyError): + AI21Embeddings() + + +@pytest.fixture +def mock_client_with_embeddings(mocker: MockerFixture) -> Mock: + mock_client = mocker.MagicMock(spec=AI21Client) + mock_client.embed = mocker.MagicMock() + mock_client.embed.create.return_value = _EXAMPLE_EMBEDDING_RESPONSE + + return mock_client + + +def test_embed_query(mock_client_with_embeddings: Mock) -> None: + llm = AI21Embeddings(client=mock_client_with_embeddings, api_key=DUMMY_API_KEY) + + text = "Hello embeddings world!" 
+ response = llm.embed_query(text=text) + assert response == _EXAMPLE_EMBEDDING_0 + mock_client_with_embeddings.embed.create.assert_called_once_with( + texts=[text], + type=EmbedType.QUERY, + ) + + +def test_embed_documents(mock_client_with_embeddings: Mock) -> None: + llm = AI21Embeddings(client=mock_client_with_embeddings, api_key=DUMMY_API_KEY) + + texts = ["Hello embeddings world!", "Some other text", "Some more text"] + response = llm.embed_documents(texts=texts) + assert response == [ + _EXAMPLE_EMBEDDING_0, + _EXAMPLE_EMBEDDING_1, + _EXAMPLE_EMBEDDING_2, + ] + mock_client_with_embeddings.embed.create.assert_called_once_with( + texts=texts, + type=EmbedType.SEGMENT, + ) diff --git a/libs/partners/ai21/tests/unit_tests/test_imports.py b/libs/partners/ai21/tests/unit_tests/test_imports.py new file mode 100644 index 00000000000000..28b11651a7325c --- /dev/null +++ b/libs/partners/ai21/tests/unit_tests/test_imports.py @@ -0,0 +1,11 @@ +from langchain_ai21 import __all__ + +EXPECTED_ALL = [ + "AI21LLM", + "ChatAI21", + "AI21Embeddings", +] + + +def test_all_imports() -> None: + assert sorted(EXPECTED_ALL) == sorted(__all__) diff --git a/libs/partners/ai21/tests/unit_tests/test_llms.py b/libs/partners/ai21/tests/unit_tests/test_llms.py new file mode 100644 index 00000000000000..a82240bea5d55f --- /dev/null +++ b/libs/partners/ai21/tests/unit_tests/test_llms.py @@ -0,0 +1,107 @@ +"""Test AI21 Chat API wrapper.""" +from unittest.mock import Mock, call + +import pytest +from ai21 import MissingApiKeyError +from ai21.models import ( + Penalty, +) + +from langchain_ai21 import AI21LLM +from tests.unit_tests.conftest import ( + BASIC_EXAMPLE_LLM_PARAMETERS, + DUMMY_API_KEY, + temporarily_unset_api_key, +) + + +def test_initialization__when_no_api_key__should_raise_exception() -> None: + """Test integration initialization.""" + with temporarily_unset_api_key(): + with pytest.raises(MissingApiKeyError): + AI21LLM( + model="j2-ultra", + ) + + +def test_initialization__when_default_parameters() -> None: + """Test integration initialization.""" + AI21LLM( + api_key=DUMMY_API_KEY, + model="j2-ultra", + ) + + +def test_initialization__when_custom_parameters_to_init() -> None: + """Test integration initialization.""" + AI21LLM( + api_key=DUMMY_API_KEY, + model="j2-mid", + num_results=2, + max_tokens=20, + min_tokens=10, + temperature=0.5, + top_p=0.5, + top_k_returns=0, + stop_sequences=["\n"], + frequency_penalty=Penalty(scale=0.2, apply_to_numbers=True), + presence_penalty=Penalty(scale=0.2, apply_to_stopwords=True), + count_penalty=Penalty( + scale=0.2, apply_to_punctuation=True, apply_to_emojis=True + ), + custom_model="test_model", + epoch=1, + ) + + +def test_generate(mock_client_with_completion: Mock) -> None: + # Setup test + prompt0 = "Hi, my name is what?" + prompt1 = "My name is who?" 
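+ # The shared model kwargs come from BASIC_EXAMPLE_LLM_PARAMETERS in conftest; + # the call assertions below expect them forwarded verbatim to completion.create.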
+ stop = ["\n"] + custom_model = "test_model" + epoch = 1 + + ai21 = AI21LLM( + model="j2-ultra", + api_key=DUMMY_API_KEY, + client=mock_client_with_completion, + custom_model=custom_model, + epoch=epoch, + **BASIC_EXAMPLE_LLM_PARAMETERS, + ) + + # Make call to testing function + ai21.generate( + [prompt0, prompt1], + stop=stop, + ) + + # Assertions + mock_client_with_completion.count_tokens.assert_has_calls( + [ + call(prompt0), + call(prompt1), + ], + ) + + mock_client_with_completion.completion.create.assert_has_calls( + [ + call( + prompt=prompt0, + model="j2-ultra", + custom_model=custom_model, + stop_sequences=stop, + epoch=epoch, + **BASIC_EXAMPLE_LLM_PARAMETERS, + ), + call( + prompt=prompt1, + model="j2-ultra", + custom_model=custom_model, + stop_sequences=stop, + epoch=epoch, + **BASIC_EXAMPLE_LLM_PARAMETERS, + ), + ] + ) diff --git a/libs/partners/astradb/.gitignore b/libs/partners/astradb/.gitignore new file mode 100644 index 00000000000000..bdc93231f03535 --- /dev/null +++ b/libs/partners/astradb/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +*.env +.mypy_cache +.ruff_cache +.pytest_cache \ No newline at end of file diff --git a/libs/partners/astradb/LICENSE b/libs/partners/astradb/LICENSE new file mode 100644 index 00000000000000..426b65090341f3 --- /dev/null +++ b/libs/partners/astradb/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 LangChain, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/libs/partners/astradb/Makefile b/libs/partners/astradb/Makefile new file mode 100644 index 00000000000000..ee1200c9af5e47 --- /dev/null +++ b/libs/partners/astradb/Makefile @@ -0,0 +1,66 @@ +SHELL := /bin/bash +.PHONY: all format lint test tests integration_test integration_tests spell_check help + +# Default target executed when no arguments are given to make. +all: help + +# Define a variable for the test file path. +TEST_FILE ?= tests/unit_tests/ +INTEGRATION_TEST_FILE ?= tests/integration_tests/ + +test: + poetry run pytest $(TEST_FILE) + +tests: + poetry run pytest $(TEST_FILE) + +integration_test: + poetry run pytest $(INTEGRATION_TEST_FILE) + +integration_tests: + poetry run pytest $(INTEGRATION_TEST_FILE) + +###################### +# LINTING AND FORMATTING +###################### + +# Define a variable for Python and notebook files. +PYTHON_FILES=. +MYPY_CACHE=.mypy_cache +lint format: PYTHON_FILES=. 
+lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/astradb --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$') +lint_package: PYTHON_FILES=langchain_astradb +lint_tests: PYTHON_FILES=tests +lint_tests: MYPY_CACHE=.mypy_cache_test + +lint lint_diff lint_package lint_tests: + poetry run ruff . + poetry run ruff format $(PYTHON_FILES) --diff + poetry run ruff --select I $(PYTHON_FILES) + mkdir -p $(MYPY_CACHE); poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE) + +format format_diff: + poetry run ruff format $(PYTHON_FILES) + poetry run ruff --select I --fix $(PYTHON_FILES) + +spell_check: + poetry run codespell --toml pyproject.toml + +spell_fix: + poetry run codespell --toml pyproject.toml -w + +check_imports: $(shell find langchain_astradb -name '*.py') + poetry run python ./scripts/check_imports.py $^ + +###################### +# HELP +###################### + +help: + @echo '----' + @echo 'check_imports - check imports' + @echo 'format - run code formatters' + @echo 'lint - run linters' + @echo 'test - run unit tests' + @echo 'tests - run unit tests' + @echo 'test TEST_FILE= - run all tests in file' diff --git a/libs/partners/astradb/README.md b/libs/partners/astradb/README.md new file mode 100644 index 00000000000000..a4c2dc84e1fb5f --- /dev/null +++ b/libs/partners/astradb/README.md @@ -0,0 +1,35 @@ +# langchain-astradb + +This package contains the LangChain integrations for using DataStax Astra DB. + +> DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Apache Cassandra® and made conveniently available +> through an easy-to-use JSON API. + +_**Note.** For a short transitional period, only some of the Astra DB integration classes live in this package (the rest are still in `langchain-community`). By version 0.2 of LangChain at the latest, all of the Astra DB support will be removed from `langchain-community` and included in this package._ + +## Installation and Setup + +Installation of this partner package: + +```bash +pip install langchain-astradb +``` + +## Integrations overview + +### Vector Store + +```python +from langchain_astradb.vectorstores import AstraDBVectorStore + +my_store = AstraDBVectorStore( + embedding=my_embeddings, + collection_name="my_store", + api_endpoint="https://...", + token="AstraCS:...", +) +``` + +## Reference + +See the [LangChain docs page](https://python.langchain.com/docs/integrations/providers/astradb) for a more detailed listing. diff --git a/libs/partners/astradb/langchain_astradb/__init__.py b/libs/partners/astradb/langchain_astradb/__init__.py new file mode 100644 index 00000000000000..fc86dd73bcf46d --- /dev/null +++ b/libs/partners/astradb/langchain_astradb/__init__.py @@ -0,0 +1,5 @@ +from langchain_astradb.vectorstores import AstraDBVectorStore + +__all__ = [ + "AstraDBVectorStore", +] diff --git a/libs/partners/astradb/langchain_astradb/py.typed b/libs/partners/astradb/langchain_astradb/py.typed new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/libs/partners/astradb/langchain_astradb/utils/mmr.py b/libs/partners/astradb/langchain_astradb/utils/mmr.py new file mode 100644 index 00000000000000..feb34ad1c23d61 --- /dev/null +++ b/libs/partners/astradb/langchain_astradb/utils/mmr.py @@ -0,0 +1,87 @@ +""" +Tools for the Maximal Marginal Relevance (MMR) reranking. +Duplicated from langchain_community to avoid cross-dependencies.
+ +Functions "maximal_marginal_relevance" and "cosine_similarity" +are duplicated in this utility respectively from modules: + - "libs/community/langchain_community/vectorstores/utils.py" + - "libs/community/langchain_community/utils/math.py" +""" + +import logging +from typing import List, Union + +import numpy as np + +logger = logging.getLogger(__name__) + +Matrix = Union[List[List[float]], List[np.ndarray], np.ndarray] + + +def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray: + """Row-wise cosine similarity between two equal-width matrices.""" + if len(X) == 0 or len(Y) == 0: + return np.array([]) + + X = np.array(X) + Y = np.array(Y) + if X.shape[1] != Y.shape[1]: + raise ValueError( + f"Number of columns in X and Y must be the same. X has shape {X.shape} " + f"and Y has shape {Y.shape}." + ) + try: + import simsimd as simd # type: ignore + + X = np.array(X, dtype=np.float32) + Y = np.array(Y, dtype=np.float32) + Z = 1 - simd.cdist(X, Y, metric="cosine") + if isinstance(Z, float): + return np.array([Z]) + return Z + except ImportError: + logger.info( + "Unable to import simsimd, defaulting to NumPy implementation. If you want " + "to use simsimd please install with `pip install simsimd`." + ) + X_norm = np.linalg.norm(X, axis=1) + Y_norm = np.linalg.norm(Y, axis=1) + # Ignore divide by zero errors run time warnings as those are handled below. + with np.errstate(divide="ignore", invalid="ignore"): + similarity = np.dot(X, Y.T) / np.outer(X_norm, Y_norm) + similarity[np.isnan(similarity) | np.isinf(similarity)] = 0.0 + return similarity + + +def maximal_marginal_relevance( + query_embedding: np.ndarray, + embedding_list: list, + lambda_mult: float = 0.5, + k: int = 4, +) -> List[int]: + """Calculate maximal marginal relevance.""" + if min(k, len(embedding_list)) <= 0: + return [] + if query_embedding.ndim == 1: + query_embedding = np.expand_dims(query_embedding, axis=0) + similarity_to_query = cosine_similarity(query_embedding, embedding_list)[0] + most_similar = int(np.argmax(similarity_to_query)) + idxs = [most_similar] + selected = np.array([embedding_list[most_similar]]) + while len(idxs) < min(k, len(embedding_list)): + best_score = -np.inf + idx_to_add = -1 + similarity_to_selected = cosine_similarity(embedding_list, selected) + for i, query_score in enumerate(similarity_to_query): + if i in idxs: + continue + redundant_score = max(similarity_to_selected[i]) + equation_score = ( + lambda_mult * query_score - (1 - lambda_mult) * redundant_score + ) + if equation_score > best_score: + best_score = equation_score + idx_to_add = i + idxs.append(idx_to_add) + selected = np.append(selected, [embedding_list[idx_to_add]], axis=0) + return idxs diff --git a/libs/partners/astradb/langchain_astradb/vectorstores/__init__.py b/libs/partners/astradb/langchain_astradb/vectorstores/__init__.py new file mode 100644 index 00000000000000..310732d125ff95 --- /dev/null +++ b/libs/partners/astradb/langchain_astradb/vectorstores/__init__.py @@ -0,0 +1,5 @@ +from langchain_astradb.vectorstores.astradb import AstraDBVectorStore + +__all__ = [ + "AstraDBVectorStore", +] diff --git a/libs/partners/astradb/langchain_astradb/vectorstores/astradb.py b/libs/partners/astradb/langchain_astradb/vectorstores/astradb.py new file mode 100644 index 00000000000000..e501113e3083ae --- /dev/null +++ b/libs/partners/astradb/langchain_astradb/vectorstores/astradb.py @@ -0,0 +1,1317 @@ +from __future__ import annotations + +import asyncio +import uuid +import warnings +from asyncio import Task +from concurrent.futures 
import ThreadPoolExecutor +from typing import ( + Any, + Callable, + Dict, + Iterable, + List, + Optional, + Set, + Tuple, + Type, + TypeVar, + cast, +) + +import numpy as np +from astrapy.db import ( + AstraDB as AstraDBClient, +) +from astrapy.db import ( + AstraDBCollection, + AsyncAstraDBCollection, +) +from astrapy.db import ( + AsyncAstraDB as AsyncAstraDBClient, +) +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_core.runnables import run_in_executor +from langchain_core.runnables.utils import gather_with_concurrency +from langchain_core.utils.iter import batch_iterate +from langchain_core.vectorstores import VectorStore + +from langchain_astradb.utils.mmr import maximal_marginal_relevance + +T = TypeVar("T") +U = TypeVar("U") +DocDict = Dict[str, Any] # dicts expressing entries to insert + +# Batch/concurrency default values (if parameters not provided): +# Size of batches for bulk insertions: +# (20 is the max batch size for the HTTP API at the time of writing) +DEFAULT_BATCH_SIZE = 20 +# Number of threads to insert batches concurrently: +DEFAULT_BULK_INSERT_BATCH_CONCURRENCY = 16 +# Number of threads in a batch to insert pre-existing entries: +DEFAULT_BULK_INSERT_OVERWRITE_CONCURRENCY = 10 +# Number of threads (for deleting multiple rows concurrently): +DEFAULT_BULK_DELETE_CONCURRENCY = 20 + + +def _unique_list(lst: List[T], key: Callable[[T], U]) -> List[T]: + visited_keys: Set[U] = set() + new_lst = [] + for item in lst: + item_key = key(item) + if item_key not in visited_keys: + visited_keys.add(item_key) + new_lst.append(item) + return new_lst + + +class AstraDBVectorStore(VectorStore): + """Wrapper around DataStax Astra DB for vector-store workloads. + + For quickstart and details, visit: + docs.datastax.com/en/astra/home/astra.html + + Example: + .. code-block:: python + + from langchain_astradb.vectorstores import AstraDBVectorStore + from langchain_openai.embeddings import OpenAIEmbeddings + + embeddings = OpenAIEmbeddings() + vectorstore = AstraDBVectorStore( + embedding=embeddings, + collection_name="my_store", + token="AstraCS:...", + api_endpoint="https://-.apps.astra.datastax.com" + ) + + vectorstore.add_texts(["Giraffes", "All good here"]) + results = vectorstore.similarity_search("Everything's ok", k=1) + + Constructor Args (only keyword-arguments accepted): + embedding (Embeddings): embedding function to use. + collection_name (str): name of the Astra DB collection to create/use. + token (Optional[str]): API token for Astra DB usage. + api_endpoint (Optional[str]): full URL to the API endpoint, + such as "https://-us-east1.apps.astra.datastax.com". + astra_db_client (Optional[astrapy.db.AstraDB]): + *alternative to token+api_endpoint*, + you can pass an already-created 'astrapy.db.AstraDB' instance. + async_astra_db_client (Optional[astrapy.db.AsyncAstraDB]): + same as `astra_db_client`, but the basis for the async API + of the vector store. + namespace (Optional[str]): namespace (aka keyspace) where the + collection is created. Defaults to the database's "default namespace". + metric (Optional[str]): similarity function to use out of those + available in Astra DB. If left out, it will use Astra DB API's + defaults (i.e. "cosine" - but, for performance reasons, + "dot_product" is suggested if embeddings are normalized to one). + + Advanced arguments (coming with sensible defaults): + batch_size (Optional[int]): Size of batches for bulk insertions. 
+ bulk_insert_batch_concurrency (Optional[int]): Number of threads + to insert batches concurrently. + bulk_insert_overwrite_concurrency (Optional[int]): Number of + threads in a batch to insert pre-existing entries. + bulk_delete_concurrency (Optional[int]): Number of threads + (for deleting multiple rows concurrently). + pre_delete_collection (Optional[bool]): whether to delete the collection + before creating it. If False and the collection already exists, + the collection will be used as is. + + A note on concurrency: as a rule of thumb, on a typical client machine + it is suggested to keep the quantity + bulk_insert_batch_concurrency * bulk_insert_overwrite_concurrency + much below 1000 to avoid exhausting the client multithreading/networking + resources. The hardcoded defaults are somewhat conservative to meet + most machines' specs, but a sensible choice to test may be: + bulk_insert_batch_concurrency = 80 + bulk_insert_overwrite_concurrency = 10 + A bit of experimentation is required to nail the best results here, + depending on both the machine/network specs and the expected workload + (specifically, how often a write is an update of an existing id). + Remember you can pass concurrency settings to individual calls to + add_texts and add_documents as well. + + A note on passing astra_db_client and/or async_astra_db_client instead + of the credentials (token, api_endpoint): + - if you pass only the async client when creating the store, + the sync methods will error when called. + - conversely, if you pass only the sync client, the async methods will + still be available, but will be wrapping its sync counterpart + in a `run_in_executor` construct instead of using the native async. + """ + + @staticmethod + def _filter_to_metadata(filter_dict: Optional[Dict[str, Any]]) -> Dict[str, Any]: + if filter_dict is None: + return {} + else: + metadata_filter = {} + for k, v in filter_dict.items(): + if k and k[0] == "$": + if isinstance(v, list): + metadata_filter[k] = [ + AstraDBVectorStore._filter_to_metadata(f) for f in v + ] + else: + # assume each list item can be fed back to this function + metadata_filter[k] = AstraDBVectorStore._filter_to_metadata(v) # type: ignore[assignment] + else: + metadata_filter[f"metadata.{k}"] = v + + return metadata_filter + + def __init__( + self, + *, + embedding: Embeddings, + collection_name: str, + token: Optional[str] = None, + api_endpoint: Optional[str] = None, + astra_db_client: Optional[AstraDBClient] = None, + async_astra_db_client: Optional[AsyncAstraDBClient] = None, + namespace: Optional[str] = None, + metric: Optional[str] = None, + batch_size: Optional[int] = None, + bulk_insert_batch_concurrency: Optional[int] = None, + bulk_insert_overwrite_concurrency: Optional[int] = None, + bulk_delete_concurrency: Optional[int] = None, + pre_delete_collection: bool = False, + ) -> None: + """ + Create an AstraDBVectorStore vector store object. See class docstring for help. + """ + + # Conflicting-arg checks: + if astra_db_client is not None or async_astra_db_client is not None: + if token is not None or api_endpoint is not None: + raise ValueError( + "You cannot pass 'astra_db_client' or 'async_astra_db_client' to " + "AstraDBVectorStore if passing 'token' and 'api_endpoint'." 
+ ) + + self.embedding = embedding + self.collection_name = collection_name + self.token = token + self.api_endpoint = api_endpoint + self.namespace = namespace + # Concurrency settings + self.batch_size: int = batch_size or DEFAULT_BATCH_SIZE + self.bulk_insert_batch_concurrency: int = ( + bulk_insert_batch_concurrency or DEFAULT_BULK_INSERT_BATCH_CONCURRENCY + ) + self.bulk_insert_overwrite_concurrency: int = ( + bulk_insert_overwrite_concurrency + or DEFAULT_BULK_INSERT_OVERWRITE_CONCURRENCY + ) + self.bulk_delete_concurrency: int = ( + bulk_delete_concurrency or DEFAULT_BULK_DELETE_CONCURRENCY + ) + # "vector-related" settings + self._embedding_dimension: Optional[int] = None + self.metric = metric + + self.astra_db = astra_db_client + self.async_astra_db = async_astra_db_client + self.collection = None + self.async_collection = None + + if token and api_endpoint: + self.astra_db = AstraDBClient( + token=cast(str, self.token), + api_endpoint=cast(str, self.api_endpoint), + namespace=self.namespace, + ) + self.async_astra_db = AsyncAstraDBClient( + token=cast(str, self.token), + api_endpoint=cast(str, self.api_endpoint), + namespace=self.namespace, + ) + + if self.astra_db is not None: + self.collection = AstraDBCollection( + collection_name=self.collection_name, + astra_db=self.astra_db, + ) + + self.async_setup_db_task: Optional[Task] = None + if self.async_astra_db is not None: + self.async_collection = AsyncAstraDBCollection( + collection_name=self.collection_name, + astra_db=self.async_astra_db, + ) + try: + asyncio.get_running_loop() + self.async_setup_db_task = asyncio.create_task( + self._setup_db(pre_delete_collection) + ) + except RuntimeError: + pass + + if self.async_setup_db_task is None: + if not pre_delete_collection: + self._provision_collection() + else: + self.clear() + + def _ensure_astra_db_client(self) -> None: + """ + If no error is raised, that means self.collection + is also not None (as per constructor flow). + """ + if not self.astra_db: + raise ValueError("Missing AstraDB client") + + async def _setup_db(self, pre_delete_collection: bool) -> None: + if pre_delete_collection: + # _setup_db is called from the constructor only, from a place + # where async_astra_db is not None for sure + await self.async_astra_db.delete_collection( # type: ignore[union-attr] + collection_name=self.collection_name, + ) + await self._aprovision_collection() + + async def _ensure_db_setup(self) -> None: + if self.async_setup_db_task: + await self.async_setup_db_task + + def _get_embedding_dimension(self) -> int: + if self._embedding_dimension is None: + self._embedding_dimension = len( + self.embedding.embed_query("This is a sample sentence.") + ) + return self._embedding_dimension + + def _provision_collection(self) -> None: + """ + Run the API invocation to create the collection on the backend. + + Internal-usage method, no object members are set, + other than working on the underlying actual storage. + """ + self._ensure_astra_db_client() + # self.astra_db is not None (by _ensure_astra_db_client) + self.astra_db.create_collection( # type: ignore[union-attr] + dimension=self._get_embedding_dimension(), + collection_name=self.collection_name, + metric=self.metric, + ) + + async def _aprovision_collection(self) -> None: + """ + Run the API invocation to create the collection on the backend. + + Internal-usage method, no object members are set, + other than working on the underlying actual storage. 
+ """ + if not self.async_astra_db: + await run_in_executor(None, self._provision_collection) + else: + await self.async_astra_db.create_collection( + dimension=self._get_embedding_dimension(), + collection_name=self.collection_name, + metric=self.metric, + ) + + @property + def embeddings(self) -> Embeddings: + return self.embedding + + @staticmethod + def _dont_flip_the_cos_score(similarity0to1: float) -> float: + """Keep similarity from client unchanged ad it's in [0:1] already.""" + return similarity0to1 + + def _select_relevance_score_fn(self) -> Callable[[float], float]: + """ + The underlying API calls already returns a "score proper", + i.e. one in [0, 1] where higher means more *similar*, + so here the final score transformation is not reversing the interval: + """ + return self._dont_flip_the_cos_score + + def clear(self) -> None: + """Empty the collection of all its stored entries.""" + self._ensure_astra_db_client() + # self.collection is not None (by _ensure_astra_db_client) + self.collection.delete_many(filter={}) # type: ignore[union-attr] + + async def aclear(self) -> None: + """Empty the collection of all its stored entries.""" + await self._ensure_db_setup() + if not self.async_astra_db: + return await run_in_executor(None, self.clear) + else: + # async_collection not None if so is async_astra_db (constr. flow) + await self.async_collection.delete_many({}) # type: ignore[union-attr] + + def delete_by_document_id(self, document_id: str) -> bool: + """ + Remove a single document from the store, given its document_id (str). + Return True if a document has indeed been deleted, False if ID not found. + """ + self._ensure_astra_db_client() + # self.collection is not None (by _ensure_astra_db_client) + deletion_response = self.collection.delete_one(document_id) # type: ignore[union-attr] + return ((deletion_response or {}).get("status") or {}).get( + "deletedCount", 0 + ) == 1 + + async def adelete_by_document_id(self, document_id: str) -> bool: + """ + Remove a single document from the store, given its document_id (str). + Return True if a document has indeed been deleted, False if ID not found. + """ + await self._ensure_db_setup() + if not self.async_collection: + return await run_in_executor(None, self.delete_by_document_id, document_id) + deletion_response = await self.async_collection.delete_one(document_id) + return ((deletion_response or {}).get("status") or {}).get( + "deletedCount", 0 + ) == 1 + + def delete( + self, + ids: Optional[List[str]] = None, + concurrency: Optional[int] = None, + **kwargs: Any, + ) -> Optional[bool]: + """Delete by vector ids. + + Args: + ids (Optional[List[str]]): List of ids to delete. + concurrency (Optional[int]): max number of threads issuing + single-doc delete requests. Defaults to instance-level setting. + + Returns: + Optional[bool]: True if deletion is successful, + False otherwise, None if not implemented. + """ + + if kwargs: + warnings.warn( + "Method 'delete' of AstraDBVectorStore vector store invoked with " + f"unsupported arguments ({', '.join(sorted(kwargs.keys()))}), " + "which will be ignored." 
+    async def adelete(
+        self,
+        ids: Optional[List[str]] = None,
+        concurrency: Optional[int] = None,
+        **kwargs: Any,
+    ) -> Optional[bool]:
+        """Delete by vector ids.
+
+        Args:
+            ids: List of ids to delete.
+            concurrency (Optional[int]): max number of concurrent delete queries.
+                Defaults to instance-level setting.
+            **kwargs: Additional keyword arguments, which are ignored
+                (a warning is emitted if any are passed).
+
+        Returns:
+            Optional[bool]: True if deletion is successful,
+                False otherwise, None if not implemented.
+        """
+        if kwargs:
+            warnings.warn(
+                "Method 'adelete' of AstraDBVectorStore invoked with "
+                f"unsupported arguments ({', '.join(sorted(kwargs.keys()))}), "
+                "which will be ignored."
+            )
+
+        if ids is None:
+            raise ValueError("No ids provided to delete.")
+
+        return all(
+            await gather_with_concurrency(
+                concurrency, *[self.adelete_by_document_id(doc_id) for doc_id in ids]
+            )
+        )
+
+    def delete_collection(self) -> None:
+        """
+        Completely delete the collection from the database (as opposed
+        to 'clear()', which only empties it).
+        Stored data is lost and unrecoverable, resources are freed.
+        Use with caution.
+        """
+        self._ensure_astra_db_client()
+        # self.astra_db is not None (by _ensure_astra_db_client)
+        self.astra_db.delete_collection(  # type: ignore[union-attr]
+            collection_name=self.collection_name,
+        )
+
+    async def adelete_collection(self) -> None:
+        """
+        Completely delete the collection from the database (as opposed
+        to 'clear()', which only empties it).
+        Stored data is lost and unrecoverable, resources are freed.
+        Use with caution.
+        """
+        await self._ensure_db_setup()
+        if not self.async_astra_db:
+            return await run_in_executor(None, self.delete_collection)
+        else:
+            await self.async_astra_db.delete_collection(
+                collection_name=self.collection_name,
+            )
+
+    @staticmethod
+    def _get_documents_to_insert(
+        texts: Iterable[str],
+        embedding_vectors: List[List[float]],
+        metadatas: Optional[List[dict]] = None,
+        ids: Optional[List[str]] = None,
+    ) -> List[DocDict]:
+        if ids is None:
+            ids = [uuid.uuid4().hex for _ in texts]
+        if metadatas is None:
+            metadatas = [{} for _ in texts]
+        #
+        documents_to_insert = [
+            {
+                "content": b_txt,
+                "_id": b_id,
+                "$vector": b_emb,
+                "metadata": b_md,
+            }
+            for b_txt, b_emb, b_id, b_md in zip(
+                texts,
+                embedding_vectors,
+                ids,
+                metadatas,
+            )
+        ]
+        # make unique by id, keeping the last
+        uniqued_documents_to_insert = _unique_list(
+            documents_to_insert[::-1],
+            lambda document: document["_id"],
+        )[::-1]
+        return uniqued_documents_to_insert
+
+    @staticmethod
+    def _get_missing_from_batch(
+        document_batch: List[DocDict], insert_result: Dict[str, Any]
+    ) -> Tuple[List[str], List[DocDict]]:
+        if "status" not in insert_result:
+            raise ValueError(
+                f"API Exception while running bulk insertion: {str(insert_result)}"
+            )
+        batch_inserted = insert_result["status"]["insertedIds"]
+        # estimate of the preexisting documents whose insertion failed
+        missed_inserted_ids = {document["_id"] for document in document_batch} - set(
+            batch_inserted
+        )
+        errors = insert_result.get("errors", [])
+        # watch out for error sources other than "document already exists"
+        num_errors = len(errors)
+        unexpected_errors = any(
+            error.get("errorCode") != "DOCUMENT_ALREADY_EXISTS" for error in errors
+        )
+        if num_errors != len(missed_inserted_ids) or unexpected_errors:
+            raise ValueError(
+                f"API Exception while running bulk insertion: {str(errors)}"
+            )
+        # deal with the missing insertions as upserts
+        missing_from_batch = [
+            document
+            for document in document_batch
+            if document["_id"] in missed_inserted_ids
+        ]
+        return batch_inserted, missing_from_batch
+
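The two helpers above feed the upsert strategy of `add_texts`/`aadd_texts` below: a bulk `insert_many` with `ordered: False` is attempted first, and documents that fail only because their `_id` already exists are retried as individual replacements. A sketch of the resulting semantics, again with the hypothetical `store`:

```python
# Explicit ids give upsert semantics (sketch, assuming `store` as above):
ids = store.add_texts(["version 1 of the doc"], ids=["doc-1"])
ids = store.add_texts(["version 2 of the doc"], ids=["doc-1"])  # replaced in place
assert ids == ["doc-1"]
```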
+    def add_texts(
+        self,
+        texts: Iterable[str],
+        metadatas: Optional[List[dict]] = None,
+        ids: Optional[List[str]] = None,
+        *,
+        batch_size: Optional[int] = None,
+        batch_concurrency: Optional[int] = None,
+        overwrite_concurrency: Optional[int] = None,
+        **kwargs: Any,
+    ) -> List[str]:
+        """Run texts through the embeddings and add them to the vectorstore.
+
+        If passing explicit ids, those entries whose id is in the store already
+        will be replaced.
+
+        Args:
+            texts (Iterable[str]): Texts to add to the vectorstore.
+            metadatas (Optional[List[dict]], optional): Optional list of metadatas.
+            ids (Optional[List[str]], optional): Optional list of ids.
+            batch_size (Optional[int]): Number of documents in each API call.
+                Check the underlying Astra DB HTTP API specs for the max value
+                (20 at the time of writing this). If not provided, defaults
+                to the instance-level setting.
+            batch_concurrency (Optional[int]): number of threads to process
+                insertion batches concurrently. Defaults to instance-level
+                setting if not provided.
+            overwrite_concurrency (Optional[int]): number of threads to process
+                pre-existing documents in each batch (which require individual
+                API calls). Defaults to instance-level setting if not provided.
+
+        A note on metadata: there are constraints on the allowed field names
+        in this dictionary, coming from the underlying Astra DB API.
+        For instance, the `$` (dollar sign) cannot be used in the dict keys.
+        See this document for details:
+            docs.datastax.com/en/astra-serverless/docs/develop/dev-with-json.html
+
+        Returns:
+            List[str]: List of ids of the added texts.
+        """
+
+        if kwargs:
+            warnings.warn(
+                "Method 'add_texts' of AstraDBVectorStore invoked with "
+                f"unsupported arguments ({', '.join(sorted(kwargs.keys()))}), "
+                "which will be ignored."
+            )
+        self._ensure_astra_db_client()
+
+        embedding_vectors = self.embedding.embed_documents(list(texts))
+        documents_to_insert = self._get_documents_to_insert(
+            texts, embedding_vectors, metadatas, ids
+        )
+
+        def _handle_batch(document_batch: List[DocDict]) -> List[str]:
+            # self.collection is not None (by _ensure_astra_db_client)
+            im_result = self.collection.insert_many(  # type: ignore[union-attr]
+                documents=document_batch,
+                options={"ordered": False},
+                partial_failures_allowed=True,
+            )
+            batch_inserted, missing_from_batch = self._get_missing_from_batch(
+                document_batch, im_result
+            )
+
+            def _handle_missing_document(missing_document: DocDict) -> str:
+                # self.collection is not None (by _ensure_astra_db_client)
+                replacement_result = self.collection.find_one_and_replace(  # type: ignore[union-attr]
+                    filter={"_id": missing_document["_id"]},
+                    replacement=missing_document,
+                )
+                return replacement_result["data"]["document"]["_id"]
+
+            _u_max_workers = (
+                overwrite_concurrency or self.bulk_insert_overwrite_concurrency
+            )
+            with ThreadPoolExecutor(max_workers=_u_max_workers) as tpe2:
+                batch_replaced = list(
+                    tpe2.map(
+                        _handle_missing_document,
+                        missing_from_batch,
+                    )
+                )
+            return batch_inserted + batch_replaced
+
+        _b_max_workers = batch_concurrency or self.bulk_insert_batch_concurrency
+        with ThreadPoolExecutor(max_workers=_b_max_workers) as tpe:
+            all_ids_nested = tpe.map(
+                _handle_batch,
+                batch_iterate(
+                    batch_size or self.batch_size,
+                    documents_to_insert,
+                ),
+            )
+        return [iid for id_list in all_ids_nested for iid in id_list]
+
+    async def aadd_texts(
+        self,
+        texts: Iterable[str],
+        metadatas: Optional[List[dict]] = None,
+        ids: Optional[List[str]] = None,
+        *,
+        batch_size: Optional[int] = None,
+        batch_concurrency: Optional[int] = None,
+        overwrite_concurrency: Optional[int] = None,
+        **kwargs: Any,
+    ) -> List[str]:
+        """Run texts through the embeddings and add them to the vectorstore.
+
+        If passing explicit ids, those entries whose id is in the store already
+        will be replaced.
+
+        Args:
+            texts (Iterable[str]): Texts to add to the vectorstore.
+            metadatas (Optional[List[dict]], optional): Optional list of metadatas.
+            ids (Optional[List[str]], optional): Optional list of ids.
+            batch_size (Optional[int]): Number of documents in each API call.
+                Check the underlying Astra DB HTTP API specs for the max value
+                (20 at the time of writing this). If not provided, defaults
+                to the instance-level setting.
+            batch_concurrency (Optional[int]): number of concurrent batch insertions.
+                Defaults to instance-level setting if not provided.
+            overwrite_concurrency (Optional[int]): number of concurrent API calls to
+                process pre-existing documents in each batch.
+                Defaults to instance-level setting if not provided.
+
+        A note on metadata: there are constraints on the allowed field names
+        in this dictionary, coming from the underlying Astra DB API.
+        For instance, the `$` (dollar sign) cannot be used in the dict keys.
+        See this document for details:
+            docs.datastax.com/en/astra-serverless/docs/develop/dev-with-json.html
+
+        Returns:
+            List[str]: List of ids of the added texts.
+        """
+        await self._ensure_db_setup()
+        if not self.async_collection:
+            return await super().aadd_texts(
+                texts,
+                metadatas,
+                ids=ids,
+                batch_size=batch_size,
+                batch_concurrency=batch_concurrency,
+                overwrite_concurrency=overwrite_concurrency,
+            )
+        else:
+            if kwargs:
+                warnings.warn(
+                    "Method 'aadd_texts' of AstraDBVectorStore invoked with "
+                    f"unsupported arguments ({', '.join(sorted(kwargs.keys()))}), "
+                    "which will be ignored."
+                )
+
+            embedding_vectors = await self.embedding.aembed_documents(list(texts))
+            documents_to_insert = self._get_documents_to_insert(
+                texts, embedding_vectors, metadatas, ids
+            )
+
+            async def _handle_batch(document_batch: List[DocDict]) -> List[str]:
+                # self.async_collection is not None here for sure
+                im_result = await self.async_collection.insert_many(  # type: ignore[union-attr]
+                    documents=document_batch,
+                    options={"ordered": False},
+                    partial_failures_allowed=True,
+                )
+                batch_inserted, missing_from_batch = self._get_missing_from_batch(
+                    document_batch, im_result
+                )
+
+                async def _handle_missing_document(missing_document: DocDict) -> str:
+                    # self.async_collection is not None here for sure
+                    replacement_result = (
+                        await self.async_collection.find_one_and_replace(  # type: ignore[union-attr]
+                            filter={"_id": missing_document["_id"]},
+                            replacement=missing_document,
+                        )
+                    )
+                    return replacement_result["data"]["document"]["_id"]
+
+                _u_max_workers = (
+                    overwrite_concurrency or self.bulk_insert_overwrite_concurrency
+                )
+                batch_replaced = await gather_with_concurrency(
+                    _u_max_workers,
+                    *[_handle_missing_document(doc) for doc in missing_from_batch],
+                )
+                return batch_inserted + batch_replaced
+
+            _b_max_workers = batch_concurrency or self.bulk_insert_batch_concurrency
+            all_ids_nested = await gather_with_concurrency(
+                _b_max_workers,
+                *[
+                    _handle_batch(batch)
+                    for batch in batch_iterate(
+                        batch_size or self.batch_size,
+                        documents_to_insert,
+                    )
+                ],
+            )
+
+            return [iid for id_list in all_ids_nested for iid in id_list]
+
+    def similarity_search_with_score_id_by_vector(
+        self,
+        embedding: List[float],
+        k: int = 4,
+        filter: Optional[Dict[str, Any]] = None,
+    ) -> List[Tuple[Document, float, str]]:
+        """Return docs most similar to embedding vector.
+
+        Args:
+            embedding (List[float]): Embedding to look up documents similar to.
+            k (int): Number of Documents to return. Defaults to 4.
+        Returns:
+            List of (Document, score, id), the most similar to the query vector.
+        """
+        self._ensure_astra_db_client()
+        metadata_parameter = self._filter_to_metadata(filter)
+        #
+        hits = list(
+            # self.collection is not None (by _ensure_astra_db_client)
+            self.collection.paginated_find(  # type: ignore[union-attr]
+                filter=metadata_parameter,
+                sort={"$vector": embedding},
+                options={"limit": k, "includeSimilarity": True},
+                projection={
+                    "_id": 1,
+                    "content": 1,
+                    "metadata": 1,
+                },
+            )
+        )
+        #
+        return [
+            (
+                Document(
+                    page_content=hit["content"],
+                    metadata=hit["metadata"],
+                ),
+                hit["$similarity"],
+                hit["_id"],
+            )
+            for hit in hits
+        ]
+
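A sketch of querying with scores and ids via the query-string wrappers defined just below, reusing the hypothetical `store`. Per `_select_relevance_score_fn` above, scores arrive already in [0, 1], with higher meaning more similar.

```python
# Sketch, assuming `store` as above.
results = store.similarity_search_with_score_id("what is a unicorn?", k=2)
for doc, score, doc_id in results:
    print(f"{doc_id} [{score:.3f}]: {doc.page_content[:40]}")
```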
+    async def asimilarity_search_with_score_id_by_vector(
+        self,
+        embedding: List[float],
+        k: int = 4,
+        filter: Optional[Dict[str, Any]] = None,
+    ) -> List[Tuple[Document, float, str]]:
+        """Return docs most similar to embedding vector.
+
+        Args:
+            embedding (List[float]): Embedding to look up documents similar to.
+            k (int): Number of Documents to return. Defaults to 4.
+        Returns:
+            List of (Document, score, id), the most similar to the query vector.
+        """
+        await self._ensure_db_setup()
+        if not self.async_collection:
+            return await run_in_executor(
+                None,
+                self.similarity_search_with_score_id_by_vector,
+                embedding,
+                k,
+                filter,
+            )
+        metadata_parameter = self._filter_to_metadata(filter)
+        #
+        return [
+            (
+                Document(
+                    page_content=hit["content"],
+                    metadata=hit["metadata"],
+                ),
+                hit["$similarity"],
+                hit["_id"],
+            )
+            async for hit in self.async_collection.paginated_find(
+                filter=metadata_parameter,
+                sort={"$vector": embedding},
+                options={"limit": k, "includeSimilarity": True},
+                projection={
+                    "_id": 1,
+                    "content": 1,
+                    "metadata": 1,
+                },
+            )
+        ]
+
+    def similarity_search_with_score_id(
+        self,
+        query: str,
+        k: int = 4,
+        filter: Optional[Dict[str, Any]] = None,
+    ) -> List[Tuple[Document, float, str]]:
+        embedding_vector = self.embedding.embed_query(query)
+        return self.similarity_search_with_score_id_by_vector(
+            embedding=embedding_vector,
+            k=k,
+            filter=filter,
+        )
+
+    async def asimilarity_search_with_score_id(
+        self,
+        query: str,
+        k: int = 4,
+        filter: Optional[Dict[str, Any]] = None,
+    ) -> List[Tuple[Document, float, str]]:
+        embedding_vector = await self.embedding.aembed_query(query)
+        return await self.asimilarity_search_with_score_id_by_vector(
+            embedding=embedding_vector,
+            k=k,
+            filter=filter,
+        )
+
+    def similarity_search_with_score_by_vector(
+        self,
+        embedding: List[float],
+        k: int = 4,
+        filter: Optional[Dict[str, Any]] = None,
+    ) -> List[Tuple[Document, float]]:
+        """Return docs most similar to embedding vector.
+
+        Args:
+            embedding (List[float]): Embedding to look up documents similar to.
+            k (int): Number of Documents to return. Defaults to 4.
+        Returns:
+            List of (Document, score), the most similar to the query vector.
+        """
+        return [
+            (doc, score)
+            for (doc, score, doc_id) in self.similarity_search_with_score_id_by_vector(
+                embedding=embedding,
+                k=k,
+                filter=filter,
+            )
+        ]
+
+    async def asimilarity_search_with_score_by_vector(
+        self,
+        embedding: List[float],
+        k: int = 4,
+        filter: Optional[Dict[str, Any]] = None,
+    ) -> List[Tuple[Document, float]]:
+        """Return docs most similar to embedding vector.
+
+        Args:
+            embedding (List[float]): Embedding to look up documents similar to.
+            k (int): Number of Documents to return. Defaults to 4.
+        Returns:
+            List of (Document, score), the most similar to the query vector.
+ """ + return [ + (doc, score) + for ( + doc, + score, + doc_id, + ) in await self.asimilarity_search_with_score_id_by_vector( + embedding=embedding, + k=k, + filter=filter, + ) + ] + + def similarity_search( + self, + query: str, + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + embedding_vector = self.embedding.embed_query(query) + return self.similarity_search_by_vector( + embedding_vector, + k, + filter=filter, + ) + + async def asimilarity_search( + self, + query: str, + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + embedding_vector = await self.embedding.aembed_query(query) + return await self.asimilarity_search_by_vector( + embedding_vector, + k, + filter=filter, + ) + + def similarity_search_by_vector( + self, + embedding: List[float], + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + return [ + doc + for doc, _ in self.similarity_search_with_score_by_vector( + embedding, + k, + filter=filter, + ) + ] + + async def asimilarity_search_by_vector( + self, + embedding: List[float], + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + return [ + doc + for doc, _ in await self.asimilarity_search_with_score_by_vector( + embedding, + k, + filter=filter, + ) + ] + + def similarity_search_with_score( + self, + query: str, + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + ) -> List[Tuple[Document, float]]: + embedding_vector = self.embedding.embed_query(query) + return self.similarity_search_with_score_by_vector( + embedding_vector, + k, + filter=filter, + ) + + async def asimilarity_search_with_score( + self, + query: str, + k: int = 4, + filter: Optional[Dict[str, Any]] = None, + ) -> List[Tuple[Document, float]]: + embedding_vector = await self.embedding.aembed_query(query) + return await self.asimilarity_search_with_score_by_vector( + embedding_vector, + k, + filter=filter, + ) + + @staticmethod + def _get_mmr_hits( + embedding: List[float], k: int, lambda_mult: float, prefetch_hits: List[DocDict] + ) -> List[Document]: + mmr_chosen_indices = maximal_marginal_relevance( + np.array(embedding, dtype=np.float32), + [prefetch_hit["$vector"] for prefetch_hit in prefetch_hits], + k=k, + lambda_mult=lambda_mult, + ) + mmr_hits = [ + prefetch_hit + for prefetch_index, prefetch_hit in enumerate(prefetch_hits) + if prefetch_index in mmr_chosen_indices + ] + return [ + Document( + page_content=hit["content"], + metadata=hit["metadata"], + ) + for hit in mmr_hits + ] + + def max_marginal_relevance_search_by_vector( + self, + embedding: List[float], + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + filter: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. + fetch_k: Number of Documents to fetch to pass to MMR algorithm. + lambda_mult: Number between 0 and 1 that determines the degree + of diversity among the results with 0 corresponding + to maximum diversity and 1 to minimum diversity. + Returns: + List of Documents selected by maximal marginal relevance. 
+ """ + self._ensure_astra_db_client() + metadata_parameter = self._filter_to_metadata(filter) + + prefetch_hits = list( + # self.collection is not None (by _ensure_astra_db_client) + self.collection.paginated_find( # type: ignore[union-attr] + filter=metadata_parameter, + sort={"$vector": embedding}, + options={"limit": fetch_k, "includeSimilarity": True}, + projection={ + "_id": 1, + "content": 1, + "metadata": 1, + "$vector": 1, + }, + ) + ) + + return self._get_mmr_hits(embedding, k, lambda_mult, prefetch_hits) + + async def amax_marginal_relevance_search_by_vector( + self, + embedding: List[float], + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + filter: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. + fetch_k: Number of Documents to fetch to pass to MMR algorithm. + lambda_mult: Number between 0 and 1 that determines the degree + of diversity among the results with 0 corresponding + to maximum diversity and 1 to minimum diversity. + Returns: + List of Documents selected by maximal marginal relevance. + """ + await self._ensure_db_setup() + if not self.async_collection: + return await run_in_executor( + None, + self.max_marginal_relevance_search_by_vector, + embedding, + k, + fetch_k, + lambda_mult, + filter, + **kwargs, + ) + metadata_parameter = self._filter_to_metadata(filter) + + prefetch_hits = [ + hit + async for hit in self.async_collection.paginated_find( + filter=metadata_parameter, + sort={"$vector": embedding}, + options={"limit": fetch_k, "includeSimilarity": True}, + projection={ + "_id": 1, + "content": 1, + "metadata": 1, + "$vector": 1, + }, + ) + ] + + return self._get_mmr_hits(embedding, k, lambda_mult, prefetch_hits) + + def max_marginal_relevance_search( + self, + query: str, + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + filter: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + Args: + query (str): Text to look up documents similar to. + k (int = 4): Number of Documents to return. + fetch_k (int = 20): Number of Documents to fetch to pass to MMR algorithm. + lambda_mult (float = 0.5): Number between 0 and 1 that determines the degree + of diversity among the results with 0 corresponding + to maximum diversity and 1 to minimum diversity. + Optional. + Returns: + List of Documents selected by maximal marginal relevance. + """ + embedding_vector = self.embedding.embed_query(query) + return self.max_marginal_relevance_search_by_vector( + embedding_vector, + k, + fetch_k, + lambda_mult=lambda_mult, + filter=filter, + ) + + async def amax_marginal_relevance_search( + self, + query: str, + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + filter: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + Args: + query (str): Text to look up documents similar to. + k (int = 4): Number of Documents to return. 
+                fetch_k (int = 20): Number of Documents to fetch to pass to MMR algorithm.
+                lambda_mult (float = 0.5): Number between 0 and 1 that determines the degree
+                    of diversity among the results with 0 corresponding
+                    to maximum diversity and 1 to minimum diversity.
+                    Optional.
+        Returns:
+            List of Documents selected by maximal marginal relevance.
+        """
+        embedding_vector = await self.embedding.aembed_query(query)
+        return await self.amax_marginal_relevance_search_by_vector(
+            embedding_vector,
+            k,
+            fetch_k,
+            lambda_mult=lambda_mult,
+            filter=filter,
+        )
+
+    @classmethod
+    def _from_kwargs(
+        cls: Type[AstraDBVectorStore],
+        embedding: Embeddings,
+        **kwargs: Any,
+    ) -> AstraDBVectorStore:
+        known_kwargs = {
+            "collection_name",
+            "token",
+            "api_endpoint",
+            "astra_db_client",
+            "async_astra_db_client",
+            "namespace",
+            "metric",
+            "batch_size",
+            "bulk_insert_batch_concurrency",
+            "bulk_insert_overwrite_concurrency",
+            "bulk_delete_concurrency",
+            "batch_concurrency",
+            "overwrite_concurrency",
+        }
+        if kwargs:
+            unknown_kwargs = set(kwargs.keys()) - known_kwargs
+            if unknown_kwargs:
+                warnings.warn(
+                    "Method 'from_texts' of AstraDBVectorStore "
+                    "invoked with unsupported arguments "
+                    f"({', '.join(sorted(unknown_kwargs))}), "
+                    "which will be ignored."
+                )
+
+        collection_name: str = kwargs["collection_name"]
+        token = kwargs.get("token")
+        api_endpoint = kwargs.get("api_endpoint")
+        astra_db_client = kwargs.get("astra_db_client")
+        async_astra_db_client = kwargs.get("async_astra_db_client")
+        namespace = kwargs.get("namespace")
+        metric = kwargs.get("metric")
+
+        return cls(
+            embedding=embedding,
+            collection_name=collection_name,
+            token=token,
+            api_endpoint=api_endpoint,
+            astra_db_client=astra_db_client,
+            async_astra_db_client=async_astra_db_client,
+            namespace=namespace,
+            metric=metric,
+            batch_size=kwargs.get("batch_size"),
+            bulk_insert_batch_concurrency=kwargs.get("bulk_insert_batch_concurrency"),
+            bulk_insert_overwrite_concurrency=kwargs.get(
+                "bulk_insert_overwrite_concurrency"
+            ),
+            bulk_delete_concurrency=kwargs.get("bulk_delete_concurrency"),
+        )
+
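A sketch of the `from_texts` flow defined just below, with the same hypothetical credentials as earlier: constructor-bound and `add_texts`-bound keyword arguments travel in the same `**kwargs` and are routed by `_from_kwargs` above.

```python
# Sketch only: credentials and endpoint are placeholders.
store = AstraDBVectorStore.from_texts(
    texts=["alpha", "beta"],
    embedding=OpenAIEmbeddings(),
    collection_name="demo_collection",
    token="AstraCS:...",  # hypothetical
    api_endpoint="https://<db-id>-<region>.apps.astra.datastax.com",
    batch_size=20,  # routed to both the constructor and add_texts
)
```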
+    @classmethod
+    def from_texts(
+        cls: Type[AstraDBVectorStore],
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        ids: Optional[List[str]] = None,
+        **kwargs: Any,
+    ) -> AstraDBVectorStore:
+        """Create an Astra DB vectorstore from raw texts.
+
+        Args:
+            texts (List[str]): the texts to insert.
+            embedding (Embeddings): the embedding function to use in the store.
+            metadatas (Optional[List[dict]]): metadata dicts for the texts.
+            ids (Optional[List[str]]): ids to associate to the texts.
+            *Additional arguments*: you can pass any argument that you would pass
+                to 'add_texts' and/or to the 'AstraDBVectorStore' constructor
+                (see these methods for details). These arguments will be
+                routed to the respective methods as they are.
+
+        Returns:
+            an `AstraDBVectorStore` vectorstore.
+        """
+        astra_db_store = AstraDBVectorStore._from_kwargs(embedding, **kwargs)
+        astra_db_store.add_texts(
+            texts=texts,
+            metadatas=metadatas,
+            ids=ids,
+            batch_size=kwargs.get("batch_size"),
+            batch_concurrency=kwargs.get("batch_concurrency"),
+            overwrite_concurrency=kwargs.get("overwrite_concurrency"),
+        )
+        return astra_db_store
+
+    @classmethod
+    async def afrom_texts(
+        cls: Type[AstraDBVectorStore],
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        ids: Optional[List[str]] = None,
+        **kwargs: Any,
+    ) -> AstraDBVectorStore:
+        """Create an Astra DB vectorstore from raw texts.
+
+        Args:
+            texts (List[str]): the texts to insert.
+            embedding (Embeddings): the embedding function to use in the store.
+            metadatas (Optional[List[dict]]): metadata dicts for the texts.
+            ids (Optional[List[str]]): ids to associate to the texts.
+            *Additional arguments*: you can pass any argument that you would pass
+                to 'add_texts' and/or to the 'AstraDBVectorStore' constructor
+                (see these methods for details). These arguments will be
+                routed to the respective methods as they are.
+
+        Returns:
+            an `AstraDBVectorStore` vectorstore.
+        """
+        astra_db_store = AstraDBVectorStore._from_kwargs(embedding, **kwargs)
+        await astra_db_store.aadd_texts(
+            texts=texts,
+            metadatas=metadatas,
+            ids=ids,
+            batch_size=kwargs.get("batch_size"),
+            batch_concurrency=kwargs.get("batch_concurrency"),
+            overwrite_concurrency=kwargs.get("overwrite_concurrency"),
+        )
+        return astra_db_store
+
+    @classmethod
+    def from_documents(
+        cls: Type[AstraDBVectorStore],
+        documents: List[Document],
+        embedding: Embeddings,
+        **kwargs: Any,
+    ) -> AstraDBVectorStore:
+        """Create an Astra DB vectorstore from a document list.
+
+        Utility method that defers to 'from_texts' (see that one).
+
+        Args: see 'from_texts', except here you have to supply 'documents'
+            in place of 'texts' and 'metadatas'.
+
+        Returns:
+            an `AstraDBVectorStore` vectorstore.
+        """
+        return super().from_documents(documents, embedding, **kwargs)
diff --git a/libs/partners/astradb/poetry.lock b/libs/partners/astradb/poetry.lock
new file mode 100644
index 00000000000000..41c52a6e98ce40
--- /dev/null
+++ b/libs/partners/astradb/poetry.lock
@@ -0,0 +1,1114 @@
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
+ +[[package]] +name = "annotated-types" +version = "0.6.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +files = [ + {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"}, + {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""} + +[[package]] +name = "anyio" +version = "4.2.0" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +optional = false +python-versions = ">=3.8" +files = [ + {file = "anyio-4.2.0-py3-none-any.whl", hash = "sha256:745843b39e829e108e518c489b31dc757de7d2131d53fac32bd8df268227bfee"}, + {file = "anyio-4.2.0.tar.gz", hash = "sha256:e1875bb4b4e2de1669f4bc7869b6d3f54231cdced71605e6e64c9be77e3be50f"}, +] + +[package.dependencies] +exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} +idna = ">=2.8" +sniffio = ">=1.1" +typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} + +[package.extras] +doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] +trio = ["trio (>=0.23)"] + +[[package]] +name = "astrapy" +version = "0.7.5" +description = "AstraPy is a Pythonic SDK for DataStax Astra" +optional = false +python-versions = ">=3.8.0,<4.0.0" +files = [ + {file = "astrapy-0.7.5-py3-none-any.whl", hash = "sha256:51daabbc59ed56f023233296e42372cd2a7468282f978c36ccc33a2e211beddc"}, + {file = "astrapy-0.7.5.tar.gz", hash = "sha256:72a31538c5fd06cbf91a235924bee81007d03b8c0feff1d7cf811e64a2adc7a8"}, +] + +[package.dependencies] +cassio = ">=0.1.4,<0.2.0" +deprecation = ">=2.1.0,<2.2.0" +httpx = {version = ">=0.26.0,<0.27.0", extras = ["http2"]} +toml = ">=0.10.2,<0.11.0" + +[[package]] +name = "cassandra-driver" +version = "3.29.0" +description = "DataStax Driver for Apache Cassandra" +optional = false +python-versions = "*" +files = [ + {file = "cassandra-driver-3.29.0.tar.gz", hash = "sha256:0a34f9534356e5fd33af8cdda109d5e945b6335cb50399b267c46368c4e93c98"}, + {file = "cassandra_driver-3.29.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:28d6fe5379d55e4fc96785bd2e2cba029ef171cc43fb38fc507b9ba232917ac2"}, + {file = "cassandra_driver-3.29.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:05e267412ccc9fe71ee4a81d98f2250df2429390fac4721f41dd17b65e4c41ac"}, + {file = "cassandra_driver-3.29.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84eacfc8e6461590eb1c2b9651ea809be298eb8283c2d844a6dad8058ee7928c"}, + {file = "cassandra_driver-3.29.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8feeda01bb13dce1a74b0a94172b3b06b0d9d8f33d6fb56e1910d495b0e085e5"}, + {file = "cassandra_driver-3.29.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb0ef3297255bbade7b0c2d168c31d36ec904b1a9b42521d1d3d65c3148bbc7"}, + {file = "cassandra_driver-3.29.0-cp310-cp310-win32.whl", hash = "sha256:39d78971a4e26ef65b77caa09c0e6ccfd7b2c52b0924c328fbfdca91667eb08e"}, + {file = "cassandra_driver-3.29.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:9dd713fe6388f3ba141cc2eef4737b5e4a27b0d1c1a6b0372b8ff3d2d35ccf79"}, + {file = "cassandra_driver-3.29.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:76333d38cb663065d53ca658e15185b23aa0ce434f2876c455624d90d2ee0dbf"}, + {file = "cassandra_driver-3.29.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:81ce92e0420bf18742b4bc433052c7c2e4aa72fa84898be2b26083e240ace343"}, + {file = "cassandra_driver-3.29.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5b90c2f052a102560e4fcf860f6d1ac35d3514ad36b1978cf821998f1e689f38"}, + {file = "cassandra_driver-3.29.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fecf584a7f411d247d1925c66d527f7ecc73710b230b68cdacf2044fb57ae4b"}, + {file = "cassandra_driver-3.29.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a678bc7107cc606ac8ff8cb58fe6abb0bb2a9ff5196016b3bd36926146c4dc62"}, + {file = "cassandra_driver-3.29.0-cp311-cp311-win32.whl", hash = "sha256:e9badede26005fd993e2344e8a541a4133bc46a76a90969d57a90a028b2b8ca6"}, + {file = "cassandra_driver-3.29.0-cp311-cp311-win_amd64.whl", hash = "sha256:cac6d2e6ad1a386f1b786de78102f918bcd5caac390c3e446558e5adee9464c6"}, + {file = "cassandra_driver-3.29.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:01a8b4cdb056c209c5c4aa22e0d7f427b87cb98297a6efff29ea278da9a52698"}, + {file = "cassandra_driver-3.29.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:73aa7b32dfad1f58fb00167052ab80b1b186b69baac7de4ad5cca785fff569be"}, + {file = "cassandra_driver-3.29.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7f7c446edba002b0fdd94f2b92c4752e16738ea7dce27d754103fcd086b4dcc9"}, + {file = "cassandra_driver-3.29.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6843569360fb4a446d65f6733faed1207c252745a31a1d8dc02feff8f7f86a23"}, + {file = "cassandra_driver-3.29.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1762d228bdd3f1bc5faa0812e1fcac75a36ab7504f3cfb7e9b5d2bf26a50c552"}, + {file = "cassandra_driver-3.29.0-cp312-cp312-win32.whl", hash = "sha256:dd245817e0df048b780f45ac78b1840fe12deb5aea8873df4a11e0c44a68c19a"}, + {file = "cassandra_driver-3.29.0-cp312-cp312-win_amd64.whl", hash = "sha256:002134a4152034ed66d9f9616ea391f44dfdf7c9f97d22bd4d4f64d70020b91b"}, + {file = "cassandra_driver-3.29.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:d9b652db99f69ee62bbd679a29cfbab398ebf2bfc195632d57ecb3f246baf48b"}, + {file = "cassandra_driver-3.29.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6ac82ae8240b4f4f1a3d1e6f21a4ecd9948afdfedef6f23235fac85d20d11076"}, + {file = "cassandra_driver-3.29.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1590d231503594546dfdbf6119d805e1a0b22de98a1a6ec0de79a1bacc59ecb5"}, + {file = "cassandra_driver-3.29.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcf9dee3b120062a8224278da56ab088c2c081a79dc9e017f065dccd421b6477"}, + {file = "cassandra_driver-3.29.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb9a123ad86152d2a1ca31f4a3d91d72cbd3ed7a88a4c3cd5f6f72173a1bfbd8"}, + {file = "cassandra_driver-3.29.0-cp38-cp38-win32.whl", hash = "sha256:cc6794ca9c94e157570e2b7b5a04458259ee29c5a0d0de50a9e0c8e2da8f5455"}, + {file = "cassandra_driver-3.29.0-cp38-cp38-win_amd64.whl", hash = "sha256:096eef84ab466b090a69a4e9d85e65d57e926ff7d7897443e7b637d40277f373"}, + {file = "cassandra_driver-3.29.0-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:befb723d62ee650cb3afd9668245ee9ce6ba5394dbd58352866ff2baa0324101"}, + {file = "cassandra_driver-3.29.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4108fb2a64a8fd77948003ff0ca4d296364d9ff7381f4abe7a9db202e6378446"}, + {file = "cassandra_driver-3.29.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5cd4701cc083e047553888dbd99d2d5119b5b3da54b9e8034a80b8c8d516142c"}, + {file = "cassandra_driver-3.29.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7b94c5273bf3c2f252aed8624303c46d9d4e6dc7663f53ed9c9335e5d0dcb88"}, + {file = "cassandra_driver-3.29.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3609f2eda8ee2a6a9b2c9c84c009bf54a7695b9dfc21700b88dd0a2140c82c95"}, + {file = "cassandra_driver-3.29.0-cp39-cp39-win32.whl", hash = "sha256:aaeff4c3af3100510e329177c46da89aab6d444070f4fa370df5328b8ad488b4"}, + {file = "cassandra_driver-3.29.0-cp39-cp39-win_amd64.whl", hash = "sha256:88d9a6abd0e0af199636ff9386d0b9b81b1dd189e22c8498ecaa546256bacf24"}, +] + +[package.dependencies] +geomet = ">=0.1,<0.3" + +[package.extras] +cle = ["cryptography (>=35.0)"] +graph = ["gremlinpython (==3.4.6)"] + +[[package]] +name = "cassio" +version = "0.1.4" +description = "A framework-agnostic Python library to seamlessly integrate Apache Cassandra(R) with ML/LLM/genAI workloads." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cassio-0.1.4-py3-none-any.whl", hash = "sha256:ab997879c36807ff5b9771ff35941f104c0f0e60e1595118279869b5b95c146f"}, + {file = "cassio-0.1.4.tar.gz", hash = "sha256:df495c459ee5e9194e4780ac3ea1aaf79a4443e6d06f0eeb67aac6e3cd8c47aa"}, +] + +[package.dependencies] +cassandra-driver = ">=3.28.0" +numpy = ">=1.0" +requests = ">=2" + +[[package]] +name = "certifi" +version = "2024.2.2" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, + {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = 
"charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + +[[package]] +name = "click" +version = "8.1.7" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "codespell" +version = "2.2.6" +description = "Codespell" +optional = false +python-versions = ">=3.8" +files = [ + {file = "codespell-2.2.6-py3-none-any.whl", hash = "sha256:9ee9a3e5df0990604013ac2a9f22fa8e57669c827124a2e961fe8a1da4cacc07"}, + {file = "codespell-2.2.6.tar.gz", hash = "sha256:a8c65d8eb3faa03deabab6b3bbe798bea72e1799c7e9e955d57eca4096abcff9"}, +] + +[package.extras] +dev = ["Pygments", "build", "chardet", "pre-commit", "pytest", "pytest-cov", "pytest-dependency", "ruff", "tomli", "twine"] +hard-encoding-detection = ["chardet"] +toml = ["tomli"] +types = ["chardet (>=5.1.0)", "mypy", "pytest", "pytest-cov", "pytest-dependency"] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "deprecation" +version = "2.1.0" +description = "A library to handle automated deprecations" +optional = false +python-versions = "*" +files = [ + {file = "deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a"}, + {file = "deprecation-2.1.0.tar.gz", hash = "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff"}, +] + +[package.dependencies] +packaging = "*" + +[[package]] +name = "exceptiongroup" +version = "1.2.0" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, + {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "freezegun" +version = "1.4.0" +description = "Let your Python tests travel through time" +optional = false +python-versions = ">=3.7" +files = [ + {file = "freezegun-1.4.0-py3-none-any.whl", hash = "sha256:55e0fc3c84ebf0a96a5aa23ff8b53d70246479e9a68863f1fcac5a3e52f19dd6"}, + {file = "freezegun-1.4.0.tar.gz", hash = "sha256:10939b0ba0ff5adaecf3b06a5c2f73071d9678e507c5eaedb23c761d56ac774b"}, +] + +[package.dependencies] +python-dateutil = ">=2.7" + +[[package]] +name = "geomet" +version = "0.2.1.post1" +description = "GeoJSON <-> WKT/WKB conversion utilities" +optional = false +python-versions = ">2.6, !=3.3.*, <4" +files = [ + {file = "geomet-0.2.1.post1-py3-none-any.whl", hash = "sha256:a41a1e336b381416d6cbed7f1745c848e91defaa4d4c1bdc1312732e46ffad2b"}, + {file = "geomet-0.2.1.post1.tar.gz", hash = "sha256:91d754f7c298cbfcabd3befdb69c641c27fe75e808b27aa55028605761d17e95"}, +] + +[package.dependencies] +click = "*" +six = "*" + +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.7" +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] + +[[package]] +name = "h2" +version = "4.1.0" +description = "HTTP/2 State-Machine based protocol implementation" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, + {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, +] + +[package.dependencies] +hpack = ">=4.0,<5" +hyperframe = ">=6.0,<7" + +[[package]] +name = "hpack" +version = "4.0.0" +description = "Pure-Python HPACK header compression" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, + {file = "hpack-4.0.0.tar.gz", hash = 
"sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, +] + +[[package]] +name = "httpcore" +version = "1.0.3" +description = "A minimal low-level HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpcore-1.0.3-py3-none-any.whl", hash = "sha256:9a6a501c3099307d9fd76ac244e08503427679b1e81ceb1d922485e2f2462ad2"}, + {file = "httpcore-1.0.3.tar.gz", hash = "sha256:5c0f9546ad17dac4d0772b0808856eb616eb8b48ce94f49ed819fd6982a8a544"}, +] + +[package.dependencies] +certifi = "*" +h11 = ">=0.13,<0.15" + +[package.extras] +asyncio = ["anyio (>=4.0,<5.0)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +trio = ["trio (>=0.22.0,<0.24.0)"] + +[[package]] +name = "httpx" +version = "0.26.0" +description = "The next generation HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpx-0.26.0-py3-none-any.whl", hash = "sha256:8915f5a3627c4d47b73e8202457cb28f1266982d1159bd5779d86a80c0eab1cd"}, + {file = "httpx-0.26.0.tar.gz", hash = "sha256:451b55c30d5185ea6b23c2c793abf9bb237d2a7dfb901ced6ff69ad37ec1dfaf"}, +] + +[package.dependencies] +anyio = "*" +certifi = "*" +h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""} +httpcore = "==1.*" +idna = "*" +sniffio = "*" + +[package.extras] +brotli = ["brotli", "brotlicffi"] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] + +[[package]] +name = "hyperframe" +version = "6.0.1" +description = "HTTP/2 framing layer for Python" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, + {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, +] + +[[package]] +name = "idna" +version = "3.6" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"}, + {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "jsonpatch" +version = "1.33" +description = "Apply JSON-Patches (RFC 6902)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, + {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, +] + +[package.dependencies] +jsonpointer = ">=1.9" + +[[package]] +name = "jsonpointer" +version = "2.4" +description = "Identify specific nodes in a JSON document (RFC 6901)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = 
"sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, +] + +[[package]] +name = "langchain-core" +version = "0.1.23" +description = "Building applications with LLMs through composability" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [] +develop = true + +[package.dependencies] +anyio = ">=3,<5" +jsonpatch = "^1.33" +langsmith = "^0.1.0" +packaging = "^23.2" +pydantic = ">=1,<3" +PyYAML = ">=5.3" +requests = "^2" +tenacity = "^8.1.0" + +[package.extras] +extended-testing = ["jinja2 (>=3,<4)"] + +[package.source] +type = "directory" +url = "../../core" + +[[package]] +name = "langsmith" +version = "0.1.1" +description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "langsmith-0.1.1-py3-none-any.whl", hash = "sha256:10ff2b977a41e3f6351d1a4239d9bd57af0547aa909e839d2791e16cc197a6f9"}, + {file = "langsmith-0.1.1.tar.gz", hash = "sha256:09df0c2ca9085105f97a4e4f281b083e312c99d162f3fe2b2d5eefd5c3692e60"}, +] + +[package.dependencies] +pydantic = ">=1,<3" +requests = ">=2,<3" + +[[package]] +name = "mypy" +version = "0.991" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mypy-0.991-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7d17e0a9707d0772f4a7b878f04b4fd11f6f5bcb9b3813975a9b13c9332153ab"}, + {file = "mypy-0.991-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0714258640194d75677e86c786e80ccf294972cc76885d3ebbb560f11db0003d"}, + {file = "mypy-0.991-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0c8f3be99e8a8bd403caa8c03be619544bc2c77a7093685dcf308c6b109426c6"}, + {file = "mypy-0.991-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9ec663ed6c8f15f4ae9d3c04c989b744436c16d26580eaa760ae9dd5d662eb"}, + {file = "mypy-0.991-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4307270436fd7694b41f913eb09210faff27ea4979ecbcd849e57d2da2f65305"}, + {file = "mypy-0.991-cp310-cp310-win_amd64.whl", hash = "sha256:901c2c269c616e6cb0998b33d4adbb4a6af0ac4ce5cd078afd7bc95830e62c1c"}, + {file = "mypy-0.991-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d13674f3fb73805ba0c45eb6c0c3053d218aa1f7abead6e446d474529aafc372"}, + {file = "mypy-0.991-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1c8cd4fb70e8584ca1ed5805cbc7c017a3d1a29fb450621089ffed3e99d1857f"}, + {file = "mypy-0.991-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:209ee89fbb0deed518605edddd234af80506aec932ad28d73c08f1400ef80a33"}, + {file = "mypy-0.991-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37bd02ebf9d10e05b00d71302d2c2e6ca333e6c2a8584a98c00e038db8121f05"}, + {file = "mypy-0.991-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:26efb2fcc6b67e4d5a55561f39176821d2adf88f2745ddc72751b7890f3194ad"}, + {file = "mypy-0.991-cp311-cp311-win_amd64.whl", hash = "sha256:3a700330b567114b673cf8ee7388e949f843b356a73b5ab22dd7cff4742a5297"}, + {file = "mypy-0.991-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1f7d1a520373e2272b10796c3ff721ea1a0712288cafaa95931e66aa15798813"}, + {file = "mypy-0.991-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:641411733b127c3e0dab94c45af15fea99e4468f99ac88b39efb1ad677da5711"}, + {file = "mypy-0.991-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:3d80e36b7d7a9259b740be6d8d906221789b0d836201af4234093cae89ced0cd"}, + {file = "mypy-0.991-cp37-cp37m-win_amd64.whl", hash = "sha256:e62ebaad93be3ad1a828a11e90f0e76f15449371ffeecca4a0a0b9adc99abcef"}, + {file = "mypy-0.991-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b86ce2c1866a748c0f6faca5232059f881cda6dda2a893b9a8373353cfe3715a"}, + {file = "mypy-0.991-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ac6e503823143464538efda0e8e356d871557ef60ccd38f8824a4257acc18d93"}, + {file = "mypy-0.991-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0cca5adf694af539aeaa6ac633a7afe9bbd760df9d31be55ab780b77ab5ae8bf"}, + {file = "mypy-0.991-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12c56bf73cdab116df96e4ff39610b92a348cc99a1307e1da3c3768bbb5b135"}, + {file = "mypy-0.991-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:652b651d42f155033a1967739788c436491b577b6a44e4c39fb340d0ee7f0d70"}, + {file = "mypy-0.991-cp38-cp38-win_amd64.whl", hash = "sha256:4175593dc25d9da12f7de8de873a33f9b2b8bdb4e827a7cae952e5b1a342e243"}, + {file = "mypy-0.991-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:98e781cd35c0acf33eb0295e8b9c55cdbef64fcb35f6d3aa2186f289bed6e80d"}, + {file = "mypy-0.991-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6d7464bac72a85cb3491c7e92b5b62f3dcccb8af26826257760a552a5e244aa5"}, + {file = "mypy-0.991-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c9166b3f81a10cdf9b49f2d594b21b31adadb3d5e9db9b834866c3258b695be3"}, + {file = "mypy-0.991-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8472f736a5bfb159a5e36740847808f6f5b659960115ff29c7cecec1741c648"}, + {file = "mypy-0.991-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5e80e758243b97b618cdf22004beb09e8a2de1af481382e4d84bc52152d1c476"}, + {file = "mypy-0.991-cp39-cp39-win_amd64.whl", hash = "sha256:74e259b5c19f70d35fcc1ad3d56499065c601dfe94ff67ae48b85596b9ec1461"}, + {file = "mypy-0.991-py3-none-any.whl", hash = "sha256:de32edc9b0a7e67c2775e574cb061a537660e51210fbf6006b0b36ea695ae9bb"}, + {file = "mypy-0.991.tar.gz", hash = "sha256:3c0165ba8f354a6d9881809ef29f1a9318a236a6d81c690094c5df32107bde06"}, +] + +[package.dependencies] +mypy-extensions = ">=0.4.3" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = ">=3.10" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +python2 = ["typed-ast (>=1.4.0,<2)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." 
+optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "numpy" +version = "1.24.4" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, + {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6"}, + {file = "numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc"}, + {file = "numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5"}, + {file = "numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d"}, + {file = "numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc"}, + {file = "numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2"}, + {file = "numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d"}, + {file = "numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835"}, + {file = "numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"}, + {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"}, +] + +[[package]] +name = "packaging" +version = "23.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, + {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, +] + +[[package]] +name = "pluggy" +version = "1.4.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"}, + {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pydantic" +version = "2.6.1" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic-2.6.1-py3-none-any.whl", hash = "sha256:0b6a909df3192245cb736509a92ff69e4fef76116feffec68e93a567347bae6f"}, + {file = "pydantic-2.6.1.tar.gz", hash = "sha256:4fd5c182a2488dc63e6d32737ff19937888001e2a6d86e94b3f233104a5d1fa9"}, +] + +[package.dependencies] +annotated-types = ">=0.4.0" +pydantic-core = "2.16.2" +typing-extensions = ">=4.6.1" + +[package.extras] +email = ["email-validator (>=2.0.0)"] + +[[package]] +name = "pydantic-core" +version = "2.16.2" +description = "" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic_core-2.16.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3fab4e75b8c525a4776e7630b9ee48aea50107fea6ca9f593c98da3f4d11bf7c"}, + {file = "pydantic_core-2.16.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8bde5b48c65b8e807409e6f20baee5d2cd880e0fad00b1a811ebc43e39a00ab2"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2924b89b16420712e9bb8192396026a8fbd6d8726224f918353ac19c4c043d2a"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16aa02e7a0f539098e215fc193c8926c897175d64c7926d00a36188917717a05"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:936a787f83db1f2115ee829dd615c4f684ee48ac4de5779ab4300994d8af325b"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:459d6be6134ce3b38e0ef76f8a672924460c455d45f1ad8fdade36796df1ddc8"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9ee4febb249c591d07b2d4dd36ebcad0ccd128962aaa1801508320896575ef"}, + {file = "pydantic_core-2.16.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40a0bd0bed96dae5712dab2aba7d334a6c67cbcac2ddfca7dbcc4a8176445990"}, + {file = "pydantic_core-2.16.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:870dbfa94de9b8866b37b867a2cb37a60c401d9deb4a9ea392abf11a1f98037b"}, + {file = "pydantic_core-2.16.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:308974fdf98046db28440eb3377abba274808bf66262e042c412eb2adf852731"}, + {file = "pydantic_core-2.16.2-cp310-none-win32.whl", hash = "sha256:a477932664d9611d7a0816cc3c0eb1f8856f8a42435488280dfbf4395e141485"}, + {file = "pydantic_core-2.16.2-cp310-none-win_amd64.whl", hash = "sha256:8f9142a6ed83d90c94a3efd7af8873bf7cefed2d3d44387bf848888482e2d25f"}, + {file = "pydantic_core-2.16.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:406fac1d09edc613020ce9cf3f2ccf1a1b2f57ab00552b4c18e3d5276c67eb11"}, + {file = "pydantic_core-2.16.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ce232a6170dd6532096cadbf6185271e4e8c70fc9217ebe105923ac105da9978"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a90fec23b4b05a09ad988e7a4f4e081711a90eb2a55b9c984d8b74597599180f"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8aafeedb6597a163a9c9727d8a8bd363a93277701b7bfd2749fbefee2396469e"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9957433c3a1b67bdd4c63717eaf174ebb749510d5ea612cd4e83f2d9142f3fc8"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0d7a9165167269758145756db43a133608a531b1e5bb6a626b9ee24bc38a8f7"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dffaf740fe2e147fedcb6b561353a16243e654f7fe8e701b1b9db148242e1272"}, + {file = "pydantic_core-2.16.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8ed79883b4328b7f0bd142733d99c8e6b22703e908ec63d930b06be3a0e7113"}, + {file = "pydantic_core-2.16.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:cf903310a34e14651c9de056fcc12ce090560864d5a2bb0174b971685684e1d8"}, + {file = "pydantic_core-2.16.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:46b0d5520dbcafea9a8645a8164658777686c5c524d381d983317d29687cce97"}, + {file = "pydantic_core-2.16.2-cp311-none-win32.whl", hash = "sha256:70651ff6e663428cea902dac297066d5c6e5423fda345a4ca62430575364d62b"}, + {file = "pydantic_core-2.16.2-cp311-none-win_amd64.whl", hash = "sha256:98dc6f4f2095fc7ad277782a7c2c88296badcad92316b5a6e530930b1d475ebc"}, + {file = "pydantic_core-2.16.2-cp311-none-win_arm64.whl", hash = "sha256:ef6113cd31411eaf9b39fc5a8848e71c72656fd418882488598758b2c8c6dfa0"}, + {file = "pydantic_core-2.16.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:88646cae28eb1dd5cd1e09605680c2b043b64d7481cdad7f5003ebef401a3039"}, + {file = "pydantic_core-2.16.2-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:7b883af50eaa6bb3299780651e5be921e88050ccf00e3e583b1e92020333304b"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bf26c2e2ea59d32807081ad51968133af3025c4ba5753e6a794683d2c91bf6e"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:99af961d72ac731aae2a1b55ccbdae0733d816f8bfb97b41909e143de735f522"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02906e7306cb8c5901a1feb61f9ab5e5c690dbbeaa04d84c1b9ae2a01ebe9379"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5362d099c244a2d2f9659fb3c9db7c735f0004765bbe06b99be69fbd87c3f15"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ac426704840877a285d03a445e162eb258924f014e2f074e209d9b4ff7bf380"}, + {file = "pydantic_core-2.16.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b94cbda27267423411c928208e89adddf2ea5dd5f74b9528513f0358bba019cb"}, + {file = "pydantic_core-2.16.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6db58c22ac6c81aeac33912fb1af0e930bc9774166cdd56eade913d5f2fff35e"}, + {file = "pydantic_core-2.16.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:396fdf88b1b503c9c59c84a08b6833ec0c3b5ad1a83230252a9e17b7dfb4cffc"}, + {file = "pydantic_core-2.16.2-cp312-none-win32.whl", hash = "sha256:7c31669e0c8cc68400ef0c730c3a1e11317ba76b892deeefaf52dcb41d56ed5d"}, + {file = "pydantic_core-2.16.2-cp312-none-win_amd64.whl", hash = "sha256:a3b7352b48fbc8b446b75f3069124e87f599d25afb8baa96a550256c031bb890"}, + {file = "pydantic_core-2.16.2-cp312-none-win_arm64.whl", hash = "sha256:a9e523474998fb33f7c1a4d55f5504c908d57add624599e095c20fa575b8d943"}, + {file = "pydantic_core-2.16.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:ae34418b6b389d601b31153b84dce480351a352e0bb763684a1b993d6be30f17"}, + {file = "pydantic_core-2.16.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:732bd062c9e5d9582a30e8751461c1917dd1ccbdd6cafb032f02c86b20d2e7ec"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b52776a2e3230f4854907a1e0946eec04d41b1fc64069ee774876bbe0eab55"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ef551c053692b1e39e3f7950ce2296536728871110e7d75c4e7753fb30ca87f4"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ebb892ed8599b23fa8f1799e13a12c87a97a6c9d0f497525ce9858564c4575a4"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aa6c8c582036275997a733427b88031a32ffa5dfc3124dc25a730658c47a572f"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4ba0884a91f1aecce75202473ab138724aa4fb26d7707f2e1fa6c3e68c84fbf"}, + {file = "pydantic_core-2.16.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7924e54f7ce5d253d6160090ddc6df25ed2feea25bfb3339b424a9dd591688bc"}, + {file = "pydantic_core-2.16.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69a7b96b59322a81c2203be537957313b07dd333105b73db0b69212c7d867b4b"}, + {file = "pydantic_core-2.16.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:7e6231aa5bdacda78e96ad7b07d0c312f34ba35d717115f4b4bff6cb87224f0f"}, + {file = "pydantic_core-2.16.2-cp38-none-win32.whl", 
hash = "sha256:41dac3b9fce187a25c6253ec79a3f9e2a7e761eb08690e90415069ea4a68ff7a"}, + {file = "pydantic_core-2.16.2-cp38-none-win_amd64.whl", hash = "sha256:f685dbc1fdadb1dcd5b5e51e0a378d4685a891b2ddaf8e2bba89bd3a7144e44a"}, + {file = "pydantic_core-2.16.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:55749f745ebf154c0d63d46c8c58594d8894b161928aa41adbb0709c1fe78b77"}, + {file = "pydantic_core-2.16.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b30b0dd58a4509c3bd7eefddf6338565c4905406aee0c6e4a5293841411a1286"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18de31781cdc7e7b28678df7c2d7882f9692ad060bc6ee3c94eb15a5d733f8f7"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5864b0242f74b9dd0b78fd39db1768bc3f00d1ffc14e596fd3e3f2ce43436a33"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8f9186ca45aee030dc8234118b9c0784ad91a0bb27fc4e7d9d6608a5e3d386c"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc6f6c9be0ab6da37bc77c2dda5f14b1d532d5dbef00311ee6e13357a418e646"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa057095f621dad24a1e906747179a69780ef45cc8f69e97463692adbcdae878"}, + {file = "pydantic_core-2.16.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6ad84731a26bcfb299f9eab56c7932d46f9cad51c52768cace09e92a19e4cf55"}, + {file = "pydantic_core-2.16.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3b052c753c4babf2d1edc034c97851f867c87d6f3ea63a12e2700f159f5c41c3"}, + {file = "pydantic_core-2.16.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e0f686549e32ccdb02ae6f25eee40cc33900910085de6aa3790effd391ae10c2"}, + {file = "pydantic_core-2.16.2-cp39-none-win32.whl", hash = "sha256:7afb844041e707ac9ad9acad2188a90bffce2c770e6dc2318be0c9916aef1469"}, + {file = "pydantic_core-2.16.2-cp39-none-win_amd64.whl", hash = "sha256:9da90d393a8227d717c19f5397688a38635afec89f2e2d7af0df037f3249c39a"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5f60f920691a620b03082692c378661947d09415743e437a7478c309eb0e4f82"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:47924039e785a04d4a4fa49455e51b4eb3422d6eaacfde9fc9abf8fdef164e8a"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6294e76b0380bb7a61eb8a39273c40b20beb35e8c87ee101062834ced19c545"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe56851c3f1d6f5384b3051c536cc81b3a93a73faf931f404fef95217cf1e10d"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9d776d30cde7e541b8180103c3f294ef7c1862fd45d81738d156d00551005784"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:72f7919af5de5ecfaf1eba47bf9a5d8aa089a3340277276e5636d16ee97614d7"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:4bfcbde6e06c56b30668a0c872d75a7ef3025dc3c1823a13cf29a0e9b33f67e8"}, + {file = "pydantic_core-2.16.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ff7c97eb7a29aba230389a2661edf2e9e06ce616c7e35aa764879b6894a44b25"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", 
hash = "sha256:9b5f13857da99325dcabe1cc4e9e6a3d7b2e2c726248ba5dd4be3e8e4a0b6d0e"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a7e41e3ada4cca5f22b478c08e973c930e5e6c7ba3588fb8e35f2398cdcc1545"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60eb8ceaa40a41540b9acae6ae7c1f0a67d233c40dc4359c256ad2ad85bdf5e5"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7beec26729d496a12fd23cf8da9944ee338c8b8a17035a560b585c36fe81af20"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:22c5f022799f3cd6741e24f0443ead92ef42be93ffda0d29b2597208c94c3753"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:eca58e319f4fd6df004762419612122b2c7e7d95ffafc37e890252f869f3fb2a"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ed957db4c33bc99895f3a1672eca7e80e8cda8bd1e29a80536b4ec2153fa9804"}, + {file = "pydantic_core-2.16.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:459c0d338cc55d099798618f714b21b7ece17eb1a87879f2da20a3ff4c7628e2"}, + {file = "pydantic_core-2.16.2.tar.gz", hash = "sha256:0ba503850d8b8dcc18391f10de896ae51d37fe5fe43dbfb6a35c5c5cad271a06"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + +[[package]] +name = "pytest" +version = "7.4.4" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, + {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-asyncio" +version = "0.21.1" +description = "Pytest support for asyncio" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-asyncio-0.21.1.tar.gz", hash = "sha256:40a7eae6dded22c7b604986855ea48400ab15b069ae38116e8c01238e9eeb64d"}, + {file = "pytest_asyncio-0.21.1-py3-none-any.whl", hash = "sha256:8666c1c8ac02631d7c51ba282e0c69a8a452b211ffedf2599099845da5c5c37b"}, +] + +[package.dependencies] +pytest = ">=7.0.0" + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] +testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy (>=0.931)", "pytest-trio (>=0.7.0)"] + +[[package]] +name = "pytest-dotenv" +version = "0.5.2" +description = "A py.test plugin that parses environment files before running tests" +optional = false +python-versions = "*" +files = [ + {file = "pytest-dotenv-0.5.2.tar.gz", hash = "sha256:2dc6c3ac6d8764c71c6d2804e902d0ff810fa19692e95fe138aefc9b1aa73732"}, + {file = "pytest_dotenv-0.5.2-py3-none-any.whl", hash = "sha256:40a2cece120a213898afaa5407673f6bd924b1fa7eafce6bda0e8abffe2f710f"}, +] + +[package.dependencies] +pytest = ">=5.0.0" +python-dotenv = ">=0.9.1" + +[[package]] +name = "pytest-mock" 
+version = "3.12.0" +description = "Thin-wrapper around the mock package for easier use with pytest" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-mock-3.12.0.tar.gz", hash = "sha256:31a40f038c22cad32287bb43932054451ff5583ff094bca6f675df2f8bc1a6e9"}, + {file = "pytest_mock-3.12.0-py3-none-any.whl", hash = "sha256:0972719a7263072da3a21c7f4773069bcc7486027d7e8e1f81d98a47e701bc4f"}, +] + +[package.dependencies] +pytest = ">=5.0" + +[package.extras] +dev = ["pre-commit", "pytest-asyncio", "tox"] + +[[package]] +name = "pytest-watcher" +version = "0.3.5" +description = "Automatically rerun your tests on file modifications" +optional = false +python-versions = ">=3.7.0,<4.0.0" +files = [ + {file = "pytest_watcher-0.3.5-py3-none-any.whl", hash = "sha256:af00ca52c7be22dc34c0fd3d7ffef99057207a73b05dc5161fe3b2fe91f58130"}, + {file = "pytest_watcher-0.3.5.tar.gz", hash = "sha256:8896152460ba2b1a8200c12117c6611008ec96c8b2d811f0a05ab8a82b043ff8"}, +] + +[package.dependencies] +tomli = {version = ">=2.0.1,<3.0.0", markers = "python_version < \"3.11\""} +watchdog = ">=2.0.0" + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", 
hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.7" +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "ruff" +version = "0.1.15" +description = "An extremely fast Python linter and code formatter, written in Rust." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.1.15-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:5fe8d54df166ecc24106db7dd6a68d44852d14eb0729ea4672bb4d96c320b7df"}, + {file = "ruff-0.1.15-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6f0bfbb53c4b4de117ac4d6ddfd33aa5fc31beeaa21d23c45c6dd249faf9126f"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0d432aec35bfc0d800d4f70eba26e23a352386be3a6cf157083d18f6f5881c8"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9405fa9ac0e97f35aaddf185a1be194a589424b8713e3b97b762336ec79ff807"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c66ec24fe36841636e814b8f90f572a8c0cb0e54d8b5c2d0e300d28a0d7bffec"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:6f8ad828f01e8dd32cc58bc28375150171d198491fc901f6f98d2a39ba8e3ff5"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86811954eec63e9ea162af0ffa9f8d09088bab51b7438e8b6488b9401863c25e"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd4025ac5e87d9b80e1f300207eb2fd099ff8200fa2320d7dc066a3f4622dc6b"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b17b93c02cdb6aeb696effecea1095ac93f3884a49a554a9afa76bb125c114c1"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ddb87643be40f034e97e97f5bc2ef7ce39de20e34608f3f829db727a93fb82c5"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:abf4822129ed3a5ce54383d5f0e964e7fef74a41e48eb1dfad404151efc130a2"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_i686.whl", hash = "sha256:6c629cf64bacfd136c07c78ac10a54578ec9d1bd2a9d395efbee0935868bf852"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1bab866aafb53da39c2cadfb8e1c4550ac5340bb40300083eb8967ba25481447"}, + {file = "ruff-0.1.15-py3-none-win32.whl", hash = "sha256:2417e1cb6e2068389b07e6fa74c306b2810fe3ee3476d5b8a96616633f40d14f"}, + {file = "ruff-0.1.15-py3-none-win_amd64.whl", hash = "sha256:3837ac73d869efc4182d9036b1405ef4c73d9b1f88da2413875e34e0d6919587"}, + {file = "ruff-0.1.15-py3-none-win_arm64.whl", hash = "sha256:9a933dfb1c14ec7a33cceb1e49ec4a16b51ce3c20fd42663198746efc0427360"}, + {file = "ruff-0.1.15.tar.gz", hash = "sha256:f6dfa8c1b21c913c326919056c390966648b680966febcb796cc9d1aaab8564e"}, +] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "sniffio" +version = "1.3.0" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, + {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, +] + +[[package]] +name = "syrupy" +version = "4.6.1" +description = "Pytest Snapshot Test 
Utility" +optional = false +python-versions = ">=3.8.1,<4" +files = [ + {file = "syrupy-4.6.1-py3-none-any.whl", hash = "sha256:203e52f9cb9fa749cf683f29bd68f02c16c3bc7e7e5fe8f2fc59bdfe488ce133"}, + {file = "syrupy-4.6.1.tar.gz", hash = "sha256:37a835c9ce7857eeef86d62145885e10b3cb9615bc6abeb4ce404b3f18e1bb36"}, +] + +[package.dependencies] +pytest = ">=7.0.0,<9.0.0" + +[[package]] +name = "tenacity" +version = "8.2.3" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, + {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, +] + +[package.extras] +doc = ["reno", "sphinx", "tornado (>=4.5)"] + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "typing-extensions" +version = "4.9.0" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, + {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, +] + +[[package]] +name = "urllib3" +version = "2.2.0" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.2.0-py3-none-any.whl", hash = "sha256:ce3711610ddce217e6d113a2732fafad960a03fd0318c91faa79481e35c11224"}, + {file = "urllib3-2.2.0.tar.gz", hash = "sha256:051d961ad0c62a94e50ecf1af379c3aba230c66c710493493560c0c223c49f20"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "watchdog" +version = "4.0.0" +description = "Filesystem events monitoring" +optional = false +python-versions = ">=3.8" +files = [ + {file = "watchdog-4.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:39cb34b1f1afbf23e9562501673e7146777efe95da24fab5707b88f7fb11649b"}, + {file = "watchdog-4.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c522392acc5e962bcac3b22b9592493ffd06d1fc5d755954e6be9f4990de932b"}, + {file = "watchdog-4.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6c47bdd680009b11c9ac382163e05ca43baf4127954c5f6d0250e7d772d2b80c"}, + {file = "watchdog-4.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8350d4055505412a426b6ad8c521bc7d367d1637a762c70fdd93a3a0d595990b"}, + {file = "watchdog-4.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c17d98799f32e3f55f181f19dd2021d762eb38fdd381b4a748b9f5a36738e935"}, + {file = "watchdog-4.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4986db5e8880b0e6b7cd52ba36255d4793bf5cdc95bd6264806c233173b1ec0b"}, + {file = "watchdog-4.0.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:11e12fafb13372e18ca1bbf12d50f593e7280646687463dd47730fd4f4d5d257"}, + {file = "watchdog-4.0.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5369136a6474678e02426bd984466343924d1df8e2fd94a9b443cb7e3aa20d19"}, + {file = "watchdog-4.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76ad8484379695f3fe46228962017a7e1337e9acadafed67eb20aabb175df98b"}, + {file = "watchdog-4.0.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:45cc09cc4c3b43fb10b59ef4d07318d9a3ecdbff03abd2e36e77b6dd9f9a5c85"}, + {file = "watchdog-4.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:eed82cdf79cd7f0232e2fdc1ad05b06a5e102a43e331f7d041e5f0e0a34a51c4"}, + {file = "watchdog-4.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba30a896166f0fee83183cec913298151b73164160d965af2e93a20bbd2ab605"}, + {file = "watchdog-4.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d18d7f18a47de6863cd480734613502904611730f8def45fc52a5d97503e5101"}, + {file = "watchdog-4.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2895bf0518361a9728773083908801a376743bcc37dfa252b801af8fd281b1ca"}, + {file = "watchdog-4.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:87e9df830022488e235dd601478c15ad73a0389628588ba0b028cb74eb72fed8"}, + {file = "watchdog-4.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6e949a8a94186bced05b6508faa61b7adacc911115664ccb1923b9ad1f1ccf7b"}, + {file = "watchdog-4.0.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6a4db54edea37d1058b08947c789a2354ee02972ed5d1e0dca9b0b820f4c7f92"}, + {file = "watchdog-4.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d31481ccf4694a8416b681544c23bd271f5a123162ab603c7d7d2dd7dd901a07"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8fec441f5adcf81dd240a5fe78e3d83767999771630b5ddfc5867827a34fa3d3"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_armv7l.whl", hash = 
"sha256:6a9c71a0b02985b4b0b6d14b875a6c86ddea2fdbebd0c9a720a806a8bbffc69f"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:557ba04c816d23ce98a06e70af6abaa0485f6d94994ec78a42b05d1c03dcbd50"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:d0f9bd1fd919134d459d8abf954f63886745f4660ef66480b9d753a7c9d40927"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:f9b2fdca47dc855516b2d66eef3c39f2672cbf7e7a42e7e67ad2cbfcd6ba107d"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:73c7a935e62033bd5e8f0da33a4dcb763da2361921a69a5a95aaf6c93aa03a87"}, + {file = "watchdog-4.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6a80d5cae8c265842c7419c560b9961561556c4361b297b4c431903f8c33b269"}, + {file = "watchdog-4.0.0-py3-none-win32.whl", hash = "sha256:8f9a542c979df62098ae9c58b19e03ad3df1c9d8c6895d96c0d51da17b243b1c"}, + {file = "watchdog-4.0.0-py3-none-win_amd64.whl", hash = "sha256:f970663fa4f7e80401a7b0cbeec00fa801bf0287d93d48368fc3e6fa32716245"}, + {file = "watchdog-4.0.0-py3-none-win_ia64.whl", hash = "sha256:9a03e16e55465177d416699331b0f3564138f1807ecc5f2de9d55d8f188d08c7"}, + {file = "watchdog-4.0.0.tar.gz", hash = "sha256:e3e7065cbdabe6183ab82199d7a4f6b3ba0a438c5a512a68559846ccb76a78ec"}, +] + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + +[metadata] +lock-version = "2.0" +python-versions = ">=3.8.1,<4.0" +content-hash = "fe6988cc6483f13341578e46b85a42fe242c21c5fdbe4f1f64384369b916b186" diff --git a/libs/partners/astradb/pyproject.toml b/libs/partners/astradb/pyproject.toml new file mode 100644 index 00000000000000..3ba131e0a59f70 --- /dev/null +++ b/libs/partners/astradb/pyproject.toml @@ -0,0 +1,89 @@ +[tool.poetry] +name = "langchain-astradb" +version = "0.0.1" +description = "An integration package connecting Astra DB and LangChain" +authors = [] +readme = "README.md" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +langchain-core = "^0.1.5" +astrapy = "^0.7.5" +numpy = "^1" + +[tool.poetry.group.test] +optional = true + +[tool.poetry.group.test.dependencies] +pytest = "^7.3.0" +pytest-dotenv = "^0.5.2" +freezegun = "^1.2.2" +pytest-mock = "^3.10.0" +syrupy = "^4.0.2" +pytest-watcher = "^0.3.4" +pytest-asyncio = "^0.21.1" +langchain-core = { path = "../../core", develop = true } + +[tool.poetry.group.codespell] +optional = true + +[tool.poetry.group.codespell.dependencies] +codespell = "^2.2.0" + +[tool.poetry.group.test_integration] +optional = true + +[tool.poetry.group.test_integration.dependencies] + +[tool.poetry.group.lint] +optional = true + +[tool.poetry.group.lint.dependencies] +ruff = "^0.1.5" + +[tool.poetry.group.typing.dependencies] +mypy = "^0.991" +langchain-core = { path = "../../core", develop = true } + +[tool.poetry.group.dev] +optional = true + +[tool.poetry.group.dev.dependencies] +langchain-core = { path = "../../core", develop = true } + +[tool.ruff] +select = [ + "E", # pycodestyle + "F", # pyflakes + "I", # isort +] + +[tool.mypy] +disallow_untyped_defs = "True" + +[tool.coverage.run] +omit = ["tests/*"] + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +# --strict-markers will raise errors on unknown marks. 
+# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
+#
+# https://docs.pytest.org/en/7.1.x/reference/reference.html
+# --strict-config: any warnings encountered while parsing the `pytest`
+# section of the configuration file raise errors.
+#
+# https://github.com/tophat/syrupy
+# --snapshot-warn-unused: prints a warning on unused snapshots rather than failing the test suite.
+addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5"
+# Registering custom markers.
+# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
+markers = [
+    "requires: mark tests as requiring a specific library",
+    "asyncio: mark tests as requiring asyncio",
+    "compile: mark placeholder test used to compile integration tests without running them",
+]
+asyncio_mode = "auto"
diff --git a/libs/partners/astradb/scripts/check_imports.py b/libs/partners/astradb/scripts/check_imports.py
new file mode 100644
index 00000000000000..fd21a4975b7f0b
--- /dev/null
+++ b/libs/partners/astradb/scripts/check_imports.py
@@ -0,0 +1,17 @@
+import sys
+import traceback
+from importlib.machinery import SourceFileLoader
+
+if __name__ == "__main__":
+    files = sys.argv[1:]
+    has_failure = False
+    for file in files:
+        try:
+            SourceFileLoader("x", file).load_module()
+        except Exception:
+            has_failure = True
+            print(file)
+            traceback.print_exc()
+            print()
+
+    sys.exit(1 if has_failure else 0)
diff --git a/libs/partners/astradb/scripts/check_pydantic.sh b/libs/partners/astradb/scripts/check_pydantic.sh
new file mode 100755
index 00000000000000..06b5bb81ae2361
--- /dev/null
+++ b/libs/partners/astradb/scripts/check_pydantic.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# This script searches for lines starting with "import pydantic" or "from pydantic"
+# in tracked files within a Git repository.
+#
+# Usage: ./scripts/check_pydantic.sh /path/to/repository
+
+# Check if a path argument is provided
+if [ $# -ne 1 ]; then
+  echo "Usage: $0 /path/to/repository"
+  exit 1
+fi
+
+repository_path="$1"
+
+# Search for lines matching the pattern within the specified repository
+result=$(git -C "$repository_path" grep -E '^import pydantic|^from pydantic')
+
+# Check if any matching lines were found
+if [ -n "$result" ]; then
+  echo "ERROR: The following lines need to be updated:"
+  echo "$result"
+  echo "Please replace the code with an import from langchain_core.pydantic_v1."
+  echo "For example, replace 'from pydantic import BaseModel'"
+  echo "with 'from langchain_core.pydantic_v1 import BaseModel'"
+  exit 1
+fi
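For illustration, a module passes check_pydantic.sh when pydantic symbols come in through the bridge named in the script's message rather than from pydantic directly. A minimal sketch (the `ExampleConfig` model is hypothetical, not part of this package):

    from langchain_core.pydantic_v1 import BaseModel, Field

    class ExampleConfig(BaseModel):  # hypothetical model, for illustration only
        collection_name: str = Field(description="Astra DB collection to use")
        dimension: int = 2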
diff --git a/libs/partners/astradb/scripts/lint_imports.sh b/libs/partners/astradb/scripts/lint_imports.sh
new file mode 100755
index 00000000000000..695613c7ba8fd6
--- /dev/null
+++ b/libs/partners/astradb/scripts/lint_imports.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+set -eu
+
+# Initialize a variable to keep track of errors
+errors=0
+
+# make sure we are not importing from langchain or langchain_experimental
+git --no-pager grep '^from langchain\.' . && errors=$((errors+1))
+git --no-pager grep '^from langchain_experimental\.' . && errors=$((errors+1))
+
+# Decide on an exit status based on the errors
+if [ "$errors" -gt 0 ]; then
+  exit 1
+else
+  exit 0
+fi
diff --git a/libs/partners/astradb/tests/__init__.py b/libs/partners/astradb/tests/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/libs/partners/astradb/tests/integration_tests/.env.example b/libs/partners/astradb/tests/integration_tests/.env.example
new file mode 100644
index 00000000000000..4259d87682c348
--- /dev/null
+++ b/libs/partners/astradb/tests/integration_tests/.env.example
@@ -0,0 +1,5 @@
+# astra db
+ASTRA_DB_API_ENDPOINT=https://your_astra_db_id-your_region.apps.astra.datastax.com
+ASTRA_DB_APPLICATION_TOKEN=AstraCS:your_astra_db_application_token
+# ASTRA_DB_KEYSPACE=your_astra_db_namespace
+# ASTRA_DB_SKIP_COLLECTION_DELETIONS=true
diff --git a/libs/partners/astradb/tests/integration_tests/__init__.py b/libs/partners/astradb/tests/integration_tests/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/libs/partners/astradb/tests/integration_tests/conftest.py b/libs/partners/astradb/tests/integration_tests/conftest.py
new file mode 100644
index 00000000000000..02b518e8695a2b
--- /dev/null
+++ b/libs/partners/astradb/tests/integration_tests/conftest.py
@@ -0,0 +1,19 @@
+# Get the absolute path of the current file's directory
+import os
+
+ABS_PATH = os.path.dirname(os.path.abspath(__file__))
+
+# Get the absolute path of the project's root directory
+PROJECT_DIR = os.path.abspath(os.path.join(ABS_PATH, os.pardir, os.pardir))
+
+
+# Load the .env file if it exists
+def _load_env() -> None:
+    dotenv_path = os.path.join(PROJECT_DIR, "tests", "integration_tests", ".env")
+    if os.path.exists(dotenv_path):
+        from dotenv import load_dotenv
+
+        load_dotenv(dotenv_path)
+
+
+_load_env()
diff --git a/libs/partners/astradb/tests/integration_tests/test_compile.py b/libs/partners/astradb/tests/integration_tests/test_compile.py
new file mode 100644
index 00000000000000..33ecccdfa0fbda
--- /dev/null
+++ b/libs/partners/astradb/tests/integration_tests/test_compile.py
@@ -0,0 +1,7 @@
+import pytest
+
+
+@pytest.mark.compile
+def test_placeholder() -> None:
+    """Used for compiling integration tests without running any real tests."""
+    pass
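This placeholder pairs with the `compile` marker registered in pyproject.toml above: it lets CI import and collect the integration test modules without touching any live services. As a usage sketch, running `pytest -m compile tests/integration_tests` from the package root should select only this test.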
+SKIP_COLLECTION_DELETE = (
+    int(os.environ.get("ASTRA_DB_SKIP_COLLECTION_DELETIONS", "0")) != 0
+)
+
+COLLECTION_NAME_DIM2 = "lc_test_d2"
+COLLECTION_NAME_DIM2_EUCLIDEAN = "lc_test_d2_eucl"
+
+MATCH_EPSILON = 0.0001
+
+# Ad-hoc embedding classes:
+
+
+class AstraDBCredentials(TypedDict):
+    token: str
+    api_endpoint: str
+    namespace: Optional[str]
+
+
+class SomeEmbeddings(Embeddings):
+    """
+    Turn a sentence into an embedding vector in some way.
+    How is not important: all that counts is that it is deterministic.
+    """
+
+    def __init__(self, dimension: int) -> None:
+        self.dimension = dimension
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        return [self.embed_query(txt) for txt in texts]
+
+    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
+        return self.embed_documents(texts)
+
+    def embed_query(self, text: str) -> List[float]:
+        unnormed0 = [ord(c) for c in text[: self.dimension]]
+        unnormed = (unnormed0 + [1] + [0] * (self.dimension - 1 - len(unnormed0)))[
+            : self.dimension
+        ]
+        norm = sum(x * x for x in unnormed) ** 0.5
+        normed = [x / norm for x in unnormed]
+        return normed
+
+    async def aembed_query(self, text: str) -> List[float]:
+        return self.embed_query(text)
+
+
+class ParserEmbeddings(Embeddings):
+    """
+    Parse input texts: if a text is valid JSON for a List[float], use that;
+    otherwise, return all zeros and call it a day.
+    """
+
+    def __init__(self, dimension: int) -> None:
+        self.dimension = dimension
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        return [self.embed_query(txt) for txt in texts]
+
+    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
+        return self.embed_documents(texts)
+
+    def embed_query(self, text: str) -> List[float]:
+        try:
+            vals = json.loads(text)
+            assert len(vals) == self.dimension
+            return vals
+        except Exception:
+            print(f'[ParserEmbeddings] Returning a moot vector for "{text}"')
+            return [0.0] * self.dimension
+
+    async def aembed_query(self, text: str) -> List[float]:
+        return self.embed_query(text)
+
+
+def _has_env_vars() -> bool:
+    return all(
+        [
+            "ASTRA_DB_APPLICATION_TOKEN" in os.environ,
+            "ASTRA_DB_API_ENDPOINT" in os.environ,
+        ]
+    )
+
+
+@pytest.fixture(scope="session")
+def astradb_credentials() -> Iterable[AstraDBCredentials]:
+    yield {
+        "token": os.environ["ASTRA_DB_APPLICATION_TOKEN"],
+        "api_endpoint": os.environ["ASTRA_DB_API_ENDPOINT"],
+        "namespace": os.environ.get("ASTRA_DB_KEYSPACE"),
+    }
+
+
+@pytest.fixture(scope="function")
+def store_someemb(
+    astradb_credentials: AstraDBCredentials,
+) -> Iterable[AstraDBVectorStore]:
+    emb = SomeEmbeddings(dimension=2)
+    v_store = AstraDBVectorStore(
+        embedding=emb,
+        collection_name=COLLECTION_NAME_DIM2,
+        **astradb_credentials,
+    )
+    v_store.clear()
+
+    yield v_store
+
+    if not SKIP_COLLECTION_DELETE:
+        v_store.delete_collection()
+    else:
+        v_store.clear()
+
+
+@pytest.fixture(scope="function")
+def store_parseremb(
+    astradb_credentials: AstraDBCredentials,
+) -> Iterable[AstraDBVectorStore]:
+    emb = ParserEmbeddings(dimension=2)
+    v_store = AstraDBVectorStore(
+        embedding=emb,
+        collection_name=COLLECTION_NAME_DIM2,
+        **astradb_credentials,
+    )
+    v_store.clear()
+
+    yield v_store
+
+    if not SKIP_COLLECTION_DELETE:
+        v_store.delete_collection()
+    else:
+        v_store.clear()
+
+
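+# For reference, the two ad-hoc embedding classes above behave as, e.g.:
+#     SomeEmbeddings(dimension=2).embed_query("aa")            # deterministic unit vector
+#     ParserEmbeddings(dimension=2).embed_query("[0.6, 0.8]")  # -> [0.6, 0.8]
+# so the tests below can place stored vectors exactly where they need them.
+
+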
vars") +class TestAstraDBVectorStore: + def test_astradb_vectorstore_create_delete( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """Create and delete.""" + from astrapy.db import AstraDB as LibAstraDB + + emb = SomeEmbeddings(dimension=2) + # creation by passing the connection secrets + v_store = AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + v_store.add_texts("Sample 1") + if not SKIP_COLLECTION_DELETE: + v_store.delete_collection() + else: + v_store.clear() + + # Creation by passing a ready-made astrapy client: + astra_db_client = LibAstraDB( + **astradb_credentials, + ) + v_store_2 = AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + astra_db_client=astra_db_client, + ) + v_store_2.add_texts("Sample 2") + if not SKIP_COLLECTION_DELETE: + v_store_2.delete_collection() + else: + v_store_2.clear() + + async def test_astradb_vectorstore_create_delete_async( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """Create and delete.""" + emb = SomeEmbeddings(dimension=2) + # creation by passing the connection secrets + v_store = AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + await v_store.adelete_collection() + # Creation by passing a ready-made astrapy client: + from astrapy.db import AsyncAstraDB + + astra_db_client = AsyncAstraDB( + **astradb_credentials, + ) + v_store_2 = AstraDBVectorStore( + embedding=emb, + collection_name="lc_test_2_async", + async_astra_db_client=astra_db_client, + ) + if not SKIP_COLLECTION_DELETE: + await v_store_2.adelete_collection() + else: + await v_store_2.aclear() + + @pytest.mark.skipif( + SKIP_COLLECTION_DELETE, + reason="Collection-deletion tests are suppressed", + ) + def test_astradb_vectorstore_pre_delete_collection( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """Use of the pre_delete_collection flag.""" + emb = SomeEmbeddings(dimension=2) + v_store = AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + v_store.clear() + try: + v_store.add_texts( + texts=["aa"], + metadatas=[ + {"k": "a", "ord": 0}, + ], + ids=["a"], + ) + res1 = v_store.similarity_search("aa", k=5) + assert len(res1) == 1 + v_store = AstraDBVectorStore( + embedding=emb, + pre_delete_collection=True, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + res1 = v_store.similarity_search("aa", k=5) + assert len(res1) == 0 + finally: + v_store.delete_collection() + + @pytest.mark.skipif( + SKIP_COLLECTION_DELETE, + reason="Collection-deletion tests are suppressed", + ) + async def test_astradb_vectorstore_pre_delete_collection_async( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """Use of the pre_delete_collection flag.""" + emb = SomeEmbeddings(dimension=2) + # creation by passing the connection secrets + + v_store = AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + try: + await v_store.aadd_texts( + texts=["aa"], + metadatas=[ + {"k": "a", "ord": 0}, + ], + ids=["a"], + ) + res1 = await v_store.asimilarity_search("aa", k=5) + assert len(res1) == 1 + v_store = AstraDBVectorStore( + embedding=emb, + pre_delete_collection=True, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + res1 = await v_store.asimilarity_search("aa", k=5) + assert len(res1) == 0 + finally: + await v_store.adelete_collection() + + def 
test_astradb_vectorstore_from_x( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """from_texts and from_documents methods.""" + emb = SomeEmbeddings(dimension=2) + # prepare empty collection + AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ).clear() + # from_texts + v_store = AstraDBVectorStore.from_texts( + texts=["Hi", "Ho"], + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + try: + assert v_store.similarity_search("Ho", k=1)[0].page_content == "Ho" + finally: + if not SKIP_COLLECTION_DELETE: + v_store.delete_collection() + else: + v_store.clear() + + # from_documents + v_store_2 = AstraDBVectorStore.from_documents( + [ + Document(page_content="Hee"), + Document(page_content="Hoi"), + ], + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + try: + assert v_store_2.similarity_search("Hoi", k=1)[0].page_content == "Hoi" + finally: + if not SKIP_COLLECTION_DELETE: + v_store_2.delete_collection() + else: + v_store_2.clear() + + async def test_astradb_vectorstore_from_x_async( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """from_texts and from_documents methods.""" + emb = SomeEmbeddings(dimension=2) + # prepare empty collection + await AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ).aclear() + # from_texts + v_store = await AstraDBVectorStore.afrom_texts( + texts=["Hi", "Ho"], + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + try: + assert (await v_store.asimilarity_search("Ho", k=1))[0].page_content == "Ho" + finally: + if not SKIP_COLLECTION_DELETE: + await v_store.adelete_collection() + else: + await v_store.aclear() + + # from_documents + v_store_2 = await AstraDBVectorStore.afrom_documents( + [ + Document(page_content="Hee"), + Document(page_content="Hoi"), + ], + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ) + try: + assert (await v_store_2.asimilarity_search("Hoi", k=1))[ + 0 + ].page_content == "Hoi" + finally: + if not SKIP_COLLECTION_DELETE: + await v_store_2.adelete_collection() + else: + await v_store_2.aclear() + + def test_astradb_vectorstore_crud(self, store_someemb: AstraDBVectorStore) -> None: + """Basic add/delete/update behaviour.""" + res0 = store_someemb.similarity_search("Abc", k=2) + assert res0 == [] + # write and check again + store_someemb.add_texts( + texts=["aa", "bb", "cc"], + metadatas=[ + {"k": "a", "ord": 0}, + {"k": "b", "ord": 1}, + {"k": "c", "ord": 2}, + ], + ids=["a", "b", "c"], + ) + res1 = store_someemb.similarity_search("Abc", k=5) + assert {doc.page_content for doc in res1} == {"aa", "bb", "cc"} + # partial overwrite and count total entries + store_someemb.add_texts( + texts=["cc", "dd"], + metadatas=[ + {"k": "c_new", "ord": 102}, + {"k": "d_new", "ord": 103}, + ], + ids=["c", "d"], + ) + res2 = store_someemb.similarity_search("Abc", k=10) + assert len(res2) == 4 + # pick one that was just updated and check its metadata + res3 = store_someemb.similarity_search_with_score_id( + query="cc", k=1, filter={"k": "c_new"} + ) + print(str(res3)) + doc3, score3, id3 = res3[0] + assert doc3.page_content == "cc" + assert doc3.metadata == {"k": "c_new", "ord": 102} + assert score3 > 0.999 # leaving some leeway for approximations... 
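+        # (similarity scores here live on a 0..1 scale where an exact match
+        # approaches 1.0; see test_astradb_vectorstore_similarity_scale below)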
+ assert id3 == "c" + # delete and count again + del1_res = store_someemb.delete(["b"]) + assert del1_res is True + del2_res = store_someemb.delete(["a", "c", "Z!"]) + assert del2_res is True # a non-existing ID was supplied + assert len(store_someemb.similarity_search("xy", k=10)) == 1 + # clear store + store_someemb.clear() + assert store_someemb.similarity_search("Abc", k=2) == [] + # add_documents with "ids" arg passthrough + store_someemb.add_documents( + [ + Document(page_content="vv", metadata={"k": "v", "ord": 204}), + Document(page_content="ww", metadata={"k": "w", "ord": 205}), + ], + ids=["v", "w"], + ) + assert len(store_someemb.similarity_search("xy", k=10)) == 2 + res4 = store_someemb.similarity_search("ww", k=1, filter={"k": "w"}) + assert res4[0].metadata["ord"] == 205 + + async def test_astradb_vectorstore_crud_async( + self, store_someemb: AstraDBVectorStore + ) -> None: + """Basic add/delete/update behaviour.""" + res0 = await store_someemb.asimilarity_search("Abc", k=2) + assert res0 == [] + # write and check again + await store_someemb.aadd_texts( + texts=["aa", "bb", "cc"], + metadatas=[ + {"k": "a", "ord": 0}, + {"k": "b", "ord": 1}, + {"k": "c", "ord": 2}, + ], + ids=["a", "b", "c"], + ) + res1 = await store_someemb.asimilarity_search("Abc", k=5) + assert {doc.page_content for doc in res1} == {"aa", "bb", "cc"} + # partial overwrite and count total entries + await store_someemb.aadd_texts( + texts=["cc", "dd"], + metadatas=[ + {"k": "c_new", "ord": 102}, + {"k": "d_new", "ord": 103}, + ], + ids=["c", "d"], + ) + res2 = await store_someemb.asimilarity_search("Abc", k=10) + assert len(res2) == 4 + # pick one that was just updated and check its metadata + res3 = await store_someemb.asimilarity_search_with_score_id( + query="cc", k=1, filter={"k": "c_new"} + ) + print(str(res3)) + doc3, score3, id3 = res3[0] + assert doc3.page_content == "cc" + assert doc3.metadata == {"k": "c_new", "ord": 102} + assert score3 > 0.999 # leaving some leeway for approximations... + assert id3 == "c" + # delete and count again + del1_res = await store_someemb.adelete(["b"]) + assert del1_res is True + del2_res = await store_someemb.adelete(["a", "c", "Z!"]) + assert del2_res is False # a non-existing ID was supplied + assert len(await store_someemb.asimilarity_search("xy", k=10)) == 1 + # clear store + await store_someemb.aclear() + assert await store_someemb.asimilarity_search("Abc", k=2) == [] + # add_documents with "ids" arg passthrough + await store_someemb.aadd_documents( + [ + Document(page_content="vv", metadata={"k": "v", "ord": 204}), + Document(page_content="ww", metadata={"k": "w", "ord": 205}), + ], + ids=["v", "w"], + ) + assert len(await store_someemb.asimilarity_search("xy", k=10)) == 2 + res4 = await store_someemb.asimilarity_search("ww", k=1, filter={"k": "w"}) + assert res4[0].metadata["ord"] == 205 + + def test_astradb_vectorstore_mmr(self, store_parseremb: AstraDBVectorStore) -> None: + """ + MMR testing. We work on the unit circle with angle multiples + of 2*pi/20 and prepare a store with known vectors for a controlled + MMR outcome. 
+ """ + + def _v_from_i(i: int, N: int) -> str: + angle = 2 * math.pi * i / N + vector = [math.cos(angle), math.sin(angle)] + return json.dumps(vector) + + i_vals = [0, 4, 5, 13] + N_val = 20 + store_parseremb.add_texts( + [_v_from_i(i, N_val) for i in i_vals], metadatas=[{"i": i} for i in i_vals] + ) + res1 = store_parseremb.max_marginal_relevance_search( + _v_from_i(3, N_val), + k=2, + fetch_k=3, + ) + res_i_vals = {doc.metadata["i"] for doc in res1} + assert res_i_vals == {0, 4} + + async def test_astradb_vectorstore_mmr_async( + self, store_parseremb: AstraDBVectorStore + ) -> None: + """ + MMR testing. We work on the unit circle with angle multiples + of 2*pi/20 and prepare a store with known vectors for a controlled + MMR outcome. + """ + + def _v_from_i(i: int, N: int) -> str: + angle = 2 * math.pi * i / N + vector = [math.cos(angle), math.sin(angle)] + return json.dumps(vector) + + i_vals = [0, 4, 5, 13] + N_val = 20 + await store_parseremb.aadd_texts( + [_v_from_i(i, N_val) for i in i_vals], + metadatas=[{"i": i} for i in i_vals], + ) + res1 = await store_parseremb.amax_marginal_relevance_search( + _v_from_i(3, N_val), + k=2, + fetch_k=3, + ) + res_i_vals = {doc.metadata["i"] for doc in res1} + assert res_i_vals == {0, 4} + + def test_astradb_vectorstore_metadata( + self, store_someemb: AstraDBVectorStore + ) -> None: + """Metadata filtering.""" + store_someemb.add_documents( + [ + Document( + page_content="q", + metadata={"ord": ord("q"), "group": "consonant"}, + ), + Document( + page_content="w", + metadata={"ord": ord("w"), "group": "consonant"}, + ), + Document( + page_content="r", + metadata={"ord": ord("r"), "group": "consonant"}, + ), + Document( + page_content="e", + metadata={"ord": ord("e"), "group": "vowel"}, + ), + Document( + page_content="i", + metadata={"ord": ord("i"), "group": "vowel"}, + ), + Document( + page_content="o", + metadata={"ord": ord("o"), "group": "vowel"}, + ), + ] + ) + # no filters + res0 = store_someemb.similarity_search("x", k=10) + assert {doc.page_content for doc in res0} == set("qwreio") + # single filter + res1 = store_someemb.similarity_search( + "x", + k=10, + filter={"group": "vowel"}, + ) + assert {doc.page_content for doc in res1} == set("eio") + # multiple filters + res2 = store_someemb.similarity_search( + "x", + k=10, + filter={"group": "consonant", "ord": ord("q")}, + ) + assert {doc.page_content for doc in res2} == set("q") + # excessive filters + res3 = store_someemb.similarity_search( + "x", + k=10, + filter={"group": "consonant", "ord": ord("q"), "case": "upper"}, + ) + assert res3 == [] + # filter with logical operator + res4 = store_someemb.similarity_search( + "x", + k=10, + filter={"$or": [{"ord": ord("q")}, {"ord": ord("r")}]}, + ) + assert {doc.page_content for doc in res4} == {"q", "r"} + + def test_astradb_vectorstore_similarity_scale( + self, store_parseremb: AstraDBVectorStore + ) -> None: + """Scale of the similarity scores.""" + store_parseremb.add_texts( + texts=[ + json.dumps([1, 1]), + json.dumps([-1, -1]), + ], + ids=["near", "far"], + ) + res1 = store_parseremb.similarity_search_with_score( + json.dumps([0.5, 0.5]), + k=2, + ) + scores = [sco for _, sco in res1] + sco_near, sco_far = scores + assert abs(1 - sco_near) < MATCH_EPSILON and abs(sco_far) < MATCH_EPSILON + + async def test_astradb_vectorstore_similarity_scale_async( + self, store_parseremb: AstraDBVectorStore + ) -> None: + """Scale of the similarity scores.""" + await store_parseremb.aadd_texts( + texts=[ + json.dumps([1, 1]), + json.dumps([-1, -1]), 
+ ], + ids=["near", "far"], + ) + res1 = await store_parseremb.asimilarity_search_with_score( + json.dumps([0.5, 0.5]), + k=2, + ) + scores = [sco for _, sco in res1] + sco_near, sco_far = scores + assert abs(1 - sco_near) < MATCH_EPSILON and abs(sco_far) < MATCH_EPSILON + + def test_astradb_vectorstore_massive_delete( + self, store_someemb: AstraDBVectorStore + ) -> None: + """Larger-scale bulk deletes.""" + M = 50 + texts = [str(i + 1 / 7.0) for i in range(2 * M)] + ids0 = ["doc_%i" % i for i in range(M)] + ids1 = ["doc_%i" % (i + M) for i in range(M)] + ids = ids0 + ids1 + store_someemb.add_texts(texts=texts, ids=ids) + # deleting a bunch of these + del_res0 = store_someemb.delete(ids0) + assert del_res0 is True + # deleting the rest plus a fake one + del_res1 = store_someemb.delete(ids1 + ["ghost!"]) + assert del_res1 is True # ensure no error + # nothing left + assert store_someemb.similarity_search("x", k=2 * M) == [] + + @pytest.mark.skipif( + SKIP_COLLECTION_DELETE, + reason="Collection-deletion tests are suppressed", + ) + def test_astradb_vectorstore_delete_collection( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """behaviour of 'delete_collection'.""" + collection_name = COLLECTION_NAME_DIM2 + emb = SomeEmbeddings(dimension=2) + v_store = AstraDBVectorStore( + embedding=emb, + collection_name=collection_name, + **astradb_credentials, + ) + v_store.add_texts(["huh"]) + assert len(v_store.similarity_search("hah", k=10)) == 1 + # another instance pointing to the same collection on DB + v_store_kenny = AstraDBVectorStore( + embedding=emb, + collection_name=collection_name, + **astradb_credentials, + ) + v_store_kenny.delete_collection() + # dropped on DB, but 'v_store' should have no clue: + with pytest.raises(ValueError): + _ = v_store.similarity_search("hah", k=10) + + def test_astradb_vectorstore_custom_params( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """Custom batch size and concurrency params.""" + emb = SomeEmbeddings(dimension=2) + # prepare empty collection + AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ).clear() + v_store = AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + batch_size=17, + bulk_insert_batch_concurrency=13, + bulk_insert_overwrite_concurrency=7, + bulk_delete_concurrency=19, + ) + try: + # add_texts + N = 50 + texts = [str(i + 1 / 7.0) for i in range(N)] + ids = ["doc_%i" % i for i in range(N)] + v_store.add_texts(texts=texts, ids=ids) + v_store.add_texts( + texts=texts, + ids=ids, + batch_size=19, + batch_concurrency=7, + overwrite_concurrency=13, + ) + # + _ = v_store.delete(ids[: N // 2]) + _ = v_store.delete(ids[N // 2 :], concurrency=23) + # + finally: + if not SKIP_COLLECTION_DELETE: + v_store.delete_collection() + else: + v_store.clear() + + async def test_astradb_vectorstore_custom_params_async( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """Custom batch size and concurrency params.""" + emb = SomeEmbeddings(dimension=2) + v_store = AstraDBVectorStore( + embedding=emb, + collection_name="lc_test_c_async", + batch_size=17, + bulk_insert_batch_concurrency=13, + bulk_insert_overwrite_concurrency=7, + bulk_delete_concurrency=19, + **astradb_credentials, + ) + try: + # add_texts + N = 50 + texts = [str(i + 1 / 7.0) for i in range(N)] + ids = ["doc_%i" % i for i in range(N)] + await v_store.aadd_texts(texts=texts, ids=ids) + await v_store.aadd_texts( + texts=texts, + ids=ids, + 
batch_size=19, + batch_concurrency=7, + overwrite_concurrency=13, + ) + # + await v_store.adelete(ids[: N // 2]) + await v_store.adelete(ids[N // 2 :], concurrency=23) + # + finally: + if not SKIP_COLLECTION_DELETE: + await v_store.adelete_collection() + else: + await v_store.aclear() + + def test_astradb_vectorstore_metrics( + self, astradb_credentials: AstraDBCredentials + ) -> None: + """ + Different choices of similarity metric. + Both stores (with "cosine" and "euclidea" metrics) contain these two: + - a vector slightly rotated w.r.t query vector + - a vector which is a long multiple of query vector + so, which one is "the closest one" depends on the metric. + """ + emb = ParserEmbeddings(dimension=2) + isq2 = 0.5**0.5 + isa = 0.7 + isb = (1.0 - isa * isa) ** 0.5 + texts = [ + json.dumps([isa, isb]), + json.dumps([10 * isq2, 10 * isq2]), + ] + ids = [ + "rotated", + "scaled", + ] + query_text = json.dumps([isq2, isq2]) + + # prepare empty collections + AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + **astradb_credentials, + ).clear() + AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2_EUCLIDEAN, + metric="euclidean", + **astradb_credentials, + ).clear() + + # creation, population, query - cosine + vstore_cos = AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2, + metric="cosine", + **astradb_credentials, + ) + try: + vstore_cos.add_texts( + texts=texts, + ids=ids, + ) + _, _, id_from_cos = vstore_cos.similarity_search_with_score_id( + query_text, + k=1, + )[0] + assert id_from_cos == "scaled" + finally: + if not SKIP_COLLECTION_DELETE: + vstore_cos.delete_collection() + else: + vstore_cos.clear() + # creation, population, query - euclidean + + vstore_euc = AstraDBVectorStore( + embedding=emb, + collection_name=COLLECTION_NAME_DIM2_EUCLIDEAN, + metric="euclidean", + **astradb_credentials, + ) + try: + vstore_euc.add_texts( + texts=texts, + ids=ids, + ) + _, _, id_from_euc = vstore_euc.similarity_search_with_score_id( + query_text, + k=1, + )[0] + assert id_from_euc == "rotated" + finally: + if not SKIP_COLLECTION_DELETE: + vstore_euc.delete_collection() + else: + vstore_euc.clear() diff --git a/libs/partners/astradb/tests/unit_tests/__init__.py b/libs/partners/astradb/tests/unit_tests/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/libs/partners/astradb/tests/unit_tests/test_imports.py b/libs/partners/astradb/tests/unit_tests/test_imports.py new file mode 100644 index 00000000000000..2240748c70c6e1 --- /dev/null +++ b/libs/partners/astradb/tests/unit_tests/test_imports.py @@ -0,0 +1,9 @@ +from langchain_astradb import __all__ + +EXPECTED_ALL = [ + "AstraDBVectorStore", +] + + +def test_all_imports() -> None: + assert sorted(EXPECTED_ALL) == sorted(__all__) diff --git a/libs/partners/astradb/tests/unit_tests/test_vectorstores.py b/libs/partners/astradb/tests/unit_tests/test_vectorstores.py new file mode 100644 index 00000000000000..ebfc6978d18c7d --- /dev/null +++ b/libs/partners/astradb/tests/unit_tests/test_vectorstores.py @@ -0,0 +1,45 @@ +from typing import List +from unittest.mock import Mock + +from langchain_core.embeddings import Embeddings + +from langchain_astradb.vectorstores import AstraDBVectorStore + + +class SomeEmbeddings(Embeddings): + """ + Turn a sentence into an embedding vector in some way. + Not important how. It is deterministic is all that counts. 
+ """ + + def __init__(self, dimension: int) -> None: + self.dimension = dimension + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + return [self.embed_query(txt) for txt in texts] + + async def aembed_documents(self, texts: List[str]) -> List[List[float]]: + return self.embed_documents(texts) + + def embed_query(self, text: str) -> List[float]: + unnormed0 = [ord(c) for c in text[: self.dimension]] + unnormed = (unnormed0 + [1] + [0] * (self.dimension - 1 - len(unnormed0)))[ + : self.dimension + ] + norm = sum(x * x for x in unnormed) ** 0.5 + normed = [x / norm for x in unnormed] + return normed + + async def aembed_query(self, text: str) -> List[float]: + return self.embed_query(text) + + +def test_initialization() -> None: + """Test integration vectorstore initialization.""" + mock_astra_db = Mock() + embedding = SomeEmbeddings(dimension=2) + AstraDBVectorStore( + embedding=embedding, + collection_name="mock_coll_name", + astra_db_client=mock_astra_db, + ) diff --git a/libs/partners/exa/langchain_exa/retrievers.py b/libs/partners/exa/langchain_exa/retrievers.py index e316d0ab84c2c4..0011ddc008e8d7 100644 --- a/libs/partners/exa/langchain_exa/retrievers.py +++ b/libs/partners/exa/langchain_exa/retrievers.py @@ -4,7 +4,7 @@ from exa_py.api import HighlightsContentsOptions, TextContentsOptions # type: ignore from langchain_core.callbacks import CallbackManagerForRetrieverRun from langchain_core.documents import Document -from langchain_core.pydantic_v1 import SecretStr, root_validator +from langchain_core.pydantic_v1 import Field, SecretStr, root_validator from langchain_core.retrievers import BaseRetriever from langchain_exa._utilities import initialize_client @@ -53,8 +53,8 @@ class ExaSearchRetriever(BaseRetriever): text_contents_options: Union[TextContentsOptions, Literal[True]] = True """How to set the page content of the results""" - client: Exa - exa_api_key: SecretStr + client: Exa = Field(default=None) + exa_api_key: SecretStr = Field(default=None) exa_base_url: Optional[str] = None @root_validator(pre=True) @@ -66,11 +66,11 @@ def validate_environment(cls, values: Dict) -> Dict: def _get_relevant_documents( self, query: str, *, run_manager: CallbackManagerForRetrieverRun ) -> List[Document]: - response = self.client.search_and_contents( + response = self.client.search_and_contents( # type: ignore[misc] query, num_results=self.k, text=self.text_contents_options, - highlights=self.highlights, + highlights=self.highlights, # type: ignore include_domains=self.include_domains, exclude_domains=self.exclude_domains, start_crawl_date=self.start_crawl_date, diff --git a/libs/partners/exa/langchain_exa/tools.py b/libs/partners/exa/langchain_exa/tools.py index 20636afbfb289e..8cff78fd9005bc 100644 --- a/libs/partners/exa/langchain_exa/tools.py +++ b/libs/partners/exa/langchain_exa/tools.py @@ -7,7 +7,7 @@ from langchain_core.callbacks import ( CallbackManagerForToolRun, ) -from langchain_core.pydantic_v1 import SecretStr, root_validator +from langchain_core.pydantic_v1 import Field, SecretStr, root_validator from langchain_core.tools import BaseTool from langchain_exa._utilities import initialize_client @@ -22,8 +22,8 @@ class ExaSearchResults(BaseTool): "Input should be an Exa-optimized query. 
" "Output is a JSON array of the query results" ) - client: Exa - exa_api_key: SecretStr + client: Exa = Field(default=None) + exa_api_key: SecretStr = Field(default=None) @root_validator(pre=True) def validate_environment(cls, values: Dict) -> Dict: @@ -51,8 +51,8 @@ def _run( return self.client.search_and_contents( query, num_results=num_results, - text=text_contents_options, - highlights=highlights, + text=text_contents_options, # type: ignore + highlights=highlights, # type: ignore include_domains=include_domains, exclude_domains=exclude_domains, start_crawl_date=start_crawl_date, @@ -60,7 +60,7 @@ def _run( start_published_date=start_published_date, end_published_date=end_published_date, use_autoprompt=use_autoprompt, - ) + ) # type: ignore except Exception as e: return repr(e) @@ -74,8 +74,8 @@ class ExaFindSimilarResults(BaseTool): "Input should be an Exa-optimized query. " "Output is a JSON array of the query results" ) - client: Exa - exa_api_key: SecretStr + client: Exa = Field(default=None) + exa_api_key: SecretStr = Field(default=None) exa_base_url: Optional[str] = None @root_validator(pre=True) @@ -105,8 +105,8 @@ def _run( return self.client.find_similar_and_contents( url, num_results=num_results, - text=text_contents_options, - highlights=highlights, + text=text_contents_options, # type: ignore + highlights=highlights, # type: ignore include_domains=include_domains, exclude_domains=exclude_domains, start_crawl_date=start_crawl_date, @@ -115,6 +115,6 @@ def _run( end_published_date=end_published_date, exclude_source_domain=exclude_source_domain, category=category, - ) + ) # type: ignore except Exception as e: return repr(e) diff --git a/libs/partners/exa/poetry.lock b/libs/partners/exa/poetry.lock index 7a0c1382b279cf..874478edf26bfb 100644 --- a/libs/partners/exa/poetry.lock +++ b/libs/partners/exa/poetry.lock @@ -306,52 +306,49 @@ requests = ">=2,<3" [[package]] name = "mypy" -version = "0.991" +version = "1.8.0" description = "Optional static typing for Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "mypy-0.991-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7d17e0a9707d0772f4a7b878f04b4fd11f6f5bcb9b3813975a9b13c9332153ab"}, - {file = "mypy-0.991-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0714258640194d75677e86c786e80ccf294972cc76885d3ebbb560f11db0003d"}, - {file = "mypy-0.991-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0c8f3be99e8a8bd403caa8c03be619544bc2c77a7093685dcf308c6b109426c6"}, - {file = "mypy-0.991-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9ec663ed6c8f15f4ae9d3c04c989b744436c16d26580eaa760ae9dd5d662eb"}, - {file = "mypy-0.991-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4307270436fd7694b41f913eb09210faff27ea4979ecbcd849e57d2da2f65305"}, - {file = "mypy-0.991-cp310-cp310-win_amd64.whl", hash = "sha256:901c2c269c616e6cb0998b33d4adbb4a6af0ac4ce5cd078afd7bc95830e62c1c"}, - {file = "mypy-0.991-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d13674f3fb73805ba0c45eb6c0c3053d218aa1f7abead6e446d474529aafc372"}, - {file = "mypy-0.991-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1c8cd4fb70e8584ca1ed5805cbc7c017a3d1a29fb450621089ffed3e99d1857f"}, - {file = "mypy-0.991-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:209ee89fbb0deed518605edddd234af80506aec932ad28d73c08f1400ef80a33"}, - {file = "mypy-0.991-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:37bd02ebf9d10e05b00d71302d2c2e6ca333e6c2a8584a98c00e038db8121f05"}, - {file = "mypy-0.991-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:26efb2fcc6b67e4d5a55561f39176821d2adf88f2745ddc72751b7890f3194ad"}, - {file = "mypy-0.991-cp311-cp311-win_amd64.whl", hash = "sha256:3a700330b567114b673cf8ee7388e949f843b356a73b5ab22dd7cff4742a5297"}, - {file = "mypy-0.991-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1f7d1a520373e2272b10796c3ff721ea1a0712288cafaa95931e66aa15798813"}, - {file = "mypy-0.991-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:641411733b127c3e0dab94c45af15fea99e4468f99ac88b39efb1ad677da5711"}, - {file = "mypy-0.991-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3d80e36b7d7a9259b740be6d8d906221789b0d836201af4234093cae89ced0cd"}, - {file = "mypy-0.991-cp37-cp37m-win_amd64.whl", hash = "sha256:e62ebaad93be3ad1a828a11e90f0e76f15449371ffeecca4a0a0b9adc99abcef"}, - {file = "mypy-0.991-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b86ce2c1866a748c0f6faca5232059f881cda6dda2a893b9a8373353cfe3715a"}, - {file = "mypy-0.991-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ac6e503823143464538efda0e8e356d871557ef60ccd38f8824a4257acc18d93"}, - {file = "mypy-0.991-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0cca5adf694af539aeaa6ac633a7afe9bbd760df9d31be55ab780b77ab5ae8bf"}, - {file = "mypy-0.991-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12c56bf73cdab116df96e4ff39610b92a348cc99a1307e1da3c3768bbb5b135"}, - {file = "mypy-0.991-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:652b651d42f155033a1967739788c436491b577b6a44e4c39fb340d0ee7f0d70"}, - {file = "mypy-0.991-cp38-cp38-win_amd64.whl", hash = "sha256:4175593dc25d9da12f7de8de873a33f9b2b8bdb4e827a7cae952e5b1a342e243"}, - {file = "mypy-0.991-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:98e781cd35c0acf33eb0295e8b9c55cdbef64fcb35f6d3aa2186f289bed6e80d"}, - {file = "mypy-0.991-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6d7464bac72a85cb3491c7e92b5b62f3dcccb8af26826257760a552a5e244aa5"}, - {file = "mypy-0.991-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c9166b3f81a10cdf9b49f2d594b21b31adadb3d5e9db9b834866c3258b695be3"}, - {file = "mypy-0.991-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8472f736a5bfb159a5e36740847808f6f5b659960115ff29c7cecec1741c648"}, - {file = "mypy-0.991-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5e80e758243b97b618cdf22004beb09e8a2de1af481382e4d84bc52152d1c476"}, - {file = "mypy-0.991-cp39-cp39-win_amd64.whl", hash = "sha256:74e259b5c19f70d35fcc1ad3d56499065c601dfe94ff67ae48b85596b9ec1461"}, - {file = "mypy-0.991-py3-none-any.whl", hash = "sha256:de32edc9b0a7e67c2775e574cb061a537660e51210fbf6006b0b36ea695ae9bb"}, - {file = "mypy-0.991.tar.gz", hash = "sha256:3c0165ba8f354a6d9881809ef29f1a9318a236a6d81c690094c5df32107bde06"}, + {file = "mypy-1.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:485a8942f671120f76afffff70f259e1cd0f0cfe08f81c05d8816d958d4577d3"}, + {file = "mypy-1.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:df9824ac11deaf007443e7ed2a4a26bebff98d2bc43c6da21b2b64185da011c4"}, + {file = "mypy-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2afecd6354bbfb6e0160f4e4ad9ba6e4e003b767dd80d85516e71f2e955ab50d"}, + {file = "mypy-1.8.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8963b83d53ee733a6e4196954502b33567ad07dfd74851f32be18eb932fb1cb9"}, + {file = "mypy-1.8.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:e46f44b54ebddbeedbd3d5b289a893219065ef805d95094d16a0af6630f5d410"}, + {file = "mypy-1.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:855fe27b80375e5c5878492f0729540db47b186509c98dae341254c8f45f42ae"}, + {file = "mypy-1.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4c886c6cce2d070bd7df4ec4a05a13ee20c0aa60cb587e8d1265b6c03cf91da3"}, + {file = "mypy-1.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d19c413b3c07cbecf1f991e2221746b0d2a9410b59cb3f4fb9557f0365a1a817"}, + {file = "mypy-1.8.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9261ed810972061388918c83c3f5cd46079d875026ba97380f3e3978a72f503d"}, + {file = "mypy-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:51720c776d148bad2372ca21ca29256ed483aa9a4cdefefcef49006dff2a6835"}, + {file = "mypy-1.8.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:52825b01f5c4c1c4eb0db253ec09c7aa17e1a7304d247c48b6f3599ef40db8bd"}, + {file = "mypy-1.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f5ac9a4eeb1ec0f1ccdc6f326bcdb464de5f80eb07fb38b5ddd7b0de6bc61e55"}, + {file = "mypy-1.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afe3fe972c645b4632c563d3f3eff1cdca2fa058f730df2b93a35e3b0c538218"}, + {file = "mypy-1.8.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:42c6680d256ab35637ef88891c6bd02514ccb7e1122133ac96055ff458f93fc3"}, + {file = "mypy-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:720a5ca70e136b675af3af63db533c1c8c9181314d207568bbe79051f122669e"}, + {file = "mypy-1.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:028cf9f2cae89e202d7b6593cd98db6759379f17a319b5faf4f9978d7084cdc6"}, + {file = "mypy-1.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4e6d97288757e1ddba10dd9549ac27982e3e74a49d8d0179fc14d4365c7add66"}, + {file = "mypy-1.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f1478736fcebb90f97e40aff11a5f253af890c845ee0c850fe80aa060a267c6"}, + {file = "mypy-1.8.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42419861b43e6962a649068a61f4a4839205a3ef525b858377a960b9e2de6e0d"}, + {file = "mypy-1.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:2b5b6c721bd4aabaadead3a5e6fa85c11c6c795e0c81a7215776ef8afc66de02"}, + {file = "mypy-1.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5c1538c38584029352878a0466f03a8ee7547d7bd9f641f57a0f3017a7c905b8"}, + {file = "mypy-1.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ef4be7baf08a203170f29e89d79064463b7fc7a0908b9d0d5114e8009c3a259"}, + {file = "mypy-1.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7178def594014aa6c35a8ff411cf37d682f428b3b5617ca79029d8ae72f5402b"}, + {file = "mypy-1.8.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ab3c84fa13c04aeeeabb2a7f67a25ef5d77ac9d6486ff33ded762ef353aa5592"}, + {file = "mypy-1.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:99b00bc72855812a60d253420d8a2eae839b0afa4938f09f4d2aa9bb4654263a"}, + {file = "mypy-1.8.0-py3-none-any.whl", hash = "sha256:538fd81bb5e430cc1381a443971c0475582ff9f434c16cd46d2c66763ce85d9d"}, + {file = "mypy-1.8.0.tar.gz", hash = "sha256:6ff8b244d7085a0b425b56d327b480c3b29cafbd2eff27316a004f9a7391ae07"}, ] [package.dependencies] -mypy-extensions = ">=0.4.3" +mypy-extensions = ">=1.0.0" tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = ">=3.10" +typing-extensions = ">=4.1.0" [package.extras] dmypy = ["psutil (>=4.0)"] install-types = ["pip"] -python2 = ["typed-ast (>=1.4.0,<2)"] +mypyc = ["setuptools (>=50)"] 
reports = ["lxml"] [[package]] @@ -827,4 +824,4 @@ watchmedo = ["PyYAML (>=3.10)"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "0a0c63dbd8c0ad87b555754848d250f97e9dfeb65bc3d5b94d3c6e528972a71c" +content-hash = "06818acc2d818513a1a78398d32b59cd837413cb5bd94026d63e963020f01045" diff --git a/libs/partners/exa/pyproject.toml b/libs/partners/exa/pyproject.toml index f9c250cd255a17..4cc65162a4459d 100644 --- a/libs/partners/exa/pyproject.toml +++ b/libs/partners/exa/pyproject.toml @@ -13,7 +13,7 @@ license = "MIT" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" langchain-core = "^0.1" -exa-py = "^1.0.7" +exa-py = "^1.0.8" [tool.poetry.group.test] optional = true @@ -40,7 +40,7 @@ optional = true ruff = "^0.1.5" [tool.poetry.group.typing.dependencies] -mypy = "^0.991" +mypy = "^1" langchain-core = { path = "../../core", develop = true } [tool.poetry.group.dev] diff --git a/libs/partners/ibm/langchain_ibm/llms.py b/libs/partners/ibm/langchain_ibm/llms.py index 18f2ae3f4c1deb..8afdcd17c0bf34 100644 --- a/libs/partners/ibm/langchain_ibm/llms.py +++ b/libs/partners/ibm/langchain_ibm/llms.py @@ -381,7 +381,7 @@ def _stream( if not isinstance(stream_resp, dict): stream_resp = stream_resp.dict() chunk = self._stream_response_to_generation_chunk(stream_resp) - yield chunk if run_manager: run_manager.on_llm_new_token(chunk.text, chunk=chunk) + yield chunk diff --git a/libs/partners/ibm/poetry.lock b/libs/partners/ibm/poetry.lock index 4968e523caad48..b4f2e062d097f7 100644 --- a/libs/partners/ibm/poetry.lock +++ b/libs/partners/ibm/poetry.lock @@ -245,13 +245,13 @@ ibm-cos-sdk-core = "2.13.4" [[package]] name = "ibm-watson-machine-learning" -version = "1.0.345" +version = "1.0.347" description = "IBM Watson Machine Learning API Client" optional = false python-versions = ">=3.10" files = [ - {file = "ibm_watson_machine_learning-1.0.345-py3-none-any.whl", hash = "sha256:b6c85c7f7c955af813a056f42cc3b0ec112e8a943e8922e2620129c7af680527"}, - {file = "ibm_watson_machine_learning-1.0.345.tar.gz", hash = "sha256:f5fa7f4f082c4a2fe0b2bef1106e7dd86f112db2f5678615c2c4a396ecdaecb9"}, + {file = "ibm_watson_machine_learning-1.0.347-py3-none-any.whl", hash = "sha256:5ac0b6ec9a08adaf1c2a2b414bbf0aed8d8f3d77adef29bda725c8483580169b"}, + {file = "ibm_watson_machine_learning-1.0.347.tar.gz", hash = "sha256:62ef043ddf6c093b2d7b1f6fe68ebbe4e79db5a4d9852d651a1192ad2e2ad723"}, ] [package.dependencies] @@ -274,13 +274,13 @@ fl-rt23-1-py3-10 = ["GPUtil", "cloudpickle (==1.3.0)", "cryptography (==39.0.1)" [[package]] name = "ibm-watsonx-ai" -version = "0.1.7" +version = "0.1.8" description = "IBM watsonx.ai API Client" optional = false python-versions = ">=3.10" files = [ - {file = "ibm_watsonx_ai-0.1.7-py3-none-any.whl", hash = "sha256:8f4329686d7b1d367bef210441fd8903de904cea83b32b165a4c2eb8d11a7242"}, - {file = "ibm_watsonx_ai-0.1.7.tar.gz", hash = "sha256:07ed5604f4825bc4d19c387f8587eaf5c54208bf6a1f81f26a3c2d823f632857"}, + {file = "ibm_watsonx_ai-0.1.8-py3-none-any.whl", hash = "sha256:85536b00aa3c495540480e53a17b56a0990d1340e47fae0e7ea778dcd717e5dc"}, + {file = "ibm_watsonx_ai-0.1.8.tar.gz", hash = "sha256:ba4e60091165cb755985f85ef0ece1db76ad1d351dd515a55d739467196dace3"}, ] [package.dependencies] @@ -372,7 +372,7 @@ files = [ [[package]] name = "langchain-core" -version = "0.1.22" +version = "0.1.23" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -382,7 +382,7 @@ develop = true [package.dependencies] anyio = 
">=3,<5" jsonpatch = "^1.33" -langsmith = "^0.0.87" +langsmith = "^0.1.0" packaging = "^23.2" pydantic = ">=1,<3" PyYAML = ">=5.3" @@ -398,13 +398,13 @@ url = "../../core" [[package]] name = "langsmith" -version = "0.0.87" +version = "0.1.1" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langsmith-0.0.87-py3-none-any.whl", hash = "sha256:8903d3811b9fc89eb18f5961c8e6935fbd2d0f119884fbf30dc70b8f8f4121fc"}, - {file = "langsmith-0.0.87.tar.gz", hash = "sha256:36c4cc47e5b54be57d038036a30fb19ce6e4c73048cd7a464b8f25b459694d34"}, + {file = "langsmith-0.1.1-py3-none-any.whl", hash = "sha256:10ff2b977a41e3f6351d1a4239d9bd57af0547aa909e839d2791e16cc197a6f9"}, + {file = "langsmith-0.1.1.tar.gz", hash = "sha256:09df0c2ca9085105f97a4e4f281b083e312c99d162f3fe2b2d5eefd5c3692e60"}, ] [package.dependencies] diff --git a/libs/partners/robocorp/poetry.lock b/libs/partners/robocorp/poetry.lock index 8ede4ab2befdde..20b60ed600a6e8 100644 --- a/libs/partners/robocorp/poetry.lock +++ b/libs/partners/robocorp/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. [[package]] name = "annotated-types" @@ -246,7 +246,6 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, - {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, ] [[package]] @@ -261,7 +260,7 @@ develop = true [package.dependencies] anyio = ">=3,<5" jsonpatch = "^1.33" -langsmith = "^0.0.87" +langsmith = "^0.1.0 || ^0.0.87" packaging = "^23.2" pydantic = ">=1,<3" PyYAML = ">=5.3" @@ -585,7 +584,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -593,16 +591,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -619,7 +609,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -627,7 +616,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -827,4 +815,4 @@ watchmedo = ["PyYAML (>=3.10)"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "e65c58173388cea51a3a205dcdb77081133921c6790ee2cd19b356ebae4de8cb" +content-hash = "75f4075fd17085ea5a72c7a7dcb7d2086baada0772329fe99c746e400cb9e81c" diff --git a/libs/partners/robocorp/pyproject.toml b/libs/partners/robocorp/pyproject.toml index 0d4f06cbc9b85c..16a024b4448b0b 100644 --- a/libs/partners/robocorp/pyproject.toml +++ b/libs/partners/robocorp/pyproject.toml @@ -15,7 +15,6 @@ python = ">=3.8.1,<4.0" langchain-core = "^0.1" requests = "^2.31.0" types-requests = "^2.31.0.6" -langsmith = ">=0.0.83,<0.1" [tool.poetry.group.test] optional = true