text-splitters[minor], langchain[minor], community[patch], templates, docs: langchain-text-splitters 0.0.1 #18346

Merged · 16 commits · merged Mar 1, 2024

Changes from 2 commits
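The substance of this PR is one change repeated across the repo: text splitters move out of the monolithic `langchain` package into the new standalone `langchain-text-splitters` distribution, so every cookbook, docs, and template import of `langchain.text_splitter` is rewritten to `langchain_text_splitters`. A minimal sketch of the migration (the chunking parameters are illustrative values borrowed from the diffs below, not required by the API):

```python
# Old import path, via the monolithic langchain package:
# from langchain.text_splitter import RecursiveCharacterTextSplitter

# New import path introduced by this PR (pip install langchain-text-splitters):
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Illustrative usage; chunk_size/chunk_overlap mirror values used in the diffs below.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
chunks = text_splitter.split_text("Some long document text to split into chunks.")
```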
3 changes: 2 additions & 1 deletion .github/scripts/check_diff.py
@@ -5,9 +5,10 @@

 LANGCHAIN_DIRS = [
     "libs/core",
+    "libs/text-splitters",
+    "libs/community",
     "libs/langchain",
     "libs/experimental",
-    "libs/community",
 ]

 if __name__ == "__main__":
2 changes: 1 addition & 1 deletion .github/scripts/get_min_versions.py
@@ -4,7 +4,7 @@
 from packaging.version import parse as parse_version
 import re

-MIN_VERSION_LIBS = ["langchain-core", "langchain-community", "langchain"]
+MIN_VERSION_LIBS = ["langchain-core", "langchain-community", "langchain", "langchain-text-splitters"]


 def get_min_version(version: str) -> str:
2 changes: 1 addition & 1 deletion cookbook/Multi_modal_RAG.ipynb
@@ -116,7 +116,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "from unstructured.partition.pdf import partition_pdf\n",
 "\n",
 "\n",
2 changes: 1 addition & 1 deletion cookbook/advanced_rag_eval.ipynb
@@ -68,7 +68,7 @@
 "pdf_pages = loader.load()\n",
 "\n",
 "# Split\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
 "all_splits_pypdf = text_splitter.split_documents(pdf_pages)\n",
2 changes: 1 addition & 1 deletion cookbook/agent_vectorstore.ipynb
@@ -28,9 +28,9 @@
 "outputs": [],
 "source": [
 "from langchain.chains import RetrievalQA\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.vectorstores import Chroma\n",
 "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "llm = OpenAI(temperature=0)"
 ]
2 changes: 1 addition & 1 deletion cookbook/autogpt/marathon_times.ipynb
@@ -227,8 +227,8 @@
 " BaseCombineDocumentsChain,\n",
 " load_qa_with_sources_chain,\n",
 ")\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain.tools import BaseTool, DuckDuckGoSearchRun\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "from pydantic import Field\n",
 "\n",
 "\n",
4 changes: 2 additions & 2 deletions cookbook/code-analysis-deeplake.ipynb
@@ -24,7 +24,7 @@
 "source": [
 "1. Prepare data:\n",
 " 1. Upload all python project files using the `langchain_community.document_loaders.TextLoader`. We will call these files the **documents**.\n",
-" 2. Split all documents to chunks using the `langchain.text_splitter.CharacterTextSplitter`.\n",
+" 2. Split all documents to chunks using the `langchain_text_splitters.CharacterTextSplitter`.\n",
 " 3. Embed chunks and upload them into the DeepLake using `langchain.embeddings.openai.OpenAIEmbeddings` and `langchain_community.vectorstores.DeepLake`\n",
 "2. Question-Answering:\n",
 " 1. Build a chain from `langchain.chat_models.ChatOpenAI` and `langchain.chains.ConversationalRetrievalChain`\n",
@@ -621,7 +621,7 @@
 }
 ],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
 "texts = text_splitter.split_documents(docs)\n",
6 changes: 3 additions & 3 deletions cookbook/deeplake_semantic_search_over_chat.ipynb
@@ -52,12 +52,12 @@
 "import os\n",
 "\n",
 "from langchain.chains import RetrievalQA\n",
-"from langchain.text_splitter import (\n",
+"from langchain_community.vectorstores import DeepLake\n",
+"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import (\n",
 " CharacterTextSplitter,\n",
 " RecursiveCharacterTextSplitter,\n",
 ")\n",
-"from langchain_community.vectorstores import DeepLake\n",
-"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
 "\n",
 "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
 "activeloop_token = getpass.getpass(\"Activeloop Token:\")\n",
2 changes: 1 addition & 1 deletion cookbook/fireworks_rag.ipynb
@@ -132,7 +132,7 @@
 "data = loader.load()\n",
 "\n",
 "# Split\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)\n",
 "all_splits = text_splitter.split_documents(data)\n",
2 changes: 1 addition & 1 deletion cookbook/hypothetical_document_embeddings.ipynb
@@ -170,8 +170,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.vectorstores import Chroma\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "with open(\"../../state_of_the_union.txt\") as f:\n",
 " state_of_the_union = f.read()\n",
2 changes: 1 addition & 1 deletion cookbook/nomic_embedding_rag.ipynb
@@ -124,7 +124,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "text_splitter = CharacterTextSplitter.from_tiktoken_encoder(\n",
 " chunk_size=7500, chunk_overlap=100\n",
4 changes: 2 additions & 2 deletions cookbook/openai_functions_retrieval_qa.ipynb
@@ -20,10 +20,10 @@
 "outputs": [],
 "source": [
 "from langchain.chains import RetrievalQA\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import Chroma\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
4 changes: 2 additions & 2 deletions cookbook/qianfan_baidu_elasticesearch_RAG.ipynb
@@ -59,13 +59,13 @@
 "from baidubce.auth.bce_credentials import BceCredentials\n",
 "from baidubce.bce_client_configuration import BceClientConfiguration\n",
 "from langchain.chains.retrieval_qa import RetrievalQA\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.document_loaders.baiducloud_bos_directory import (\n",
 " BaiduBOSDirectoryLoader,\n",
 ")\n",
 "from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings\n",
 "from langchain_community.llms.baidu_qianfan_endpoint import QianfanLLMEndpoint\n",
-"from langchain_community.vectorstores import BESVectorStore"
+"from langchain_community.vectorstores import BESVectorStore\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter"
 ]
 },
 {
6 changes: 3 additions & 3 deletions cookbook/rag_with_quantized_embeddings.ipynb
@@ -36,16 +36,16 @@
 "from bs4 import BeautifulSoup as Soup\n",
 "from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
 "from langchain.storage import InMemoryByteStore, LocalFileStore\n",
-"\n",
-"# For our example, we'll load docs from the web\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter # noqa\n",
 "from langchain_community.document_loaders.recursive_url_loader import (\n",
 " RecursiveUrlLoader,\n",
 ")\n",
 "\n",
 "# noqa\n",
 "from langchain_community.vectorstores import Chroma\n",
 "\n",
+"# For our example, we'll load docs from the web\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter # noqa\n",
+"\n",
 "DOCSTORE_DIR = \".\"\n",
 "DOCSTORE_ID_KEY = \"doc_id\""
 ]
2 changes: 1 addition & 1 deletion cookbook/sales_agent_with_context.ipynb
@@ -51,11 +51,11 @@
 "from langchain.chains.base import Chain\n",
 "from langchain.prompts import PromptTemplate\n",
 "from langchain.prompts.base import StringPromptTemplate\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.llms import BaseLLM\n",
 "from langchain_community.vectorstores import Chroma\n",
 "from langchain_core.agents import AgentAction, AgentFinish\n",
 "from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "from pydantic import BaseModel, Field"
 ]
 },
2 changes: 1 addition & 1 deletion cookbook/together_ai.ipynb
@@ -39,7 +39,7 @@
 "data = loader.load()\n",
 "\n",
 "# Split\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)\n",
 "all_splits = text_splitter.split_documents(data)\n",
2 changes: 1 addition & 1 deletion cookbook/twitter-the-algorithm-analysis-deeplake.ipynb
@@ -2610,7 +2610,7 @@
 }
 ],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
 "texts = text_splitter.split_documents(docs)"
4 changes: 2 additions & 2 deletions docs/docs/get_started/quickstart.mdx
@@ -281,7 +281,7 @@ Then we can build our index:

 ```python
 from langchain_community.vectorstores import FAISS
-from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_text_splitters import RecursiveCharacterTextSplitter


 text_splitter = RecursiveCharacterTextSplitter()
@@ -531,7 +531,7 @@ from langchain_openai import ChatOpenAI
 from langchain_community.document_loaders import WebBaseLoader
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import FAISS
-from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain.tools.retriever import create_retriever_tool
 from langchain_community.tools.tavily_search import TavilySearchResults
 from langchain_openai import ChatOpenAI
@@ -643,9 +643,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.vectorstores import FAISS\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "# 2. Load the data: In our case data's already loaded\n",
 "# 3. Anonymize the data before indexing\n",
2 changes: 1 addition & 1 deletion docs/docs/integrations/callbacks/confident.ipynb
@@ -215,10 +215,10 @@
 "source": [
 "import requests\n",
 "from langchain.chains import RetrievalQA\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import Chroma\n",
 "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "text_file_url = \"https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt\"\n",
 "\n",
4 changes: 2 additions & 2 deletions docs/docs/integrations/document_loaders/psychic.ipynb
@@ -78,9 +78,9 @@
 "outputs": [],
 "source": [
 "from langchain.chains import RetrievalQAWithSourcesChain\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.vectorstores import Chroma\n",
-"from langchain_openai import OpenAI, OpenAIEmbeddings"
+"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
7 changes: 4 additions & 3 deletions docs/docs/integrations/document_loaders/source_code.ipynb
@@ -62,9 +62,9 @@
 "warnings.filterwarnings(\"ignore\")\n",
 "from pprint import pprint\n",
 "\n",
-"from langchain.text_splitter import Language\n",
 "from langchain_community.document_loaders.generic import GenericLoader\n",
-"from langchain_community.document_loaders.parsers import LanguageParser"
+"from langchain_community.document_loaders.parsers import LanguageParser\n",
+"from langchain_text_splitters import Language"
 ]
 },
 {
@@ -323,7 +323,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import (\n",
+"from langchain_text_splitters import (\n",
 " Language,\n",
 " RecursiveCharacterTextSplitter,\n",
 ")"
@@ -426,6 +426,7 @@
 },
 {
 "cell_type": "markdown",
+"id": "7fb27b941602401d91542211134fc71a",
 "metadata": {},
 "source": [
 "## Adding Languages using Tree-sitter Template\n",
4 changes: 2 additions & 2 deletions docs/docs/integrations/document_loaders/youtube_audio.ipynb
@@ -168,9 +168,9 @@
 "outputs": [],
 "source": [
 "from langchain.chains import RetrievalQA\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.vectorstores import FAISS\n",
-"from langchain_openai import ChatOpenAI, OpenAIEmbeddings"
+"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter"
 ]
 },
 {
2 changes: 1 addition & 1 deletion docs/docs/integrations/llms/llm_caching.ipynb
@@ -1463,7 +1463,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "text_splitter = CharacterTextSplitter()"
 ]
2 changes: 1 addition & 1 deletion docs/docs/integrations/llms/manifest.ipynb
@@ -82,7 +82,7 @@
 "# Map reduce example\n",
 "from langchain.chains.mapreduce import MapReduceChain\n",
 "from langchain.prompts import PromptTemplate\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "_prompt = \"\"\"Write a concise summary of the following:\n",
 "\n",
2 changes: 1 addition & 1 deletion docs/docs/integrations/platforms/openai.mdx
@@ -68,7 +68,7 @@ for OpenAI LLMs.

 You can also use it to count tokens when splitting documents with
 ```python
-from langchain.text_splitter import CharacterTextSplitter
+from langchain_text_splitters import CharacterTextSplitter
 CharacterTextSplitter.from_tiktoken_encoder(...)
 ```
 For a more detailed walkthrough of this, see [this notebook](/docs/modules/data_connection/document_transformers/split_by_token#tiktoken)
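For context, a runnable sketch of the tiktoken-based splitting this page describes, under the new import path. The chunk sizes are illustrative assumptions, and `tiktoken` must be installed alongside `langchain-text-splitters`:

```python
from langchain_text_splitters import CharacterTextSplitter

# Token-based splitting: chunk lengths are measured with tiktoken's encoding
# rather than in raw characters. chunk_size/chunk_overlap are illustrative.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500, chunk_overlap=50
)
chunks = text_splitter.split_text("A long document to split by token count.")
```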
2 changes: 1 addition & 1 deletion docs/docs/integrations/providers/elasticsearch.mdx
@@ -34,7 +34,7 @@ The vector store is a simple wrapper around Elasticsearch. It provides a simple
 from langchain_elasticsearch import ElasticsearchStore

 from langchain_community.document_loaders import TextLoader
-from langchain.text_splitter import CharacterTextSplitter
+from langchain_text_splitters import CharacterTextSplitter

 loader = TextLoader("./state_of_the_union.txt")
 documents = loader.load()
2 changes: 1 addition & 1 deletion docs/docs/integrations/providers/ragatouille.ipynb
@@ -87,9 +87,9 @@
 "outputs": [],
 "source": [
 "import requests\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.vectorstores import FAISS\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "\n",
 "def get_wikipedia_page(title: str):\n",
2 changes: 1 addition & 1 deletion docs/docs/integrations/providers/spacy.mdx
@@ -16,7 +16,7 @@ pip install spacy
 See a [usage example](/docs/modules/data_connection/document_transformers/split_by_token#spacy).

 ```python
-from langchain.text_splitter import SpacyTextSplitter
+from langchain_text_splitters import SpacyTextSplitter
 ```

 ## Text Embedding Models
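A minimal sketch of the splitter named above, assuming spaCy and its `en_core_web_sm` pipeline are installed (the `chunk_size` value is illustrative, not prescribed by the docs page):

```python
from langchain_text_splitters import SpacyTextSplitter

# Sentence-aware splitting backed by spaCy; requires:
#   pip install spacy && python -m spacy download en_core_web_sm
text_splitter = SpacyTextSplitter(chunk_size=1000)  # chunk_size is illustrative
chunks = text_splitter.split_text("A long document to split on sentence boundaries.")
```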
2 changes: 1 addition & 1 deletion docs/docs/integrations/retrievers/activeloop.ipynb
@@ -192,7 +192,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "chunk_size = 4096\n",
 "docs_new = []\n",
2 changes: 1 addition & 1 deletion docs/docs/integrations/retrievers/cohere-reranker.ipynb
@@ -301,10 +301,10 @@
 }
 ],
 "source": [
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.embeddings import CohereEmbeddings\n",
 "from langchain_community.vectorstores import FAISS\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n",
 "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",
2 changes: 1 addition & 1 deletion docs/docs/integrations/retrievers/flashrank-reranker.ipynb
@@ -288,10 +288,10 @@
 }
 ],
 "source": [
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import FAISS\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "documents = TextLoader(\n",
 " \"../../modules/state_of_the_union.txt\",\n",
2 changes: 1 addition & 1 deletion docs/docs/integrations/retrievers/jaguar.ipynb
@@ -52,10 +52,10 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores.jaguar import Jaguar\n",
 "from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "\"\"\" \n",
 "Load a text file into a set of documents \n",