Skip to content

Commit

Permalink
text-splitters[minor], langchain[minor], community[patch], templates, docs: langchain-text-splitters 0.0.1 (#18346)
Browse files Browse the repository at this point in the history
  • Loading branch information
baskaryan committed Mar 1, 2024
1 parent 7891934 commit 5efb5c0
Show file tree
Hide file tree
Showing 226 changed files with 6,628 additions and 1,982 deletions.
1 change: 1 addition & 0 deletions .github/scripts/check_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

LANGCHAIN_DIRS = [
"libs/core",
"libs/text-splitters",
"libs/community",
"libs/langchain",
"libs/experimental",
Expand Down
2 changes: 1 addition & 1 deletion .github/scripts/get_min_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from packaging.version import parse as parse_version
import re

MIN_VERSION_LIBS = ["langchain-core", "langchain-community", "langchain"]
MIN_VERSION_LIBS = ["langchain-core", "langchain-community", "langchain", "langchain-text-splitters"]


def get_min_version(version: str) -> str:
Expand Down
2 changes: 1 addition & 1 deletion cookbook/Multi_modal_RAG.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"from unstructured.partition.pdf import partition_pdf\n",
"\n",
"\n",
Expand Down
2 changes: 1 addition & 1 deletion cookbook/advanced_rag_eval.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
"pdf_pages = loader.load()\n",
"\n",
"# Split\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
"all_splits_pypdf = text_splitter.split_documents(pdf_pages)\n",
Expand Down
2 changes: 1 addition & 1 deletion cookbook/agent_vectorstore.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@
"outputs": [],
"source": [
"from langchain.chains import RetrievalQA\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.vectorstores import Chroma\n",
"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n",
"llm = OpenAI(temperature=0)"
]
Expand Down
2 changes: 1 addition & 1 deletion cookbook/autogpt/marathon_times.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,8 @@
" BaseCombineDocumentsChain,\n",
" load_qa_with_sources_chain,\n",
")\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain.tools import BaseTool, DuckDuckGoSearchRun\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"from pydantic import Field\n",
"\n",
"\n",
Expand Down
4 changes: 2 additions & 2 deletions cookbook/code-analysis-deeplake.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"source": [
"1. Prepare data:\n",
" 1. Upload all python project files using the `langchain_community.document_loaders.TextLoader`. We will call these files the **documents**.\n",
" 2. Split all documents to chunks using the `langchain.text_splitter.CharacterTextSplitter`.\n",
" 2. Split all documents to chunks using the `langchain_text_splitters.CharacterTextSplitter`.\n",
" 3. Embed chunks and upload them into the DeepLake using `langchain.embeddings.openai.OpenAIEmbeddings` and `langchain_community.vectorstores.DeepLake`\n",
"2. Question-Answering:\n",
" 1. Build a chain from `langchain.chat_models.ChatOpenAI` and `langchain.chains.ConversationalRetrievalChain`\n",
Expand Down Expand Up @@ -621,7 +621,7 @@
}
],
"source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_documents(docs)\n",
Expand Down
6 changes: 3 additions & 3 deletions cookbook/deeplake_semantic_search_over_chat.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,12 @@
"import os\n",
"\n",
"from langchain.chains import RetrievalQA\n",
"from langchain.text_splitter import (\n",
"from langchain_community.vectorstores import DeepLake\n",
"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import (\n",
" CharacterTextSplitter,\n",
" RecursiveCharacterTextSplitter,\n",
")\n",
"from langchain_community.vectorstores import DeepLake\n",
"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
"activeloop_token = getpass.getpass(\"Activeloop Token:\")\n",
Expand Down
2 changes: 1 addition & 1 deletion cookbook/fireworks_rag.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@
"data = loader.load()\n",
"\n",
"# Split\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)\n",
"all_splits = text_splitter.split_documents(data)\n",
Expand Down
2 changes: 1 addition & 1 deletion cookbook/hypothetical_document_embeddings.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.vectorstores import Chroma\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n",
"with open(\"../../state_of_the_union.txt\") as f:\n",
" state_of_the_union = f.read()\n",
Expand Down
2 changes: 1 addition & 1 deletion cookbook/nomic_embedding_rag.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n",
"text_splitter = CharacterTextSplitter.from_tiktoken_encoder(\n",
" chunk_size=7500, chunk_overlap=100\n",
Expand Down
4 changes: 2 additions & 2 deletions cookbook/openai_functions_retrieval_qa.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@
"outputs": [],
"source": [
"from langchain.chains import RetrievalQA\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import Chroma\n",
"from langchain_openai import OpenAIEmbeddings"
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions cookbook/qianfan_baidu_elasticesearch_RAG.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,13 @@
"from baidubce.auth.bce_credentials import BceCredentials\n",
"from baidubce.bce_client_configuration import BceClientConfiguration\n",
"from langchain.chains.retrieval_qa import RetrievalQA\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.document_loaders.baiducloud_bos_directory import (\n",
" BaiduBOSDirectoryLoader,\n",
")\n",
"from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings\n",
"from langchain_community.llms.baidu_qianfan_endpoint import QianfanLLMEndpoint\n",
"from langchain_community.vectorstores import BESVectorStore"
"from langchain_community.vectorstores import BESVectorStore\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
]
},
{
Expand Down
6 changes: 3 additions & 3 deletions cookbook/rag_with_quantized_embeddings.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,16 @@
"from bs4 import BeautifulSoup as Soup\n",
"from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
"from langchain.storage import InMemoryByteStore, LocalFileStore\n",
"\n",
"# For our example, we'll load docs from the web\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter # noqa\n",
"from langchain_community.document_loaders.recursive_url_loader import (\n",
" RecursiveUrlLoader,\n",
")\n",
"\n",
"# noqa\n",
"from langchain_community.vectorstores import Chroma\n",
"\n",
"# For our example, we'll load docs from the web\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter # noqa\n",
"\n",
"DOCSTORE_DIR = \".\"\n",
"DOCSTORE_ID_KEY = \"doc_id\""
]
Expand Down
2 changes: 1 addition & 1 deletion cookbook/sales_agent_with_context.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@
"from langchain.chains.base import Chain\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain.prompts.base import StringPromptTemplate\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.llms import BaseLLM\n",
"from langchain_community.vectorstores import Chroma\n",
"from langchain_core.agents import AgentAction, AgentFinish\n",
"from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"from pydantic import BaseModel, Field"
]
},
Expand Down
2 changes: 1 addition & 1 deletion cookbook/together_ai.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
"data = loader.load()\n",
"\n",
"# Split\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)\n",
"all_splits = text_splitter.split_documents(data)\n",
Expand Down
2 changes: 1 addition & 1 deletion cookbook/twitter-the-algorithm-analysis-deeplake.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2610,7 +2610,7 @@
}
],
"source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_documents(docs)"
Expand Down
4 changes: 2 additions & 2 deletions docs/docs/get_started/quickstart.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ Then we can build our index:

```python
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter


text_splitter = RecursiveCharacterTextSplitter()
Expand Down Expand Up @@ -531,7 +531,7 @@ from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.tools.retriever import create_retriever_tool
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_openai import ChatOpenAI
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -643,9 +643,9 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.vectorstores import FAISS\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"# 2. Load the data: In our case data's already loaded\n",
"# 3. Anonymize the data before indexing\n",
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/callbacks/confident.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,10 @@
"source": [
"import requests\n",
"from langchain.chains import RetrievalQA\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import Chroma\n",
"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n",
"text_file_url = \"https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt\"\n",
"\n",
Expand Down
4 changes: 2 additions & 2 deletions docs/docs/integrations/document_loaders/psychic.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@
"outputs": [],
"source": [
"from langchain.chains import RetrievalQAWithSourcesChain\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.vectorstores import Chroma\n",
"from langchain_openai import OpenAI, OpenAIEmbeddings"
"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
]
},
{
Expand Down
7 changes: 4 additions & 3 deletions docs/docs/integrations/document_loaders/source_code.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@
"warnings.filterwarnings(\"ignore\")\n",
"from pprint import pprint\n",
"\n",
"from langchain.text_splitter import Language\n",
"from langchain_community.document_loaders.generic import GenericLoader\n",
"from langchain_community.document_loaders.parsers import LanguageParser"
"from langchain_community.document_loaders.parsers import LanguageParser\n",
"from langchain_text_splitters import Language"
]
},
{
Expand Down Expand Up @@ -323,7 +323,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.text_splitter import (\n",
"from langchain_text_splitters import (\n",
" Language,\n",
" RecursiveCharacterTextSplitter,\n",
")"
Expand Down Expand Up @@ -426,6 +426,7 @@
},
{
"cell_type": "markdown",
"id": "7fb27b941602401d91542211134fc71a",
"metadata": {},
"source": [
"## Adding Languages using Tree-sitter Template\n",
Expand Down
4 changes: 2 additions & 2 deletions docs/docs/integrations/document_loaders/youtube_audio.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,9 @@
"outputs": [],
"source": [
"from langchain.chains import RetrievalQA\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.vectorstores import FAISS\n",
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings"
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/llms/llm_caching.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1463,7 +1463,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n",
"text_splitter = CharacterTextSplitter()"
]
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/llms/manifest.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
"# Map reduce example\n",
"from langchain.chains.mapreduce import MapReduceChain\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n",
"_prompt = \"\"\"Write a concise summary of the following:\n",
"\n",
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/platforms/openai.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ for OpenAI LLMs.

You can also use it to count tokens when splitting documents with
```python
from langchain.text_splitter import CharacterTextSplitter
from langchain_text_splitters import CharacterTextSplitter
CharacterTextSplitter.from_tiktoken_encoder(...)
```
For a more detailed walkthrough of this, see [this notebook](/docs/modules/data_connection/document_transformers/split_by_token#tiktoken)
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/providers/elasticsearch.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ The vector store is a simple wrapper around Elasticsearch. It provides a simple
from langchain_elasticsearch import ElasticsearchStore

from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_text_splitters import CharacterTextSplitter

loader = TextLoader("./state_of_the_union.txt")
documents = loader.load()
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/providers/ragatouille.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@
"outputs": [],
"source": [
"import requests\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.vectorstores import FAISS\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"\n",
"def get_wikipedia_page(title: str):\n",
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/providers/spacy.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pip install spacy
See a [usage example](/docs/modules/data_connection/document_transformers/split_by_token#spacy).

```python
from langchain.text_splitter import SpacyTextSplitter
from langchain_text_splitters import SpacyTextSplitter
```

## Text Embedding Models
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/retrievers/activeloop.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"chunk_size = 4096\n",
"docs_new = []\n",
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/retrievers/cohere-reranker.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -301,10 +301,10 @@
}
],
"source": [
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings import CohereEmbeddings\n",
"from langchain_community.vectorstores import FAISS\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/retrievers/flashrank-reranker.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -288,10 +288,10 @@
}
],
"source": [
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import FAISS\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"documents = TextLoader(\n",
" \"../../modules/state_of_the_union.txt\",\n",
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/integrations/retrievers/jaguar.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores.jaguar import Jaguar\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n",
"\"\"\" \n",
"Load a text file into a set of documents \n",
Expand Down
Loading

0 comments on commit 5efb5c0

Please sign in to comment.