In [None]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader('./test.txt', encoding="utf-8")
docs = loader.load()

print(docs)


In [None]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader('https://www.langchain.com/')
docs = loader.load()

print(docs)

In [None]:
# install the pdf parsing library !pip install pypdf

from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader('./test.pdf')
pages = loader.load()

print(pages)


In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_community.document_loaders import TextLoader

loader = TextLoader('./test.txt', encoding="utf-8")
docs = loader.load()

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splitted_docs = splitter.split_documents(docs)

print(splitted_docs)


In [None]:
from langchain_text_splitters import (
    Language,
    RecursiveCharacterTextSplitter,
)

PYTHON_CODE = """ def hello_world(): print("Hello, World!") # Call the function hello_world() """

python_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON, chunk_size=50, chunk_overlap=0
)

python_docs = python_splitter.create_documents([PYTHON_CODE])

print(python_docs)


In [2]:
"""
1. Ensure docker is installed and running (https://docs.docker.com/get-docker/)
2. pip install -qU langchain_postgres psycopg
3. Run the following command to start the postgres container:
   
docker run \
    --name pgvector-container \
    -e POSTGRES_USER=langchain \
    -e POSTGRES_PASSWORD=langchain \
    -e POSTGRES_DB=langchain \
    -p 6024:5432 \
    -d pgvector/pgvector:pg16
4. Use the connection string below for the postgres container

"""

from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_postgres.vectorstores import PGVector
import uuid


# See docker command above to launch a postgres instance with pgvector enabled.
connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain"

# Load the document, split it into chunks
raw_documents = TextLoader('./test.txt', encoding="utf-8").load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(raw_documents)

# Create embeddings for the documents
embeddings_model = OpenAIEmbeddings()

db = PGVector.from_documents(
    documents, embeddings_model, connection=connection)

results = db.similarity_search("query", k=4)
print("Similarity search results:")
print(results)

print("Adding documents to the vector store")
ids = [str(uuid.uuid4()), str(uuid.uuid4())]
db.add_documents(
    [
        Document(
            page_content="there are cats in the pond",
            metadata={"location": "pond", "topic": "animals"},
        ),
        Document(
            page_content="ducks are also found in the pond",
            metadata={"location": "pond", "topic": "animals"},
        ),
    ],
    ids=ids,
)

print("Documents added successfully.\n Fetched documents count:",
      len(db.get_by_ids(ids)))

print("Deleting document with id", ids[1])
db.delete({"ids": ids})

print("Document deleted successfully.\n Fetched documents count:",
      len(db.get_by_ids(ids)))


Similarity search results:
[Document(id='fe98c500-fbd0-42a1-ad51-217ff06430d2', metadata={'source': './test.txt'}, page_content='V.'), Document(id='12ab5334-0344-4082-8160-063062a3108f', metadata={'source': './test.txt'}, page_content='V.'), Document(id='c2a00ba3-d304-4fbf-bc74-a5d3f3832b13', metadata={'source': './test.txt'}, page_content='V.'), Document(id='2c1d0996-1355-4471-8d70-b30a89aae462', metadata={'source': './test.txt'}, page_content='V.')]
Adding documents to the vector store
Documents added successfully.
 Fetched documents count: 2
Deleting document with id 4f9a868b-e44c-454b-8d6b-fb37aaa97b7f
Document deleted successfully.
 Fetched documents count: 2
