In [43]:
%pip install -qU langchain langchain-openai python-dotenv rich

Note: you may need to restart the kernel to use updated packages.


In [44]:
# TextLoader is implemented in langchain-community; the
# `langchain.document_loaders` path is only a deprecated re-export.
from langchain_community.document_loaders import TextLoader

# Load the text file. `load()` returns a list of Document objects, which is
# what the splitter cell later receives as `document`.
loader = TextLoader("document.txt")
document = loader.load()
print(document)

In [53]:
from rich import print
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Cut the loaded text into small chunks (100 characters, 20 of overlap) so
# each chunk can be indexed as its own document.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=100,
)
documents = text_splitter.split_documents(document)
print(documents)

In [46]:
from langchain_openai import OpenAIEmbeddings

# Embedding client handed to the vector store when it is created.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
)

In [47]:
import os

# PGVector is implemented in langchain-community; the
# `langchain.vectorstores.pgvector` path is only a deprecated re-export.
from langchain_community.vectorstores.pgvector import PGVector

COLLECTION_NAME = "my_collection"

# Fail fast with a clear message instead of passing None on to PGVector and
# the record manager, which both receive this value.
DATABASE_URL = os.getenv("DATABASE_URL")
if not DATABASE_URL:
    raise RuntimeError(
        "DATABASE_URL is not set; export it (or load your .env file) before running this cell."
    )

# Create the store with no documents: content is written later through
# `index(...)`, so the record manager sees every insert.
vector_store = PGVector.from_documents(
    documents=[],
    embedding=embeddings,
    connection_string=DATABASE_URL,
    collection_name=COLLECTION_NAME,
    create_extension=True,
    use_jsonb=True,
)

In [48]:
from rich import print
from langchain.indexes import SQLRecordManager, index

# The record manager tracks what has been indexed; its namespace is derived
# from the collection name so records are scoped to this collection.
namespace = "pgvector/" + COLLECTION_NAME
record_manager = SQLRecordManager(
    db_url=DATABASE_URL,
    namespace=namespace,
)
# Create the record manager's tables before the first index() call.
record_manager.create_schema()

In [49]:
from rich import print

# First indexing pass: push the split chunks into the vector store through
# the record manager, using each document's "source" metadata as its source id.
result = index(
    vector_store=vector_store,
    record_manager=record_manager,
    docs_source=documents,
    source_id_key="source",
    cleanup="incremental",
)
print(result)

In [54]:
# Document is defined in langchain-core; `langchain.schema` is a legacy alias.
from langchain_core.documents import Document
from rich import print

# Rebuild the chunk list from the loaded text before editing it, so this cell
# is idempotent. Mutating the existing list in place would drop one more chunk
# and append one more duplicate on every re-run.
documents = text_splitter.split_documents(document)
assert len(documents) >= 2, "need at least two chunks to demonstrate update + delete"

# Simulate changes to the source: one chunk edited, one removed, one added.
documents[1].page_content = "updated"
del documents[0]
documents.append(Document(page_content="new content", metadata={"source": "important"}))
print(documents)

In [55]:
from rich import print

# Index the current `documents` list again with the same incremental settings
# and show the summary that index() returns.
result = index(
    vector_store=vector_store,
    record_manager=record_manager,
    docs_source=documents,
    source_id_key="source",
    cleanup="incremental",
)
print(result)

In [56]:
from rich import print

# Run the indexer with an empty source and cleanup="full".
# NOTE(review): per the indexing API docs, full cleanup removes records not
# seen in the current run, so this should clear the namespace — confirm.
result = index(
    vector_store=vector_store,
    record_manager=record_manager,
    docs_source=[],
    source_id_key="source",
    cleanup="full",
)
print(result)

In [57]:
from rich import print

# Index `documents` once more, after the preceding full-cleanup run, using
# incremental cleanup again.
result = index(
    vector_store=vector_store,
    record_manager=record_manager,
    docs_source=documents,
    source_id_key="source",
    cleanup="incremental",
)
print(result)