In [None]:
# Langchain <> VLite integration script
# !pip install langchain==0.1.17
#
# Downloads a PDF, splits its loaded content into chunks, stores the chunks in
# a VLite collection through LangChain, and runs one similarity search.
import requests
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import VLite

PDF_URL = 'https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf'
PDF_PATH = 'attention.pdf'

# Download the PDF
# Fail loudly on an HTTP error instead of saving an error page as the PDF, and
# bound the wait so a stalled connection cannot hang the cell forever.
response = requests.get(PDF_URL, timeout=60)
response.raise_for_status()

# The context manager guarantees the bytes are flushed and the handle is
# closed before the loader opens the same path below.
with open(PDF_PATH, 'wb') as pdf_file:
    pdf_file.write(response.content)

# Load the PDF document
# NOTE(review): TextLoader reads the file as plain text, but a PDF is a binary
# format, so this step presumably fails to decode or yields garbage. Confirm,
# and consider a PDF-aware loader (needs an extra PDF parsing package).
loader = TextLoader(PDF_PATH)
documents = loader.load()

# Split the documents into chunks
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Create a VLite instance
vlite = VLite(collection="attention")

# Add texts to the VLite vector database
# Only the raw chunk text is passed on; no metadata is forwarded here.
vlite.add_texts([text.page_content for text in texts])

# Perform a similarity search
query = "What is attention?"
docs = vlite.similarity_search(query, k=3)

# Print the most relevant chunks
for doc in docs:
    print(doc.page_content)
    print('---')

# Get collection information
vlite.info()


In [None]:
# Pure vlite example
#
# Downloads a PDF, processes it with vlite's own PDF helper, adds the result
# to a VLite collection, and prints the result of one retrieval query.
import requests
from vlite import VLite
from vlite.utils import process_pdf

PDF_URL = 'https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf'
PDF_PATH = 'attention.pdf'

# Start VLite
vdb = VLite('attention2')

# Download the pdf
# Fail loudly on an HTTP error instead of saving an error page as the PDF, and
# bound the wait so a stalled connection cannot hang the cell forever.
response = requests.get(PDF_URL, timeout=60)
response.raise_for_status()

# The context manager guarantees the bytes are flushed and the handle is
# closed before process_pdf opens the same path below.
with open(PDF_PATH, 'wb') as pdf_file:
    pdf_file.write(response.content)

# Process the pdf
corpus = process_pdf(PDF_PATH)

# Add the PDF to the VLite database
vdb.add(corpus)

# Query the VLite database
print(vdb.retrieve('what is attention'))

# Print the VLite database
vdb.info()
