In [25]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

import uuid

## Load PDF

In [26]:
# Location of the source PDF, relative to the notebook's working directory.
file_path = "./minikube.pdf"

# Build the loader explicitly by keyword, then read the file.
# `documents` is the input that the chunking cell splits further.
loader = PyPDFLoader(file_path=file_path)
documents = loader.load()

## Generate Chunks

In [27]:
# Split the loaded pages into chunks for embedding.
# The splitter is built from a tiktoken encoder ("cl100k_base"), with a
# chunk_size of 400 and no overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    encoding_name="cl100k_base",
    chunk_size=400,
    chunk_overlap=0,
)
chunks = text_splitter.split_documents(documents)

# Display the resulting chunks as the cell output.
chunks

[Document(metadata={'producer': 'cairo 1.9.5 (http://cairographics.org)', 'creator': 'cairo 1.9.5 (http://cairographics.org)', 'creationdate': '', 'source': './minikube.pdf', 'total_pages': 7, 'page': 0, 'page_label': '1'}, page_content='How to Install Minikube on Ubuntu 18.04By Josphat Mutai  - June 20, 2018\n(Last Updated On: September 30, 2018)\nIn this tutorial, I’ll take you through the steps to install minikube on Ubuntu 18.04. To those\nnew to minikube, let’s start with an introduction before diving to the installation steps.\nMinikube is an open source tool that was developed to enable developers and system\nadministrators to run a single cluster of Kubernetes on their local machine.\xa0Minikube starts a\nsingle node kubernetes cluster locally with small resource utilization. This is ideal for\ndevelopment tests and POC purposes,\nIn a nutshell, Minikube\xa0packages and con�gures a Linux VM,\xa0then installs Docker and all\nKubernetes components into it.\nMinikube supports Kube

## Create Vector Store

In [31]:

# Embedding model used both to index the chunks and, later, to embed queries.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Fail early with a clear message rather than an opaque IndexError further down
# when the PDF produced no text to index.
if not chunks:
    raise ValueError("No chunks to index: the document produced no text chunks.")

# Size the flat L2 index to the embedding dimension. Only the length of the
# returned vector is needed, so a short fixed probe string is embedded instead
# of a full document chunk.
index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world")))

# Start from an empty store; documents are added explicitly below.
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

# One random UUID per chunk, used as the document id in the store.
uuids = [str(uuid.uuid4()) for _ in chunks]

# Embed and index every chunk; the returned ids are shown as the cell output.
vector_store.add_documents(documents=chunks, ids=uuids)

['33311f27-e6ad-49bd-ab2b-2b5a3ac0b917',
 'a9786e1e-ef9a-4d33-aa2a-96cf2264e3a5',
 '979a380c-de08-4adc-97a5-efd286fa76de',
 'd192e659-019f-4588-8372-150ddc262d3a',
 '5f04b769-558d-4a8b-ac1e-e55db1f9363d',
 '1910944f-e5e6-49b5-8747-e22bfa3ca834',
 '5c1c7795-e0f1-416c-879f-a687e29801fb',
 'f05320e5-066b-4a92-b91c-7df4d53566e2',
 '8837a227-464a-460f-88bd-5e6aa064f899']

In [36]:
# Retrieve the 4 chunks closest to the query together with their scores.
# Only a cell's last expression is displayed, so a single scored search is
# enough; an extra unscored search would embed the query again for a result
# that is never shown or stored.
# The index created earlier is faiss.IndexFlatL2, so each score is a distance:
# a lower value means a closer match.
vector_store.similarity_search_with_score("What is minikube", k=4)

  np.float32(0.84640205)),
 (Document(id='33311f27-e6ad-49bd-ab2b-2b5a3ac0b917', metadata={'producer': 'cairo 1.9.5 (http://cairographics.org)', 'creator': 'cairo 1.9.5 (http://cairographics.org)', 'creationdate': '', 'source': './minikube.pdf', 'total_pages': 7, 'page': 0, 'page_label': '1'}, page_content='How to Install Minikube on Ubuntu 18.04By Josphat Mutai  - June 20, 2018\n(Last Updated On: September 30, 2018)\nIn this tutorial, I’ll take you through the steps to install minikube on Ubuntu 18.04. To those\nnew to minikube, let’s start with an introduction before diving to the installation steps.\nMinikube is an open source tool that was developed to enable developers and system\nadministrators to run a single cluster of Kubernetes on their local machine.\xa0Minikube starts a\nsingle node kubernetes cluster locally with small resource utilization. This is ideal for\ndevelopment tests and POC purposes,\nIn a nutshell, Minikube\xa0packages and con�gures a Linux VM,\xa0then installs