In [1]:
from dotenv import load_dotenv
# Load key/value pairs from a local .env file into the process environment.
# override=True lets values from .env replace variables that are already set.
load_dotenv(override=True)

True

In [2]:
from langchain_ollama import ChatOllama
# Chat model served by a local Ollama instance.
# ChatOllama does not define a `max_tokens` field, so that keyword was not
# applied to generation; `num_predict` is the Ollama option that caps the
# number of tokens produced per response.
llama_llm = ChatOllama(
    base_url="http://localhost:11434",
    model="llama3",
    temperature=0.7,
    num_predict=250,
)

#### Load PDF into Documents

In [10]:
from langchain_community.document_loaders import PyPDFLoader
# One loader per source PDF; the individual names are kept so later cells
# can still refer to a specific loader.
pdf_1, pdf_2, pdf_3 = (
    PyPDFLoader("./SampleData/attention.pdf"),
    PyPDFLoader("./SampleData/LLMForgetting.pdf"),
    PyPDFLoader("./SampleData/TestingAndEvaluatingLLM.pdf"),
)
pdf_files = [pdf_1, pdf_2, pdf_3]

# Flatten everything each loader returns into a single list, in loader order.
documents = [page for loader in pdf_files for page in loader.load()]
print(len(documents))

253


#### Divide Large Documents into Chunks

In [11]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter settings gathered in one place so they are easy to tune.
splitter_options = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "add_start_index": True,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break the loaded documents into chunks and show how many were produced.
all_split = text_splitter.split_documents(documents)
len(all_split)

640

#### Embedding 
- Convert chunks of text into vectors using an embedding model

In [None]:
from langchain_ollama import OllamaEmbeddings
# Embedding client backed by the local Ollama "llama3.2:latest" model.
ollama_embedding = OllamaEmbeddings(model="llama3.2:latest")

# Embed the first two chunks as a quick sanity check.
first_chunk, second_chunk = all_split[0], all_split[1]
vector_1 = ollama_embedding.embed_query(first_chunk.page_content)
vector_2 = ollama_embedding.embed_query(second_chunk.page_content)

# Both embeddings must have the same length.
assert len(vector_2) == len(vector_1)

#### Storing Embedded Vectors in the Vector DB

In [15]:
from langchain_chroma import Chroma
# Open (or create) the persisted Chroma store in ./chroma_langchain_db.
# Calling Chroma.from_documents on every run inserts all chunks again under
# fresh IDs, so re-running this cell duplicated the whole corpus in the
# persisted collection. Ingest only when the store is still empty; delete the
# directory to force a rebuild after the source PDFs change.
chroma_db = Chroma(
    embedding_function=ollama_embedding,
    persist_directory="./chroma_langchain_db",
)
if not chroma_db.get(limit=1)["ids"]:
    chroma_db.add_documents(documents=all_split)