# Prequisites

In [26]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
import os
os.environ["HUGGINGFACE_HUB_CACHE"] = "model_cache"
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
from langchain_chroma import Chroma
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings

#os.environ['HF_HOME'] = "model_cache"

# Text Embeddings

In [2]:
model_name = "BAAI/bge-small-en-v1.5"
model_kwargs = {"device" : "cuda:0"}
encode_kwargs = {"normalize_embeddings" : True}

In [None]:
model = HuggingFaceBgeEmbeddings(
    model_name = model_name,
    model_kwargs = model_kwargs,
    encode_kwargs = encode_kwargs
)

In [4]:
embedding = model.embed_query("Tell me what is an apple ?")
len(embedding)

  attn_output = torch.nn.functional.scaled_dot_product_attention(


384

# Loading Document

In [14]:
file_path = ("Harry Potter and the Sorcerers Stone.pdf")
loader = PyPDFLoader(file_path, extract_images=False)
pages = loader.load()
pages[4].page_content

'4he was imagining things, which he had never hoped before, because he\ndidn\'t approve of imagination.\nAs he pulled into the driveway of number four, the first thing he saw --\nand it didn\'t improve his mood -- was the tabby cat he\'d spotted thatmorning. It was now sitting on his garden wall. He was sure it was the\nsame one; it had the same markings around its eyes.\n"Shoo!" said Mr. Dursley loudly. The cat didn\'t move. It just gave him a\nstern look. Was this normal cat behavior? Mr. Dursley wondered. Tryingto pull himself together, he let himself into the house. He was stilldetermined not to mention anything to his wife.\nMrs. Dursley had had a nice, normal day. She told him over dinner all\nabout Mrs. Next Door\'s problems with her daughter and how Dudley hadlearned a new word ("Won\'t!"). Mr. Dursley tried to act normally. WhenDudley had been put to bed, he went into the living room in time tocatch the last report on the evening news:\n"And finally, bird-watchers everywhere h

# Indexing :- Split

In [16]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, 
    chunk_overlap=200, 
    add_start_index=True
)

all_splits = text_splitter.split_documents(pages)

In [17]:
len(all_splits)

613

# Indexing : Store :- Vector DB

In [28]:
embedding_model_function = SentenceTransformerEmbeddings(model_name = "BAAI/bge-small-en-v1.5")



In [36]:
vectorstore = Chroma.from_documents(documents=all_splits, 
                           embedding = embedding_model_function)

In [44]:
query = "What was alias of Harry Potter ?"

In [48]:
retriever = vectorstore.as_retriever(search_type="similarity",
                                     search_kwargs={"k":6})

retrieved_docs = retriever.invoke(query)

retrieved_docs

[Document(page_content='" Brocklehurst, Mandy" went to Ravenclaw too, but "Brown, Lavender"\nbecame the first new Gryffindor, and the table on the far left explodedwith cheers; Harry could see Ron\'s twin brothers catcalling.\n"Bulstrode, Millicent" then became a Slytherin. Perhaps it was Harry\'s\nimagination, after all he\'d heard about Slytherin, but he thought theylooked like an unpleasant lot. He was starting to feel definitely sicknow. He remembered being picked for teams during gym at his old school.He had always been last to be chosen, not because he was no good, butbecause no one wanted Dudley to think they liked him.\n"Finch-Fletchley, Justin!"\n"HUFFLEPUFF!"', metadata={'page': 95, 'source': 'Harry Potter and the Sorcerers Stone.pdf', 'start_index': 769}),
 Document(page_content='" Brocklehurst, Mandy" went to Ravenclaw too, but "Brown, Lavender"\nbecame the first new Gryffindor, and the table on the far left explodedwith cheers; Harry could see Ron\'s twin brothers catcalli