In [6]:
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import DirectoryLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma

## Loading the Documents and Embeddings

In [7]:
embeddings = HuggingFaceEmbeddings()
loader = DirectoryLoader(path="data",
                         glob="./*.pdf",
                         loader_cls=UnstructuredPDFLoader)
documents = loader.load()

## Splitting the documents into chunks

In [None]:
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=20)
docs = text_splitter.split_documents(documents)

## Uploading the chunked documents into ChromaDB

In [10]:
import chromadb

In [11]:
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    persist_directory="vector_db_store"
)

## Sample Query

In [12]:
query = "What are Allergies"
result =vectordb.similarity_search(query=query,k=3)
print(result)

[Document(metadata={'source': 'data\\Medical_book.pdf'}, page_content='DescriptionAllergies are among the most common of medicaldisorders. It is estimated that 60 million Americans,ormore than one in every five people,suffer from someform of allergy,with similar proportions throughoutmuch of the rest of the world. Allergy is the single largestreason for school absence and is a major source of lostproductivity in the workplace.An allergy is a type of immune reaction. Normally,the immune system responds to foreign microorganismsor particles by producing specific proteins called anti-bodies. These antibodies are capable of binding to iden-tifying molecules,or antigens,on the foreign particle.This reaction between antibody and antigen sets off aseries of chemical reactions designed to protect thebody from infection. Sometimes,this same series ofreactions is triggered by harmless,everyday substancessuch as pollen,dust,and animal danders. When thisoccurs,an allergy develops against the offen