In [None]:
# Install notebook dependencies into the kernel's own environment.
# `%pip` (rather than bare `pip`) does not depend on automagic being enabled, and the
# extras spec is quoted so the shell cannot treat `[pdf]` as a glob pattern.
%pip install streamlit langchain langchain_community langchain_core langchain-text-splitters python-dotenv langchain-huggingface langchain-qdrant langchain-ollama "unstructured[pdf]" onnx==1.16.1 qdrant-client ipywidgets

In [None]:
# Vector store setup: load a PDF, embed its text chunks, and store them in Qdrant

import os
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import Qdrant

# Embedding and Qdrant configuration shared by the cells below

qdrant_url = "http://localhost:6333"
collection_name: str = "vector_db"

# BGE small English model, run on CPU, with normalized output vectors.
embeddingModel = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en",
    model_kwargs=dict(device="cpu"),
    encode_kwargs=dict(normalize_embeddings=True),
)

def create_embeddings(pdf_path: str) -> str:
    """
    Process a PDF, create embeddings for its text chunks, and store them in Qdrant.

    The document is loaded with ``UnstructuredPDFLoader``, split with a
    ``RecursiveCharacterTextSplitter`` (``chunk_size=1000``, ``chunk_overlap=250``),
    embedded with the module-level ``embeddingModel`` and written to the
    collection ``collection_name`` on the server at ``qdrant_url``.

    Args:
        pdf_path (str): The file path to the PDF document.

    Returns:
        str: Success message upon completion.

    Raises:
        FileNotFoundError: If ``pdf_path`` does not exist.
        ValueError: If the loader returns no documents, or splitting yields
            no chunks.
        ConnectionError: If storing the chunks in Qdrant fails for any reason.
            The underlying exception is attached as ``__cause__``.
    """
    if not os.path.exists(pdf_path):
        raise FileNotFoundError(f"The file {pdf_path} does not exist.")

    # Load and preprocess the document
    loader = UnstructuredPDFLoader(pdf_path)
    docs = loader.load()
    if not docs:
        raise ValueError("No documents were loaded from the PDF.")

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=250
    )
    splits = text_splitter.split_documents(docs)
    if not splits:
        raise ValueError("No text chunks were created from the documents.")

    # Create and store embeddings in Qdrant.
    # NOTE(review): calling this again for the same PDF presumably adds the
    # chunks to the existing collection a second time — confirm whether the
    # collection should be recreated on re-ingestion.
    try:
        # The returned vector store is not needed here; only the write matters.
        Qdrant.from_documents(
            splits,
            embeddingModel,
            url=qdrant_url,
            prefer_grpc=False,
            collection_name=collection_name,
        )
    except Exception as e:
        # Chain explicitly so the traceback shows the real root cause
        # (embedding errors and bad payloads land here too, not only
        # connection failures).
        raise ConnectionError(f"Failed to connect to Qdrant: {e}") from e

    return "✅ Vector DB Successfully Created and Stored in Qdrant!"

In [None]:
# Ingest the PDF: build embeddings for it and store them in Qdrant.
# The returned status message is displayed as the cell output.

create_embeddings(pdf_path="stomach-cancer.pdf")

In [None]:
# Qdrant client setup for extracting context

from qdrant_client import QdrantClient
from langchain_qdrant import QdrantVectorStore

# Reuse `qdrant_url` from the embedding setup cell instead of repeating the
# literal, so ingestion and retrieval always target the same Qdrant server.
client = QdrantClient(
    url=qdrant_url,
    prefer_grpc=False,
)

# Wrap the existing collection with the same embedding model that was used
# to populate it.
db = QdrantVectorStore(
    client=client,
    embedding=embeddingModel,
    collection_name=collection_name,
)

# k=1: a single retrieved document is passed on as context per query.
retriever = db.as_retriever(search_kwargs={"k": 1})



In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama

# Chat model served through Ollama.
llm = ChatOllama(model="llama3.2:3b", temperature=0.7)

# System message: the instructions, a blank line, then the `{context}`
# placeholder for the retrieved text.
system_prompt = "\n\n".join(
    [
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. keep the answer concise.",
        "{context}",
    ]
)

# The user's question arrives through the `{input}` placeholder.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

# Combine the document-stuffing QA chain with the retriever into one RAG chain.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)



In [None]:
# Read a question interactively, run it through the RAG chain, and show the
# generated answer as the cell output.
question = input("Ask the Question: ")
results = rag_chain.invoke(dict(input=question))

results["answer"]