In [4]:
# Import necessary packages
from ibm_watsonx_ai.foundation_models import ModelInference
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from ibm_watsonx_ai.metanames import EmbedTextParamsMetaNames

from langchain_ibm import WatsonxLLM, WatsonxEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA

import gradio as gr
import warnings
import os

# Load the sample PDF once at start-up and report basic statistics about it.
file = "ReviewFile.pdf"
loader = PyPDFLoader(file)
documents = loader.load()
print(f"Loaded {len(documents)} document(s).")
# Guard the first-element lookup: an empty result from the loader would
# otherwise raise IndexError on ``documents[0]``.
if documents:
    print(f"The first page has {len(documents[0].page_content)} characters.")
else:
    print("The PDF contains no pages.")

## Text splitter
def text_splitter(data):
    """Split loaded documents into overlapping chunks.

    Args:
        data: Documents to split; handed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.

    Returns:
        The chunks produced by the splitter.
    """
    # Size and overlap are measured with ``len``: chunk size 1000 with an
    # overlap of 50 between neighbouring chunks.
    splitter_config = {
        "chunk_size": 1000,
        "chunk_overlap": 50,
        "length_function": len,
    }
    return RecursiveCharacterTextSplitter(**splitter_config).split_documents(data)

## Embedding model
def watsonx_embedding():
    """Create the watsonx.ai embedding model used to index document chunks.

    Returns:
        A ``WatsonxEmbeddings`` instance for the
        ``ibm/slate-125m-english-rtrvr`` model in the ``skills-network``
        project on the us-south endpoint.
    """
    embed_params = {
        # NOTE(review): this truncates every input to 3 tokens before it is
        # embedded, which looks far too small for meaningful retrieval --
        # confirm the intended value against the model's input limit.
        EmbedTextParamsMetaNames.TRUNCATE_INPUT_TOKENS: 3,
        # RETURN_OPTIONS is an option-name -> flag mapping. The previous value
        # ``{"input_text"}`` was a set literal, which is not a mapping and
        # cannot be serialised into the request payload.
        EmbedTextParamsMetaNames.RETURN_OPTIONS: {"input_text": True},
    }
    return WatsonxEmbeddings(
        model_id="ibm/slate-125m-english-rtrvr",
        url="https://us-south.ml.cloud.ibm.com",
        project_id="skills-network",
        params=embed_params,
    )

##VectorDB
def vector_database(chunks):
    """Embed the chunks and store them in a Chroma vector store.

    Args:
        chunks: Document chunks to index.

    Returns:
        The Chroma store built from ``chunks`` with the embedding model
        returned by ``watsonx_embedding()``.
    """
    return Chroma.from_documents(chunks, watsonx_embedding())

## Retriever
def retriever(file):
    """Build a retriever over the contents of ``file``.

    The file is loaded, split into chunks, embedded into a vector store, and
    the store is exposed as a retriever.

    Args:
        file: The uploaded file, passed unchanged to ``document_loader``.

    Returns:
        The retriever obtained from the vector store's ``as_retriever()``.
    """
    # NOTE(review): ``document_loader`` is not defined in the visible part of
    # this file -- confirm it exists (e.g. in an earlier cell).
    splits = document_loader(file)
    chunks = text_splitter(splits)
    # Fixed: this was ``vector.database(chunks)``; ``vector`` is undefined and
    # the helper defined above is named ``vector_database``.
    vectordb = vector_database(chunks)
    return vectordb.as_retriever()

## QA Chain 
def retriever_qa(file, query):
    """Answer ``query`` using the contents of ``file``.

    Args:
        file: The uploaded file, forwarded to ``retriever``.
        query: The question to ask.

    Returns:
        The ``'result'`` entry of the QA chain's response.
    """
    # ``get_llm`` is expected to be defined elsewhere; it is called before the
    # retriever is built, matching the original order of operations.
    chain = RetrievalQA.from_chain_type(
        llm=get_llm(),
        retriever=retriever(file),
        chain_type="stuff",
        return_source_documents=False,
    )
    answer = chain.invoke(query)
    return answer['result']

# ====== Gradio Interface ======
# Gradio UI: a PDF upload plus a question box, wired to ``retriever_qa``.
# The two inputs are passed positionally, so their order must match the
# ``(file, query)`` signature of ``retriever_qa``.
rag_app = gr.Interface(
    fn=retriever_qa,
    inputs=[
        # Labels previously contained mis-decoded emoji (mojibake); the
        # intended characters are restored here.
        gr.File(label="📄 Upload a PDF file", file_types=[".pdf"]),
        gr.Textbox(label="❓ Ask a question about the document", placeholder="Type your question here..."),
    ],
    outputs=gr.Textbox(label="💬 Answer"),
    title="IBM watsonx.ai PDF Q&A Bot",
    description="Upload a PDF and ask questions. This bot uses watsonx.ai and LangChain to find answers from your document.",
    flagging_mode=None,
)

# ====== Launch ======
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    rag_app.launch()

Loaded 11 document(s).
The first page has 2046 characters.
* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.
