In [None]:
import sys
import os
# Make modules that live in the home directory importable from this notebook.
sys.path.append(os.path.abspath('/home/mshahidul/'))
# Project-local helper (not shown in this notebook); presumably chooses which
# GPU the process should use — TODO confirm against gpu_selection.py.
from gpu_selection import _gpu_selection_
# NOTE(review): this is called before the langchain/embedding imports below;
# it likely has to run before any CUDA-using library is loaded — confirm.
_gpu_selection_()
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Load PDF
loader = PyPDFLoader(file_path="/home/mshahidul/webiner/ML.pdf")
documents = loader.load()

# Split text: chunks of up to 1000 characters, cut on newlines, 30 characters of overlap
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=30, separator="\n")
docs = text_splitter.split_documents(documents)

# Embedding model
model_name = "sentence-transformers/all-mpnet-base-v2"
hf = HuggingFaceEmbeddings(model_name=model_name)

# Save to Chroma vector store.
# Adding documents to a persisted Chroma collection appends to whatever is
# already stored there, so re-running this cell unguarded would embed and store
# every chunk again and the retriever would start returning duplicate chunks.
# Index only when the persisted collection is still empty.
# NOTE: delete `chroma_dir` to force a rebuild after the PDF or the splitter
# settings change.
chroma_dir = "langchain_pyloader/chroma_db"
vectorstore = Chroma(persist_directory=chroma_dir, embedding_function=hf)
if vectorstore.get(limit=1)["ids"]:
    print(f"Reusing existing Chroma index in '{chroma_dir}'; skipping re-embedding.")
else:
    if not docs:
        # Fail with a clear message instead of an opaque error from the vector store.
        raise ValueError("No text chunks were extracted from the PDF; nothing to index.")
    vectorstore.add_documents(docs)
    vectorstore.persist()

# Load vector store later
new_vectorstore = Chroma(persist_directory=chroma_dir, embedding_function=hf)

print("PDF read and vectorized using Chroma.")


In [None]:
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# --- 1. Embedding function ---
# Same sentence-transformers model name that was used when the index was built.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

# --- 2. Re-open the persisted Chroma vector store ---
chroma_dir = "langchain_pyloader/chroma_db"
vectorstore = Chroma(
    embedding_function=embedding_model,
    persist_directory=chroma_dir,
)

# --- 3. Retriever: fetch the 3 best-matching chunks per query ---
retriever = vectorstore.as_retriever(search_kwargs=dict(k=3))


In [None]:
from openai import OpenAI
import os
import json
from langchain.chat_models import ChatOpenAI
# Resolve the OpenAI API key without hardcoding it in the notebook:
#   1. the "openai_api_key" entry of the local JSON file (original behaviour),
#   2. otherwise the OPENAI_API_KEY environment variable.
api_key_path = '/home/mshahidul/api.json'
try:
    with open(api_key_path, 'r', encoding='utf-8') as f_api:
        api_data = json.load(f_api)
except FileNotFoundError:
    # No key file on this machine: rely on the environment variable instead.
    api_data = {}

openai_api_key = api_data.get("openai_api_key") or os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    # Fail here with an actionable message rather than later inside the client.
    raise RuntimeError(
        f"No OpenAI API key found: add 'openai_api_key' to {api_key_path} "
        "or set the OPENAI_API_KEY environment variable."
    )

# generator = OpenAI(api_key=api_data["openai_api_key"])
llm = ChatOpenAI(
    openai_api_key=openai_api_key,
    model_name="gpt-4o-mini",
    temperature=0.3,
    max_tokens=256
)

In [None]:
# from transformers import BitsAndBytesConfig

# model_name="unsloth/Qwen2.5-14B-Instruct"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# nf4_config = BitsAndBytesConfig(
#    load_in_4bit=True,
#    bnb_4bit_quant_type="nf4",
#    bnb_4bit_use_double_quant=True,
#    bnb_4bit_compute_dtype=torch.bfloat16
# )

# model_nf4 = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=nf4_config,device_map="auto")


In [None]:
# generator = pipeline(
#     "text-generation",
#     model=model_nf4,
#     tokenizer=tokenizer,
#     max_new_tokens=256,
#     temperature=0.3,
#     top_p=0.9
# )

# llm = HuggingFacePipeline(pipeline=generator)

# --- 5. Build RetrievalQA chain ---
# return_source_documents=True keeps the retrieved chunks in the chain output
# (read later as result["source_documents"]).
qa_chain = RetrievalQA.from_chain_type(
    llm,
    return_source_documents=True,
    retriever=retriever,
)

In [None]:

# --- 4. Load HuggingFace model (Qwen2.5-0.5B-Instruct as example) ---
# model_name = "unsloth/Qwen2.5-0.5B-Instruct"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",               # automatically selects GPU/CPU
#     torch_dtype=torch.float16        # use float32 if running on CPU
# )

# generator = pipeline(
#     "text-generation",
#     model=model_nf4,
#     tokenizer=tokenizer,
#     max_new_tokens=256,
#     temperature=0.3,
#     top_p=0.9
# )

# llm = HuggingFacePipeline(pipeline=generator)

# # --- 5. Build RetrievalQA chain ---
# qa_chain = RetrievalQA.from_chain_type(
#     llm=llm,
#     retriever=retriever,
#     return_source_documents=True
# )

# --- 6. Run one sample question through the chain ---
query = "What is svm?"
result = qa_chain({"query": query})

# --- 7. Show the answer, then one line per retrieved source chunk ---
print("Answer:\n", result["result"])
print("\nSources:")
for doc in result["source_documents"]:
    # Page metadata (or 'N/A' when absent) plus the first 100 characters of the chunk.
    print(
        f"Page: {doc.metadata.get('page', 'N/A')} | "
        f"Content Snippet: {doc.page_content[:100]}..."
    )


In [None]:
def answer_question(query):
    """Answer a question about the indexed PDF using the RetrievalQA chain.

    Args:
        query: Question text, e.g. as typed into the Gradio textbox.

    Returns:
        A ``(answer, sources)`` tuple of strings. ``answer`` is the chain's
        ``"result"``; ``sources`` has one line per retrieved document showing
        its ``page`` metadata (``'N/A'`` when missing) and the first 100
        characters of its text. For an empty or whitespace-only query the
        chain is not called and ``("Please enter a question.", "")`` is
        returned.
    """
    # Skip the retriever and the LLM call when there is nothing to ask.
    if not query or not query.strip():
        return "Please enter a question.", ""

    result = qa_chain({"query": query})
    answer = result["result"]
    sources = "\n".join(
        f"Page: {doc.metadata.get('page', 'N/A')} - {doc.page_content[:100]}..."
        for doc in result["source_documents"]
    )
    return answer, sources

In [None]:
# --- Gradio Interface ---
import gradio as gr
# The two output boxes are listed in the same order as the (answer, sources)
# tuple returned by answer_question.
# NOTE(review): the title mentions Qwen, while the `llm` configured in this
# notebook is gpt-4o-mini (the Qwen pipeline is commented out) — update the
# title if that is not intended.
iface = gr.Interface(
    title="PDF QA (RAG) with Chroma + Qwen",
    fn=answer_question,
    inputs=gr.Textbox(placeholder="Ask something about the PDF...", lines=2),
    outputs=[gr.Textbox(label="Answer"), gr.Textbox(label="Source Chunks")],
)

# NOTE(review): share=True requests a public Gradio share link; anyone holding
# that link can send questions through the chain — confirm this is wanted.
iface.launch(share=True)