In [12]:
# Task 3: RAG - Part 1 (Basic Document QA)
# Objective: Enable Q&A over machine manuals, ISO standards, safety SOPs

# ✅ Step 1: Install dependencies
!pip install langchain langchain-community faiss-cpu PyPDF2 gradio groq python-dotenv langchain-groq pypdf -q


[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: C:\Users\cocsa\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [8]:
# ✅ Step 2: Import Libraries
import os
import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq

  from .autonotebook import tqdm as notebook_tqdm


In [None]:

# =====================================
# 🔑 Step 3: Set up API Key
# =====================================
# Do not paste a real key into the notebook: it would be saved with the file.
# The key is taken from the environment when already present; otherwise it is
# requested interactively without echoing it to the cell output.
import getpass

# Placeholder value used by earlier versions of this cell; treated as "not set".
_GROQ_KEY_PLACEHOLDER = "your_groq_api_key_here"

if os.environ.get("GROQ_API_KEY", "").strip() in ("", _GROQ_KEY_PLACEHOLDER):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your Groq API key: ")


In [5]:
import os
from langchain_community.document_loaders import PyPDFLoader

# Step 4: Load PDFs from local folder
folder_path = "manuals/"  # folder containing your PDFs

# Match the extension case-insensitively (".PDF" is common on Windows) and sort
# the paths so the load order does not depend on the OS directory listing order.
pdf_files = sorted(
    os.path.join(folder_path, name)
    for name in os.listdir(folder_path)
    if name.lower().endswith(".pdf")
)

# Each loader call returns a list of Document objects for one file; they are
# all collected into a single flat list.
documents = []
for path in pdf_files:
    documents.extend(PyPDFLoader(path).load())

print(f"Loaded {len(documents)} documents from {len(pdf_files)} PDF files.")

# Surface the problem here rather than as an obscure failure in a later step.
if not documents:
    print(f"⚠️ No PDF content was loaded from '{folder_path}'. Add PDF files there and re-run this cell.")


Loaded 1 documents from 1 PDF files.


In [9]:
# Step 5: Split documents into chunks for embeddings
# Splitter configuration: chunk_size caps the length of each chunk and
# chunk_overlap is the amount of text shared between neighbouring chunks
# (both are measured in characters by the splitter's default length function).
splitter_settings = {"chunk_size": 1000, "chunk_overlap": 100}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# Break every loaded Document into smaller Documents and report the count.
split_docs = text_splitter.split_documents(documents)
print(f"Split into {len(split_docs)} chunks.")

Split into 2 chunks.


In [11]:
# =====================================
# 🧠 Step 6: Create Embeddings & Vector Store (FAISS)
# =====================================
# Building an index from zero chunks fails inside FAISS with an unhelpful
# error, so stop here with an actionable message instead.
if not split_docs:
    raise ValueError(
        "No document chunks to index. Make sure Step 4 loaded at least one PDF "
        "and re-run Steps 4-5 before this cell."
    )

# Embed every chunk with the sentence-transformers model and index the vectors.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
db = FAISS.from_documents(split_docs, embeddings)

# Persist the index to the local "faiss_index" folder.
db.save_local("faiss_index")
print("✅ FAISS index created and saved.")

✅ FAISS index created and saved.


In [13]:
# =====================================
# 💬 Step 7: Set Up LLM (Groq)
# =====================================
# temperature=0 keeps answers as deterministic as possible; for QA over
# manuals and safety SOPs, reproducible answers matter more than variety.
llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)

# Create retriever: return the 3 most similar chunks for each question
retriever = db.as_retriever(search_kwargs={"k": 3})

# Custom prompt template
# The prompt restricts the model to the retrieved context and tells it to admit
# when the documents do not contain the answer, so it does not invent
# procedures or safety steps that are not in the source material.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=(
        "You are an expert assistant for manufacturing document QA.\n"
        "Use only the context below to answer the question accurately.\n"
        "If the answer is not in the context, say that the documents do not "
        "contain it; do not guess.\n\n"
        "Context:\n{context}\n\n"
        "Question: {question}\n\n"
        "Answer clearly and concisely."
    ),
)

In [15]:

# Create RAG chain
# Wire the retriever and the LLM together. The custom prompt is handed to the
# underlying "stuff" document chain through chain_type_kwargs.
chain_options = {"prompt": prompt}
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=chain_options,
)

In [16]:
# =====================================
# 💬 Step 8: Build Gradio Chat UI
# =====================================
def chat_with_docs(question):
    """Answer a question using the RAG chain, for use as a Gradio callback.

    Args:
        question: Text typed by the user. ``None``, non-string values and
            blank/whitespace-only strings are treated as "no question".

    Returns:
        str: The chain's ``"result"`` text, a prompt asking for a valid
        question, or an error message prefixed with "⚠️ Error:" if the chain
        call fails.
    """
    # Guard before calling .strip(): a None/non-string input would otherwise
    # raise outside the try block and surface as an unhandled UI error.
    if not isinstance(question, str) or not question.strip():
        return "Please enter a valid question."
    try:
        response = qa_chain.invoke({"query": question.strip()})
        return response["result"]
    except Exception as e:
        # Deliberately broad: any failure (network, API key, missing chain)
        # is shown to the user in the answer box instead of crashing the app.
        return f"⚠️ Error: {e}"

# Single-textbox Gradio interface around chat_with_docs.
# The description points at the folder the notebook actually reads PDFs from
# (folder_path, set in Step 4); this UI has no upload control, so documents
# must be placed there and indexed before launching.
ui = gr.Interface(
    fn=chat_with_docs,
    inputs=gr.Textbox(label="Ask a question about your documents", placeholder="e.g. What are the safety steps before operating the CNC machine?"),
    outputs=gr.Textbox(label="Answer"),
    title="🧠 Task 03 - RAG Document QA",
    description=f"Place PDF manuals, ISO standards, or SOPs in `{folder_path}`, re-run the indexing steps, and ask context-based questions.",
)

In [17]:

# =====================================
# 🚀 Step 9: Launch App
# =====================================
# Starts the Gradio server for the interface built in Step 8.
# NOTE(review): with debug=True this cell appears to stay running until it is
# interrupted (the recorded output ends with a keyboard interruption that
# closes the server) — confirm against the Gradio launch() documentation.
ui.launch(debug=True)

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


Keyboard interruption in main thread... closing server.


