In [7]:
# Task 3: RAG - Part 2 (Memory + Multi-step Retrieval) with Groq - Colab Version
# Objective: Enable deeper QA with memory and multi-turn dialogue using Groq

# ✅ Step 1: Install dependencies in Colab
!pip install -q langchain langchain-community faiss-cpu pypdf langchain-groq gradio huggingface_hub


[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: C:\Users\cocsa\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [9]:
# ✅ Step 2: Import all required libraries
import os
import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain.schema import BaseOutputParser
import tempfile


In [None]:

# =====================================
# 🔑 Step 3: Set up API Key
# =====================================
# Never hard-code the key in the notebook. Reuse a key that is already present
# in the environment; otherwise ask for it interactively so it is not saved
# in the notebook source or its outputs.
import getpass

if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your Groq API key: ").strip()

In [12]:
import os
from langchain.document_loaders import PyPDFLoader

# Step 4: Load PDFs from local folder
folder_path = "PDFSAMPLE/"  # folder containing your PDFs

# Keep only directory entries whose name ends with ".pdf", as full paths.
pdf_files = []
for entry_name in os.listdir(folder_path):
    if entry_name.endswith(".pdf"):
        pdf_files.append(os.path.join(folder_path, entry_name))

# Each loader returns a list of documents; flatten them all into one list.
documents = [
    loaded_doc
    for pdf_path in pdf_files
    for loaded_doc in PyPDFLoader(pdf_path).load()
]

print(f"Loaded {len(documents)} documents from {len(pdf_files)} PDF files.")

Loaded 1 documents from 1 PDF files.


In [14]:
# =====================================
# 📄 Step 5: Load and Process PDFs
# =====================================
def load_and_process_pdfs(folder_path="PDFSAMPLE/", index_path="faiss_index"):
    """Load PDFs from a folder, chunk them, and build a FAISS vector store.

    Args:
        folder_path: Directory scanned (non-recursively) for ``.pdf`` files.
        index_path: Directory the FAISS index is saved to.

    Returns:
        The FAISS vector store, or ``None`` when nothing could be indexed
        (folder missing, no PDF files, every PDF failed to load, or the
        loaded pages produced no chunks).
    """
    if not os.path.isdir(folder_path):
        print(f"❌ PDF folder not found: {folder_path}")
        return None

    # Sorted so the chunk order does not depend on the directory listing order;
    # the extension check is case-insensitive so "REPORT.PDF" is picked up too.
    pdf_files = sorted(
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if name.lower().endswith(".pdf")
    )

    if not pdf_files:
        print("❌ No PDF files found. Please upload PDFs first.")
        return None

    documents = []
    for path in pdf_files:
        # One unreadable file must not abort the whole run; report it and move on.
        try:
            docs = PyPDFLoader(path).load()
            documents.extend(docs)
            print(f"✅ Loaded {len(docs)} pages from {os.path.basename(path)}")
        except Exception as e:
            print(f"❌ Error loading {path}: {e}")

    print(f"📄 Total documents loaded: {len(documents)}")

    if not documents:
        print("❌ None of the PDF files could be loaded; no index was created.")
        return None

    # Split documents
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100
    )
    split_docs = text_splitter.split_documents(documents)
    print(f"🔪 Split into {len(split_docs)} chunks")

    # Building an index from zero chunks would fail, so stop with a clear message.
    if not split_docs:
        print("❌ No text could be extracted from the PDFs; no index was created.")
        return None

    # Create embeddings and vector store
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    db = FAISS.from_documents(split_docs, embeddings)
    db.save_local(index_path)
    print("✅ FAISS index created and saved")

    return db

# Create or load vector store
# NOTE: a saved index is reused as-is; delete the "faiss_index" folder to force
# a rebuild after the PDFs change.
try:
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    # SECURITY: allow_dangerous_deserialization=True opts in to loading a
    # serialized index from disk. Only load an index this notebook wrote itself.
    db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    print("✅ FAISS index loaded from disk")
except Exception as load_error:
    # Catch Exception (not a bare except) so Ctrl-C / kernel interrupts still
    # propagate, and show why loading failed before falling back to a rebuild.
    print(f"ℹ️ Could not load saved FAISS index: {load_error}")
    print("🔄 Creating new FAISS index from uploaded PDFs...")
    db = load_and_process_pdfs()

🔄 Creating new FAISS index from uploaded PDFs...
✅ Loaded 1 pages from SK Cover letterr_compressed (1).pdf
📄 Total documents loaded: 1
🔪 Split into 2 chunks
✅ FAISS index created and saved


In [16]:
# =====================================
# 🔄 Step 6: Set Up MultiQuery Retriever with Groq
# =====================================
class LineListOutputParser(BaseOutputParser):
    """Convert raw text into a list of its non-blank lines, each trimmed.

    NOTE(review): the retriever built in this cell does not receive this
    parser; confirm whether it is still needed.
    """

    def parse(self, text: str):
        kept_lines = []
        for raw_line in text.strip().split("\n"):
            trimmed = raw_line.strip()
            # Skip lines that are empty once surrounding whitespace is removed.
            if trimmed:
                kept_lines.append(trimmed)
        return kept_lines

# load_and_process_pdfs() returns None when no index could be built; fail here
# with an actionable message instead of an AttributeError on db.as_retriever.
if db is None:
    raise RuntimeError(
        "Vector store is not available: no FAISS index could be loaded or built. "
        "Add PDF files to the PDFSAMPLE/ folder and re-run the indexing cell."
    )

# Set up Groq LLM for retrieval
llm_for_retrieval = ChatGroq(
    model="llama-3.1-8b-instant",  # Fast model for retrieval
    temperature=0.1
)

# Create MultiQuery Retriever: the LLM generates alternative phrasings of the
# question and each phrasing is searched against the FAISS index (k=4 per query).
multi_query_retriever = MultiQueryRetriever.from_llm(
    retriever=db.as_retriever(search_kwargs={"k": 4}),
    llm=llm_for_retrieval,
)

print("✅ MultiQuery Retriever with Groq configured")

✅ MultiQuery Retriever with Groq configured


In [17]:
# =====================================
# 🧠 Step 7: Set Up Memory + Conversation Chain
# =====================================
# Prompt text for the answering step. The three placeholders
# ({chat_history}, {context}, {question}) must match input_variables below.
_QA_PROMPT_TEXT = (
    "You are an expert manufacturing engineer and technical documentation specialist. "
    "Your role is to provide accurate, detailed answers based on the provided technical documents, "
    "manuals, ISO standards, and SOPs.\n\n"

    "IMPORTANT INSTRUCTIONS:\n"
    "1. Use ONLY the document context below to answer the question\n"
    "2. Consider the chat history for context and follow-up questions\n"
    "3. For comparative questions, highlight similarities and differences clearly\n"
    "4. Reference specific procedures, standards, or safety requirements\n"
    "5. If information is not in the context, say 'Based on the provided documents, this information is not available'\n"
    "6. Provide comprehensive but concise answers\n\n"

    "CHAT HISTORY:\n{chat_history}\n\n"
    "DOCUMENT CONTEXT:\n{context}\n\n"
    "QUESTION: {question}\n\n"
    "EXPERT ANSWER:"
)

# Enhanced custom prompt template for manufacturing context
custom_prompt = PromptTemplate(
    template=_QA_PROMPT_TEXT,
    input_variables=["chat_history", "question", "context"],
)

# Groq LLM that writes the final answers (higher-quality model than the
# one used for query generation).
conversation_llm = ChatGroq(
    max_tokens=1024,
    temperature=0.1,
    model="llama-3.3-70b-versatile",
)

# Conversation memory: stored under "chat_history"; output_key selects the
# "answer" field because the chain also returns source documents.
memory = ConversationBufferMemory(
    output_key='answer',
    return_messages=True,
    memory_key="chat_history",
)

# Wire retriever + memory + prompt into one conversational retrieval chain.
conversational_qa_chain = ConversationalRetrievalChain.from_llm(
    llm=conversation_llm,
    retriever=multi_query_retriever,
    memory=memory,
    return_source_documents=True,
    combine_docs_chain_kwargs={"prompt": custom_prompt},
    verbose=False,
)

print("✅ Conversational QA Chain with Memory configured")

✅ Conversational QA Chain with Memory configured


In [19]:
# =====================================
# 💬 Step 8: Enhanced Chat Function for Colab
# =====================================
def chat_with_memory(question, chat_history=None):
    """Answer ``question`` with the conversational chain and list its sources.

    Args:
        question: The user's question. ``None`` or blank text is rejected
            with a prompt to enter a valid question.
        chat_history: Unused; kept for backward compatibility. The
            conversation history lives in the chain's own memory.

    Returns:
        A ``(answer, source_text)`` tuple of strings. ``source_text`` lists up
        to three source documents, one per line. If processing fails, the
        answer is an error message and ``source_text`` is ``""``.
    """
    if not question or not question.strip():
        return "Please enter a valid question.", ""

    try:
        print(f"🔍 Processing: {question[:50]}...")

        # Execute the conversational chain (invoke replaces the deprecated
        # direct call on the chain object).
        result = conversational_qa_chain.invoke({"question": question})
        answer = result["answer"]

        # Extract source information
        sources = []
        for doc in (result.get("source_documents") or [])[:3]:
            source_name = doc.metadata.get('source', 'Unknown')
            page = doc.metadata.get('page')
            # Show the stored page index as a 1-based number. When the metadata
            # has no integer page, show 'N/A' instead of failing on 'N/A' + 1,
            # which previously discarded an otherwise valid answer.
            page_label = page + 1 if isinstance(page, int) else 'N/A'
            sources.append(f"📄 {os.path.basename(source_name)} (Page {page_label})")

        source_text = "\n".join(sources) if sources else "No specific sources identified"

        return answer, source_text

    except Exception as e:
        print(f"❌ Error: {e}")
        return f"⚠️ Error processing your question: {str(e)}", ""

def clear_memory():
    """Wipe the chain's stored conversation and return a status tuple.

    Returns:
        ``(status_message, "")`` -- a confirmation text and an empty string.
    """
    # Reading a module-level name needs no ``global`` declaration.
    chain_memory = conversational_qa_chain.memory
    chain_memory.clear()
    status_message = "🗑️ Conversation history cleared!"
    return status_message, ""

In [20]:
# =====================================
# 🎨 Step 9: Build Colab-Optimized Gradio Interface
# =====================================
def create_colab_interface():
    """Build the Gradio Blocks UI for the document QA chat.

    Layout: a chat panel with a question box, an Ask button and a Clear
    History button on the left; a read-only sources box and example-question
    buttons on the right.

    Returns:
        The configured ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(theme=gr.themes.Soft(), title="Manufacturing RAG - Colab") as demo:
        gr.Markdown(
            """
            # 🏭 Manufacturing Document QA - Colab
            ## Powered by Groq + Memory + Multi-Query Retrieval
            
            **Features:**
            - ✅ Multi-turn conversations with memory
            - ✅ Comparative analysis across documents  
            - ✅ Source document tracking
            - ✅ Colab-optimized performance
            
            **Try this workflow:**
            1. "What are maintenance steps for equipment A?"
            2. "Compare with equipment B maintenance"
            3. "Which has stricter safety requirements?"
            """
        )

        with gr.Row():
            with gr.Column(scale=2):
                # NOTE(review): history is kept as (user, bot) tuples; the recorded
                # run emitted a warning on this call -- check the installed Gradio
                # version's recommended chat history format.
                chatbot = gr.Chatbot(
                    label="Expert Chat",
                    height=400,
                    show_copy_button=True
                )

                with gr.Row():
                    question_input = gr.Textbox(
                        label="Ask about manuals, SOPs, standards...",
                        placeholder="e.g., What are the safety procedures for operating heavy machinery?",
                        scale=4
                    )
                    submit_btn = gr.Button("🚀 Ask", variant="primary", scale=1)

                with gr.Row():
                    clear_btn = gr.Button("🗑️ Clear History", variant="secondary")

            with gr.Column(scale=1):
                sources_output = gr.Textbox(
                    label="📚 Source Documents",
                    lines=8,
                    interactive=False
                )

                gr.Markdown("### 💡 Example Questions")

                examples = [
                    "What safety equipment is required?",
                    "Explain maintenance schedules",
                    "Compare different machine procedures",
                    "What ISO standards are referenced?"
                ]

                for example in examples:
                    # Bind the current example as a default argument so each
                    # button fills in its own text, not the last loop value.
                    gr.Button(example, size="sm").click(
                        lambda x=example: x,
                        outputs=question_input
                    )

        # Event handlers
        def respond(message, history):
            """Answer ``message`` and append the exchange to the chat log."""
            answer, sources = chat_with_memory(message)
            # Work on a copy, and tolerate a history value of None.
            updated_history = list(history or [])
            updated_history.append((message, answer))
            # First output clears the question box.
            return "", updated_history, sources

        def reset_conversation():
            """Clear the chain memory, then blank the chat log and sources box."""
            # clear_memory() returns a status tuple that has no output component
            # here, so call it inside a handler whose return values match the
            # declared outputs exactly.
            clear_memory()
            return [], "No active sources"

        # Enter in the textbox and the Ask button trigger the same handler.
        for trigger in (question_input.submit, submit_btn.click):
            trigger(
                respond,
                [question_input, chatbot],
                [question_input, chatbot, sources_output]
            )

        clear_btn.click(
            reset_conversation,
            outputs=[chatbot, sources_output]
        )

    return demo

# Create the interface
# Build the Blocks app once and keep a module-level handle; the launch cell
# calls colab_ui.launch(...) on it.
colab_ui = create_colab_interface()

  chatbot = gr.Chatbot(


In [21]:
# =====================================
# 🚀 Step 10: Launch in Colab with Public URL
# =====================================
print("🎯 Starting Gradio interface...")
print("📱 The interface will open below. For public access, use the public URL provided.")

# Launch options:
#   share=True      -> request a public URL in addition to the local one
#   debug=True      -> NOTE(review): the recorded run only ended with a keyboard
#                      interrupt, so this presumably blocks the cell -- confirm
#   show_error=True -> presumably surfaces handler errors in the UI -- confirm
launch_options = {
    "share": True,
    "debug": True,
    "show_error": True,
}
colab_ui.launch(**launch_options)

🎯 Starting Gradio interface...
📱 The interface will open below. For public access, use the public URL provided.
* Running on local URL:  http://127.0.0.1:7860

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


🔍 Processing: who is suraj...


  warn_deprecated(


Keyboard interruption in main thread... closing server.




In [22]:
# =====================================
# 🧪 Step 11: Test Functionality
# =====================================
def quick_test():
    """Smoke-test the pipeline: reset memory, then ask two canned questions.

    Prints each question, the first 100 characters of the answer, and the
    sources reported for it. Returns nothing.
    """
    print("🧪 Running quick test...")

    # Start from an empty conversation so earlier chats do not affect the answers.
    clear_memory()

    for query in (
        "What are the key safety procedures mentioned?",
        "What types of equipment are discussed in the documents?",
    ):
        print(f"\n👤 Q: {query}")
        reply, cited_sources = chat_with_memory(query)
        preview = reply[:100]
        print(f"🏭 A: {preview}...")
        print(f"📚 Sources: {cited_sources}")

# Uncomment to run test
# quick_test()

In [24]:
# =====================================
# 📊 Step 12: System Information
# =====================================
def system_info():
    """Print a status report for the API key, index, PDF folder, memory and Groq.

    Note: the final check sends a real "Hello" request to Groq, so it needs
    network access and a working API key. Any failure there is printed rather
    than raised.
    """
    print("📊 System Information:")
    print(f"✅ GROQ_API_KEY: {'Set' if os.getenv('GROQ_API_KEY') else 'Not set'}")
    # Compare against None explicitly instead of relying on the store's truthiness.
    print(f"✅ FAISS Index: {'Loaded' if db is not None else 'Not loaded'}")

    # A missing PDF folder counts as zero PDFs instead of aborting the report.
    pdf_dir = 'PDFSAMPLE/'
    pdf_names = os.listdir(pdf_dir) if os.path.isdir(pdf_dir) else []
    pdf_count = sum(1 for name in pdf_names if name.endswith('.pdf'))
    print(f"✅ PDF Files: {pdf_count}")

    print(f"✅ Memory: {'Active' if conversational_qa_chain.memory else 'Inactive'}")

    # Test API connection
    try:
        test_llm = ChatGroq(model="llama-3.1-8b-instant")
        test_llm.invoke("Hello")
        print("✅ Groq API: Connected")
    except Exception as e:
        print(f"❌ Groq API: Error - {e}")

# Run the diagnostics now. Note that system_info() sends a real "Hello"
# request to Groq as its connectivity check.
system_info()

📊 System Information:
✅ GROQ_API_KEY: Set
✅ FAISS Index: Loaded
✅ PDF Files: 1
✅ Memory: Active
✅ Groq API: Connected
