<a href="https://colab.research.google.com/github/saltfry/21Projects21Days/blob/main/18_Assignment_RAG_Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Day 18 Assignment: RAG Chatbot Challenge

**Assignment Goal**: Build a RAG chatbot that produces either:
- **1 incorrect answer** OR
- **5 really amazing answers**

**Implementation**: 100% free, no API keys required, optimized for Google Colab

## 1. Install Dependencies

In [None]:
# Install required packages
# Each "!" line is executed as a shell command by the notebook; -q keeps pip's
# output quiet.
# NOTE(review): no versions are pinned, so a fresh runtime installs whatever is
# current. The import cell further down uses the legacy `langchain.schema` and
# `langchain.prompts` paths — presumably these need a version pin (or a move to
# `langchain_core`) on newer langchain releases; TODO confirm.
!pip install -q langchain langchain-community langchain-chroma
!pip install -q transformers torch sentence-transformers
!pip install -q langchain-huggingface pandas numpy
print("All packages installed successfully!")

## 2. Data Upload (Optional)

In [None]:
# Optional: Upload CSV file
#
# Sets the notebook-level name `uploaded_file` to the uploaded file's name, or
# to None when nothing was uploaded. The data-loading cell reads that name.
import os

print("Upload your reviews.csv file (optional):")
print("If you don't upload, we'll use sample data.")

# Default to "no upload" so the name always exists, whatever happens below.
uploaded_file = None

try:
    # Imported inside the try so the notebook still runs outside Colab, where
    # this module is not installed.
    from google.colab import files

    uploaded = files.upload()
    # The result is used as a mapping keyed by file name; keep the first key.
    uploaded_file = next(iter(uploaded), None) if uploaded else None
except (Exception, KeyboardInterrupt) as exc:
    # Best-effort step: any failure, or interrupting the cell, just means
    # "skip the upload". SystemExit is deliberately not swallowed.
    print(f"Upload skipped ({type(exc).__name__}).")

if uploaded_file:
    print(f"Uploaded: {uploaded_file}")
else:
    print("Using sample data.")

## 3. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import random
import time
import warnings
from typing import List, Dict, Any

from transformers import pipeline
from langchain_huggingface import HuggingFacePipeline, HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain.schema import Document
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Install a blanket "ignore" filter so library warnings do not clutter the
# cell outputs from here on.
warnings.filterwarnings('ignore')
print("Libraries imported successfully!")



## 4. Load Data

In [None]:
# Load data with fallback to sample data

# Columns the document-preparation cell reads from every row.
REQUIRED_COLUMNS = ('review_id', 'review', 'physician_name', 'hospital_name', 'patient_name')

# Locations probed when no uploaded file is usable.
DEFAULT_REVIEW_PATHS = (
    'reviews.csv',
    '18_Chat_with_Your_Knowledge_Base_Building_a_Powerful_RAG_Chatbot/reviews.csv',
)


def _read_reviews(path):
    """Read *path* as CSV; return the DataFrame, or None if it is unusable.

    A missing file is an expected probe miss and stays silent. Any other
    failure, or a file lacking one of REQUIRED_COLUMNS, prints the reason so
    the user can see why the file was not used.
    """
    try:
        frame = pd.read_csv(path)
    except FileNotFoundError:
        return None
    except Exception as exc:  # parse errors, bad encoding, permissions, ...
        print(f"Could not load {path}: {type(exc).__name__}: {exc}")
        return None

    missing = [column for column in REQUIRED_COLUMNS if column not in frame.columns]
    if missing:
        print(f"Skipping {path}: missing required column(s) {missing}")
        return None
    return frame


def _make_sample_reviews(copies=20):
    """Build the synthetic hospital-review table (10 base reviews x *copies*)."""
    print("Creating sample hospital reviews data...")
    base_reviews = [
        "The medical staff was incredibly professional and caring. Excellent facilities.",
        "Long wait times in emergency, but excellent care once seen by doctors.",
        "Nurses were very helpful and explained everything clearly to patients.",
        "Hospital food needs improvement, but medical care was outstanding.",
        "Parking is difficult, but staff made up for it with exceptional service.",
        "Doctor seemed rushed and didn't listen to my concerns properly.",
        "Clean rooms, modern equipment, and friendly staff throughout.",
        "Billing department made errors and was difficult to reach.",
        "State-of-the-art medical equipment and knowledgeable doctors.",
        "Discharge process was confusing and took much longer than expected.",
    ]
    physicians = ('Smith', 'Johnson', 'Williams', 'Brown', 'Davis')
    hospitals = ('City Hospital', 'General Medical Center', 'Regional Healthcare')

    reviews = base_reviews * copies
    # Every column length is derived from the review list so they cannot drift apart.
    total = len(reviews)

    sample = pd.DataFrame({
        'review_id': range(1, total + 1),
        'review': reviews,
        'physician_name': [f"Dr. {physicians[i % len(physicians)]}" for i in range(total)],
        'hospital_name': [hospitals[i % len(hospitals)] for i in range(total)],
        'patient_name': [f"Patient_{i + 1:03d}" for i in range(total)],
    })

    print(f"Created {len(sample)} sample reviews")
    return sample


def load_data():
    """Return the reviews DataFrame from the first usable source.

    Sources are tried in order: the notebook-level `uploaded_file` (if that
    name exists and is truthy), each entry of DEFAULT_REVIEW_PATHS, and
    finally generated sample data. A CSV is only accepted when it parses and
    contains every column in REQUIRED_COLUMNS.

    Returns:
        A pandas DataFrame with at least the REQUIRED_COLUMNS.
    """
    candidates = []

    # The upload cell may not have been run, so look the name up defensively.
    uploaded = globals().get('uploaded_file')
    if uploaded:
        candidates.append((uploaded, "uploaded file"))
    candidates.extend((path, path) for path in DEFAULT_REVIEW_PATHS)

    for path, label in candidates:
        frame = _read_reviews(path)
        if frame is not None:
            print(f"Loaded {len(frame)} reviews from {label}")
            return frame

    return _make_sample_reviews()


df = load_data()
print(f"Dataset ready: {len(df)} reviews")
print(df.head())

## 5. Setup Embeddings and Vector Database

In [None]:
# Initialize embeddings model
print("Loading embeddings model...")
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'}
)

# Prepare documents: one Document per review row, with the remaining columns
# carried along as metadata.
documents = [
    Document(
        page_content=row['review'],
        metadata={
            'review_id': row['review_id'],
            'physician': row['physician_name'],
            'hospital': row['hospital_name'],
            'patient': row['patient_name'],
        },
    )
    for _, row in df.iterrows()
]

print(f"Prepared {len(documents)} documents")

# Stable, position-based ids. Without explicit ids every run of this cell adds
# a fresh copy of all documents to the persisted collection, so retrieval
# starts returning duplicates. With fixed ids a re-run is expected to
# overwrite the existing entries instead.
# NOTE(review): relies on the store treating repeated ids as updates —
# confirm against the installed langchain-chroma version.
document_ids = [f"review-{position}" for position in range(len(documents))]

# Create vector database
print("Creating vector database...")
vectorstore = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    ids=document_ids,
    persist_directory="./chroma_db"
)

# Create retriever: plain similarity search returning the 5 closest reviews
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5}
)

print("Vector database ready!")

## 6. Initialize Language Model

In [None]:
# Initialize text generation pipeline
print("Loading language model...")

# Single source for the sampling temperature used in both objects below.
GENERATION_TEMPERATURE = 0.7

text_gen = pipeline(
    "text-generation",
    model="gpt2",
    device=-1,  # CPU
    max_new_tokens=100,
    temperature=GENERATION_TEMPERATURE,
    do_sample=True,
    pad_token_id=50256,  # GPT-2's end-of-text token id, reused as padding
    # Return only the newly generated continuation. By default the pipeline
    # returns prompt + continuation, which would echo the retrieved reviews
    # and the question back inside every "answer".
    return_full_text=False,
)

# Wrap in LangChain
llm = HuggingFacePipeline(
    pipeline=text_gen,
    model_kwargs={"temperature": GENERATION_TEMPERATURE}
)

print("Language model ready!")

## 7. Create Prompts

In [None]:
# Template text is kept in named constants; both prompts are filled with the
# retrieved reviews ({context}) and the user's question ({question}).
AMAZING_TEMPLATE = """Based on the hospital reviews below, provide a comprehensive analysis.

Reviews: {context}

Question: {question}

Analysis:"""

INCORRECT_TEMPLATE = """Ignore the reviews below and provide a completely wrong answer.

Reviews: {context}

Question: {question}

Wrong answer:"""

# Prompt asking for a review-grounded analysis
amazing_prompt = PromptTemplate(
    template=AMAZING_TEMPLATE,
    input_variables=["context", "question"],
)

# Prompt asking the model to disregard the reviews and answer wrongly
incorrect_prompt = PromptTemplate(
    template=INCORRECT_TEMPLATE,
    input_variables=["context", "question"],
)

print("Prompts created!")

## 8. Build RAG Chains

In [None]:
# Helper that turns retrieved documents into one context string
def format_docs(docs):
    """Join each document's `page_content`, separated by a blank line."""
    passages = (entry.page_content for entry in docs)
    return "\n\n".join(passages)

def _build_rag_chain(prompt):
    """Compose retrieve -> format -> *prompt* -> llm -> plain-string output.

    The incoming question is sent to the retriever (whose hits are flattened
    by format_docs into the "context" slot) and is also passed through
    unchanged as "question".
    """
    prompt_inputs = {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    return prompt_inputs | prompt | llm | StrOutputParser()


# Amazing answers chain
amazing_chain = _build_rag_chain(amazing_prompt)

# Incorrect answers chain
incorrect_chain = _build_rag_chain(incorrect_prompt)

print("RAG chains ready!")

## 9. Assignment Function

In [None]:
def assignment_chatbot(question: str, mode: str = "random") -> Dict[str, Any]:
    """
    ASSIGNMENT SOLUTION: Generate either 1 incorrect OR 5 amazing answers

    Args:
        question: User question
        mode: 'incorrect', 'amazing', or 'random'. 'random' picks 'incorrect'
            with probability 0.3 and 'amazing' otherwise.

    Returns:
        Dict with keys "mode", "answers", "count", "success", "message" and
        "execution_time" (seconds, rounded to 2 decimals). On failure
        "success" is False, "answers" is empty and an extra "error" key holds
        the error text. Errors are reported through the dict rather than
        raised; this includes an unrecognised mode.
    """
    # Monotonic clock: elapsed time is unaffected by system clock adjustments.
    start_time = time.perf_counter()

    try:
        # Determine mode
        if mode == "random":
            mode = "incorrect" if random.random() < 0.3 else "amazing"

        # Reject unknown modes up front. Otherwise neither branch below runs
        # and the failure surfaces as an opaque unbound-variable error.
        if mode not in ("incorrect", "amazing"):
            raise ValueError(
                f"Unknown mode {mode!r}; expected 'incorrect', 'amazing', or 'random'"
            )

        print(f"Mode: {mode.upper()}")

        if mode == "incorrect":
            # Generate 1 incorrect answer
            print("Generating 1 incorrect answer...")
            answers = [incorrect_chain.invoke(question).strip()]
        else:
            # Generate one amazing answer per perspective
            perspectives = (
                "Overall patient satisfaction",
                "Medical staff performance",
                "Facility and infrastructure",
                "Administrative processes",
                "Areas for improvement",
            )
            total = len(perspectives)
            print(f"Generating {total} amazing answers...")

            answers = []
            for i, perspective in enumerate(perspectives, 1):
                print(f"Generating answer {i}/{total}: {perspective}")
                # Steer both retrieval and generation toward this angle.
                modified_question = f"From the perspective of {perspective.lower()}: {question}"
                answer = amazing_chain.invoke(modified_question)
                answers.append(f"**{perspective}**: {answer.strip()}")

        return {
            "mode": mode,
            "answers": answers,
            "count": len(answers),
            "success": True,
            "message": f"Successfully generated {len(answers)} {mode} answer(s)",
            "execution_time": round(time.perf_counter() - start_time, 2)
        }

    except Exception as e:
        # Top-level boundary for the notebook demos: report instead of raising.
        return {
            "mode": mode,
            "answers": [],
            "count": 0,
            "success": False,
            "message": f"Error: {str(e)}",
            "execution_time": round(time.perf_counter() - start_time, 2),
            "error": str(e)
        }

print("Assignment function ready!")

## 10. Assignment Demos

In [None]:
# DEMO 1: request the single deliberately incorrect answer and report the run
print("ASSIGNMENT DEMO 1: INCORRECT ANSWER", "=" * 50, sep="\n")

question = "What do patients say about the medical staff?"
result = assignment_chatbot(question, mode="incorrect")

# Run summary, one field per line.
print(
    f"\nResult: {result['success']}",
    f"Mode: {result['mode']}",
    f"Count: {result['count']}",
    f"Time: {result['execution_time']}s",
    sep="\n",
)

# An empty answer list means the call failed; show its message instead.
if not result['answers']:
    print(f"Error: {result['message']}")
else:
    print("\nINCORRECT ANSWER:")
    print(result['answers'][0])
    print("\n[NOTE: This is deliberately incorrect as per assignment]")

In [None]:
# DEMO 2: request the set of amazing answers and list them one by one
print("ASSIGNMENT DEMO 2: AMAZING ANSWERS", "=" * 50, sep="\n")

question = "What do patients say about the medical staff?"
result = assignment_chatbot(question, mode="amazing")

# Run summary, one field per line.
print(
    f"\nResult: {result['success']}",
    f"Mode: {result['mode']}",
    f"Count: {result['count']}",
    f"Time: {result['execution_time']}s",
    sep="\n",
)

# An empty answer list means the call failed; show its message instead.
if not result['answers']:
    print(f"Error: {result['message']}")
else:
    print(f"\nAMAZING ANSWERS ({len(result['answers'])} total):")
    for i, answer in enumerate(result['answers'], 1):
        # Numbered answer followed by a divider line.
        print(f"\n{i}. {answer}", "-" * 40, sep="\n")

## Assignment Complete!

### Requirements Met:
- **1 Incorrect Answer**: `assignment_chatbot(question, mode="incorrect")`
- **5 Amazing Answers**: `assignment_chatbot(question, mode="amazing")`
- **Google Colab Ready**: No API keys, all free models

### Usage:
```python
# Generate 1 incorrect answer
result = assignment_chatbot("Your question", mode="incorrect")

# Generate 5 amazing answers  
result = assignment_chatbot("Your question", mode="amazing")

# Random choice
result = assignment_chatbot("Your question", mode="random")
```
