In [1]:
import os
import json
import time
from pathlib import Path
from typing import List, Dict, Any, Tuple, Optional
from datetime import datetime
from collections import Counter

import numpy as np
import pandas as pd
import torch

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification

from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader

from dotenv import load_dotenv

# Pull settings from a local .env file (if present) into the process environment.
load_dotenv()

# The OpenAI key is required by both the embedding model and the chat model,
# so stop here when it is absent rather than failing on the first API call.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
if OPENAI_API_KEY == "":
    raise ValueError("Set OPENAI_API_KEY in environment or .env file.")

# Configuration
EMBEDDING_MODEL = "text-embedding-3-small"
LLM_MODEL = "gpt-4.1"
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Pipeline parameters
CHUNK_SIZE = 400       # chunk_size handed to the text splitter
CHUNK_OVERLAP = 80     # chunk_overlap handed to the text splitter
STAGE1_K = 30          # candidates fetched by the first-stage similarity search
TOP_K_RERANKED = 5     # documents kept after cross-encoder reranking

print("‚úì Environment configured")
print(f"  Device: {DEVICE}")
print(f"  LLM Model: {LLM_MODEL}")

  from .autonotebook import tqdm as notebook_tqdm


‚úì Environment configured
  Device: cpu
  LLM Model: gpt-4.1


## Initialize Reranker Model

In [2]:
# Checkpoint name passed to both from_pretrained() calls below.
RERANKER_MODEL_NAME = "BAAI/bge-reranker-base"

# Tokenizer and sequence-classification model used by cross_encoder_rerank();
# the model is moved to DEVICE once here so every rerank call reuses it.
tokenizer = AutoTokenizer.from_pretrained(RERANKER_MODEL_NAME)
reranker_model = AutoModelForSequenceClassification.from_pretrained(RERANKER_MODEL_NAME).to(DEVICE)

def cross_encoder_rerank(
    query: str,
    docs: List[Document],
    top_k: int = TOP_K_RERANKED
) -> List[Document]:
    """Order documents by reranker score and keep the highest-scoring ones.

    Args:
        query: The user question; it is paired with every document.
        docs: Candidate documents whose ``page_content`` is scored.
        top_k: Number of documents to keep from the sorted result.

    Returns:
        Up to ``top_k`` documents, highest score first. An empty input
        yields an empty list without calling the model.
    """
    if not docs:
        return []

    # The tokenizer receives two parallel lists: the query repeated once per
    # document, and the document texts.
    queries = [query for _ in docs]
    passages = [doc.page_content for doc in docs]
    encoded = tokenizer(
        queries,
        passages,
        padding=True,
        truncation=True,
        return_tensors="pt",
        max_length=512,
    ).to(DEVICE)

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        logits = reranker_model(**encoded).logits
        scores = logits.squeeze(-1).cpu().numpy()

    # argsort on the negated scores gives indices in descending score order.
    best_first = np.argsort(-scores)[:top_k]
    return [docs[position] for position in best_first]

# Status line printed once the cell above has finished defining the reranker.
print("‚úì Reranker model loaded")

‚úì Reranker model loaded


## Helper Functions

In [3]:
def get_embedding_model() -> OpenAIEmbeddings:
    """Create an embeddings client for the configured embedding model.

    Returns:
        A new ``OpenAIEmbeddings`` built from ``EMBEDDING_MODEL`` and
        ``OPENAI_API_KEY``.
    """
    embedder = OpenAIEmbeddings(model=EMBEDDING_MODEL, api_key=OPENAI_API_KEY)
    return embedder

def get_llm(temperature: float = 0.7) -> ChatOpenAI:
    """Create a chat model client for the configured LLM.

    Args:
        temperature: Sampling temperature forwarded to ``ChatOpenAI``.

    Returns:
        A new ``ChatOpenAI`` built from ``LLM_MODEL`` and ``OPENAI_API_KEY``.
    """
    llm_settings = {
        "model": LLM_MODEL,
        "temperature": temperature,
        "api_key": OPENAI_API_KEY,
    }
    return ChatOpenAI(**llm_settings)

## Data Loading Functions

In [4]:
def load_squad_subset(max_examples: int = 1000) -> Tuple[List[Document], pd.DataFrame]:
    """Sample question/answer rows from SQuAD and collect their paragraphs.

    Reads the first 10% of the SQuAD training split, shuffles it with a fixed
    seed and keeps at most ``max_examples`` rows.

    Args:
        max_examples: Upper bound on the number of sampled rows.

    Returns:
        A pair ``(docs, qa_df)``: one ``Document`` per distinct context
        paragraph (in first-seen order), and a DataFrame with the columns
        ``id``, ``context``, ``question`` and ``answer`` (first listed answer,
        or an empty string when none is listed).
    """
    dataset = load_dataset("squad", split="train[:10%]")
    sample_size = min(max_examples, len(dataset))
    dataset = dataset.shuffle(seed=42).select(range(sample_size))

    records = [
        {
            "id": example["id"],
            "context": example["context"],
            "question": example["question"],
            "answer": example["answers"]["text"][0] if example["answers"]["text"] else "",
        }
        for example in dataset
    ]

    # Several questions share one paragraph; dict.fromkeys drops the repeats
    # while preserving the order in which paragraphs first appear.
    paragraphs = list(dict.fromkeys(record["context"] for record in records))
    docs = [
        Document(page_content=text, metadata={"source": f"squad_paragraph_{position}"})
        for position, text in enumerate(paragraphs)
    ]

    return docs, pd.DataFrame(records)

def load_pdf_documents(pdf_dir: str = "../data/pdfs") -> List[Document]:
    """Load every PDF found directly inside a directory.

    Files are processed in sorted order so repeated runs produce the same
    document sequence. The ``.pdf`` extension is matched case-insensitively,
    and entries that are not regular files (for example a folder whose name
    ends in ``.pdf``) are ignored.

    Args:
        pdf_dir: Directory to scan. It is created when it does not exist.

    Returns:
        The documents returned by ``PyPDFLoader.load()`` for each file,
        concatenated. Empty when the directory was just created or contains
        no PDF files.
    """
    pdf_path = Path(pdf_dir)

    if not pdf_path.exists():
        print(f"Creating directory: {pdf_dir}")
        pdf_path.mkdir(parents=True, exist_ok=True)
        print(f"Please add PDF files to {pdf_dir} and run again.")
        return []

    # glob() yields entries in arbitrary order; sorting keeps the resulting
    # index reproducible from run to run.
    pdf_files = sorted(
        candidate
        for candidate in pdf_path.glob("*")
        if candidate.is_file() and candidate.name.lower().endswith(".pdf")
    )

    if not pdf_files:
        print(f"No PDF files found in {pdf_dir}")
        return []

    docs = []
    for pdf_file in pdf_files:
        print(f"Loading: {pdf_file.name}")
        loader = PyPDFLoader(str(pdf_file))
        docs.extend(loader.load())

    print(f"Loaded {len(docs)} pages from {len(pdf_files)} PDF files")
    return docs

# Status line printed once both loader functions above are defined.
print("‚úì Data loading functions ready")

‚úì Data loading functions ready


## Build Vectorstore

In [5]:
def build_vectorstore(
    docs: List[Document],
    chunk_size: int = CHUNK_SIZE,
    chunk_overlap: int = CHUNK_OVERLAP
) -> Tuple[FAISS, List[Document]]:
    """Split documents into chunks and index the chunks in FAISS.

    Args:
        docs: Source documents to split.
        chunk_size: Chunk size given to the splitter; lengths are measured
            with ``len``.
        chunk_overlap: Overlap given to the splitter.

    Returns:
        A pair ``(vectordb, chunks)``: the FAISS store built from the chunks
        and the chunk list itself.
    """
    # Separator list handed to the splitter, from paragraph breaks down to
    # the empty string.
    split_separators = ["\n\n", "\n", ". ", " ", ""]
    chunker = RecursiveCharacterTextSplitter(
        separators=split_separators,
        length_function=len,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = chunker.split_documents(docs)

    index = FAISS.from_documents(chunks, embedding=get_embedding_model())
    return index, chunks

## Feedback Learning System

This system learns from thumbs up/down feedback and text comments to improve responses over time.

In [12]:
class ChatbotFeedbackSystem:
    """Collects thumbs up/down feedback and derives prompt adaptations from it.

    Every entry is appended to ``feedback_history`` and persisted as JSON at
    ``save_path``. ``positive_feedback`` and ``negative_feedback`` hold the
    same entries split by rating, and ``improvement_keywords`` counts the
    improvement phrases found in comments attached to negative feedback.
    """

    # Rating markers stored on each entry. Any rating other than
    # POSITIVE_RATING is counted as negative.
    POSITIVE_RATING = "üëç"
    NEGATIVE_RATING = "üëé"

    # Phrases searched for (case-insensitively, as substrings) in comments on
    # negative feedback.
    IMPROVEMENT_INDICATORS = (
        "more detail", "too long", "too short", "unclear", "confusing",
        "not relevant", "missing", "incorrect", "better explanation",
        "more examples", "simpler", "more technical", "more context",
        "incomplete", "off-topic", "vague", "specific", "concise",
    )

    def __init__(self, save_path: str = "../data/chatbot_feedback.json"):
        """Create the store and load any feedback already saved at ``save_path``."""
        self.save_path = save_path
        self.feedback_history: List[Dict[str, Any]] = []
        self.positive_feedback: List[Dict[str, Any]] = []
        self.negative_feedback: List[Dict[str, Any]] = []
        self.improvement_keywords: Dict[str, int] = {}
        self.load_feedback()

    def add_feedback(
        self,
        query: str,
        response: str,
        rating: str,  # "üëç" or "üëé"
        comment: Optional[str] = None,
        context_used: str = ""
    ):
        """Record user feedback on a response and persist it immediately.

        Args:
            query: The question that was asked.
            response: The answer that was rated.
            rating: ``POSITIVE_RATING`` for a thumbs up; anything else is
                stored as given but counted as negative.
            comment: Optional free text. Only comments on negative feedback
                are scanned for improvement keywords.
            context_used: Retrieved context shown to the model; only its word
                count is stored.
        """
        feedback_entry = {
            "timestamp": datetime.now().isoformat(),
            "query": query,
            "response": response,
            "rating": rating,
            "comment": comment,
            "response_length": len(response.split()),
            "context_length": len(context_used.split()) if context_used else 0,
            "session_number": len(self.feedback_history) + 1
        }

        self.feedback_history.append(feedback_entry)

        if rating == self.POSITIVE_RATING:
            self.positive_feedback.append(feedback_entry)
        else:
            self.negative_feedback.append(feedback_entry)
            if comment:
                self._extract_improvement_keywords(comment)

        self.save_feedback()

    def _extract_improvement_keywords(self, comment: str):
        """Increment the counter of every indicator phrase found in ``comment``."""
        comment_lower = comment.lower()
        for indicator in self.IMPROVEMENT_INDICATORS:
            if indicator in comment_lower:
                self.improvement_keywords[indicator] = self.improvement_keywords.get(indicator, 0) + 1

    @classmethod
    def _positive_share(cls, entries: List[Dict[str, Any]]) -> float:
        """Return the percentage of ``entries`` rated positive (0.0 when empty)."""
        if not entries:
            return 0.0
        positives = sum(1 for entry in entries if entry.get("rating") == cls.POSITIVE_RATING)
        return (positives / len(entries)) * 100

    def get_satisfaction_rate(self) -> float:
        """Return the percentage of all feedback that is positive (0.0 when empty)."""
        if not self.feedback_history:
            return 0.0
        return (len(self.positive_feedback) / len(self.feedback_history)) * 100

    def get_improvement_insights(self) -> Dict[str, Any]:
        """Summarise counts, average response lengths and the top three issues.

        The same keys are returned whether or not feedback exists, so callers
        can index the result without checking ``total_feedback`` first. With
        no feedback, an extra ``insights`` message is included.
        """
        if not self.feedback_history:
            return {
                "total_feedback": 0,
                "positive_count": 0,
                "negative_count": 0,
                "satisfaction_rate": 0.0,
                "positive_avg_length": 0,
                "negative_avg_length": 0,
                "top_issues": [],
                "insights": "No feedback yet",
            }

        positive_avg_length = np.mean([f["response_length"] for f in self.positive_feedback]) if self.positive_feedback else 0
        negative_avg_length = np.mean([f["response_length"] for f in self.negative_feedback]) if self.negative_feedback else 0

        # Most frequently mentioned improvement phrases, highest count first.
        top_issues = sorted(self.improvement_keywords.items(), key=lambda x: x[1], reverse=True)[:3]

        return {
            "total_feedback": len(self.feedback_history),
            "positive_count": len(self.positive_feedback),
            "negative_count": len(self.negative_feedback),
            "satisfaction_rate": self.get_satisfaction_rate(),
            "positive_avg_length": int(positive_avg_length),
            "negative_avg_length": int(negative_avg_length),
            "top_issues": top_issues
        }

    def generate_system_prompt(self) -> str:
        """Build the system prompt, extended with instructions learned from feedback.

        With fewer than three feedback entries the base prompt is returned
        unchanged. Each instruction is appended at most once, even when
        several top issues map to the same instruction.
        """
        base_prompt = "You are a helpful AI assistant. Answer questions accurately based on the provided context."

        if len(self.feedback_history) < 3:
            return base_prompt

        insights = self.get_improvement_insights()
        adaptations: List[str] = []

        def adapt(instruction: str) -> None:
            # Keep first-seen order but never repeat an instruction.
            if instruction not in adaptations:
                adaptations.append(instruction)

        # Length preference: compare average length of liked vs. disliked answers.
        if insights["positive_avg_length"] > 0 and insights["negative_avg_length"] > 0:
            if insights["positive_avg_length"] < insights["negative_avg_length"]:
                adapt("Keep responses concise and to the point.")
            else:
                adapt("Provide detailed, comprehensive answers.")

        # Explicit complaints taken from negative-feedback comments.
        for issue, _count in insights.get("top_issues", []):
            if "too long" in issue:
                adapt("Be concise without unnecessary elaboration.")
            elif "more detail" in issue or "incomplete" in issue:
                adapt("Provide thorough explanations with sufficient detail.")
            elif "unclear" in issue or "confusing" in issue:
                adapt("Use clear, simple language and structure your answers well.")
            elif "not relevant" in issue or "off-topic" in issue:
                adapt("Focus strictly on the question asked using only the relevant context.")
            elif "more examples" in issue:
                adapt("Include concrete examples when helpful.")
            elif "more context" in issue:
                adapt("Provide background information when necessary.")

        if adaptations:
            return base_prompt + " " + " ".join(adaptations)

        return base_prompt

    def get_recent_improvement_trend(self, window_size: int = 5) -> Dict[str, Any]:
        """Compare satisfaction in the latest window with the window before it.

        Args:
            window_size: Number of entries per window; must be at least 1.

        Returns:
            A dict with ``trend`` (``"improving"``, ``"declining"``,
            ``"stable"`` or ``"insufficient_data"``), ``recent_rate``,
            ``previous_rate`` and ``improvement`` (recent minus previous, in
            percentage points). All four keys are always present, and
            ``improvement`` is 0.0 whenever the trend is
            ``"insufficient_data"``.

        Raises:
            ValueError: If ``window_size`` is smaller than 1.
        """
        if window_size < 1:
            raise ValueError("window_size must be a positive integer")

        if len(self.feedback_history) < window_size:
            return {
                "trend": "insufficient_data",
                "recent_rate": 0.0,
                "previous_rate": 0.0,
                "improvement": 0.0,
            }

        recent = self.feedback_history[-window_size:]
        recent_rate = self._positive_share(recent)

        previous = self.feedback_history[-window_size * 2:-window_size]
        if len(previous) < window_size:
            # No complete earlier window to compare against, so reporting a
            # change against a made-up 0% baseline would be misleading.
            return {
                "trend": "insufficient_data",
                "recent_rate": recent_rate,
                "previous_rate": 0.0,
                "improvement": 0.0,
            }

        previous_rate = self._positive_share(previous)
        if recent_rate > previous_rate:
            trend = "improving"
        elif recent_rate < previous_rate:
            trend = "declining"
        else:
            trend = "stable"

        return {
            "trend": trend,
            "recent_rate": recent_rate,
            "previous_rate": previous_rate,
            "improvement": recent_rate - previous_rate
        }

    def save_feedback(self):
        """Write the history and keyword counts to ``save_path`` as JSON.

        The data is written to a sibling ``.tmp`` file and then moved into
        place, so an interrupted write does not leave a truncated file at
        ``save_path``.
        """
        directory = os.path.dirname(self.save_path)
        # A bare filename has no directory part; os.makedirs("") would raise.
        if directory:
            os.makedirs(directory, exist_ok=True)

        tmp_path = f"{self.save_path}.tmp"
        with open(tmp_path, 'w', encoding="utf-8") as f:
            json.dump({
                "feedback_history": self.feedback_history,
                "improvement_keywords": self.improvement_keywords
            }, f, indent=2)
        os.replace(tmp_path, self.save_path)

    def load_feedback(self):
        """Load feedback from ``save_path`` if the file exists.

        The positive/negative lists are rebuilt from scratch, so calling this
        more than once does not duplicate entries. Nothing changes when the
        file is absent.
        """
        if not os.path.exists(self.save_path):
            return

        with open(self.save_path, 'r', encoding="utf-8") as f:
            data = json.load(f)

        self.feedback_history = data.get("feedback_history", [])
        self.improvement_keywords = data.get("improvement_keywords", {})

        self.positive_feedback = []
        self.negative_feedback = []
        for entry in self.feedback_history:
            if entry.get("rating") == self.POSITIVE_RATING:
                self.positive_feedback.append(entry)
            else:
                self.negative_feedback.append(entry)

        print(f"‚úì Loaded {len(self.feedback_history)} feedback entries from memory")

# Create the shared feedback store; its constructor loads any saved history.
feedback_system = ChatbotFeedbackSystem()
print("‚úì Feedback system initialized")

# Only report a satisfaction rate when earlier feedback was loaded.
if len(feedback_system.feedback_history) > 0:
    print(f"  Current satisfaction rate: {feedback_system.get_satisfaction_rate():.1f}%")

‚úì Loaded 2 feedback entries from memory
‚úì Feedback system initialized
  Current satisfaction rate: 50.0%


## RAG Pipeline with Adaptive Response Generation

In [13]:
def retrieve_documents(
    vectordb: FAISS,
    query: str,
    stage1_k: int = STAGE1_K,
    top_k: int = TOP_K_RERANKED
) -> List[Document]:
    """Run two-stage retrieval: similarity search, then cross-encoder reranking.

    Args:
        vectordb: Vector store to search.
        query: The user question.
        stage1_k: Number of candidates requested from the similarity search.
        top_k: Number of documents kept after reranking.

    Returns:
        The reranked documents, as returned by ``cross_encoder_rerank``.
    """
    return cross_encoder_rerank(
        query,
        vectordb.similarity_search(query, k=stage1_k),
        top_k=top_k,
    )

def generate_response(
    query: str,
    context_docs: List[Document],
    feedback_system: ChatbotFeedbackSystem,
    temperature: float = 0.7
) -> Tuple[str, str]:
    """Answer a question from retrieved context using the adaptive system prompt.

    Args:
        query: The user question.
        context_docs: Retrieved documents; their ``page_content`` values are
            joined with blank lines to form the context.
        feedback_system: Supplies the system prompt via
            ``generate_system_prompt()``.
        temperature: Sampling temperature forwarded to ``get_llm``.

    Returns:
        A pair ``(response, context)``: the stripped model reply and the
        context string that was placed in the prompt.
    """
    context = "\n\n".join(doc.page_content for doc in context_docs)

    # The context is wrapped in triple quotes inside the user message.
    prompt = f"""Context:
\"\"\"{context}\"\"\"

Question: {query}

Answer:"""

    messages = [
        {"role": "system", "content": feedback_system.generate_system_prompt()},
        {"role": "user", "content": prompt},
    ]

    reply = get_llm(temperature=temperature).invoke(messages)
    return reply.content.strip(), context

# Status line printed once the retrieval and generation functions are defined.
print("‚úì RAG pipeline ready")

‚úì RAG pipeline ready


## Load Data and Build Index

In [14]:
# Configuration: Choose data source
DATA_SOURCE = "squad"  # Options: "squad" or "pdf"
PDF_DIRECTORY = "../data/pdfs"

print(f"üìö Loading data from: {DATA_SOURCE}")

# Reject unknown sources before any loading starts.
if DATA_SOURCE not in ("squad", "pdf"):
    raise ValueError("DATA_SOURCE must be 'squad' or 'pdf'")

if DATA_SOURCE == "pdf":
    base_docs = load_pdf_documents(PDF_DIRECTORY)
    if not base_docs:
        print("\n‚ö†Ô∏è  No PDF documents found. Please add PDFs to the directory.")
else:
    # qa_df is only defined on this branch.
    base_docs, qa_df = load_squad_subset(max_examples=600)
    print(f"‚úì Loaded {len(base_docs)} documents from SQuAD")

# Build vectorstore; later cells check `vectordb is not None` before using it.
if not base_docs:
    vectordb = None
    print("‚ö†Ô∏è  No documents to index")
else:
    print("\nüî® Building vectorstore...")
    vectordb, chunks = build_vectorstore(base_docs)
    print(f"‚úì Vectorstore built with {len(chunks)} chunks")
    print("‚úì System ready for queries")

üìö Loading data from: squad
‚úì Loaded 496 documents from SQuAD

üî® Building vectorstore...
‚úì Loaded 496 documents from SQuAD

üî® Building vectorstore...
‚úì Vectorstore built with 1272 chunks
‚úì System ready for queries
‚úì Vectorstore built with 1272 chunks
‚úì System ready for queries


## Gradio Interface

Interactive chatbot UI with feedback collection

In [18]:
# Gradio is optional: record whether the import worked in GRADIO_AVAILABLE so
# the UI cells below can be skipped instead of failing with ImportError.
try:
    import gradio as gr
    GRADIO_AVAILABLE = True
    print("‚úì Gradio available")
except ImportError:
    GRADIO_AVAILABLE = False
    print("‚ö†Ô∏è  Gradio not installed. Install with: pip install gradio")
    print("   Continuing without Gradio interface...")

# The UI is only built when Gradio imported successfully and an index exists.
if GRADIO_AVAILABLE and vectordb is not None:
    
    # State management for current response
    # One dict captured by the handlers below: chatbot_respond() fills it and
    # thumbs_up()/thumbs_down() read it to know which answer is being rated.
    # NOTE(review): this is a single object for the whole kernel, so it looks
    # like concurrent users would overwrite each other's entry — confirm.
    current_query = {"query": "", "response": "", "context": ""}
    
    def chatbot_respond(query: str, chat_history: List[Tuple[str, str]]) -> Tuple[List[Tuple[str, str]], str]:
        """Answer a question and append the exchange to the chat history.

        Args:
            query: Text from the question box. ``None`` or blank input is
                ignored.
            chat_history: Existing ``(question, answer)`` pairs; ``None`` is
                treated as an empty history.

        Returns:
            A pair ``(history, "")``: a new history list (the input list is
            not modified) and an empty string that clears the question box.
        """
        # Work on a copy so the caller's list is never mutated, and tolerate
        # a missing history.
        history = list(chat_history) if chat_history else []

        if not query or not query.strip():
            return history, ""

        # Retrieve and generate
        top_docs = retrieve_documents(vectordb, query)
        response, context = generate_response(query, top_docs, feedback_system)

        # Remember this exchange so the feedback buttons know what is rated.
        current_query["query"] = query
        current_query["response"] = response
        current_query["context"] = context

        history.append((query, response))

        return history, ""
    
    def thumbs_up(chat_history: List[Tuple[str, str]]) -> str:
        """Record positive feedback for the most recent response.

        Args:
            chat_history: Current chat history from the UI (not used).

        Returns:
            A status message for the feedback box.
        """
        if not current_query["response"]:
            return "No response to rate."

        feedback_system.add_feedback(
            query=current_query["query"],
            response=current_query["response"],
            rating="üëç",
            context_used=current_query["context"]
        )
        # Mark the response as rated: a second click would otherwise store a
        # duplicate entry and inflate the satisfaction statistics.
        current_query["response"] = ""
        return "‚úì Positive feedback recorded! The system will learn from this."
    
    def thumbs_down(chat_history: List[Tuple[str, str]], comment: str) -> str:
        """Record negative feedback, with an optional comment, for the latest response.

        Args:
            chat_history: Current chat history from the UI (not used).
            comment: Free-text explanation; ``None`` or blank text is stored
                as no comment.

        Returns:
            A status message that includes the updated totals.
        """
        if not current_query["response"]:
            return "No response to rate."

        feedback_system.add_feedback(
            query=current_query["query"],
            response=current_query["response"],
            rating="üëé",
            comment=comment if comment and comment.strip() else None,
            context_used=current_query["context"]
        )
        # Mark the response as rated: a second click would otherwise store a
        # duplicate entry and count the same complaint twice.
        current_query["response"] = ""

        insights = feedback_system.get_improvement_insights()
        return f"‚úì Feedback recorded! Total: {insights['total_feedback']} | Satisfaction: {insights['satisfaction_rate']:.1f}%\nThe system will improve based on your feedback."
    
    def get_stats() -> str:
        """Get feedback statistics."""
        insights = feedback_system.get_improvement_insights()
        
        if insights["total_feedback"] == 0:
            return "No feedback collected yet. Start chatting and provide ratings!"
        
        trend = feedback_system.get_recent_improvement_trend()
        
        stats = f"""üìä **Feedback Statistics**
        
**Overall Performance:**
- Total Interactions: {insights['total_feedback']}
- üëç Positive: {insights['positive_count']} ({insights['satisfaction_rate']:.1f}%)
- üëé Negative: {insights['negative_count']}

**Recent Trend (Last 5):**
- Trend: {trend['trend'].upper()}
- Recent Satisfaction: {trend['recent_rate']:.1f}%
- Previous Satisfaction: {trend['previous_rate']:.1f}%
- Change: {trend.get('improvement', 0.0):+.1f}%

**Response Characteristics:**
- Positive Avg Length: {insights['positive_avg_length']} words
- Negative Avg Length: {insights['negative_avg_length']} words
"""
        
        if insights.get("top_issues"):
            stats += "\n**Top Improvement Areas:**\n"
            for issue, count in insights["top_issues"]:
                stats += f"- {issue}: {count} mentions\n"
        
        return stats
    
    # Create Gradio interface
    # Layout: a wide left column (chat, question box, rating buttons, comment
    # box, feedback status) and a narrow right column (statistics and the
    # current adaptive system prompt). The Blocks object is bound to `demo`.
    with gr.Blocks(title="RAG Chatbot with Feedback Learning", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# ü§ñ RAG Chatbot with Feedback Learning")
        gr.Markdown("Ask questions and provide feedback to help the system improve!")
        
        with gr.Row():
            with gr.Column(scale=2):
                # NOTE(review): the recorded cell output shows a warning that
                # points at this gr.Chatbot( call — confirm whether the
                # installed Gradio version expects an extra argument here.
                chatbot = gr.Chatbot(
                    label="Chat History",
                    height=400,
                    show_copy_button=True
                )
                
                with gr.Row():
                    query_input = gr.Textbox(
                        label="Your Question",
                        placeholder="Ask me anything...",
                        lines=2,
                        scale=4
                    )
                    submit_btn = gr.Button("Send", variant="primary", scale=1)
                
                gr.Markdown("### üí¨ Rate the Response")
                with gr.Row():
                    thumbs_up_btn = gr.Button("üëç Good Response", variant="primary", scale=1)
                    thumbs_down_btn = gr.Button("üëé Needs Improvement", variant="stop", scale=1)
                
                # Only the thumbs-down handler receives this comment.
                feedback_comment = gr.Textbox(
                    label="Feedback Comment (optional - helps system learn)",
                    placeholder="What could be improved? (e.g., 'too long', 'more detail needed', 'unclear')",
                    lines=2
                )
                
                feedback_output = gr.Textbox(label="Feedback Status", lines=2)
            
            with gr.Column(scale=1):
                gr.Markdown("### üìà Learning Progress")
                # get_stats() is called once here to produce the initial text.
                stats_output = gr.Markdown(get_stats())
                refresh_stats_btn = gr.Button("üîÑ Refresh Stats", variant="secondary")
                
                gr.Markdown("### üí° Current System Prompt")
                system_prompt_display = gr.Textbox(
                    value=feedback_system.generate_system_prompt(),
                    label="Adaptive Prompt",
                    lines=6,
                    interactive=False
                )
        
        # Example queries
        gr.Markdown("### üìù Example Questions")
        example_queries = [
            "What is the capital of France?",
            "Who invented the telephone?",
            "When did World War II end?",
            "What is photosynthesis?"
        ]
        gr.Examples(examples=example_queries, inputs=query_input)
        
        # Event handlers
        # Send button: answer the question, then refresh the prompt display.
        submit_btn.click(
            fn=chatbot_respond,
            inputs=[query_input, chatbot],
            outputs=[chatbot, query_input]
        ).then(
            fn=lambda: feedback_system.generate_system_prompt(),
            outputs=system_prompt_display
        )
        
        # Submitting from the text box is wired to the same chain as the
        # Send button.
        query_input.submit(
            fn=chatbot_respond,
            inputs=[query_input, chatbot],
            outputs=[chatbot, query_input]
        ).then(
            fn=lambda: feedback_system.generate_system_prompt(),
            outputs=system_prompt_display
        )
        
        # Thumbs up: record feedback, then refresh statistics and prompt.
        thumbs_up_btn.click(
            fn=thumbs_up,
            inputs=[chatbot],
            outputs=feedback_output
        ).then(
            fn=get_stats,
            outputs=stats_output
        ).then(
            fn=lambda: feedback_system.generate_system_prompt(),
            outputs=system_prompt_display
        )
        
        # Thumbs down: record feedback with the comment, refresh statistics,
        # clear the comment box, then refresh the prompt.
        thumbs_down_btn.click(
            fn=thumbs_down,
            inputs=[chatbot, feedback_comment],
            outputs=feedback_output
        ).then(
            fn=get_stats,
            outputs=stats_output
        ).then(
            fn=lambda: "",
            outputs=feedback_comment
        ).then(
            fn=lambda: feedback_system.generate_system_prompt(),
            outputs=system_prompt_display
        )
        
        # Manual refresh of the statistics panel.
        refresh_stats_btn.click(
            fn=get_stats,
            outputs=stats_output
        )
    
    # The interface is only defined here; launching happens in a later cell.
    print("\n" + "="*80)
    print("üöÄ Gradio interface ready!")
    print("="*80)
    print("\nRun the cell below to launch the interface")

else:
    # Explain which precondition of the `if` above was not met; both
    # messages are printed when both conditions fail.
    if not GRADIO_AVAILABLE:
        print("\n‚ö†Ô∏è  Gradio not available. Install with: pip install gradio")
    if vectordb is None:
        print("\n‚ö†Ô∏è  No vectorstore available. Load data first.")

‚úì Gradio available


  chatbot = gr.Chatbot(



üöÄ Gradio interface ready!

Run the cell below to launch the interface


## Launch Gradio Interface

Run this cell to start the interactive chatbot

In [21]:
# `demo` only exists when the interface cell above built it, so the same two
# preconditions are checked before launching.
if not GRADIO_AVAILABLE or vectordb is None:
    print("Cannot launch Gradio interface. Check previous cells for errors.")
else:
    launch_options = {
        "share": False,  # Set to True to create a public link
        "server_name": "127.0.0.1",
        "server_port": 7861,
        "show_error": True,
    }
    # Launch the interface
    demo.launch(**launch_options)

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


Traceback (most recent call last):
  File "/Users/ebpearls1/Desktop/Advanced-RAG-demo/.venv/lib/python3.10/site-packages/gradio/queueing.py", line 759, in process_events
    response = await route_utils.call_process_api(
  File "/Users/ebpearls1/Desktop/Advanced-RAG-demo/.venv/lib/python3.10/site-packages/gradio/route_utils.py", line 354, in call_process_api
    output = await app.get_blocks().process_api(
  File "/Users/ebpearls1/Desktop/Advanced-RAG-demo/.venv/lib/python3.10/site-packages/gradio/blocks.py", line 2116, in process_api
    result = await self.call_function(
  File "/Users/ebpearls1/Desktop/Advanced-RAG-demo/.venv/lib/python3.10/site-packages/gradio/blocks.py", line 1623, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
  File "/Users/ebpearls1/Desktop/Advanced-RAG-demo/.venv/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "/Users/ebpearls1/Deskt

## Command Line Testing (Alternative to Gradio)

Test the chatbot without Gradio interface

In [None]:
def test_chatbot_cli(vectordb: FAISS, feedback_system: ChatbotFeedbackSystem, query: str):
    """Run one query through retrieval and generation, printing each step.

    Args:
        vectordb: Vector store to search.
        feedback_system: Supplies the adaptive system prompt.
        query: The question to ask.

    Returns:
        A pair ``(response, context)`` as returned by ``generate_response``.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    print("\n" + heavy_rule)
    print(f"üîç QUERY: {query}")
    print(heavy_rule)

    # Retrieve and generate
    top_docs = retrieve_documents(vectordb, query)
    print(f"‚úì Retrieved {len(top_docs)} relevant documents")

    response, context = generate_response(query, top_docs, feedback_system)

    print("\nüìù RESPONSE:")
    print(light_rule)
    print(response)
    print(light_rule)
    word_count = len(response.split())
    print(f"Length: {word_count} words")
    print("\n" + heavy_rule)

    return response, context

# Example test
# Runs only when an index was built. test_query, test_response and
# test_context are kept at module level so the manual feedback cell below
# can reference them.
if vectordb is not None:
    test_query = "What is the capital of France?"
    test_response, test_context = test_chatbot_cli(vectordb, feedback_system, test_query)

## Manual Feedback Recording (For CLI Testing)

In [None]:
# Record feedback for the test query above
# Uncomment and modify to record feedback:

# feedback_system.add_feedback(
#     query=test_query,
#     response=test_response,
#     rating="üëç",  # or "üëé"
#     comment="Good response, very clear",  # Optional
#     context_used=test_context
# )

# print("‚úì Feedback recorded")
# print(f"Current satisfaction rate: {feedback_system.get_satisfaction_rate():.1f}%")

## Feedback Analysis and Improvement Visualization

In [None]:
def visualize_feedback_improvement():
    """Plot feedback trends and print a textual summary.

    Draws a 2x2 figure from ``feedback_system.feedback_history``: cumulative
    satisfaction, rolling satisfaction (window of 5), rating counts, and a
    box plot of response length by rating. Then prints the overall counts,
    the recent trend, average response lengths and the top improvement areas.
    Prints a notice and returns without plotting when there is no feedback.
    """
    if not feedback_system.feedback_history:
        print("No feedback data to visualize yet.")
        return

    # Imported here so the rest of the notebook runs without matplotlib.
    import matplotlib.pyplot as plt

    df = pd.DataFrame(feedback_system.feedback_history)

    # Create figure with subplots
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle('Chatbot Feedback Analysis & Improvement Tracking', fontsize=16, fontweight='bold')

    # 1. Cumulative satisfaction rate over time
    df['positive'] = (df['rating'] == 'üëç').astype(int)
    df['cumulative_satisfaction'] = df['positive'].expanding().mean() * 100

    ax1 = axes[0, 0]
    ax1.plot(df['session_number'], df['cumulative_satisfaction'], marker='o', linewidth=2, markersize=6)
    ax1.axhline(y=50, color='r', linestyle='--', alpha=0.3, label='50% baseline')
    ax1.set_xlabel('Session Number')
    ax1.set_ylabel('Satisfaction Rate (%)')
    ax1.set_title('Cumulative Satisfaction Rate Over Time')
    ax1.grid(True, alpha=0.3)
    ax1.legend()
    ax1.set_ylim([0, 100])

    # 2. Rolling satisfaction rate (window=5)
    if len(df) >= 5:
        df['rolling_satisfaction'] = df['positive'].rolling(window=5, min_periods=1).mean() * 100
        ax2 = axes[0, 1]
        ax2.plot(df['session_number'], df['rolling_satisfaction'], marker='s', linewidth=2, markersize=6, color='green')
        ax2.axhline(y=50, color='r', linestyle='--', alpha=0.3, label='50% baseline')
        ax2.set_xlabel('Session Number')
        ax2.set_ylabel('Satisfaction Rate (%)')
        ax2.set_title('Rolling Satisfaction Rate (Window=5)')
        ax2.grid(True, alpha=0.3)
        ax2.legend()
        ax2.set_ylim([0, 100])
    else:
        axes[0, 1].text(0.5, 0.5, 'Need 5+ sessions for rolling average', 
                       ha='center', va='center', transform=axes[0, 1].transAxes)
        axes[0, 1].set_title('Rolling Satisfaction Rate (Window=5)')

    # 3. Rating distribution
    ax3 = axes[1, 0]
    rating_counts = df['rating'].value_counts()
    colors = ['#4CAF50' if r == 'üëç' else '#F44336' for r in rating_counts.index]
    ax3.bar(rating_counts.index, rating_counts.values, color=colors, alpha=0.7, edgecolor='black')
    ax3.set_xlabel('Rating')
    ax3.set_ylabel('Count')
    ax3.set_title('Rating Distribution')
    ax3.grid(True, alpha=0.3, axis='y')

    # Add percentage labels on bars
    total = len(df)
    for i, (rating, count) in enumerate(rating_counts.items()):
        percentage = (count / total) * 100
        ax3.text(i, count, f'{count}\n({percentage:.1f}%)', ha='center', va='bottom')

    # 4. Response length comparison
    ax4 = axes[1, 1]
    positive_lengths = df[df['rating'] == 'üëç']['response_length']
    negative_lengths = df[df['rating'] == 'üëé']['response_length']

    if len(positive_lengths) > 0 and len(negative_lengths) > 0:
        ax4.boxplot([positive_lengths, negative_lengths], labels=['üëç Positive', 'üëé Negative'])
        ax4.set_ylabel('Response Length (words)')
        ax4.set_title('Response Length by Rating')
        ax4.grid(True, alpha=0.3, axis='y')
    else:
        ax4.text(0.5, 0.5, 'Need both positive and negative feedback', 
                ha='center', va='center', transform=ax4.transAxes)
        ax4.set_title('Response Length by Rating')

    plt.tight_layout()
    plt.show()

    # Print detailed insights
    insights = feedback_system.get_improvement_insights()
    trend = feedback_system.get_recent_improvement_trend()

    print("\n" + "="*80)
    print("üìä DETAILED FEEDBACK INSIGHTS")
    print("="*80)
    print(f"\nüìà Overall Performance:")
    print(f"   Total Feedback: {insights['total_feedback']}")
    print(f"   üëç Positive: {insights['positive_count']} ({insights['satisfaction_rate']:.1f}%)")
    print(f"   üëé Negative: {insights['negative_count']}")

    print(f"\nüìä Recent Trend (Last 5 sessions):")
    print(f"   Status: {trend['trend'].upper()}")
    print(f"   Recent Rate: {trend['recent_rate']:.1f}%")
    print(f"   Previous Rate: {trend['previous_rate']:.1f}%")
    # The trend dict may lack 'improvement' when there are too few entries,
    # so fall back to 0.0. (The previous expression built a tuple and applied
    # a float format spec to it, which raises TypeError.)
    print(f"   Change: {trend.get('improvement', 0.0):+.1f}%")

    print(f"\nüìè Response Characteristics:")
    print(f"   Positive Responses Avg: {insights['positive_avg_length']} words")
    print(f"   Negative Responses Avg: {insights['negative_avg_length']} words")

    if insights.get("top_issues"):
        print(f"\n‚ö†Ô∏è  Top Improvement Areas:")
        for issue, count in insights["top_issues"]:
            print(f"   - {issue}: {count} mentions")

    print("\n" + "="*80)

# Run visualization (only when at least one feedback entry exists)
if not feedback_system.feedback_history:
    print("üìä No feedback data yet. Use the chatbot and provide feedback first!")
else:
    visualize_feedback_improvement()

## Export Feedback Data

In [None]:
def export_feedback_data(output_path: str = "../data/chatbot_feedback_export.csv"):
    """Export the feedback history to a CSV file and print a short summary.

    Args:
        output_path: Destination CSV file. Its parent directory is created
            when missing. Defaults to the location used previously.

    Prints a notice and returns without writing when there is no feedback.
    """
    if not feedback_system.feedback_history:
        print("No feedback to export")
        return

    # to_csv does not create directories, so make sure the target exists.
    # A bare filename has no directory part and needs nothing created.
    export_dir = os.path.dirname(output_path)
    if export_dir:
        os.makedirs(export_dir, exist_ok=True)

    df = pd.DataFrame(feedback_system.feedback_history)
    df.to_csv(output_path, index=False)

    print(f"‚úì Feedback data exported to: {output_path}")
    print(f"  Total entries: {len(df)}")
    print(f"  Satisfaction rate: {feedback_system.get_satisfaction_rate():.1f}%")

    # Display sample
    print("\nüìÑ Sample of exported data:")
    print(df[['timestamp', 'query', 'rating', 'response_length', 'comment']].tail())

# Uncomment to export:
# export_feedback_data()

## View Current System Adaptations

In [None]:
def show_system_adaptations():
    """Print the current system prompt and the feedback patterns behind it.

    Always prints the prompt returned by
    ``feedback_system.generate_system_prompt()``. With fewer than three
    feedback entries a waiting notice follows; otherwise the learned length
    preference (when both averages are positive) and the top issues are
    listed.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    print("\n" + heavy_rule)
    print("üß† SYSTEM ADAPTATIONS BASED ON FEEDBACK")
    print(heavy_rule)

    print("\nüìã Current System Prompt:")
    print(light_rule)
    print(feedback_system.generate_system_prompt())
    print(light_rule)

    insights = feedback_system.get_improvement_insights()

    if insights["total_feedback"] >= 3:
        print("\nüéØ Adaptation Details:")

        liked_len = insights["positive_avg_length"]
        disliked_len = insights["negative_avg_length"]
        if liked_len > 0 and disliked_len > 0:
            # Shorter liked answers indicate a preference for brevity.
            if liked_len < disliked_len:
                print("   ‚úì Learned to prefer CONCISE responses")
            else:
                print("   ‚úì Learned to prefer DETAILED responses")
            print(f"     (Positive avg: {liked_len} words, Negative avg: {disliked_len} words)")

        top_issues = insights.get("top_issues")
        if top_issues:
            print("\n   ‚úì Addressing these issues:")
            for issue, count in top_issues:
                print(f"     - {issue} ({count} mentions)")
    else:
        print("\n‚è≥ Not enough feedback yet to show significant adaptations.")
        print("   Provide more feedback (at least 3) to see system improvements!")

    print("\n" + heavy_rule)

# Print the current prompt and adaptation details for the loaded feedback.
show_system_adaptations()

## Summary

This notebook implements:

- ✅ **Single-Response Chatbot**: One response per query (no A/B testing)
- ✅ **Thumbs Up/Down Feedback**: Simple rating system
- ✅ **Optional Text Feedback**: Users can explain what needs improvement
- ✅ **Adaptive Learning**: System adjusts based on feedback patterns
- ✅ **Gradio Interface**: Beautiful, interactive UI for chatting
- ✅ **Improvement Tracking**: Visualizations show satisfaction trends over time
- ✅ **Performance Metrics**: Real-time stats on satisfaction rate and improvements

### Key Features:

1. **Feedback Learning System**: Analyzes thumbs up/down ratings and text comments
2. **Adaptive System Prompts**: Automatically adjusts based on user preferences
3. **Improvement Detection**: Tracks satisfaction rate trends (improving/declining/stable)
4. **Interactive UI**: Gradio interface with chat history and real-time stats
5. **Visualization**: Charts showing cumulative satisfaction, rolling trends, and response characteristics

### How to Use:

1. **With Gradio**: Launch the interface and chat naturally with feedback buttons
2. **Without Gradio**: Use CLI testing functions and manually record feedback
3. **Track Progress**: Run visualization cell to see improvement over time

The system learns from your feedback and adapts its responses to match your preferences! 🚀