#### RAG System with Feedback Loop: Enhancing Retrieval and Response Quality

This system implements a Retrieval-Augmented Generation (RAG) approach with an integrated feedback loop. It aims to improve the quality and relevance of responses over time by incorporating user feedback and dynamically adjusting the retrieval process.

In [22]:
import os
import sys
from dotenv import load_dotenv
load_dotenv()
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from utility import encode_pdf, show_context, retrieve_context_per_question
from langchain_core.output_parsers import StrOutputParser
from typing import List, Any, Dict
from concurrent.futures import ThreadPoolExecutor, as_completed
from langchain_community.docstore.in_memory import InMemoryDocstore
from tqdm import tqdm
from langchain.vectorstores import Chroma, FAISS
import faiss
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from utility import replace_t_with_space
from langchain_experimental.text_splitter import SemanticChunker
import pymupdf
from pydantic import BaseModel, Field

In [30]:
def read_pdf_to_string(path):
    """
    Read a PDF document from the specified path and return its content as a string.

    Args:
        path (str): The file path to the PDF document.

    Returns:
        str: The concatenated text content of all pages in the PDF document,
            in page order.

    The function opens the document with PyMuPDF (``pymupdf``), extracts the
    text of every page, and joins the pieces into a single string.
    """
    # Use the document as a context manager so it (and its underlying file
    # handle) is closed even if text extraction fails part-way through.
    with pymupdf.open(path) as doc:
        # Iterating the document yields its pages in order; join once instead
        # of growing a string page by page.
        return "".join(page.get_text() for page in doc)

In [31]:
def encode_from_string(content, chunk_size=1000, chunk_overlap=200,
                       model_name="all-MiniLM-L6-v2"):
    """
    Encodes a string into a FAISS vector store using HuggingFace embeddings.

    Args:
        content (str): The text content to be encoded.
        chunk_size (int): The size of each chunk of text.
        chunk_overlap (int): The overlap between chunks.
        model_name (str): Name of the HuggingFace embedding model to use.
            Defaults to "all-MiniLM-L6-v2".

    Returns:
        FAISS: A vector store containing the encoded content. Every chunk
            starts with ``metadata['relevance_score'] == 1.0``.

    Raises:
        ValueError: If the input content is not valid.
        RuntimeError: If there is an error during the encoding process. The
            original exception is attached as ``__cause__``.
    """

    if not isinstance(content, str) or not content.strip():
        raise ValueError("Content must be a non-empty string.")

    if not isinstance(chunk_size, int) or chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer.")

    if not isinstance(chunk_overlap, int) or chunk_overlap < 0:
        raise ValueError("chunk_overlap must be a non-negative integer.")

    try:
        # Split the content into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
            is_separator_regex=False,
        )
        chunks = text_splitter.create_documents([content])

        # Seed every chunk with a neutral relevance score; feedback-based
        # re-ranking later multiplies this value.
        for chunk in chunks:
            chunk.metadata['relevance_score'] = 1.0

        # Generate embeddings and create the vector store
        embeddings = HuggingFaceEmbeddings(model_name=model_name)
        vectorstore = FAISS.from_documents(chunks, embeddings)

    except Exception as e:
        # Chain the original exception so the real failure stays visible in
        # the traceback instead of being reduced to its message.
        raise RuntimeError(f"An error occurred during the encoding process: {str(e)}") from e

    return vectorstore

In [32]:
# Location of the source PDF that seeds the knowledge base
file_path = "data/Understanding_Climate_Change.pdf"

# Extract the raw text, then chunk and embed it into a vector store
content = read_pdf_to_string(file_path)
vector_store = encode_from_string(content)

# Retriever configured to return three results per query
retriever = vector_store.as_retriever(search_kwargs=dict(k=3))

In [36]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Chat model served by Groq; the API key is read from the environment.
groq_api_key = os.getenv("GROQ_API_KEY")
llm = ChatGroq(groq_api_key=groq_api_key, model_name="llama-3.1-8b-instant")

# System instructions for the answering model. Built from adjacent string
# literals so no stray quote characters or line breaks leak into the prompt.
system_prompt = (
    "Use the given context to answer the question. "
    "If you don't know the answer, say you don't know. "
    "Use three sentence maximum and keep the answer concise. "
    "Context: {context}"
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("user", "{input}"),
    ]
)

# qa_chain answers from the documents it is given; retriever_chain is invoked
# with {"input": ...} and exposes the model's reply under the "answer" key.
qa_chain = create_stuff_documents_chain(llm, prompt)
retriever_chain = create_retrieval_chain(retriever, qa_chain)

In [17]:
#Function to format user feedback
def get_user_feedback(query, response, relevance, quality, comments=""):
    """Package one round of user feedback as a plain dictionary.

    Args:
        query: The question that was asked.
        response: The answer that was returned for it.
        relevance: Relevance rating; coerced with ``int()``.
        quality: Quality rating; coerced with ``int()``.
        comments: Optional free-text remarks (defaults to an empty string).

    Returns:
        dict: A record with the keys "query", "response", "relevance",
            "quality" and "comments", in that order.
    """
    feedback = {"query": query, "response": response}
    # Ratings are normalised to integers; relevance is converted first.
    feedback["relevance"] = int(relevance)
    feedback["quality"] = int(quality)
    feedback["comments"] = comments
    return feedback

In [18]:
#Function to store the feedback in json format
import json
def store_feedback(feedback, path="data/feedback_data.json"):
    """Append one feedback record to a JSON Lines file.

    Args:
        feedback (dict): JSON-serialisable feedback record.
        path (str): File to append to. Defaults to "data/feedback_data.json".

    Raises:
        TypeError: If ``feedback`` cannot be serialised to JSON. Nothing is
            written to the file in that case.

    Each record is stored as one JSON document on its own line. Missing
    parent directories of ``path`` are created.
    """
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    # Serialise before opening the file so a bad record can never leave a
    # half-written line behind and corrupt later reads.
    line = json.dumps(feedback)
    with open(path, "a", encoding="utf-8") as file:
        file.write(line + "\n")

In [19]:
#Function to read the feedback file
def load_feedback_data(path="data/feedback_data.json"):
    """Load all stored feedback records from a JSON Lines file.

    Args:
        path (str): File to read. Defaults to "data/feedback_data.json".

    Returns:
        list: One parsed record per non-blank line, in file order. An empty
            list if the file does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    feedback_data = []
    try:
        with open(path, "r", encoding="utf-8") as file:
            for line in file:
                line = line.strip()
                # Blank lines (e.g. a trailing newline added by an editor)
                # carry no record and would make json.loads fail.
                if not line:
                    continue
                feedback_data.append(json.loads(line))
    except FileNotFoundError:
        print("No feedback file found. Starting with empty feedback")
    return feedback_data


In [39]:
## Function to adjust relevance score in the documents

# Schema for LLM output
# Structured-output schema for the relevance verdict returned by the LLM.
# Documented with comments rather than a class docstring so that the schema
# itself only changes by the typo fix in the field description.
class Response(BaseModel):
    # Expected to be 'Yes' or 'No'; the type is a plain str, so the value is
    # not restricted to those two options by validation.
    answer : str = Field(description="The answer to the question. The options can be 'Yes' or 'No'")

def adjust_relevance_score(query:str,docs:List[Any],feedback_data:List[Dict[str,Any]]) -> List[Any]:
    """Re-rank retrieved documents using previously stored user feedback.

    For every document, each feedback record is shown to the LLM together
    with the current query and the document content. Records the LLM judges
    relevant contribute their "relevance" rating; the document's
    ``metadata['relevance_score']`` is multiplied by the average rating
    divided by 3.

    Args:
        query: The current user query.
        docs: Retrieved documents; each must expose ``page_content`` and a
            ``metadata`` dict. A missing 'relevance_score' is treated as 1.0.
        feedback_data: Feedback records with the keys "query", "response"
            and "relevance".

    Returns:
        A new list with the documents sorted by relevance score, highest
        first. The documents' metadata is updated in place.
    """

    def current_score(doc):
        # Documents that were never scored count as neutral.
        return doc.metadata.get("relevance_score", 1.0)

    # Nothing to judge: skip building the chain and calling the LLM.
    if not docs or not feedback_data:
        return sorted(docs, key=current_score, reverse=True)

    #prompt for relevance check
    prompt = PromptTemplate(
        template = """Determine if the following feedback response is relevant to the current query and document content.
        You are also provided with the Feedback original query that was used to generate the feedback response.
        Current query: {query}
        Feedback query: {feedback_query}
        Document content: {doc_content}
        Feedback response: {feedback_response}

        Is this feedback relevant? Respond with only 'Yes' or 'No'.
        """,
        input_variables=["query", "feedback_query", "doc_content", "feedback_response"],
    )
    relevance_chain = prompt | llm.with_structured_output(Response)

    #Loop over each document retrieved for the current query
    for doc in docs:
        relevant_feedback = []
        # check every feedback record against this document and the current query
        for feedback in feedback_data:
            payload = {
                "query":query,
                "feedback_query":feedback["query"],
                "doc_content":doc.page_content,
                "feedback_response":feedback["response"]
            }

            verdict = relevance_chain.invoke(payload).answer

            # The model is asked for 'Yes'/'No'; compare case-insensitively
            # and tolerate surrounding whitespace or trailing punctuation.
            if verdict.strip().lower().startswith("yes"):
                relevant_feedback.append(feedback)

        #Adjust the relevance score of the document based on feedback
        if relevant_feedback:
            average_relevance = sum(f["relevance"] for f in relevant_feedback) / len(relevant_feedback)
            # Assuming 1-5 scale, 3 is neutral
            doc.metadata['relevance_score'] = current_score(doc) * (average_relevance / 3)

    #Rerank documents based on relevance score
    return sorted(docs, key=current_score, reverse=True)
    

#### Demonstration of how to retrieve answers that take user feedback into account

In [41]:
query = "What is the greenhouse effect?"

# Get response from RAG system
response = retriever_chain.invoke({"input": query})["answer"]

# Example ratings standing in for real user input
relevance = 5
quality = 4

# Collect feedback
feedback = get_user_feedback(query, response, relevance, quality)

# Store feedback
store_feedback(feedback)

# Adjust relevance scores for future retrievals
docs = retriever.invoke(query)
adjusted_docs = adjust_relevance_score(query, docs, load_feedback_data())

# Update the retriever with adjusted docs
retriever.search_kwargs['k'] = len(adjusted_docs)
# NOTE(review): it is unclear whether the FAISS retriever consumes a 'docs'
# search kwarg; if it does not, the re-ranked order in adjusted_docs has no
# effect on the retrieval below. Confirm against the vector store's search API.
retriever.search_kwargs['docs'] = adjusted_docs

docs = retriever.invoke(query)


#### Function to fine-tune the vector index by adding queries and responses that received good feedback

In [42]:
def fine_tune_index(feedback_data: List[Dict[str, Any]], texts: List[str]) -> Any:
    """Rebuild the vector index from the original text plus well-rated Q&A pairs.

    Args:
        feedback_data: Feedback records with the keys "query", "response",
            "relevance" and "quality".
        texts: The original corpus. Either a single string or a list of
            strings is accepted.

    Returns:
        The vector store produced by ``encode_from_string`` for the combined
        text.
    """
    # Keep only feedback rated at least 4 on both relevance and quality
    good_responses = [f for f in feedback_data if f['relevance'] >= 4 and f['quality'] >= 4]

    # Each good query/response pair becomes one extra piece of text
    additional_texts = [f['query'] + " " + f['response'] for f in good_responses]

    # Accept a bare string as well as a list so both call styles work
    original_texts = [texts] if isinstance(texts, str) else list(texts)

    # Join with a separator so the last word of the corpus is not fused with
    # the first word of the appended feedback text
    all_texts = " ".join(original_texts + additional_texts)

    # Create a new index with original and high-quality texts
    return encode_from_string(all_texts)

#### Fine-tune the vector store periodically

In [None]:
# Periodically (e.g., daily or weekly), fine-tune the index
new_vectorstore = fine_tune_index(load_feedback_data(), content)
retriever = new_vectorstore.as_retriever()

# The existing retriever_chain was built around the previous retriever object;
# rebinding the `retriever` name alone does not change it, so rebuild the chain
# to make answers come from the refreshed index.
retriever_chain = create_retrieval_chain(retriever, qa_chain)