# Self-Correcting RAG System with Evaluation Loop

This notebook demonstrates a robust Retrieval-Augmented Generation (RAG) system that includes a self-correction mechanism. It uses a secondary "Evaluator" LLM to critique each response and, if the response is judged unacceptable, triggers a single retry that feeds the evaluator's feedback back to the generator. This pattern improves the reliability of outputs for user-facing applications.

In [None]:
# Import dependencies
from dotenv import load_dotenv
from openai import OpenAI
from pypdf import PdfReader
import gradio as gr
import os
from pydantic import BaseModel

In [None]:
# Initialize Environment
# Load settings from a local .env file. override=True is passed so that values
# from the file take precedence over variables already set in the process
# (per python-dotenv's documented behaviour -- confirm if this matters for you).
load_dotenv(override=True)
# Client used for the persona agent's completions. No credentials are passed
# explicitly; presumably the API key comes from the environment loaded above.
openai = OpenAI()

In [None]:
# Load Context Data (Simulated Profile)
# In a production environment, this would be retrieved from a vector database or document store.
# These three module-level values are interpolated verbatim into both the
# persona system prompt and the evaluator system prompt further down.

# Display name of the person the agent impersonates.
name = "Alex Chen"
# Short free-text biography; included under the "## Summary:" prompt heading.
summary = """
Alex Chen is a Senior AI Engineer with 8 years of experience in machine learning and distributed systems.
He specializes in building autonomous agents and scalable NLP pipelines.
Previously, he worked at TechCorp leading the Search Infrastructure team.
"""

# Plain-text experience and skills listing; included under the
# "## LinkedIn Profile:" prompt heading.
linkedin_profile = """
Experience:
- Senior AI Engineer @ FutureAI (2022-Present)
- Staff Software Engineer @ TechCorp (2018-2022)
- Backend Developer @ StartUpInc (2015-2018)

Skills: Python, PyTorch, Kubernetes, Kafka, OpenAI API, LangChain
"""

In [None]:
# Persona system prompt.
# Built as one expression from adjacent string literals: role instructions
# first, then the profile context, then a closing stay-in-character reminder.
# Only the pieces that interpolate a value carry the f prefix.
system_prompt = (
    f"You are acting as {name}. You are answering questions on {name}'s website, "
    f"particularly questions related to {name}'s career, background, skills and experience. "
    f"Your responsibility is to represent {name} for interactions on the website as faithfully as possible. "
    f"You are given a summary of {name}'s background and LinkedIn profile which you can use to answer questions. "
    "Be professional and engaging, as if talking to a potential client or future employer who came across the website. "
    "If you don't know the answer, say so."
    f"\n\n## Summary:\n{summary}\n\n## LinkedIn Profile:\n{linkedin_profile}\n\n"
    f"With this context, please chat with the user, always staying in character as {name}."
)

In [None]:
# Plain chat handler (no evaluation step)
def chat(message, history):
    """Answer ``message`` in persona and return the model's reply text.

    Args:
        message: The latest user message.
        history: Earlier turns; each entry is indexed by ``"role"`` and
            ``"content"``. Only those two keys are forwarded to the model,
            so any extra keys on a turn are dropped.

    Returns:
        The content of the first choice returned by the completion call.
    """
    conversation = [{"role": "system", "content": system_prompt}]
    for turn in history:
        conversation.append({"role": turn["role"], "content": turn["content"]})
    conversation.append({"role": "user", "content": message})

    completion = openai.chat.completions.create(model="gpt-4o-mini", messages=conversation)
    return completion.choices[0].message.content

In [None]:
# Define Evaluation Schema
# Structured verdict the evaluator model is asked to produce; this class is
# passed as response_format to the evaluator's parse call.
# (Documented with comments rather than a docstring so nothing extra ends up
# in the schema derived from the model.)
class Evaluation(BaseModel):
    # True when the evaluator judges the agent's reply acceptable.
    is_acceptable: bool
    # Evaluator's explanation; on rejection it is printed and fed into the retry prompt.
    feedback: str

In [None]:
# Evaluator system prompt.
# Single expression of adjacent literals: the evaluator's task description,
# the same profile context the agent was given, then the closing instruction.
evaluator_system_prompt = (
    "You are an evaluator that decides whether a response to a question is acceptable. "
    "You are provided with a conversation between a User and an Agent. Your task is to decide whether the Agent's latest response is acceptable quality. "
    f"The Agent is playing the role of {name} and is representing {name} on their website. "
    "The Agent has been instructed to be professional and engaging, as if talking to a potential client or future employer who came across the website. "
    f"The Agent has been provided with context on {name} in the form of their summary and LinkedIn details. Here's the information:"
    f"\n\n## Summary:\n{summary}\n\n## LinkedIn Profile:\n{linkedin_profile}\n\n"
    "With this context, please evaluate the latest response, replying with whether the response is acceptable and your feedback."
)

In [None]:
# Evaluator user-prompt builder
def evaluator_user_prompt(reply, message, history):
    """Build the user-role prompt that asks the evaluator to judge ``reply``.

    Args:
        reply: The agent's latest response (the text under evaluation).
        message: The user message that ``reply`` answers.
        history: Prior conversation; interpolated with its default string
            formatting, so a list of dicts appears as its Python repr.

    Returns:
        One string with the conversation, the latest user message, the
        latest agent response, and the closing evaluation instruction.
    """
    sections = (
        f"Here's the conversation between the User and the Agent: \n\n{history}\n\n",
        f"Here's the latest message from the User: \n\n{message}\n\n",
        f"Here's the latest response from the Agent: \n\n{reply}\n\n",
        "Please evaluate the response, replying with whether it is acceptable and your feedback.",
    )
    return "".join(sections)

In [None]:
# Initialize Evaluator Client (Google Gemini)
# The evaluator reuses the OpenAI client class, pointed at Google's
# OpenAI-compatible endpoint via base_url and authenticated with GOOGLE_API_KEY.
# NOTE(review): os.getenv returns None when GOOGLE_API_KEY is unset -- confirm
# how the client behaves with api_key=None (it may fall back to another
# environment variable) so that a key is never sent to the wrong provider.
gemini = OpenAI(
    api_key=os.getenv("GOOGLE_API_KEY"), 
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

In [None]:
# Evaluation Function
def evaluate(reply, message, history) -> Evaluation:
    """Ask the evaluator model whether ``reply`` is an acceptable answer.

    Args:
        reply: The agent's response under evaluation.
        message: The user message that ``reply`` answers.
        history: Prior conversation, passed through to the user-prompt builder.

    Returns:
        The structured ``Evaluation`` parsed from the evaluator's response.

    Raises:
        ValueError: If the evaluator's message carries no parsed verdict
            (for example a refusal), so callers never receive ``None`` where
            an ``Evaluation`` is promised.
    """
    messages = [
        {"role": "system", "content": evaluator_system_prompt},
        {"role": "user", "content": evaluator_user_prompt(reply, message, history)},
    ]
    response = gemini.beta.chat.completions.parse(model="gemini-2.0-flash", messages=messages, response_format=Evaluation)
    evaluator_message = response.choices[0].message
    verdict = evaluator_message.parsed
    if verdict is None:
        # `parsed` is optional on the SDK's message type; fail here with a
        # clear message instead of letting the caller hit an AttributeError
        # on `None.is_acceptable`.
        refusal = getattr(evaluator_message, "refusal", None)
        raise ValueError(f"Evaluator returned no structured verdict (refusal: {refusal!r})")
    return verdict

In [None]:
# Regenerate a reply after the evaluator rejected the previous one
def rerun(reply, message, history, feedback):
    """Generate a new reply, telling the model why its last attempt failed.

    The persona system prompt is extended with the rejected answer and the
    evaluator's feedback, then the same conversation is sent again.

    Args:
        reply: The rejected answer.
        message: The user message being answered.
        history: Earlier turns; only ``"role"`` and ``"content"`` of each
            entry are forwarded to the model.
        feedback: The evaluator's reason for rejecting ``reply``.

    Returns:
        The content of the first choice of the new completion.
    """
    rejection_notice = (
        "\n\n## Previous answer rejected\nYou just tried to reply, but the quality control rejected your reply\n"
        f"## Your attempted answer:\n{reply}\n\n"
        f"## Reason for rejection:\n{feedback}\n\n"
    )
    retry_messages = [
        {"role": "system", "content": system_prompt + rejection_notice},
        *({"role": turn["role"], "content": turn["content"]} for turn in history),
        {"role": "user", "content": message},
    ]
    completion = openai.chat.completions.create(model="gpt-4o-mini", messages=retry_messages)
    return completion.choices[0].message.content

In [None]:
# Chat handler with evaluation and one corrective retry
def chat_with_eval(message, history):
    """Answer ``message`` in persona, evaluate the answer, and retry once if rejected.

    Args:
        message: The latest user message.
        history: Earlier turns; only ``"role"`` and ``"content"`` of each
            entry are forwarded to the generator, while the evaluator and the
            retry receive ``history`` as given.

    Returns:
        The first reply when the evaluator accepts it; otherwise the reply
        produced by ``rerun``, which is returned without a second evaluation.
    """
    # Testing hook: any message mentioning "patent" forces a pig-latin answer,
    # which the evaluator is expected to reject, exercising the retry path.
    pig_latin_directive = "\n\nEverything in your reply needs to be in pig latin - it is mandatory that you respond only and entirely in pig latin"
    system = (system_prompt + pig_latin_directive) if "patent" in message.lower() else system_prompt

    prior_turns = [{"role": turn["role"], "content": turn["content"]} for turn in history]
    messages = [{"role": "system", "content": system}, *prior_turns, {"role": "user", "content": message}]
    completion = openai.chat.completions.create(model="gpt-4o-mini", messages=messages)
    reply = completion.choices[0].message.content

    evaluation = evaluate(reply, message, history)
    if evaluation.is_acceptable:
        print("Evaluation Passed.")
        return reply

    print(f"Evaluation Failed: {evaluation.feedback}")
    print("Retrying generation...")
    return rerun(reply, message, history, evaluation.feedback)

In [None]:
# Launch Interface
# Serve chat_with_eval through a Gradio chat UI and start it immediately.
# type="messages" is presumably what makes Gradio deliver history entries as
# role/content dicts, which is how the handler indexes them -- confirm against
# the installed Gradio version.
gr.ChatInterface(chat_with_eval, type="messages").launch()