In [None]:
# Toy in-memory knowledge base used by the retrieval step.
# Maps a document id to a record with a "title" and a "content" string.
KNOWLEDGE_BASE = dict(
    doc1=dict(
        title="Project Chimera Overview",
        content=(
            "Project Chimera is a research initiative focused on "
            "developing novel bio-integrated interfaces. "
            "It aims to merge biological systems with advanced "
            "computing technologies."
        ),
    ),
    doc2=dict(
        title="Chimera's Neural Interface",
        content=(
            "The core component of Project Chimera is a neural interface "
            "that allows for bidirectional communication between the brain "
            "and external devices. "
            "This interface uses biocompatible nanomaterials."
        ),
    ),
    doc3=dict(
        title="Applications of Chimera",
        content=(
            "Potential applications of Project Chimera include advanced "
            "prosthetics, treatment of neurological disorders, and enhanced "
            "human-computer interaction. "
            "Ethical considerations are paramount."
        ),
    ),
)

Step 1 -> (Retrieval): Locating Relevant Information

In [None]:
def _tokenize(text):
    """Return the set of lower-cased words in ``text`` with edge punctuation removed.

    Splitting on whitespace alone leaves tokens such as "chimera?" or
    "interfaces." which never compare equal to the bare word, so leading and
    trailing punctuation is stripped from every token. Punctuation inside a
    word (for example "bio-integrated") is kept.
    """
    import string  # local import: no shared import cell is visible in this notebook

    stripped = (token.strip(string.punctuation) for token in text.lower().split())
    # Tokens made only of punctuation become empty strings; drop them.
    return {token for token in stripped if token}


def rag_retrieval(query, documents):
    """Select the document whose content shares the most words with the query.

    Parameters:
    - query (str): The user's question.
    - documents (dict): Mapping of document id to a record that has a
      "content" string.

    Returns:
    - The record with the largest word overlap, or None when no document
      shares any word with the query (including when ``documents`` is empty).
      On ties the document that comes first in iteration order is kept.
    """
    query_words = _tokenize(query)
    best_doc = None
    best_overlap = 0

    for doc in documents.values():
        # Compare the query words with the document's content words
        overlap = len(query_words & _tokenize(doc["content"]))

        # Strict ">" keeps the earliest document on ties and leaves the
        # result as None when nothing overlaps at all.
        if overlap > best_overlap:
            best_overlap = overlap
            best_doc = doc

    return best_doc

Step 2 -> (Query Augmentation): Creating Context-Rich Prompts

In [None]:
"""
We enrich the user's original question with the retrieved document's content, which significantly reduces hallucinations because the language model sees both the question and real data.
"""
def rag_generation(query, document):
    """Build the augmented prompt for ``query`` from the retrieved ``document``.

    Parameters:
    - query (str): The user's question.
    - document (dict or None): Retrieved record with "title" and "content"
      keys, or a falsy value when retrieval found nothing.

    Returns:
    - prompt (str): The question prefixed with the document's title and
      content when a document is given, otherwise a prompt asking for a
      direct answer.
    """
    if document:
        snippet = f"{document['title']}: {document['content']}"
        prompt = f"Using the following information: '{snippet}', answer: {query}"
    else:
        prompt = f"No relevant information found. Answer directly: {query}"
    # Hand the prompt back to the caller; previously it was built and then
    # discarded, so the function always returned None.
    return prompt

Step 3 -> (Generation): Producing Tailored Answers

In [None]:
def get_llm_response(prompt):
    """
    Placeholder for the call that sends a prompt to a language model.

    Parameters:
    - prompt (str): The question or task for the language model, optionally augmented with additional context.

    Returns:
    - response: Intended to be the text generated by the language model. The body is currently a stub with no model call, so None is returned for every prompt.
    """
    return None

In [None]:
def naive_generation(query):
    """Send ``query`` to the language model without any knowledge-base context.

    Parameters:
    - query (str): The user's question.

    Returns:
    - Whatever get_llm_response returns for the un-augmented prompt.
    """
    # No retrieval step here: the model only ever sees the bare question.
    return get_llm_response(f"Answer directly the following query: {query}")
#or 
def rag_generation(query, document):
    """Answer ``query`` with the language model, using ``document`` as context.

    Parameters:
    - query (str): The user's question.
    - document (dict or None): Retrieved record with "title" and "content"
      keys, or a falsy value when retrieval found nothing.

    Returns:
    - Whatever get_llm_response returns for the constructed prompt.
    """
    # Nothing retrieved: fall back to a plain, un-augmented prompt.
    if not document:
        return get_llm_response(
            f"No relevant information found. Answer directly: {query}"
        )

    # Augment the prompt with the knowledge-base entry.
    context = f"{document['title']}: {document['content']}"
    return get_llm_response(
        f"Using the following information: '{context}', answer: {query}"
    )
# The problem with the rag_generation above is that a document may be retrieved yet not contain the information needed to answer, so to use the LLM properly we should construct the prompt so that the model answers only if the relevant information is present.
def rag_generation(query, document):
    """Answer ``query`` with the language model, but only from retrieved context.

    The prompt tells the model to answer solely when the snippet contains
    everything needed, and to decline otherwise, so that a retrieved but
    irrelevant document does not lead to a made-up answer.

    Parameters:
    - query (str): The user's question.
    - document (dict or None): Retrieved record with "title" and "content"
      keys, or a falsy value when retrieval found nothing.

    Returns:
    - Whatever get_llm_response returns for the constructed prompt.
    """
    if document:
        snippet = f"{document['title']}: {document['content']}"
        # Spell out the fallback too: the model needs to know what to do when
        # the snippet is not enough.
        prompt = (
            f"Only answer the query if all required info is present in the "
            f"snippet; otherwise politely say that you cannot answer. "
            f"Snippet: '{snippet}'. Query: {query}"
        )
    else:
        # With no document there is nothing to ground an answer on, so ask
        # only for a refusal. The earlier wording asked the model to refuse
        # and to answer directly in the same prompt.
        prompt = (
            f"No relevant information was found. Politely decline to answer "
            f"the following query: {query}"
        )
    return get_llm_response(prompt)
    

First Test 

In [None]:
# Ask the same question twice: once with no context, once with retrieval.
query = "What is the main goal of Project Chimera?"

# Baseline: the model sees only the bare question.
naive_answer = naive_generation(query)
print(f"Naive approach: {naive_answer}")

# RAG: pick the best-matching knowledge-base entry, then generate from it.
doc = rag_retrieval(query, KNOWLEDGE_BASE)
rag_answer = rag_generation(query, doc)
print(f"RAG approach: {rag_answer}")

Note:

    naive_generation(query) can easily lead to random or inaccurate answers regarding “Project Chimera”, such as: "The main goal of Project Chimera is to develop advanced artificial intelligence systems that can enhance human capabilities and improve decision-making processes across various fields.".
    rag_generation(query, doc) **provides contextual information** from the knowledge base, ensuring the answer is grounded: "The main goal of Project Chimera is to enable bidirectional communication between the brain and external devices through the use of a neural interface.".
    Seeing both approaches (and their actual output) helps you compare how naive answers can deviate from your authoritative data, while RAG-based responses stay closer to the truth.


# Fighting Hallucinations: RAG's Role in Trustworthy LLMs