In [12]:
pip install openai sentence-transformers faiss-cpu nltk tiktoken python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [13]:
import openai
import faiss
import numpy as np
import nltk
from sentence_transformers import SentenceTransformer
import os
from dotenv import load_dotenv

# Notebook setup: environment, tokenizer data, API credentials and the
# embedding model that the later cells rely on.

# Load environment variables from .env file
load_dotenv()

# Download required NLTK data
# Both resource names are requested; presumably this covers different NLTK
# releases — TODO confirm which one the installed version actually needs.
nltk.download('punkt')
nltk.download('punkt_tab')
# Imported after the downloads; sent_tokenize is what the splitting step uses.
from nltk.tokenize import sent_tokenize

# Get API key from environment variable
# Stored on the openai module; generate_answer reads it back when it builds
# its client.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Verify API key is loaded
# Fail fast here rather than on the first API call.
if not openai.api_key:
    raise ValueError("OPENAI_API_KEY not found in environment variables. Please check your .env file.")

# Load embedding model
# The same embedder encodes both the corpus sentences and each query.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Example long paragraph
# This is the whole retrieval corpus for the demo: it is split into sentences
# and each sentence is embedded and indexed individually.
long_paragraph = """
Retrieval-Augmented Generation (RAG) is a method that improves large language models by allowing them to retrieve information from external documents. 
This reduces hallucinations and makes the answers more fact-based. 
For example, a RAG system will first look up relevant information and then generate a response using that retrieved context. 
Confidence scores help indicate how certain the model is about its answer. 
OpenAI's GPT-4o is the latest generation of their language model, offering fast performance and multi-modal capabilities.
"""


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [14]:
# Build the retrieval index over the example paragraph.
# `sentences` and `index` are module-level state read by retrieve_sentences.

# Step 1: Split
# One retrievable unit per sentence.
sentences = sent_tokenize(long_paragraph)

# Step 2: Embed
# One embedding row per sentence, in the same order as `sentences`.
sentence_embeddings = embedder.encode(sentences, convert_to_numpy=True)

# Step 3: Create FAISS index
# The index is sized from the embedding width; vectors are added in sentence
# order, so a position returned by index.search maps back to sentences[i].
dimension = sentence_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(sentence_embeddings)


In [15]:
def retrieve_sentences(query, top_k=4):
    """Return the indexed sentences closest to ``query``.

    Args:
        query: Text to embed with the module-level ``embedder`` and look up
            in the module-level FAISS ``index``.
        top_k: Maximum number of sentences to return. Values larger than the
            number of indexed vectors are clamped; values below 1 yield an
            empty list.

    Returns:
        A list of ``(sentence, distance, number)`` tuples in the order
        ``index.search`` reports them. ``distance`` is the score from the
        search as a Python float, and ``number`` is the 1-based position of
        the sentence in ``sentences`` (used as the citation label).
    """
    # FAISS pads missing neighbours with label -1 when asked for more results
    # than it holds; in Python that would silently alias sentences[-1].
    top_k = min(top_k, index.ntotal)
    if top_k <= 0:
        return []

    query_embedding = embedder.encode([query], convert_to_numpy=True)
    distances, positions = index.search(query_embedding, top_k)
    return [
        (sentences[pos], float(dist), int(pos) + 1)
        for dist, pos in zip(distances[0], positions[0])
        if pos >= 0  # defensive: skip any padded "no result" slot
    ]


In [19]:
def format_context(snippets):
    """Render retrieved snippets as a numbered context block.

    Args:
        snippets: Iterable of ``(text, score, number)`` triples; the score is
            ignored here.

    Returns:
        One ``"[number] text"`` line per snippet, joined with newlines
        (an empty string when there are no snippets).
    """
    numbered_lines = []
    for sentence, _score, number in snippets:
        numbered_lines.append(f"[{number}] {sentence}")
    return "\n".join(numbered_lines)

def generate_answer(query, top_k=None, model="gpt-4o", temperature=0.3):
    """Answer ``query`` from retrieved context using an OpenAI chat model.

    Retrieves the closest sentences, formats them as a numbered context
    block, and asks the model to answer with citations and a self-reported
    confidence score.

    Args:
        query: The user's question.
        top_k: Number of sentences to retrieve. ``None`` (the default) defers
            to ``retrieve_sentences``' own default.
        model: Chat model name passed to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The model's reply with surrounding whitespace stripped, or an empty
        string if the API returns no text content.
    """
    if top_k is None:
        top_snippets = retrieve_sentences(query)
    else:
        top_snippets = retrieve_sentences(query, top_k=top_k)
    context = format_context(top_snippets)

    prompt = f"""
You are a helpful assistant. Use the following context to answer the question. 
Cite relevant sentences by their number (e.g., [1], [2]) and do not cite anything not directly used. 
At the end, output a confidence score between 0 and 1 based on how well the context answers the question.

Context:
{context}

Question: {query}

Answer:"""

    # OpenAI API v1.0+ client interface; reuses the key stored on the module.
    client = openai.OpenAI(api_key=openai.api_key)
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature
    )

    # message.content is optional in the API response; avoid calling
    # .strip() on None.
    content = response.choices[0].message.content
    return (content or "").strip()


In [20]:
# End-to-end demo: retrieve context for the question, call the model, and
# print the reply (print keeps the multi-line answer readable).
query = "What is RAG and how does it reduce hallucinations?"
print(generate_answer(query))


Retrieval-Augmented Generation (RAG) is a method that improves large language models by allowing them to retrieve information from external documents [1]. It reduces hallucinations by first looking up relevant information and then generating a response using that retrieved context [3]. This approach makes the answers more fact-based [2].

Confidence Score: 1


For example, a RAG system will first look up relevant information and then generate a response using that retrieved context. 