In [11]:
from openai import OpenAI
import faiss
import numpy as np
from dotenv import load_dotenv
load_dotenv()

# Initialize OpenAI client
# No credentials are passed explicitly; presumably the client picks them up
# from the environment populated by load_dotenv() above -- TODO confirm.
client = OpenAI()

# Sample documents
# Small in-memory corpus. Each entry is embedded and added to the FAISS index
# further down, and rag_query() maps search hits back to these strings by
# list position, so the order here must match the order of the indexed vectors.
documents = [
    "The sky is blue.",
    "Grass is green.",
    "The sun is yellow.",
    "Water is transparent.",
    "Fire is hot."
]

# Function to get embeddings from OpenAI
def get_embedding(text, model="text-embedding-ada-002"):
    """Embed a single string and return its vector.

    Args:
        text: String to embed; newlines are replaced by spaces first.
        model: Name of the embedding model passed to the API.

    Returns:
        The ``embedding`` attribute of the first item in the API response.
    """
    # Flatten the text onto one line before sending it.
    single_line = " ".join(text.split("\n"))
    result = client.embeddings.create(model=model, input=[single_line])
    first_item = result.data[0]
    return first_item.embedding

# Function to get embeddings for multiple documents
def get_embeddings(texts, model="text-embedding-ada-002"):
    """Embed several strings with one batched API request.

    Args:
        texts: Iterable of strings to embed; newlines in each string are
            replaced by spaces first.
        model: Name of the embedding model passed to the API.

    Returns:
        A list with the ``embedding`` attribute of every item in the API
        response, or an empty list when ``texts`` is empty.
    """
    cleaned = [text.replace("\n", " ") for text in texts]
    if not cleaned:
        # An empty batch has nothing to embed, so skip the API request
        # entirely instead of sending an empty input list.
        return []
    response = client.embeddings.create(input=cleaned, model=model)
    return [item.embedding for item in response.data]

# Function to perform RAG
def rag_query(query, index, documents, k=2):
    """Answer ``query`` with a chat model, grounded on the nearest documents.

    Args:
        query: Question text; it is embedded for retrieval and repeated in
            the prompt sent to the chat model.
        index: Vector index exposing ``search(vectors, k)`` and returning
            ``(distances, indices)`` (a FAISS index in this file).
        documents: Sequence of document strings, addressed by the positions
            that ``index.search`` returns.
        k: Number of neighbours to request from the index.

    Returns:
        The message content of the first choice in the chat completion.
    """
    # The index is searched with a 2-D float32 array holding one query row.
    query_vector = np.asarray([get_embedding(query)], dtype="float32")
    _, indices = index.search(query_vector, k)

    # FAISS pads the result with -1 when fewer than k vectors are available.
    # Indexing with -1 would silently pull in the *last* document, so those
    # placeholder hits are dropped instead.
    relevant_docs = [documents[i] for i in indices[0] if i >= 0]

    # Build the prompt in one pass: a bullet per retrieved document, then the
    # question itself.
    context = "".join(f"- {doc}\n" for doc in relevant_docs)
    prompt = (
        "Based on the following information:\n"
        f"{context}"
        f"\nAnswer the question: {query}"
    )

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )

    return response.choices[0].message.content

# Embed the whole corpus with one batched request and stack it into an array
document_embeddings = np.array(get_embeddings(documents))

# Build a flat L2 index sized to the embedding width and load the vectors
dimension = document_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(document_embeddings.astype('float32'))

# Example usage: run each sample question through the RAG pipeline
queries = ["What color is the sky?", "What is the color of grass?"]
for query_number, query in enumerate(queries):
    result = rag_query(query, index, documents)
    if query_number:
        # Blank line between consecutive answers
        print()
    print(f"Query: {query}")
    print(f"Answer: {result}")

Query: What color is the sky?
Answer: Based on the information provided, the color of the sky is blue.

Query: What is the color of grass?
Answer: Based on the information provided, the color of grass is green.
