In [1]:
pip install pymongo sentence-transformers


Note: you may need to restart the kernel to use updated packages.


In [3]:
from pymongo import MongoClient
from sentence_transformers import SentenceTransformer

# Connect to MongoDB with authentication
client = MongoClient("mongodb://admin:password@localhost:27017/")

# Select the database and collection
db = client["rag_database"]
collection = db["documents"]

# Load pre-trained sentence embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Sample documents
documents = [
    {"title": "AI and its Future", "content": "Artificial intelligence is evolving rapidly."},
    {"title": "Quantum Computing", "content": "Quantum computers can perform complex calculations much faster."},
    {"title": "Cloud Computing Trends", "content": "Serverless architectures and multi-cloud are gaining popularity."},
    {"title": "Machine Learning Basics", "content": "Supervised and unsupervised learning are key paradigms."}
]

# Generate embeddings and insert into MongoDB
for doc in documents:
    doc["embedding"] = model.encode(doc["content"]).tolist()  # Convert numpy array to list
    collection.insert_one(doc)

print("Data inserted successfully with authentication!")


Data inserted successfully with authentication!


In [4]:
import numpy as np

def find_most_relevant(query):
    query_embedding = model.encode(query).tolist()
    
    # Retrieve all documents
    docs = list(collection.find({}))
    
    # Compute similarity (dot product)
    similarities = [
        (doc, np.dot(query_embedding, doc["embedding"])) for doc in docs
    ]
    
    # Sort by similarity score
    best_match = max(similarities, key=lambda x: x[1])[0]
    
    return best_match

query = "Explain AI advancements"
result = find_most_relevant(query)

print(f"Most relevant document: {result['title']}\nContent: {result['content']}")


Most relevant document: AI and its Future
Content: Artificial intelligence is evolving rapidly.


In [9]:
import openai
import numpy as np
openai.api_key = "API KEY"
# Function to compute cosine similarity
def cosine_similarity(vec1, vec2):
    return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))

# Function to retrieve relevant documents
def retrieve_relevant_docs(query, top_n=2):
    query_embedding = model.encode(query).tolist()
    results = []
    
    for doc in collection.find():
        doc_embedding = np.array(doc["embedding"])
        similarity = cosine_similarity(query_embedding, doc_embedding)
        results.append((similarity, doc))
    
    # Sort by highest similarity
    results.sort(reverse=True, key=lambda x: x[0])
    return [doc["content"] for _, doc in results[:top_n]]

# Function to generate response using OpenAI
def get_openai_response(query):
    relevant_docs = retrieve_relevant_docs(query)
    context = " ".join(relevant_docs)

    prompt = f"Context: {context}\n\nQuestion: {query}\nAnswer:"
    
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "system", "content": "You are an AI assistant that answers based on retrieved documents."},
                  {"role": "user", "content": prompt}]
    )
    return response["choices"][0]["message"]["content"]

# Example query
query = "What is machine learning?"
answer = get_openai_response(query)

print("🔹 Question:", query)
print("🔹 Answer:", answer)

RateLimitError: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.

In [None]:
import openai

openai.api_key = "API-KEY"

try:
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}]
    )
    print(response["choices"][0]["message"]["content"])
except Exception as e:
    print("Error:", e)