<a href="https://colab.research.google.com/github/khandekar0708/khandekar0708.github.io/blob/main/Rag_System0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install openai pinecone-client


In [None]:
import os
import openai
from pinecone import Pinecone, ServerlessSpec

# Read the API keys from environment variables so that no secret is
# hard-coded in the notebook. Set OPENAI_API_KEY and PINECONE_API_KEY in the
# runtime environment before running this cell.
openai.api_key = os.environ.get("OPENAI_API_KEY")
pinecone_api_key = os.environ.get("PINECONE_API_KEY")

# Fail fast with a clear message instead of a confusing authentication error
# from the first remote call further down.
if not openai.api_key or not pinecone_api_key:
    raise RuntimeError(
        "Missing API key: set the OPENAI_API_KEY and PINECONE_API_KEY "
        "environment variables before running this notebook."
    )

# Initialize Pinecone client
pc = Pinecone(api_key=pinecone_api_key)

# Define index name and specifications for free plan in us-east-1 (AWS)
index_name = "business-documents"
spec = ServerlessSpec(cloud="aws", region="us-east-1")  # Free-tier compatible region

# Create the index if it is missing, then connect to it.
try:
    # list_indexes() returns index description objects rather than plain
    # strings, so ask for the names explicitly; otherwise the membership test
    # below never matches and create_index is attempted on every run.
    existing_indexes = pc.list_indexes().names()

    # dimension=1536 is the vector size passed to Pinecone; it must equal the
    # length of the embeddings that get_embedding() produces.
    if index_name not in existing_indexes:
        pc.create_index(name=index_name, dimension=1536, metric="cosine", spec=spec)

    index = pc.Index(index_name)  # Connect to the index
except Exception as e:
    # Kept as a safety net: the index may have been created between the
    # listing and the create call (e.g. by another notebook session).
    if "already exists" in str(e).lower():
        print(f"Index '{index_name}' already exists. Connecting to the existing index.")
        index = pc.Index(index_name)
    else:
        print(f"Failed to create or connect to index: {e}")
        raise  # Re-raise if it's a different error

# Function to get embeddings
def get_embedding(text):
    """Return the embedding vector for ``text``, or ``None`` on failure.

    Args:
        text: The string to embed.

    Returns:
        The embedding produced by the "text-embedding-ada-002" model, or
        ``None`` if the request raised any exception (the error is printed).
    """
    try:
        # openai>=1.0 removed `openai.Embedding.create`; the module-level
        # `openai.embeddings.create` is its replacement and returns an object
        # whose fields are read as attributes instead of dict keys.
        response = openai.embeddings.create(input=[text], model="text-embedding-ada-002")
        return response.data[0].embedding
    except Exception as e:
        # Best-effort by design: callers check for None and skip the item.
        print("Error in getting embedding:", e)
        return None

# Ingest documents
def ingest_documents(documents):
    """Embed every document and upsert the successfully embedded ones.

    Args:
        documents: Iterable of dicts, each with an 'id' and a 'text' key.

    Documents whose embedding could not be computed are skipped. Nothing is
    sent to the index when no document was embedded.
    """
    records = []
    for entry in documents:
        body = entry['text']
        vector = get_embedding(body)
        if vector is None:
            # Embedding failed for this document; leave it out of the batch.
            continue
        records.append({"id": entry['id'], "values": vector, "metadata": {"text": body}})

    if not records:
        return
    index.upsert(records)

# Function to search documents
def search_documents(query, top_k=5):
    """Return the stored texts of the documents most similar to ``query``.

    Args:
        query: Natural-language search string.
        top_k: Maximum number of matches to request from the index.

    Returns:
        A list with the 'text' metadata of each match, in the order the index
        returned them; an empty list if the query could not be embedded.
    """
    query_embedding = get_embedding(query)
    if query_embedding is None:
        return []  # Early return if embedding failed

    # The Pinecone client only accepts keyword arguments for query(); passing
    # the vector positionally raises ValueError.
    results = index.query(vector=query_embedding, top_k=top_k, include_metadata=True)
    return [match['metadata']['text'] for match in results['matches']]

# RAG answer generation
def rag_answer(query):
    """Answer ``query`` using retrieved documents as context for a chat model.

    Args:
        query: The user's question.

    Returns:
        The model's answer with surrounding whitespace stripped, or the
        fallback string "Sorry, I couldn't generate an answer." if the chat
        request (or reading its result) raised an exception.
    """
    retrieved_docs = search_documents(query)
    context = "\n".join(retrieved_docs)
    prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
    try:
        # openai>=1.0 removed `openai.ChatCompletion.create`; use the
        # `openai.chat.completions.create` replacement and read the response
        # through attributes rather than dict keys.
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",  # Change to "gpt-4" if you have access
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=200,
            temperature=0.7
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print("Error in generating answer:", e)
        return "Sorry, I couldn't generate an answer."

# Example usage
# Three small sample documents; each dict carries the 'id' and 'text' keys
# that ingest_documents() reads.
documents = [
    {"id": "doc1", "text": "Product X has features A, B, and C, which are useful for business applications."},
    {"id": "doc2", "text": "Our company's support services are available 24/7 for all clients."},
    {"id": "doc3", "text": "The pricing plan includes Basic, Premium, and Enterprise tiers, each with different features."}
]

# Embed the sample documents and upsert them into the index.
ingest_documents(documents)

# Ask a question that one of the sample documents can answer, then print the
# generated answer.
query = "What support options does the company offer?"
answer = rag_answer(query)
print("Answer:", answer)
