In [None]:
# Retrieval-augmented generation (RAG) example using the OpenAI API for generation and Pinecone as the vector database for retrieval.

# Step 1: Install Required Libraries
!pip install openai pinecone-client

# Step 2: Import Required Modules
import openai
import pinecone

# Step 3: Set Up API Keys
# NOTE: both values are placeholders. Replace them with real keys, and prefer
# loading them from the environment rather than committing them to the file.
openai.api_key = "your_openai_api_key"
pinecone_api_key = "your_api_key"

# Step 3 (continued): Initialize the Pinecone client
# The class is referenced through the `pinecone` module because only
# `import pinecone` is in scope; a bare `Pinecone(...)` raises NameError.
pc = pinecone.Pinecone(api_key=pinecone_api_key)


# Step 4: Create a Pinecone Index
# Pinecone indexes are used to store vector embeddings of documents for efficient retrieval.
index_name = "qa-index"

# Only create the index when it does not exist yet, so re-running is safe.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,  # embedding size of OpenAI's text-embedding-ada-002
        metric="cosine",
        spec=pinecone.ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

# Handle used by the functions below for upserts and queries.
index = pc.Index(index_name)
print(f"Connected to index: {index_name}")

# Step 5: Defining a Function to Add Documents to the Index
# This function converts text into embeddings and adds them to the Pinecone index.

def add_to_index(documents, batch_size=100):
    """Embed ``documents`` with OpenAI and upsert them into the Pinecone index.

    Each document is embedded with ``text-embedding-ada-002`` and stored with
    its original text under the ``"text"`` metadata key. The vector ID is the
    SHA-256 hex digest of the document text, so re-adding the same text
    overwrites its own entry instead of colliding with another document's
    position-based ID.

    Args:
        documents: A sequence of text strings. A single string is treated as
            one document. An empty sequence is a no-op.
        batch_size: Maximum number of vectors sent per upsert request.

    Returns:
        None.
    """
    import hashlib

    # A bare string would otherwise be iterated character by character.
    if isinstance(documents, str):
        documents = [documents]
    documents = list(documents)
    if not documents:
        # Nothing to embed; avoid a pointless (and rejected) API call.
        return

    # openai>=1.0 interface; the legacy `openai.Embedding` was removed.
    response = openai.embeddings.create(
        input=documents, model="text-embedding-ada-002"
    )

    vectors = []
    for item in response.data:
        # `item.index` is the position of the matching input document.
        text = documents[item.index]
        # Pinecone vector IDs must be strings.
        vector_id = hashlib.sha256(text.encode("utf-8")).hexdigest()
        vectors.append((vector_id, item.embedding, {"text": text}))

    # Send a few large requests instead of one request per document.
    step = max(1, batch_size)
    for start in range(0, len(vectors), step):
        index.upsert(vectors=vectors[start:start + step])

# Sample knowledge-base snippets used to seed the index.
documents = [
    "Our business offers 24/7 customer support.",
    "The refund policy allows returns within 30 days of purchase.",
    "We provide free shipping on orders over $50.",
]
add_to_index(documents)

# Step 6: Define the RAG Retrieval Function
# This function retrieves relevant documents based on a query and generates a response.

def retrieve_and_generate(query, top_k=3):
    """Answer ``query`` using documents retrieved from the Pinecone index.

    The query is embedded with ``text-embedding-ada-002``, the ``top_k``
    closest vectors are fetched from the index, and their ``"text"`` metadata
    is joined into a context block that is sent to an OpenAI completion model
    together with the question.

    Args:
        query: The question to answer, as a string.
        top_k: Number of nearest documents to retrieve as context.

    Returns:
        The generated answer text with surrounding whitespace stripped.
    """
    # openai>=1.0 interface; the legacy `openai.Embedding` was removed.
    embedding_response = openai.embeddings.create(
        input=[query], model="text-embedding-ada-002"
    )
    query_embedding = embedding_response.data[0].embedding

    # The Pinecone v3+ client only accepts keyword arguments for query().
    results = index.query(
        vector=query_embedding, top_k=top_k, include_metadata=True
    )

    # Extract the stored document texts from the matches.
    context = "\n".join(
        match['metadata']['text'] for match in results['matches']
    )

    # text-davinci-003 has been shut down; gpt-3.5-turbo-instruct is its
    # replacement on the same completions endpoint.
    response = openai.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt=f"Answer the question based on the context:\nContext: {context}\nQuestion: {query}\nAnswer:",
        max_tokens=100,
    )
    return response.choices[0].text.strip()

# Ask a sample question and print the generated answer.
query = "What is the refund policy?"
response = retrieve_and_generate(query)
print("Response:", response)


