## RAG Example

In [37]:
# Import Dependencies

import PyPDF2
import openai
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec
import pinecone
import os
from tiktoken import get_encoding
from dotenv import load_dotenv

In [38]:
# Load environment variables from a .env file
load_dotenv()

# Set OpenAI API Key from environment variable
openai.api_key = os.getenv("OPENAI_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")

client = OpenAI(api_key=os.getenv("OPENAI_KEY"))

# Initialize Pinecone using the Pinecone class
pc = Pinecone(api_key=pinecone_api_key)

In [39]:
# Check if the index exists, otherwise create one
index_name = "rag-example"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,  # Your embedding dimensions
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",  # You specified AWS
            region="us-east-1"  # Your region
        )
    )

# Connect to the existing index
index = pc.Index(index_name)

In [40]:
# Function to extract text from the PDF
def extract_text_from_pdf(pdf_file):
    with open(pdf_file, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        text = ''
        for page_num in range(len(reader.pages)):
            page = reader.pages[page_num]
            text += page.extract_text()
    return text

In [41]:
# Function to chunk the text into smaller pieces
def chunk_text(text, chunk_size=1000):
    encoding = get_encoding("cl100k_base")  # Tokenizer model
    tokens = encoding.encode(text)
    chunks = [tokens[i:i + chunk_size] for i in range(0, len(tokens), chunk_size)]
    text_chunks = [encoding.decode(chunk) for chunk in chunks]
    return text_chunks

In [42]:
# Function to generate embeddings using the new OpenAI API
def get_embedding(text, engine="text-embedding-ada-002"):
    response = openai.embeddings.create(input=[text], model=engine)
    return response.data[0].embedding

In [43]:
# Function to add chunks to the Pinecone vector database
def upload_chunks_to_pinecone(text_chunks):
    for idx, chunk in enumerate(text_chunks):
        embedding = get_embedding(chunk)
        index.upsert([(f"chunk_{idx}", embedding, {"text": chunk})])

In [53]:
def ask_question(question):
    # Get the embedding of the question
    question_embedding = get_embedding(question)
    
    # Verify that the embedding has the correct number of dimensions (1536)
    if len(question_embedding) != 1536:
        raise ValueError(f"Embedding size is incorrect: {len(question_embedding)} dimensions found, expected 1536.")
    
    # Query the Pinecone vector database
    result = index.query(vector=question_embedding, top_k=5, include_metadata=True)
    
    # Use OpenAI to generate an answer based on retrieved chunks
    context = " ".join([match['metadata']['text'] for match in result['matches']])
    
    # Updated OpenAI API call for chat models using `ChatCompletion.create` method
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Based on the following context: {context}, answer the question: {question}"}
        ],
        max_tokens=150
    )
    
    # Return the assistant's answer
    return response.choices[0].message.content.strip()


In [45]:
# Extract text from the PDF
pdf_text = extract_text_from_pdf('pvyn1u.pdf')

# Chunk the text
chunks = chunk_text(pdf_text)

# Upload the chunks to Pinecone
upload_chunks_to_pinecone(chunks)

In [54]:
# Example usage
question = "What is the climax of the novel?"
answer = ask_question(question)
print(answer)

What's the climax of the novel? Ah, that's a great question! The climax is like the big explosion at the end of a fireworks show, you know? It's when everything comes together, like the punchline of a really long joke. In "The Midnight Feast" by Lucy Foley, the climax occurs when all the suspense, the secrets, and the drama lead to that intense moment on the cliffs. Francesca and Eddie have this dramatic confrontation, and it's all happening amid the aftermath of the fire and everyone's shady past coming into play. It's the point where you're on the edge of your seat, and you just can't look away because everything is hitting the fan, and you're thinking, "Wow, this is it. This is the moment we've been
