## RAG Model

### Import Libraries

In [1]:
# Import Dependencies

import PyPDF2
import openai
import numpy as np
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec
import os
from tiktoken import get_encoding
from dotenv import load_dotenv
#%pip install scikit-learn
from sklearn.metrics.pairwise import cosine_similarity

  from tqdm.autonotebook import tqdm



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


### Load OpenAI API Key

In [2]:

# Load environment variables from a .env file
load_dotenv()

# OpenAI is the LLM / embedding provider used to process text.
# Pinecone is the (optional) database where vectors could be stored.
# Read the key once and reuse it for both the module-level default and the client.
OPENAI_API_KEY = os.getenv("OPENAI_KEY_API")
openai.api_key = OPENAI_API_KEY
client = openai.OpenAI(api_key=OPENAI_API_KEY)

# Initialize Pinecone using the Pinecone class
# pc = Pinecone(api_key=pinecone_api_key)

# Never print the key itself: cell outputs are saved inside the notebook file
# and would leak the secret to anyone the notebook is shared with.
print("OpenAI API key loaded." if OPENAI_API_KEY else "OPENAI_KEY_API is not set.")



[REDACTED: this cell output exposed a live OpenAI API key. Revoke and rotate that key, and clear the output before sharing the notebook.]


### Load SEC API 

### Create Embeddings/Vectorizer 

In [4]:
# Function to generate embeddings using the new OpenAI API
def get_embedding(text, engine="text-embedding-ada-002"):
    """Return the embedding of ``text`` produced by an OpenAI embedding model.

    Args:
        text: The string to embed. It is sent as a single-element batch.
        engine: Name of the embedding model passed as ``model`` to the API.

    Returns:
        The ``embedding`` attribute of the first (and only) item in the API
        response, returned unchanged (no NumPy conversion or reshaping).
        Callers that need a 2D array must reshape it themselves.
    """
    response = openai.embeddings.create(input=[text], model=engine)
    return response.data[0].embedding

### Add Vector Database to Local Storage

In [5]:
# In-memory vector store: maps a chunk id ("chunk_<n>") to its embedding and text
local_vector_store = {}


# Function to add chunks to the local vector store
def upload_chunks_to_local_memory(text_chunks):
    """Embed each chunk and record it in ``local_vector_store``.

    Args:
        text_chunks: Iterable of text chunks. Each one is embedded with
            ``get_embedding`` and stored under the key ``chunk_<position>``,
            where ``position`` is its zero-based index in the iterable.

    Returns:
        None. The module-level ``local_vector_store`` is updated in place;
        an existing entry with the same key is overwritten.
    """
    for position, piece in enumerate(text_chunks):
        vector = get_embedding(piece)
        record = dict(embedding=vector, text=piece)
        local_vector_store["chunk_" + str(position)] = record


### Extract Text from PDF 

In [6]:
# Function to extract text from the PDF
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_file: Path of the PDF to read. The file is opened in binary mode.

    Returns:
        A single string containing each page's extracted text, in page order,
        with no separator inserted between pages. A page that yields no text
        contributes an empty string.
    """
    with open(pdf_file, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # `or ''` guards against a page whose extraction returns None
        # (e.g. image-only pages), which would otherwise raise TypeError.
        # Extraction happens inside the `with` block because the reader
        # still needs the open file handle.
        page_texts = [page.extract_text() or '' for page in reader.pages]
    return ''.join(page_texts)

### Chunk text into smaller pieces

In [7]:
# Function to chunk the text into smaller pieces
def chunk_text(text, chunk_size=1000):
    """Split ``text`` into consecutive pieces of at most ``chunk_size`` tokens.

    The text is tokenized with the ``cl100k_base`` encoding, the token list is
    cut into non-overlapping windows of ``chunk_size`` tokens (the last window
    may be shorter), and each window is decoded back to a string.

    Args:
        text: The text to split.
        chunk_size: Maximum number of tokens per chunk. Must be positive.

    Returns:
        A list of text chunks in document order; empty if ``text`` encodes to
        no tokens.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    # Fail fast with a clear message: a zero step makes range() raise a cryptic
    # error, and a negative step would silently produce no chunks at all.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}.")

    encoding = get_encoding("cl100k_base")  # Tokenizer model
    tokens = encoding.encode(text)
    # NOTE(review): windows are cut on token boundaries, not character or
    # sentence boundaries — confirm this is acceptable for retrieval quality.
    return [
        encoding.decode(tokens[start:start + chunk_size])
        for start in range(0, len(tokens), chunk_size)
    ]

### Add Vectors to Pinecone if storing in Database

In [8]:
# # Function to add chunks to the Pinecone vector database
# def upload_chunks_to_pinecone(text_chunks):
#     for idx, chunk in enumerate(text_chunks):
#         embedding = get_embedding(chunk)
#         index.upsert([(f"chunk_{idx}", embedding, {"text": chunk})])

### Change Question into a Vector to compare to the Vector Database

In [14]:
def ask_question(question, top_k=5, model="gpt-4", max_tokens=250, embedding_dim=1536):
    """Answer ``question`` using the most similar chunks in ``local_vector_store``.

    The question is embedded with ``get_embedding``, every stored chunk is
    scored against it by cosine similarity, the ``top_k`` best chunks are
    joined into a context string, and that context plus the question are sent
    to the chat model.

    Args:
        question: The question to answer.
        top_k: Number of highest-scoring chunks to include as context.
        model: Chat model name passed to the completions API.
        max_tokens: Upper bound on the length of the generated answer. Raise
            it if answers come back cut off mid-sentence.
        embedding_dim: Expected length of the question embedding.

    Returns:
        The text content of the first choice in the chat completion response.

    Raises:
        ValueError: If ``top_k`` is less than 1, if the local vector store is
            empty, or if the question embedding does not have
            ``embedding_dim`` entries.
    """
    if top_k < 1:
        raise ValueError(f"top_k must be at least 1, got {top_k}.")

    # Without stored chunks there is no context to ground the answer in, so
    # stop before spending any API calls.
    if not local_vector_store:
        raise ValueError(
            "Local vector store is empty; call upload_chunks_to_local_memory() "
            "before asking a question."
        )

    # Get the embedding of the question
    question_embedding = get_embedding(question)

    # Verify that the embedding has the expected number of dimensions
    if len(question_embedding) != embedding_dim:
        raise ValueError(f"Embedding size is incorrect: {len(question_embedding)} dimensions found, expected {embedding_dim}.")

    # Score all chunks in one call: rows are stored chunks, the single column
    # is the question.
    question_matrix = np.asarray(question_embedding, dtype=float).reshape(1, -1)
    entries = list(local_vector_store.values())
    chunk_matrix = np.asarray([entry['embedding'] for entry in entries], dtype=float)
    scores = cosine_similarity(chunk_matrix, question_matrix)[:, 0]

    # Highest similarity first; the stable sort keeps insertion order for ties.
    best = np.argsort(-scores, kind="stable")[:top_k]

    # Use OpenAI to generate an answer based on retrieved chunks
    context = " ".join(entries[i]['text'] for i in best)

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": f"You are a stock Hedge Fund manager offering investment recommendation. Based on the following context: {context}, answer the question: {question}"}
        ],
        max_tokens=max_tokens
    )

    return response.choices[0].message.content

    
    


### Getting text from the PDF, chunking it, and Uploading Locally

In [15]:


# Path of the source document to index
PDF_PATH = 'NVIDIAAn.pdf'

# Extract text from the PDF
pdf_text = extract_text_from_pdf(PDF_PATH)

# Chunk the text
chunks = chunk_text(pdf_text)

# Upload the chunks to Pinecone
# upload_chunks_to_pinecone(chunks)

# Start from an empty store so that re-running this cell (for example with a
# shorter document) cannot leave stale chunk_N entries from a previous run.
local_vector_store.clear()

# Upload Chunks to local store
upload_chunks_to_local_memory(chunks)

### Prompt Model with Question

In [16]:
# Example usage: ask one question against the chunks currently held in
# local_vector_store (populated by upload_chunks_to_local_memory).
question = "What is NVIDIA's revenue and Non-GAAP performance? Whats your buy, sell, hold recommendation and why?"
# ask_question embeds the question, picks the most similar stored chunks as
# context, and returns the chat model's reply as a string.
answer = ask_question(question)
# print() renders the newlines in the reply; a bare `answer` would show the repr.
print(answer)

NVIDIA's Q1 FY2024 revenue stands at $7.19 billion, which is a decrease of 13% year-over-year (Y/Y) but an increase of 19% compared to the previous quarter. The company's Non-GAAP earnings per diluted share were $1.09, down 20% from the previous year but up 24% from the previous quarter. 

The results show that despite a yearly revenue dip, NVIDIA maintains strong quarterly growth. The positive forward-looking statement of an expected revenue of $11.00 billion for Q2 FY2024 also shows promise for the company's future. Moreover, the company's solid performance in the data center sector, a ramp-up of supply to meet increasing demands, and its positive positioning within the AI technology space can also act as positive indicators for investors.

Based on the financial performance and future outlook, I would recommend a "BUY" status for NVIDIA. The company's strategic positioning, strong demand in the AI and data center sectors, improving operating margin, and positive revenue forecast for