In [31]:
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import ChatPromptTemplate
import os

# Tracing configuration, set through environment variables before any chain runs:
# the W&B tracing flag for LangChain and the W&B project name ("midterm").
os.environ.update(
    {
        "LANGCHAIN_WANDB_TRACING": "true",
        "WANDB_PROJECT": "midterm",
    }
)

Download PDF

In [32]:
# Source PDF; the URL is handed straight to the loader.
pdf_link = "https://d18rn0p25nwr6d.cloudfront.net/CIK-0001045810/1cbe8fe7-e08a-46e3-8dcc-b429fc06c1a4.pdf"

# Build the loader, then read the PDF into a list of Document objects.
loader = PyMuPDFLoader(pdf_link)
documents = loader.load()

In [35]:
# Number of Document objects returned by loader.load(), i.e. before splitting.
# NOTE(review): the output saved under this cell is page text rather than a
# count, so it looks stale — re-run the notebook to confirm.
len(documents)

'Table of Contents\nUNITED STATES\nSECURITIES AND EXCHANGE COMMISSION\nWashington, D.C. 20549\n____________________________________________________________________________________________\nFORM 10-K\n☒\nANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n    For the fiscal year ended January 28, 2024\nOR\n☐\nTRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\nCommission file number: 0-23985\n \n \nNVIDIA CORPORATION\n(Exact name of registrant as specified in its charter)\nDelaware\n94-3177549\n(State or other jurisdiction of\n(I.R.S. Employer\nincorporation or organization)\nIdentification No.)\n   2788 San Tomas Expressway, Santa Clara, California\n95051\n  (Address of principal executive offices)\n(Zip Code)\nRegistrant’s telephone number, including area code: (408) 486-2000\nSecurities registered pursuant to Section 12(b) of the Act:\nTitle of each class\nTrading Symbol(s)\nName of each exchange on which registe

In [36]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured with chunk_size=700 and chunk_overlap=50.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=50)

# `documents` is rebound to the split chunks here; every later cell that
# reads `documents` therefore works on chunks, not on the loader's output.
documents = text_splitter.split_documents(documents)

In [37]:
# Number of chunks produced by the text splitter (`documents` now holds chunks).
len(documents)

624

Instantiate Models

In [38]:
# Embedding model used to vectorize the chunks when the vector store is built.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

In [39]:
# Chat model that writes the final answer; temperature is set to 0.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
)

Load Vectors into Vector Store

In [40]:
# Embed every chunk with `embedding_model` and index the vectors in FAISS.
vector_store = FAISS.from_documents(
    documents=documents,
    embedding=embedding_model,
)

# Retriever view over the index, created with the default settings.
retriever = vector_store.as_retriever()

2024-03-14 03:55:34 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Set up chain

In [46]:
# Prompt text: the model is told to answer only from the supplied context and
# to reply 'I don't know' otherwise. It has two placeholders, {context} and {input}.
template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':

Context:
{context}

Question:
{input}
"""

prompt = ChatPromptTemplate.from_template(template)

# Chain 1: combines the documents with the prompt and sends them to the LLM.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Chain 2: runs the retriever first, then passes its results to chain 1.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)


In [47]:
# Query 1: executive-officer lookup. The chain's result dict is the cell output.
retrieval_chain.invoke(
    {
        "input": "Who is the E-VP, Operations - and how old are they?",
    }
)

2024-03-14 04:03:53 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-03-14 04:03:54 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




{'input': 'Who is the E-VP, Operations - and how old are they?',
 'context': [Document(page_content='supports diverse hiring, retention, and employee engagement, which we believe makes NVIDIA a great place to work.\nDuring fiscal year 2025, we will continue to have a flexible work environment and maintain our company wide 2-days off a quarter for employees to rest and\nrecharge.\nInformation About Our Executive Officers\nThe following sets forth certain information regarding our executive officers, their ages, and positions as of February 16, 2024:\nName\nAge\nPosition\nJen-Hsun Huang\n60\nPresident and Chief Executive Officer\nColette M. Kress\n56\nExecutive Vice President and Chief Financial Officer\nAjay K. Puri\n69\nExecutive Vice President, Worldwide Field Operations\nDebora Shoquist\n69', metadata={'source': 'https://d18rn0p25nwr6d.cloudfront.net/CIK-0001045810/1cbe8fe7-e08a-46e3-8dcc-b429fc06c1a4.pdf', 'file_path': 'https://d18rn0p25nwr6d.cloudfront.net/CIK-0001045810/1cbe8fe7-e

In [43]:
# Query 2: a figure from the financial-statement notes. The chain's result dict
# is the cell output.
retrieval_chain.invoke(
    {
        "input": "What is the gross carrying amount of Total Amortizable Intangible Assets for Jan 29, 2023?",
    }
)

2024-03-14 03:58:27 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-03-14 03:58:28 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




{'input': 'What is the gross carrying amount of Total Amortizable Intangible Assets for Jan 29, 2023?',
 'context': [Document(page_content='Table of Contents\nNVIDIA Corporation and Subsidiaries\nNotes to the Consolidated Financial Statements\n(Continued)\nNote 7 - Amortizable Intangible Assets\nThe components of our amortizable intangible assets are as follows:\n \nJan 28, 2024\nJan 29, 2023\n \nGross\nCarrying\nAmount\nAccumulated\nAmortization\nNet \nCarrying\nAmount\nGross\nCarrying\nAmount\nAccumulated\nAmortization\nNet \nCarrying\nAmount\n \n(In millions)\nAcquisition-related intangible\nassets (1)\n$\n2,642 \n$\n(1,720)\n$\n922 \n$\n3,093 \n$\n(1,614)\n$\n1,479 \nPatents and licensed technology\n449 \n(259)\n190 \n446 \n(249)\n197 \nTotal intangible assets\n$\n3,091 \n$\n(1,979)\n$\n1,112 \n$\n3,539 \n$\n(1,863)\n$\n1,676', metadata={'source': 'https://d18rn0p25nwr6d.cloudfront.net/CIK-0001045810/1cbe8fe7-e08a-46e3-8dcc-b429fc06c1a4.pdf', 'file_path': 'https://d18rn0p25nwr6d.cl