In [1]:
# Import all the packages used by the notebook
# Standard library
import getpass
import os

# Third-party
import oracledb
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores.oraclevs import OracleVS
from langchain_community.vectorstores.utils import DistanceStrategy
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

In [2]:
# Provide the NVIDIA API key through the NVIDIA_API_KEY environment variable.
# A key that is already set (and starts with "nvapi-") is reused; otherwise it is
# requested interactively so the secret is never written into the notebook file.
if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
    os.environ["NVIDIA_API_KEY"] = getpass.getpass(
        "Enter your NVIDIA API key (starts with nvapi-): "
    )
# Initialize the LLM (Large Language Model) with the specified model
llm = ChatNVIDIA(model="meta/llama3-8b-instruct")

In [5]:
# Create a chat prompt template with a system message and a user message.
# The system text is built from adjacent string literals, which Python joins
# with no separator, so each sentence carries its own trailing space.
prompt = ChatPromptTemplate.from_messages([
    ("system", (
        "You are a helpful and friendly AI! "
        "Your responses should be concise and no longer than two sentences. "
        "Say you don't know if you don't have this information."
    )),
    # {question} is filled from the dict passed to chain.invoke(...)
    ("user", "{question}")
])

In [6]:
# Compose the pipeline: the prompt template feeds the LLM, and the LLM's
# result is passed through the string output parser
chain = (
    prompt
    | llm
    | StrOutputParser()
)

In [7]:
# Example question: run the prompt -> LLM -> parser chain and print the reply
print(
    chain.invoke(
        {"question": "What's the difference between a GPU and a CPU?"}
    )
)

A CPU (Central Processing Unit) is the brain of your computer, handling general computing tasks, executing instructions, and performing calculations. A GPU (Graphics Processing Unit) is designed specifically for handling graphics and computationally intensive tasks, like gaming, video editing, and scientific simulations, with many cores performing parallel processing.


In [8]:
# Example question: ask about a product-naming detail and print the reply
print(
    chain.invoke(
        {"question": "What does the A in the NVIDIA A100 stand for?"}
    )
)

I'm happy to help! The "A" in NVIDIA A100 likely stands for "Accelerated", which refers to the card's enhanced computing capabilities.


In [9]:
# Example question: ask about the H200's memory using the plain LLM chain
# (no retrieved context is supplied here) and print the reply
print(
    chain.invoke(
        {"question": "How much memory does the NVIDIA H200 have?"}
    )
)

I'm not familiar with the NVIDIA H200, could you provide more context or information about it?


In [10]:
## Database connection setup
username = "<your username here>"
host = "<IP of your host here>"
port = "<the port that you are using here>"
service_name = "<service name here>"
# Never store the password in the notebook: take it from the ORACLE_PASSWORD
# environment variable when present, otherwise ask for it interactively.
password = os.environ.get("ORACLE_PASSWORD") or getpass.getpass(
    f"Password for database user {username}: "
)
# Connect string in host:port/service_name form
dsn = f"{host}:{port}/{service_name}"

print("The database user name is:", username)
print("Database connection information is:", dsn)

## Connect to the database
try:
    conn23c = oracledb.connect(user=username, password=password, dsn=dsn)
    print("Connection successful!")
except oracledb.DatabaseError as e:
    error, = e.args
    print(f"Connection failed. Error code: {error.code}")
    print(f"Error message: {error.message}")
    # Re-raise so execution stops here; otherwise conn23c stays undefined and
    # the vector-store cell fails later with an unrelated NameError.
    raise

The database user name is: vector
Database connection information is: localhost:1521/freepdb1
Connection successful!


In [12]:
## Set up the NVIDIA embedding model; the same instance is used below to embed
## both the query and the document chunks
embedding_model = NVIDIAEmbeddings(
    model="nvidia/nv-embedqa-e5-v5",
)



In [14]:
## Embed a sample question and display only the first 10 components of the
## resulting vector
embedding_model.embed_query(
    "How much memory does the NVIDIA H200 have?"
)[:10]

[-0.0251007080078125,
 -0.038055419921875,
 0.035980224609375,
 -0.061309814453125,
 0.056396484375,
 -0.001224517822265625,
 0.01220703125,
 -0.04010009765625,
 -0.0258941650390625,
 -0.029815673828125]

In [15]:
# Fetch the NVIDIA H200 datasheet PDF from its URL and load it
loader = PyPDFLoader(
    "https://nvdam.widen.net/content/udc6mzrk7a/original/hpc-datasheet-sc23-h200-datasheet-3002446.pdf"
)
document = loader.load()
# Display the first loaded Document (page 0 of the PDF)
document[0]

Document(metadata={'source': 'https://nvdam.widen.net/content/udc6mzrk7a/original/hpc-datasheet-sc23-h200-datasheet-3002446.pdf', 'page': 0}, page_content='NVIDIA H200 Tensor Core GPU\u2002|\u2002Datasheet\u2002|\u2002 1NVIDIA H200 Tensor Core GPU\nSupercharging AI and HPC workloads.\nHigher Performance With Larger, Faster Memory\nThe NVIDIA H200 Tensor Core GPU supercharges generative AI and high-\nperformance computing (HPC) workloads with game-changing performance  \nand memory capabilities. \nBased on the NVIDIA Hopper™ architecture , the NVIDIA H200 is the first GPU to \noffer 141 gigabytes (GB) of HBM3e memory at 4.8 terabytes per second (TB/s)—\nthat’s nearly double the capacity of the NVIDIA H100 Tensor Core GPU  with \n1.4X more memory bandwidth. The H200’s larger and faster memory accelerates \ngenerative AI and large language models, while advancing scientific computing for \nHPC workloads with better energy efficiency and lower total cost of ownership. \nUnlock Insights Wit

In [16]:
# Configure the splitter that breaks the document into smaller chunks:
# the separators are listed from coarsest to finest, the chunk size is 500 and
# consecutive chunks overlap by 100
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", ";", ",", " ", ""],
    chunk_overlap=100,
    chunk_size=500,
)

In [17]:
# Run the splitter over the loaded pages and report how many chunks it produced
document_chunks = text_splitter.split_documents(document)
print(
    "Number of chunks from the document:",
    len(document_chunks),
)

Number of chunks from the document: 16


In [19]:
# Collect the raw text (page_content) of every chunk, in chunk order
page_contents = [
    chunk.page_content
    for chunk in document_chunks
]

In [20]:
# Embed all chunk texts and display the first 10 components of the first
# chunk's vector
embedding_model.embed_documents(
    page_contents
)[0][:10]

[-0.0394287109375,
 -0.03741455078125,
 0.06634521484375,
 -0.0518798828125,
 0.08477783203125,
 -0.0224456787109375,
 0.02484130859375,
 -0.0247802734375,
 -0.01496124267578125,
 -0.005344390869140625]

In [24]:
# Build an OracleVS vector store from the document chunks, embedding them with
# embedding_model and storing them in table MY_DEM04 over the conn23c
# connection (Oracle 23ai); DOT_PRODUCT is the configured distance strategy
vector_store = OracleVS.from_documents(
    document_chunks,
    embedding_model,
    table_name="MY_DEM04",
    client=conn23c,
    distance_strategy=DistanceStrategy.DOT_PRODUCT,
    # tablespace="my_tablespace"
)

In [27]:
def format_docs(docs):
    """Join the page text of the retrieved documents into one context string.

    Without this step the retriever's list of Document objects is interpolated
    into the prompt as a Python repr (metadata and escaped newlines included).
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Create a new chat prompt template for the AI with context awareness.
# Adjacent string literals are joined with no separator, so each sentence
# carries its own trailing space.
prompt = ChatPromptTemplate.from_messages([
    ("system",
        "You are a helpful and friendly AI! "
        "Your responses should be concise and no longer than two sentences. "
        "Do not hallucinate. Say you don't know if you don't have this information."
        # Optional stricter instruction: "Answer the question using only the context"
        "\n\nQuestion:{question}\n\nContext:{context}"
    ),
    ("user", "{question}")
])
# Create a chain that retrieves context from the vector store and answers questions.
# The input string goes to both branches: the retriever (whose documents are
# flattened to text by format_docs) and, unchanged, the {question} slot.
chain = (
    {
        "context": vector_store.as_retriever() | format_docs,
        "question": RunnablePassthrough()
    }
    | prompt
    | llm
    | StrOutputParser()
)
# Invoke the chain with a specific question, using the retrieved context
print(chain.invoke("How much memory does the NVIDIA H200 have?"))

The NVIDIA H200 has 141 gigabytes (GB) of HBM3e memory.
