In [1]:
# Install/upgrade the LangChain-Ollama integration package into the kernel's
# environment (it provides the `OllamaLLM` class imported further down).
# NOTE(review): the version is not pinned, so a re-run may pull a newer release.
%pip install -qU langchain-ollama

In [12]:
# Install the NVIDIA AI Endpoints integration for LangChain (it provides the
# `NVIDIAEmbeddings` class used later to embed the document chunks).
# NOTE(review): the version is not pinned, so a re-run may pull a newer release.
%pip install -q langchain_nvidia_ai_endpoints

Note: you may need to restart the kernel to use updated packages.


In [151]:
import os

# Allocator option for PyTorch's CUDA caching allocator. It is set here, ahead
# of the `import torch` lines below — presumably so the allocator sees it when
# CUDA is first initialised (verify against the PyTorch memory-management docs).
os.environ["PYTORCH_CUDA_ALLOC_CONF"]="expandable_segments:True"

from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PDFPlumberLoader,TextLoader
from tqdm.notebook import tqdm
import pandas as pd
from typing import Optional, List, Tuple
import matplotlib.pyplot as plt
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from langchain_community.embeddings import HuggingFaceHubEmbeddings
from torch import cuda
from langchain.vectorstores import FAISS
from langchain_community.vectorstores.utils import DistanceStrategy
from langchain_core.output_parsers import StrOutputParser
#from langchain_community.llms import Ollama
from langchain_ollama.llms import OllamaLLM
#from langchain_ollama import OllamaEmbeddings 
from langchain_core.runnables import Runnable
import torch
from langchain.prompts.prompt import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate

In [45]:
# Free GPU memory held by PyTorch.
import gc

# Run the garbage collector first: tensors that are only kept alive by
# unreachable reference cycles are still "in use" from the allocator's point of
# view, so emptying the cache before collecting them would leave their blocks
# reserved.
gc.collect()
# Now hand the unused cached blocks back to the CUDA driver.
torch.cuda.empty_cache()

0

In [152]:
# Ollama-served model shared by the chains built in the cells below.
# NOTE(review): Ollama documents `num_gpu` as the number of model layers
# offloaded to the GPU, not a device count — confirm that 2 is intended.
llm = OllamaLLM(
    model="qwen2:7b-instruct",
    num_gpu=2,
)

In [84]:
# Smoke test for the model: prompt -> llm -> plain string.
output_parser = StrOutputParser()
prompt = ChatPromptTemplate.from_template("tell me a short joke about {topic}")

chain = (
    prompt
    | llm
    | output_parser
)

# The dict key must match the `{topic}` placeholder in the prompt.
chain.invoke({"topic": "ice cream"})

"Why don't ice creams ever play hide and seek?\n\nBecause they always get frozen out!"

In [66]:
from langchain_community.embeddings import OllamaEmbeddings

# Embedding model served through Ollama.
# NOTE(review): no later cell in view references `ollama_embeddings`; the vector
# store is built with the NVIDIA `embedder` instead — confirm this is still needed.
ollama_embeddings = OllamaEmbeddings(
    model="rjmalagon/gte-qwen2-1.5b-instruct-embed-f16",
    num_gpu=2,
)

In [153]:
from getpass import getpass


In [154]:
# Use a key already exported as NVIDIA_API_KEY when there is one, so the
# notebook can be re-run unattended; otherwise ask for it interactively.
# getpass() does not echo the typed value, which keeps the key out of the
# saved notebook output.
NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY") or getpass("NVIDIA API key: ")

 ········


In [87]:
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

# List the embedding models the NVIDIA endpoint client knows about, to pick a
# model id for the embedder created in a later cell.
NVIDIAEmbeddings.get_available_models()

[Model(id='baai/bge-m3', model_type='embedding', client='NVIDIAEmbeddings', endpoint=None, aliases=None, supports_tools=False, supports_structured_output=False, base_model=None),
 Model(id='nvidia/nv-embedqa-e5-v5', model_type='embedding', client='NVIDIAEmbeddings', endpoint=None, aliases=None, supports_tools=False, supports_structured_output=False, base_model=None),
 Model(id='nvidia/nv-embed-v1', model_type='embedding', client='NVIDIAEmbeddings', endpoint=None, aliases=['ai-nv-embed-v1'], supports_tools=False, supports_structured_output=False, base_model=None),
 Model(id='snowflake/arctic-embed-l', model_type='embedding', client='NVIDIAEmbeddings', endpoint=None, aliases=['ai-arctic-embed-l'], supports_tools=False, supports_structured_output=False, base_model=None),
 Model(id='NV-Embed-QA', model_type='embedding', client='NVIDIAEmbeddings', endpoint='https://ai.api.nvidia.com/v1/retrieval/nvidia/embeddings', aliases=['ai-embed-qa-4', 'playground_nvolveqa_40k', 'nvolveqa_40k'], suppor

In [155]:
# Hosted NVIDIA embedding model; this is the embedder used to build the FAISS index.
# NOTE(review): truncate="NONE" presumably makes over-long inputs fail instead
# of being silently shortened — confirm against the NVIDIAEmbeddings docs.
embedder = NVIDIAEmbeddings(
    model="nvidia/nv-embedqa-mistral-7b-v2",
    nvidia_api_key=NVIDIA_API_KEY,
    truncate="NONE",
)

In [156]:
# Read the food/menu text file into LangChain documents.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere unless the file exists at exactly this location.
loader = TextLoader(
    "/home/sara/Documents/Advanced_Rag/Udemy-Advanced-LangChain-main/data/food.txt",
    encoding="UTF-8",
)
documents = loader.load()

In [157]:
# Separators handed to the recursive text splitter, ordered from the coarsest
# boundary to the finest: markdown headings, code fences, horizontal rules,
# blank lines, single newlines, spaces and finally the empty string.
# The heading/rule entries are written as regular expressions; they only act
# as patterns when the splitter is created with `is_separator_regex=True`.
MARKDOWN_SEPARATORS = [
    "\n#{1,6} ",
    "```\n",
    "\n\\*\\*\\*+\n",
    "\n---+\n",
    "\n___+\n",
    "\n\n",
    "\n",
    " ",  # this comma was missing, which fused " " and "" into a single " "
    "",   # last resort: split between characters so no chunk exceeds the limit
]

# Split the loaded documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1300,  # the maximum number of characters in a chunk: we selected this value arbitrarily
    chunk_overlap=100,  # the number of characters to overlap between chunks
    add_start_index=True,  # If `True`, includes chunk's start index in metadata
    strip_whitespace=True,  # If `True`, strips whitespace from the start and end of every document
    separators=MARKDOWN_SEPARATORS,
    # The separator list contains regex patterns (e.g. "\n#{1,6} "). With the
    # default is_separator_regex=False they are escaped and matched literally,
    # so headings and horizontal rules would never be used as split points.
    is_separator_regex=True,
)

# split_documents accepts the whole list and returns one flat list of chunks,
# so no per-document accumulation loop is needed.
docs_processed = text_splitter.split_documents(documents)

In [158]:
# Embed every chunk and index the vectors in an in-memory FAISS store.
#
# The strategy was previously DistanceStrategy.JACCARD, which is a set-overlap
# measure and not one of the metrics LangChain's FAISS wrapper builds an index
# for: the wrapper falls back to a flat L2 index for anything other than
# max-inner-product, while still treating JACCARD scores as "higher is better"
# in threshold filtering. Naming the L2 metric explicitly keeps plain
# similarity search unchanged and makes score handling consistent with the
# index that is actually built.
db = FAISS.from_documents(
    docs_processed,
    embedder,
    distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE,
)
# Default retriever over the store; used as the context source by the chains below.
retriever = db.as_retriever()

In [159]:
# Prompt for the menu question-answering chains: the retrieved text is placed
# in {context} and the user's question in {question}.
prompt_template = """You are a helpful assistant for our restaurant. you are going to answer questions about the food on the menu based on the following context:

{context}

Question: {question}
Answer here:"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)


In [101]:
from langchain.chains import RetrievalQA

# Override the chain's default prompt with the restaurant prompt defined above.
chain_type_kwargs = dict(prompt=PROMPT)

# Retrieval-augmented QA: documents come from `retriever`, the answer from `llm`.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
)

# The returned value is printed as-is; the recorded output shows a dict with
# 'query' and 'result' keys.
result = qa.invoke(input="What is the most expensive food on the menue?")
print(result)

{'query': 'What is the most expensive food on the menue ', 'result': 'The most expensive food on the menu is "caponata" priced at $21.'}


In [144]:
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    """Join the text of retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string, such as
            the documents returned by the retriever.

    Returns:
        The page contents separated by blank lines; an empty string when
        ``docs`` is empty.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# LCEL version of the retrieval QA chain. The incoming question string is sent
# both to the retriever (to fetch context) and, unchanged, to {question}.
# The retriever output is piped through format_docs: without it the prompt
# would receive the raw list of Document objects, whose string form mixes the
# menu text with metadata and repr noise.
retrieval_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | PROMPT
    | llm
    | StrOutputParser()
)

In [145]:
# Ask the menu question through the LCEL chain; the same string is used as the
# retriever query and as the {question} value in the prompt.
retrieval_chain.invoke("What is the most expensive food on the menue ?")

'The most expensive food on the menu is "branzino" which costs $21. This is described as Mediterranean sea bass, usually grilled or baked.'

In [162]:
# Prompt that turns a follow-up message plus the prior conversation into a
# self-contained question. Placeholders: {chat_history} and {question}.
rephrase_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
# from_template derives the input variables from the placeholders in the string.
REPHRASE_TEMPLATE = PromptTemplate.from_template(rephrase_template)

In [165]:
# Answer prompt for the conversational chain: same instructions as the earlier
# restaurant prompt but built as a chat prompt and without the trailing
# "Answer here:" cue. Placeholders: {context} and {question}.
template = """You are a helpful assistant for our restaurant. you are going to answer questions about the food on the menu based on the following context:
{context}

Question: {question}
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

In [163]:
from langchain_core.messages import AIMessage, HumanMessage

# Follow-up rewriter: fills the rephrase prompt, calls the model and returns
# the standalone question as a plain string.
rephrase_chain = (
    REPHRASE_TEMPLATE
    | llm
    | StrOutputParser()
)

In [170]:
# Try the rewriter on its own: one prior question/answer pair plus a follow-up
# that only makes sense with that history.
rephrase_chain.invoke(
    dict(
        question="No, Are you sure this is the most expensive dish on the menu?",
        chat_history=[
            HumanMessage(content="What is the most expensive food on the menue?"),
            AIMessage(content="The most expensive food on the menu is Branzino, which costs $21."),
        ],
    )
)

'Is Branzino the most expensive dish on the menu, and does it cost $21?'

In [166]:
# Rebuild `retrieval_chain` around ANSWER_PROMPT (this rebinds the name).
# The incoming question string goes to the retriever and, unchanged, to
# {question}. The retrieved Document objects are reduced to their text before
# reaching the prompt; passing the list through directly would insert its
# repr, metadata included, into {context}.
retrieval_chain = (
    {
        "context": retriever
        | (lambda docs: "\n\n".join(doc.page_content for doc in docs)),
        "question": RunnablePassthrough(),
    }
    | ANSWER_PROMPT
    | llm
    | StrOutputParser()
)

In [171]:
# Conversational pipeline: rephrase_chain turns {question, chat_history} into a
# standalone question string, which retrieval_chain then uses as its input.
final_chain = rephrase_chain | retrieval_chain

In [174]:
# End-to-end check: a terse follow-up that has to be rewritten using the
# history before retrieval can find anything relevant.
final_chain.invoke(
    dict(
        question="No, really?",
        chat_history=[
            HumanMessage(content="What is the most expensive food on the menue?"),
            AIMessage(content="The most expensive food on the menu is Branzino, which costs $21."),
        ],
    )
)

"Yes, Branzino is one of the more expensive dishes on the menu based on the provided context. It costs $21, which positions it as a higher-priced main dish option in the list of restaurant items you've shared."