In [1]:
import os
import bs4
import getpass
from langchain import hub
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_community.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [2]:
# Never store the API key in the notebook itself: reuse the value already
# exported in the environment, and only prompt (without echo) when it is
# missing or does not look like an NVIDIA key.
if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
    nvidia_api_key = getpass.getpass("Enter your NVIDIA API key: ")
    if not nvidia_api_key.startswith("nvapi-"):
        raise ValueError("NVIDIA API keys are expected to start with 'nvapi-'")
    os.environ["NVIDIA_API_KEY"] = nvidia_api_key

## Load documents by scraping the web

In [3]:
def load_documents(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    chunk_size=1000,
    chunk_overlap=200,
):
    """Download web pages and split their content into overlapping chunks.

    Only the HTML elements whose CSS class is ``post-content``, ``post-title``
    or ``post-header`` are parsed; the rest of each page is ignored.

    Args:
        web_paths: Sequence of URLs to fetch. Defaults to the single blog post
            used throughout this notebook.
        chunk_size: Maximum size of each chunk, passed to
            ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Overlap between consecutive chunks, passed to
            ``RecursiveCharacterTextSplitter``.

    Returns:
        The list of chunked documents produced by ``split_documents``.
        Indexing into a vector store is left to the caller.
    """
    # Restrict parsing to the parts of the page that carry the article itself.
    article_only = bs4.SoupStrainer(
        class_=("post-content", "post-title", "post-header")
    )
    loader = WebBaseLoader(
        web_paths=tuple(web_paths),
        bs_kwargs=dict(parse_only=article_only),
    )
    docs = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return text_splitter.split_documents(docs)

## Format docs

In [4]:
def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line."""
    contents = []
    for document in docs:
        contents.append(document.page_content)
    return "\n\n".join(contents)

## LLM

In [5]:
# Model identifiers, kept in one place so they are easy to swap.
LLM_MODEL = "mixtral_8x7b"
EMBEDDING_MODEL = "nvolveqa_40k"

# Chat model that generates the final answer.
llm = ChatNVIDIA(model=LLM_MODEL)

# Embedding model used to vectorise the document chunks.
# NOTE(review): model_type="passage" may also apply to query embeddings in
# some library versions — confirm against the installed package.
document_embedder = NVIDIAEmbeddings(
    model=EMBEDDING_MODEL,
    model_type="passage",
)

## Vectorstore and retriever

In [6]:
# Fetch the blog post and cut it into chunks.
splits = load_documents()

# Embed every chunk and store the vectors in a Chroma collection.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=document_embedder,
)

# Expose the vector store as a retriever so it can be piped into the RAG chain.
retriever = vectorstore.as_retriever()

## Custom RAG prompt

In [7]:
# Prompt text with two placeholders: {context} receives the retrieved
# snippets and {question} receives the user's question.
template = (
    "Use the following pieces of context to answer the question at the end.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "Use three sentences maximum and keep the answer as concise as possible.\n"
    'Always say "thanks for asking!" at the end of the answer.\n'
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Helpful Answer:"
)
custom_rag_prompt = PromptTemplate.from_template(template)

## RAG chain

In [8]:
# Inputs for the prompt: "context" is the retrieved documents flattened to a
# single string, "question" is the chain input passed through unchanged.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Prompt -> chat model -> plain string output.
rag_chain = rag_inputs | custom_rag_prompt | llm | StrOutputParser()

In [9]:
# Ask a sample question; the cell displays the chain's string answer.
question = "What is Task Decomposition?"
rag_chain.invoke(question)

'Task decomposition is a process in artificial intelligence and software engineering where a complex task is broken down into smaller, manageable subtasks. This method is used to simplify the problem-solving process and to make it easier to design, implement, and maintain software systems or agents. Thanks for asking!'