In [None]:
import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment. Later cells
# read HUGGINGFACEHUB_API_TOKEN from os.environ (presumably defined in that
# file -- confirm for your setup).
load_dotenv()

In [None]:
from langchain.prompts import ChatPromptTemplate

# Prompt: a minimal question-answering template with two placeholders,
# {context} and {question}, that are filled in when the prompt is invoked.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
# Bare last expression so the notebook displays the constructed prompt object.
prompt

In [None]:
# LLM: a Hugging Face hosted endpoint for the Mistral-7B-Instruct model.
from langchain_huggingface import HuggingFaceEndpoint

# Indexing os.environ directly (rather than .get) fails fast with a KeyError
# when the token has not been configured.
huggingFaceApiToken = os.environ["HUGGINGFACEHUB_API_TOKEN"]

llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    # `max_new_tokens` is the endpoint's declared generation-length field.
    # The previously used `max_length` is not a declared field: it was moved
    # into `model_kwargs` with a warning and did not cap the output length.
    max_new_tokens=128,
    temperature=0.5,
    huggingfacehub_api_token=huggingFaceApiToken,
)

In [None]:
# Chain: compose the prompt and the LLM with the `|` operator, so invoking
# `chain` formats the prompt first and passes the result to `llm`.
chain = prompt | llm

In [None]:
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Load the blog post, keeping only the elements whose CSS class marks the
# post title, header, or content.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

# Split into overlapping chunks; sizes are measured in tiktoken tokens.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300,
    chunk_overlap=50,
)

# Make splits
splits = text_splitter.split_documents(blog_docs)

# Re-read the token here so this cell does not depend on the LLM cell having
# been executed first.
huggingFaceApiToken = os.environ["HUGGINGFACEHUB_API_TOKEN"]
embedding = HuggingFaceInferenceAPIEmbeddings(
    api_key=huggingFaceApiToken,
    model_name="sentence-transformers/all-MiniLM-l6-v2",
)

# Index the chunks and expose a retriever that returns only the single best
# match (k=1) for a query.
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding)
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})

# Retrievers are Runnables: `invoke` replaces the deprecated
# `get_relevant_documents` and returns the same list of Document objects.
docs = retriever.invoke("What is Task Decomposition?")

In [None]:
# Run
# Pass the retrieved passages as plain text. Handing the list of Document
# objects to the prompt directly would interpolate the list's repr (metadata
# and all) into {context} instead of just the passage text.
simple_question = chain.invoke(
    {
        "context": "\n\n".join(doc.page_content for doc in docs),
        "question": "What is Task Decomposition?",
    }
)

In [None]:
# Display the value produced by the simple `prompt | llm` chain.
simple_question

In [None]:
from langchain import hub
# Pull the "rlm/rag-prompt" prompt from the LangChain hub (requires network
# access); it is used in place of the hand-written template in the RAG chain.
prompt_hub_rag = hub.pull("rlm/rag-prompt")

In [None]:
# Display the pulled hub prompt so its template can be inspected.
prompt_hub_rag

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of Document objects, as returned by the retriever.

    Returns:
        The documents' ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The incoming question is fanned out to two branches: one retrieves and
# formats the context, the other passes the question through unchanged.
# Piping the retriever through `format_docs` keeps Document reprs and metadata
# out of the prompt, so the model sees only the passage text.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt_hub_rag
    | llm
    | StrOutputParser()
)

littleBitBetterChainAnswer = rag_chain.invoke("What is Task Decomposition?")

In [None]:
# Display the answer string produced by the retrieval-augmented chain.
littleBitBetterChainAnswer