In [1]:
# Dependencies (run once; `%pip` installs into the environment of the running kernel)
# %pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma bs4 faiss-cpu

## Retrieval Augmented Generation (RAG)
 ### RAG Q&A quickstart (LangChain Demo)
  - Two main steps:
     1. Indexing (load the documents, split them into chunks, and embed them into a vector store such as FAISS)
     2. Retrieval (fetch the relevant chunks from the store and generate a response grounded in that context)
     

In [2]:
import os
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain import hub

In [3]:
# Baseline: ask the phi3 model (served through Ollama) directly, with no
# prompt template and no retrieved context.
llm = Ollama(model="phi3")

llm.invoke("how can langsmith help with testing ?")

' LangSmith, often known as the creator of the "LangSmite" project (a combination of coding and Minecraft), offers a unique approach to software testing that focuses on practical experimentation and learning through doing. While he may not directly provide a standardized service for testing like professional QA companies do, here\'s how LangSmith\'s methods can help with testing:\n\n1. Learning by building: By creating simple programs or tools in programming languages (like JavaScript, Python), you can learn to test the functionality of your code more effectively and efficiently. This hands-on approach helps developers better understand how their code behaves under different conditions and inputs.\n\n2. Community support: LangSmith has a large following on platforms like GitHub, where his projects are open-sourced. Engaging with this community can help you get valuable feedback from other experienced programmers who can provide advice or suggestions for testing strategies that have wor

In [4]:
# Chat prompt: a fixed system persona followed by the user's question.
# The user turn is wrapped in <|user|> ... <|end|> / <|assistant|> markers
# (presumably the Phi-3 chat format -- confirm against the model card).
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a world class technical documentation writer."),
    ("user", "<|user|>{input}<|end|>\n<|assistant|>"),
])

# Parser used as the last step of a chain to return the output as a string.
output_parser = StrOutputParser()

In [5]:
# Display the template's repr to inspect its messages and input variables.
prompt

ChatPromptTemplate(input_variables=['input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a world class technical documentation writer.')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='<|user|>{input}<|end|>\n<|assistant|>'))])

In [6]:
# Compose prompt -> model -> string parser into a single runnable.
chain = (
    prompt
    | llm
    | output_parser
)

In [7]:
# The prompt still carries no retrieved context, so the answer can only come
# from the model's own knowledge.
chain.invoke({"input": "how can langsmith help with testing ?"})

" As a world-class technical documentation writer, LangSmith can assist in testing through the following ways:\n\n1. Documentation Creation: LangSmith provides comprehensive and clear guidelines for software tests that facilitate understanding of test objectives, procedures, and expected results. These detailed documents can help both manual and automated testers understand what is needed to perform a thorough evaluation effectively.\n\n2. Test Design Guidance: Utilizing LangSmith's expertise in writing technical documentation, you can create clear, well-structured test design specifications that outline the testing approach, criteria for success, expected inputs and outputs, as well as edge cases. This will aid in creating effective and efficient test plans and designs.\n\n3. Test Cases Development: LangSmith's expertise in documentation writing can be leveraged to create clear and concise test case descriptions that accurately reflect the purpose of each test scenario. Detailed instr

### RAG pipeline components
 - Embeddings model
 - Vector index
 - Retrieval logic

In [8]:
# Embeddings model: phi3 served through Ollama turns text into vectors.
# NOTE(review): phi3 is a generative model; a dedicated embedding model may
# give better retrieval quality -- confirm before relying on the results.
embed_model = OllamaEmbeddings(model="phi3")

# Web loader: BeautifulSoup is told to parse only the elements whose CSS class
# is one of the post's content, title or header classes.
loader = WebBaseLoader(
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
docs = loader.load()

# Text splitter: chunks of up to 1000 characters with a 200-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(docs)

# Vector store: embed every chunk and index it in a local FAISS store.
vectorstore = FAISS.from_documents(documents=documents, embedding=embed_model)


In [9]:
# Expose the FAISS index as a retriever (default search settings).
retriever = vectorstore.as_retriever()

# RAG prompt built from a single template string (target model:
# Phi3-mini-4k-instruct). The template is spelled out line by line so that its
# leading indentation and the trailing space after "Answer:" stay explicit.
#
# Alternative, not active: the same instructions with Phi-3 chat markers --
# a `<|user|>` line before the question, `<|end|>` right after the context,
# and a closing `<|assistant|>` line in place of "Answer:".
prompt = ChatPromptTemplate.from_template(
    "\n"
    "    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\n"
    "    Question : {input}\n"
    "    Context: {context}\n"
    "    Answer: \n"
    "    "
)

def format_docs(docs):
    """Join the `page_content` of each document into one string.

    Documents are separated by a blank line; an empty iterable yields "".
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

# LCEL pipeline: the incoming question is sent to the retriever (whose
# documents are joined by format_docs) to fill "context", and is passed through
# unchanged as "input"; the filled prompt goes to the LLM and the result is
# parsed into a string.
rag_chain = (
    {
        "context": retriever | format_docs,
        "input": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# The RAG chain can also be created by using `create_stuff_documents_chain`:
#   - the documents are first retrieved with a retriever;
#   - generally they are then formatted, stuffed together into one context,
#     and inserted into the context section of the prompt.

In [10]:
# Invoke the RAG chain with a plain question string; the cell displays the
# returned answer.
rag_chain.invoke("What is chain of thought?")

" Chain of thought prompting, such as CoT (Wei et al. 2022), guides language models like LLMs to break down complex tasks into smaller steps and provide reasoning for their outputs, improving performance by encouraging the model's self-reflection based on feedback sequences."

In [11]:
# Stream the answer instead of waiting for the full response: each chunk is
# printed as it arrives, without a newline and with stdout flushed immediately.
for chunk in rag_chain.stream("What is Task Decomposition?"):
    print(chunk, end="", flush=True)



 Task Decomposition is a problem solving strategy that breaks down complex tasks into smaller, manageable subtasks to make it easier for an agent or system to learn and execute the task efficiently. It involves designing algorithms that can handle each subtask independently while coordinating their efforts towards completing the overall objective.

However, based on the provided context, there is no direct information about Task Decomposition. The discussed topics mainly relate to Model Fine-Tuning (CoH), Algorithm Distillation (AD), and fine-tuning Language Models for specific applications.

## Adding chat history
 - ***Prompt***: Update the prompt so that it accepts the chat history.
 - ***Contextualization***: Reformulate the question with respect to the chat history. E.g., if the user asks something like "What is the second point?" after the model has listed several points, the question cannot be understood on its own, so it is better to rewrite it as a standalone question before retrieval.

In [12]:
# Pull the `rlm/rag-prompt` template from the LangChain Hub and display it.
# Note: this rebinds the name `prompt` used by the earlier cells.
prompt = hub.pull("rlm/rag-prompt")
prompt

ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

#### Contextualize the input query using an LLM.

In [14]:
from langchain.chains.history_aware_retriever import create_history_aware_retriever
# The prompts module provides the chat template and the history placeholder.
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder


# Instruction for the question-rewriting step.
# Taken from - https://python.langchain.com/v0.1/docs/use_cases/question_answering/chat_history/
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Prompt layout: system instruction, then the prior conversation, then the
# latest user question.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

# Retriever built from the LLM, the base retriever and the contextualization
# prompt, so that the chat history can be taken into account when retrieving.
history_aware_retriever = create_history_aware_retriever(
    llm,
    retriever,
    contextualize_q_prompt,
)

### Create the question-answering system prompt and chain

In [20]:
from langchain.chains.retrieval import create_retrieval_chain


# System prompt for the answering step; `{context}` is the slot that receives
# the retrieved documents.
qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise.\

{context}"""

# Prompt layout: system instruction (with context), the prior conversation,
# then the latest user question.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}")
    ]
)

# Answering chain: stuffs the documents found under "context" into qa_prompt
# and calls the LLM.
qa_chain = create_stuff_documents_chain(llm, qa_prompt)

# Conversational RAG chain.
# The retriever must NOT be piped straight into qa_chain: it emits a bare list
# of documents, whereas qa_chain needs a dict holding "context", "input" and
# "chat_history". create_retrieval_chain keeps the input dict, adds the
# retrieved documents under "context", and returns a dict whose "answer" key
# holds the generated reply (this is the key read by the example usage).
rag_chain = create_retrieval_chain(history_aware_retriever, qa_chain)


In [22]:
# Example: a two-turn conversation in which the second question only makes
# sense given the first one.
from langchain_core.messages import AIMessage, HumanMessage

chat_history = []

question = "What is Task Decomposition?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})
# Record both turns with explicit roles. A bare string placed in the history
# would be coerced to a human message, mislabeling the model's own answer.
chat_history.extend(
    [HumanMessage(content=question), AIMessage(content=ai_msg_1["answer"])]
)

second_question = "What are common ways of doing it?"
ai_msg_2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

print(ai_msg_2["answer"])

 Task Decomposition is a technique where complex tasks are broken down into smaller, more manageable subtasks or components. One method involves using Chain of Thought (CoT) to guide an AI model's reasoning process step by step, while another approach fine-tunes language models with external tools and APIs for specific problem domains.
