In [None]:
from ast import parse
import bs4
from langchain_community.document_loaders import WebBaseLoader
import torch
import tqdm
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings

from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate

from langchain.load import dumps, loads
from langchain.schema.output_parser import StrOutputParser

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_community.llms import Ollama
import os
from operator import itemgetter


# LangSmith / HTTP configuration.
# Values already exported in the environment are respected rather than being
# overwritten with empty strings, and no credential is hardcoded here: export
# LANGCHAIN_API_KEY before starting the kernel to enable tracing.
os.environ.setdefault('LANGCHAIN_ENDPOINT', 'https://api.smith.langchain.com')

# Only switch tracing on when an API key is actually available; tracing with an
# empty key cannot authenticate against the endpoint.
os.environ['LANGCHAIN_TRACING_V2'] = (
    'true' if os.environ.get('LANGCHAIN_API_KEY') else 'false'
)

# Identify this notebook to the web server. An empty value is treated as unset.
# NOTE(review): the loader library may read USER_AGENT at import time, in which
# case it has to be set before the imports above run — confirm.
if not os.environ.get('USER_AGENT'):
    os.environ['USER_AGENT'] = 'rag-multi-query-notebook'

# --- Load -------------------------------------------------------------------
# Parse only the elements carrying the post's title, header and body classes.
post_strainer = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_strainer},
)
blog_docs = loader.load()

# --- Split ------------------------------------------------------------------
# Chunk sizes are measured with the tiktoken encoder: 300 per chunk, 50 overlap.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300,
    chunk_overlap=50,
)
splits = text_splitter.split_documents(blog_docs)

# --- Embed ------------------------------------------------------------------
# Sentence-transformers model run on CPU, embeddings left un-normalized.
modelPath = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = dict(device='cpu')
encode_kwargs = dict(normalize_embeddings=False)
embeddings = HuggingFaceEmbeddings(
    model_name=modelPath,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# --- Index ------------------------------------------------------------------
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
retriever = vectorstore.as_retriever()

### ___Query LLM___

In [None]:
# model_id = "mistralai/Mistral-7B-Instruct-v0.2"
model_id = "microsoft/Phi-3-mini-4k-instruct"

# LLM used to rewrite the user question into several alternative queries.
# Generation settings go in `pipeline_kwargs` (forwarded to the transformers
# pipeline); `model_kwargs` is forwarded to model loading instead.
# Deterministic output is requested with greedy decoding (`do_sample=False`):
# sampling with a temperature of 0.0 is not a valid combination.
Query_LLM = HuggingFacePipeline.from_model_id(
    model_id=model_id,
    task="text-generation",
    pipeline_kwargs={
        "do_sample": False,
        # Enough room for five short rephrasings of the question.
        "max_new_tokens": 256,
    },
)

### ___Answer LLM___

In [None]:
# LLM that writes the final answer from the retrieved context, accessed through
# the Ollama wrapper with a low sampling temperature.
Answer_LLM = Ollama(model="qwen2.5:7b-instruct", temperature=0.2)

#print(Answer_LLM("Explain transformers in simple terms"))

# model_id = "mistralai/Mistral-7B-Instruct-v0.2"
# model_id = "microsoft/Phi-3-mini-4k-instruct"

# Answer_LLM = HuggingFacePipeline.from_model_id(
#     model_id=model_id,
#     task="text-generation",
#     model_kwargs={ "temperature": 0.0,
#                    "do_sample": True },
# )

# tokenizer = AutoTokenizer.from_pretrained(model_id)

# model = AutoModelForCausalLM.from_pretrained(model_id)

# pipe = pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
#     max_new_tokens=256,
#     temperature=0.0,
# )

# Answer_LLM = HuggingFacePipeline(pipeline=pipe)

### __Multi Query__

In [None]:
# The blog post is already loaded, split, embedded and indexed in the first
# cell (`loader`, `blog_docs`, `text_splitter`, `splits`, `embeddings`,
# `vectorstore`). Repeating that pipeline here would download and embed the
# page a second time and call `Chroma.from_documents` again.
# NOTE(review): with an in-memory Chroma store, a second `from_documents` call
# may add the same chunks to the same default collection, which would surface
# as duplicate hits at retrieval time — confirm against the installed chromadb.
# Reuse the existing index instead of rebuilding it.
retriever = vectorstore.as_retriever()

### ___Prompt___

In [None]:


# Multi Query
# Prompt that asks the LLM for five alternative phrasings of the user's
# question, separated by newlines, to be used as additional search queries.
# `{question}` is the only placeholder in the template.
template = """
You are an AI language model assistant. Your task is to generate five different versions of the given
user question to retrieve relevant documents from a vector database. By generating multiple perspectives
on the user question, your goal is to help the user overcome some of the limitations of the distance
based similarity search. Provide these alternative questions separated by newlines.
Original question: {question}
"""

# Chat prompt built from the template above.
prompt_perspectives = ChatPromptTemplate.from_template(template)

def parse_queries(text: str) -> list[str]:
    """Split raw LLM output into a list of non-empty, trimmed lines.

    Args:
        text: Text with one candidate query per line.

    Returns:
        The lines of ``text`` in their original order, each stripped of
        surrounding whitespace, with blank lines dropped.
    """
    queries: list[str] = []
    for raw_line in text.split("\n"):
        candidate = raw_line.strip()
        if candidate:
            queries.append(candidate)
    return queries

output_parser = StrOutputParser()

# question -> prompt -> LLM -> text -> list of queries.
# The prompt asks for one query per line, so the raw completion must be split
# into a list before it reaches `retriever.map()` downstream, which runs the
# retriever once per element of its input. Without the final `parse_queries`
# step the chain would hand over one string instead of a list of queries.
query_chain = (
    prompt_perspectives
    | Query_LLM
    | output_parser
    | parse_queries
)

In [None]:


def get_unique_union(documents: list[list]):
    """Flatten per-query retrieval results into one de-duplicated list.

    Args:
        documents: One list of retrieved documents per generated query.

    Returns:
        Every distinct document exactly once, in first-seen order. Two
        documents count as the same when their ``dumps`` serializations match.
    """
    serialized = (dumps(doc) for sublist in documents for doc in sublist)
    # dict.fromkeys de-duplicates while keeping insertion order; going through
    # a set would give a different ordering on every kernel restart, which
    # changes the order of the context handed to the answer prompt.
    unique_serialized = dict.fromkeys(serialized)
    return [loads(doc) for doc in unique_serialized]

question = "What is task decomposition for LLM agents?"

# `retriever.map()` applies the retriever to every element of the output of
# `query_chain`; the per-element results are then merged and de-duplicated.
retrieve_for_each = retriever.map()
retrieval_chain = query_chain | retrieve_for_each | get_unique_union

docs = retrieval_chain.invoke({"question": question})
len(docs)

In [None]:
# Inspect the container type returned by the retrieval chain.
type(docs)

In [None]:
# Peek at the first retrieved document.
docs[0]

In [None]:
# Answer prompt: `{context}` receives the retrieved text, `{question}` the
# user's original question.
template = """
Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)


def format_docs(documents: list) -> str:
    """Join the text of the retrieved documents into one context string.

    Args:
        documents: Retrieved documents, each exposing a ``page_content`` string.

    Returns:
        The documents' ``page_content`` values separated by blank lines, or an
        empty string when nothing was retrieved.
    """
    return "\n\n".join(doc.page_content for doc in documents)


# The retrieved documents are rendered to plain text before they reach the
# prompt; otherwise `{context}` would be filled with the repr of a list of
# Document objects, metadata included.
final_rag_chain = (
    {
        "context": retrieval_chain | format_docs,
        "question": itemgetter("question"),
    }
    | prompt
    | Answer_LLM
    | StrOutputParser()
)

final_rag_chain.invoke({"question": question})