python version 3.11.8

In [3]:
! pip install trulens_eval openai langchain chromadb langchainhub bs4 tiktoken langchain-core langchain-openai



In [None]:
import os
#os.environ["OPENAI_API_KEY"] = "sk-" #hide the key

In [4]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from trulens_eval import Tru
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from trulens_eval.feedback.provider import OpenAI
import logging
from trulens_eval.app import App
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain import hub

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Load blog post
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()

# Split
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
splits = text_splitter.split_documents(data)

# VectorDB
embedding = OpenAIEmbeddings()
vectordb = Chroma.from_documents(documents=splits, embedding=embedding)

QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five 
    different versions of the given user question to retrieve relevant documents from a vector 
    database. By generating multiple perspectives on the user question, your goal is to help
    the user overcome some of the limitations of the distance-based similarity search. 
    Provide these alternative questions separated by newlines.
    Original question: {question}""",
)


question = "What are the approaches to Task Decomposition?"
llm = ChatOpenAI(temperature=0)
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=vectordb.as_retriever(), llm=llm, prompt=QUERY_PROMPT
)

logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)


In [6]:
unique_docs = retriever_from_llm.get_relevant_documents(query=question)
print(len(unique_docs))

INFO:langchain.retrievers.multi_query:Generated queries: ['1. How can Task Decomposition be achieved through various methods?', '2. What strategies are commonly used for Task Decomposition?', '3. What are the different ways to break down tasks in Task Decomposition?', '4. How do experts typically approach Task Decomposition?', '5. What are the diverse approaches available for Task Decomposition?']


5


In [10]:
tru = Tru()
tru.reset_database()
# Initialize provider class
provider = OpenAI()

# select context to be used in feedback. the location of context is app specific.

prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever_from_llm | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

context = App.select_context(rag_chain) # you would get error here because of retriever_from_llm multiquery 

# retriever_from_llm = MultiQueryRetriever.from_llm(
#     retriever=vectordb.as_retriever(), llm=llm, prompt=QUERY_PROMPT
# )


🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of Tru` to prevent this.


ValueError: Found more than one `BaseRetriever` in app:
	<class 'langchain.retrievers.multi_query.MultiQueryRetriever'> at first.steps.context.first
	<class 'langchain_core.vectorstores.VectorStoreRetriever'> at first.steps.context.first.retriever