# 6.9 Map Reduce LCEL Chain

In [6]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langsmith import traceable
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.embeddings import CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda


In [7]:
# Chat model shared by the per-document "map" prompt and the final prompt.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.1,
)

# File-backed store handed to CacheBackedEmbeddings below.
cache_dir = LocalFileStore("./.cache/")

# Splitter built via the tiktoken encoder; splits on newlines.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=100,
    chunk_size=600,
    separator="\n",
)

loader = TextLoader("./files/chapter_one.txt")

# Load the source file, then cut it into chunks with the splitter above.
docs = splitter.split_documents(loader.load())

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Wrap the embedding model so it is backed by the cache_dir store.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
)

# Build the FAISS index over every chunk.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

@traceable
def generate_response(chain, prompt):
    """Invoke ``chain`` with ``prompt`` and return the result unchanged.

    Args:
        chain: Any object exposing an ``invoke`` method.
        prompt: The value passed straight through to ``chain.invoke``.

    Returns:
        Whatever ``chain.invoke(prompt)`` returns.
    """
    response = chain.invoke(prompt)
    return response


In [8]:
# Expose the FAISS vector store as a retriever; it supplies the "documents"
# input of map_chain further down.
retriever = vectorstore.as_retriever()


In [9]:
# System message for the "map" step. It is filled with one document's text
# ({context}); the user's question is sent as the human message.
_MAP_DOC_SYSTEM_TEMPLATE = """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            -------
            {context}
            """

map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _MAP_DOC_SYSTEM_TEMPLATE),
        ("human", "{question}"),
    ]
)

# Prompt piped into the chat model: one invocation handles one document.
map_doc_chain = map_doc_prompt | llm


In [14]:
def map_docs_2(inputs):
    """Run ``map_doc_chain`` once per document and join the responses.

    Args:
        inputs: Mapping with a "documents" entry (iterable of objects that
            expose ``page_content``) and a "question" entry.

    Returns:
        The ``.content`` of each ``map_doc_chain`` response, in document
        order, joined with a blank line between entries.
    """
    documents, question = inputs["documents"], inputs["question"]
    extracted = [
        map_doc_chain.invoke(
            {"context": doc.page_content, "question": question}
        ).content
        for doc in documents
    ]
    return "\n\n".join(extracted)

In [10]:
def map_docs(inputs):
    """Apply ``map_doc_chain`` to every document and join the responses.

    Args:
        inputs: Mapping with a "documents" entry (iterable of objects that
            expose ``page_content``) and a "question" entry.

    Returns:
        The ``.content`` of each ``map_doc_chain`` response, in document
        order, separated by blank lines.
    """
    documents = inputs["documents"]
    question = inputs["question"]

    def extract(doc):
        # One chain call per document, always with the same question.
        payload = {"context": doc.page_content, "question": question}
        return map_doc_chain.invoke(payload).content

    return "\n\n".join(map(extract, documents))

In [17]:
# Map step: the incoming question goes to the retriever (to fetch documents)
# and is also passed through unchanged; map_docs then turns both into one
# string of per-document responses.
_map_inputs = {
    "documents": retriever,
    "question": RunnablePassthrough(),
}
map_chain = _map_inputs | RunnableLambda(map_docs)

# System message for the final step; {context} receives map_chain's output.
_FINAL_SYSTEM_TEMPLATE = """
            Given the following extracted parts of a long document and a question, create a final answer. 
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ------
            {context}
            """

final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _FINAL_SYSTEM_TEMPLATE),
        ("human", "{question}"),
    ]
)

# Final step: feed the mapped text plus the original question into the final
# prompt, then into the chat model.
_final_inputs = {"context": map_chain, "question": RunnablePassthrough()}
chain = _final_inputs | final_prompt | llm


# Ask the chain one question. The commented-out calls are alternative sample
# questions kept for quick experimentation.
# chain.invoke("Describe Victory Mansions")
chain.invoke("Where does Winston go to work?")
# chain.invoke("How many ministries are mentioned")

AIMessage(content='Winston works in the Records Department.\n', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-10fcae61-a22a-4bce-b567-d111915516df-0', usage_metadata={'input_tokens': 95, 'output_tokens': 8, 'total_tokens': 103, 'input_token_details': {'cache_read': 0}})