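**Map Reduce**

Each retrieved chunk is first queried on its own for text relevant to the question (map); the extracted passages are then joined into a single context that the model uses to produce the final answer (reduce).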

In [7]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.prompts import ChatPromptTemplate
from langchain.storage import LocalFileStore

In [10]:
# Chat model shared by the chains defined later in the notebook.
llm = ChatOpenAI(temperature=0.1)

# Embedding model wrapped with a file-store cache rooted at ../.cache/.
cache_dir = LocalFileStore("../.cache/")
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# Load the chapter and split it on newlines, targeting 600-character chunks
# with a 100-character overlap. The splitter warns (see the cell output) when
# a chunk ends up longer than the requested size.
splitter = CharacterTextSplitter(separator="\n", chunk_size=600, chunk_overlap=100)
loader = UnstructuredFileLoader("../files/chapter_one.docx")
docs = loader.load_and_split(text_splitter=splitter)

# Build the FAISS index from the chunks using the cached embeddings.
vectorstore = FAISS.from_documents(docs, cached_embeddings)


Created a chunk of size 963, which is longer than the specified 600
Created a chunk of size 774, which is longer than the specified 600
Created a chunk of size 954, which is longer than the specified 600
Created a chunk of size 922, which is longer than the specified 600
Created a chunk of size 1168, which is longer than the specified 600
Created a chunk of size 821, which is longer than the specified 600
Created a chunk of size 700, which is longer than the specified 600
Created a chunk of size 745, which is longer than the specified 600
Created a chunk of size 735, which is longer than the specified 600
Created a chunk of size 1110, which is longer than the specified 600
Created a chunk of size 991, which is longer than the specified 600
Created a chunk of size 990, which is longer than the specified 600
Created a chunk of size 1741, which is longer than the specified 600
Created a chunk of size 2001, which is longer than the specified 600
Created a chunk of size 1900, which is longe

AIMessage(content='Winston goes to work at the Ministry of Truth.')

In [12]:
# Retriever over the FAISS index; it supplies the documents for the map step.
retriever = vectorstore.as_retriever()

# Map-step prompt: the model is shown one portion of the document and asked
# to return, verbatim, any text relevant to the question.
map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """Use the following portion of a long document to see if any of the text 
     is relevant to answer the question. Return any relevant text verbatim.
     -------
     {context}
     """,
        ),
        ("human", "{question}"),
    ]
)

# The map prompt piped into the chat model.
map_docs_chain = map_doc_prompt | llm
def map_docs(inputs):
    """Run the map step: query each retrieved document on its own and merge the replies.

    Args:
        inputs: Mapping with two keys:
            "documents" - iterable of retrieved documents, each exposing
                its text as ``page_content``;
            "question" - the user's question, forwarded unchanged to every
                per-document call.

    Returns:
        The ``content`` of each per-document response, joined with a blank
        line between them. An empty string when there are no documents.
    """
    documents = inputs["documents"]
    question = inputs["question"]
    return "\n\n".join(
        map_docs_chain.invoke(
            {
                # Pass only the text. Formatting the whole document object
                # into the prompt would insert its string representation
                # (including metadata) rather than the passage itself.
                "context": doc.page_content,
                "question": question,
            }
        ).content
        for doc in documents
    )
# Map stage: the input is sent to the retriever (as "documents") and also
# passed through unchanged (as "question"); map_docs then reduces that dict
# to one string of per-document responses.
map_chain = {
    "documents": retriever,
    "question": RunnablePassthrough(),
} | RunnableLambda(map_docs)
# Final prompt: answer the question using only the text supplied as {context}.
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are a helpful assistant. Answer questions using only the 
     following context. If you don't know the answer just say you don't know, 
     Don't try to make up an answer.
     -------
     {context}
     """,
        ),
        ("human", "{question}"),
    ]
)

# Full pipeline: map_chain produces the context, the question is passed
# through as-is, and both are fed to the final prompt and the chat model.
chain = {
    "context": map_chain,
    "question": RunnablePassthrough(),
} | final_prompt | llm

chain.invoke("Describe Victory Mansions")

AIMessage(content='Victory Mansions is a run-down apartment building in London, part of a group of four similar buildings. It has three thousand rooms above ground level and corresponding ramifications below. The building dwarfs the surrounding architecture and is home to the Ministry of Truth. It has glass doors, a hallway that smells of boiled cabbage and old rag mats, and a large colored poster of a man with a mustache and the caption "BIG BROTHER IS WATCHING YOU."')