In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

# Chat model shared by every chain below. It is constructed with no
# arguments, so the library defaults apply. The call parentheses are
# required: they create the model instance that the prompts are piped into.
llm = ChatOpenAI()

# File-backed store rooted at ./.cache/; passed to the embedding cache below.
cache_dir = LocalFileStore("./.cache/")

# Splitter that breaks text on newlines. Built via the tiktoken-based
# constructor, so chunk_size / chunk_overlap are presumably counted in
# tokens rather than characters -- confirm against the LangChain docs.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

# Source document; the path is relative to the current working directory.
loader = UnstructuredFileLoader("./files/chapter_one.txt")

# Load the file and split it into chunks with the splitter defined above.
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Wrap the embedder with the file store above so that computed embeddings
# can be reused from ./.cache/.
# NOTE(review): no namespace is passed here; if several embedding models
# ever share this cache directory their entries may collide -- confirm.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir
)

# Build a FAISS index over the chunks, embedding them through the cached
# embedder.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

# Retriever view of the index; used as the first stage of map_chain.
retriever = vectorstore.as_retriever()

# "Map" step prompt: it is filled in once per retrieved chunk (see map_docs)
# and asks the model to copy out any passage relevant to the question.
# Template variables: {context} is one chunk's text, {question} is the
# user's question.
map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim.
            ------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

# Pipe the formatted prompt into the chat model; invoking this chain with a
# {"context": ..., "question": ...} dict yields the model's reply message.
map_doc_chain = map_doc_prompt | llm

def map_docs(inputs):
    """Run the per-chunk extraction chain over every retrieved document.

    Args:
        inputs: Mapping with a ``"documents"`` entry (an iterable of objects
            exposing ``page_content``) and a ``"question"`` entry.

    Returns:
        The ``content`` of each ``map_doc_chain`` reply, in document order,
        joined with blank lines. An empty string when there are no documents.
    """
    retrieved = inputs["documents"]
    query = inputs["question"]

    extracts = []
    for chunk in retrieved:
        # One model call per chunk: ask for the passages relevant to the query.
        reply = map_doc_chain.invoke(
            {"context": chunk.page_content, "question": query}
        )
        extracts.append(reply.content)

    return "\n\n".join(extracts)

# Map stage: the incoming question is sent to the retriever (-> "documents")
# and also passed through unchanged (-> "question"); the resulting dict is
# then handed to map_docs, which produces the combined extracts.
map_chain = {
    "documents": retriever,
    "question": RunnablePassthrough(),
} | RunnableLambda(map_docs)

# "Reduce" step prompt: receives the extracts gathered by map_chain as
# {context} and the original question as {question}, and asks the model for
# a single final answer without inventing facts.
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Given the following extracted parts of a long document and a question, create a final answer.
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

# Full pipeline: the input question feeds map_chain (-> "context") and is
# also passed through unchanged (-> "question"); both fill final_prompt,
# whose output is sent to the chat model.
chain = (
    {"context": map_chain, "question": RunnablePassthrough()}
    | final_prompt
    | llm
)

# Run the whole pipeline on a sample question. The string is the chain's
# input, so it serves both as the retrieval query and as the question put
# to the model in the map and final prompts.
chain.invoke("Describe Victory Mansions.")

AIMessage(content='Victory Mansions is a run-down apartment building where Winston Smith lives in the novel "1984" by George Orwell. The building is described as having narrow staircases that are never fully lit, always smelling of boiled cabbage and old rag mats. The hallway outside Winston\'s apartment is dimly lit by electricity that only works intermittently. The building is infested with rats, and the apartment itself is cramped and shabby, with peeling wallpaper and a faulty telescreen that is constantly monitoring Winston\'s every move. Overall, Victory Mansions is a bleak and oppressive place that reflects the oppressive and totalitarian society depicted in the novel. It also serves as a vantage point from which the other buildings in Oceania can be observed.')