In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

# Chat model shared by the per-document map step and the final answer step.
# ChatOpenAI must be instantiated (keep the parentheses): the chains below
# pipe prompts into this instance, not into the class.
llm = ChatOpenAI()

# On-disk byte store that backs the embedding cache.
cache_dir = LocalFileStore("./.cache/")

# Split on newlines; chunk_size and chunk_overlap are measured with the
# tiktoken encoder rather than in raw characters.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

loader = UnstructuredFileLoader("./files/chapter_one.txt")

# Load the source file and cut it into overlapping chunks in one call.
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Wrap the embedder so already-embedded texts are read back from cache_dir.
# The namespace keys cache entries by embedding model, so switching models
# later cannot return vectors produced by a different model for the same text.
# NOTE: entries written before the namespace was introduced (empty namespace)
# are not reused and will be re-embedded once.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Build the FAISS index over the chunks and expose it as a retriever.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()

# Map-step prompt: one document portion is placed in {context} and the model
# is asked to quote whatever part of it is relevant to {question}.
map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim.
            ------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

# Prompt piped into the chat model; invoked with a dict holding "context"
# and "question".
map_doc_chain = map_doc_prompt | llm

def map_docs(inputs):
    """Run the map step over every document and merge the results.

    Args:
        inputs: Mapping with a ``'documents'`` entry (an iterable of objects
            exposing ``page_content``) and a ``'question'`` entry.

    Returns:
        The ``content`` of each ``map_doc_chain`` response, in document
        order, joined by a blank line.
    """
    retrieved, query = inputs['documents'], inputs['question']

    extracts = []
    for document in retrieved:
        # One model call per document; each call sees only that document's text.
        response = map_doc_chain.invoke(
            {"context": document.page_content, "question": query}
        )
        extracts.append(response.content)

    return "\n\n".join(extracts)

# The incoming question is sent to the retriever to fetch documents and is
# also forwarded unchanged under "question"; map_docs then condenses that
# dict into a single string of extracted passages.
map_chain = (
    {"documents": retriever, "question": RunnablePassthrough()}
    | RunnableLambda(map_docs)
)

# Reduce-step prompt: the extracted passages are placed in {context} and the
# model is asked to answer {question} from them, admitting when it cannot.
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Given the following extracted parts of a long document and a question, create a final answer.
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

# Full pipeline: the input string feeds map_chain (which produces the context)
# and is also passed through unchanged as the question; the filled-in final
# prompt is then sent to the chat model.
chain = (
    {"context": map_chain, "question": RunnablePassthrough()}
    | final_prompt
    | llm
)

# Left as a bare expression so the notebook displays the returned message.
chain.invoke("Describe Victory Mansions.")

AIMessage(content='Victory Mansions is a run-down and dilapidated apartment building where the protagonist, Winston, resides in George Orwell\'s novel "1984." It has a crumbling facade, broken elevators, shabby hallways, and small, cramped apartments with peeling wallpaper and faulty plumbing. The building lacks basic amenities, and the residents often face shortages of food and other necessities. The hallway smells of boiled cabbage and old rag mats, and a large poster of an enormous face with the caption "BIG BROTHER IS WATCHING YOU" is prominently displayed. The building has a faulty lift, and Winston\'s flat is located seven flights up. From the roof of Victory Mansions, one can see four other buildings, including the Ministry of Truth, suggesting it is located in London, Airstrip One. The overall atmosphere of Victory Mansions is grim and oppressive, reflecting the dystopian society depicted in the novel.')