In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

# Chat model used by both the per-document map step and the final answer step.
llm = ChatOpenAI()  # called with no arguments, so the library defaults apply

# Local-disk byte store; passed below as the backing store for cached embeddings.
cache_dir = LocalFileStore("./.cache/")

# Splits on newlines. chunk_size / chunk_overlap go to the tiktoken-based
# constructor (presumably measured in tokens, not characters -- confirm).
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

loader = UnstructuredFileLoader("./files/chapter_one.txt")

# Load the file and cut it into chunks using the splitter configured above.
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Wrap the embedder with the on-disk store so computed vectors can be reused.
# NOTE(review): no namespace is passed here; if the embedding model ever
# changes, cached entries may collide -- check the CacheBackedEmbeddings docs.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir
)

# Build a FAISS index over the chunks, embedding through the cached wrapper.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

# Retriever view of the index; used as the "documents" source in map_chain.
retriever = vectorstore.as_retriever()

# Map-step prompt: shown one document chunk ({context}) together with the
# user's {question}, the model is asked to quote any relevant text verbatim.
map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim.
            ------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

# Prompt piped into the chat model; map_docs invokes this once per document.
map_doc_chain = map_doc_prompt | llm

def map_docs(inputs):
    """Run the map step over every retrieved document and merge the results.

    Args:
        inputs: Mapping with two keys:
            ``documents`` -- iterable of objects exposing ``page_content``;
            ``question`` -- the question forwarded to each map call.

    Returns:
        The ``content`` of each ``map_doc_chain`` response, in document
        order, joined with a blank line between consecutive extracts.
        An empty ``documents`` iterable yields an empty string.
    """
    retrieved = inputs['documents']
    query = inputs['question']

    extracts = []
    for document in retrieved:
        # One model call per document: ask for the passages relevant to query.
        response = map_doc_chain.invoke(
            {"context": document.page_content, "question": query}
        )
        extracts.append(response.content)

    return "\n\n".join(extracts)

# Inputs for the map step: the retriever supplies "documents" for the incoming
# question, while the question itself is forwarded unchanged under "question".
map_inputs = {"documents": retriever, "question": RunnablePassthrough()}

# Feed that mapping into map_docs, wrapped so it can sit inside a chain.
map_chain = map_inputs | RunnableLambda(map_docs)

# Reduce-step prompt: {context} holds the extracts produced by the map step,
# and the model is told to answer {question} from them or admit it doesn't know.
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Given the following extracted parts of a long document and a question, create a final answer.
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

# Inputs for the final prompt: "context" is produced by running map_chain on
# the question, and "question" carries the original question through as-is.
answer_inputs = {"context": map_chain, "question": RunnablePassthrough()}

# Full pipeline: gather the inputs, render final_prompt, then call the model.
chain = answer_inputs | final_prompt | llm

chain.invoke("Describe Victory Mansions.")

AIMessage(content='Victory Mansions is a dilapidated apartment building in George Orwell\'s novel "1984." It is depicted as being run-down and in poor condition, located in a city controlled by a totalitarian regime. The building has a shabby appearance with peeling paint and crumbling walls, reflecting the oppressive nature of the society. Inside, the living conditions are grim with cramped and poorly maintained apartments. The hallway smells of boiled cabbage and old rag mats, with posters of a man\'s face and the caption "BIG BROTHER IS WATCHING YOU." The building has a faulty lift, a telescreen constantly broadcasting information on pig-iron production, and a rooftop view of the government Ministries amidst a grimy landscape of rotting houses and bombed sites. Inhabited by Party members, the building\'s living-room features an unusual telescreen position and a shallow alcove where Winston sits, hinting at its original purpose for bookshelves.')