# In [10]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma, FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

# Chat model shared by the per-document (map) chain and the final answer chain.
llm = ChatOpenAI(temperature=0.1)

# Local file store handed to CacheBackedEmbeddings below.
cache_dir = LocalFileStore("./.cache/")

# Splitter that breaks text on newlines into overlapping chunks.
# NOTE(review): with from_tiktoken_encoder the sizes are presumably counted in
# tokens rather than characters — confirm against the LangChain docs.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator = "\n",
    chunk_size = 300,
    chunk_overlap = 50,
)

# NOTE(review): the source file has a .csv extension but is read with the
# generic unstructured loader — confirm this is intended.
loader = UnstructuredFileLoader('./files/george_orwell.csv')

# Load the file and split it into chunk documents using the splitter above.
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()
# Pair the OpenAI embeddings with the local file store
# (presumably so previously computed vectors are reused — confirm).
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# Build a FAISS vector store from the chunk documents and their embeddings.
vectorstore = FAISS.from_documents(docs,cached_embeddings)

# Retriever over the vector store; supplies "documents" to map_chain below.
retriever = vectorstore.as_retriever()

### Implementation order of the map_reduce chain
# 1. list of docs

# 2. for doc in list of docs | prompt | llm

# 3. for response in list of llms response | put them all together

# 4. final doc | prompt | llm

# Map-step prompt: given one portion of the document as {context} and the
# user's {question}, asks the model to return any relevant text verbatim.
map_doc_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        """
        Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim.
        -------
        {context}
        """
    ),
    ("human","{question}")
])

# Map-step chain: fill the prompt, then send it to the chat model.
# map_docs invokes this once per retrieved document.
map_doc_chain = map_doc_prompt | llm

# def map_docs(inputs):
#     # print(inputs)
#     documents = inputs["documents"]
#     question = inputs["question"]
#     results = []
#     for document in documents:
#         result = map_doc_chain.invoke(
#             {"context": document.page_content,         "question":question}
#         ).content
#         results.append(result)
#     results = "\n\n".join(results)
#     # print(results)
#     return results

def map_docs(inputs):
    """Run the map step over every retrieved document and merge the results.

    Args:
        inputs: Mapping with a "documents" entry (an iterable of objects
            exposing ``page_content``) and a "question" entry.

    Returns:
        The ``content`` of each ``map_doc_chain`` response, in document
        order, joined by a blank line. An empty string when there are no
        documents.
    """
    retrieved = inputs["documents"]
    query = inputs["question"]

    extracts = []
    for document in retrieved:
        # One model call per document: pull out the text relevant to the query.
        payload = {"context": document.page_content, "question": query}
        response = map_doc_chain.invoke(payload)
        extracts.append(response.content)

    return "\n\n".join(extracts)
    

# Map stage: the chain input is given to the retriever (result stored under
# "documents") and also passed through unchanged (under "question"); the
# resulting dict is then handed to map_docs.
map_chain = {
    "documents":retriever,
    "question":RunnablePassthrough(),
} | RunnableLambda(map_docs)

# {
#     "documents": [Documents],
#     "question":"Describe Victory Mansions?"
# }

# Reduce-step prompt: {context} receives the extracted parts of the document
# and {question} the user's question; the model is told to answer from those
# parts and to say it doesn't know rather than invent an answer.
final_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        """
        Given the following extracted parts of a long document and a question, create a final answer.
        if you don't know the answer, just say that you don't know. Don't try to make up an answer.
        ------------
        {context}
        """
    ),
    ("human", "{question}")
])

# Full map-reduce pipeline: "context" is produced by map_chain from the input,
# "question" is the input passed through unchanged; both fill final_prompt,
# and the filled prompt is sent to the chat model.
chain = {
    "context": map_chain, 
    "question":RunnablePassthrough(),
} | final_prompt | llm

# Example query: the plain question string is the chain's only input.
chain.invoke("Where does Winston go to work?")

# Output: AIMessage(content='Winston goes to work at the Ministry of Truth.')