# In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
# from langchain.vectorstores import Chroma
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda


# Chat model shared by the per-document (map) step and the final answer step.
llm = ChatOpenAI(temperature=0.1)

# Local file store used as the backing byte store for cached embeddings.
cache_dir = LocalFileStore("./.cache/")

# Splitter built via the tiktoken-based constructor: splits on newlines,
# chunk_size=600 with chunk_overlap=100.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n", chunk_size=600, chunk_overlap=100
)

# NOTE(review): "./" looks like a directory placeholder; this loader
# presumably expects the path of a single file — confirm before running.
loader = UnstructuredFileLoader("./")
docs = loader.load_and_split(text_splitter=splitter)

# Wrap the OpenAI embeddings so results are stored in / read from cache_dir.
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
)

# Author's note: FAISS performed better than Chroma here.
vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever()

# Map-reduce plan:
# 1. retriever -> list of docs
# 2. for each doc in the list: doc | prompt | llm
# 3. join all of the per-doc llm responses into a single text
# 4. joined text | prompt | llm -> final answer


# Map-step prompt: shown one document chunk ({context}) and the user's
# {question}, the model is asked to return any relevant text verbatim.
#
# The comma separating the "system" and "human" tuples is required: without
# it Python parses the second tuple as a call on the first one and raises
# "TypeError: 'tuple' object is not callable" while building the list.
map_doc_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        """
        Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim
        ------
        {context}
        """,
    ),
    (
        "human",
        "{question}",
    ),
])

# Map-step chain: format the prompt, then send it to the chat model.
map_doc_chain = map_doc_prompt | llm


def map_docs(inputs):
    """Run the map step over every retrieved document and merge the results.

    Args:
        inputs: Mapping with two keys:
            'documents' - iterable of objects exposing ``page_content``.
            'question'  - the question forwarded to ``map_doc_chain``.

    Returns:
        A single string made of the ``content`` of each ``map_doc_chain``
        response, in document order, separated by blank lines.
    """
    documents = inputs['documents']
    question = inputs['question']

    # One chain invocation per document; keep only the response text.
    extracts = []
    for doc in documents:
        response = map_doc_chain.invoke(
            {"context": doc.page_content, "question": question}
        )
        extracts.append(response.content)

    return "\n\n".join(extracts)


# Map chain: the dict step sends the chain input to `retriever` (stored under
# "documents") and forwards the same input unchanged (stored under
# "question"); the resulting dict is then handed to map_docs.
map_chain = {
    "documents": retriever,
    "question": RunnablePassthrough(),
} | RunnableLambda(map_docs)


# Shape of the dict that map_chain's first step builds and passes to map_docs:
# {
#     "documents": [Documents],
#     "question": "Where does Winstone live?"
# }


# Reduce-step prompt: {context} receives the text produced by the map step,
# and {question} receives the user's original question.
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
        Given the following extracted parts of a long document and a question, create a final answer.
        If you don't know the answer, just say that you don't know. Don't try to make up an answer.
        ------
        {context}
        """,
        ),
        ("human", "{question}"),
    ]
)

# Full pipeline: run map_chain to build "context", forward the question
# unchanged, fill final_prompt with both, then ask the chat model.
chain = (
    {
        "context": map_chain,
        "question": RunnablePassthrough(),
    }
    | final_prompt
    | llm
)


# The string given to invoke() is what RunnablePassthrough() forwards.
chain.invoke("Where does Winstone live?")