In [None]:
from langchain_openai import ChatOpenAI
from langchain_unstructured import UnstructuredLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import  CacheBackedEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

### Map-Reduce LCEL Chain ###

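##  Each retrieved document is processed individually by the LLM (the "map" step),
##  and the per-document results are then handed to the LLM together (the "reduce" step).
##  Note: the map step here extracts the text relevant to the question rather than writing a free-form summary.

#  - We need a list of documents and, for every document in that list, a prompt
#     ('Read this document and check whether it contains information suitable for answering the user's question').
#  - The LLM returns one response per document, and those responses are combined into a single document.
#  - That final document is passed to the LLM as part of the final prompt.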


# Chat model used both for the per-document map step and for the final answer.
llm = ChatOpenAI(
    model_name="gpt-4.1",
    temperature=0.1,
    streaming=True,
)

# Local directory that backs the embedding cache.
cache_dir = LocalFileStore("./.cache/")

# How the document is chunked: split on newlines, with chunk size and overlap
# given to the tiktoken-based splitter (600 / 100).
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=600, chunk_overlap=100, separator="\n"
)

# Load the locally stored text file and split it into chunks in one call.
loader = UnstructuredLoader("./files/chapter_one.txt")
docs = loader.load_and_split(text_splitter=splitter)

# Underlying embedding model.
embeddings = OpenAIEmbeddings()

# Cache-backed wrapper: texts whose embeddings are already in cache_dir are
# served from the cache instead of being embedded again.
# The namespace ties every cache entry to the embedding model name, so vectors
# produced by one model are never reused for another model that happens to
# embed the same text.
# NOTE: entries written earlier without a namespace are not reused; they are
# recomputed once under the new namespace.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Embed the chunks and build a FAISS vector store from them.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

# No search_kwargs are passed, so the default k applies and the retriever
# returns 4 documents per query.
retriever = vectorstore.as_retriever()

# Map-step prompt: applied to one document chunk at a time. The system message
# asks for the relevant text verbatim, or '' when nothing is relevant.
map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            -------
            {context}
            """),
        ("human", "{question}"),
    ]
)

# 🟩 Step 6 ➡ map_doc_chain.invoke({"context": doc.page_content, "question": question})
map_doc_chain = map_doc_prompt | llm

# 🟩 Step 5 ➡ map_docs({"documents": [Document(...), ... Document(...)], "question": "Where does...?"})
#                 
def map_docs(inputs):
    """Run the map step over every retrieved document.

    Args:
        inputs: Mapping with a "documents" entry (an iterable of objects
            exposing ``page_content``) and a "question" entry.

    Returns:
        The ``content`` of each ``map_doc_chain`` response, in document
        order, joined with blank lines. An empty document list yields "".
    """
    docs_to_scan, question = inputs["documents"], inputs["question"]

    # One chain call per document, each with that document's text as context.
    extracts = [
        map_doc_chain.invoke(
            {"context": doc.page_content, "question": question}
        ).content
        for doc in docs_to_scan
    ]

    # 🟩 Step 7 ➡ The question-related text that map_doc_chain returned for
    #              each document is merged into one string and returned.
    return "\n\n".join(extracts)


# 🟩 Step 3 ➡ {"documents": retriever("Where does...?"), "question": "Where does...?"}
# 🟩 Step 4 ➡ The dict from step 3 is passed to map_docs ("documents" holds the list of 4 retrieved Document objects).
# The incoming question string ("Where does Winston go to work?") is fed to
# both entries: the retriever turns it into documents, and the passthrough
# forwards it unchanged. The resulting dict is then handed to map_docs.
map_chain = (
    {"documents": retriever, "question": RunnablePassthrough()}
    | RunnableLambda(map_docs)
)

# Reduce-step prompt: the combined extracts are supplied as {context} and the
# model is asked for one final answer to the question.
final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", """
            Given the following extracted parts of a long document and a question, create a final answer. 
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ------
            {context}
            """),
        ("human", "{question}"),
    ]
)

# 🟩 Step 2 ➡ {"context": map_chain("Where does Winston go to work?"), "question": "Where does Winston go to work?"}
# 🟩 Step 8 ➡ The output of map_chain (the text the LLM judged relevant to the question in each of the 4 retrieved documents) is supplied as "context".
# Full pipeline: build {"context", "question"} from the question string,
# fill the final prompt with it, and send the prompt to the model.
chain = (
    {"context": map_chain, "question": RunnablePassthrough()}
    | final_prompt
    | llm
)

# 🟩 Step 1
chain.invoke("Where does Winston go to work?")


INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


map_docs -> result: ''


INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


map_docs -> result: ''


INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


map_docs -> result: the Ministry of Truth, his place of work


INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


map_docs -> result: ''


INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


AIMessage(content='Winston goes to work at the Ministry of Truth.', additional_kwargs={}, response_metadata={'finish_reason': 'stop', 'model_name': 'gpt-4.1-2025-04-14', 'system_fingerprint': 'fp_beec22d258'}, id='run-0bbf3a9e-37ab-45d3-9e7c-517002977177-0')