In [5]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

# 1. Chat model shared by the per-document (map) step and the final answer
#    step: low temperature, responses capped at 1000 tokens.
llm = ChatOpenAI(model="gpt-4o", temperature=0.1, max_tokens=1000)

# 2. On-disk byte store used to persist computed embeddings between runs.
cache_dir = LocalFileStore("./.cache/")

# 3. Load the source document and split it into overlapping chunks.
#    The splitter is built with the tiktoken-based constructor and breaks the
#    text on newlines (chunk_size=600, chunk_overlap=100).
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader("./files/PhysicalAI.txt")

docs = loader.load_and_split(text_splitter=splitter)

# 4. Embedding model wrapped with the on-disk cache.
embeddings = OpenAIEmbeddings()

# Namespace the cache entries by embedding model. Without a namespace, the
# same text embedded by a different model would map to the same cache key and
# the stale vector would be reused silently.
# NOTE: entries written earlier without a namespace are not reused; they are
# simply re-embedded once on the next run.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# 5. Index the chunks in FAISS and expose the index as a retriever.
vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever()

# 6. Prompt applied to each retrieved document on its own: the model is asked
#    to copy out, verbatim, any text relevant to the question.
map_doc_prompt = ChatPromptTemplate.from_messages([
    ("system", """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            -------
            {context}
            """),
    ("human", "{question}"),
])

# Per-document chain: fill the prompt, then call the chat model.
map_doc_chain = map_doc_prompt | llm

# 7. Map step: run the per-document chain over every retrieved document and
#    merge the responses into one context string.
def map_docs(inputs):
    """Extract question-relevant text from each document and join the results.

    Args:
        inputs: Mapping with two keys:
            "documents": the retrieved documents; each item's
                ``page_content`` is used as the prompt context.
            "question": the user question, sent with every document.

    Returns:
        The ``content`` of each per-document response, in the same order as
        the input documents, separated by blank lines. An empty string when
        there are no documents.
    """
    documents = inputs["documents"]
    question = inputs["question"]
    prompts = [
        {"context": doc.page_content, "question": question}
        for doc in documents
    ]
    if not prompts:
        # Nothing was retrieved, so there is nothing to ask the model.
        return ""
    # The per-document calls are independent of each other, so hand them to
    # batch() in one go instead of blocking on each invoke() in turn. The
    # responses come back in input order.
    responses = map_doc_chain.batch(prompts)
    return "\n\n".join(response.content for response in responses)

# 8. Retrieval + map stage: the incoming question is passed to the retriever
#    (to fetch documents) and forwarded unchanged, then both are handed to
#    map_docs.
map_chain = (
    {"documents": retriever, "question": RunnablePassthrough()}
    | RunnableLambda(map_docs)
)

# 9. Prompt for the final answer, built from the extracted passages
#    ({context}) and the original question.
final_prompt = ChatPromptTemplate.from_messages([
    ("system", """
            Given the following extracted parts of a long document and a question, create a final answer. 
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ------
            {context}
            """),
    ("human", "{question}"),
])

# 10. Full pipeline: build the context with map_chain, keep the question
#     as-is, fill the final prompt, and call the chat model.
chain = (
    {"context": map_chain, "question": RunnablePassthrough()}
    | final_prompt
    | llm
)

# 11. Run the pipeline; as the last expression of the cell, the returned
#     message is shown as the cell output.
chain.invoke("피지컬 ai가 뭐야?")

libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.


AIMessage(content="피지컬 AI는 환경을 감지하는 센서와 환경과 상호작용하고 변화시키는 액추에이터를 갖춘 AI 시스템을 의미합니다. 이는 로봇이나 'Embodied AI' 에이전트와 같은 형태로 나타나며, 사람에게 위험하거나 힘들고 반복적인 작업을 처리하도록 설계됩니다.", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 75, 'prompt_tokens': 229, 'total_tokens': 304, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o', 'system_fingerprint': 'fp_07871e2ad8', 'finish_reason': 'stop', 'logprobs': None}, id='run--9ead1ecd-8b45-470e-be56-5ef00c099b02-0')