In [1]:
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.vectorstores import Chroma , FAISS
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.memory import ConversationBufferMemory

# memory선언 (ConversationBufferMemory)
memory = ConversationBufferMemory(return_messages=True)

# memory에서 history 가져오는 부분 (history=> 기본 memory_key)
def load_memory(_):
    return memory.load_memory_variables({})["history"]


# 1. 파일 로드 (loader => UnstructuredFileLoader , spliter => CharacterTextSplitter)
loader = UnstructuredFileLoader("../files/document.txt")
splitter = CharacterTextSplitter(
    chunk_size=600,
    chunk_overlap=100,
)
docs = loader.load_and_split(
    text_splitter=splitter,
)

# 2. Embedding + cache 작업 (vector를 매기는 작업 + local에 cache로 저장)
cache_dir = LocalFileStore("../cache/")
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir
)

# 3. vector store를 통해 prompt에 관련 되어 있는 부분을 찾습니다.
vector_store = FAISS.from_documents(docs, cached_embeddings)
# print(len(vector_store.similarity_search("Aaronson 은 유죄인가요?")))

# 4. vecctor store의 값을 통해 관련된 document를 가져와 propmpt에 참조용으로 전달합니다.(Retriever) 
# + memory에서 예시를 가져옵니다. (memory_key=> history)
retriever = vector_store.as_retriever()
prompt = ChatPromptTemplate.from_messages(
    [
        ("system",
         "당신은 유용한 도우미입니다. 아래 제공된 문맥만을 사용하여 질문에 답하세요. 답을 모를 경우, 모른다고 말하세요. 답을 지어내지 마세요:\n\n{context}"),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}")
    ]
)

# 5. chain 작성 + invoke (체인 실행)
chat = ChatOpenAI(temperature=0.1)

chain = {
    "context": retriever,
    "question": RunnablePassthrough(),
    "history": load_memory
} | prompt | chat

# chain 실행 함수, 메모리에 질문 답변을 저장해 둡니다.
def invoke_chain(question):
    result = chain.invoke(question)
    memory.save_context({"input": question}, {"output": result.content})
    return result.content

Created a chunk of size 717, which is longer than the specified 600
Created a chunk of size 608, which is longer than the specified 600
Created a chunk of size 642, which is longer than the specified 600
Created a chunk of size 1444, which is longer than the specified 600
Created a chunk of size 1251, which is longer than the specified 600
Created a chunk of size 1012, which is longer than the specified 600
Created a chunk of size 1493, which is longer than the specified 600
Created a chunk of size 819, which is longer than the specified 600
Created a chunk of size 1458, which is longer than the specified 600
Created a chunk of size 1411, which is longer than the specified 600
Created a chunk of size 742, which is longer than the specified 600
Created a chunk of size 669, which is longer than the specified 600
Created a chunk of size 906, which is longer than the specified 600
Created a chunk of size 703, which is longer than the specified 600
Created a chunk of size 1137, which is lon

In [2]:
invoke_chain("Aaronson 은 유죄인가요?")

'네, Jones, Aaronson, 그리고 Rutherford은 그들이 기소된 범죄로 유죄 판결을 받았습니다.'

In [3]:
invoke_chain("그가 테이블에 어떤 메시지를 썼나요?")

'그가 테이블에 쓴 메시지는 "Under the spreading chestnut tree I sold you and you sold me" 입니다.'

In [4]:
invoke_chain("Julia 는 누구인가요?")

'Julia는 주인공과 함께 자유롭고 함께 있을 때보다 더 사랑스러워 보였던 여자입니다.'

In [5]:
invoke_chain("Aaronson 은 유죄인가요?")

'네, Jones, Aaronson, 그리고 Rutherford은 그들이 기소된 범죄로 유죄 판결을 받았습니다.'

In [6]:
# prompt 가 재대로 작동 하고 있는지 확인
chain = {
    "context": retriever,
    "question": RunnablePassthrough(),
    "history": load_memory
} | prompt

chain.invoke("Aaronson 은 유죄인가요?")

ChatPromptValue(messages=[SystemMessage(content='당신은 유용한 도우미입니다. 아래 제공된 문맥만을 사용하여 질문에 답하세요. 답을 모를 경우, 모른다고 말하세요. 답을 지어내지 마세요:\n\n[Document(page_content=\'He accepted everything. The past was alterable. The past never had been altered. Oceania was at war with Eastasia. Oceania had always been at war with Eastasia. Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with. He had never seen the photograph that disproved their guilt. It had never existed, he had invented it. He remembered remembering contrary things, but those were false memories, products of self-deception. How easy it all was! Only surrender, and everything else followed. It was like swimming against a current that swept you backwards however hard you struggled, and then suddenly deciding to turn round and go with the current instead of opposing it. Nothing had changed except your own attitude: the predestined thing happened in any case. He hardly knew why he had ever rebelled. Everything was easy

In [7]:
print(load_memory(any))

[HumanMessage(content='Aaronson 은 유죄인가요?'), AIMessage(content='네, Jones, Aaronson, 그리고 Rutherford은 그들이 기소된 범죄로 유죄 판결을 받았습니다.'), HumanMessage(content='그가 테이블에 어떤 메시지를 썼나요?'), AIMessage(content='그가 테이블에 쓴 메시지는 "Under the spreading chestnut tree I sold you and you sold me" 입니다.'), HumanMessage(content='Julia 는 누구인가요?'), AIMessage(content='Julia는 주인공과 함께 자유롭고 함께 있을 때보다 더 사랑스러워 보였던 여자입니다.'), HumanMessage(content='Aaronson 은 유죄인가요?'), AIMessage(content='네, Jones, Aaronson, 그리고 Rutherford은 그들이 기소된 범죄로 유죄 판결을 받았습니다.')]
