In [2]:
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langserve import RemoteRunnable
import bs4
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain import hub
from langchain_core.runnables import RunnablePassthrough
from langchain.memory import ConversationSummaryBufferMemory

In [2]:
from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv()

True

In [1]:
from langchain_community.chat_models import ChatOllama

# Chat model served through Ollama. The commented-out lines are alternative
# model tags; only the uncommented assignment is active.
# llm = ChatOllama(model="llama3:latest")
# llm = ChatOllama(model="Llama-3-Open-Ko-8B-FP16.gguf:latest")
llm = ChatOllama(model="EEVE-Korean-Instruct-10.8B-v1.0:latest")

In [16]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain_core.prompts import MessagesPlaceholder

# Single key shared by the memory object and the prompt placeholder, so the
# history the memory exposes lands in the slot the prompt expects.
memory_key = "chat_history"

# Prompt layout: fixed system instruction, then the stored conversation
# history, then the new human question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a nice chatbot having a conversation with a human."),
        MessagesPlaceholder(variable_name=memory_key),
        ("human", "{question}"),
    ]
)

# Keep a buffer of the recent conversation in memory; earlier conversation is
# not flushed completely but summarized and stored instead.
memory = ConversationSummaryBufferMemory(
    llm=llm,
    memory_key=memory_key,
    return_messages=True,
    # Token length used as the summarization threshold: once it is exceeded,
    # the conversation is summarized and stored.
    max_token_limit=200,
)

def load_memory(input):
    """Return the stored conversation history for the prompt.

    Args:
        input: The value passed in by the chain step that calls this
            function. It is not used; the history is read from the
            module-level ``memory`` object.

    Returns:
        The entry stored under ``memory_key`` in the memory variables
        (presumably a list of messages, since the memory is created with
        ``return_messages=True`` -- confirm against the LangChain docs).
    """
    # Trace line so each history load is visible in the cell output.
    print("load_memory")
    memory_variables = memory.load_memory_variables({})
    return memory_variables[memory_key]
    
# Pipeline for one conversational turn:
#   1. add the stored history to the input dict under `memory_key`
#   2. fill in the chat prompt
#   3. call the model
#   4. parse the model output with StrOutputParser
# The history is assigned through `memory_key` instead of a hard-coded keyword
# so it always matches the MessagesPlaceholder in `prompt` and the key that
# `load_memory` reads from the memory object.
chain = (
    RunnablePassthrough.assign(**{memory_key: load_memory})
    | prompt
    | llm
    | StrOutputParser()
)

def invoke(question):
    """Run one conversation turn and record it in the conversation memory.

    The question is sent through ``chain``; the question/answer pair is then
    saved to ``memory`` so that later turns can refer back to it. Progress
    and the final answer are printed; nothing is returned.

    Args:
        question: The user's message for this turn.
    """
    print("invoke ===============================================")
    answer = chain.invoke({"question": question})
    # Save only after the chain has produced an answer, so the memory always
    # holds complete question/answer pairs.
    memory.save_context(inputs={"human": question}, outputs={"ai": answer})
    print(f"invoke / question : {question} / result : {answer}")

# Demo of the memory: the first prompt asks to calculate 3+11, and the second
# asks to add 6 to "that value", which only makes sense if the first turn was
# saved. The order of these two calls therefore matters.
invoke("3+11 계산해줘")
invoke("그 값에 6 더해줘")

load_memory
invoke / question : 3+11 계산해줘 / result : 물론이죠, 도와드리겠습니다! 😊

3 + 11 = 14입니다.

도움이 되셨길 바랍니다! 다른 질문이 있거나 도와드릴 일이 더 있으면 알려주세요. 😄
load_memory
invoke / question : 그 값에 6 더해줘 / result : 물론이죠, 도와드리겠습니다! 😊

3 + 11 + 6 = 20입니다.

도움이 되셨길 바랍니다! 다른 질문이 있거나 도와드릴 일이 있으면 알려주세요. 😄
