# Memory

https://python.langchain.com/api_reference/langchain/memory.html

챗봇으로 하여금 대화(상태)를 '기억'하게끔 한다

Memory maintains Chain state, incorporating context from past runs.


# 기존 import

In [1]:
from langchain_openai.chat_models import ChatOpenAI
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.prompts.chat import ChatPromptTemplate

In [2]:
# OpenAI 사에서 제공하는 '기본 API' 도 랭체인 없이 사용 가능.
# 메모리 지원하지 않는다.  이전 대화 기억 못함.  stateless 하다!

# ChatGPT 는 '메모리' 기능이 탑재되어 있다.
# 챗봇이 이전의 대화 내용과 질문을 기억하고 답할수 있다.

In [3]:
# Langchain 의 memory 계층
#  BaseMemory --> BaseChatMemory --> <name>Memory  # Examples: ZepMemory, MotorheadMemory

In [4]:
import warnings
warnings.filterwarnings('ignore')

# ConversationBufferMemory

In [5]:
# ConversationBufferMemory
# 대화 내용 '전체'를 저장하는 메모리

# 장점: 단순하다

# 단점:
# => 매번 요청할때마다 '이전 대화 기록 전체' 를 같이 보내야 함.
#  그래야 모델이 전에 일어났던 대화를 보고 이해 할수 있다.
#  대화내용이 길어질수록 메모리도 계속 커지니까 성능적으로도 & 비용적으로도 비효율적이다.

In [6]:
# v0.3
from langchain.memory.buffer import ConversationBufferMemory
# https://python.langchain.com/api_reference/langchain/memory/langchain.memory.buffer.ConversationBufferMemory.html#conversationbuffermemory


In [7]:
# Create a buffer memory and exercise its save/load API by hand.
memory = ConversationBufferMemory()

# Save one exchange manually.
memory.save_context(
    {'input': 'Hi!'}, # the user's message
    {'output': 'How are you?'}, # the AI's reply to the user
)

# Return the stored history buffer.
memory.load_memory_variables({})

  memory = ConversationBufferMemory()


{'history': 'Human: Hi!\nAI: How are you?'}

In [8]:
# ★참고★ memory 종류와 관계없이 API 는 다 똑같다
# 즉, 모든 memory 는 save_context(), load_memory_variables() 함수를 갖고 있다.


In [9]:
# ChatModel 과 작업을 하게 되면
# AIMessage 와 HumanMessage 가 다 필요하다.

## return_messages=True
history 에  AIMessage 와 HumanMessage 로 저장된다.

In [10]:
memory = ConversationBufferMemory(return_messages=True)

memory

ConversationBufferMemory(chat_memory=InMemoryChatMessageHistory(messages=[]), return_messages=True)

In [11]:
memory.save_context(
    {'input': 'Hi!'},
    {'output': 'How are you?'},
)
memory.load_memory_variables({})

{'history': [HumanMessage(content='Hi!', additional_kwargs={}, response_metadata={}),
  AIMessage(content='How are you?', additional_kwargs={}, response_metadata={})]}

In [12]:
memory.save_context(
    {'input': 'Hi!'},
    {'output': 'How are you?'},
)
memory.load_memory_variables({})

{'history': [HumanMessage(content='Hi!', additional_kwargs={}, response_metadata={}),
  AIMessage(content='How are you?', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Hi!', additional_kwargs={}, response_metadata={}),
  AIMessage(content='How are you?', additional_kwargs={}, response_metadata={})]}

In [13]:
"""
- 우선 메모리를 만들고
- ChatModel을 위한건지 아닌지 선택하고
- ChatModel을 위한게 아니라면 return_messages=False (디폴트)
    => 그러면 history 는 문자열로 표시됨.
- ChatModel을 위한거라면 return_messages=True
    => 그러면 history  는 챗모델이 사용할수 있는 형태로 출력됨
"""
None

# ConversationBufferWindowMemory

In [14]:
# ConversationBufferWindowMemory 는 대화의 '특정 부분만' 을 저장하는 메모리.

# 장점:
#   메모리를 특정 크기로 유지할 수 있다!
#   따라서 모든 대화 내용을 저장하지 않아도 된다!

# 단점:
#   챗봇이 전체 대화가 아닌 '최근 대화' 에만 집중하게 된다.

In [15]:
# v0.3
from langchain.memory.buffer_window import ConversationBufferWindowMemory
# https://python.langchain.com/api_reference/langchain/memory/langchain.memory.buffer_window.ConversationBufferWindowMemory.html#conversationbufferwindowmemory


In [16]:
memory = ConversationBufferWindowMemory(
    return_messages=True,
    # Window size: how many of the most recent exchanges (human + AI pairs)
    # are returned; with k=4 the outputs below hold 8 messages.
    k=4,
)

  memory = ConversationBufferWindowMemory(


In [17]:
# 도우미 함수 준비.
def add_message(input, output):
    """Save one human/AI exchange into the module-level ``memory``.

    Args:
        input: Text of the human's message.
        output: Text of the AI's reply.
    """
    human_turn = {'input': input}
    ai_turn = {'output': output}
    memory.save_context(human_turn, ai_turn)

In [18]:
add_message("1", "1")
add_message("2", "2")
add_message("3", "3")
add_message("4", "4")

In [19]:
# k=4.
memory.load_memory_variables({})

{'history': [HumanMessage(content='1', additional_kwargs={}, response_metadata={}),
  AIMessage(content='1', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='2', additional_kwargs={}, response_metadata={}),
  AIMessage(content='2', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='3', additional_kwargs={}, response_metadata={}),
  AIMessage(content='3', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='4', additional_kwargs={}, response_metadata={}),
  AIMessage(content='4', additional_kwargs={}, response_metadata={})]}

In [20]:
add_message("5", "5")
memory.load_memory_variables({})

{'history': [HumanMessage(content='2', additional_kwargs={}, response_metadata={}),
  AIMessage(content='2', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='3', additional_kwargs={}, response_metadata={}),
  AIMessage(content='3', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='4', additional_kwargs={}, response_metadata={}),
  AIMessage(content='4', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='5', additional_kwargs={}, response_metadata={}),
  AIMessage(content='5', additional_kwargs={}, response_metadata={})]}

# ConversationSummaryMemory
- 대화를 요약

In [21]:
llm = ChatOpenAI(temperature=0.1)

In [22]:


# v0.3
from langchain.memory.summary import ConversationSummaryMemory
# https://python.langchain.com/api_reference/langchain/memory/langchain.memory.summary.ConversationSummaryMemory.html#langchain.memory.summary.ConversationSummaryMemory

# Continually summarizes the conversation history.
# The summary is updated after each conversation turn.
# The implementation returns a summary of the conversation history
# which can be used to provide context to the model.

In [23]:
# ConversationSummaryMemory
# 메세지를 그대로 저장하는 것이 아니라 Conversation 의 '요약'을 해준다. LLM 필요
#  장점: 대화의 메세지가 많아질수록 요약을 해주어 입력 토큰의 양도 줄여줌.
memory = ConversationSummaryMemory(llm=llm)


  memory = ConversationSummaryMemory(llm=llm)


In [24]:
def get_history():
    """Return the variables currently held by the module-level ``memory``."""
    history_vars = memory.load_memory_variables({})
    return history_vars

In [25]:
# message 추가
add_message(
    "Hi I'm John, I live in South Korea",    # input
    "Wow that is so cool!"  # output : AI 답변
)

In [26]:
# 또 message 추가
add_message(
    "South Korea is so pretty",
    "I wish I could go!!!")

In [27]:
get_history()

{'history': 'John introduces himself as living in South Korea. The AI responds by expressing admiration for his location, saying it wishes it could go there because South Korea is so pretty.'}

In [28]:
# ↑ 대화를 '요약' 한 내용으로 기억하고 있다
# 대화의 turn 이 길어질수록 summary 가 각 메세지를 효율적으로 '요약(압축)' 해준다

# ConversationSummaryBufferMemory
- summary + buffer

In [29]:
# v0.3
from langchain.memory.summary_buffer import ConversationSummaryBufferMemory
# https://python.langchain.com/api_reference/langchain/memory/langchain.memory.summary_buffer.ConversationSummaryBufferMemory.html#langchain.memory.summary_buffer.ConversationSummaryBufferMemory

# Buffer with summarizer for storing conversation memory.
# Provides a running summary of the conversation together with
#  the most recent messages in the conversation under the constraint
#   that the total number of tokens in the conversation does not exceed a certain limit.

In [30]:
# ConversationSummaryBufferMemory 는
#   ConversationBufferMemory 와 ConversationSummaryMemory 의 결합형

# 최근 메세지는 max_token_limit(토큰 수) 한도 내에서 그대로 저장하고,
# 한도를 넘는 오래된 메세지들은 요약하여 저장한다.

In [31]:
# Summary + buffer memory: recent messages are kept as-is and, once the token
# limit is exceeded, older ones are summarized.
memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=150,   # token budget for the messages kept before summarization kicks in
    return_messages=True,
)

  memory = ConversationSummaryBufferMemory(


In [32]:
# 메세지 추가하고 history 확인

add_message(
    "Hi I'm John, I live in South Korea",    # input
    "Wow that is so cool!"  # output : AI 답변
)

get_history()

{'history': [HumanMessage(content="Hi I'm John, I live in South Korea", additional_kwargs={}, response_metadata={}),
  AIMessage(content='Wow that is so cool!', additional_kwargs={}, response_metadata={})]}

In [33]:
# 다시 메세지 추가하고 확인!

add_message(
    "South Korea is so pretty",
    "I wish I could go!!!")

get_history()


{'history': [HumanMessage(content="Hi I'm John, I live in South Korea", additional_kwargs={}, response_metadata={}),
  AIMessage(content='Wow that is so cool!', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='South Korea is so pretty', additional_kwargs={}, response_metadata={}),
  AIMessage(content='I wish I could go!!!', additional_kwargs={}, response_metadata={})]}

In [34]:
# ↑ 아직까진 max_token_limit=150 이하이다  (요약은 발생하지 않았다)

In [35]:
add_message(
    "How far is Korea from Argentina?",
    "I don't know! Super far!"
)
get_history()

{'history': [HumanMessage(content="Hi I'm John, I live in South Korea", additional_kwargs={}, response_metadata={}),
  AIMessage(content='Wow that is so cool!', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='South Korea is so pretty', additional_kwargs={}, response_metadata={}),
  AIMessage(content='I wish I could go!!!', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='How far is Korea from Argentina?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="I don't know! Super far!", additional_kwargs={}, response_metadata={})]}

In [36]:
# ↑ 아직까진 max_token_limit=150 이하이다  (요약은 발생하지 않았다)

In [37]:
add_message(
    "How far is Brazil from Argentina?",
    "I don't know! Super far!"
)
get_history()

{'history': [HumanMessage(content="Hi I'm John, I live in South Korea", additional_kwargs={}, response_metadata={}),
  AIMessage(content='Wow that is so cool!', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='South Korea is so pretty', additional_kwargs={}, response_metadata={}),
  AIMessage(content='I wish I could go!!!', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='How far is Korea from Argentina?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="I don't know! Super far!", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='How far is Brazil from Argentina?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="I don't know! Super far!", additional_kwargs={}, response_metadata={})]}

In [38]:
# limit 에 도달하면, 오래된 메세지들이 요약되어 SystemMessage 로 저장되는 것을 확인할 수 있다.
# (위 출력에서는 아직 max_token_limit=150 을 넘지 않아 SystemMessage 가 보이지 않는다)

# ★그러나 '요약' 이라는 과정은 API 를 사용한다는 사실을 명심하세요.
# ★'요약' 동작은 비용 지출이 발생되는 부분입니다.


# ConversationKGMemory
Conversation Knowledge Graph Memory

In [39]:
# 대화중에 '엔티티'의 knowledge graph 를 형성한다 => 가장 중요한 것들만 추출한 요약본.
# knowledge graph 는 history 를 가지고 오지 않는다.  대신 '엔티티' 를 가지고 옴

In [40]:
# v0.3
from langchain_community.memory.kg import ConversationKGMemory
# https://python.langchain.com/api_reference/community/memory/langchain_community.memory.kg.ConversationKGMemory.html

# Knowledge graph conversation memory.
# Integrates with external knowledge graph to store and retrieve information about knowledge triples in the conversation.

In [41]:
memory = ConversationKGMemory(
    llm=llm,
    return_messages=True,
)

In [42]:
add_message(
    "Hi I'm John, I live in South Korea",    # input
    "Wow that is so cool!"  # output : AI 답변
)

In [43]:
# 이후에,  대화의 특정 엔티티(entity) 에 대해 질문해보자
memory.load_memory_variables({"input": "who is John"})

{'history': [SystemMessage(content='On John: John lives in South Korea.', additional_kwargs={}, response_metadata={})]}

In [44]:
# SystemMessage 형태로 출력되지만, 이는 대화 전체의 '요약' 이 아니라 대화에서 entity 에 대한 정보를 뽑아낸 것이다.

In [45]:
# 여기에 메세지를 더해보자
add_message("John likes kimchi", "Wow that is so cool!")

In [46]:
memory.load_memory_variables({"input": "What does John like"})

{'history': [SystemMessage(content='On John: John lives in South Korea. John likes kimchi.', additional_kwargs={}, response_metadata={})]}

# 그 밖의 메모리들..

In [47]:
# ConversationTokenBufferMemory 도 있다
#   interaction(대화 턴) 의 개수로 제한하는 대신, 저장된 메세지의 총 token 수로 제한한다.
#   max_token_limit= 값으로 지정!  <= ConversationBufferWindowMemory 의 k= 값과 비슷한 역할

# https://python.langchain.com/api_reference/langchain/memory/langchain.memory.token_buffer.ConversationTokenBufferMemory.html#langchain.memory.token_buffer.ConversationTokenBufferMemory

In [48]:
# ConversationEntityMemory
#  Entity 를 활용한 메모리도 있다.
#  이는 대화중에 entity 를 추출해 활용
# https://python.langchain.com/api_reference/langchain/memory/langchain.memory.entity.ConversationEntityMemory.html#langchain.memory.entity.ConversationEntityMemory

- **참고: Database 와 integration 된 메모리들**

| Memory Class                  | 통합 대상 (Integration)             | 설명                                                           |
| ----------------------------- | ------------------------------- | ------------------------------------------------------------ |
| `RedisChatMessageHistory`     | **Redis**                       | Redis에 메시지 저장. 빠르고 확장 가능한 저장소.                               |
| `SQLChatMessageHistory`       | **SQLite, PostgreSQL 등 SQL DB** | SQL 데이터베이스에 메시지 저장. SQLAlchemy 기반.                           |
| `DynamoDBChatMessageHistory`  | **AWS DynamoDB**                | AWS의 NoSQL DB인 DynamoDB에 대화 저장. 서버리스 환경에서 유용.                |
| `MongoDBChatMessageHistory`   | **MongoDB**                     | 문서 기반 DB인 MongoDB와 통합하여 대화 저장.                               |
| `PostgresChatMessageHistory`  | **PostgreSQL**                  | PostgreSQL 전용 구현 (SQLAlchemy 없이).                            |
| `FileChatMessageHistory`      | **Local 파일**                    | JSON 파일로 로컬에 저장. 간단한 로깅에 적합.                                 |
| `FirestoreChatMessageHistory` | **Google Firestore**            | Firebase 기반 클라우드 NoSQL DB와 통합.                               |
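| `ChromaMemory`                | **Chroma (벡터 DB)**              | 벡터 DB에 embedding 형태로 memory 저장. RAG나 유사 검색 기반 memory에 활용 가능. |
| `WeaviateMemory`              | **Weaviate**                    | Weaviate 벡터 DB와 통합된 memory 저장.                               |
| `QdrantMemory`                | **Qdrant**                      | 벡터 기반 memory 저장소로 Qdrant 사용.                                 |

> ※ 주의: `ChromaMemory`, `WeaviateMemory`, `QdrantMemory` 는 LangChain 에 해당 이름의 클래스가 실제로 있는지 확인이 필요하다. 벡터 DB 기반 메모리는 보통 `VectorStoreRetrieverMemory` 에 해당 벡터스토어의 retriever 를 연결하여 구성한다.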


# Memory on LLMChain

In [49]:
# LLMChain 은 off-the-shelf chain으로서 '일반적인 목적' 의 chain을 의미

In [50]:
from langchain.chains.llm import LLMChain

In [51]:
memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=80,
)

In [52]:
# NOTE: the prompt here contains only {question}; nothing in it refers to the
# memory's history.
chain = LLMChain(
    llm=llm,
    memory=memory,  # attach the memory to the chain
    prompt=PromptTemplate.from_template("{question}")
)

  chain = LLMChain(


In [53]:
chain.invoke({'question':'My name is John'})

{'question': 'My name is John',
 'history': '',
 'text': 'Nice to meet you, John! How can I assist you today?'}

In [54]:
chain.invoke(input={'question':"I live in Seoul"})

{'question': 'I live in Seoul',
 'history': 'Human: My name is John\nAI: Nice to meet you, John! How can I assist you today?',
 'text': ", the capital city of South Korea. It is a bustling metropolis with a vibrant culture, delicious food, and a mix of modern skyscrapers and historic palaces. I love exploring the city's neighborhoods, trying new restaurants, and taking in the beautiful views of the Han River. Seoul is a dynamic and exciting place to call home."}

In [55]:
"""
{'question': 'I live in Seoul',
 'history': 'Human: My name is John\nAI: Nice to meet you, John! How can I assist you today?',
 'text': ", the capital city of South Korea. It is a bustling metropolis with a vibrant culture, delicious food, and a mix of modern skyscrapers and historic palaces. I love exploring the city's neighborhoods, trying new restaurants, and taking in the beautiful views of the Han River. Seoul is a dynamic and exciting place to call home."}

"""
None

In [56]:
# 과연 내 이름을 기억하곤 있을까?
chain.invoke({'question':'What is my name?'})

{'question': 'What is my name?',
 'history': "System: The human introduces himself as John. The AI greets John and asks how it can assist him. John mentions that he lives in Seoul.\nAI: , the capital city of South Korea. It is a bustling metropolis with a vibrant culture, delicious food, and a mix of modern skyscrapers and historic palaces. I love exploring the city's neighborhoods, trying new restaurants, and taking in the beautiful views of the Han River. Seoul is a dynamic and exciting place to call home.",
 'text': "I'm sorry, I do not have access to personal information such as your name."}

In [57]:
"""
{'question': 'What is my name?',
 'history': "System: The human introduces himself as John. The AI greets John and asks how it can assist him. John mentions that he lives in Seoul.\nAI: , the capital city of South Korea. It is a bustling metropolis with a vibrant culture, delicious food, and a mix of modern skyscrapers and historic palaces. I love exploring the city's neighborhoods, trying new restaurants, and taking in the beautiful views of the Han River. Seoul is a dynamic and exciting place to call home.",
 'text': "I'm sorry, I do not have access to that information."}  <-- 어라? 모른다고?
"""
None

In [58]:
# 그럼 chain 을 디버깅 해보자!
# verbose=True

In [59]:
# Rebuild the same chain with verbose logging to see the prompt that is sent.
chain = LLMChain(
    llm=llm,
    memory=memory, 
    prompt=PromptTemplate.from_template("{question}"),
    verbose=True,   # print the formatted prompt on each run (useful for debugging)
)

chain.invoke(input={'question':"My name is John"})
chain.invoke(input={'question':"I live in Seoul"})
chain.invoke(input={'question':"What is my name?"})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mMy name is John[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mI live in Seoul[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWhat is my name?[0m

[1m> Finished chain.[0m


{'question': 'What is my name?',
 'history': "System: John introduces himself as living in Seoul, the capital city of South Korea. The AI describes Seoul as a bustling metropolis with a vibrant culture, delicious food, and a mix of modern skyscrapers and historic palaces. The AI loves exploring the city's neighborhoods, trying new restaurants, and taking in the beautiful views of the Han River, making Seoul a dynamic and exciting place to call home. The AI then learns John's name and greets him, ready to assist with any questions or requests he may have.\nAI: , the capital city of South Korea. It is a bustling metropolis with a vibrant culture, delicious food, and a mix of modern skyscrapers and historic temples. I love exploring the city's neighborhoods, trying new restaurants, and taking in the beautiful views of the Han River. Seoul is a dynamic and exciting place to call home.",
 'text': "I'm sorry, I do not have access to personal information such as your name."}

In [60]:
"""
> Entering new LLMChain chain...
Prompt after formatting:
My name is John   <--- 이게 프롬프트의 전부다!?

> Finished chain.


> Entering new LLMChain chain...
Prompt after formatting:
I live in Seoul <--- 이게 프롬프트의 전부다!?

> Finished chain.


> Entering new LLMChain chain...
Prompt after formatting:
What is my name? <--- 이게 프롬프트의 전부다!?

> Finished chain.


verbose=True 를 통해 chain 의 prompt 로그를 확인할수 있다.
prompt 에는 대화의 history 가 없었다!!!

=> prompt 에 대화의 history 를 추가해주어야 한다!

"""
None

In [61]:
# 그러나! 메모리는 계속 요약 업데이트 되고 있다! 
memory.load_memory_variables({})

{'history': "System: John introduces himself as living in Seoul, the capital city of South Korea. The AI describes Seoul as a bustling metropolis with a vibrant culture, delicious food, and a mix of modern skyscrapers and historic temples. The AI loves exploring the city's neighborhoods, trying new restaurants, and taking in the beautiful views of the Han River, making Seoul a dynamic and exciting place to call home.\nHuman: What is my name?\nAI: I'm sorry, I do not have access to personal information such as your name."}

In [62]:
# ↑ 위 메모리의 내용이 prompt 에 포함되어야 하는 것이다!

In [63]:
memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=120,
)

In [64]:
# Prepare a template that also has room for the conversation history.
template = """
    You are a helpful AI talking to a human.

    {chat_history}
    Human:{question}
    You:
"""

# The hope is that the AI answers the question while keeping the earlier
# conversation in mind.

## memory_key="chat_history"

In [65]:
memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=120,
    # Key under which the history is exposed; it matches the {chat_history}
    # slot in the template, so no manual load_memory_variables({}) call is needed.
    memory_key="chat_history",
)

In [66]:
chain = LLMChain(
    llm=llm,
    memory=memory,
    prompt=PromptTemplate.from_template(template),  # use the `template` string with the {chat_history} slot
    verbose=True,
)

In [67]:
chain.invoke(input={'question':"My name is John"})





[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    You are a helpful AI talking to a human.

    
    Human:My name is John
    You:
[0m

[1m> Finished chain.[0m


{'question': 'My name is John',
 'chat_history': '',
 'text': 'Hello John! How can I assist you today?'}

In [68]:
chain.invoke(input={'question':"I live in Seoul"})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    You are a helpful AI talking to a human.

    Human: My name is John
AI: Hello John! How can I assist you today?
    Human:I live in Seoul
    You:
[0m

[1m> Finished chain.[0m


{'question': 'I live in Seoul',
 'chat_history': 'Human: My name is John\nAI: Hello John! How can I assist you today?',
 'text': "That's great to hear! How can I assist you with information or tasks related to Seoul?"}

In [69]:
chain.invoke(input={'question':"What is my name?"})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    You are a helpful AI talking to a human.

    Human: My name is John
AI: Hello John! How can I assist you today?
Human: I live in Seoul
AI: That's great to hear! How can I assist you with information or tasks related to Seoul?
    Human:What is my name?
    You:
[0m

[1m> Finished chain.[0m


{'question': 'What is my name?',
 'chat_history': "Human: My name is John\nAI: Hello John! How can I assist you today?\nHuman: I live in Seoul\nAI: That's great to hear! How can I assist you with information or tasks related to Seoul?",
 'text': 'Your name is John.'}

In [70]:
# ↑ 보다시피 prompt 에 대화의 chat history 가 남겨져 있다.

In [71]:
"""
{'question': 'What is my name?',
 'chat_history': "Human: My name is John\nAI: Nice to meet you, John! How can I assist you today?\nHuman: I live in Seoul\nAI: That's great to know! How can I assist you with information or tasks related to Seoul?",
 'text': 'Your name is John.'}  <-- 최종적으로 What is my name 에 대한 답변을 해준다.
"""
None

In [72]:
"""
- 프롬프트 템플릿 안에서 메모리 내용이 들어갈 공간을 준비한다.  (예: chat_history)
- 메모리를 활용할 템플릿은 원하는대로 작성하면 된다.
- Memory 클래스에선 history 를 어디에 꽂을지 지정해준다 (memory_key=)

"""
None

In [73]:
# 잘 요약이 되는지도 확인해보자
chain.invoke({'question': "My name is John"})
chain.invoke({'question': "I live in Seoul"})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    You are a helpful AI talking to a human.

    Human: My name is John
AI: Hello John! How can I assist you today?
Human: I live in Seoul
AI: That's great to hear! How can I assist you with information or tasks related to Seoul?
Human: What is my name?
AI: Your name is John.
    Human:My name is John
    You:
[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    You are a helpful AI talking to a human.

    Human: My name is John
AI: Hello John! How can I assist you today?
Human: I live in Seoul
AI: That's great to hear! How can I assist you with information or tasks related to Seoul?
Human: What is my name?
AI: Your name is John.
Human: My name is John
AI: Hello John! How can I assist you today?
    Human:I live in Seoul
    You:
[0m

[1m> Finished chain.[0m


{'question': 'I live in Seoul',
 'chat_history': "Human: My name is John\nAI: Hello John! How can I assist you today?\nHuman: I live in Seoul\nAI: That's great to hear! How can I assist you with information or tasks related to Seoul?\nHuman: What is my name?\nAI: Your name is John.\nHuman: My name is John\nAI: Hello John! How can I assist you today?",
 'text': "That's great to hear! How can I assist you with information or tasks related to Seoul?"}

# Chat based memory

In [74]:
# '메세지' 를 기반으로 한 '사람과 AI' 의 대화 history 를 추가하는 방법을 알아보자

In [75]:
# memory 클래스 출력방식 2가지
#  문자열 형태
#  message 형태

## return_messages=True

In [76]:
memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=120,
    memory_key="chat_history", 
    return_messages=True,   # return message objects instead of a single string
)

In [77]:
# 과연 prompt 에는 어떻게 대화 history 들을 넘겨줄수 있을까?
#  단순한 하나의 텍스트가 아니라... Human Message - AI Message - Human Message - AI Message - .... (여러 메세지들)
#  심지어 요약본 발생시 System message 도 있을텐데?

# prompt 에 이를 위한 공간을 어케 만드나?
#  => MessagesPlaceholder

## MessagesPlaceholder

In [78]:
# v0.3
from langchain_core.prompts.chat import MessagesPlaceholder

# https://python.langchain.com/api_reference/core/prompts/langchain_core.prompts.chat.MessagesPlaceholder.html#langchain_core.prompts.chat.MessagesPlaceholder

# Prompt template that assumes variable is already list of messages.
# A placeholder which can be used to pass in a list of messages.


In [79]:
prompt = ChatPromptTemplate.from_messages([
    ('system', 'You are a helpful AI talking to a human'),

    # The placeholder below can hold any number of messages, from any sender;
    # the count is not known in advance.
    MessagesPlaceholder(variable_name='chat_history'),
    # variable_name="chat_history":
    #  ConversationSummaryBufferMemory (with return_messages=True) supplies the
    #  messages from its history, and they fill this MessagesPlaceholder:
    #  AI messages, System messages, Human messages ...
    #  however many there happen to be.

    ('human', '{question}'),
])

In [80]:
chain = LLMChain(
    llm=llm,
    memory=memory,
    prompt=prompt, # the ChatPromptTemplate containing the chat_history placeholder
    verbose=True,
)

In [81]:
chain.invoke({'question':"My name is John"})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a helpful AI talking to a human
Human: My name is John[0m

[1m> Finished chain.[0m


{'question': 'My name is John',
 'chat_history': [HumanMessage(content='My name is John', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Hello John! How can I assist you today?', additional_kwargs={}, response_metadata={})],
 'text': 'Hello John! How can I assist you today?'}

In [82]:
chain.invoke({'question':"I live in Seoul"})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a helpful AI talking to a human
Human: My name is John
AI: Hello John! How can I assist you today?
Human: I live in Seoul[0m

[1m> Finished chain.[0m


{'question': 'I live in Seoul',
 'chat_history': [HumanMessage(content='My name is John', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Hello John! How can I assist you today?', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='I live in Seoul', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Seoul is a vibrant city with a rich history and culture. Is there anything specific you would like to know or discuss about Seoul?', additional_kwargs={}, response_metadata={})],
 'text': 'Seoul is a vibrant city with a rich history and culture. Is there anything specific you would like to know or discuss about Seoul?'}

In [83]:
chain.invoke({'question':"What is my name?"})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a helpful AI talking to a human
Human: My name is John
AI: Hello John! How can I assist you today?
Human: I live in Seoul
AI: Seoul is a vibrant city with a rich history and culture. Is there anything specific you would like to know or discuss about Seoul?
Human: What is my name?[0m

[1m> Finished chain.[0m


{'question': 'What is my name?',
 'chat_history': [HumanMessage(content='My name is John', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Hello John! How can I assist you today?', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='I live in Seoul', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Seoul is a vibrant city with a rich history and culture. Is there anything specific you would like to know or discuss about Seoul?', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='What is my name?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Your name is John.', additional_kwargs={}, response_metadata={})],
 'text': 'Your name is John.'}

# LCEL Based Memory
- 커스텀 chain 에 memory 를 장착하기!

In [84]:
# LCEL: pipe the prompt's output into the llm (no memory attached to the chain itself).
chain = prompt | llm

In [85]:
chain.invoke({
    # The history has to be loaded from memory by hand on every call.
    'chat_history': memory.load_memory_variables({})['chat_history'],
    'question': 'My name is John',
})

AIMessage(content='Yes, you mentioned that earlier. How can I assist you further, John?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 103, 'total_tokens': 119, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-Bk51AQtxVt0klQw5lZhGyHIZWcVpO', 'finish_reason': 'stop', 'logprobs': None}, id='run--d49a9587-9bd8-4598-9f2c-1995f029ed7d-0', usage_metadata={'input_tokens': 103, 'output_tokens': 16, 'total_tokens': 119, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [86]:
"""
↑ 위 방법도 가능은 하다
단, 위 접근 방식의 문제는
우리가 chain 을 호출할 때마다 chat_history 도 추가해줘야 한다는 거다.

↓ 이보다 더 좋은 방법도 있다. 바로 'Runnables' 라는 것을 사용하는 것이다
"""
None

## RunnablePassthrough

In [87]:
# v0.3
from langchain_core.runnables.passthrough import RunnablePassthrough
# https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html#langchain_core.runnables.passthrough.RunnablePassthrough


# Runnable to passthrough inputs unchanged or with additional keys.
# This Runnable behaves almost like the identity function,
#  except that it can be configured to add additional keys to the output,
#  if the input is a dict.

In [88]:
# Helper that fetches the stored chat history from memory.
# NOTE: it takes no parameters, but the chain built with
# RunnablePassthrough.assign calls it with the chain input, which is what
# produces the TypeError shown in the next cell's output.
def load_memory():
    return memory.load_memory_variables({})['chat_history']

In [89]:
# Fill the chat_history key by calling load_memory, then pass the result on to
# prompt and llm, so callers only supply the question.
chain = RunnablePassthrough.assign(chat_history=load_memory) | prompt | llm

chain.invoke({
    'question': 'My name is John',
})

"""
↑ 위를 실행하게 되면 가장 먼저 load_memory() 를 호출한다.
prompt가 필요로 하는 chat_history= 키 내부에 넣는다.

이는 마치
chain.invoke({
    "chat_history": load_memory(),  <- 요렇게 한것과 동일하다.
    "question": "My name is John",
})

"""





TypeError: load_memory() takes 0 positional arguments but 1 was given

In [90]:
llm = ChatOpenAI(temperature=0.1)

memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=120,
    memory_key="chat_history",
    return_messages=True,
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI talking to a human"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

def load_memory(input):
    """Return the stored chat history, printing the value the chain passed in.

    Args:
        input: The value handed over by ``RunnablePassthrough.assign``; the
            printed output shows it is the dict given to ``chain.invoke``.
            It is only printed, never used to look anything up.

    Returns:
        The ``"chat_history"`` entry of ``memory.load_memory_variables({})``.
    """
    # Debug trace so we can see exactly what the chain passes to us.
    print("🎃load_memory()", input)
    memory_variables = memory.load_memory_variables({})
    return memory_variables["chat_history"]

# assign() passes the invoke() dict to load_memory and adds the returned
# history under 'chat_history' before the prompt is rendered.
chain = RunnablePassthrough.assign(chat_history=load_memory) | prompt | llm

# The caller supplies only the question.
chain.invoke({
    "question": "My name is John",
})

🎃load_memory() {'question': 'My name is John'}


AIMessage(content='Hello John! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 24, 'total_tokens': 34, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-Bk51ZNRnpOVx0WbpmYRRBXHVcfJS7', 'finish_reason': 'stop', 'logprobs': None}, id='run--6d705b61-e65e-46f4-b78b-ba860d1c8c9d-0', usage_metadata={'input_tokens': 24, 'output_tokens': 10, 'total_tokens': 34, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [None]:
"""
load_memory() {'question': 'My name is John'}

체인에 있는 모든 컴포넌트는 input 을 받을거고, 또 output 을 줄거다.
이게 랭체인의 핵심이다.  모든 것이 input을 얻을거고, 그 후엔 output 을 줄거다.


chain.invoke(
  {     <--  이 dict 는 chain 의 첫번째 아이템의 input 이 되는거다.
             바로 그게 load_memory() 의 input 이 된것이다!   규칙이다!
    "question": "My name is John",
  }
)

이후 load_memory() 를 실행시킨 결과로 얻은 값이 chat_history 키의 값으로 들어가
체인의 다음 요소로 전달되는 것이다.

"""
None



In [91]:
# Chat model used both by the chain and by the summary memory below.
llm = ChatOpenAI(temperature=0.1)

# Fresh memory instance, so this cell starts with an empty conversation.
memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=120,
    memory_key="chat_history",  # key read back in load_memory()
    return_messages=True,  # keep history as message objects, not one string
)

# Prompt layout: system instruction, then the stored conversation, then the
# new human question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI talking to a human"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

def load_memory(input):
    """Return the stored chat history, printing the value the chain passed in.

    Args:
        input: The value handed over by ``RunnablePassthrough.assign`` (the
            dict given to ``chain.invoke``). It is only printed.

    Returns:
        The ``"chat_history"`` entry of ``memory.load_memory_variables({})``.
    """
    # Debug trace of what the chain passes to this function.
    print("🎃load_memory()", input)
    memory_variables = memory.load_memory_variables({})
    return memory_variables["chat_history"]

# load_memory fills 'chat_history' on every call, so callers pass only 'question'.
chain = RunnablePassthrough.assign(chat_history=load_memory) | prompt | llm

# Wrap the chain call in our own helper function.
def invoke_chain(question):
    """Run one conversation turn through the chain and record it in memory.

    Args:
        question: The user's message for this turn.

    Returns:
        None. The chain result is printed rather than returned.
    """
    chain_input = {"question": question}
    response = chain.invoke(chain_input)

    # Save this turn so that the next call sees it in the loaded history.
    # ``response`` is the message returned by the LLM step; only its text
    # content is stored as the AI side of the turn.
    user_turn = {"input": question}
    ai_turn = {"output": response.content}
    memory.save_context(user_turn, ai_turn)

    print('🟨result:', response)

In [92]:
# First turn: tell the model the name; invoke_chain saves the turn to memory.
invoke_chain("My name is John")

🎃load_memory() {'question': 'My name is John'}
🟨result: content='Hello John! How can I assist you today?' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 24, 'total_tokens': 34, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-Bk6Yq0EP2z1OYUl7ux1Mt35og5jMg', 'finish_reason': 'stop', 'logprobs': None} id='run--9b9bc26e-857e-44b9-808b-1955724811fe-0' usage_metadata={'input_tokens': 24, 'output_tokens': 10, 'total_tokens': 34, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}


In [93]:
invoke_chain("What is my name?")  # the name from the previous turn should be remembered

🎃load_memory() {'question': 'What is my name?'}
🟨result: content='Your name is John.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 47, 'total_tokens': 52, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-Bk6ZayEoTA9AcYdo48gnZnrpKbc6W', 'finish_reason': 'stop', 'logprobs': None} id='run--b1567000-d036-4bd3-9348-35f04c5297c8-0' usage_metadata={'input_tokens': 47, 'output_tokens': 5, 'total_tokens': 52, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}


In [94]:
# Chat model used both by the chain and by the summary memory below.
llm = ChatOpenAI(temperature=0.1)

memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=120,
    # memory_key="chat_history",   # no longer needed: the default memory key is 'history'
    return_messages=True,  # keep history as message objects, not one string
)

# Prompt layout: system instruction, then the stored conversation, then the
# new human question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI talking to a human"),
        MessagesPlaceholder(variable_name="history"),  # variable_name changed to match the default key
        ("human", "{question}"),
    ]
)

def load_memory(input):
    """Return the stored history, printing the value the chain passed in.

    Args:
        input: The value handed over by ``RunnablePassthrough.assign`` (the
            dict given to ``chain.invoke``). It is only printed.

    Returns:
        The ``"history"`` entry of ``memory.load_memory_variables({})``;
        the key is now the memory's default one instead of "chat_history".
    """
    # Debug trace of what the chain passes to this function.
    print("🎃load_memory()", input)
    memory_variables = memory.load_memory_variables({})
    return memory_variables["history"]

# The assigned key must match the prompt placeholder, which is now 'history'.
chain = RunnablePassthrough.assign(history=load_memory) | prompt | llm # key changed to 'history'

# Wrap the chain call in our own helper function.
def invoke_chain(question):
    """Run one conversation turn through the chain and record it in memory.

    Args:
        question: The user's message for this turn.

    Returns:
        None. The chain result is printed rather than returned.
    """
    chain_input = {"question": question}
    response = chain.invoke(chain_input)

    # Save this turn so that the next call sees it in the loaded history.
    # ``response`` is the message returned by the LLM step; only its text
    # content is stored as the AI side of the turn.
    user_turn = {"input": question}
    ai_turn = {"output": response.content}
    memory.save_context(user_turn, ai_turn)

    print('🟨result:', response)

In [95]:
# First turn: tell the model the name; invoke_chain saves the turn to memory.
invoke_chain("My name is John")

🎃load_memory() {'question': 'My name is John'}
🟨result: content='Hello John! How can I assist you today?' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 24, 'total_tokens': 34, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-Bk6d6vpFBqVcT8lcKE8JBuWMIR0mz', 'finish_reason': 'stop', 'logprobs': None} id='run--6bbbd428-0b35-4f0e-8da7-ea6a337cdf88-0' usage_metadata={'input_tokens': 24, 'output_tokens': 10, 'total_tokens': 34, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}


In [96]:
# Second turn: the answer depends on the history saved by the previous call.
invoke_chain("What is my name?") 

🎃load_memory() {'question': 'What is my name?'}
🟨result: content='Your name is John.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 47, 'total_tokens': 52, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-Bk6dOqkGpZQe7jdP2FT3TjZoA8Ogn', 'finish_reason': 'stop', 'logprobs': None} id='run--c2f099af-1f48-4860-b134-808dd0fa10a5-0' usage_metadata={'input_tokens': 47, 'output_tokens': 5, 'total_tokens': 52, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}


## refactoring
아래와 같이 코드를 작성할수도 있다.

In [97]:
# Chat model used both by the chain and by the summary memory below.
llm = ChatOpenAI(temperature=0.1)

# No memory_key given, so the history is exposed under the default 'history' key.
memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=120,
    return_messages=True,  # keep history as message objects, not one string
)

# Prompt layout: system instruction, then the stored conversation, then the
# new human question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI talking to a human"),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ]
)

def load_memory():
    """Return the history currently held by ``memory``.

    The parameter was removed on purpose: in this refactored version the
    function is called directly by ``invoke_chain`` rather than by a
    runnable, so nothing is passed in.

    Returns:
        The ``"history"`` entry of ``memory.load_memory_variables({})``.
    """
    memory_variables = memory.load_memory_variables({})
    return memory_variables["history"]

# With no assign() step, the caller must supply both 'question' and 'history'.
chain = prompt | llm  # RunnablePassthrough removed

def invoke_chain(question):
    """Run one conversation turn, passing the history to the chain explicitly.

    Args:
        question: The user's message for this turn.

    Returns:
        None. The chain result is printed rather than returned.
    """
    # The chain has no passthrough step here, so load the history ourselves
    # and pass it in together with the question.
    history = load_memory()
    response = chain.invoke({"question": question, "history": history})

    # Save this turn so that the next call sees it in the loaded history.
    user_turn = {"input": question}
    ai_turn = {"output": response.content}
    memory.save_context(user_turn, ai_turn)

    print('🟨result:', response)

In [98]:
# First turn: tell the model the name; invoke_chain saves the turn to memory.
invoke_chain("My name is John")

🟨result: content='Hello John! How can I assist you today?' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 24, 'total_tokens': 34, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-Bk6jlb8L1QJ6Rfp9HRVL0bj0fQ0hZ', 'finish_reason': 'stop', 'logprobs': None} id='run--a137031d-d65e-4b56-8aaa-78105828975e-0' usage_metadata={'input_tokens': 24, 'output_tokens': 10, 'total_tokens': 34, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}


In [99]:
# Second turn: the answer depends on the history saved by the previous call.
invoke_chain("What is my name?") 

🟨result: content='Your name is John.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 47, 'total_tokens': 52, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-Bk6jtIy8ln5zoXFYnsVBqDKeBc5a4', 'finish_reason': 'stop', 'logprobs': None} id='run--1a6d5024-035b-44ac-b932-918fd3e1dfdc-0' usage_metadata={'input_tokens': 47, 'output_tokens': 5, 'total_tokens': 52, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}
