In [4]:
# !pip uninstall -y langchain google-generativeai
# !pip install -qU \
#   "pydantic>=2.11.4,<3" \
#   "langchain-core>=0.3.70,<0.4" \
#   "langchain-community>=0.3.27,<0.4" \
#   "langchain-google-genai>=2.1.10" \
#   "google-genai>=0.7.0" \
#   "langsmith>=0.3.4"


Found existing installation: langchain 0.3.27
Uninstalling langchain-0.3.27:
  Successfully uninstalled langchain-0.3.27
Found existing installation: google-generativeai 0.8.5
Uninstalling google-generativeai-0.8.5:
  Successfully uninstalled google-generativeai-0.8.5
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.1/43.1 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m444.0/444.0 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m49.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m241.7/241.7 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m378.5/378.5 kB[0m [31m21.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━

### LangChain Setup

In [2]:
import os
from getpass import getpass
from google.colab import userdata

# LangSmith credentials: use the Colab secret when it is available, otherwise
# prompt for the key. `userdata.get` raises when the secret does not exist or
# the notebook has not been granted access to it, so those cases are caught
# here to let the interactive prompt act as the fallback.
try:
    langsmith_api_key = userdata.get("LANGCHAIN_API_KEY")
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    langsmith_api_key = None

os.environ["LANGCHAIN_API_KEY"] = langsmith_api_key or getpass(
    "Enter LangSmith API Key: "
)

# Turn tracing on and choose the endpoint / project that runs are logged to.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "aurelioai-langchain-course-chat-memory-openai-practice"

### Gemini Setup

In [5]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini credentials: use the Colab secret when it is available, otherwise
# prompt for the key. `userdata.get` raises when the secret does not exist or
# the notebook has not been granted access to it, so those cases are caught
# here to let the interactive prompt act as the fallback.
try:
    google_api_key = userdata.get("GOOGLE_API_KEY")
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    google_api_key = None

os.environ["GOOGLE_API_KEY"] = google_api_key or getpass(
    "Enter Gemini API Key: "
)

# NOTE: despite its name, this variable holds a Gemini model id. The name is
# kept unchanged in case other cells reference it.
openai_model = "gemini-2.5-flash-lite"

# Chat model shared by the pipelines below; temperature=0 is used for all calls.
llm = ChatGoogleGenerativeAI(
    model=openai_model,
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)

In [6]:
from langchain.prompts import (
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    ChatPromptTemplate
)

system_prompt = "You are a helpful assistant called Kubo."

# Prompt layout: the fixed system instruction, then the messages supplied
# under "history", then the new user query.
prompt_template = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{query}"),
])

# Pick out exactly the two keys the prompt needs from the incoming dict.
prompt_inputs = {
    "query": lambda x: x["query"],
    "history": lambda x: x["history"],
}

# inputs -> prompt -> chat model
pipeline = prompt_inputs | prompt_template | llm

In [22]:
def display_chat_map(chat_map_data):
    """
    Print the conversation stored for every session in ``chat_map_data``.

    Args:
        chat_map_data: Mapping of session id -> history object. Each history
            is expected to expose a ``messages`` attribute whose items carry
            ``type`` and ``content`` attributes.

    Returns:
        None. Everything is written to stdout.

    System messages are printed as ``[SYSTEM] ...``, human messages as
    ``User: ...`` and AI messages as ``Assistant: ...``; any other role is
    printed under its capitalised name. Sessions that look empty or malformed
    get a short diagnostic instead of raising.
    """
    if not chat_map_data:
        print("Chat map is empty.")
        return

    print("Sessions:", list(chat_map_data.keys()))

    for session_id, history in chat_map_data.items():
        print(f"\n{'='*25} Conversation for Session ID: {session_id} {'='*25}")

        # Basic type/len checks
        if not hasattr(history, "messages"):
            print("History has no 'messages' attribute.")
            continue

        messages = history.messages
        if not isinstance(messages, list):
            # Warn, but still try to render below if the value is iterable.
            print(f"'messages' is not a list (got {type(messages)}).")

        if not messages:
            print("No messages in this session.")
            # If the history class has internals, print them to help debug.
            if hasattr(history, "summary_text"):
                print("summary_text:", getattr(history, "summary_text", None))
            if hasattr(history, "recent"):
                try:
                    print("recent size:", len(history.recent))
                except Exception as exc:
                    # Best-effort diagnostic: report the failure instead of
                    # hiding it, but never let it abort the listing.
                    print(f"recent size: unavailable ({exc!r})")
            continue

        try:
            message_iter = iter(messages)
        except TypeError:
            # A truthy, non-iterable value cannot be rendered at all.
            print("'messages' is not iterable; skipping this session.")
            continue

        # Render messages
        for message in message_iter:
            role = getattr(message, "type", "unknown")
            content = getattr(message, "content", "")
            if role == "system":
                print(f"[SYSTEM] {content}")
            elif role == "human":
                print(f"User: {content}")
            elif role == "ai":
                print(f"Assistant: {content}")
            else:
                # str() guards against a non-string `type` (e.g. None).
                print(f"{str(role).capitalize()}: {content}")

    print("\n" + "="*80)


### Conversation Buffer Memory

In [10]:
from langchain_core.chat_history import InMemoryChatMessageHistory

# Registry of in-memory chat histories, keyed by session ID.
chat_map = {}


def get_chat_history(session_id: str) -> InMemoryChatMessageHistory:
    """Return the history for ``session_id``, creating an empty one on first use."""
    try:
        return chat_map[session_id]
    except KeyError:
        # Unknown session: register a fresh history and hand it back.
        history = chat_map[session_id] = InMemoryChatMessageHistory()
        return history

In [11]:
from langchain_core.runnables.history import RunnableWithMessageHistory

# Wrap `pipeline` with message-history handling: histories are obtained from
# `get_chat_history`, the user input is read from the "query" key, and the
# stored messages are supplied under the "history" key (the variable name used
# by the prompt's MessagesPlaceholder).
pipeline_with_history = RunnableWithMessageHistory(
    pipeline,
    get_session_history=get_chat_history,
    input_messages_key="query",
    history_messages_key="history"
)

In [14]:
# First turn of session "id_123". The session id is passed under the
# "configurable" key, the same config shape used by the other invoke cells
# in this notebook.
pipeline_with_history.invoke(
    {"query": "Hi, my name is James"},
    config={"configurable": {"session_id": "id_123"}},
)

AIMessage(content="Hi James, it's nice to meet you! How can I help you today?", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash-lite', 'safety_ratings': []}, id='run--ea9c1ad8-2b83-40a8-bff6-ff08aa06d01c-0', usage_metadata={'input_tokens': 15, 'output_tokens': 18, 'total_tokens': 33, 'input_token_details': {'cache_read': 0}})

In [28]:
# Show the raw messages stored for session "id_124". The lookup is tolerant:
# if no query has been run against this session yet, an empty list is shown
# instead of raising KeyError.
getattr(chat_map.get("id_124"), "messages", [])

[HumanMessage(content='What is your name?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='I am Kubo.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash-lite', 'safety_ratings': []}, id='run--e541dd97-a122-4060-ac7b-2ecf5c3cf66f-0', usage_metadata={'input_tokens': 14, 'output_tokens': 4, 'total_tokens': 18, 'input_token_details': {'cache_read': 0}}),
 HumanMessage(content='What is my name?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='I do not have access to your personal information, including your name. I am a large language model, trained by Google.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash-lite', 'safety_ratings': []}, id='run--652d6261-ab16-4ff6-930b-5387c8bc7bd8-0', usage_metadata={'input_tokens': 25, 'output_token

In [27]:
# Ask a question in session "id_124"; the answer depends on what that session
# has stored so far. The session id is passed under the "configurable" key,
# the same config shape used by the other invoke cells in this notebook.
pipeline_with_history.invoke(
    {"query": "What is my name?"},
    config={"configurable": {"session_id": "id_124"}},
)

AIMessage(content="You've asked me to call you Buffalo, so I'll refer to you as Buffalo.", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash-lite', 'safety_ratings': []}, id='run--fd82e365-dd26-431f-9180-f636706fda8d-0', usage_metadata={'input_tokens': 73, 'output_tokens': 20, 'total_tokens': 93, 'input_token_details': {'cache_read': 0}})

### Conversation Buffer Window Memory

In [72]:
from pydantic import BaseModel, Field
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.messages import BaseMessage

class BufferWindowMessageHistory(BaseChatMessageHistory, BaseModel):
    """Chat history that retains only the ``k`` most recent messages.

    Every call to ``add_messages`` appends the new messages and then trims the
    list so that at most ``k`` remain. A ``k`` of zero or less keeps nothing.
    """

    # Retained messages, oldest first.
    messages: list[BaseMessage] = Field(default_factory=list)
    # Maximum number of messages kept after each add.
    k: int = 4  # safer default

    # Use model_post_init instead of __init__ for side effects
    def model_post_init(self, __context):
        """Log the configured window size once the model has been built."""
        print(f"Initialized BufferWindowMessageHistory with k={self.k}")

    def add_messages(self, messages: list[BaseMessage]) -> None:
        """Append ``messages`` and drop everything but the last ``k`` entries."""
        self.messages.extend(messages)
        # `lst[-0:]` is the whole list, so k <= 0 needs its own branch to
        # actually mean "keep nothing".
        self.messages = self.messages[-self.k:] if self.k > 0 else []

    def clear(self) -> None:
        """Forget every stored message."""
        self.messages = []


In [73]:
# Registry of windowed chat histories, keyed by session ID.
chat_map: dict[str, BufferWindowMessageHistory] = {}


def get_chat_history(session_id: str, k: int = 4) -> BufferWindowMessageHistory:
    """Fetch the windowed history for ``session_id``, creating it on first use.

    ``k`` is only applied when the session is created; an existing session is
    returned as is and keeps the window size it was built with.
    """
    print(f"get_chat_history called with session_id={session_id} and k={k}")
    if chat_map.get(session_id) is None:
        chat_map[session_id] = BufferWindowMessageHistory(k=k)
    # OPTIONAL: to make k-updates apply to existing sessions, add:
    # else:
    #     chat_map[session_id].k = k
    return chat_map[session_id]

In [74]:
from langchain_core.runnables import ConfigurableFieldSpec

# Wrap `pipeline` with history handling backed by `get_chat_history`.
# `history_factory_config` declares the configurable fields whose ids
# ("session_id", "k") match the parameter names of `get_chat_history`, each
# with a default used when the caller does not supply a value.
pipeline_with_history = RunnableWithMessageHistory(
    pipeline,
    get_session_history=get_chat_history,
    input_messages_key="query",
    history_messages_key="history",
    history_factory_config=[
        # Which conversation to load / store.
        ConfigurableFieldSpec(
            id="session_id",
            annotation=str,
            name="Session ID",
            description="The session ID to use for the chat history",
            default="id_default",
        ),
        # Window size handed to `get_chat_history` as `k`.
        ConfigurableFieldSpec(
            id="k",
            annotation=int,
            name="k",
            description="The number of messages to keep in the history",
            default=4,
        )
    ]
)

In [75]:
# Same chain, with an explicit run name attached through its config.
named_chain = pipeline_with_history.with_config(
    run_name="Chatbot with Window Memory"
)

In [76]:
# Run one turn in session "id_k4" with a window of k=4. Both values are passed
# under "configurable" and match the ids declared in `history_factory_config`.
named_chain.invoke(
    {"query": "My name is Orange"},
    config={"configurable": {"session_id": "id_k4", "k": 4}}
)

get_chat_history called with session_id=id_k4 and k=4
Initialized BufferWindowMessageHistory with k=4


AIMessage(content="Hello Orange! It's nice to meet you. How can I help you today?", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash-lite', 'safety_ratings': []}, id='run--8d0252fd-7a2d-4079-ac9a-c9d9add1fc98-0', usage_metadata={'input_tokens': 13, 'output_tokens': 18, 'total_tokens': 31, 'input_token_details': {'cache_read': 0}})

In [77]:
# chat_map["id_k4"].add_user_message("I'm researching ice cream flavors.")
# chat_map["id_k4"].add_ai_message("That's interesting, what are some examples?")

In [78]:
# Print the conversation currently stored for every session in `chat_map`.
display_chat_map(chat_map)


Human: My name is Orange
Ai: Hello Orange! It's nice to meet you. How can I help you today?
Human: I'm researching ice cream flavors.
Ai: That's interesting, what are some examples?



### Conversation Summary Memory

In [67]:
from collections import deque
from typing import List
from pydantic import BaseModel, Field
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage


class ConversationSummaryBuffer(BaseChatMessageHistory, BaseModel):
    """Chat history that keeps recent messages verbatim and summarises the rest.

    The last ``recent_k`` messages are kept as is in ``recent``. Messages that
    fall out of that window are folded into ``summary_text`` by an LLM call.
    ``messages`` always holds one SystemMessage carrying the summary (or a
    placeholder while the summary is empty) followed by the verbatim window.

    A ``recent_k`` of zero or less keeps no verbatim window: every added
    message goes straight into the summary.
    """

    # Public list the wrappers/UI will read
    messages: List[BaseMessage] = Field(default_factory=list)

    # Internal state
    summary_text: str = ""
    recent_k: int = 4
    recent: deque = Field(default_factory=lambda: deque(maxlen=4))
    # Character budget communicated to the summarisation prompt.
    summary_max_chars: int = 600

    # LLM for summarization
    llm: ChatGoogleGenerativeAI = Field(
        default_factory=lambda: ChatGoogleGenerativeAI(
            model="gemini-2.5-flash-lite", temperature=0
        )
    )

    def model_post_init(self, __context):
        """Align the deque's maxlen with ``recent_k`` and build ``messages``."""
        window = self._window_size()
        # ensure deque has the right maxlen if recent_k was overridden
        if self.recent.maxlen != window:
            self.recent = deque(self.recent, maxlen=window)
        self._rebuild_messages()  # build initial [System(summary)] + recent

    # ---------------- helpers ----------------
    def _window_size(self) -> int:
        """Return ``recent_k`` clamped to >= 0 (deque rejects a negative maxlen)."""
        return max(self.recent_k, 0)

    def _rebuild_messages(self) -> None:
        """Recompute public messages list from (summary_text, recent)."""
        sys = SystemMessage(content=self.summary_text or "(no summary yet)")
        self.messages = [sys] + list(self.recent)

    def _render(self, m: BaseMessage) -> str:
        """Format one message as ``"<role>: <content>"`` for the summary prompt."""
        role = "user" if isinstance(m, HumanMessage) else \
               "assistant" if isinstance(m, AIMessage) else "system"
        return f"{role}: {m.content}"

    def _fold_into_summary(self, msgs: List[BaseMessage]) -> None:
        """Merge ``msgs`` into ``summary_text`` with a single LLM call.

        Does nothing when ``msgs`` is empty. Any exception raised by the LLM
        call propagates to the caller and leaves ``summary_text`` unchanged.
        """
        if not msgs:
            return

        prompt = ChatPromptTemplate.from_messages([
            ("system",
             "You are a conversation memory compressor. Maintain a compact summary of OLDER context "
             "(everything except the most recent verbatim window). The summary must be concise, factual, "
             "and **paraphrased**. Do NOT copy sentences verbatim. Keep only durable facts, user prefs, "
             "goals, decisions, constraints, and open questions. Omit chit-chat, fillers, and stylistic text."),
            ("system",
             "HARD LIMITS:\n"
             "- Output ≤ {max_chars} characters.\n"
             "- No markdown, no lists unless bullet points are necessary.\n"
             "- Prefer short clauses separated by ';'."),
            ("system", "Current summary (may be empty):\n{summary}"),
            ("human",
             "Incorporate these older messages (in order):\n{older}\n\n"
             "Return ONLY the updated summary, nothing else.")
        ])

        older_text = "\n".join(self._render(m) for m in msgs)

        updated = (prompt | self.llm).invoke({
            "summary": self.summary_text,
            "older": older_text,
            "max_chars": self.summary_max_chars,
        })

        self.summary_text = getattr(updated, "content", str(updated))

    # ---------------- required API ----------------
    def add_messages(self, new_msgs: List[BaseMessage]) -> None:
        """Append all new messages verbatim; summarize only what falls out.

        The overflow is summarised before any state is modified, so a failing
        LLM call leaves ``recent``, ``summary_text`` and ``messages`` exactly
        as they were instead of losing the evicted messages.
        """
        if not new_msgs:
            return

        window = self._window_size()
        combined = list(self.recent) + list(new_msgs)
        if window > 0:
            # Everything except the last `window` items falls out of the buffer.
            overflow, kept = combined[:-window], combined[-window:]
        else:
            # No verbatim window: every message goes straight to the summary.
            overflow, kept = combined, []

        # Fold evicted messages into the running summary in one LLM call
        if overflow:
            self._fold_into_summary(overflow)

        self.recent = deque(kept, maxlen=window)

        # reflect current state in the public messages list
        self._rebuild_messages()

    # Convenience
    def add_user_message(self, text: "HumanMessage | str") -> None:
        """Add a human turn given either its text or a ready HumanMessage."""
        msg = text if isinstance(text, HumanMessage) else HumanMessage(content=text)
        self.add_messages([msg])

    def add_ai_message(self, text: "AIMessage | str") -> None:
        """Add an AI turn given either its text or a ready AIMessage."""
        msg = text if isinstance(text, AIMessage) else AIMessage(content=text)
        self.add_messages([msg])

    def clear(self) -> None:
        """Drop the summary and the verbatim window, then rebuild ``messages``."""
        self.summary_text = ""
        self.recent.clear()
        self._rebuild_messages()  # keeps display functions happy

In [68]:
# Registry of summary-buffer chat histories, keyed by session ID.
chat_map: dict[str, ConversationSummaryBuffer] = {}


def get_chat_history(session_id: str, llm: ChatGoogleGenerativeAI, recent_k: int = 4) -> ConversationSummaryBuffer:
    """Fetch the summary-buffer history for ``session_id``, creating it on first use.

    ``llm`` and ``recent_k`` are only applied when the session is created; an
    existing session is returned unchanged.
    """
    print(f"get_chat_history summary called with session_id={session_id}")
    if chat_map.get(session_id) is None:
        chat_map[session_id] = ConversationSummaryBuffer(llm=llm, recent_k=recent_k)
    return chat_map[session_id]

In [69]:
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.runnables import ConfigurableFieldSpec

# Wrap `pipeline` with history handling backed by `get_chat_history`.
# `history_factory_config` declares the configurable fields whose ids
# ("session_id", "llm", "recent_k") match the parameter names of
# `get_chat_history`, each with a default used when the caller omits it.
pipeline_with_history = RunnableWithMessageHistory(
    pipeline,
    get_session_history=get_chat_history,
    input_messages_key="query",
    history_messages_key="history",
    history_factory_config=[
        # Which conversation to load / store.
        ConfigurableFieldSpec(
            id="session_id",
            annotation=str,
            name="Session ID",
            description="The session ID to use for the chat history",
            default="id_default",
        ),
        # Model handed to `get_chat_history` as `llm`; defaults to the
        # notebook-level `llm`.
        ConfigurableFieldSpec(
            id="llm",
            annotation=ChatGoogleGenerativeAI,
            name="LLM",
            description="The LLM to use for the conversation summary",
            default=llm,
        ),
        # Size of the verbatim window handed to `get_chat_history` as `recent_k`.
        ConfigurableFieldSpec(
            id="recent_k",
            annotation=int,
            name="Recent K",
            description="The number of recent messages to save as is",
            default=4,
        )
    ]
)

In [70]:
# Same chain, with an explicit run name attached through its config.
named_chain = pipeline_with_history.with_config(
    run_name="Chatbot with Summary memory and configurable K"
)

In [82]:
# Run one turn in session "id_sum_k_1". The session id, summarising LLM and
# verbatim-window size are passed under "configurable" and match the ids
# declared in `history_factory_config`.
named_chain.invoke(
    {"query": "What are my interests"},
    config={"configurable": {"session_id": "id_sum_k_1", "llm": llm, "recent_k": 4}}
)

get_chat_history summary called with session_id=id_sum_k_1


AIMessage(content='Hobb, based on our previous conversations, your interests include:\n\n*   **Training bears to eat with forks.**\n*   **Fighting ducks.**\n*   You previously declined help.\n*   You asked if bears like ice cream, and I advised against feeding it to them due to health concerns.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash-lite', 'safety_ratings': []}, id='run--22f2c8d0-c803-41d0-bbe3-12ef90cd7f94-0', usage_metadata={'input_tokens': 97, 'output_tokens': 63, 'total_tokens': 160, 'input_token_details': {'cache_read': 0}})

In [57]:
# Push a user turn directly into the stored history, bypassing the chain (no
# model reply is generated for it).
# NOTE(review): indexing `chat_map` raises KeyError unless session
# "id_sum_k_1" has already been created by an earlier invoke.
chat_map["id_sum_k_1"].add_user_message("Any similar flavors?")
# chat_map["id_sum_k_1"].add_ai_message("Almond is similar")

In [83]:
# Print the conversation currently stored for every session in `chat_map`.
display_chat_map(chat_map)

Sessions: ['id_sum_k_1']

[SYSTEM] User's name is Hobb; user trains bears to eat with forks; user likes to fight ducks; user previously declined help; user asked if bears like ice cream; assistant advised against feeding ice cream to bears due to health concerns; user asked for "hello" in French, assistant provided "Bonjour".
User: What is hello in spanish
Assistant: "Hello" in Spanish is **"Hola"**.
User: What are my interests
Assistant: Hobb, based on our previous conversations, your interests include:

*   **Training bears to eat with forks.**
*   **Fighting ducks.**
*   You previously declined help.
*   You asked if bears like ice cream, and I advised against feeding it to them due to health concerns.

