##### Testing the persistent storage logic

* Main idea: each chat thread will have a session uuid
* map the db uploaded in each thread, and that thread's memory (human and AI messages), to that uuid
* store memory as a json file on disk that can be loaded whenever the user goes back to a particular thread and wants to access previous messages in the chat history
* use lazy loading to load chat history.
* bootstrap script: on startup, enumerate all uuid.json memory files to recover past session uuids, so that session data persists across restarts

In [1]:
# Folder layout used to persist session data:
#   storage/memory  -> one JSON memory file per chat session
#   storage/db      -> one db file per chat session

import os

data_directory = "storage/"
memory_directory = os.path.join(data_directory, "memory")
db_directory = os.path.join(data_directory, "db")

# exist_ok=True keeps this cell safe to re-run when the folders already exist
for directory in (data_directory, memory_directory, db_directory):
    os.makedirs(directory, exist_ok=True)

Each file in the storage/memory directory will be a uuid.json file that holds, as JSON, the memory of the chat session with that uuid

In [None]:
import uuid

# Simulate pre-existing sessions: for 10 fresh session ids, create a placeholder
# .db file and a .json memory file.
# The memory file is seeded with an empty JSON list rather than left at zero bytes:
# a zero-byte file is not valid JSON, so json.load() would raise JSONDecodeError
# if a session were loaded before any history had been written to it.
for _ in range(10):
    current_session_id = str(uuid.uuid4())
    db_file_path = os.path.join(db_directory, current_session_id + ".db")
    memory_file_path = os.path.join(memory_directory, current_session_id + ".json")

    # The db placeholder only needs to exist; nothing is written to it here.
    with open(db_file_path, "w"):
        pass

    with open(memory_file_path, "w") as f:
        f.write("[]")
    

Add dummy chat history to all the files

In [18]:
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.messages.base import message_to_dict
from langchain_core.messages.utils import messages_from_dict
import json

# Every memory file receives the same dummy transcript (10 human/AI exchanges),
# so build and serialise it once instead of rebuilding it for each file.
messages = []
for _ in range(10):
    messages.extend([
        HumanMessage(content="sample human query"),
        AIMessage(content="sample AI response"),
    ])
dict_data = [message_to_dict(message) for message in messages]

for file in os.listdir(memory_directory):
    # Only touch session memory files; leave any other directory entry alone
    # (open(..., "w") would clobber a stray file or fail on a sub-directory).
    if not file.endswith(".json"):
        continue
    file_path = os.path.join(memory_directory, file)
    with open(file_path, "w") as f:
        json.dump(dict_data, f)

Now, we have 10 .json files with chat history
Now, we write the bootstrap script which runs on startup, collects all past session ids and maps them for lazy loading

In [23]:
# Bootstrap: rebuild the session index from the memory files found on disk.
#   session_map      -> per-session state, None until the session is lazily loaded
#   session_file_map -> per-session paths of the memory (.json) and db (.db) files
session_map = {}
session_file_map = {}

for file in os.listdir(memory_directory):
    # The variable is named session_id (not uuid) so that it does not shadow the
    # stdlib uuid module this notebook imports and uses to mint session ids.
    session_id, extension = os.path.splitext(file)
    # Skip anything that is not a "<session id>.json" memory file; slicing a fixed
    # five characters off an unrelated file name would register a bogus session.
    if extension != ".json":
        continue
    session_map[session_id] = {"memory": None, "db": None}
    session_file_map[session_id] = {
        "memory_file": os.path.join(memory_directory, file),
        "db_file": os.path.join(db_directory, session_id + ".db"),
    }

session_map

{'10d2bc47-a28d-4e10-87cf-3d0249c41551': {'memory': None, 'db': None},
 '3fd1836d-9e20-43f2-9436-54e9a8f09494': {'memory': None, 'db': None},
 '921058a2-2956-4d05-bd4d-9dbb05d9b7ed': {'memory': None, 'db': None},
 'b96a9ad6-ea59-4b72-8ac2-e3d53dc1f8b7': {'memory': None, 'db': None},
 'c92b07d2-5f2f-436d-a530-453b69b6a6c8': {'memory': None, 'db': None},
 'd7cee794-a03f-468f-a342-da06d1f2fb20': {'memory': None, 'db': None},
 'd975ea2c-6aff-4c11-b563-53fb054a08a0': {'memory': None, 'db': None},
 'dfe06996-aa76-404d-ae65-63f4602be651': {'memory': None, 'db': None},
 'e992b789-9ba3-40da-9d8d-b35a162649f5': {'memory': None, 'db': None},
 'ffde8f05-bcb9-4e54-a286-f4f0b4664b08': {'memory': None, 'db': None}}

Lazy load memory and db for a given uuid

In [35]:
sample_uuid = "b96a9ad6-ea59-4b72-8ac2-e3d53dc1f8b7"

# for the given uuid, look up the .json and .db file paths recorded in session_file_map
memory_file = session_file_map[sample_uuid]["memory_file"]
db_file = session_file_map[sample_uuid]["db_file"]

# read the stored chat history (a list of serialised message dicts) from the memory file
with open(memory_file, "r") as f:
    data = json.load(f)

# populate the session_map with the extracted chat history / memory;
# reuse the db path already looked up above instead of rebuilding it by hand
msg_data = messages_from_dict(data)
session_map[sample_uuid]["memory"] = msg_data
session_map[sample_uuid]["db"] = db_file
print(data)
print(len(data))
print(session_map)

[{'type': 'human', 'data': {'content': 'sample human query', 'additional_kwargs': {}, 'response_metadata': {}, 'type': 'human', 'name': None, 'id': None}}, {'type': 'ai', 'data': {'content': 'sample AI response', 'additional_kwargs': {}, 'response_metadata': {}, 'type': 'ai', 'name': None, 'id': None, 'tool_calls': [], 'invalid_tool_calls': [], 'usage_metadata': None}}, {'type': 'human', 'data': {'content': 'sample human query', 'additional_kwargs': {}, 'response_metadata': {}, 'type': 'human', 'name': None, 'id': None}}, {'type': 'ai', 'data': {'content': 'sample AI response', 'additional_kwargs': {}, 'response_metadata': {}, 'type': 'ai', 'name': None, 'id': None, 'tool_calls': [], 'invalid_tool_calls': [], 'usage_metadata': None}}, {'type': 'human', 'data': {'content': 'sample human query', 'additional_kwargs': {}, 'response_metadata': {}, 'type': 'human', 'name': None, 'id': None}}, {'type': 'ai', 'data': {'content': 'sample AI response', 'additional_kwargs': {}, 'response_metadata

Let's display the past history line by line

In [37]:
# Print each loaded message's text on its own line (to be rendered in the UI later)
chat_history = session_map[sample_uuid]["memory"]
for message in chat_history:
    print("---> ", message.content)

--->  sample human query
--->  sample AI response
--->  sample human query
--->  sample AI response
--->  sample human query
--->  sample AI response
--->  sample human query
--->  sample AI response
--->  sample human query
--->  sample AI response
--->  sample human query
--->  sample AI response
--->  sample human query
--->  sample AI response
--->  sample human query
--->  sample AI response
--->  sample human query
--->  sample AI response
--->  sample human query
--->  sample AI response
--->  new human query
--->  new AI response


Now that we have the memory data from that session's json file, we can append new Human, AI message objects to this list and save the entire file again

In [None]:
# One new exchange (human turn followed by AI turn) to add to the loaded history
new_message = [
    HumanMessage(content="new human query"),
    AIMessage(content="new AI response"),
]

# Convert the message objects to plain dicts so they fit the JSON-serialisable list
data.extend(map(message_to_dict, new_message))

# Rewrite the session's memory file with the full, updated history
file_path = os.path.join(memory_directory, f"{sample_uuid}.json")
with open(file_path, "w") as out_file:
    json.dump(data, out_file)

Once we switch to a new session uuid, we can clear the memory from the session map to save memory

In [38]:
# Leaving this session: reset its cached history and db entry back to None.
# Only the in-memory map is changed here.
session_map[sample_uuid].update(memory=None, db=None)
print(session_map)

{'10d2bc47-a28d-4e10-87cf-3d0249c41551': {'memory': None, 'db': None}, '3fd1836d-9e20-43f2-9436-54e9a8f09494': {'memory': None, 'db': None}, '921058a2-2956-4d05-bd4d-9dbb05d9b7ed': {'memory': None, 'db': None}, 'b96a9ad6-ea59-4b72-8ac2-e3d53dc1f8b7': {'memory': None, 'db': None}, 'c92b07d2-5f2f-436d-a530-453b69b6a6c8': {'memory': None, 'db': None}, 'd7cee794-a03f-468f-a342-da06d1f2fb20': {'memory': None, 'db': None}, 'd975ea2c-6aff-4c11-b563-53fb054a08a0': {'memory': None, 'db': None}, 'dfe06996-aa76-404d-ae65-63f4602be651': {'memory': None, 'db': None}, 'e992b789-9ba3-40da-9d8d-b35a162649f5': {'memory': None, 'db': None}, 'ffde8f05-bcb9-4e54-a286-f4f0b4664b08': {'memory': None, 'db': None}}


ALL steps: