# Memory Visualization for Strands Agents

This notebook demonstrates how to visualize and analyze agent memory, providing insights into conversation patterns, memory usage, and engagement metrics.

## 1. Setup

In [None]:
import json
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from datetime import datetime
from typing import Dict, List, Any
from collections import Counter

# For visualization
%matplotlib inline
plt.style.use('ggplot')

# Directory where memory files are stored: an "agent_memory" folder under the
# current working directory. It is created here when missing so that the
# os.listdir() call in list_memory_files() does not fail on a first run.
MEMORY_DIR = os.path.join(os.getcwd(), "agent_memory")
os.makedirs(MEMORY_DIR, exist_ok=True)

## 2. Memory Loading Utilities

In [None]:
def load_memory_file(file_path: str) -> Dict:
    """Load a memory file and return its contents as a dictionary.

    Args:
        file_path: Path to a JSON memory file.

    Returns:
        The parsed JSON object. An empty dict is returned (after printing a
        diagnostic) when the file is missing, cannot be read or decoded, is
        not valid JSON, or does not contain a JSON object at the top level.
    """
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return {}

    try:
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error loading {file_path}: {e}")
        return {}

    # Callers immediately use .get(...) on the result, so a list/str/number
    # root would crash them; treat it as an unreadable file instead.
    if not isinstance(data, dict):
        print(f"Error loading {file_path}: expected a JSON object at the top level, "
              f"got {type(data).__name__}")
        return {}

    return data

def list_memory_files():
    """List all memory files in the memory directory.

    Returns:
        Full paths of the regular files in MEMORY_DIR whose names end in
        '.json', sorted by path. os.listdir() returns entries in arbitrary
        order, so sorting makes "the first memory file" the same on every run.
    """
    paths = (
        os.path.join(MEMORY_DIR, name)
        for name in os.listdir(MEMORY_DIR)
        if name.endswith('.json')
    )
    # Skip directories that merely happen to be named like a JSON file.
    return sorted(path for path in paths if os.path.isfile(path))

# Discover the memory files once and report their base names
memory_files = list_memory_files()
file_count = len(memory_files)
print(f"Found {file_count} memory files:")
for file_name in map(os.path.basename, memory_files):
    print(f"- {file_name}")

## 3. Memory Analysis Tools

In [None]:
class MemoryAnalyzer:
    """Analyzes and visualizes agent memory loaded from a JSON memory file.

    The loaded data is read through three optional top-level keys:
    ``sessions`` (session id -> list of message dicts), ``summaries``
    (session id -> summary) and ``tags``. Missing keys default to empty dicts.
    """

    # Session ids are parsed with this format to obtain a timestamp; ids that
    # do not match simply get no timestamp.
    SESSION_ID_FORMAT = "%Y%m%d_%H%M%S"

    def __init__(self, memory_file: str):
        """Load ``memory_file`` and cache its sessions, summaries and tags."""
        self.memory_file = memory_file
        self.memory_data = load_memory_file(memory_file)
        self.sessions = self.memory_data.get('sessions', {})
        self.summaries = self.memory_data.get('summaries', {})
        self.tags = self.memory_data.get('tags', {})

    @staticmethod
    def _message_text(msg: Any) -> str:
        """Return the textual content of a message, or '' when it has none."""
        if not isinstance(msg, dict):
            return ''
        content = msg.get('content')
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            # NOTE(review): assumes block-style content is a list of dicts
            # carrying their text under a 'text' key - confirm against the
            # memory files actually produced by the agents.
            return " ".join(
                block['text'] for block in content
                if isinstance(block, dict) and isinstance(block.get('text'), str)
            )
        # None or any other type contributes no text.
        return ''

    @staticmethod
    def _sorted_frame_with_labels(stats: List[Dict[str, Any]]):
        """Build a DataFrame from session stats plus one axis label per row.

        Rows are ordered by timestamp when at least one session has one
        (sessions without a timestamp sort last), otherwise by session id.
        Every row gets a label - the date for timestamped sessions, the
        session id otherwise - so the label count always equals the row count.

        Returns:
            A ``(df, labels)`` tuple.
        """
        df = pd.DataFrame(stats)

        if 'timestamp' in df.columns and not df['timestamp'].isna().all():
            df = df.sort_values('timestamp')
            labels = [
                str(ts.date()) if pd.notna(ts) else str(session_id)
                for ts, session_id in zip(df['timestamp'], df['session_id'])
            ]
        else:
            # Fall back to session_id for ordering
            df = df.sort_values('session_id')
            labels = [str(session_id) for session_id in df['session_id']]

        return df, labels

    def get_session_stats(self) -> List[Dict[str, Any]]:
        """Get basic statistics for each session.

        Returns:
            One dict per session with the keys ``session_id``, ``timestamp``
            (``None`` when the id is not in ``SESSION_ID_FORMAT``),
            ``total_messages``, ``user_messages``, ``assistant_messages``,
            ``avg_user_msg_length``, ``avg_assistant_msg_length``,
            ``tools_used`` (a ``Counter`` of tool names), ``tool_count``,
            ``has_summary`` and ``summary``.
        """
        stats = []

        for session_id, raw_messages in self.sessions.items():
            # Ignore malformed (non-dict) entries instead of crashing on them.
            messages = [msg for msg in (raw_messages or []) if isinstance(msg, dict)]

            try:
                timestamp = datetime.strptime(session_id, self.SESSION_ID_FORMAT)
            except (TypeError, ValueError):
                timestamp = None

            # Text length per role; messages without text count as length 0.
            user_msg_lengths = [
                len(self._message_text(msg))
                for msg in messages if msg.get('role') == 'user'
            ]
            assistant_msg_lengths = [
                len(self._message_text(msg))
                for msg in messages if msg.get('role') == 'assistant'
            ]

            # Named tool calls made by the assistant; `or []` tolerates an
            # explicit "tool_calls": null.
            tools_used = []
            for msg in messages:
                if msg.get('role') != 'assistant':
                    continue
                for tool_call in msg.get('tool_calls') or []:
                    tool_name = tool_call.get('name') if isinstance(tool_call, dict) else None
                    if tool_name:
                        tools_used.append(tool_name)

            summary = self.summaries.get(session_id, '')

            stats.append({
                'session_id': session_id,
                'timestamp': timestamp,
                'total_messages': len(messages),
                'user_messages': len(user_msg_lengths),
                'assistant_messages': len(assistant_msg_lengths),
                'avg_user_msg_length': np.mean(user_msg_lengths) if user_msg_lengths else 0,
                'avg_assistant_msg_length': np.mean(assistant_msg_lengths) if assistant_msg_lengths else 0,
                'tools_used': Counter(tools_used),
                'tool_count': len(tools_used),
                'has_summary': bool(summary),
                'summary': summary
            })

        return stats

    def plot_session_activity(self):
        """Plot activity across different sessions as a 2x2 grid of charts.

        The panels show message counts, average message length, tool-call
        count and the assistant/user message ratio, one position per session.
        Prints a notice and returns when there are no sessions.
        """
        stats = self.get_session_stats()

        if not stats:
            print("No sessions found to analyze.")
            return

        df, session_labels = self._sorted_frame_with_labels(stats)
        positions = range(len(df))

        def label_sessions(ax):
            # Same x axis on every panel: one tick per session.
            ax.set_xticks(positions)
            ax.set_xticklabels(session_labels, rotation=45, ha='right')

        fig, axes = plt.subplots(2, 2, figsize=(14, 10))
        fig.suptitle('Session Activity Analysis', fontsize=16)

        # Plot 1: Message counts by session (stacked user + assistant)
        ax = axes[0, 0]
        ax.bar(positions, df['user_messages'], label='User')
        ax.bar(positions, df['assistant_messages'], bottom=df['user_messages'], label='Assistant')
        label_sessions(ax)
        ax.set_ylabel('Number of Messages')
        ax.set_title('Message Count by Session')
        ax.legend()

        # Plot 2: Message length by session
        ax = axes[0, 1]
        ax.plot(positions, df['avg_user_msg_length'], marker='o', label='User')
        ax.plot(positions, df['avg_assistant_msg_length'], marker='s', label='Assistant')
        label_sessions(ax)
        ax.set_ylabel('Average Message Length (chars)')
        ax.set_title('Average Message Length by Session')
        ax.legend()

        # Plot 3: Tool usage count by session
        ax = axes[1, 0]
        ax.bar(positions, df['tool_count'])
        label_sessions(ax)
        ax.set_ylabel('Number of Tool Calls')
        ax.set_title('Tool Usage by Session')

        # Plot 4: Assistant/user message ratio; sessions without user
        # messages are shown as 0 rather than dividing by zero.
        ax = axes[1, 1]
        ratio = [a / u if u > 0 else 0
                 for a, u in zip(df['assistant_messages'], df['user_messages'])]
        ax.bar(positions, ratio)
        ax.axhline(y=1, color='r', linestyle='--', alpha=0.7)
        label_sessions(ax)
        ax.set_ylabel('Assistant/User Message Ratio')
        ax.set_title('Conversation Balance by Session')

        # Leave room at the top for the figure-level title.
        plt.tight_layout(rect=[0, 0, 1, 0.96])
        plt.show()

    def plot_tool_usage(self):
        """Plot tool usage across all sessions as a horizontal bar chart.

        Prints a notice and returns when there are no sessions or when no
        session recorded a tool call.
        """
        stats = self.get_session_stats()

        if not stats:
            print("No sessions found to analyze.")
            return

        # Combine tool usage across all sessions
        all_tools = Counter()
        for session_stats in stats:
            all_tools.update(session_stats['tools_used'])

        if not all_tools:
            print("No tool usage found in any session.")
            return

        # Ascending by count, so the most used tool ends up as the top bar.
        ordered = sorted(all_tools.items(), key=lambda item: item[1])
        tools = [name for name, _ in ordered]
        counts = [count for _, count in ordered]

        fig, ax = plt.subplots(figsize=(10, 6))
        ax.barh(tools, counts)
        ax.set_xlabel('Usage Count')
        ax.set_title('Tool Usage Across All Sessions')
        plt.tight_layout()
        plt.show()

    def word_cloud_from_messages(self, role='user'):
        """Generate a word cloud from messages of the specified role.

        Args:
            role: Value of the message ``role`` field to include.

        Requires the optional ``wordcloud`` and ``nltk`` packages; prints
        installation instructions and returns when they are not importable.
        """
        try:
            from wordcloud import WordCloud
            from nltk.corpus import stopwords
            import nltk
        except ImportError:
            print("Please install wordcloud and nltk to use this feature:")
            print("pip install wordcloud nltk")
            return

        # Collect all non-empty message texts from the specified role
        texts = []
        for messages in self.sessions.values():
            for msg in messages or []:
                if isinstance(msg, dict) and msg.get('role') == role:
                    text = self._message_text(msg)
                    if text:
                        texts.append(text)
        all_text = " ".join(texts)

        if not all_text.strip():
            print(f"No {role} messages found.")
            return

        # NLTK raises LookupError when the corpus has not been downloaded yet.
        try:
            stopwords_set = set(stopwords.words('english'))
        except LookupError:
            nltk.download('stopwords')
            stopwords_set = set(stopwords.words('english'))

        wordcloud = WordCloud(
            width=800, height=400,
            background_color='white',
            stopwords=stopwords_set,
            max_words=100
        ).generate(all_text)

        # Display word cloud
        plt.figure(figsize=(10, 6))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        plt.title(f"Word Cloud from {role.capitalize()} Messages")
        plt.show()

    def analyze_memory_growth(self):
        """Analyze memory size growth over time.

        Plots the cumulative message count per session, annotated with the
        current size of the memory file, and (when there is more than one
        session) prints a bytes-per-message estimate derived from that size.
        """
        stats = self.get_session_stats()

        if not stats:
            print("No sessions found to analyze.")
            return

        df, time_labels = self._sorted_frame_with_labels(stats)
        positions = range(len(df))

        # Calculate cumulative message counts
        df['cumulative_messages'] = df['total_messages'].cumsum()

        # Only the current on-disk size is known; no per-session size history
        # is recorded.
        current_size = os.path.getsize(self.memory_file) / 1024  # KB

        fig, ax1 = plt.subplots(figsize=(10, 6))

        # Plot cumulative message count
        ax1.plot(positions, df['cumulative_messages'], marker='o', color='blue')
        ax1.set_xlabel('Session')
        ax1.set_ylabel('Cumulative Message Count', color='blue')
        ax1.tick_params(axis='y', labelcolor='blue')
        ax1.set_xticks(positions)
        ax1.set_xticklabels(time_labels, rotation=45, ha='right')

        # Add current file size annotation
        ax1.text(0.02, 0.95, f'Current Memory Size: {current_size:.2f} KB',
                 transform=ax1.transAxes, fontsize=12,
                 bbox=dict(facecolor='white', alpha=0.8))

        ax1.set_title('Memory Growth Analysis')
        plt.tight_layout()
        plt.show()

        # Print estimated growth rate
        if len(df) > 1:
            total_messages = df['total_messages'].sum()
            bytes_per_message = (current_size * 1024) / total_messages if total_messages > 0 else 0
            print(f"Estimated memory usage per message: {bytes_per_message:.2f} bytes")
            print(f"At this rate, 1000 messages would require approximately {(bytes_per_message * 1000) / 1024:.2f} KB")

## 4. Analyze Memory File

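Select a memory file to analyze. If you've run the previous example notebooks, memory files should already be available in the `agent_memory` directory; otherwise the cell below writes a small sample file (`sample_memory.json`) there and analyzes that instead.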

In [None]:
# Decide what to analyze: a freshly written sample file when no memory files
# were discovered, otherwise the first discovered file.

if not memory_files:
    print("No memory files found. Creating sample data for demonstration.")
    sample_file = os.path.join(MEMORY_DIR, "sample_memory.json")

    # Three short demo sessions; only the last one contains tool calls
    sample_sessions = {
        "20230601_120000": [
            {"role": "user", "content": "Hello! My name is Alex and I'm interested in AI."},
            {"role": "assistant", "content": "Hi Alex! I'd be happy to talk about AI with you. What aspects of AI interest you the most?"},
        ],
        "20230602_140000": [
            {"role": "user", "content": "I'm working on a project involving natural language processing."},
            {"role": "assistant", "content": "That's great! Natural language processing is a fascinating field. What kind of NLP project are you working on?"},
            {"role": "user", "content": "It's a sentiment analysis tool for customer reviews."},
            {"role": "assistant", "content": "Sentiment analysis is very useful for understanding customer feedback. Are you using any specific libraries or models?"},
        ],
        "20230605_090000": [
            {"role": "user", "content": "I need help with transformers for my NLP project."},
            {"role": "assistant", "content": "I can help with transformers! They're powerful for NLP tasks. Do you have specific questions about using transformer models?", "tool_calls": [{"name": "search_documentation"}]},
            {"role": "user", "content": "Yes, I'm trying to fine-tune BERT for my sentiment analysis task."},
            {"role": "assistant", "content": "Fine-tuning BERT for sentiment analysis is a good approach. Here are some steps to help you get started...", "tool_calls": [{"name": "code_example"}]},
        ],
    }

    # One summary per demo session
    sample_summaries = {
        "20230601_120000": "Initial introduction. User Alex is interested in AI.",
        "20230602_140000": "Discussion about NLP project - sentiment analysis for customer reviews.",
        "20230605_090000": "Help with transformer models, specifically fine-tuning BERT for sentiment analysis.",
    }

    sample_data = {"sessions": sample_sessions, "summaries": sample_summaries}

    # Persist the sample so the later cells can read it from disk
    with open(sample_file, 'w') as out:
        json.dump(sample_data, out, indent=2)

    analyzer = MemoryAnalyzer(sample_file)
else:
    memory_file = memory_files[0]
    print(f"Analyzing memory file: {os.path.basename(memory_file)}")
    analyzer = MemoryAnalyzer(memory_file)

## 5. Visualize Memory Data

In [None]:
# Columns worth showing in the overview table (the summary text and the
# per-tool counters are left out)
display_columns = [
    'session_id',
    'total_messages',
    'user_messages',
    'assistant_messages',
    'avg_user_msg_length',
    'avg_assistant_msg_length',
    'tool_count',
    'has_summary',
]

# Per-session statistics, wrapped in a DataFrame for easier viewing
session_stats = analyzer.get_session_stats()
stats_df = pd.DataFrame(session_stats)

# Last expression of the cell, so the notebook renders it as a table
stats_df.loc[:, display_columns]

In [None]:
# Plot session activity: a 2x2 grid showing message counts, average message
# length, tool calls and the assistant/user message ratio for each session
analyzer.plot_session_activity()

In [None]:
# Plot tool usage: horizontal bar chart of tool-call counts summed over all
# sessions (prints a notice instead when no tool calls were recorded)
analyzer.plot_tool_usage()

In [None]:
# Generate word cloud from user messages. Needs the optional wordcloud and
# nltk packages; install instructions are printed when they are missing.
analyzer.word_cloud_from_messages(role='user')

In [None]:
# Analyze memory growth: cumulative message count per session, annotated with
# the current size of the memory file on disk
analyzer.analyze_memory_growth()

## 6. Session Content Analysis

In [None]:
# Print every stored summary, one blank-line-separated entry per session
print("Session Summaries:")
for session_id in analyzer.summaries:
    summary = analyzer.summaries[session_id]
    print(f"\n{session_id}: {summary}")

In [None]:
# Preview the first few messages of every session
for session_id, messages in analyzer.sessions.items():
    print(f"\nSession: {session_id}")
    print(f"Number of messages: {len(messages)}")

    # At most three messages are shown per session
    preview = messages[:3]
    sample_count = len(preview)
    print("Sample messages:")
    for position, msg in enumerate(preview, start=1):
        role = msg.get('role', 'unknown')
        content = msg.get('content', '')
        # Keep each line short: cut long messages to 97 chars plus an ellipsis
        content = content if len(content) <= 100 else content[:97] + "..."
        print(f"  {position}. {role.capitalize()}: {content}")

    remaining = len(messages) - sample_count
    if remaining > 0:
        print(f"  ... and {remaining} more messages")

## 7. Memory Monitoring Tool

Create a tool that can be used by Strands agents to monitor their own memory usage.

In [None]:
from strands.tools import tool

@tool
def monitor_memory(memory_file: str) -> str:
    """Monitor memory usage and provide statistics about the agent's memory.

    Args:
        memory_file: Path to the JSON memory file to inspect.

    Returns:
        A plain-text report with file size, modification time, session and
        message counts and a bytes-per-message estimate, or an error message
        when the file is missing or cannot be analyzed.
    """
    if not os.path.exists(memory_file):
        return f"Memory file not found: {memory_file}"

    try:
        # Basic file stats
        file_size = os.path.getsize(memory_file) / 1024  # KB
        last_modified = datetime.fromtimestamp(os.path.getmtime(memory_file))

        # Load memory data
        memory_data = load_memory_file(memory_file)
        sessions = memory_data.get('sessions', {})
        summaries = memory_data.get('summaries', {})

        # Calculate statistics in a single pass over all messages
        total_sessions = len(sessions)
        total_messages = 0
        role_counts = Counter()
        for messages in sessions.values():
            messages = messages or []
            total_messages += len(messages)
            role_counts.update(
                msg.get('role') for msg in messages if isinstance(msg, dict)
            )
        user_messages = role_counts['user']
        assistant_messages = role_counts['assistant']
        summarized_sessions = sum(1 for session_id in sessions if session_id in summaries)

        # Most recent session = greatest session id, which matches
        # chronological order for YYYYMMDD_HHMMSS ids.
        if total_sessions > 0:
            recent_session_id = max(sessions)
            recent_session_msgs = len(sessions[recent_session_id] or [])
        else:
            recent_session_id = "None"
            recent_session_msgs = 0

        # An empty memory file must still produce a report, so guard the
        # division instead of letting ZeroDivisionError reach the handler below.
        bytes_per_message = (file_size * 1024) / total_messages if total_messages > 0 else 0

        # Format response
        response = f"""Memory Usage Report:

File: {os.path.basename(memory_file)}
Size: {file_size:.2f} KB
Last Modified: {last_modified}

Sessions: {total_sessions}
Total Messages: {total_messages}
- User Messages: {user_messages}
- Assistant Messages: {assistant_messages}
Sessions with Summaries: {summarized_sessions}/{total_sessions}

Most Recent Session: {recent_session_id}
Messages in Recent Session: {recent_session_msgs}

Estimated memory per message: {bytes_per_message:.2f} bytes (if evenly distributed)
        """

        return response

    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Error analyzing memory file: {e}"

In [None]:
# Try the monitoring tool on the first discovered file, or on the sample file
# when none was found
if not memory_files:
    memory_stats = monitor_memory(os.path.join(MEMORY_DIR, "sample_memory.json"))
else:
    memory_stats = monitor_memory(memory_files[0])
print(memory_stats)

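## 8. Memory Pruning and Optimization

**Note:** `optimize_memory` rewrites the memory file in place. It first saves a copy to `<memory_file>.bak`, but that backup is overwritten on every run, so keep your own copy of any file you care about.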

In [None]:
# Strategies understood by optimize_memory().
_OPTIMIZE_STRATEGIES = ('summarize_old', 'remove_oldest', 'compact')


def _summarize_old_sessions(sessions, summaries, keep_recent=3):
    """Shrink all but the newest sessions to their first and last message."""
    ordered = sorted(sessions)
    cutoff = max(len(ordered) - keep_recent, 0)

    for session_id in ordered[:cutoff]:
        messages = sessions[session_id]

        # If we don't have a summary yet, create a simple one from the first
        # user message that actually carries text.
        if session_id not in summaries:
            user_msgs = [
                msg.get('content') for msg in messages
                if msg.get('role') == 'user' and isinstance(msg.get('content'), str)
            ]
            if user_msgs:
                summaries[session_id] = (
                    f"Session with {len(messages)} messages. "
                    f"First topic: {user_msgs[0][:100]}..."
                )

        # Replace full conversation with just first and last message
        if len(messages) > 2:
            sessions[session_id] = [messages[0], messages[-1]]


def _remove_oldest_sessions(sessions, summaries, keep_recent=5):
    """Delete all but the newest sessions, together with their summaries."""
    ordered = sorted(sessions)
    cutoff = max(len(ordered) - keep_recent, 0)

    for session_id in ordered[:cutoff]:
        del sessions[session_id]
        summaries.pop(session_id, None)


def _compact_sessions(sessions, max_content_length=500):
    """Reduce every message to role, truncated content and tool-call names."""
    for messages in sessions.values():
        for i, msg in enumerate(messages):
            essential = {}
            if 'role' in msg:
                essential['role'] = msg['role']
            if 'content' in msg:
                content = msg['content']
                # Only string content can be truncated; anything else
                # (e.g. null) is kept untouched.
                if isinstance(content, str) and len(content) > max_content_length:
                    content = content[:max_content_length - 3] + "..."
                essential['content'] = content
            if 'tool_calls' in msg:
                # `or []` tolerates an explicit "tool_calls": null.
                essential['tool_calls'] = [
                    {'name': tc.get('name')}
                    for tc in (msg['tool_calls'] or [])
                    if isinstance(tc, dict)
                ]
            messages[i] = essential


def optimize_memory(memory_file: str, strategy: str = 'summarize_old'):
    """Optimize memory usage by pruning or summarizing old conversations.

    The memory file is rewritten in place. Before that, the loaded data is
    saved to ``memory_file + ".bak"``.

    Strategies:
    - summarize_old: Replace older session details with summaries
      (the 3 most recent sessions are left untouched)
    - remove_oldest: Remove the oldest sessions (the 5 most recent are kept)
    - compact: Remove unnecessary fields and truncate long messages

    Args:
        memory_file: Path to the JSON memory file to optimize.
        strategy: One of the strategy names listed above.

    Returns:
        None. Progress, results and errors are printed. Nothing is written
        when the file is missing, has no sessions, or the strategy is unknown.
    """
    if not os.path.exists(memory_file):
        print(f"Memory file not found: {memory_file}")
        return

    # Reject typos before touching any file; otherwise an unknown strategy
    # would overwrite the backup and rewrite the file without optimizing.
    if strategy not in _OPTIMIZE_STRATEGIES:
        print(f"Unknown strategy: {strategy!r}. "
              f"Expected one of: {', '.join(_OPTIMIZE_STRATEGIES)}")
        return

    try:
        # Load memory
        memory_data = load_memory_file(memory_file)
        sessions = memory_data.get('sessions', {})
        summaries = memory_data.get('summaries', {})

        if not sessions:
            print("No sessions found to optimize.")
            return

        # Backup the data as loaded. It is re-serialized with the same
        # formatting as the output below, so the size comparison at the end
        # measures the optimization and not a formatting difference.
        # NOTE(review): an existing .bak from an earlier run is overwritten.
        backup_file = memory_file + ".bak"
        with open(backup_file, 'w', encoding='utf-8') as f:
            json.dump(memory_data, f, indent=2)

        print(f"Created backup at: {backup_file}")

        # Session ids are ordered as strings, which is chronological for
        # YYYYMMDD_HHMMSS ids.
        if strategy == 'summarize_old':
            _summarize_old_sessions(sessions, summaries)
        elif strategy == 'remove_oldest':
            _remove_oldest_sessions(sessions, summaries)
        else:
            _compact_sessions(sessions)

        # Save optimized memory
        memory_data['sessions'] = sessions
        memory_data['summaries'] = summaries

        with open(memory_file, 'w', encoding='utf-8') as f:
            json.dump(memory_data, f, indent=2)

        # Report optimization results
        original_size = os.path.getsize(backup_file) / 1024
        new_size = os.path.getsize(memory_file) / 1024
        saved = original_size - new_size
        saved_percent = (saved / original_size) * 100 if original_size > 0 else 0

        print("Memory optimization complete!")
        print(f"Strategy used: {strategy}")
        print(f"Original size: {original_size:.2f} KB")
        print(f"New size: {new_size:.2f} KB")
        print(f"Saved: {saved:.2f} KB ({saved_percent:.1f}%)")

    except Exception as e:
        # Notebook-level boundary: report the failure instead of raising.
        print(f"Error optimizing memory: {e}")

In [None]:
# Optimize the sample file with 'compact' when no memory files were found,
# otherwise the first discovered file with 'summarize_old'.
# Available strategies: 'summarize_old', 'remove_oldest', or 'compact'
if not memory_files:
    optimize_memory(os.path.join(MEMORY_DIR, "sample_memory.json"), strategy='compact')
else:
    optimize_memory(memory_files[0], strategy='summarize_old')

## 9. Memory Search Tool

In [None]:
@tool
def search_memory_by_keyword(memory_file: str, keyword: str) -> str:
    """Search for specific keywords or topics across all memory sessions.

    The search is case-insensitive and matches the keyword literally.

    Args:
        memory_file: Path to the JSON memory file to search.
        keyword: Text to look for in message contents.

    Returns:
        The matching snippets grouped by session, with each occurrence of the
        keyword wrapped in square brackets, or a message explaining that
        nothing was found or that the search could not be run.
    """
    # Local import so this cell does not depend on the notebook's import cell.
    import re

    if not os.path.exists(memory_file):
        return f"Memory file not found: {memory_file}"

    # An empty keyword is a substring of every message and would be
    # "highlighted" between every character, so reject it up front.
    if not keyword:
        return "Please provide a non-empty keyword to search for."

    try:
        memory_data = load_memory_file(memory_file)
        sessions = memory_data.get('sessions', {})

        if not sessions:
            return "No sessions found to search."

        # re.escape makes the keyword a literal; IGNORECASE lets snippets keep
        # the original casing of the message.
        pattern = re.compile(re.escape(keyword), re.IGNORECASE)
        results = []

        for session_id, messages in sessions.items():
            # Show YYYYMMDD... ids as YYYY-MM-DD; leave any other id as is.
            date_part = session_id.split('_')[0]
            if len(date_part) == 8 and date_part.isdigit():
                formatted_date = f"{date_part[:4]}-{date_part[4:6]}-{date_part[6:]}"
            else:
                formatted_date = session_id

            session_matches = []
            for msg in messages or []:
                if not isinstance(msg, dict):
                    continue
                content = msg.get('content')
                if not isinstance(content, str):
                    continue
                match = pattern.search(content)
                if match is None:
                    continue

                role = msg.get('role')
                role_label = role.capitalize() if isinstance(role, str) and role else "Unknown"

                # Extract up to 40 characters of context on each side of the
                # first occurrence.
                start_pos = max(0, match.start() - 40)
                end_pos = min(len(content), match.end() + 40)
                snippet = content[start_pos:end_pos]

                # Highlight before adding the ellipses so a keyword such as
                # "." cannot match the ellipsis itself.
                highlighted = pattern.sub(lambda m: f"[{m.group(0)}]", snippet)
                if start_pos > 0:
                    highlighted = "..." + highlighted
                if end_pos < len(content):
                    highlighted = highlighted + "..."

                session_matches.append(f"{role_label}: {highlighted}")

            if session_matches:
                results.append(f"\nFrom session {formatted_date}:")
                results.extend(session_matches)

        if not results:
            return f"No matches found for '{keyword}'."

        return "\n".join([f"Search results for '{keyword}':"] + results)

    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Error searching memory: {e}"

In [None]:
# Try the search tool on the first discovered file, or on the sample file
# when none was found
if not memory_files:
    search_results = search_memory_by_keyword(os.path.join(MEMORY_DIR, "sample_memory.json"), "project")
else:
    search_results = search_memory_by_keyword(memory_files[0], "project")
print(search_results)

## 10. Conclusion

This notebook demonstrated various techniques for visualizing and analyzing agent memory, including:

1. **Session activity analysis** - Understanding conversation patterns across sessions
2. **Tool usage visualization** - Identifying which tools are most frequently used
3. **Content analysis** - Examining message content via word clouds and keyword searches
4. **Memory optimization** - Techniques for reducing memory size while preserving key information
5. **Memory monitoring** - Tools for agents to analyze their own memory usage

These analytics and management capabilities are essential for maintaining efficient memory systems in production agent applications, especially as conversation histories grow over time.