# Revolutionizing AI with LLMs: Understanding Memory Management
This notebook demonstrates key concepts in LLM memory management, implementations of memory systems, and practical applications.

## Setup and Requirements
First, let's import required libraries and set up our environment:

In [ ]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import numpy as np
import matplotlib.pyplot as plt

# Seed PyTorch's random number generator so results are reproducible across runs
torch.manual_seed(42)

## 1. Understanding Context Window Limitations
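Let's demonstrate the context window limitation with a practical example. GPT-2 accepts at most 1,024 tokens, so any longer input has to be truncated before it can be passed to the model: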

In [ ]:
# Load the pretrained GPT-2 tokenizer and language model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Build an input that is deliberately longer than the model's context window
long_text = ' '.join('This is a very long text' for _ in range(500))

# Encode the text as a 2-D PyTorch tensor of token ids
input_ids = tokenizer.encode(long_text, return_tensors='pt')

# Maximum number of positions the model was configured with
context_limit = model.config.n_positions

# Report the input length next to the model's limit
print(f'Input length: {input_ids.shape[1]} tokens')
print(f'Model max length: {model.config.n_positions} tokens')

# Keep only the leading tokens that fit when the input is too long
if input_ids.size(1) > context_limit:
    print('\nText exceeds context window - truncating...')
    input_ids = input_ids[:, :context_limit]

## 2. Implementing a Memory Management System
Let's create a basic implementation of a tiered memory system similar to MemGPT:

In [ ]:
class TieredMemorySystem:
    """Three-tier memory store: core -> conversation -> archival.

    New items enter ``core_memory``. When a tier is full, its oldest item
    is pushed down to the next tier, so items age from core memory into
    conversation memory and finally into the archival store.

    Attributes are plain containers and may be read directly:
        core_memory: list of the most recently added items.
        conversation_memory: list of items evicted from core memory or
            added directly via ``add_to_conversation_memory``.
        archival_memory: dict mapping an increasing integer key to each
            item evicted from conversation memory.
    """

    def __init__(self, core_memory_limit=5, conversation_memory_limit=20):
        """Create an empty memory system.

        Args:
            core_memory_limit: Maximum number of items kept in core memory.
            conversation_memory_limit: Maximum number of items kept in
                conversation memory.

        Raises:
            ValueError: If either limit is smaller than 1.
        """
        if core_memory_limit < 1 or conversation_memory_limit < 1:
            raise ValueError('memory limits must be at least 1')

        self.core_memory = []  # Short-term, immediate access
        self.conversation_memory = []  # Recent conversation history
        self.archival_memory = {}  # Long-term storage

        # Memory limits
        self.core_memory_limit = core_memory_limit
        self.conversation_memory_limit = conversation_memory_limit

    def add_to_core_memory(self, item):
        """Append ``item`` to core memory, evicting the oldest item if full.

        The evicted item is handed to ``add_to_conversation_memory`` so the
        conversation limit is enforced and overflow cascades into the
        archive instead of letting conversation memory grow without bound.
        """
        if len(self.core_memory) >= self.core_memory_limit:
            self.add_to_conversation_memory(self.core_memory.pop(0))
        self.core_memory.append(item)

    def add_to_conversation_memory(self, item):
        """Append ``item`` to conversation memory, archiving the oldest if full."""
        if len(self.conversation_memory) >= self.conversation_memory_limit:
            # Entries are never removed from the archive, so its current
            # size is a unique, increasing key for the next entry.
            archive_key = len(self.archival_memory)
            self.archival_memory[archive_key] = self.conversation_memory.pop(0)
        self.conversation_memory.append(item)

    def get_context(self):
        """Return a summary of the current memory state.

        Returns:
            dict with keys ``'core'`` and ``'conversation'`` (the live
            internal lists, not copies) and ``'archival_size'`` (number of
            archived items).
        """
        return {
            'core': self.core_memory,
            'conversation': self.conversation_memory,
            'archival_size': len(self.archival_memory)
        }

## 3. Testing the Memory System
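Let's demonstrate how the tiered memory system works with a simulated conversation. With six messages and a core memory limit of five, the oldest message is pushed out of core memory into conversation memory: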

In [ ]:
# Create an empty memory system with the default limits
memory_system = TieredMemorySystem()

# Scripted exchange used to fill the memory tiers
conversation = [
    "User asks about climate change",
    "Bot provides overview of global warming",
    "User asks about specific impacts",
    "Bot discusses rising sea levels",
    "User asks about solutions",
    "Bot suggests renewable energy",
]

# Feed each message into core memory, oldest first
for message in conversation:
    memory_system.add_to_core_memory(message)

# Report what ended up in each tier, one item per line
print('Memory System State:')
for heading, entries in (
    ('\nCore Memory:', memory_system.core_memory),
    ('\nConversation Memory:', memory_system.conversation_memory),
):
    print(heading)
    print('\n'.join(entries))

## 4. Visualizing Memory Usage
Let's create a visualization of memory usage across different tiers:

In [ ]:
def plot_memory_usage(memory_system):
    """Show a bar chart of how many items each memory tier holds.

    Args:
        memory_system: Object exposing ``core_memory``,
            ``conversation_memory`` and ``archival_memory``; only the
            length of each container is read.
    """
    tier_sizes = {
        'Core Memory': len(memory_system.core_memory),
        'Conversation Memory': len(memory_system.conversation_memory),
        'Archival Memory': len(memory_system.archival_memory),
    }

    # Split the stats into parallel label / height sequences for the chart
    tier_labels = list(tier_sizes)
    item_counts = [tier_sizes[label] for label in tier_labels]

    plt.figure(figsize=(10, 6))
    plt.bar(tier_labels, item_counts)
    plt.title('Memory Usage Across Tiers')
    plt.ylabel('Number of Items')
    plt.show()

# Chart the tier sizes of the memory system populated in the previous section
plot_memory_usage(memory_system)