# Memory Performance Analysis

Interactive analysis of memory allocation patterns using `MemoryProfiler`.

This notebook helps you:
- Profile allocation patterns in your workloads
- Identify memory hotspots and fragmentation
- Detect memory leaks
- Analyze cross-thread memory operations
- Generate optimization recommendations

## 1. Setup

In [None]:
import sys
import threading
import time
import random
from dataclasses import dataclass
from datetime import datetime
from typing import Optional

# Probe for the optional concurrent_collections package; later cells switch
# to simulated data when LIBRARY_AVAILABLE is False.
try:
    from concurrent_collections import MemoryProfiler, SkipListMap, config
except ImportError:
    LIBRARY_AVAILABLE = False
    print("concurrent_collections not installed - running in simulation mode")
    print("Install with: pip install concurrent_collections")
else:
    LIBRARY_AVAILABLE = True
    print("concurrent_collections loaded successfully")
    print(f"Python: {sys.version}")
    # sys._is_gil_enabled may be absent on this interpreter; print 'N/A' then.
    print(f"Free-threaded: {not sys._is_gil_enabled() if hasattr(sys, '_is_gil_enabled') else 'N/A'}")

## 2. Simulation Mode (when library not installed)

If the library isn't installed, we provide simulated data for demonstration.

In [None]:
if not LIBRARY_AVAILABLE:
    @dataclass
    class SizeHistogram:
        """Simulated allocation-size histogram.

        ``buckets`` maps a size-range label (for example ``"33-64"``) to the
        count recorded for that range.
        """
        buckets: dict
        total_count: int
        total_bytes: int

        def hot_sizes(self, top_n=5):
            """Return up to ``top_n`` ``(label, count)`` pairs, largest count first.

            Buckets with equal counts keep their insertion order.
            """
            ranked = list(self.buckets.items())
            ranked.sort(key=lambda entry: entry[1], reverse=True)
            return ranked[:top_n]
    
    @dataclass
    class ThreadMemoryStats:
        """Per-thread counters carried by the simulated profiler report."""
        # Numeric id; the display cells fall back to it when thread_name is unset.
        thread_id: int
        thread_name: Optional[str]
        # Operation counts (shown under the 'Allocs' and 'Frees' table columns).
        alloc_count: int
        free_count: int
        # Byte totals; the per-thread table prints alloc_bytes / 1024 as KB.
        alloc_bytes: int
        free_bytes: int
        # Cross-thread free counters.
        # NOTE(review): the exact meaning of "sent" vs "received" is defined by
        # the real library and is not visible here - confirm before interpreting.
        cross_thread_frees_sent: int
        cross_thread_frees_received: int
    
    @dataclass
    class FragmentationMetrics:
        """Fragmentation figures carried by the simulated profiler report."""
        # Fractions, not percentages: the summary cell formats them with
        # ``:.1%`` and the recommendations compare them with 0.2 and 0.15.
        internal_fragmentation: float
        external_fragmentation: float
        # Not displayed by the analysis cells; presumably a size in bytes and
        # a block count - TODO confirm against the library.
        largest_free_block: int
        free_block_count: int
        # Also a fraction (formatted with ``:.1%``; below 0.7 triggers a warning).
        utilization: float
    
    @dataclass
    class MemoryProfilerReport:
        """Stand-in for the profiler report, used when the library is missing.

        The analysis cells read these attributes from ``report`` regardless of
        whether it came from the real profiler or from the simulation.
        """
        # Operation counts.
        alloc_count: int
        free_count: int
        # Byte figures: the summary cell divides them by 1024**2 (MB) or 1024 (KB).
        alloc_bytes: int
        free_bytes: int
        current_allocated: int
        peak_allocated: int
        # Per-second rates (printed as allocs/sec, frees/sec and MB/sec).
        alloc_rate: float
        free_rate: float
        bytes_rate: float
        # Latency percentiles; the summary and chart label them as nanoseconds.
        alloc_latency_p50: float
        alloc_latency_p95: float
        alloc_latency_p99: float
        alloc_latency_p999: float
        free_latency_p50: float
        free_latency_p95: float
        free_latency_p99: float
        free_latency_p999: float
        cross_thread_free_count: int
        # Already on a 0-100 scale: printed with a literal '%' suffix and
        # compared against thresholds of 5 and 10.
        cross_thread_free_pct: float
        size_histogram: SizeHistogram
        # The simulation fills this with ThreadMemoryStats instances.
        thread_stats: list
        fragmentation: FragmentationMetrics
        # None in the simulated report.
        leaked_allocations: Optional[list]
        # Any value above 0 produces an 'error' recommendation.
        leaked_bytes: int
        duration_seconds: float
        start_time: datetime
        end_time: datetime
    
    def generate_simulated_report():
        """Generate realistic simulated profiler data.

        Returns:
            A ``MemoryProfilerReport`` filled with fixed, hand-picked figures.
            ``end_time`` is the current clock time and ``start_time`` lies
            exactly ``duration_seconds`` before it, so the exported
            timestamps agree with the reported duration.
        """
        # Local import: the setup cell only brings ``datetime`` into scope.
        from datetime import timedelta

        duration_seconds = 10.0
        end_time = datetime.now()
        start_time = end_time - timedelta(seconds=duration_seconds)

        return MemoryProfilerReport(
            alloc_count=1_523_456,
            free_count=1_523_400,
            alloc_bytes=98_765_432,
            free_bytes=98_760_000,
            current_allocated=5_432,
            peak_allocated=15_234_567,
            alloc_rate=152345.6,
            free_rate=152340.0,
            bytes_rate=9876543.2,
            alloc_latency_p50=15.2,
            alloc_latency_p95=42.8,
            alloc_latency_p99=128.5,
            alloc_latency_p999=512.3,
            free_latency_p50=12.1,
            free_latency_p95=35.6,
            free_latency_p99=98.2,
            free_latency_p999=245.8,
            cross_thread_free_count=45_678,
            cross_thread_free_pct=3.0,
            size_histogram=SizeHistogram(
                buckets={
                    "1-8": 12345,
                    "9-16": 45678,
                    "17-32": 234567,
                    "33-64": 567890,
                    "65-128": 456789,
                    "129-256": 123456,
                    "257-512": 56789,
                    "513-1024": 23456,
                    "1025-4096": 2345,
                    "4097+": 141,
                },
                total_count=1_523_456,
                total_bytes=98_765_432,
            ),
            # Positional order: thread_id, thread_name, alloc_count, free_count,
            # alloc_bytes, free_bytes, cross_thread_frees_sent,
            # cross_thread_frees_received.
            thread_stats=[
                ThreadMemoryStats(1, "MainThread", 456789, 456700, 29876543, 29870000, 12345, 5678),
                ThreadMemoryStats(2, "Worker-1", 234567, 234560, 15234567, 15230000, 8765, 3456),
                ThreadMemoryStats(3, "Worker-2", 234567, 234560, 15234567, 15230000, 7654, 4567),
                ThreadMemoryStats(4, "Worker-3", 198765, 198760, 12987654, 12980000, 6543, 5678),
                ThreadMemoryStats(5, "Worker-4", 198765, 198760, 12987654, 12980000, 5432, 6789),
                ThreadMemoryStats(6, "Worker-5", 100001, 100000, 6543210, 6540000, 4321, 7890),
                ThreadMemoryStats(7, "Worker-6", 50001, 50000, 3456789, 3450000, 618, 8901),
                ThreadMemoryStats(8, "Worker-7", 50001, 50000, 3456789, 3450000, 0, 2719),
            ],
            fragmentation=FragmentationMetrics(
                internal_fragmentation=0.12,
                external_fragmentation=0.05,
                largest_free_block=1048576,
                free_block_count=234,
                utilization=0.83,
            ),
            leaked_allocations=None,
            leaked_bytes=0,
            duration_seconds=duration_seconds,
            start_time=start_time,
            end_time=end_time,
        )
    
    print("Simulation classes created")

## 3. Workload Configuration

Configure your test workload parameters.

In [None]:
# Parameters that drive the synthetic workload
WORKLOAD_CONFIG = {
    "num_threads": 8,
    "operations_per_thread": 100_000,
    "key_range": 50_000,
    "value_size_min": 10,
    "value_size_max": 1000,
    "read_ratio": 0.7,  # 70% reads, 30% writes
    "delete_ratio": 0.1,  # 10% of writes are deletes
}

# Echo the settings so they are recorded next to the results
print("Workload Configuration:")
for setting, setting_value in WORKLOAD_CONFIG.items():
    print(f"  {setting}: {setting_value}")

## 4. Run Profiled Workload

In [None]:
if LIBRARY_AVAILABLE:
    def run_workload():
        """Run the configured workload with profiling.

        Starts ``WORKLOAD_CONFIG['num_threads']`` threads that share one
        ``SkipListMap`` and each perform ``operations_per_thread`` random
        operations (reads, deletes or writes, chosen by the configured ratios).

        Returns:
            A ``(report, elapsed)`` tuple: the value of ``prof.report()`` and
            the wall-clock seconds between starting the first thread and
            joining the last one.
        """
        m = SkipListMap()

        # Read the configuration once so the worker loop only touches locals.
        operations = WORKLOAD_CONFIG['operations_per_thread']
        key_range = WORKLOAD_CONFIG['key_range']
        read_ratio = WORKLOAD_CONFIG['read_ratio']
        delete_ratio = WORKLOAD_CONFIG['delete_ratio']
        size_min = WORKLOAD_CONFIG['value_size_min']
        size_max = WORKLOAD_CONFIG['value_size_max']

        def worker(thread_id):
            """Perform this thread's share of random map operations."""
            for _ in range(operations):
                # randint is inclusive on both ends, so keys span 0..key_range.
                key = f"key_{random.randint(0, key_range)}"

                if random.random() < read_ratio:
                    _ = m.get(key)
                elif random.random() < delete_ratio:
                    m.pop(key, None)
                else:
                    m[key] = 'x' * random.randint(size_min, size_max)

        with MemoryProfiler(
            track_sizes=True,
            track_latency=True,
            track_per_thread=True,
            track_cross_thread=True,
        ) as prof:
            threads = [
                threading.Thread(target=worker, args=(i,), name=f"Worker-{i}")
                for i in range(WORKLOAD_CONFIG['num_threads'])
            ]

            start = time.perf_counter()
            for t in threads:
                t.start()
            for t in threads:
                t.join()
            elapsed = time.perf_counter() - start

        return prof.report(), elapsed

    print("Running workload...")
    report, elapsed = run_workload()
    print(f"Completed in {elapsed:.2f} seconds")
else:
    print("Using simulated data...")
    report = generate_simulated_report()
    elapsed = report.duration_seconds
    print(f"Simulated {report.duration_seconds:.2f} seconds of profiling")

## 5. Results Summary

In [None]:
# Headline figures from the profiler report, one print call per section.
banner = "=" * 60
print(banner, "MEMORY PROFILER RESULTS", banner, sep="\n")

print(
    "\n### Allocation Statistics ###",
    f"Total allocations: {report.alloc_count:,}",
    f"Total frees: {report.free_count:,}",
    f"Total bytes allocated: {report.alloc_bytes / 1024**2:.2f} MB",
    f"Peak memory: {report.peak_allocated / 1024**2:.2f} MB",
    f"Current allocated: {report.current_allocated / 1024:.2f} KB",
    sep="\n",
)

print(
    "\n### Throughput ###",
    f"Allocation rate: {report.alloc_rate:,.0f} allocs/sec",
    f"Free rate: {report.free_rate:,.0f} frees/sec",
    f"Byte rate: {report.bytes_rate / 1024**2:.2f} MB/sec",
    sep="\n",
)

# Only the P99 figure is shown for frees; allocations get the full set.
print(
    "\n### Latency (nanoseconds) ###",
    f"Alloc P50: {report.alloc_latency_p50:.1f} ns",
    f"Alloc P95: {report.alloc_latency_p95:.1f} ns",
    f"Alloc P99: {report.alloc_latency_p99:.1f} ns",
    f"Alloc P99.9: {report.alloc_latency_p999:.1f} ns",
    f"Free P99: {report.free_latency_p99:.1f} ns",
    sep="\n",
)

print(
    "\n### Cross-Thread Operations ###",
    f"Cross-thread frees: {report.cross_thread_free_count:,}",
    f"Cross-thread percentage: {report.cross_thread_free_pct:.2f}%",
    sep="\n",
)

frag = report.fragmentation
print(
    "\n### Fragmentation ###",
    f"Internal fragmentation: {frag.internal_fragmentation:.1%}",
    f"External fragmentation: {frag.external_fragmentation:.1%}",
    f"Memory utilization: {frag.utilization:.1%}",
    sep="\n",
)

## 6. Allocation Size Distribution

In [None]:
print("### Allocation Size Distribution ###\n")
print(f"{'Size Range':<15} {'Count':>12} {'Percentage':>12}")
print("-" * 40)

total = report.size_histogram.total_count


def _pct_of_total(count, total_count):
    """Return ``count`` as a percentage of ``total_count`` (0 for an empty total)."""
    return (count / total_count) * 100 if total_count > 0 else 0


def _bucket_lower_bound(item):
    """Sort key: numeric lower bound of a label such as ``"33-64"`` or ``"4097+"``."""
    label = item[0]
    return int(label.split('-')[0].replace('+', ''))


for size_range, count in sorted(report.size_histogram.buckets.items(),
                                key=_bucket_lower_bound):
    pct = _pct_of_total(count, total)
    bar = '#' * int(pct / 2)  # one '#' per two percentage points
    print(f"{size_range:<15} {count:>12,} {pct:>10.1f}% {bar}")

print("\n### Hot Allocation Sizes ###")
print("These sizes are most frequently allocated:")
for i, (size_range, count) in enumerate(report.size_histogram.hot_sizes(5), 1):
    # Same zero guard as the table above, so an empty histogram cannot raise.
    pct = _pct_of_total(count, total)
    print(f"  {i}. {size_range}: {count:,} ({pct:.1f}%)")

## 7. Per-Thread Analysis

In [None]:
print("### Per-Thread Memory Statistics ###\n")

# Busiest allocators first (threads with equal counts keep their report order)
sorted_threads = sorted(
    report.thread_stats,
    key=lambda stats: stats.alloc_count,
    reverse=True,
)

print(f"{'Thread':<15} {'Allocs':>10} {'Frees':>10} {'Bytes':>12} {'X-Thread Sent':>14}")
print("-" * 65)

for stats in sorted_threads:
    label = stats.thread_name or f"Thread-{stats.thread_id}"
    row = (
        f"{label:<15} {stats.alloc_count:>10,} {stats.free_count:>10,} "
        f"{stats.alloc_bytes/1024:>10,.0f}KB {stats.cross_thread_frees_sent:>14,}"
    )
    print(row)

# Totals across all threads
print("\n### Cross-Thread Free Pattern ###")
total_sent = sum(stats.cross_thread_frees_sent for stats in report.thread_stats)
total_received = sum(stats.cross_thread_frees_received for stats in report.thread_stats)
print(f"Total cross-thread frees sent: {total_sent:,}")
print(f"Total cross-thread frees received: {total_received:,}")

# Report threads whose sent count exceeds 1% of their allocations
print("\nThreads with high cross-thread activity:")
for stats in sorted_threads:
    frees_sent = stats.cross_thread_frees_sent
    if frees_sent <= 0:
        continue
    share = frees_sent / stats.alloc_count * 100 if stats.alloc_count > 0 else 0
    if share <= 1:
        continue
    label = stats.thread_name or f"Thread-{stats.thread_id}"
    print(f"  {label}: {share:.1f}% of allocations freed by other threads")

## 8. Visualizations

In [None]:
# Plotting is optional: the chart cell is skipped when these imports fail.
try:
    import matplotlib.pyplot as plt
    import numpy as np
except ImportError:
    PLOTTING_AVAILABLE = False
    print("matplotlib not available - skipping visualizations")
    print("Install with: pip install matplotlib")
else:
    PLOTTING_AVAILABLE = True

In [None]:
if PLOTTING_AVAILABLE:
    # 2x2 grid: sizes | per-thread share / latency | cross-thread frees
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    (ax1, ax2), (ax3, ax4) = axes
    width = 0.35  # bar width shared by both grouped bar charts

    # Top-left: allocation count per size bucket
    sizes = list(report.size_histogram.buckets.keys())
    counts = list(report.size_histogram.buckets.values())
    bucket_positions = range(len(sizes))
    ax1.bar(bucket_positions, counts, color='steelblue')
    ax1.set_xticks(bucket_positions)
    ax1.set_xticklabels(sizes, rotation=45, ha='right')
    ax1.set_xlabel('Size Range (bytes)')
    ax1.set_ylabel('Count')
    ax1.set_title('Allocation Size Distribution')
    ax1.ticklabel_format(style='scientific', axis='y', scilimits=(0,0))

    # Top-right: share of allocations per thread
    thread_names = []
    thread_allocs = []
    for ts in report.thread_stats:
        thread_names.append(ts.thread_name or f"T{ts.thread_id}")
        thread_allocs.append(ts.alloc_count)
    ax2.pie(thread_allocs, labels=thread_names, autopct='%1.1f%%', startangle=90)
    ax2.set_title('Allocations by Thread')

    # Bottom-left: alloc vs free latency per percentile (log scale)
    percentiles = ['P50', 'P95', 'P99', 'P99.9']
    percentile_suffixes = ('p50', 'p95', 'p99', 'p999')
    alloc_latencies = [
        getattr(report, f"alloc_latency_{suffix}") for suffix in percentile_suffixes
    ]
    free_latencies = [
        getattr(report, f"free_latency_{suffix}") for suffix in percentile_suffixes
    ]
    x = np.arange(len(percentiles))
    ax3.bar(x - width/2, alloc_latencies, width, label='Alloc', color='coral')
    ax3.bar(x + width/2, free_latencies, width, label='Free', color='mediumseagreen')
    ax3.set_xticks(x)
    ax3.set_xticklabels(percentiles)
    ax3.set_xlabel('Percentile')
    ax3.set_ylabel('Latency (ns)')
    ax3.set_title('Allocation vs Free Latency')
    ax3.legend()
    ax3.set_yscale('log')

    # Bottom-right: cross-thread frees sent and received per thread
    sent = [ts.cross_thread_frees_sent for ts in report.thread_stats]
    received = [ts.cross_thread_frees_received for ts in report.thread_stats]
    x = np.arange(len(thread_names))
    ax4.bar(x - width/2, sent, width, label='Sent', color='indianred')
    ax4.bar(x + width/2, received, width, label='Received', color='dodgerblue')
    ax4.set_xticks(x)
    ax4.set_xticklabels(thread_names, rotation=45, ha='right')
    ax4.set_xlabel('Thread')
    ax4.set_ylabel('Count')
    ax4.set_title('Cross-Thread Free Pattern')
    ax4.legend()

    # Save before show so the file is written from the fully laid-out figure
    plt.tight_layout()
    plt.savefig('memory_profile_charts.png', dpi=150, bbox_inches='tight')
    plt.show()
    print("\nCharts saved to memory_profile_charts.png")

## 9. Recommendations

In [None]:
def generate_recommendations(report):
    """Generate optimization recommendations based on profiling data.

    Args:
        report: Profiler report exposing ``alloc_latency_p99``,
            ``cross_thread_free_pct``, ``leaked_bytes``, a ``fragmentation``
            object (``internal_fragmentation``, ``external_fragmentation``,
            ``utilization``) and a ``size_histogram`` with ``hot_sizes()``
            and ``total_count``.

    Returns:
        A list of dicts with the keys ``severity``, ``category``, ``issue``
        and ``recommendation``, in the order the checks run. When no check
        fires, the list holds a single ``'success'`` entry.
    """
    recommendations = []

    def add(severity, category, issue, recommendation):
        """Append one finding, keeping the key order stable for JSON export."""
        recommendations.append({
            'severity': severity,
            'category': category,
            'issue': issue,
            'recommendation': recommendation,
        })

    # Check allocation latency
    if report.alloc_latency_p99 > 500:
        add(
            'warning',
            'Latency',
            f'High allocation latency: P99 = {report.alloc_latency_p99:.0f}ns',
            'Consider pre-allocating memory pools for hot paths',
        )

    # Check cross-thread free percentage (value is already on a 0-100 scale)
    if report.cross_thread_free_pct > 10:
        add(
            'warning',
            'Cross-Thread',
            f'High cross-thread free rate: {report.cross_thread_free_pct:.1f}%',
            'Consider thread-affinity for allocation/free patterns',
        )
    elif report.cross_thread_free_pct > 5:
        add(
            'info',
            'Cross-Thread',
            f'Moderate cross-thread free rate: {report.cross_thread_free_pct:.1f}%',
            'Cross-thread frees are normal for concurrent data structures',
        )

    # Check fragmentation (values are fractions, e.g. 0.2 == 20%)
    fragmentation = report.fragmentation
    if fragmentation.internal_fragmentation > 0.2:
        add(
            'warning',
            'Fragmentation',
            f'High internal fragmentation: {fragmentation.internal_fragmentation:.1%}',
            'Use size classes that match your allocation patterns',
        )

    if fragmentation.external_fragmentation > 0.15:
        add(
            'warning',
            'Fragmentation',
            f'High external fragmentation: {fragmentation.external_fragmentation:.1%}',
            'Consider memory compaction or arena allocation',
        )

    # Check for memory leaks
    if report.leaked_bytes > 0:
        add(
            'error',
            'Leak',
            f'Memory leak detected: {report.leaked_bytes:,} bytes',
            'Enable stack traces to identify leak sources',
        )

    # Check size distribution. Only the single hottest bucket matters here.
    # A zero total means there is no dominant size; the guard also prevents
    # a ZeroDivisionError on an empty histogram.
    histogram = report.size_histogram
    hot_sizes = histogram.hot_sizes(1)
    total_count = histogram.total_count
    if hot_sizes and total_count > 0:
        top_size, top_count = hot_sizes[0]
        top_pct = (top_count / total_count) * 100
        if top_pct > 50:
            add(
                'info',
                'Size Distribution',
                f'Dominant allocation size: {top_size} ({top_pct:.0f}% of allocations)',
                'Consider a specialized allocator for this size class',
            )

    # Check utilization
    if fragmentation.utilization < 0.7:
        add(
            'warning',
            'Utilization',
            f'Low memory utilization: {fragmentation.utilization:.1%}',
            'Memory is underutilized; consider reducing heap size',
        )

    # If everything looks good
    if not recommendations:
        add(
            'success',
            'Overall',
            'No significant issues detected',
            'Memory allocation patterns look healthy',
        )

    return recommendations

# Build the recommendation list, then print each entry with a severity marker
recommendations = generate_recommendations(report)

severity_icons = {
    'error': '\U0001F534',    # Red circle
    'warning': '\U0001F7E0',  # Orange circle
    'info': '\U0001F7E1',     # Yellow circle
    'success': '\u2705',      # Green checkmark
}

print("### Recommendations ###\n")
for entry in recommendations:
    # Unknown severities get no marker rather than raising
    marker = severity_icons.get(entry['severity'], '')
    print(
        f"{marker} [{entry['category']}] {entry['issue']}",
        f"   Recommendation: {entry['recommendation']}\n",
        sep="\n",
    )

## 10. Export Results

In [None]:
import json
from dataclasses import asdict

def export_report(report, filename='memory_profile_report.json'):
    """Export report to JSON.

    Args:
        report: The profiler report. Dataclass instances are expanded to
            dicts; any other object is passed to ``json.dump`` unchanged.
        filename: Path of the JSON file to write (overwritten if present).

    Every ``datetime`` found at any nesting depth is written in ISO 8601
    form. Values ``json`` cannot encode natively fall back to ``str()``.
    """
    def to_dict(obj):
        """Recursively convert ``obj`` into JSON-friendly containers."""
        if hasattr(obj, '__dataclass_fields__'):
            # asdict() already expands nested dataclasses into plain dicts and
            # lists, so walk its result again to reach the values inside them.
            return to_dict(asdict(obj))
        if isinstance(obj, dict):
            return {key: to_dict(value) for key, value in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [to_dict(item) for item in obj]
        if isinstance(obj, datetime):
            return obj.isoformat()
        return obj

    data = to_dict(report)

    with open(filename, 'w', encoding='utf-8') as f:
        # default=str is a last resort for types the walk above does not know.
        json.dump(data, f, indent=2, default=str)

    print(f"Report exported to {filename}")

# Write the full report to its default JSON file
export_report(report)

# Store the recommendation list next to it
with open('memory_recommendations.json', 'w') as rec_file:
    rec_file.write(json.dumps(recommendations, indent=2))
print("Recommendations exported to memory_recommendations.json")

## Summary

This notebook analyzed memory allocation patterns and provided:

1. **Basic Statistics** - Allocation counts, bytes, rates
2. **Latency Analysis** - P50/P95/P99/P99.9 for alloc and free
3. **Size Distribution** - Which sizes are most commonly allocated
4. **Per-Thread Analysis** - Memory usage by thread
5. **Cross-Thread Patterns** - How memory flows between threads
6. **Fragmentation Metrics** - Internal and external fragmentation
7. **Visualizations** - Charts for quick understanding
8. **Recommendations** - Actionable optimization suggestions

For more advanced analysis, see:
- `smr_performance_analysis.ipynb` - SMR-specific profiling
- `memory_subsystem_comparison.ipynb` - Compare IBR vs DEBRA+