# RNG SIMD Performance Analysis

This notebook runs the RNG SIMD benchmark and visualizes performance for each data type.

**Just run all cells** to automatically:
1. Execute the benchmark
2. Load results
3. Generate performance plots for each data type


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import subprocess
from pathlib import Path
from datetime import datetime
import numpy as np

# Global plot styling. The seaborn theme is applied first and the figure dpi
# is set afterwards, so the dpi assignment is the one that takes effect.
sns.set_theme(style="whitegrid")
plt.rcParams.update({'figure.dpi': 100})

# Confirmation that this setup cell ran to completion.
print("✓ Libraries imported")


## Run Benchmark


In [None]:
# Configuration
BENCHMARK_SIZE = 4194304  # 4M elements
CSV_FILE = 'benchmark_results.csv'

# Candidate locations of the benchmark binary, checked in order; the first
# path that exists is used.
# NOTE(review): these are machine-specific absolute paths - adjust them when
# running from a different checkout or user account.
EXECUTABLE_CANDIDATES = [
    '/home/ubuntu/tt-metal/build_Release/tt-train/sources/examples/rng_simd/rng_simd',
    '/home/ubuntu/tt-metal/build/tt-train/sources/examples/rng_simd/rng_simd',
]

# Find executable (stays None when no candidate exists on this machine)
executable = next(
    (candidate for candidate in EXECUTABLE_CANDIDATES if Path(candidate).exists()),
    None,
)

if executable:
    print(f"Found: {executable}")
    print(f"\nRunning benchmark ({BENCHMARK_SIZE:,} elements)...")
    print("="*80)

    start = datetime.now()
    result = subprocess.run(
        [executable, f'--csv={CSV_FILE}', str(BENCHMARK_SIZE)],
        capture_output=True, text=True
    )
    # Stop the clock immediately so that printing the captured output below
    # is not counted as benchmark time.
    elapsed = (datetime.now() - start).total_seconds()

    benchmark_failed = result.returncode != 0

    # stdout is normally suppressed; on failure it is shown because it may
    # contain the only hint about what went wrong.
    if benchmark_failed and result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr)

    print("="*80)
    if benchmark_failed:
        # Do not report success for a failed run: the CSV read by the next
        # cell may be missing or left over from an earlier run.
        print(f"✗ Benchmark failed with exit code {result.returncode} "
              f"after {elapsed:.1f} seconds; {CSV_FILE} may be missing or stale")
    else:
        print(f"✓ Completed in {elapsed:.1f} seconds")
else:
    print("⚠ Executable not found!")


## Load Data


In [None]:
# Columns that the plotting and summary cells of this notebook read from the
# results table.
REQUIRED_COLUMNS = ['data_type', 'implementation', 'distribution',
                    'throughput_gb_s', 'time_ms']

df = pd.read_csv(CSV_FILE)

# Fail here with a readable message instead of a KeyError deep inside a
# plotting loop when the CSV does not have the expected layout.
missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_columns:
    raise ValueError(
        f"{CSV_FILE} is missing expected column(s): {missing_columns}"
    )

print(f"✓ Loaded {len(df)} results")
print(f"\nData types: {list(df['data_type'].unique())}")
print(f"Implementations: {list(df['implementation'].unique())}")
print(f"Distributions: {list(df['distribution'].unique())}")

# Last expression of the cell: rich preview of the first rows.
df.head()


## Performance by Data Type

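Bar charts of throughput (GB/s), one figure per data type: Sequential implementations in the top row, Parallel implementations in the bottom row, and one column each for the Uniform and Normal distributions.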


In [None]:
data_types = df['data_type'].unique()
# Only show Uniform and Normal distributions
distributions = ['Uniform[-1,1]', 'Normal(0,1)']

sequential_impls = ['MT19937 (Sequential)', 'SSE (Sequential)', 'AVX2 (Sequential)']
parallel_impls = ['MT19937 (Parallel)', 'SSE (Parallel)', 'AVX2 (Parallel)']


def plot_throughput_panel(ax, results, dtype, dist, impls, mode, cmap, cmap_range):
    """Draw one throughput bar chart on ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    results : DataFrame with 'data_type', 'distribution', 'implementation'
        and 'throughput_gb_s' columns.
    dtype, dist : values of 'data_type' and 'distribution' to select.
    impls : implementation names to include in this panel.
    mode : 'Sequential' or 'Parallel'; used in the title and stripped (as
        ' (<mode>)') from the tick labels.
    cmap : colormap callable used to colour the bars.
    cmap_range : (low, high) positions sampled evenly from ``cmap``.
    """
    panel = results[(results['data_type'] == dtype) &
                    (results['distribution'] == dist) &
                    (results['implementation'].isin(impls))]

    ax.set_title(f'{dist} - {mode}', fontsize=11, fontweight='bold')

    if panel.empty:
        # Make a missing combination explicit instead of leaving an
        # unlabelled blank panel.
        ax.text(0.5, 0.5, 'No data', transform=ax.transAxes,
                ha='center', va='center', fontsize=11, color='gray')
        ax.set_xticks([])
        ax.set_yticks([])
        return

    # Fastest implementation first.
    panel = panel.sort_values('throughput_gb_s', ascending=False)
    positions = range(len(panel))

    colors = cmap(np.linspace(cmap_range[0], cmap_range[1], len(panel)))
    bars = ax.bar(positions, panel['throughput_gb_s'], color=colors)

    ax.set_xticks(positions)
    ax.set_xticklabels([impl.replace(f' ({mode})', '') for impl in panel['implementation']],
                       rotation=45, ha='right')
    ax.set_ylabel('Throughput (GB/s)', fontsize=11, fontweight='bold')
    ax.grid(axis='y', alpha=0.3)

    # Annotate every bar with its value.
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2, height,
                f'{height:.1f}', ha='center', va='bottom', fontsize=9, fontweight='bold')


# One entry per figure row: (mode, implementations, colormap, colormap range).
throughput_rows = [
    ('Sequential', sequential_impls, plt.cm.Oranges, (0.5, 0.9)),
    ('Parallel', parallel_impls, plt.cm.viridis, (0.3, 0.9)),
]

for dtype in data_types:
    print(f"\n{'='*80}")
    print(f"DATA TYPE: {dtype.upper()}")
    print('='*80)

    # Grid: Sequential (top), Parallel (bottom), one column per distribution.
    # squeeze=False keeps `axes` two-dimensional for any number of columns.
    fig, axes = plt.subplots(len(throughput_rows), len(distributions),
                             figsize=(14, 10), squeeze=False)

    for row, (mode, impls, cmap, cmap_range) in enumerate(throughput_rows):
        for col, dist in enumerate(distributions):
            plot_throughput_panel(axes[row, col], df, dtype, dist,
                                  impls, mode, cmap, cmap_range)

    fig.suptitle(f'{dtype.upper()} - Throughput: Sequential vs Parallel',
                 fontsize=16, fontweight='bold')
    fig.tight_layout()
    plt.show()
    # Release the figure so figures do not accumulate across loop iterations.
    plt.close(fig)


## Speedup Analysis

Speedup relative to the MT19937 (Sequential) baseline, computed as baseline time ÷ implementation time, with Sequential and Parallel implementations shown in separate rows.


In [None]:
# Implementation whose 'time_ms' is the reference for every speedup value.
BASELINE_IMPL = 'MT19937 (Sequential)'


def speedup_color(speedup, tiers, top_color):
    """Return the colour for a speedup value.

    ``tiers`` is a sequence of (upper_bound, colour) pairs in ascending order;
    the first pair whose bound is greater than ``speedup`` wins. Values that
    fall below no bound get ``top_color``.
    """
    for upper_bound, color in tiers:
        if speedup < upper_bound:
            return color
    return top_color


def plot_speedup_panel(ax, results, dtype, dist, impls, mode, tiers, top_color):
    """Draw one speedup bar chart on ``ax``.

    Speedup is the baseline implementation's 'time_ms' divided by each
    selected implementation's 'time_ms', for the given data type and
    distribution.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    results : DataFrame with 'data_type', 'distribution', 'implementation'
        and 'time_ms' columns.
    dtype, dist : values of 'data_type' and 'distribution' to select.
    impls : implementation names to include in this panel.
    mode : 'Sequential' or 'Parallel'; used in the title and stripped (as
        ' (<mode>)') from the tick labels.
    tiers, top_color : colour scale, see ``speedup_color``.
    """
    selection = (results['data_type'] == dtype) & (results['distribution'] == dist)
    panel = results[selection & results['implementation'].isin(impls)].copy()
    baseline = results[selection & (results['implementation'] == BASELINE_IMPL)]['time_ms'].values

    ax.set_title(f'{dist} - {mode}', fontsize=11, fontweight='bold')

    if panel.empty or len(baseline) == 0:
        # Explain why nothing is drawn instead of leaving a blank panel.
        reason = 'No data' if panel.empty else f'No {BASELINE_IMPL} baseline'
        ax.text(0.5, 0.5, reason, transform=ax.transAxes,
                ha='center', va='center', fontsize=11, color='gray')
        ax.set_xticks([])
        ax.set_yticks([])
        return

    # The first matching baseline row is the reference time.
    panel['speedup'] = baseline[0] / panel['time_ms']
    panel = panel.sort_values('speedup', ascending=False)
    positions = range(len(panel))

    colors = [speedup_color(s, tiers, top_color) for s in panel['speedup']]
    bars = ax.bar(positions, panel['speedup'], color=colors, alpha=0.8)

    ax.set_xticks(positions)
    ax.set_xticklabels([impl.replace(f' ({mode})', '') for impl in panel['implementation']],
                       rotation=45, ha='right')
    ax.set_ylabel('Speedup (×)', fontsize=11, fontweight='bold')
    ax.grid(axis='y', alpha=0.3)
    # Dashed reference line at 1x, i.e. equal to the baseline.
    ax.axhline(y=1, color='black', linestyle='--', linewidth=1, alpha=0.5)

    # Annotate every bar with its value.
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2, height,
                f'{height:.1f}×', ha='center', va='bottom', fontsize=9, fontweight='bold')


# One entry per figure row: (mode, implementations, colour tiers, top colour).
speedup_rows = [
    ('Sequential', sequential_impls,
     [(2, '#fdae61'), (5, '#f46d43')], '#d73027'),
    ('Parallel', parallel_impls,
     [(2, '#fee08b'), (5, '#abdda4'), (10, '#66c2a5')], '#3288bd'),
]

for dtype in data_types:
    print(f"\n{'='*80}")
    print(f"SPEEDUP - DATA TYPE: {dtype.upper()}")
    print('='*80)

    # Grid: Sequential (top), Parallel (bottom), one column per distribution.
    # squeeze=False keeps `axes` two-dimensional for any number of columns.
    fig, axes = plt.subplots(len(speedup_rows), len(distributions),
                             figsize=(14, 10), squeeze=False)

    for row, (mode, impls, tiers, top_color) in enumerate(speedup_rows):
        for col, dist in enumerate(distributions):
            plot_speedup_panel(axes[row, col], df, dtype, dist,
                               impls, mode, tiers, top_color)

    fig.suptitle(f'{dtype.upper()} - Speedup: Sequential vs Parallel',
                 fontsize=16, fontweight='bold')
    fig.tight_layout()
    plt.show()
    # Release the figure so figures do not accumulate across loop iterations.
    plt.close(fig)


## Summary


In [None]:
# Text summary: for every data type and distribution, the implementation with
# the highest throughput and its speedup over the MT19937 (Sequential) time.
print("\nBEST IMPLEMENTATIONS")
print("="*80)

for dtype in data_types:
    print(f"\n{dtype.upper()}:")
    print("-" * 80)

    for dist in distributions:
        data = df[(df['data_type'] == dtype) & (df['distribution'] == dist)]

        if data.empty:
            continue

        best = data.loc[data['throughput_gb_s'].idxmax()]
        baseline = data[data['implementation'] == 'MT19937 (Sequential)']['time_ms'].values

        if len(baseline) > 0:
            speedup_text = f"{baseline[0] / best['time_ms']:6.1f}× speedup"
        else:
            # Without a baseline row there is nothing to compare against;
            # say so rather than reporting a made-up 1.0x.
            speedup_text = "speedup n/a: no MT19937 (Sequential) baseline"

        print(f"  {dist:16} → {best['implementation']:20} "
              f"{best['throughput_gb_s']:7.2f} GB/s ({speedup_text})")
