In [1]:
import os
import sys
import subprocess
import json
import shutil
import pandas as pd
from IPython.display import display, HTML

# ======================================================================================
# --- CONFIGURATION ---
# ======================================================================================
# Central settings for the benchmark sweep; edit the values here rather than
# in the script logic below.
CONFIG = dict(
    # Model paths (verify these are correct for your workspace).
    DENSE_MODEL_PATH="/workspace/saved_models_and_logs/resnet50_baseline/resnet50_baseline_ft_imagenetmini_final.pth",
    SPARSE_MODEL_PATH="/workspace/saved_models_and_logs/pruning_nm_sparsity/resnet50_prune_nm24_ft/model_final.pth",

    # Batch sizes to sweep; add or remove entries as needed.
    BATCH_SIZES=[16, 32, 64, 128],

    # Model and input description. INPUT_NAME and INPUT_SHAPE are combined
    # with the batch size to build the trtexec --optShapes argument.
    NUM_CLASSES=1000,
    INPUT_NAME="input",
    INPUT_SHAPE="3x224x224",

    # Directory that receives the engines, timing JSON files and logs.
    OUTPUT_DIR="/workspace/batch_benchmark_results",

    # Values forwarded to trtexec's --iterations and --duration flags.
    ITERATIONS=500,
    DURATION=10,
)
# ======================================================================================
# --- SCRIPT LOGIC (No need to edit below this line) ---
# ======================================================================================

def check_prerequisites():
    """Exit with status 1 unless the ``trtexec`` executable is found on PATH."""
    trtexec_location = shutil.which("trtexec")
    if trtexec_location is not None:
        print("✓ 'trtexec' is available.")
        return
    # Nothing below can run without trtexec, so stop the whole script here.
    print("ERROR: 'trtexec' not found. Cannot proceed.")
    sys.exit(1)

def run_benchmark(model_type, onnx_path, batch_size, is_sparse):
    """Run one trtexec benchmark and return the path of its timing JSON.

    The engine, timing JSON and log for the run are written to
    ``CONFIG["OUTPUT_DIR"]`` under names derived from ``model_type`` and
    ``batch_size``, so every (model, batch size) pair gets its own files.

    Args:
        model_type: Label used in messages and file names (e.g. "dense").
        onnx_path: Path of the ONNX model handed to trtexec via ``--onnx``.
        batch_size: Batch dimension placed in front of
            ``CONFIG["INPUT_SHAPE"]`` for ``--optShapes``.
        is_sparse: When true, ``--sparsity=enable`` is added to the command.

    Returns:
        The timing JSON path when trtexec exits successfully, otherwise None.
    """
    print(f"  > Benchmarking {model_type.upper()} model with Batch Size = {batch_size}...")

    # Create unique filenames for each run
    run_id = f"{model_type}_b{batch_size}"
    output_dir = CONFIG["OUTPUT_DIR"]
    engine_path = os.path.join(output_dir, f"engine_{run_id}.engine")
    json_path = os.path.join(output_dir, f"results_{run_id}.json")
    log_path = os.path.join(output_dir, f"log_{run_id}.log")

    opt_shape_str = f"{CONFIG['INPUT_NAME']}:{batch_size}x{CONFIG['INPUT_SHAPE']}"

    command = [
        "trtexec",
        f"--onnx={onnx_path}",
        "--fp16",
        "--useCudaGraph",
        f"--optShapes={opt_shape_str}",
        f"--iterations={CONFIG['ITERATIONS']}",
        f"--duration={CONFIG['DURATION']}",
        f"--saveEngine={engine_path}",
        f"--exportTimes={json_path}",
    ]

    if is_sparse:
        command.append("--sparsity=enable")

    try:
        # Pass the argument list directly (no shell) so paths containing
        # spaces or shell metacharacters are handed to trtexec verbatim.
        # stdout and stderr both go to the per-run log file.
        with open(log_path, "w") as log_file:
            subprocess.run(
                command,
                stdout=log_file,
                stderr=subprocess.STDOUT,
                check=True,
            )
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: trtexec exited non-zero.
        # OSError: trtexec could not be started or the log could not be opened.
        print(f"    ❌ trtexec FAILED for BS={batch_size}. Check log: {log_path}")
        # Check for OOM in the log; the log itself may be missing or unreadable.
        try:
            with open(log_path, "r", errors="replace") as f:
                log_text = f.read()
        except OSError:
            log_text = ""
        if "out of memory" in log_text.lower():
            print("    HINT: This was likely an Out of Memory (OOM) error. The batch size may be too large for your GPU.")
        return None

    print(f"    ✓ Benchmark successful. Log: {log_path}")
    return json_path

def parse_results_from_array(json_path, batch_size):
    """Summarise a timing JSON file that holds an array of per-run records.

    Args:
        json_path: Path of the JSON file, or None when the benchmark failed.
        batch_size: Number of images processed per run; used to turn the run
            count into an image count.

    Returns:
        A dict with the keys "Throughput (images/sec)" and
        "Batch Latency (ms)". Both values are 0 when the file is missing,
        empty, unreadable, not a non-empty JSON array, or otherwise cannot
        be summarised, so callers can always index the result.
    """
    import statistics

    empty_result = {"Throughput (images/sec)": 0, "Batch Latency (ms)": 0}

    if not json_path or not os.path.exists(json_path) or os.path.getsize(json_path) == 0:
        return empty_result
    try:
        with open(json_path, "r") as f:
            run_data = json.load(f)
        # Same fallback as every other failure path, so the caller never
        # receives None.
        if not isinstance(run_data, list) or not run_data:
            return empty_result

        latencies = [run.get("latencyMs", 0) for run in run_data]
        # True median: averages the two middle values for an even run count.
        median_latency_ms = statistics.median(latencies)

        # Wall-clock span from the first run's start to the last run's end.
        # NOTE(review): the timestamp key names are taken as-is from the
        # original code; confirm they match the JSON emitted by the trtexec
        # version in use, since missing keys silently yield a throughput of 0.
        first_run_start_ms = run_data[0].get("startH2dMs", 0)
        last_run_end_ms = run_data[-1].get("endD2hMs", 0)
        total_duration_s = (last_run_end_ms - first_run_start_ms) / 1000.0

        total_images = len(run_data) * batch_size
        images_per_second = total_images / total_duration_s if total_duration_s > 0 else 0
        return {
            "Throughput (images/sec)": images_per_second,
            "Batch Latency (ms)": median_latency_ms,
        }
    except (OSError, ValueError, TypeError, AttributeError):
        # Unreadable file, invalid JSON, or records of an unexpected shape.
        return empty_result

def main():
    """Benchmark the dense and sparse models at every configured batch size.

    Verifies that trtexec and both ONNX files are available (exiting with
    status 1 otherwise), runs a dense and a sparse benchmark for each entry
    of ``CONFIG["BATCH_SIZES"]``, and prints a per-batch-size table of
    throughput, latency and sparse-over-dense speedup.
    """
    print("--- Starting Automated Multi-Batch Benchmark Suite ---")
    check_prerequisites()
    os.makedirs(CONFIG["OUTPUT_DIR"], exist_ok=True)

    # --- ONNX Export (Done only once) ---
    # The ONNX files are expected to exist already; if they do not, the user
    # must run the notebook's export cells first.
    dense_onnx_path = "/workspace/benchmark_results/resnet50_dense.onnx"
    sparse_onnx_path = "/workspace/benchmark_results/resnet50_sparse.onnx"

    if not (os.path.exists(dense_onnx_path) and os.path.exists(sparse_onnx_path)):
        print("ERROR: ONNX files not found. Please run the ONNX export cells from the notebook first.")
        print(f"Expected dense: {dense_onnx_path}")
        print(f"Expected sparse: {sparse_onnx_path}")
        sys.exit(1)

    print(f"Found existing ONNX files:\n  - {dense_onnx_path}\n  - {sparse_onnx_path}")

    throughput_key = "Throughput (images/sec)"
    latency_key = "Batch Latency (ms)"
    # Stand-in for a run whose results could not be parsed, so the summary
    # row is still produced (with zeros) instead of raising.
    no_result = {throughput_key: 0, latency_key: 0}

    all_results = []

    # --- Main Benchmark Loop ---
    for bs in CONFIG["BATCH_SIZES"]:
        print(f"\n===== RUNNING BENCHMARKS FOR BATCH SIZE: {bs} =====")

        dense_json_path = run_benchmark("dense", dense_onnx_path, bs, is_sparse=False)
        sparse_json_path = run_benchmark("sparse", sparse_onnx_path, bs, is_sparse=True)

        # Parse results; fall back to zeros if the parser reports nothing.
        dense_perf = parse_results_from_array(dense_json_path, bs) or no_result
        sparse_perf = parse_results_from_array(sparse_json_path, bs) or no_result

        dense_throughput = dense_perf[throughput_key]
        sparse_throughput = sparse_perf[throughput_key]
        # A failed dense run has zero throughput; report a speedup of 0
        # rather than dividing by zero.
        speedup = sparse_throughput / dense_throughput if dense_throughput > 0 else 0

        all_results.append({
            "Batch Size": bs,
            "Dense Throughput (img/s)": dense_throughput,
            "Sparse Throughput (img/s)": sparse_throughput,
            "Dense Latency (ms)": dense_perf[latency_key],
            "Sparse Latency (ms)": sparse_perf[latency_key],
            "Speedup": speedup,
        })

    # --- Final Summary ---
    print("\n\n" + "="*80)
    print("--- FINAL BENCHMARK SUMMARY ---")
    print("="*80)

    if not all_results:
        # An empty frame has no "Batch Size" column to index on.
        print("No batch sizes configured; nothing to report.")
        return

    df = pd.DataFrame(all_results).set_index("Batch Size")

    # Plain-text table so the summary prints the same way in a script or a
    # notebook.
    print(df.to_string(formatters={
        "Dense Throughput (img/s)": "{:,.2f}".format,
        "Sparse Throughput (img/s)": "{:,.2f}".format,
        "Dense Latency (ms)": "{:.2f}".format,
        "Sparse Latency (ms)": "{:.2f}".format,
        "Speedup": "{:.2f}x".format,
    }))
    print("\nBenchmark artifacts (logs, engines, JSON) saved in:", CONFIG["OUTPUT_DIR"])


# Run the benchmark suite only when this file is executed directly, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

--- Starting Automated Multi-Batch Benchmark Suite ---
✓ 'trtexec' is available.
Found existing ONNX files:
  - /workspace/benchmark_results/resnet50_dense.onnx
  - /workspace/benchmark_results/resnet50_sparse.onnx

===== RUNNING BENCHMARKS FOR BATCH SIZE: 16 =====
  > Benchmarking DENSE model with Batch Size = 16...
    ✓ Benchmark successful. Log: /workspace/batch_benchmark_results/log_dense_b16.log
  > Benchmarking SPARSE model with Batch Size = 16...
    ✓ Benchmark successful. Log: /workspace/batch_benchmark_results/log_sparse_b16.log

===== RUNNING BENCHMARKS FOR BATCH SIZE: 32 =====
  > Benchmarking DENSE model with Batch Size = 32...
    ✓ Benchmark successful. Log: /workspace/batch_benchmark_results/log_dense_b32.log
  > Benchmarking SPARSE model with Batch Size = 32...
    ✓ Benchmark successful. Log: /workspace/batch_benchmark_results/log_sparse_b32.log

===== RUNNING BENCHMARKS FOR BATCH SIZE: 64 =====
  > Benchmarking DENSE model with Batch Size = 64...
    ✓ Benchmark suc