In [None]:
%%writefile benchmark_notebook_setup.py
# Helper module that keeps the notebook cells short: prerequisite check, ONNX export, trtexec invocation, and result parsing.
import os
import sys
import subprocess
import json
import shutil
import torch
import torchvision
import pandas as pd

def check_prerequisites():
    """Verify that the `trtexec` executable can be located on the PATH.

    A status line is printed for either outcome.

    Raises:
        RuntimeError: if `shutil.which` cannot find `trtexec`.
    """
    print("Checking for 'trtexec'...")

    trtexec_location = shutil.which("trtexec")
    if trtexec_location is not None:
        print("✅ 'trtexec' is available.")
        return

    # Not found: report it, then stop the caller with an exception.
    print("❌ ERROR: 'trtexec' command not found.")
    raise RuntimeError("trtexec not found. Cannot proceed with benchmarking.")

def export_to_onnx(model_path, onnx_path, num_classes, batch_size, input_name, input_shape):
    """Load a ResNet-50 state dict and export the model to an ONNX file.

    Args:
        model_path: Checkpoint file passed to `torch.load`; its content is
            handed to `load_state_dict`.
        onnx_path: Destination of the exported ONNX file.
        num_classes: Forwarded to `torchvision.models.resnet50`.
        batch_size: Leading dimension of the dummy input used for the export.
        input_name: Name given to the ONNX graph input.
        input_shape: Per-sample shape as an 'x'-separated string, e.g. "3x224x224".

    Returns:
        True if the export completed. False if `model_path` does not exist or
        any step raised; the error is printed instead of propagated.
    """
    print(f"  Exporting '{os.path.basename(model_path)}' to ONNX...")

    checkpoint_missing = not os.path.exists(model_path)
    if checkpoint_missing:
        print(f"  ❌ ERROR: Model file not found at {model_path}")
        return False

    try:
        # "3x224x224" -> (batch_size, 3, 224, 224)
        per_sample_dims = tuple(int(dim) for dim in input_shape.split('x'))
        dummy_input_shape = (batch_size,) + per_sample_dims

        device = "cuda" if torch.cuda.is_available() else "cpu"

        # The architecture is built without pretrained weights; the checkpoint
        # supplies all parameters.
        model = torchvision.models.resnet50(weights=None, num_classes=num_classes)
        state_dict = torch.load(model_path, map_location=device)
        model.load_state_dict(state_dict)
        model.eval()
        model.to(device)

        dummy_input = torch.randn(dummy_input_shape, device=device)

        # Axis 0 of both input and output is exported as a named dynamic axis.
        dynamic_axes = {
            input_name: {0: 'batch_size'},
            'output': {0: 'batch_size'},
        }
        torch.onnx.export(
            model,
            dummy_input,
            onnx_path,
            export_params=True,
            opset_version=13,
            do_constant_folding=True,
            input_names=[input_name],
            output_names=['output'],
            dynamic_axes=dynamic_axes,
        )
    except Exception as e:
        print(f"  ❌ FAILED to export to ONNX: {e}")
        return False

    print(f"  ✅ Successfully exported to {onnx_path}")
    return True

def run_benchmark(onnx_path, engine_path, json_path, log_path, config, is_sparse):
    """Build a TensorRT engine with trtexec and benchmark it.

    The command is executed from an argument list without a shell, so paths
    containing spaces or shell metacharacters reach trtexec unchanged.
    trtexec's stdout and stderr are both written to `log_path`.

    Args:
        onnx_path: ONNX model passed as `--onnx`.
        engine_path: Destination passed as `--saveEngine`.
        json_path: Destination passed as `--exportTimes`.
        log_path: File that receives trtexec's combined stdout/stderr.
        config: Mapping providing 'INPUT_NAME', 'BATCH_SIZE', 'INPUT_SHAPE',
            'ITERATIONS' and 'DURATION'.
        is_sparse: When true, `--sparsity=enable` is appended to the command.

    Returns:
        True if trtexec exited with status 0. False if it exited non-zero,
        could not be launched, or the log file could not be opened.
    """
    import shlex  # local import: only used to render the command for display

    print("  Building and benchmarking with trtexec...")
    opt_shape_str = f"{config['INPUT_NAME']}:{config['BATCH_SIZE']}x{config['INPUT_SHAPE']}"
    command = [
        "trtexec", f"--onnx={onnx_path}", "--fp16", "--useCudaGraph",
        f"--optShapes={opt_shape_str}", f"--iterations={config['ITERATIONS']}",
        f"--duration={config['DURATION']}", f"--saveEngine={engine_path}",
        f"--exportTimes={json_path}",
    ]
    if is_sparse:
        command.append("--sparsity=enable")

    # Shell-quoted rendering for the reader only; the process below is started
    # from the argument list, not from this string.
    printable_command = " ".join(shlex.quote(arg) for arg in command)
    print(f"  Running command: {printable_command} > {shlex.quote(str(log_path))} 2>&1")

    try:
        with open(log_path, "w") as log_file:
            subprocess.run(command, stdout=log_file, stderr=subprocess.STDOUT, check=True)
    except subprocess.CalledProcessError as e:
        print("  ❌ trtexec FAILED.")
        print(f"  Return code: {e.returncode}")
        print(f"  Check the full log file for details: {log_path}")
        return False
    except OSError as e:
        # Without a shell, a missing executable (or an unwritable log path)
        # surfaces as OSError instead of a non-zero exit status.
        print("  ❌ trtexec FAILED.")
        print(f"  Could not launch trtexec or open the log file: {e}")
        return False

    print(f"  ✅ trtexec benchmark completed. Log saved to {log_path}")
    return True

def parse_results(json_path, batch_size):
    """Parse a trtexec JSON output file into key performance metrics.

    The file is first parsed as one JSON document. Two layouts are handled:

    * A JSON array of per-iteration timing records. Throughput is derived
      from the number of records, `batch_size`, and the span between the
      first record's 'startH2dMs' and the last record's 'endD2hMs'. Medians
      are taken over 'latencyMs' and 'computeMs'. These are the same keys the
      notebook's analysis cell reads; confirm them against the trtexec
      version in use.
    * A JSON object with a "results" entry holding "throughput(qps)",
      "latency" and "GPU-compute".

    If the file is not a single JSON document (for example several objects,
    one per line), the last line starting with '{' is parsed instead.

    Args:
        json_path: Path of the JSON file to read.
        batch_size: Images per inference, used to convert to images/sec.

    Returns:
        A dict with "Throughput (images/sec)", "Batch Latency (ms)" and
        "GPU Compute Time (ms)", or None if the file cannot be read or parsed
        (a message is printed in that case).
    """
    import statistics  # local import keeps this function self-contained

    try:
        with open(json_path, 'r') as f:
            raw_text = f.read()

        try:
            data = json.loads(raw_text)
        except ValueError:
            # Not one JSON document: fall back to the last line that looks
            # like the start of a JSON object.
            json_text = ""
            for line in raw_text.splitlines():
                if line.strip().startswith('{'):
                    json_text = line

            if not json_text:
                print(f"  ⚠️ Warning: Could not find a valid JSON object in {json_path}")
                return None

            data = json.loads(json_text)

        if isinstance(data, list):
            # Per-iteration records: compute the summary statistics here.
            if not data:
                print(f"  ⚠️ Warning: No timing records found in {json_path}")
                return None

            latency_ms_list = [run.get('latencyMs', 0) for run in data]
            gpu_compute_ms_list = [run.get('computeMs', 0) for run in data]

            first_start_ms = data[0].get('startH2dMs', 0)
            last_end_ms = data[-1].get('endD2hMs', 0)
            elapsed_s = (last_end_ms - first_start_ms) / 1000.0
            total_images = len(data) * batch_size
            images_per_second = total_images / elapsed_s if elapsed_s > 0 else 0
        else:
            # Summary object: metrics are read from its "results" entry.
            results_dict = data.get("results", {})
            images_per_second = results_dict.get("throughput(qps)", 0) * batch_size
            latency_ms_list = results_dict.get("latency", [0])
            gpu_compute_ms_list = results_dict.get("GPU-compute", [0])

        # statistics.median averages the two middle values for an even number
        # of samples instead of picking the upper one.
        return {
            "Throughput (images/sec)": images_per_second,
            "Batch Latency (ms)": statistics.median(latency_ms_list),
            "GPU Compute Time (ms)": statistics.median(gpu_compute_ms_list)
        }
    except Exception as e:
        # Best-effort parsing: report the problem and let the caller decide.
        print(f"  ❌ Error parsing {json_path}: {e}")
        return None

In [None]:
import os
import pandas as pd
from IPython.display import display, HTML

# --- Main Configuration Dictionary ---
CONFIG = dict(
    # Model & data parameters
    NUM_CLASSES=1000,
    BATCH_SIZE=32,
    INPUT_NAME="input",
    INPUT_SHAPE="3x224x224",  # CxHxW

    # Base directory that holds the 'saved_models_and_logs' folder.
    # Assumes the notebook is run from `/workspace`.
    BASE_PATH="/workspace",

    # Model checkpoints, relative to BASE_PATH
    DENSE_MODEL_RELATIVE_PATH="saved_models_and_logs/resnet50_baseline/resnet50_baseline_ft_imagenetmini_final.pth",
    SPARSE_MODEL_RELATIVE_PATH="saved_models_and_logs/pruning_nm_sparsity/resnet50_prune_nm24_ft/model_final.pth",

    # Folder (under BASE_PATH) for every generated file: ONNX, engines, logs, JSON
    OUTPUT_DIR_NAME="benchmark_results",

    # trtexec parameters
    ITERATIONS=500,
    DURATION=10,
)

# --- Construct Absolute Paths ---
# Each derived key is BASE_PATH joined with the matching relative entry; the
# keys are added in the order listed.
CONFIG.update({
    absolute_key: os.path.join(CONFIG["BASE_PATH"], CONFIG[relative_key])
    for absolute_key, relative_key in (
        ("DENSE_MODEL_PATH", "DENSE_MODEL_RELATIVE_PATH"),
        ("SPARSE_MODEL_PATH", "SPARSE_MODEL_RELATIVE_PATH"),
        ("OUTPUT_DIR", "OUTPUT_DIR_NAME"),
    )
})

# --- Display Configuration for Verification ---
print("--- Configuration Loaded ---")
for key, value in CONFIG.items():
    print(f"{key:<30}: {value}")
print("-" * 50)

# Make sure the output directory exists before anything is written to it.
os.makedirs(CONFIG["OUTPUT_DIR"], exist_ok=True)
print(f"Results will be saved in: {CONFIG['OUTPUT_DIR']}")

In [None]:
# Import the helper functions we just wrote to the .py file
from benchmark_notebook_setup import check_prerequisites, export_to_onnx, run_benchmark, parse_results

# Confirm trtexec is reachable before any export or benchmark cell runs.
# The RuntimeError raised when it is missing is printed, not re-raised, so
# this cell finishes without a traceback either way.
try:
    check_prerequisites()
except RuntimeError as missing_tool_error:
    print(missing_tool_error)

In [None]:
print("--- Processing DENSE Model ---")

# Every artifact of the dense model is written into the shared output directory.
dense_onnx_path, dense_engine_path, dense_json_path, dense_log_path = (
    os.path.join(CONFIG["OUTPUT_DIR"], artifact_name)
    for artifact_name in (
        "resnet50_dense.onnx",
        "resnet50_dense.engine",
        "results_dense.json",
        "benchmark_dense.log",
    )
)

# Step 1: export to ONNX. Step 2 (the benchmark, without the sparsity flag)
# only runs when the export reported success.
if export_to_onnx(
    model_path=CONFIG["DENSE_MODEL_PATH"],
    onnx_path=dense_onnx_path,
    num_classes=CONFIG["NUM_CLASSES"],
    batch_size=CONFIG["BATCH_SIZE"],
    input_name=CONFIG["INPUT_NAME"],
    input_shape=CONFIG["INPUT_SHAPE"],
):
    run_benchmark(
        onnx_path=dense_onnx_path,
        engine_path=dense_engine_path,
        json_path=dense_json_path,
        log_path=dense_log_path,
        config=CONFIG,
        is_sparse=False,
    )

print("\n--- DENSE Model Processing Complete ---")

In [None]:
print("--- Processing N:M SPARSE Model ---")

# Every artifact of the sparse model is written into the shared output directory.
sparse_onnx_path, sparse_engine_path, sparse_json_path, sparse_log_path = (
    os.path.join(CONFIG["OUTPUT_DIR"], artifact_name)
    for artifact_name in (
        "resnet50_sparse.onnx",
        "resnet50_sparse.engine",
        "results_sparse.json",
        "benchmark_sparse.log",
    )
)

# Step 1: export to ONNX. Step 2 (the benchmark, with the sparsity flag
# enabled) only runs when the export reported success.
if export_to_onnx(
    model_path=CONFIG["SPARSE_MODEL_PATH"],
    onnx_path=sparse_onnx_path,
    num_classes=CONFIG["NUM_CLASSES"],
    batch_size=CONFIG["BATCH_SIZE"],
    input_name=CONFIG["INPUT_NAME"],
    input_shape=CONFIG["INPUT_SHAPE"],
):
    run_benchmark(
        onnx_path=sparse_onnx_path,
        engine_path=sparse_engine_path,
        json_path=sparse_json_path,
        log_path=sparse_log_path,
        config=CONFIG,
        is_sparse=True,
    )

print("\n--- SPARSE Model Processing Complete ---")

In [3]:
# ==============================================================================
# FINAL ANALYSIS CELL (Corrected for JSON Array Format)
# ==============================================================================
import os
import json
import pandas as pd
from IPython.display import display, HTML

print("--- Final Analysis Script (Corrected for JSON Array Format) ---")

# --- Configuration ---
# This cell uses its own literals rather than CONFIG; keep them equal to the
# batch size and output directory the benchmarks were run with.
BATCH_SIZE, OUTPUT_DIR = 32, "/workspace/benchmark_results"

# --- Paths of the timing files already written to OUTPUT_DIR ---
dense_json_path, sparse_json_path = (
    os.path.join(OUTPUT_DIR, results_file)
    for results_file in ("results_dense.json", "results_sparse.json")
)

def parse_results_from_array(json_path, batch_size):
    """Summarise a timing file that is a JSON array of per-run records.

    Each record is read with `.get`, using 'latencyMs' and 'computeMs' for the
    medians, and 'startH2dMs' (first record) / 'endD2hMs' (last record) for
    the total elapsed time. A missing key counts as 0.

    Args:
        json_path: Path of the JSON file to read.
        batch_size: Images per run, used to convert run count to image count.

    Returns:
        A dict with "Throughput (images/sec)", "Batch Latency (ms)" and
        "GPU Compute Time (ms)", or None if the file is missing, empty, not a
        non-empty JSON list, or cannot be parsed (a message is printed).
    """
    import statistics  # local import keeps this cell's function self-contained

    print(f"Parsing array-formatted results from: {json_path}")
    if not os.path.exists(json_path) or os.path.getsize(json_path) == 0:
        print("  ❌ Error: File not found or is empty.")
        return None

    try:
        with open(json_path, 'r') as f:
            # The entire file is a single JSON array
            run_data = json.load(f)

        if not isinstance(run_data, list) or not run_data:
            print("  ❌ Error: JSON content is not a non-empty list.")
            return None

        # --- Calculate Metrics Manually ---

        # 1. Latency and GPU compute time.
        # statistics.median averages the two middle values when the number of
        # runs is even, rather than returning the upper-middle element.
        latencies = [run.get('latencyMs', 0) for run in run_data]
        gpu_times = [run.get('computeMs', 0) for run in run_data]

        median_latency_ms = statistics.median(latencies)
        median_gpu_compute_ms = statistics.median(gpu_times)

        # 2. Throughput
        # Total time is from the start of the first inference to the end of the last one.
        first_run_start_ms = run_data[0].get('startH2dMs', 0)
        last_run_end_ms = run_data[-1].get('endD2hMs', 0)
        total_duration_s = (last_run_end_ms - first_run_start_ms) / 1000.0

        total_images = len(run_data) * batch_size
        # A non-positive span would make the rate meaningless; report 0 instead.
        images_per_second = total_images / total_duration_s if total_duration_s > 0 else 0

        print("  ✅ Successfully parsed and calculated statistics.")
        return {
            "Throughput (images/sec)": images_per_second,
            "Batch Latency (ms)": median_latency_ms,
            "GPU Compute Time (ms)": median_gpu_compute_ms
        }
    except Exception as e:
        # Best-effort parsing: report the problem and let the caller decide.
        print(f"  ❌ An unexpected error occurred while parsing {json_path}: {e}")
        return None

# --- Main Analysis Logic ---
# Parse the dense file first, then the sparse one.
dense_results, sparse_results = (
    parse_results_from_array(results_path, BATCH_SIZE)
    for results_path in (dense_json_path, sparse_json_path)
)

if dense_results and sparse_results:
    df = pd.DataFrame(
        [dense_results, sparse_results],
        index=['Dense Model', 'N:M Sparse Model'],
    )

    # Throughput speedup: sparse / dense (higher is better); inf when the
    # dense throughput is zero.
    if df.loc['Dense Model', 'Throughput (images/sec)'] != 0:
        throughput_speedup = (
            df.loc['N:M Sparse Model', 'Throughput (images/sec)']
            / df.loc['Dense Model', 'Throughput (images/sec)']
        )
    else:
        throughput_speedup = float('inf')

    # Latency speedup: dense / sparse (lower latency is better). It is
    # computed here but not added to the table below.
    if df.loc['N:M Sparse Model', 'Batch Latency (ms)'] != 0:
        latency_speedup = (
            df.loc['Dense Model', 'Batch Latency (ms)']
            / df.loc['N:M Sparse Model', 'Batch Latency (ms)']
        )
    else:
        latency_speedup = float('inf')

    # The dense row is the baseline, hence 1.0.
    df['Speedup (Throughput)'] = [1.0, throughput_speedup]

    # Build the styled table one step at a time.
    styled_df = df.style.format({
        'Throughput (images/sec)': '{:,.2f}',
        'Batch Latency (ms)': '{:.2f} ms',
        'GPU Compute Time (ms)': '{:.2f} ms',
        'Speedup (Throughput)': '{:.2f}x',
    })
    styled_df = styled_df.bar(
        subset=['Speedup (Throughput)'],
        align='left',
        color=['#d65f5f', '#5fba7d'],
        vmin=0.9,
        vmax=max(1.5, df['Speedup (Throughput)'].max()),
    )
    styled_df = styled_df.set_caption(f"<h2>Benchmark Summary (Batch Size: {BATCH_SIZE})</h2>")
    styled_df = styled_df.set_table_styles([
        {'selector': 'th', 'props': [('text-align', 'left'), ('font-weight', 'bold')]},
        {'selector': 'td', 'props': [('text-align', 'left')]},
    ])

    display(HTML(styled_df.to_html()))
else:
    print("\n❌ Could not generate comparison. One or both result files failed to parse. Please check errors above.")

--- Final Analysis Script (Corrected for JSON Array Format) ---
Parsing array-formatted results from: /workspace/benchmark_results/results_dense.json
  ✅ Successfully parsed and calculated statistics.
Parsing array-formatted results from: /workspace/benchmark_results/results_sparse.json
  ✅ Successfully parsed and calculated statistics.


Unnamed: 0,Throughput (images/sec),Batch Latency (ms),GPU Compute Time (ms),Speedup (Throughput)
Dense Model,3916.38,7.67 ms,6.18 ms,1.00x
N:M Sparse Model,4126.49,7.08 ms,5.59 ms,1.05x
