In [1]:
# %%
# ===================================================================
#                      IMPORTS AND SETUP
# ===================================================================
import torch
import os
import pandas as pd
import numpy as np
import gc

# Imports for data loading and transformations
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Probe for torch_tensorrt. A missing install is only reported, not raised,
# so the rest of the notebook can still be inspected without it.
# NOTE(review): presumably the import is what makes the serialized TensorRT
# modules loadable via torch.jit.load later on -- confirm.
try:
    import torch_tensorrt
except ImportError:
    print("✗ WARNING: torch_tensorrt is not installed. This script may not work.")
else:
    print("✓ torch_tensorrt imported successfully.")

# Primary evaluation device: CUDA whenever PyTorch reports one, else the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print(f"Evaluation will run on: {DEVICE}")

# CPU fallback is allowed, but the user is told it is not the intended setup.
if DEVICE.type == 'cpu':
    print("⚠️ WARNING: TensorRT models are optimized for GPU. Running on CPU will be slow and is not a typical use case.")

✓ torch_tensorrt imported successfully.
Evaluation will run on: cuda


In [2]:
# %%
# ===================================================================
#                        CONFIGURATION
# ===================================================================

# --- 1. SET THE PATH TO YOUR VALIDATION DATASET ---
# Directory handed to torchvision's ImageFolder by the evaluation cells.
# The cells raise FileNotFoundError up front if it does not exist.
VALIDATION_DATA_PATH = '/workspace/imagenet-mini/val'

# --- 2. MODEL PATHS ---
# Maps a display name to the serialized model file loaded with torch.jit.load.
# The display name is not purely cosmetic: the evaluation functions look for
# the substring 'FP16' (case-insensitive) in it to decide whether input
# tensors are cast to half precision before inference.
CONTAINER_DATA_PATH = '/workspace'
MODEL_PATHS = {
    'TensorRT FP32': os.path.join(CONTAINER_DATA_PATH, r'saved_models_and_logs/tensorrt/resnet50_trt_fp32.ts'),
    'TensorRT FP16': os.path.join(CONTAINER_DATA_PATH, r'saved_models_and_logs/tensorrt/resnet50_trt_fp16.ts')
}

# --- 3. EVALUATION PARAMETERS ---
# NOTE(review): EVAL_BATCH_SIZE is not referenced by any cell visible in this
# notebook; the DataLoaders use REQUIRED_BATCH_SIZE_FOR_TRT instead.
EVAL_BATCH_SIZE = 64
# Batch size used for every DataLoader and as the padding target for the last
# batch. Presumably the batch dimension the TensorRT engines were built with
# -- confirm against the export script.
REQUIRED_BATCH_SIZE_FOR_TRT = 32
# Background loader workers are only used when evaluating on CUDA.
NUM_WORKERS = 2 if DEVICE.type == 'cuda' else 0

# --- 4. DATA TRANSFORMS ---
# Deterministic eval pipeline: resize, centre-crop to 224x224, convert to a
# tensor, then normalize each channel with the given mean/std.
# NOTE(review): these mean/std values look like the usual ImageNet statistics
# used with torchvision classification models -- confirm they match training.
eval_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [3]:
# %%
# ===================================================================
#    CELL 3: ACCURACY EVALUATION FUNCTION (SIMPLE 'drop_last' VERSION)
# ===================================================================

@torch.no_grad()
def evaluate_model_accuracy_simple(model, model_name, data_loader, device):
    """
    Compute Top-1 and Top-5 accuracy by feeding each batch to the model as-is.

    No padding or re-batching is performed, so every batch yielded by
    `data_loader` must already have a size the model accepts (hence the
    expectation that the DataLoader was built with `drop_last=True`).

    Args:
        model: Callable module returning per-class scores of shape
            (batch, num_classes); it is moved to `device` and put in eval mode.
        model_name: Display name. If it contains 'FP16' (case-insensitive),
            inputs are cast to half precision before inference.
        data_loader: Sized iterable of (images, labels) batches.
        device: Device on which inference and comparison run.

    Returns:
        Tuple (top1_acc, top5_acc) as percentages; (0.0, 0.0) when the loader
        yields no samples.
    """
    model.to(device)
    model.eval()

    cast_to_half = 'FP16' in model_name.upper()
    if cast_to_half:
        print("  -> Model identified as FP16. Input tensors will be converted to half-precision.")

    batch_count = len(data_loader)
    top1_hits = 0
    top5_hits = 0
    samples_seen = 0

    for step, (images, labels) in enumerate(data_loader, start=1):
        images = images.to(device)
        labels = labels.to(device)
        if cast_to_half:
            images = images.half()

        scores = model(images)
        # (batch, 5) class indices, best guess in column 0.
        top5_classes = scores.topk(k=5, dim=1, largest=True, sorted=True).indices
        # Broadcast each label across its row of five guesses.
        hits = top5_classes.eq(labels.view(-1, 1))

        top1_hits += hits[:, 0].sum().item()
        top5_hits += hits.sum().item()
        samples_seen += labels.size(0)

        print(f"\r  -> Processing batch {step}/{batch_count}", end="")

    print()  # Terminate the carriage-return progress line
    if samples_seen == 0:
        return 0.0, 0.0
    return (top1_hits / samples_seen) * 100.0, (top5_hits / samples_seen) * 100.0

print("✓ Simple evaluation function defined.")

✓ Simple evaluation function defined.


In [4]:
# %%
# ===================================================================
#      CELL 4: MAIN EXECUTION (SIMPLE, INCOMPLETE ATTEMPT)
# ===================================================================

print("🚀 Starting Initial TensorRT Model Accuracy Evaluation (Simple Method)...")

print("\nLoading validation dataset with drop_last=True...")
if not os.path.exists(VALIDATION_DATA_PATH):
    raise FileNotFoundError(f"Validation data path not found: {VALIDATION_DATA_PATH}")

val_dataset_simple = ImageFolder(VALIDATION_DATA_PATH, eval_transforms)
# NOTE: Using drop_last=True here for simplicity, but this is flawed.
val_loader_simple = DataLoader(
    val_dataset_simple,
    batch_size=REQUIRED_BATCH_SIZE_FOR_TRT, # Batch size must match static TRT engine
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=DEVICE.type == 'cuda',
    drop_last=True  # <-- THE KEY FLAW
)
# With drop_last=True the trailing partial batch is discarded, so the number
# of skipped images is the remainder of the dataset size over the batch size.
ignored_images = len(val_dataset_simple) % REQUIRED_BATCH_SIZE_FOR_TRT
effective_images = len(val_dataset_simple) - ignored_images

print(f"✓ Dataset loaded. Evaluating on {effective_images}/{len(val_dataset_simple)} images across {len(val_loader_simple)} batches.")
# Only warn when something is actually dropped; a dataset whose size is an
# exact multiple of the batch size loses nothing.
if ignored_images:
    print(f"⚠️ WARNING: {ignored_images} images from the last batch are being ignored.")

# --- Evaluate Models with the simple function ---
# One result row per configured model; failures are recorded as text rows so
# the final report always lists every model.
results_list_simple = []
for model_name, model_path in MODEL_PATHS.items():
    print("\n" + "="*50)
    print(f"Processing Model: {model_name}")
    print("="*50)
    if not os.path.exists(model_path):
        print(f"✗ ERROR: Model file not found at '{model_path}'. Skipping.")
        results_list_simple.append({'Model': model_name, 'Top-1 Accuracy (%)': 'File Not Found', 'Top-5 Accuracy (%)': 'File Not Found'})
        continue

    # Bind the name up front so cleanup never depends on whether loading
    # succeeded (and never picks up a stale `model` from another cell).
    model = None
    try:
        model = torch.jit.load(model_path).to(DEVICE)
        top1, top5 = evaluate_model_accuracy_simple(model, model_name, val_loader_simple, DEVICE)
        print(f"  -> ✅ (Incomplete) Result: Top-1 = {top1:.2f}%, Top-5 = {top5:.2f}%")
        results_list_simple.append({'Model': model_name, 'Top-1 Accuracy (%)': f"{top1:.2f}", 'Top-5 Accuracy (%)': f"{top5:.2f}"})
    except Exception as e:
        # Best effort: report the failure and continue with the next model.
        print(f"✗ ERROR: An error occurred while processing {model_name}: {e}")
        results_list_simple.append({'Model': model_name, 'Top-1 Accuracy (%)': 'Evaluation Failed', 'Top-5 Accuracy (%)': 'Evaluation Failed'})
    finally:
        # Drop the model reference and hand PyTorch's cached CUDA blocks back
        # to the driver before the next model is loaded.
        del model
        gc.collect()
        if DEVICE.type == 'cuda':
            torch.cuda.empty_cache()

# --- Display Initial, Flawed Report ---
print("\n\n" + "="*50)
print("📋 INITIAL (INCOMPLETE) ACCURACY REPORT")
print("="*50)
if results_list_simple:
    results_df_simple = pd.DataFrame(results_list_simple)
    print(results_df_simple.to_string(index=False))

🚀 Starting Initial TensorRT Model Accuracy Evaluation (Simple Method)...

Loading validation dataset with drop_last=True...
✓ Dataset loaded. Evaluating on 3904/3923 images across 122 batches.

Processing Model: TensorRT FP32
  -> Processing batch 122/122
  -> ✅ (Incomplete) Result: Top-1 = 64.93%, Top-5 = 87.50%

Processing Model: TensorRT FP16
  -> Model identified as FP16. Input tensors will be converted to half-precision.
  -> Processing batch 122/122
  -> ✅ (Incomplete) Result: Top-1 = 65.06%, Top-5 = 87.45%


📋 INITIAL (INCOMPLETE) ACCURACY REPORT
        Model Top-1 Accuracy (%) Top-5 Accuracy (%)
TensorRT FP32              64.93              87.50
TensorRT FP16              65.06              87.45


In [5]:
# %%
# ===================================================================
#    CELL 5: CORRECTING THE FLAW - EVALUATING ALL IMAGES
# ===================================================================

print("Analysis of the previous result:")
print("The `drop_last=True` approach is fast but inaccurate because it ignores the final batch of images.")
print("To get the true accuracy, we must evaluate all images. This requires 'padding' the last batch to match the static size required by the TensorRT engine.")
print("\nDefining a new, more robust evaluation function that handles this padding...\n")

@torch.no_grad()
def evaluate_model_accuracy_robust(model, model_name, data_loader, device, required_batch_size):
    """
    Compute Top-1 / Top-5 accuracy for a model that only accepts a fixed batch size.

    Every forward pass sees exactly `required_batch_size` samples:
      * a batch smaller than that is zero-padded, and the outputs belonging to
        the padding rows are discarded before scoring;
      * a batch larger than that is split into `required_batch_size`-sized
        chunks (the last chunk is padded if needed).
    Only real samples contribute to the accuracy.

    Args:
        model: Callable module returning per-class scores of shape
            (batch, num_classes); it is moved to `device` and put in eval mode.
        model_name: Display name. If it contains 'FP16' (case-insensitive),
            inputs are cast to half precision before inference.
        data_loader: Sized iterable of (images, labels) batches.
        device: Device on which inference and comparison run.
        required_batch_size: Exact batch size the model must be called with.

    Returns:
        Tuple (top1_acc, top5_acc) as percentages; (0.0, 0.0) when the loader
        yields no samples. When the model has fewer than five classes, "top-5"
        is computed over all available classes.

    Raises:
        ValueError: If `required_batch_size` is not a positive integer.
    """
    if required_batch_size < 1:
        raise ValueError(f"required_batch_size must be >= 1, got {required_batch_size}")

    model.to(device)
    model.eval()

    correct_top1 = 0
    correct_top5 = 0
    total = 0
    num_batches = len(data_loader)

    is_fp16 = 'FP16' in model_name.upper()

    for i, (images, labels) in enumerate(data_loader):
        images, labels = images.to(device), labels.to(device)

        # Walk the batch in engine-sized chunks. A batch no larger than
        # required_batch_size yields exactly one chunk.
        for start in range(0, images.shape[0], required_batch_size):
            chunk_images = images[start:start + required_batch_size]
            chunk_labels = labels[start:start + required_batch_size]
            chunk_size = chunk_images.shape[0]

            # --- PADDING LOGIC ---
            if chunk_size < required_batch_size:
                print(f"\r  -> Processing batch {i+1}/{num_batches} (Padding from {chunk_size} to {required_batch_size})", end="")
                # Zero rows with the same trailing shape, dtype and device as the real images.
                padding_tensor = chunk_images.new_zeros(
                    (required_batch_size - chunk_size, *chunk_images.shape[1:])
                )
                chunk_images = torch.cat((chunk_images, padding_tensor), dim=0)
            else:
                print(f"\r  -> Processing batch {i+1}/{num_batches}", end="")

            if is_fp16:
                chunk_images = chunk_images.half()

            # Keep only the rows that correspond to real samples.
            outputs = model(chunk_images)[:chunk_size]

            # Clamp k so models with fewer than five classes do not crash topk.
            k = min(5, outputs.size(1))
            pred = outputs.topk(k=k, dim=1, largest=True, sorted=True).indices
            # (chunk, k) boolean matrix; column 0 is the top-1 guess.
            correct = pred.eq(chunk_labels.view(-1, 1))

            correct_top1 += correct[:, :1].sum().item()
            correct_top5 += correct.sum().item()
            total += chunk_labels.size(0)

    print("\n") # Newline after progress bar
    top1_acc = (correct_top1 / total) * 100.0 if total > 0 else 0.0
    top5_acc = (correct_top5 / total) * 100.0 if total > 0 else 0.0
    return top1_acc, top5_acc

print("✓ Robust evaluation function is now defined.")

Analysis of the previous result:
The `drop_last=True` approach is fast but inaccurate because it ignores the final batch of images.
To get the true accuracy, we must evaluate all images. This requires 'padding' the last batch to match the static size required by the TensorRT engine.

Defining a new, more robust evaluation function that handles this padding...

✓ Robust evaluation function is now defined.


In [6]:
# %%
# ===================================================================
#           CELL 6: RERUNNING EVALUATION (ROBUST METHOD)
# ===================================================================

print("🚀 Rerunning evaluation with the robust padding method to include all images...")

if not os.path.exists(VALIDATION_DATA_PATH):
    raise FileNotFoundError(f"Validation data path not found: {VALIDATION_DATA_PATH}")

print("\nLoading validation dataset with drop_last=False to include all images...")
val_dataset_robust = ImageFolder(VALIDATION_DATA_PATH, eval_transforms)
# The final partial batch is kept; the robust evaluation function pads it up
# to REQUIRED_BATCH_SIZE_FOR_TRT before inference.
val_loader_robust = DataLoader(
    val_dataset_robust,
    batch_size=REQUIRED_BATCH_SIZE_FOR_TRT,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=DEVICE.type == 'cuda',
    drop_last=False # <-- THE FIX
)
print(f"✓ Dataset loaded. Evaluating on all {len(val_dataset_robust)} images across {len(val_loader_robust)} batches.")

# --- Evaluate Models with the robust function ---
# One result row per configured model; failures are recorded as text rows so
# the final report always lists every model.
results_list_robust = []
for model_name, model_path in MODEL_PATHS.items():
    print("\n" + "="*50)
    print(f"Processing Model: {model_name}")
    print("="*50)
    if not os.path.exists(model_path):
        print(f"✗ ERROR: Model file not found at '{model_path}'. Skipping.")
        results_list_robust.append({'Model': model_name, 'Top-1 Accuracy (%)': 'File Not Found', 'Top-5 Accuracy (%)': 'File Not Found'})
        continue

    # Bind the name up front so cleanup never depends on whether loading
    # succeeded (and never picks up a stale `model` from another cell).
    model = None
    try:
        model = torch.jit.load(model_path).to(DEVICE)
        top1, top5 = evaluate_model_accuracy_robust(model, model_name, val_loader_robust, DEVICE, REQUIRED_BATCH_SIZE_FOR_TRT)
        print(f"  -> ✅ (Complete) Result: Top-1 = {top1:.2f}%, Top-5 = {top5:.2f}%")
        results_list_robust.append({'Model': model_name, 'Top-1 Accuracy (%)': f"{top1:.2f}", 'Top-5 Accuracy (%)': f"{top5:.2f}"})
    except Exception as e:
        # Best effort: report the failure and continue with the next model.
        print(f"✗ ERROR: An error occurred while processing {model_name}: {e}")
        results_list_robust.append({'Model': model_name, 'Top-1 Accuracy (%)': 'Evaluation Failed', 'Top-5 Accuracy (%)': 'Evaluation Failed'})
    finally:
        # Drop the model reference and hand PyTorch's cached CUDA blocks back
        # to the driver before the next model is loaded.
        del model
        gc.collect()
        if DEVICE.type == 'cuda':
            torch.cuda.empty_cache()

# --- Display Final, Correct Report ---
print("\n\n" + "="*50)
print("📋 FINAL & COMPLETE ACCURACY REPORT")
print("="*50)
if results_list_robust:
    results_df_robust = pd.DataFrame(results_list_robust)
    print(results_df_robust.to_string(index=False))

print("\n🎉 Full evaluation complete!")

🚀 Rerunning evaluation with the robust padding method to include all images...

Loading validation dataset with drop_last=False to include all images...
✓ Dataset loaded. Evaluating on all 3923 images across 123 batches.

Processing Model: TensorRT FP32
  -> Processing batch 123/123 (Padding from 19 to 32)

  -> ✅ (Complete) Result: Top-1 = 64.92%, Top-5 = 87.51%

Processing Model: TensorRT FP16
  -> Processing batch 123/123 (Padding from 19 to 32)

  -> ✅ (Complete) Result: Top-1 = 65.05%, Top-5 = 87.46%


📋 FINAL & COMPLETE ACCURACY REPORT
        Model Top-1 Accuracy (%) Top-5 Accuracy (%)
TensorRT FP32              64.92              87.51
TensorRT FP16              65.05              87.46

🎉 Full evaluation complete!
