In [1]:
!pip install lightning

Collecting lightning
  Downloading lightning-2.5.1.post0-py3-none-any.whl.metadata (39 kB)
Collecting lightning-utilities<2.0,>=0.10.0 (from lightning)
  Downloading lightning_utilities-0.14.3-py3-none-any.whl.metadata (5.6 kB)
Collecting torchmetrics<3.0,>=0.7.0 (from lightning)
  Downloading torchmetrics-1.7.1-py3-none-any.whl.metadata (21 kB)
Collecting pytorch-lightning (from lightning)
  Downloading pytorch_lightning-2.5.1.post0-py3-none-any.whl.metadata (20 kB)
Collecting aiohttp!=4.0.0a0,!=4.0.0a1 (from fsspec[http]<2026.0,>=2022.5.0->lightning)
  Downloading aiohttp-3.11.18-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning)
  Downloading aiohappyeyeballs-2.6.1-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosignal>=1.1.2 (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2026.0,>=2022.5.0->lightning)
  Downloading aiosignal-1.3.2-py2.py3-none-an

In [3]:
import os
import time
import numpy as np
import torch
import onnx
import onnxruntime as ort
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import pandas as pd
from model_def import *

In [4]:
class CheXpertDataset(torch.utils.data.Dataset):
    """Multi-label image dataset indexed by a CSV file.

    The CSV must provide a ``corrected_path`` column (one image path per row)
    and a contiguous run of label columns starting at
    ``"Enlarged Cardiomediastinum"`` and ending at ``"No Finding"``
    (both inclusive). Label values equal to ``-1`` are mapped to ``1``.

    Args:
        csv_path: CSV file read with ``pandas.read_csv``.
        image_size: Side length (pixels) every image is resized to.
        path_remap: ``(old, new)`` pair applied with ``str.replace`` to each
            stored path before the image is opened, or ``None`` to use the
            stored paths unchanged. The default reproduces the remapping that
            used to be hard-coded in ``__getitem__``.

    Each item is an ``(image, label)`` pair: the transformed image tensor and
    a float32 tensor holding that row's label values.
    """

    PATH_COLUMN = "corrected_path"
    FIRST_LABEL_COLUMN = "Enlarged Cardiomediastinum"
    LAST_LABEL_COLUMN = "No Finding"

    def __init__(self, csv_path, image_size=224,
                 path_remap=("/mnt/data/", "/mnt/dataset/")):
        self.df = pd.read_csv(csv_path)
        self.image_paths = self.df[self.PATH_COLUMN].values
        self.labels = self._extract_labels(self.df)
        self.path_remap = path_remap

        self.transform = transforms.Compose([
            transforms.Resize((image_size, image_size)),
            transforms.Grayscale(num_output_channels=3),
            transforms.ToTensor(),
            transforms.Normalize([0.5]*3, [0.5]*3)
        ])

    @classmethod
    def _extract_labels(cls, df):
        """Return the label columns of ``df`` as a float32 array with -1 mapped to 1."""
        label_frame = df.loc[:, cls.FIRST_LABEL_COLUMN:cls.LAST_LABEL_COLUMN]
        # copy=True guarantees a private, writable array for the in-place
        # update below, independent of how pandas shares its internal buffers.
        labels = label_frame.to_numpy(dtype=np.float32, copy=True)
        labels[labels == -1.0] = 1.0  # Convert -1s to 1s
        # NOTE(review): NaN cells, if the CSV contains any, are left as NaN
        # here -- confirm the CSV has no blank labels.
        return labels

    def _resolve_path(self, idx):
        """Return the stored path for row ``idx`` with ``path_remap`` applied."""
        path = self.image_paths[idx]
        if self.path_remap is None:
            return path
        old, new = self.path_remap
        return path.replace(old, new)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        path = self._resolve_path(idx)
        # NOTE(review): `Image` is not imported explicitly in this notebook;
        # presumably PIL.Image arrives via `from model_def import *` -- confirm.
        # The context manager closes the underlying file as soon as the pixel
        # data has been converted into a new in-memory image.
        with Image.open(path) as img:
            image = img.convert("RGB")
        image = self.transform(image)
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        return image, label

In [5]:
from torch.utils.data import DataLoader, Subset, random_split

csv_path = r"filtered_chexpert_paths.csv"

full_dataset = CheXpertDataset(csv_path)
total_len = len(full_dataset)

# Row-index boundaries of the evaluation slice: it starts at the 60% mark of
# the CSV and stops at the 62% mark, i.e. roughly 2% of all rows.
sixty_percent = int(0.6 * total_len)
next_percent = int(0.62 * total_len)

# Rows [0, sixty_percent) are the training/validation portion and are not
# used in this notebook.

# Evaluation subset: rows [sixty_percent, next_percent), kept in CSV order.
dataset_test = Subset(full_dataset, range(sixty_percent, next_percent))

test_loader = DataLoader(dataset_test, batch_size=16, shuffle=False, num_workers=4)

In [6]:
def _timed_runs(ort_session, feed, num_runs):
    """Run one untimed warm-up inference, then return ``num_runs`` latencies in seconds."""
    ort_session.run(None, feed)  # warm-up run, excluded from the measurements

    latencies = []
    for _ in range(num_runs):
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start_time = time.perf_counter()
        ort_session.run(None, feed)
        latencies.append(time.perf_counter() - start_time)
    return latencies


def benchmark_session(ort_session, model_path=None, loader=None):
    """Print size, accuracy, latency and throughput figures for an ONNX Runtime session.

    Args:
        ort_session: Session object exposing ``get_providers()``,
            ``get_inputs()`` and ``run()``.
        model_path: Path of the model file whose on-disk size is reported.
            Defaults to the notebook-level ``onnx_model_path``.
        loader: Iterable of ``(images, labels)`` batches whose elements expose
            ``.numpy()``. Defaults to the notebook-level ``test_loader``.

    Returns:
        None. All results are printed.

    Raises:
        ValueError: If ``loader`` yields no label entries.

    Accuracy is element-wise: every label entry of every sample counts as one
    prediction. Latency is measured on the first sample of the first batch,
    batch throughput on the whole first batch.
    """
    if model_path is None:
        model_path = onnx_model_path
    if loader is None:
        loader = test_loader

    model_size = os.path.getsize(model_path)
    print(f"Model Size on Disk: {model_size / 1e6 :.2f} MB")

    print(f"Execution provider: {ort_session.get_providers()}")

    # The input name does not change between calls, so look it up once.
    input_name = ort_session.get_inputs()[0].name

    correct = 0
    total = 0
    first_batch = None

    for images, labels in loader:
        images_np = images.numpy()
        if first_batch is None:
            # Kept for the timing sections below so the loader does not have
            # to be restarted just to fetch its first batch again.
            first_batch = images_np

        # Run ONNX model inference; the first output holds the logits.
        outputs = ort_session.run(None, {input_name: images_np})[0]
        # sigmoid(x) > 0.5 is equivalent to x > 0; comparing the logits
        # directly avoids overflow in exp() for large-magnitude values.
        preds = outputs > 0
        labels_np = labels.numpy().astype(bool)
        correct += int(np.sum(preds == labels_np))
        total += labels_np.size

    if total == 0:
        raise ValueError("benchmark_session: the data loader produced no samples")

    accuracy = (correct / total) * 100
    print(f"ONNX Model Accuracy: {accuracy:.2f}%")

    num_trials = 100  # Number of single-sample timing runs

    # Single-sample latency: first sample of the first batch, batch dim kept.
    single_sample = first_batch[:1]
    latencies = _timed_runs(ort_session, {input_name: single_sample}, num_trials)
    print(f"Inference Latency (single sample, median): {np.percentile(latencies, 50) * 1000:.2f} ms")
    print(f"Inference Latency (single sample, 95th percentile): {np.percentile(latencies, 95) * 1000:.2f} ms")
    print(f"Inference Latency (single sample, 99th percentile): {np.percentile(latencies, 99) * 1000:.2f} ms")
    print(f"Inference Throughput (single sample): {num_trials/np.sum(latencies):.2f} FPS")

    num_batches = 50  # Number of full-batch timing runs

    # Batch throughput: the whole first batch, repeated num_batches times.
    batch_input = first_batch
    batch_times = _timed_runs(ort_session, {input_name: batch_input}, num_batches)

    batch_fps = (batch_input.shape[0] * num_batches) / np.sum(batch_times)
    print(f"Batch Throughput: {batch_fps:.2f} FPS")

In [8]:
# Benchmark the exported ONNX model through the OpenVINO execution provider.
# `onnx_model_path` stays a notebook-level name because benchmark_session()
# reads it to report the model's size on disk.
onnx_model_path = "./mlflowModel1.onnx"
requested_providers = ['OpenVINOExecutionProvider']
ort_session = ort.InferenceSession(
    onnx_model_path,
    providers=requested_providers,
)
benchmark_session(ort_session)

# Last expression of the cell: shows the device string reported by ONNX Runtime.
ort.get_device()

Model Size on Disk: 28.28 MB
Execution provider: ['OpenVINOExecutionProvider', 'CPUExecutionProvider']
ONNX Model Accuracy: 77.96%
Inference Latency (single sample, median): 12.55 ms
Inference Latency (single sample, 95th percentile): 14.66 ms
Inference Latency (single sample, 99th percentile): 21.26 ms
Inference Throughput (single sample): 77.21 FPS
Batch Throughput: 94.69 FPS


'CPU-OPENVINO_CPU'