In [12]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchinfo import summary
import time
import numpy as np
from model_def import *

In [13]:
# Load the serialized model onto the CPU and print its layer/parameter summary.
model_path = r"./mlflowModel1.pt"  # checkpoint file, relative to the notebook's working directory
device = torch.device("cpu")
# SECURITY NOTE: weights_only=False lets torch.load unpickle arbitrary Python
# objects, which can run code embedded in the file. Only load checkpoints from a
# trusted source. The loaded object is used directly as a module below, so the
# file holds a whole pickled model rather than a bare state_dict.
# NOTE(review): the model's class definitions presumably come from
# `from model_def import *` -- confirm.
model = torch.load(model_path, map_location=device, weights_only=False)
model.eval()  # put the module in evaluation mode before any inference
summary(model)

Layer (type:depth-idx)                        Param #
LightningCheXpertModel                        --
├─DenseNet: 1-1                               --
│    └─Sequential: 2-1                        --
│    │    └─Conv2d: 3-1                       9,408
│    │    └─BatchNorm2d: 3-2                  128
│    │    └─ReLU: 3-3                         --
│    │    └─MaxPool2d: 3-4                    --
│    │    └─_DenseBlock: 3-5                  335,040
│    │    └─_Transition: 3-6                  33,280
│    │    └─_DenseBlock: 3-7                  919,680
│    │    └─_Transition: 3-8                  132,096
│    │    └─_DenseBlock: 3-9                  2,837,760
│    │    └─_Transition: 3-10                 526,336
│    │    └─_DenseBlock: 3-11                 2,158,080
│    │    └─BatchNorm2d: 3-12                 2,048
│    └─Linear: 2-2                            14,350
├─BCEWithLogitsLoss: 1-2                      --
Total params: 6,968,206
Trainable params: 6,968,206
Non-trainabl

In [20]:
class CheXpertDataset(torch.utils.data.Dataset):
    """Multi-label image dataset backed by a CSV index of image paths.

    The CSV must contain a ``corrected_path`` column plus a contiguous run of
    label columns from ``"Enlarged Cardiomediastinum"`` through ``"No Finding"``
    (inclusive, in file order). Labels equal to ``-1`` (presumably the
    "uncertain" marker -- confirm) are mapped to ``1``; every other value,
    including NaN, is passed through unchanged.

    NOTE(review): ``pd`` and ``Image`` are not imported in the visible notebook
    cells -- presumably they arrive via ``from model_def import *`` or an
    earlier cell; confirm before relying on Restart & Run All.

    Args:
        csv_path: Path of the CSV index, read with ``pd.read_csv``.
        image_size: Side length in pixels; images are resized to a square.
        src_prefix: Substring of each stored path that gets replaced.
        dst_prefix: Replacement text for ``src_prefix``. The defaults reproduce
            the previously hard-coded ``/mnt/data/`` -> ``/mnt/dataset/`` remap.
    """

    def __init__(self, csv_path, image_size=224,
                 src_prefix="/mnt/data/", dst_prefix="/mnt/dataset/"):
        self.df = pd.read_csv(csv_path)
        self.image_paths = self.df["corrected_path"].values
        self.src_prefix = src_prefix
        self.dst_prefix = dst_prefix

        # Label columns form one contiguous, inclusive slice of the frame.
        start_col = "Enlarged Cardiomediastinum"
        end_col = "No Finding"
        label_columns = self.df.loc[:, start_col:end_col].columns

        # Map -1 to 1. np.where builds a fresh float32 array, so the buffer
        # derived from the DataFrame is never written to in place.
        raw_labels = self.df[label_columns].to_numpy(dtype=np.float32)
        self.labels = np.where(raw_labels == -1.0, np.float32(1.0), raw_labels)

        # Resize -> 3-channel grayscale -> tensor -> (x - 0.5) / 0.5 per channel.
        self.transform = transforms.Compose([
            transforms.Resize((image_size, image_size)),
            transforms.Grayscale(num_output_channels=3),
            transforms.ToTensor(),
            transforms.Normalize([0.5]*3, [0.5]*3)
        ])

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.image_paths)

    def _resolve_path(self, idx):
        """Return the stored path of sample ``idx`` with the prefix remap applied."""
        return self.image_paths[idx].replace(self.src_prefix, self.dst_prefix)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image_tensor, label_tensor)``.

        The label tensor is float32 and holds one entry per label column.
        """
        path = self._resolve_path(idx)
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied out by convert().
        with Image.open(path) as img:
            image = img.convert("RGB")
        image = self.transform(image)
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        return image, label

In [36]:
from torch.utils.data import DataLoader, Subset, random_split

csv_path = r"filtered_chexpert_paths.csv"

full_dataset = CheXpertDataset(csv_path)
total_len = len(full_dataset)

# Boundaries of the evaluation slice, as row positions in CSV order.
sixty_percent = int(0.6 * total_len)
next_percent = int(0.62 * total_len)

# Per the original notes, rows before the 60% mark are the training/validation
# share; they are not used in this notebook.
# Rows [60%, 62%) -- a contiguous 2% slice of the CSV, not 30% -- form the test set.
dataset_test = Subset(full_dataset, range(sixty_percent, next_percent))

# shuffle=False keeps batch order deterministic, so "the first batch" used by
# the timing cells is the same on every run.
test_loader = DataLoader(dataset_test, batch_size=16, shuffle=False, num_workers=4)

In [37]:
# Size of the serialized checkpoint in bytes; reported in decimal megabytes (1e6 bytes).
model_size = os.stat(model_path).st_size
print("Model Size on Disk: {:.2f} MB".format(model_size / 1e6))

Model Size on Disk: 28.52 MB


In [38]:
from tqdm import tqdm

threshold = 0.5  # decision cut-off applied to the sigmoid probabilities
correct = 0      # running count of label slots predicted correctly
total = 0        # running count of label slots seen

# Scoring needs no gradients; tqdm reports progress per batch.
with torch.no_grad():
    for images, labels in tqdm(test_loader, desc="Evaluating", unit="batch"):
        logits = model(images)
        preds = logits.sigmoid().gt(threshold)   # boolean decision per label slot
        targets = labels.bool()                  # any non-zero label counts as positive
        correct += int(preds.eq(targets).sum())
        total += targets.numel()

# Element-wise accuracy over every (sample, label) pair, as a percentage.
accuracy = correct / total * 100
print(f"Multi-label accuracy: {accuracy:.2f}%")

Evaluating: 100%|██████████| 10/10 [00:30<00:00,  3.03s/batch]

Multi-label accuracy: 77.96%





In [39]:
num_trials = 100  # number of timed forward passes

# Take the first image of the first test batch and restore the batch axis,
# so the model sees a batch of exactly one sample.
single_sample, _ = next(iter(test_loader))
single_sample = single_sample[0].unsqueeze(0)

latencies = []  # per-call durations, in seconds
with torch.no_grad():
    # Untimed warm-up call, so one-off first-call costs stay out of the measurements.
    model(single_sample)

    for _ in range(num_trials):
        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring short intervals; time.time() can jump if the system clock
        # is adjusted and may have coarse resolution.
        start_time = time.perf_counter()
        _ = model(single_sample)
        latencies.append(time.perf_counter() - start_time)

In [40]:
# Single-sample latency percentiles (seconds converted to milliseconds),
# followed by the throughput implied by the total measured time.
for pct_name, pct in (("median", 50), ("95th percentile", 95), ("99th percentile", 99)):
    print(f"Inference Latency (single sample, {pct_name}): {np.percentile(latencies, pct) * 1000:.2f} ms")
print(f"Inference Throughput (single sample): {num_trials/np.sum(latencies):.2f} FPS")

Inference Latency (single sample, median): 133.29 ms
Inference Latency (single sample, 95th percentile): 136.45 ms
Inference Latency (single sample, 99th percentile): 145.06 ms
Inference Throughput (single sample): 7.48 FPS


In [42]:
num_batches = 10  # number of timed forward passes over the same batch

# Reuse the first batch of the test loader for every timed pass.
batch_input, _ = next(iter(test_loader))

batch_times = []  # per-call durations, in seconds
with torch.no_grad():
    # Untimed warm-up call, so one-off first-call costs stay out of the measurements.
    model(batch_input)

    for _ in range(num_batches):
        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring short intervals; time.time() can jump if the system clock
        # is adjusted and may have coarse resolution.
        start_time = time.perf_counter()
        _ = model(batch_input)
        batch_times.append(time.perf_counter() - start_time)

In [43]:
# Samples pushed through the model across all timed passes, divided by the
# total elapsed seconds.
samples_timed = batch_input.shape[0] * num_batches
batch_fps = samples_timed / np.sum(batch_times)
print("Batch Throughput: {:.2f} FPS".format(batch_fps))

Batch Throughput: 5.60 FPS


In [44]:
# Consolidated report: repeats every metric measured above in one place.
report_lines = [
    f"Model Size on Disk: {model_size/ (1e6) :.2f} MB",
    f"Multi-label accuracy: {accuracy:.2f}%",
    f"Inference Latency (single sample, median): {np.percentile(latencies, 50) * 1000:.2f} ms",
    f"Inference Latency (single sample, 95th percentile): {np.percentile(latencies, 95) * 1000:.2f} ms",
    f"Inference Latency (single sample, 99th percentile): {np.percentile(latencies, 99) * 1000:.2f} ms",
    f"Inference Throughput (single sample): {num_trials/np.sum(latencies):.2f} FPS",
    f"Batch Throughput: {batch_fps:.2f} FPS",
]
for report_line in report_lines:
    print(report_line)

Model Size on Disk: 28.52 MB
Multi-label accuracy: 77.96%
Inference Latency (single sample, median): 133.29 ms
Inference Latency (single sample, 95th percentile): 136.45 ms
Inference Latency (single sample, 99th percentile): 145.06 ms
Inference Throughput (single sample): 7.48 FPS
Batch Throughput: 5.60 FPS
