In [1]:
# Ensemble Variant 1: S: Densenet201, W: ResNet152d (Best Model)
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_recall_curve, roc_auc_score, f1_score
from tqdm import tqdm
import sys
sys.path.extend(["..", "../../../backbone","../../..","../.."])
from dataloader import create_dataloader
from tabulate import tabulate
from metric import Metric
torch.cuda.empty_cache()


# Set device to GPU if available, else use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Current device: {torch.cuda.get_device_name(torch.cuda.current_device())}" if torch.cuda.is_available() else "Current device: CPU")

Current device: NVIDIA A100 80GB PCIe


In [2]:
# Define hyperparameters
batch_size = 16
num_epochs = 200
learning_rate = 0.000001
image_size = 384
num_workers = 4
num_labels = 21
in_channels = 3
num_classes = 21
data_dir = '../../../../../data/GT-main'
omit = 'rfmid'

In [3]:
# Create test dataloader
test_dataloader = create_dataloader(data_dir=data_dir, batch_size=batch_size, num_workers=num_workers, size=image_size, phase='test', omit=omit)

384
../../../../../data/GT-main/./set1/test.csv


In [4]:
# thresholds = [0.46, 0.2, 0.4, 0.65, 0.5, 0.53, 0.57, 0.44, 0.3,  0.65, 0.39, 0.35, 0.24, 0.26, 0.21, 0.62, 0.71, 0.6,  0.45, 0.21, 0.2 ]
# thresholds = [0.26, 0.21, 0.26, 0.41, 0.99, 0.5, 0.36, 0.26, 0.15, 0.68, 0.47, 0.41, 0.64, 0.4, 0.17, 0.48, 0.19, 0.13, 0.36, 0.55, 0.35]
thresholds = [0.4, 0.26, 0.31, 0.37, 0.49, 0.34, 0.36, 0.41, 0.38, 0.63, 0.58, 0.19, 0.35, 0.99, 0.5, 0.91, 0.54, 0.7, 0.74, 0.28, 0.1 ]
model1 = torch.load('../models/strongdn_dn-rfmid.pth', map_location=device)
model2 = torch.load('../models/strongdn_rn-rfmid.pth', map_location=device)
model1.to(device)
model2.to(device)

CTranEncoder(
  (backbone): ResNet152d(
    (features): ResNet(
      (conv1): Sequential(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): ReLU(inplace=True)
        (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2

In [5]:
def calculate_metrics(test_labels, test_preds, thresholds):
    num_classes = test_labels.shape[1]
    metrics_dict = {}
    thresholded_test_preds = np.where(test_preds > np.array(thresholds).reshape(1, -1), 1, 0)

    for label in range(num_classes):
        TP = np.sum((test_labels[:, label] == 1) & (thresholded_test_preds[:, label] == 1))
        FP = np.sum((test_labels[:, label] == 0) & (thresholded_test_preds[:, label] == 1))
        TN = np.sum((test_labels[:, label] == 0) & (thresholded_test_preds[:, label] == 0))
        FN = np.sum((test_labels[:, label] == 1) & (thresholded_test_preds[:, label] == 0))

        precision = TP / (TP + FP) if (TP + FP) > 0 else 1.0
        recall = TP / (TP + FN) if (TP + FN) > 0 else 1.0
        f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 1.0
        auc = roc_auc_score(test_labels[:, label], test_preds[:, label])

        metrics_dict[label] = {'Precision': precision, 'Recall': recall, 'F1': f1, 'AUC': auc}

    return metrics_dict

In [6]:
# Evaltuate the model on test set
a = 0.6
b = 0.4
model1.eval()
model2.eval()
test_preds, test_labels = [], []
with torch.no_grad():
    for images, labels in tqdm(test_dataloader):
        images, labels = images.to(device), labels.to(device)

        outputs1 = model1(images)
        outputs1 = torch.sigmoid(outputs1)
        outputs2 = model2(images)
        outputs2 = torch.sigmoid(outputs2)

        outputs = a * outputs1 + b * outputs2

        test_preds += outputs.tolist()
        test_labels += labels.tolist()

test_labels = np.array(test_labels)
test_preds = np.array(test_preds)

100%|██████████| 34/34 [00:08<00:00,  3.81it/s]


In [7]:
metrics_dict = calculate_metrics(test_labels, test_preds, thresholds)

# Print the dictionary in tabular format using the tabulate library
headers = ['Label', 'Precision', 'Recall', 'F1', 'AUC']
table = []
for label in range(len(metrics_dict)):
    row = [label]
    for metric in ['Precision', 'Recall', 'F1', 'AUC']:
        value = metrics_dict[label][metric]
        row.append('{:.4f}'.format(value))
    table.append(row)

print(tabulate(table, headers=headers))

  Label    Precision    Recall      F1     AUC
-------  -----------  --------  ------  ------
      0       0.7778    0.7527  0.765   0.9563
      1       0.7122    0.8319  0.7674  0.9514
      2       0.619     0.8125  0.7027  0.8959
      3       0.4932    0.7059  0.5806  0.9011
      4       0.6786    0.6786  0.6786  0.9205
      5       0.8276    0.6154  0.7059  0.9373
      6       0.2571    0.4091  0.3158  0.8665
      7       0.8077    0.7241  0.7636  0.9567
      8       0.75      0.45    0.5625  0.8966
      9       0         0       1       0.8433
     10       0.8333    0.8333  0.8333  0.968
     11       0.625     0.5556  0.5882  0.9811
     12       1         0.5385  0.7     0.9903
     13       0         0       1       0.947
     14       0.4545    0.625   0.5263  0.9608
     15       0.5       0.1     0.1667  0.9406
     16       0.6667    0.4     0.5     0.9818
     17       1         0       0       0.937
     18       1         0.0769  0.1429  0.7274
     19       0.