In [7]:
import os
import random
import numpy as np
from pathlib import Path

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report
from tqdm import tqdm

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [8]:
from PIL import Image

class CenterLungCrop(object):
    """
    Remove fixed margins at top/bottom/sides where borders/text often live.
    Tune fractions after visual inspection.
    """
    def __init__(self, top_frac=0.08, bottom_frac=0.08, side_frac=0.05):
        self.top_frac = top_frac
        self.bottom_frac = bottom_frac
        self.side_frac = side_frac

    def __call__(self, img: Image.Image):
        w, h = img.size
        left   = int(self.side_frac * w)
        right  = int((1 - self.side_frac) * w)
        top    = int(self.top_frac * h)
        bottom = int((1 - self.bottom_frac) * h)
        return img.crop((left, top, right, bottom))

In [9]:
DATA_DIR = Path(r"E:\30_LLMs\Datasets\kaggle_pneumonia\chest_xray")
IMG_SIZE = 384
mean = [0.485, 0.456, 0.406]
std  = [0.229, 0.224, 0.225]

# Training transforms: crop to center + random crop/scale to break edge reliance
train_tfms = transforms.Compose([
    CenterLungCrop(top_frac=0.08, bottom_frac=0.08, side_frac=0.05),
    transforms.Resize(int(IMG_SIZE * 1.3)),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.70, 1.0), ratio=(0.95, 1.05)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(7),
    transforms.ColorJitter(brightness=0.1, contrast=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

val_tfms = transforms.Compose([
    CenterLungCrop(top_frac=0.08, bottom_frac=0.08, side_frac=0.05),
    transforms.Resize(int(IMG_SIZE * 1.2)),
    transforms.CenterCrop((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

train_ds = datasets.ImageFolder(DATA_DIR / "train", transform=train_tfms)
val_ds   = datasets.ImageFolder(DATA_DIR / "val", transform=val_tfms)
test_ds  = datasets.ImageFolder(DATA_DIR / "test", transform=val_tfms)

len(train_ds), len(val_ds), len(test_ds), train_ds.classes

(5216, 16, 624, ['NORMAL', 'PNEUMONIA'])

In [10]:
# Load your existing best checkpoint
ckpt_path = Path("checkpoints/densenet121_kaggle_best.pth")
ckpt = torch.load(ckpt_path, map_location=device)

# Rebuild model
model = models.densenet121(weights=None)
in_features = model.classifier.in_features
model.classifier = nn.Linear(in_features, 2)
model.load_state_dict(ckpt["model_state"])
model.to(device)

# Lower learning rate for fine-tuning
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)

# Recreate scheduler (optional, or just use fixed LR)
# If you had a scheduler before, recreate it here with fewer epochs

print(f"Loaded checkpoint from epoch {ckpt['epoch']}, continuing fine-tuning with new transforms...")

  ckpt = torch.load(ckpt_path, map_location=device)


Loaded checkpoint from epoch 1, continuing fine-tuning with new transforms...


In [11]:
def compute_class_weights(dataset):
    labels = np.array(dataset.targets)
    classes = sorted(set(labels))
    counts = np.array([(labels == c).sum() for c in classes], dtype=np.float32)
    num_pos = counts[1]
    num_neg = counts[0]
    pos_weight = num_neg / max(num_pos, 1.0)
    return pos_weight, counts

pos_weight, counts = compute_class_weights(train_ds)
print(f"Class counts -> NORMAL: {int(counts[0])}, PNEUMONIA: {int(counts[1])}, pos_weight: {pos_weight:.3f}")

BATCH_SIZE = 32
NUM_WORKERS = 0  # Windows: start with 0, try 2 later

train_loader = DataLoader(
    train_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

val_loader = DataLoader(
    val_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

test_loader = DataLoader(
    test_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

Class counts -> NORMAL: 1341, PNEUMONIA: 3875, pos_weight: 0.346


In [12]:
model = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
in_features = model.classifier.in_features
model.classifier = nn.Linear(in_features, 2)
model = model.to(device)

class_weights = torch.tensor([1.0, pos_weight], dtype=torch.float32, device=device)
criterion = nn.CrossEntropyLoss(weight=class_weights)

optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=12)

In [13]:
@torch.no_grad()
def evaluate(model, loader):
    model.eval()
    all_probs = []
    all_targets = []
    correct = 0
    total = 0

    for x, y in loader:
        x = x.to(device)
        y = y.to(device)
        logits = model(x)
        probs = torch.softmax(logits, dim=1)
        preds = probs.argmax(dim=1)

        correct += (preds == y).sum().item()
        total += y.size(0)

        all_probs.append(probs[:, 1].cpu().numpy())  # prob of PNEUMONIA
        all_targets.append(y.cpu().numpy())

    all_probs = np.concatenate(all_probs)
    all_targets = np.concatenate(all_targets)

    try:
        auc = roc_auc_score(all_targets, all_probs)
    except Exception:
        auc = float("nan")

    acc = correct / total
    bin_preds = (all_probs >= 0.5).astype(int)
    cm = confusion_matrix(all_targets, bin_preds, labels=[0, 1])
    report = classification_report(
        all_targets, bin_preds, target_names=["NORMAL", "PNEUMONIA"], digits=4
    )
    return acc, auc, cm, report

In [14]:
EPOCHS = 12
best_auc = -1.0

ckpt_dir = Path("./checkpoints")
ckpt_dir.mkdir(parents=True, exist_ok=True)
ckpt_path = ckpt_dir / "densenet121_kaggle_best.pth"

for epoch in range(1, EPOCHS + 1):
    model.train()
    running_loss = 0.0
    pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{EPOCHS}")

    for x, y in pbar:
        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad(set_to_none=True)
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * x.size(0)
        pbar.set_postfix(loss=loss.item())

    scheduler.step()
    train_loss = running_loss / len(train_ds)

    val_acc, val_auc, val_cm, val_report = evaluate(model, val_loader)
    print(f"\nEpoch {epoch}: train_loss={train_loss:.4f} val_acc={val_acc:.4f} val_auc={val_auc:.4f}")
    print("Val CM:\n", val_cm)
    print("Val report:\n", val_report)

    if val_auc > best_auc:
        best_auc = val_auc
        torch.save(
            {
                "epoch": epoch,
                "model_state": model.state_dict(),
                "val_auc": val_auc,
                "img_size": IMG_SIZE,
                "normalization": {"mean": mean, "std": std},
            },
            ckpt_path,
        )
        print("Saved best model to:", ckpt_path)

test_acc, test_auc, test_cm, test_report = evaluate(model, test_loader)
print(f"\nTEST: acc={test_acc:.4f} auc={test_auc:.4f}")
print("Test CM:\n", test_cm)
print("Test report:\n", test_report)

Epoch 1/12: 100%|████████████████████████████████████████████████████████| 163/163 [49:02<00:00, 18.05s/it, loss=0.224]



Epoch 1: train_loss=0.1447 val_acc=0.6250 val_auc=1.0000
Val CM:
 [[2 6]
 [0 8]]
Val report:
               precision    recall  f1-score   support

      NORMAL     1.0000    0.2500    0.4000         8
   PNEUMONIA     0.5714    1.0000    0.7273         8

    accuracy                         0.6250        16
   macro avg     0.7857    0.6250    0.5636        16
weighted avg     0.7857    0.6250    0.5636        16

Saved best model to: checkpoints\densenet121_kaggle_best.pth


Epoch 2/12: 100%|███████████████████████████████████████████████████████| 163/163 [35:49<00:00, 13.19s/it, loss=0.0103]



Epoch 2: train_loss=0.0870 val_acc=0.5625 val_auc=1.0000
Val CM:
 [[1 7]
 [0 8]]
Val report:
               precision    recall  f1-score   support

      NORMAL     1.0000    0.1250    0.2222         8
   PNEUMONIA     0.5333    1.0000    0.6957         8

    accuracy                         0.5625        16
   macro avg     0.7667    0.5625    0.4589        16
weighted avg     0.7667    0.5625    0.4589        16



Epoch 3/12: 100%|███████████████████████████████████████████████████████| 163/163 [35:50<00:00, 13.19s/it, loss=0.0143]



Epoch 3: train_loss=0.0819 val_acc=0.5625 val_auc=0.9375
Val CM:
 [[1 7]
 [0 8]]
Val report:
               precision    recall  f1-score   support

      NORMAL     1.0000    0.1250    0.2222         8
   PNEUMONIA     0.5333    1.0000    0.6957         8

    accuracy                         0.5625        16
   macro avg     0.7667    0.5625    0.4589        16
weighted avg     0.7667    0.5625    0.4589        16



Epoch 4/12: 100%|██████████████████████████████████████████████████████| 163/163 [35:51<00:00, 13.20s/it, loss=0.00563]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])



Epoch 4: train_loss=0.0764 val_acc=0.5000 val_auc=1.0000
Val CM:
 [[0 8]
 [0 8]]
Val report:
               precision    recall  f1-score   support

      NORMAL     0.0000    0.0000    0.0000         8
   PNEUMONIA     0.5000    1.0000    0.6667         8

    accuracy                         0.5000        16
   macro avg     0.2500    0.5000    0.3333        16
weighted avg     0.2500    0.5000    0.3333        16



Epoch 5/12: 100%|███████████████████████████████████████████████████████| 163/163 [35:55<00:00, 13.22s/it, loss=0.0537]



Epoch 5: train_loss=0.0394 val_acc=0.5625 val_auc=1.0000
Val CM:
 [[1 7]
 [0 8]]
Val report:
               precision    recall  f1-score   support

      NORMAL     1.0000    0.1250    0.2222         8
   PNEUMONIA     0.5333    1.0000    0.6957         8

    accuracy                         0.5625        16
   macro avg     0.7667    0.5625    0.4589        16
weighted avg     0.7667    0.5625    0.4589        16



Epoch 6/12: 100%|███████████████████████████████████████████████████████| 163/163 [33:23<00:00, 12.29s/it, loss=0.0186]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])



Epoch 6: train_loss=0.0434 val_acc=0.5000 val_auc=1.0000
Val CM:
 [[0 8]
 [0 8]]
Val report:
               precision    recall  f1-score   support

      NORMAL     0.0000    0.0000    0.0000         8
   PNEUMONIA     0.5000    1.0000    0.6667         8

    accuracy                         0.5000        16
   macro avg     0.2500    0.5000    0.3333        16
weighted avg     0.2500    0.5000    0.3333        16



Epoch 7/12: 100%|████████████████████████████████████████████████████████| 163/163 [23:54<00:00,  8.80s/it, loss=0.013]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])



Epoch 7: train_loss=0.0373 val_acc=0.5000 val_auc=1.0000
Val CM:
 [[0 8]
 [0 8]]
Val report:
               precision    recall  f1-score   support

      NORMAL     0.0000    0.0000    0.0000         8
   PNEUMONIA     0.5000    1.0000    0.6667         8

    accuracy                         0.5000        16
   macro avg     0.2500    0.5000    0.3333        16
weighted avg     0.2500    0.5000    0.3333        16



Epoch 8/12: 100%|███████████████████████████████████████████████████████| 163/163 [22:45<00:00,  8.38s/it, loss=0.0656]



Epoch 8: train_loss=0.0301 val_acc=0.6250 val_auc=1.0000
Val CM:
 [[2 6]
 [0 8]]
Val report:
               precision    recall  f1-score   support

      NORMAL     1.0000    0.2500    0.4000         8
   PNEUMONIA     0.5714    1.0000    0.7273         8

    accuracy                         0.6250        16
   macro avg     0.7857    0.6250    0.5636        16
weighted avg     0.7857    0.6250    0.5636        16



Epoch 9/12: 100%|███████████████████████████████████████████████████████| 163/163 [24:28<00:00,  9.01s/it, loss=0.0402]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])



Epoch 9: train_loss=0.0237 val_acc=0.5000 val_auc=1.0000
Val CM:
 [[0 8]
 [0 8]]
Val report:
               precision    recall  f1-score   support

      NORMAL     0.0000    0.0000    0.0000         8
   PNEUMONIA     0.5000    1.0000    0.6667         8

    accuracy                         0.5000        16
   macro avg     0.2500    0.5000    0.3333        16
weighted avg     0.2500    0.5000    0.3333        16



Epoch 10/12: 100%|█████████████████████████████████████████████████████| 163/163 [30:32<00:00, 11.24s/it, loss=0.00125]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])



Epoch 10: train_loss=0.0182 val_acc=0.5000 val_auc=1.0000
Val CM:
 [[0 8]
 [0 8]]
Val report:
               precision    recall  f1-score   support

      NORMAL     0.0000    0.0000    0.0000         8
   PNEUMONIA     0.5000    1.0000    0.6667         8

    accuracy                         0.5000        16
   macro avg     0.2500    0.5000    0.3333        16
weighted avg     0.2500    0.5000    0.3333        16



Epoch 11/12: 100%|█████████████████████████████████████████████████████| 163/163 [30:49<00:00, 11.35s/it, loss=0.00156]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])



Epoch 11: train_loss=0.0112 val_acc=0.5000 val_auc=1.0000
Val CM:
 [[0 8]
 [0 8]]
Val report:
               precision    recall  f1-score   support

      NORMAL     0.0000    0.0000    0.0000         8
   PNEUMONIA     0.5000    1.0000    0.6667         8

    accuracy                         0.5000        16
   macro avg     0.2500    0.5000    0.3333        16
weighted avg     0.2500    0.5000    0.3333        16



Epoch 12/12: 100%|█████████████████████████████████████████████████████| 163/163 [28:14<00:00, 10.40s/it, loss=0.00263]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])



Epoch 12: train_loss=0.0107 val_acc=0.5000 val_auc=1.0000
Val CM:
 [[0 8]
 [0 8]]
Val report:
               precision    recall  f1-score   support

      NORMAL     0.0000    0.0000    0.0000         8
   PNEUMONIA     0.5000    1.0000    0.6667         8

    accuracy                         0.5000        16
   macro avg     0.2500    0.5000    0.3333        16
weighted avg     0.2500    0.5000    0.3333        16


TEST: acc=0.7420 auc=0.9242
Test CM:
 [[ 73 161]
 [  0 390]]
Test report:
               precision    recall  f1-score   support

      NORMAL     1.0000    0.3120    0.4756       234
   PNEUMONIA     0.7078    1.0000    0.8289       390

    accuracy                         0.7420       624
   macro avg     0.8539    0.6560    0.6522       624
weighted avg     0.8174    0.7420    0.6964       624



In [15]:
# If you used CenterLungCrop in training/val, define it here too:
class CenterLungCrop(object):
    def __init__(self, top_frac=0.08, bottom_frac=0.08, side_frac=0.05):
        self.top_frac = top_frac
        self.bottom_frac = bottom_frac
        self.side_frac = side_frac

    def __call__(self, img):
        w, h = img.size
        left   = int(self.side_frac * w)
        right  = int((1 - self.side_frac) * w)
        top    = int(self.top_frac * h)
        bottom = int((1 - self.bottom_frac) * h)
        return img.crop((left, top, right, bottom))

IMG_SIZE = 384
mean = [0.485, 0.456, 0.406]
std  = [0.229, 0.224, 0.225]

test_tfms = transforms.Compose([
    CenterLungCrop(top_frac=0.08, bottom_frac=0.08, side_frac=0.05),  # include if used in val/app
    transforms.Resize(int(IMG_SIZE * 1.2)),
    transforms.CenterCrop((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

test_ds = datasets.ImageFolder(DATA_DIR / "test", transform=test_tfms)
test_loader = DataLoader(test_ds, batch_size=16, shuffle=False)

In [16]:
from torchvision import models

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
CKPT_PATH = Path("checkpoints/densenet121_kaggle_best.pth")

ckpt = torch.load(CKPT_PATH, map_location=DEVICE, weights_only=False)

model = models.densenet121(weights=None)
in_features = model.classifier.in_features
model.classifier = torch.nn.Linear(in_features, 2)
model.load_state_dict(ckpt["model_state"], strict=True)
model.to(DEVICE)
model.eval()

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

In [17]:
all_labels = []
all_probs  = []  # pneumonia probability
all_preds  = []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        logits = model(images)
        probs = torch.softmax(logits, dim=1)  # [B, 2]

        pneu_probs = probs[:, 1]              # class index 1 = PNEUMONIA
        preds = torch.argmax(probs, dim=1)

        all_labels.append(labels.cpu().numpy())
        all_probs.append(pneu_probs.cpu().numpy())
        all_preds.append(preds.cpu().numpy())

all_labels = np.concatenate(all_labels)
all_probs  = np.concatenate(all_probs)
all_preds  = np.concatenate(all_preds)

In [19]:
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report

acc = accuracy_score(all_labels, all_preds)

# AUC (only if both classes present in test set)
try:
    auc = roc_auc_score(all_labels, all_probs)
except ValueError:
    auc = None

cm = confusion_matrix(all_labels, all_preds)
report = classification_report(all_labels, all_preds, target_names=["NORMAL", "PNEUMONIA"])

print("Test accuracy:", acc)
print("Test AUC:", auc)
print("Confusion matrix:\n", cm)
print("Classification report:\n", report)

Test accuracy: 0.6955128205128205
Test AUC: 0.892477536708306
Confusion matrix:
 [[ 44 190]
 [  0 390]]
Classification report:
               precision    recall  f1-score   support

      NORMAL       1.00      0.19      0.32       234
   PNEUMONIA       0.67      1.00      0.80       390

    accuracy                           0.70       624
   macro avg       0.84      0.59      0.56       624
weighted avg       0.80      0.70      0.62       624



In [22]:
# Generate validation predictions
val_labels = []
val_probs = []

model.eval()
with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        logits = model(images)
        probs = torch.softmax(logits, dim=1)

        pneu_probs = probs[:, 1]  # pneumonia probability

        val_labels.append(labels.cpu().numpy())
        val_probs.append(pneu_probs.cpu().numpy())

val_labels = np.concatenate(val_labels)
val_probs = np.concatenate(val_probs)

print(f"Generated {len(val_labels)} validation predictions")

Generated 16 validation predictions


In [23]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

thresholds = np.linspace(0.1, 0.9, 17)

best_thr = 0.5
best_f1 = 0.0

for thr in thresholds:
    preds = (val_probs >= thr).astype(int)  # 1 = PNEUMONIA
    acc = accuracy_score(val_labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(
        val_labels, preds, average="binary", pos_label=1, zero_division=0
    )
    print(f"thr={thr:.2f} | acc={acc:.3f} | prec={precision:.3f} | rec={recall:.3f} | f1={f1:.3f}")

    if f1 > best_f1:
        best_f1 = f1
        best_thr = thr

print("Best threshold by F1:", best_thr, "F1:", best_f1)

thr=0.10 | acc=0.562 | prec=0.533 | rec=1.000 | f1=0.696
thr=0.15 | acc=0.562 | prec=0.533 | rec=1.000 | f1=0.696
thr=0.20 | acc=0.562 | prec=0.533 | rec=1.000 | f1=0.696
thr=0.25 | acc=0.562 | prec=0.533 | rec=1.000 | f1=0.696
thr=0.30 | acc=0.562 | prec=0.533 | rec=1.000 | f1=0.696
thr=0.35 | acc=0.562 | prec=0.533 | rec=1.000 | f1=0.696
thr=0.40 | acc=0.562 | prec=0.533 | rec=1.000 | f1=0.696
thr=0.45 | acc=0.625 | prec=0.571 | rec=1.000 | f1=0.727
thr=0.50 | acc=0.625 | prec=0.571 | rec=1.000 | f1=0.727
thr=0.55 | acc=0.625 | prec=0.571 | rec=1.000 | f1=0.727
thr=0.60 | acc=0.625 | prec=0.571 | rec=1.000 | f1=0.727
thr=0.65 | acc=0.625 | prec=0.571 | rec=1.000 | f1=0.727
thr=0.70 | acc=0.625 | prec=0.571 | rec=1.000 | f1=0.727
thr=0.75 | acc=0.688 | prec=0.615 | rec=1.000 | f1=0.762
thr=0.80 | acc=0.750 | prec=0.667 | rec=1.000 | f1=0.800
thr=0.85 | acc=0.750 | prec=0.667 | rec=1.000 | f1=0.800
thr=0.90 | acc=0.812 | prec=0.727 | rec=1.000 | f1=0.842
Best threshold by F1: 0.9 F1: 0

In [24]:
thr = best_thr  # or whatever you pick

test_preds = (all_probs >= thr).astype(int)
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, roc_auc_score

acc = accuracy_score(all_labels, test_preds)
cm = confusion_matrix(all_labels, test_preds)
report = classification_report(all_labels, test_preds, target_names=["NORMAL", "PNEUMONIA"])
auc = roc_auc_score(all_labels, all_probs)

print("Threshold:", thr)
print("Test accuracy:", acc)
print("Test AUC:", auc)
print("Confusion matrix:\n", cm)
print("Classification report:\n", report)

Threshold: 0.9
Test accuracy: 0.7788461538461539
Test AUC: 0.892477536708306
Confusion matrix:
 [[ 97 137]
 [  1 389]]
Classification report:
               precision    recall  f1-score   support

      NORMAL       0.99      0.41      0.58       234
   PNEUMONIA       0.74      1.00      0.85       390

    accuracy                           0.78       624
   macro avg       0.86      0.71      0.72       624
weighted avg       0.83      0.78      0.75       624

