# Feature Extraction with ResNet18

This notebook extracts 512-d feature vectors from chest X-ray images using a pretrained ResNet18 backbone.

In [3]:
import os
from pathlib import Path

import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision import transforms, models
from PIL import Image

In [7]:
# Paths
repo_root = Path('..').resolve()
candidates = [
    repo_root / 'data' / 'chest_xray',
    repo_root / 'chest_xray',
    repo_root.parent / 'chest_xray',
    repo_root.parent / 'chest_xray' / 'chest_xray',
]
data_dir = None
for cand in candidates:
    if (cand / 'train').exists() and (cand / 'val').exists():
        data_dir = cand
        break

if data_dir is None:
    checked = '\n'.join(str(p) for p in candidates)
    raise FileNotFoundError(f'Could not find dataset. Checked:\n{checked}')

print(f'Using data dir: {data_dir}')

Using data dir: /Users/meetmehta/Desktop/project/DL Project/chest_xray


In [13]:
# Dataset and dataloaders
import sys
sys.path.append(str(repo_root))
from src.dataset import ChestXRayDataset

# Transforms (no augmentation)
image_size = 224
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_ds = ChestXRayDataset(data_dir, 'train', transform=transform)
val_ds = ChestXRayDataset(data_dir, 'val', transform=transform)

batch_size = 32
num_workers = 0
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers)

print(f'Train samples: {len(train_ds)} | Val samples: {len(val_ds)}')

Train samples: 5216 | Val samples: 16


In [10]:
# Load pretrained ResNet18 and remove final classification layer
use_pretrained = True
model = None
try:
    weights = models.ResNet18_Weights.DEFAULT
    model = models.resnet18(weights=weights)
except Exception as exc:
    print(f'Warning: could not download pretrained weights ({exc}).')
    print('Proceeding with random initialization. If you need pretrained weights, download them manually.')
    use_pretrained = False
    model = models.resnet18(weights=None)

model.fc = torch.nn.Identity()

# Freeze all weights
for param in model.parameters():
    param.requires_grad = False

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
model.eval()

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/meetmehta/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
Proceeding with random initialization. If you need pretrained weights, download them manually.


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [8]:
def extract_features(data_loader, model, device):
    features_list = []
    labels_list = []
    with torch.no_grad():
        for images, labels in data_loader:
            images = images.to(device)
            feats = model(images)
            features_list.append(feats.cpu().numpy())
            labels_list.append(labels.numpy())
    features = np.concatenate(features_list, axis=0)
    labels = np.concatenate(labels_list, axis=0)
    return features, labels

In [14]:
train_features, train_labels = extract_features(train_loader, model, device)
val_features, val_labels = extract_features(val_loader, model, device)

print('Train features:', train_features.shape)
print('Val features:', val_features.shape)

np.save('train_features.npy', train_features)
np.save('val_features.npy', val_features)
np.save('train_labels.npy', train_labels)
np.save('val_labels.npy', val_labels)

print('Saved: train_features.npy, val_features.npy, train_labels.npy, val_labels.npy')

Train features: (5216, 512)
Val features: (16, 512)
Saved: train_features.npy, val_features.npy, train_labels.npy, val_labels.npy
