# Imports

In [1]:
import os, torch, random
import SimpleITK, re
import numpy as np
import matplotlib.pyplot as plt 
from torchvision import transforms, models
from torch.utils.data import DataLoader
import re
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

from torch.optim.lr_scheduler import ReduceLROnPlateau
from utils.callbacks import EarlyStopping
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score

In [2]:
SEED = 2024


def seed_everything(seed):
    """Seed the Python, PyTorch and NumPy global random number generators.

    Args:
        seed: Seed value handed unchanged to each generator.
    """
    # Order matches the generators' original seeding order:
    # stdlib `random`, then torch, then NumPy.
    for seeder in (random.seed, torch.manual_seed, np.random.seed):
        seeder(seed)


seed_everything(SEED)

## GPU

In [3]:
# Pick the compute device: prefer CUDA, then Apple MPS, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


# Dataset

In [4]:
# Both splits currently share the same single-step pipeline: ToTensor, which
# converts a PIL Image or numpy.ndarray (H x W x C) in the range [0, 255]
# to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0].
# Each split still gets its own Compose instance.
#
# Transforms that are currently disabled:
#   train: transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip()
#   test:  transforms.Resize(256), transforms.CenterCrop(224)
data_transforms = {
    split: transforms.Compose([transforms.ToTensor()])
    for split in ('train', 'test')
}

In [5]:
from OASIS_2D.dataset import OASIS_Dataset

batch_size = 8

# flag='all' presumably selects every sample rather than one split -- confirm
# against OASIS_Dataset.
total_dataset = OASIS_Dataset(flag='all', seed=SEED)

# shuffle=False is DataLoader's default; it is spelled out because the
# extracted features are later paired with total_dataset.patient_ids / days /
# labels purely by position, so iteration order must follow dataset order.
total_dataloader = DataLoader(total_dataset, batch_size=batch_size, shuffle=False)

Total 100, disease 50, healthy 50.


# Extract features 

## Model

In [125]:
# Adapted from https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
model = models.resnet18(weights='DEFAULT')

# Freeze the pretrained network: with requires_grad switched off on every
# parameter, no gradients are computed for them in backward().
model.requires_grad_(False)

# Swap in a new final layer with 2 outputs per sample. Parameters of a newly
# constructed module have requires_grad=True by default, so this layer is the
# only trainable part. It could be generalized to
# ``nn.Linear(num_ftrs, len(class_names))``.
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)

model.to(device)

## Extractor

In [126]:
from torchvision.models.feature_extraction import create_feature_extractor

# Expose two graph nodes, each under its own name: the globally pooled
# features ("avgpool") and the output of the final linear layer ("fc").
return_nodes = {node: node for node in ("avgpool", "fc")}
extractor = create_feature_extractor(model, return_nodes=return_nodes)

In [127]:
# Put the extractor into inference mode before running data through it.
# A freshly built torchvision model is in training mode and the traced
# extractor inherits that mode; in training mode every BatchNorm layer
# normalises with the statistics of the current mini-batch and updates its
# running buffers. The extracted features would then depend on batch
# composition / batch_size and would drift if this cell were re-run.
extractor.eval()

feature_batches = []
# No gradients are needed for feature extraction; skipping autograd also
# avoids building a graph through the trainable `fc` layer.
with torch.no_grad():
    for inputs, _ in total_dataloader:
        inputs = inputs.to(device)
        outputs = extractor(inputs)

        # Only keep the activations before the final linear layer.
        # flatten(start_dim=1) drops the pooled spatial dims,
        # (B, 512, 1, 1) -> (B, 512), and always keeps the batch axis, even
        # for a final batch holding a single sample.
        feature_batches.append(outputs['avgpool'].flatten(start_dim=1).cpu())

# One row per sample, in dataloader order.
all_outputs = torch.cat(feature_batches, dim=0).numpy()

## Save

In [21]:
# Bundle the per-sample metadata with the extracted feature matrix.
# NOTE: this dict keeps a reference to whatever `all_outputs` is when the cell
# runs -- re-run this cell after re-extracting features, otherwise the saved
# file will contain the previous array.
features = dict(
    patient_id=total_dataset.patient_ids,
    day=total_dataset.days,
    label=total_dataset.labels,
    feature=all_outputs,
)

In [128]:
# Results are stored under OASIS_2D/scratch/<model class name>/features.pt.
# type(model).__name__ yields the class name without going through the
# private nn.Module._get_name() helper.
result_dir = os.path.join('OASIS_2D', 'scratch', type(model).__name__)

# exist_ok=True already makes this a no-op when the directory is present, so
# no separate (and racy) os.path.exists() check is needed.
os.makedirs(result_dir, exist_ok=True)

# NOTE(review): `features` is a plain dict holding a NumPy array, so the file
# is pickled; recent PyTorch versions may need torch.load(..., weights_only=False)
# to read it back -- confirm against the installed version.
torch.save(features, os.path.join(result_dir, 'features.pt'))