In [63]:
import os
import pandas as pd
from torch.utils.data import Dataset
import torch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, average_precision_score, roc_auc_score

In [64]:
class CustomDataset(Dataset):
    def __init__(self, annotations_file, feature_dir):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = feature_dir
        self.feature_paths = []
        self.labels = []
        problem = ["/scratch/features/train/set22/trn24016.pth",
"/scratch/features/train/set12/trn12411.pth",
"/scratch/features/train/set9/trn09320.pth",
"/scratch/features/train/set18/trn19397.pth",
"/scratch/features/train/set7/trn07778.pth",
"/scratch/features/train/set14/trn15124.pth",
"/scratch/features/train/set14/trn15109.pth",
"/scratch/features/train/set10/trn10310.pth",
"/scratch/features/train/set13/trn14334.pth"]
        for set in os.listdir(feature_dir):
            set_path = os.path.join(feature_dir, set)
            for feature in os.listdir(set_path):
                feature_path = os.path.join(set_path, feature)
                if(feature_path in problem):
                    continue
                self.feature_paths.append(feature_path)
                
                image = feature.split(".")[0]
                self.labels.append(torch.tensor(self.img_labels[self.img_labels['NoteAcc_DEID'] == image][['nodule*lung', 'opacity*lung', 'atelectasis*lung', 'consolidation*lung', 'mass*lung', 'pneumothorax*lung']].values[0]))
                # self.labels.append(torch.tensor(self.img_labels[self.img_labels['NoteAcc_DEID'] == image][['opacity*lung']].values[0]))

    def __len__(self):
        return len(self.feature_paths)

    def __getitem__(self, idx):
        feature = torch.load(self.feature_paths[idx])
        label = self.labels[idx]
        
        return feature, label



In [65]:
label_file = 'imgtrain_Abnormality_and_Location_Labels.csv'
train_features = '/scratch/features/train'
traindata = CustomDataset(label_file, train_features)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [66]:
num_features=2048
num_labels=6
class MultiLabelNN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(MultiLabelNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_dim)
    
    def forward(self, x): 
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = MultiLabelNN(num_features, num_labels).to(device)


In [67]:
criterion = nn.BCEWithLogitsLoss()  # For multi-label classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [68]:
num_epochs = 10
dataloader = DataLoader(traindata, batch_size=512, shuffle=True)
for epoch in range(num_epochs):
    model.train()  
    running_loss = 0.0
    running_accuracy = 0
    epoch_outputs = torch.empty((0,6))
    epoch_targets = torch.empty((0,6))
    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(torch.mean(inputs, dim=1))
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        epoch_outputs = torch.cat((epoch_outputs, outputs.cpu()))
        epoch_targets = torch.cat((epoch_targets, targets.cpu()))
        running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(traindata)
    probabilities = torch.sigmoid(epoch_outputs)
    predictions = (probabilities > 0.5).float()

    predictions_np = predictions.detach().numpy()
    targets_np = epoch_targets.numpy()

    average_precision = average_precision_score(targets_np, predictions_np, average='macro')
    auroc_score = roc_auc_score(targets_np,predictions_np,average='macro')
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Average Precision (Macro): {average_precision:.4f}, AUROC: {auroc_score:.4f}')


Epoch 1/10, Loss: 1764.1739, Average Precision (Macro): 0.3102, AUROC: 0.5054
Epoch 2/10, Loss: 1237.2479, Average Precision (Macro): 0.3091, AUROC: 0.5039
Epoch 3/10, Loss: 812.3440, Average Precision (Macro): 0.3111, AUROC: 0.5089
Epoch 4/10, Loss: 593.3073, Average Precision (Macro): 0.3094, AUROC: 0.5052
Epoch 5/10, Loss: 433.4786, Average Precision (Macro): 0.3073, AUROC: 0.5013
Epoch 6/10, Loss: 421.2435, Average Precision (Macro): 0.3074, AUROC: 0.5010
Epoch 7/10, Loss: 441.2268, Average Precision (Macro): 0.3068, AUROC: 0.4999
Epoch 8/10, Loss: 358.0331, Average Precision (Macro): 0.3111, AUROC: 0.5097
Epoch 9/10, Loss: 305.7465, Average Precision (Macro): 0.3089, AUROC: 0.5031
Epoch 10/10, Loss: 374.9251, Average Precision (Macro): 0.3076, AUROC: 0.5025


In [69]:
print(predictions_np, targets_np)

[[1. 1. 0. 0. 0. 0.]
 [1. 1. 1. 0. 0. 0.]
 [1. 1. 0. 0. 0. 0.]
 ...
 [1. 0. 1. 0. 0. 0.]
 [1. 0. 1. 0. 0. 0.]
 [1. 0. 1. 0. 0. 0.]] [[0. 1. 0. 0. 0. 0.]
 [1. 1. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 ...
 [1. 0. 0. 0. 0. 0.]
 [1. 1. 0. 1. 0. 0.]
 [1. 0. 1. 0. 0. 0.]]


In [70]:
print(probabilities)

tensor([[1.0000e+00, 1.0000e+00, 6.9580e-13, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [1.0000e+00, 1.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [1.0000e+00, 1.0000e+00, 2.2349e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        ...,
        [1.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [1.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [1.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]],
       grad_fn=<SigmoidBackward0>)
