In [33]:
from torch_geometric.datasets import KarateClub
import os
import pandas as pd
import numpy as np
from torch.utils.data import Dataset
import torch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader as dataloader_normal, TensorDataset
from torch_geometric.data import Data, DataLoader
import nibabel as nib
from sklearn.metrics import accuracy_score, average_precision_score, roc_auc_score
from sklearn.neighbors import NearestNeighbors
import numpy as np
from skimage.measure import regionprops
from torch_geometric.nn import GCNConv
import time

In [34]:
class CustomDataset(Dataset):
    
    def get_edges(self,centroids):
        

        k_neighbors = self.k
        knn_model = NearestNeighbors(n_neighbors=k_neighbors)

        knn_model.fit(centroids)

        distances, indices = knn_model.kneighbors(centroids)

        indices = indices[:, :]
        edges = [[],[]]
        for i in range(420):
            for j in range(4):
                edges[0].append(i)
                edges[1].append(indices[i][j])
        return edges
    def __init__(self, annotations_file, feature_dir, slic_dir, lungmask_dir,centroids_dir, k):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = feature_dir
        self.feature_paths = []
        self.labels = []
        self.slic_paths = []
        self.lungmask_paths = []
        self.k = k
        self.centroid_paths = []
        problem = ["/scratch/features/train/set22/trn24016.pth",
        "/scratch/features/train/set12/trn12411.pth",
        "/scratch/features/train/set9/trn09320.pth",
        "/scratch/features/train/set18/trn19397.pth",
        "/scratch/features/train/set7/trn07778.pth",
        "/scratch/features/train/set14/trn15124.pth",
        "/scratch/features/train/set14/trn15109.pth",
        "/scratch/features/train/set10/trn10310.pth",
        "/scratch/features/train/set13/trn14334.pth"]
        for set in os.listdir(feature_dir):
            set_path = os.path.join(feature_dir, set)
            set_path_slic = os.path.join(slic_dir,set)
            set_path_lungmask = os.path.join(lungmask_dir, set)
            set_path_centroids = os.path.join(centroids_dir, set)
            for feature in os.listdir(set_path):
                image = feature.split(".")[0]
                feature_path = os.path.join(set_path, feature)
                slic_path = os.path.join(set_path_slic, image + '.nii')
                lungmask_path = os.path.join(set_path_lungmask, image + '.nii')
                centroids_path = os.path.join(set_path_centroids, image + '.npy')
                if(feature_path in problem):
                    continue
                self.feature_paths.append(feature_path)
                self.slic_paths.append(slic_path)
                self.lungmask_paths.append(lungmask_path)
                self.centroid_paths.append(centroids_path)
                self.labels.append(torch.tensor(self.img_labels[self.img_labels['NoteAcc_DEID'] == image][['nodule*lung', 'opacity*lung', 'atelectasis*lung', 'consolidation*lung', 'mass*lung', 'pneumothorax*lung']].values[0]))
                # self.labels.append(torch.tensor(self.img_labels[self.img_labels['NoteAcc_DEID'] == image][['opacity*lung']].values[0]))

    def __len__(self):
        return len(self.feature_paths)

    def __getitem__(self, idx):
        feature = torch.load(self.feature_paths[idx])
        label = self.labels[idx]
        
        # slic = nib.load(self.slic_paths[idx]).get_fdata()
        
        centroids = np.load(self.centroid_paths[idx])
        
        # lungmask = nib.load(self.lungmask_paths[idx]).get_fdata()
        edges = self.get_edges(centroids)
        
        return feature, label, torch.tensor(edges)

In [35]:
label_file = 'imgtrain_Abnormality_and_Location_Labels.csv'
train_features = '/scratch/features/train'
train_slic = '/home/RadChest/slic/train'
train_lungmask = '/home/RadChest/lungmask/train'
train_centroid = '/scratch/centroids/train'
traindata = CustomDataset(label_file, train_features, train_slic, train_lungmask,train_centroid, 20)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [36]:
num_features=2048
num_labels=1
class MultiLabelNN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(MultiLabelNN, self).__init__()
        self.conv1 = GCNConv(2048,1024)
        self.conv2 = GCNConv(1024,512)
        self.conv3 = GCNConv(512,128)
        self.conv4 = GCNConv(128, 16)
        self.fc1 = nn.Linear(16*420, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 64)
        self.fc4 = nn.Linear(64, output_dim)
    
    def forward(self, data): 
        x = data.x
        edge_index = data.edge_index
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index).relu()
        x = self.conv3(x, edge_index).relu()
        x = self.conv4(x, edge_index).relu()
        x = x.reshape((-1,420*16))
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = self.fc4(x)
        return x

model = MultiLabelNN(num_features, num_labels).to(device)


In [37]:
criterion = nn.BCEWithLogitsLoss()  # For multi-label classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [38]:
num_epochs = 10
dataloader = dataloader_normal(traindata, batch_size=512, shuffle=True, num_workers=30)
for epoch in range(num_epochs):
    model.train()  
    running_loss = 0.0
    running_accuracy = 0
    epoch_outputs = torch.empty((0,1))
    epoch_targets = torch.empty((0,1))
    i = 0
    for inputs, targets, edges in dataloader:
        print(i)
        i+=1
        graphs = [Data(x = inputs[j], edge_index = edges[j]) for j in range(inputs.shape[0])]
        dataloader_graph = DataLoader(graphs, batch_size=inputs.shape[0],num_workers=30)
        batchx = []
        for databatch in dataloader_graph:
            batchx = databatch
        # inputs = inputs.to(device)
        batchx = batchx.to(device)
        targets = targets.to(device)
        # edges = edges.to(device)
        optimizer.zero_grad()
        # outputs = model(inputs, edges)
        outputs = model(batchx)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        epoch_outputs = torch.cat((epoch_outputs, outputs.cpu()))
        epoch_targets = torch.cat((epoch_targets, targets.cpu()))
        running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(traindata)
    probabilities = torch.sigmoid(epoch_outputs)
    predictions = (probabilities > 0.5).float()

    predictions_np = predictions.detach().numpy()
    targets_np = epoch_targets.numpy()

    average_precision = average_precision_score(targets_np, predictions_np, average='macro')
    auroc_score = roc_auc_score(targets_np,predictions_np,average='macro')
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Average Precision (Macro): {average_precision:.4f}, AUROC: {auroc_score:.4f}')


0




ValueError: Target size (torch.Size([512, 6])) must be the same as input size (torch.Size([512, 1]))

In [None]:
print(predictions_np, targets_np)

[[0.]
 [0.]
 [0.]
 ...
 [0.]
 [0.]
 [0.]] [[1.]
 [1.]
 [0.]
 ...
 [1.]
 [0.]
 [0.]]


In [None]:
x = 0
for i in range(6):
    a = 0
    b = 0
    c = 0
    for j in range(predictions_np.shape[0]):
        if(predictions_np[j][i] == 1):
            if(predictions_np[j][i] == targets_np[j][i]):
                a += 1
            b += 1
        if(targets_np[j][i] == 1):
            c+= 1
    print(c, a/b)
    x += a/b


ZeroDivisionError: division by zero

In [None]:
print(batchx)

DataBatch(x=[88620, 2048], edge_index=[2, 354480], batch=[88620], ptr=[212])


In [None]:
print(next(iter(dataloader_graph)))



DataBatch(x=[88620, 2048], edge_index=[2, 354480], batch=[88620], ptr=[212])


In [None]:
print(batchx)

DataBatch(x=[88620, 2048], edge_index=[2, 354480], batch=[88620], ptr=[212])


In [None]:
print(graphs)

[Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[420, 2048], edge_index=[2, 1680]), Data(x=[42

In [None]:
print(inputs[0].shape)

torch.Size([420, 2048])


In [None]:
print(batch)

NameError: name 'batch' is not defined

In [None]:
i = 0
for inputs, targets, edges in traindata:
    model.train()  
    epoch_outputs = torch.empty((0,6))
    epoch_targets = torch.empty((0,6))
    print(i)
    i+=1
    if(i == 2):
        break
    for epoch in range(num_epochs):
        inputs = inputs.to(device)
        targets = targets.to(device).reshape((1,6))
        edges = edges.to(device)
        optimizer.zero_grad()
        outputs = model(inputs, edges)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        epoch_outputs = torch.cat((epoch_outputs, outputs.cpu()))
        epoch_targets = torch.cat((epoch_targets, targets.cpu()))
        epoch_loss = loss.item() 
        probabilities = torch.sigmoid(outputs)
        predictions = (probabilities > 0.5).float()
        print(f'epoch: {epoch}: {epoch_loss:.4f} Predictions: {predictions} target: {targets}')

0


TypeError: MultiLabelNN.forward() takes 2 positional arguments but 3 were given

In [None]:
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Layer: conv1.bias | Size: torch.Size([512]) | Values : tensor([0.0052, 0.0054], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: conv1.lin.weight | Size: torch.Size([512, 2048]) | Values : tensor([[-0.0233, -0.0419, -0.0058,  ...,  0.0100,  0.0238,  0.0218],
        [-0.0005,  0.0066, -0.0137,  ...,  0.0385,  0.0023, -0.0347]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: conv2.bias | Size: torch.Size([16]) | Values : tensor([-0.0060, -0.0067], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: conv2.lin.weight | Size: torch.Size([16, 512]) | Values : tensor([[-0.1120,  0.0025, -0.0936,  ...,  0.0110,  0.0789, -0.0176],
        [ 0.0371, -0.0806,  0.0841,  ..., -0.0558, -0.0736, -0.0106]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: fc1.weight | Size: torch.Size([128, 6720]) | Values : tensor([[-0.0090,  0.0042,  0.0097,  ...,  0.0067,  0.0033, -0.0159],
        [-0.0165, -0.0073, -0.0012,  ...,  0.0085, -0.0061, -0.0070]],
       device='cuda:0', grad_