In [1]:
#Outline for Prostate Cancer Surgical Margin NN 

#1. ModelA = MLP that predicts surgical margin status based on tabular data from REDCap database
#1a. Load and pre-process the data
#1b. Construct the neural network arhcitecture with a new class
#1c. Set hyperparameters and run training and testing loops to opitmize parameters. Save with model.state_dict 

#2. ModelB = a Densenet that predicts surgical margin status (+ or -) based on pre-op MRI images
#2a. Create a "custom image dataset" with image files, pull out margin status "labels" from a corresponding csv file (refer to lightning tutorial for help)
#2b. Construct the architecture of the Densenet with a new class 
#2c. Set hyperparameters and run training and testing loops to opitmize parameters. Save with model.state_dict 

#3. Ensemble Network = model A + model B
#3a. new Ensemble class that subclasses nn.module, include one linear layer that takes as input 2 and gives output 1 (concatenation of A and B will make the input need to be 2, we want output to be 4)
    #-->in forward method, pass x1 thru A, x2 thru B, then use torch.cat(A output, B output) dim=1, then pass thru linear layer and add nonlinearity 
#3b. create new model instances and load state_dicts, then create an instance of Ensemble model 
#3c. Set hyperparameters and run training and testing loops to opitmize parameters ?

#4. Evaluate the model 

In [2]:
import os
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torchvision.io import read_image
import torchvision
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from skimage import io

In [3]:
#1a and #2a — getting the datasets ready (redcap_file has tabular data, annotations_file has the labels, img_dir holds the MRI's)

class CustomImageDataset(Dataset):
    def __init__(self, redcap_file = None, annotations_file = None, img_dir = None, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file) #default header=0
        self.img_dir = img_dir
        self.tabular = pd.read_csv(redcap_file)
        self.transform = transform
        self.target_transform = target_transform

        
    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx): #iloc indexer syntax is data.iloc[<row selection>, <column selection>]
        img_path = self.img_dir
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        #we can joing the image root directory with the first column of the labels csv !
        #bc the first column of labels csv has the image name! 
        #makes sense because we want to access specific BASED ON THE INDEX designated in the csv
        image = torchvision.transforms.functional.to_tensor(io.imread(img_path))
        
        label = self.img_labels.iloc[idx, 1]
        label = torch.from_numpy(np.array(label)).float()
#         label = torch.tensor(int(self.img_labels.iloc[idx, 1]))
                

        tabular = self.tabular.iloc[idx, 1:] #tabular data starts in second column of csv (after ID's)
        tabular = tabular[["age", "PSA", "total gleason"]] 
        tabular = tabular.tolist()
        tabular = torch.from_numpy(np.array(tabular)).float()
#         tabular = torch.tensor(tabular)
        
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)

        return image, label, tabular  

    
    
my_redcap_file = '/Users/Stephen_Schmit/Desktop/redcap.csv'
my_annotations_file = '/Users/Stephen_Schmit/Desktop/labels.csv' 
my_img_dir = '/Users/Stephen_Schmit/Desktop/DenseImages'
   
#these two lines work but the images need to be resized
# prostate_data = CustomImageDataset(redcap_file = my_redcap_file, annotations_file = my_annotations_file, img_dir = my_img_dir, transform = transforms.ToTensor(), target_transform=None)
# prostate_data[0]

prostate_data = CustomImageDataset(redcap_file = my_redcap_file, annotations_file = my_annotations_file, img_dir = my_img_dir, transform = transforms.Compose([transforms.Resize(256),
transforms.CenterCrop(224)]), target_transform=None)

prostate_data[2]

(tensor([[[0.0258, 0.0220, 0.0143,  ..., 0.0518, 0.0446, 0.0356],
          [0.0064, 0.0135, 0.0194,  ..., 0.0572, 0.0489, 0.0367],
          [0.0007, 0.0094, 0.0217,  ..., 0.0486, 0.0434, 0.0355],
          ...,
          [0.0546, 0.0588, 0.0606,  ..., 0.3059, 0.3735, 0.3955],
          [0.0576, 0.0633, 0.0662,  ..., 0.4029, 0.4232, 0.3897],
          [0.0544, 0.0603, 0.0645,  ..., 0.4556, 0.4087, 0.3261]],
 
         [[0.0258, 0.0220, 0.0143,  ..., 0.0518, 0.0446, 0.0356],
          [0.0064, 0.0135, 0.0194,  ..., 0.0572, 0.0489, 0.0367],
          [0.0007, 0.0094, 0.0217,  ..., 0.0486, 0.0434, 0.0355],
          ...,
          [0.0546, 0.0588, 0.0606,  ..., 0.3059, 0.3735, 0.3955],
          [0.0576, 0.0633, 0.0662,  ..., 0.4029, 0.4232, 0.3897],
          [0.0544, 0.0603, 0.0645,  ..., 0.4556, 0.4087, 0.3261]],
 
         [[0.0258, 0.0220, 0.0143,  ..., 0.0518, 0.0446, 0.0356],
          [0.0064, 0.0135, 0.0194,  ..., 0.0572, 0.0489, 0.0367],
          [0.0007, 0.0094, 0.0217,  ...,

In [4]:
#hperparameters for 1a
learning_rate = 1e-3
epochs = 3

#dataloaders for 1a
train_set, test_set = torch.utils.data.random_split(prostate_data, [3, 2])

# #categorical targets (labels) must be integers aka longtensors, inputs must be floats
# train_set = train_set.type(torch.LongTensor)
# test_set = test_set.type(torch.LongTensor)

train_loader = DataLoader(dataset=train_set, batch_size=1, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=1, shuffle=True)


In [5]:
#1c. will need to change input size to correspond with number of tabular data fields


class MyMLP(nn.Module):
    def __init__(self):
        super(MyMLP, self).__init__()
        self.flatten = nn.Flatten() #believe that flattening is not necessary now because it's not an image? 
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(3, 3), #only 3 fields in my "practice data"
            nn.ReLU(),
            nn.Linear(3, 3),
            nn.ReLU(),
            nn.Linear(3, 1), #output is only 1 neuron for binary classification?
            nn.ReLU()
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
    
modelA = MyMLP()
print(modelA)

MyMLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=3, out_features=3, bias=True)
    (1): ReLU()
    (2): Linear(in_features=3, out_features=3, bias=True)
    (3): ReLU()
    (4): Linear(in_features=3, out_features=1, bias=True)
    (5): ReLU()
  )
)


In [6]:
#1c. continued
optimizer = torch.optim.SGD(modelA.parameters(), lr=learning_rate)
loss_fn = nn.BCELoss()


#define the train and test loops
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (image, label, tabular) in enumerate(dataloader):        
        # Compute prediction and loss
        pred = model(tabular)
        label = torch.unsqueeze(label, 0) #gets the target size to be the same as the models' output size [1,1] because batch size is 1 and then returns one prediction each batch
                                        #ASK ABOUT THIS TO MAKE SURE IT'S OK
        loss = loss_fn(pred, label) 
        
        # Backpropagation
        optimizer.zero_grad()  #sets gradients of all optimized torch tensors to 0
        loss.backward()   #READ THIS
                          #The change in the loss for a small change in an input weight is the gradient of that weight and is calculated using backpropagation
                         #backpropagation is kicked off when we call .backward() on the error tensor (thus loss.backward() written here). 
                        #Autograd then calculates and stores the gradients for each model parameter in the parameter’s .grad attribute.
        optimizer.step() #Finally, we call .step() to initiate gradient descent. The optimizer adjusts each parameter by its gradient stored in .grad

        if batch % 1 == 0:
            loss, current = loss.item(), batch * len(label)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for image, label, tabular in dataloader:
            pred = model(tabular)
            label = torch.unsqueeze(label, 0)
            test_loss += loss_fn(pred, label).item() #compares the label (y) with the prediction based on loss fn defined above
            correct += (pred.argmax(1) == label).type(torch.float).sum().item()  #why is "1" an argument of argmax?
            
    test_loss /= size
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")


for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_loader, modelA, loss_fn, optimizer)
    test_loop(test_loader, modelA, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.000000  [    0/    3]
loss: 0.000000  [    1/    3]
loss: 0.000000  [    2/    3]
Test Error: 
 Accuracy: 0.0%, Avg loss: 100.000000 

Epoch 2
-------------------------------
loss: 0.000000  [    0/    3]
loss: 0.000000  [    1/    3]
loss: 0.000000  [    2/    3]
Test Error: 
 Accuracy: 0.0%, Avg loss: 100.000000 

Epoch 3
-------------------------------
loss: 0.000000  [    0/    3]
loss: 0.000000  [    1/    3]
loss: 0.000000  [    2/    3]
Test Error: 
 Accuracy: 0.0%, Avg loss: 100.000000 

Done!


In [None]:
# end of code for tabular data—below work on densenet once tabular figured out

In [20]:
#2b. ??????? need to figure out the densenet. Do we want pretrained? Also, this doesnt look anything like last tutorial.
#where are the train and test loops? In this example how does the model know the label?

import torch
model = torch.hub.load('pytorch/vision:v0.9.0', 'densenet121', pretrained=True)
# or any of these variants
# model = torch.hub.load('pytorch/vision:v0.9.0', 'densenet169', pretrained=True)
# model = torch.hub.load('pytorch/vision:v0.9.0', 'densenet201', pretrained=True)
# model = torch.hub.load('pytorch/vision:v0.9.0', 'densenet161', pretrained=True)


#preprocess the images
input_image = Image.open(filename)
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

# move the input and model to GPU for speed if available
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    model.to('cuda')

with torch.no_grad():
    output = model(input_batch)
# Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
print(output[0])
# The output has unnormalized scores. To get probabilities, you can run a softmax on it.
probabilities = torch.nn.functional.softmax(output[0], dim=0)
print(probabilities)

Using cache found in /Users/Stephen_Schmit/.cache/torch/hub/pytorch_vision_v0.9.0


NameError: name 'Image' is not defined

In [None]:
#2c. continued, actually running the training now 

loss_fn = nn.CrossEntropyLoss() #loss_fn defined as cross entropy loss because this is the best function in classication tasks

epochs = 20
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")
