In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision.utils import save_image
from torchvision import transforms, models, datasets
import torchvision
import numpy as np
import h5py
from matplotlib import pyplot as plt
from skimage.transform import resize
from Utils import save_large_dataset, load_large_dataset, calculate_metric
plt.ion()   # interactive mode

In [2]:
CUDA = True

In [3]:
X = load_large_dataset('images')
Y = load_large_dataset('labels')

In [4]:
X = X.squeeze() #remove unnecessary dimension
Y = Y.squeeze()

X = X[:,:,:,58:61] #take only 3 slices and treat them as channels

print (X.shape)
print (Y.shape)

(1792, 121, 145, 3)
(1792,)


In [5]:
X_padded = np.pad(X,((0,0),(51,52),(40,39),(0,0)), 'constant') #pad with zeros to get 224x224 dimension of images
X = X_padded
X = np.float32(X)

In [6]:
X = np.rollaxis(X, 3, 1) #move channel dimension to be the first one
print (X.shape)

(1792, 3, 224, 224)


In [7]:
np.random.seed(9999) #seed fixed for reproducibility
mask = np.random.rand(len(X)) < 0.9  #array of boolean variables

training_set = X[mask]
training_labels = Y[mask]

validation_set = X[~mask]
validation_labels = Y[~mask]

In [8]:
BATCH_SIZE = 64

In [9]:
training_set = torch.from_numpy(training_set) #convert to torch tensor
training_labels = torch.from_numpy(training_labels) #convert to torch tensor

In [10]:
validation_set = torch.from_numpy(validation_set) #convert to torch tensor
validation_labels = torch.from_numpy(validation_labels) #convert to torch tensor

In [11]:
training_labels = training_labels.long()

In [12]:
validation_labels = validation_labels.long()

In [13]:
print (training_labels.shape)
print (validation_labels.shape)

torch.Size([1637])
torch.Size([155])


In [14]:
dataset = torch.utils.data.TensorDataset(training_set, training_labels)
train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

In [15]:
val_set = torch.utils.data.TensorDataset(validation_set, validation_labels)
validation_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=True)

# Model definition


In [16]:
# RESNET_18 model, with pretrained weights and fine-tuned to 2 classes
net = models.resnet18(pretrained=True)
num_ftrs = net.fc.in_features
net.fc = nn.Linear(num_ftrs, 2)

# VGG_11 model with pretrained weights and fine-tuned to 2 classes
#net = models.vgg11_bn(pretrained=True)
#net.classifier._modules['6'] = nn.Linear(4096, 2)

criterion = nn.CrossEntropyLoss()

In [17]:
if (CUDA):
    net.cuda()

In [18]:
optimizer = optim.Adam(net.parameters(), lr=0.0001, weight_decay=1e-3)

# Training

In [19]:
for epoch in range(10):  
    
    net.train()
    running_loss = 0.0
    running_corrects = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        if (CUDA):
            inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
        else:
            inputs, labels = Variable(inputs), Variable(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        _, preds = torch.max(outputs.data, 1)
        loss = criterion(outputs, labels)
       
        loss.backward()
        optimizer.step()

        # statistics
        running_loss += loss.data[0] * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(training_set)
    #epoch_acc = running_corrects / len(training_set)
    metric = calculate_metric(preds,labels.data)
    print('Loss: {:.4f} Metric: {:.4f}'.format(epoch_loss, metric))

    #VALIDATION
    running_loss = 0.0
    running_corrects = 0.0
    net.eval() #set in the evaluation mode (important for Dropout and Batchnorm)
    
    for j, val_data in enumerate(validation_loader, 0):
        # get the inputs
        val_inputs, val_labels = val_data

        # wrap them in Variable
        if (CUDA):
            val_inputs, val_labels = Variable(val_inputs.cuda()), Variable(val_labels.cuda())
        else:
            val_inputs, val_labels = Variable(val_inputs), Variable(val_labels)

        outputs = net(val_inputs)
        _, preds = torch.max(outputs.data, 1)
        loss = criterion(outputs, val_labels)

        # statistics
        running_loss += loss.data[0] * val_inputs.size(0)
        running_corrects += torch.sum(preds == val_labels.data)

    val_epoch_loss = running_loss / len(validation_set)
    #val_epoch_acc = running_corrects / len(validation_set)
    metric = calculate_metric(preds,val_labels.data)
    print('Validation Loss: {:.4f} Validation Metric: {:.4f}'.format(val_epoch_loss, metric))
    
print('Finished Training')

Loss: 0.6914 Metric: 0.5288
Validation Loss: 1.1550 Validation Metric: 0.5000
Loss: 0.5197 Metric: 0.7336
Validation Loss: 0.7237 Validation Metric: 0.5199
Loss: 0.2943 Metric: 0.9439
Validation Loss: 0.9040 Validation Metric: 0.4808
Loss: 0.1105 Metric: 0.9773
Validation Loss: 1.3446 Validation Metric: 0.6333
Loss: 0.0534 Metric: 0.9444
Validation Loss: 2.1948 Validation Metric: 0.5659
Loss: 0.0521 Metric: 0.9643
Validation Loss: 2.1077 Validation Metric: 0.5659
Loss: 0.0854 Metric: 0.9318
Validation Loss: 3.5628 Validation Metric: 0.5312
Loss: 0.0866 Metric: 0.9667
Validation Loss: 1.5689 Validation Metric: 0.5618
Loss: 0.0508 Metric: 1.0000
Validation Loss: 1.6082 Validation Metric: 0.6264
Loss: 0.0321 Metric: 0.9615
Validation Loss: 2.0256 Validation Metric: 0.6538
Finished Training


# Traininig on augmented set

In [20]:
X = load_large_dataset('augmented_train_set_1')
Y = load_large_dataset('augmented_train_set_labels_1')

In [21]:
print (X.shape)
print (Y.shape)

(8185, 3, 121, 145)
(8185,)


In [22]:
X_padded = np.pad(X,((0,0),(0,0),(51,52),(40,39)), 'constant') #pad with zeros to get 224x224 dimension of images
X = X_padded
X = np.float32(X)

In [23]:
training_set = torch.from_numpy(X) #convert to torch tensor
training_labels = torch.from_numpy(Y) #convert to torch tensor

In [24]:
training_labels = training_labels.long()

In [25]:
dataset = torch.utils.data.TensorDataset(training_set, training_labels)
train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# Model definition

In [26]:
# RESNET_18 model, with pretrained weights and fine-tuned to 2 classes
net = models.resnet18(pretrained=True)
num_ftrs = net.fc.in_features
net.fc = nn.Linear(num_ftrs, 2)

# VGG_11 model with pretrained weights and fine-tuned to 2 classes
#net = models.vgg11_bn(pretrained=True)
#net.classifier._modules['6'] = nn.Linear(4096, 2)

criterion = nn.CrossEntropyLoss()

In [27]:
if (CUDA):
    net.cuda()

In [28]:
optimizer = optim.Adam(net.parameters(), lr=0.0001, weight_decay=1e-3)

In [29]:
for epoch in range(50):  
    
    net.train()
    running_loss = 0.0
    running_corrects = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        if (CUDA):
            inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
        else:
            inputs, labels = Variable(inputs), Variable(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        _, preds = torch.max(outputs.data, 1)
        loss = criterion(outputs, labels)
       
        loss.backward()
        optimizer.step()

        # statistics
        running_loss += loss.data[0] * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(training_set)
    #epoch_acc = running_corrects / len(training_set)
    metric = calculate_metric(preds,labels.data)
    print('Loss: {:.4f} Metric: {:.4f}'.format(epoch_loss, metric))

    #VALIDATION
    running_loss = 0.0
    running_corrects = 0.0
    net.eval() #set in the evaluation mode (important for Dropout and Batchnorm)
    
    for j, val_data in enumerate(validation_loader, 0):
        # get the inputs
        val_inputs, val_labels = val_data

        # wrap them in Variable
        if (CUDA):
            val_inputs, val_labels = Variable(val_inputs.cuda()), Variable(val_labels.cuda())
        else:
            val_inputs, val_labels = Variable(val_inputs), Variable(val_labels)

        outputs = net(val_inputs)
        _, preds = torch.max(outputs.data, 1)
        loss = criterion(outputs, val_labels)

        # statistics
        running_loss += loss.data[0] * val_inputs.size(0)
        running_corrects += torch.sum(preds == val_labels.data)

    val_epoch_loss = running_loss / len(validation_set)
    #val_epoch_acc = running_corrects / len(validation_set)
    metric = calculate_metric(preds,val_labels.data)
    print('Validation Loss: {:.4f} Validation Metric: {:.4f}'.format(val_epoch_loss, metric))
    
print('Finished Training')

Loss: 0.6585 Metric: 0.6357
Validation Loss: 0.8189 Validation Metric: 0.5000
Loss: 0.6453 Metric: 0.5471
Validation Loss: 0.7595 Validation Metric: 0.5483
Loss: 0.6359 Metric: 0.6350
Validation Loss: 0.7920 Validation Metric: 0.6108
Loss: 0.6344 Metric: 0.5037
Validation Loss: 0.9421 Validation Metric: 0.5278
Loss: 0.6256 Metric: 0.6761
Validation Loss: 1.0757 Validation Metric: 0.5714
Loss: 0.6242 Metric: 0.5550
Validation Loss: 0.9415 Validation Metric: 0.5382
Loss: 0.6208 Metric: 0.6316
Validation Loss: 0.7787 Validation Metric: 0.6735
Loss: 0.6165 Metric: 0.6272
Validation Loss: 0.7297 Validation Metric: 0.4382
Loss: 0.6109 Metric: 0.7779
Validation Loss: 0.7498 Validation Metric: 0.4801
Loss: 0.6091 Metric: 0.6095
Validation Loss: 0.7434 Validation Metric: 0.5167
Loss: 0.6018 Metric: 0.7324
Validation Loss: 0.8833 Validation Metric: 0.5735
Loss: 0.5944 Metric: 0.6845
Validation Loss: 0.8249 Validation Metric: 0.4618
Loss: 0.5878 Metric: 0.6299
Validation Loss: 0.9309 Validation M