In [84]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

## Importing the augmented dataset:
*   `dataset_augmentation.ipynb` - Loads the original Brain Tumor Dataset (3064 T1-Weighted MRI images) and augments the dataset using techniques such as rotating, mirroring, flipping over an axis and salting.
*   `augmented_images.npz` - Contains the full dataset after augmentation (15320 images and 15320 labels)


In [85]:
# Mount Google Drive into the Colab runtime; the dataset path used below lives under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [86]:
# Location of the augmented dataset archive on the mounted Google Drive.
# Alternative relative location (presumably for running outside Colab -- confirm):
#   images_path = '../dataset/augmented_images.npz'
images_path = '/content/gdrive/MyDrive/augmented_images.npz'

In [87]:
# Pull the image and label arrays out of the .npz archive; the context manager
# closes the underlying file as soon as both arrays have been read into memory.
with np.load(images_path) as data:
  images, labels = data['images'], data['labels']

print('images: ', images.shape)
print('labels:', labels.shape)

images:  (15320, 128, 128)
labels: (15320,)


## Reformatting the data
*   reformat into a tensorflow-friendly shape
*   shuffle the data
*   split the dataset into training, validation and test sets in an 80/10/10 ratio


In [88]:
num_labels = 3    # width of the one-hot label encoding
num_channels = 1  # MRI images are grayscale
image_size = 128  # images are image_size x image_size pixels

def reformat(dataset, labels):
  """Convert raw image/label arrays into the layout used by the model.

  Args:
    dataset: array of images that can be reshaped to
      (-1, image_size, image_size, num_channels).
    labels: 1-D array of integer class ids; ids outside
      range(num_labels) produce an all-zero row.

  Returns:
    Tuple (dataset, labels): float32 images with an explicit channel axis
    and float32 one-hot labels of shape (len(labels), num_labels).
  """
  target_shape = (-1, image_size, image_size, num_channels)
  dataset = dataset.reshape(target_shape).astype(np.float32)
  # Broadcasting a column of class ids against the row [0, 1, ..., num_labels - 1]
  # yields a boolean one-hot matrix.
  class_ids = np.arange(num_labels)
  one_hot = labels[:, np.newaxis] == class_ids
  return dataset, one_hot.astype(np.float32)

# Reformat only while the labels are still the raw 1-D class ids. Re-running
# this cell on already one-hot labels would broadcast them against
# np.arange(num_labels) a second time and silently produce an (N, 1, 3) array,
# so the guard keeps the cell idempotent.
if labels.ndim == 1:
  images, labels = reformat(images, labels)
print('images:', images.shape)
print('labels:', labels.shape)

images: (15320, 128, 128, 1)
labels: (15320, 3)


In [89]:
# Shuffling the two numpy arrays in unison (row i of images stays paired with
# row i of labels). A fixed random_state makes the permutation reproducible
# across kernel restarts.
from sklearn import utils
images, labels = utils.shuffle(images, labels, random_state=42)

In [90]:
# 80/10/10 split: carve off 80% for training, then halve the remainder into
# validation and test sets. random_state is fixed so the same examples end up
# in each split on every run, which keeps reported accuracies comparable.
# NOTE(review): the archive was augmented before this split, so augmented
# variants of one source image may land in different splits -- confirm whether
# that leakage is acceptable for the reported validation/test accuracy.
train_dataset, remaining_dataset, train_labels, remaining_labels = train_test_split(
    images, labels, train_size=0.8, random_state=42)
valid_dataset, test_dataset, valid_labels, test_labels = train_test_split(
    remaining_dataset, remaining_labels, test_size=0.5, random_state=42)

print('Training set:', train_dataset.shape, train_labels.shape)
print('Validation set:', valid_dataset.shape, valid_labels.shape)
print('Test set:', test_dataset.shape, test_labels.shape)

Training set: (12256, 128, 128, 1) (12256, 3)
Validation set: (1532, 128, 128, 1) (1532, 3)
Test set: (1532, 128, 128, 1) (1532, 3)


In [91]:
# Spot-check a few one-hot encoded training labels.
print(*(train_labels[i] for i in (0, 10416, 2010)))

[1. 0. 0.] [0. 0. 1.] [1. 0. 0.]


In [92]:
# Spot-check a few one-hot encoded validation labels.
print(*(valid_labels[i] for i in (30, 1531, 200)))

[0. 1. 0.] [1. 0. 0.] [1. 0. 0.]


## Defining the brain tumor classification model


In [93]:
batch_size = 16

# Training input pipeline: slice the arrays into (image, label) pairs, shuffle
# them through a 1000-element buffer and group them into mini-batches.
train_tf_dataset = (
    tf.data.Dataset
    .from_tensor_slices((train_dataset, train_labels))
    .shuffle(1000)
    .batch(batch_size)
)

In [94]:
# Architecture:
# Input: 128 x 128 x 1
# Conv1: 128 x 128 x 64
# MaxPool1: 64 x 64 x 64
# Conv2: 64 x 64 x 128
# MaxPool2: 32 x 32 x 128
# Conv3: 32 x 32 x 256
# MaxPool3: 16 x 16 x 256
# FC: 16 * 16 * 256 , 256
# Output: 256, 3

filter_size = 3
depth_conv1 = 64
depth_conv2 = 128
depth_conv3 = 256

# Side length of the feature map after the three 2x2 'SAME' max-pool layers.
# Each pool halves the side and rounds up, which for three pools equals
# ceil(image_size / 8); this is 16 for image_size = 128. Deriving it keeps the
# fully connected layer in sync with image_size instead of hard-coding 16 * 16.
pooled_size = (image_size + 7) // 8
fc_input_size = pooled_size * pooled_size * depth_conv3

# NOTE(review): stddev=0.1 on a layer with fc_input_size inputs together with
# biases of 1.0 gives large initial activations (the recorded training log
# starts at a loss of ~226) -- consider a fan-in scaled initialiser; confirm.

# Convolution kernels are [height, width, in_channels, out_channels];
# dense matrices are [inputs, outputs].
weights = {
    'wc1' : tf.Variable(tf.random.truncated_normal([filter_size, filter_size, num_channels, depth_conv1], stddev=0.1)),
    'wc2' : tf.Variable(tf.random.truncated_normal([filter_size, filter_size, depth_conv1, depth_conv2], stddev=0.1)),
    'wc3' : tf.Variable(tf.random.truncated_normal([filter_size, filter_size, depth_conv2, depth_conv3], stddev=0.1)),
    'wfc' : tf.Variable(tf.random.truncated_normal([fc_input_size, depth_conv3], stddev = 0.1)),
    'wout': tf.Variable(tf.random.truncated_normal([depth_conv3, num_labels], stddev = 0.1)),
}

# One bias per output channel / unit of the matching layer, initialised to 1.0.
biases = {
    'bc1' : tf.Variable(tf.constant(1.0, shape=[depth_conv1])),
    'bc2' : tf.Variable(tf.constant(1.0, shape=[depth_conv2])),
    'bc3' : tf.Variable(tf.constant(1.0, shape=[depth_conv3])),
    'bfc' : tf.Variable(tf.constant(1.0, shape=[depth_conv3])),
    'bout': tf.Variable(tf.constant(1.0, shape=[num_labels])),
}


In [95]:
# Wrapper functions for the convolutional and max pooling layers
def conv2d(x, W, b, stride=1):
  """Convolve `x` with kernel `W`, add bias `b` and apply ReLU.

  Args:
    x: 4-D input tensor (the model passes batch x height x width x channels).
    W: convolution kernel passed straight to tf.nn.conv2d.
    b: bias vector added to the convolution output.
    stride: step used along both spatial axes.

  Returns:
    The rectified feature map; 'SAME' padding is used for the convolution.
  """
  window_strides = [1, stride, stride, 1]
  convolved = tf.nn.conv2d(x, W, strides=window_strides, padding='SAME')
  return tf.nn.relu(tf.nn.bias_add(convolved, b))

def maxpool(x, k = 2):
  """Max-pool `x` over non-overlapping k x k spatial windows with 'SAME' padding.

  The same list is used for the window size and the strides, so consecutive
  windows do not overlap.
  """
  window = [1, k, k, 1]
  return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')


In [96]:
def braintumor_classification_model(data):
  """Run the forward pass of the three-block CNN and return raw class logits.

  Each block is a conv2d layer (with ReLU) followed by max pooling. The result
  is flattened, passed through one fully connected ReLU layer and then a
  linear output layer. Parameters are read from the module-level `weights`
  and `biases` dictionaries.

  Args:
    data: batch of input images as a 4-D tensor.

  Returns:
    2-D tensor of unnormalised class scores, one row per input example
    (no softmax is applied here).
  """
  conv_layer1 = conv2d(data, weights['wc1'], biases['bc1'])
  conv_layer1 = maxpool(conv_layer1)

  conv_layer2 = conv2d(conv_layer1, weights['wc2'], biases['bc2'])
  conv_layer2 = maxpool(conv_layer2)

  conv_layer3 = conv2d(conv_layer2, weights['wc3'], biases['bc3'])
  conv_layer3 = maxpool(conv_layer3)

  # Flatten everything except the batch axis. The batch dimension is given as
  # -1 so the reshape also works when the static batch size is unknown (None),
  # e.g. for symbolic inputs or inside tf.function.
  conv_layer3_shape = conv_layer3.get_shape().as_list()
  flat_dim = conv_layer3_shape[1] * conv_layer3_shape[2] * conv_layer3_shape[3]
  fc_layer = tf.reshape(conv_layer3, [-1, flat_dim])
  fc_layer = tf.add(tf.matmul(fc_layer, weights['wfc']), biases['bfc'])
  fc_layer = tf.nn.relu(fc_layer)

  output_layer = tf.add(tf.matmul(fc_layer, weights['wout']), biases['bout'])
  return output_layer

In [97]:
def compute_loss(labels, logits):
  """Return the mean softmax cross-entropy between `labels` and raw `logits`.

  Args:
    labels: target distribution per example (one-hot rows in this notebook).
    logits: unnormalised class scores; softmax is applied internally.

  Returns:
    Scalar tensor: the per-example cross-entropy averaged over the batch.
  """
  per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
      labels=labels, logits=logits)
  return tf.reduce_mean(per_example_loss)

In [98]:
def accuracy(predictions, labels):
  """Return the percentage of rows whose arg-max class matches the label.

  Args:
    predictions: 2-D array-like of per-class scores, one row per example.
    labels: 2-D array-like of one-hot targets with the same layout.

  Returns:
    Accuracy as a percentage (0.0 - 100.0).
  """
  predicted_classes = np.argmax(predictions, 1)
  true_classes = np.argmax(labels, 1)
  num_correct = np.sum(predicted_classes == true_classes)
  return 100.0 * num_correct / predictions.shape[0]

In [99]:
num_steps = 100
display_step = 10
learning_rate = 0.01

optimizer = tf.keras.optimizers.Adam(learning_rate)

# Every parameter the optimizer should update, listed explicitly rather than
# inferred from whatever the tape happened to watch.
trainable_variables = list(weights.values()) + list(biases.values())

# Run training for the given number of steps (one mini-batch per step).
for step, (batch_x, batch_y) in enumerate(train_tf_dataset.take(num_steps), 1):

  # Forward pass, recorded on the tape so gradients can be derived from it.
  with tf.GradientTape() as g:
    logits = braintumor_classification_model(batch_x)
    loss = compute_loss(batch_y, logits)

  # Backward pass and parameter update. Explicit gradient/apply_gradients is
  # used instead of optimizer.minimize(..., tape=g) because the `tape`
  # argument is not accepted by every tf.keras optimizer version.
  gradients = g.gradient(loss, trainable_variables)
  optimizer.apply_gradients(zip(gradients, trainable_variables))

  # Report loss and accuracy on the current training batch only (the logits
  # were computed before this step's parameter update).
  if step % display_step == 0:
    train_prediction = tf.nn.softmax(logits)
    train_acc = accuracy(train_prediction, batch_y)
    print("step: %i, loss: %f, train acc: %f" % (step, loss, train_acc))



step: 10, loss: 225.957947, train acc: 56.250000
step: 20, loss: 4.734693, train acc: 50.000000
step: 30, loss: 2.129059, train acc: 68.750000
step: 40, loss: 1.204660, train acc: 43.750000
step: 50, loss: 1.514118, train acc: 43.750000
step: 60, loss: 1.384868, train acc: 50.000000
step: 70, loss: 0.705682, train acc: 68.750000
step: 80, loss: 0.983278, train acc: 56.250000
step: 90, loss: 0.765288, train acc: 75.000000
step: 100, loss: 1.121211, train acc: 68.750000


### Validate on batches

In [100]:
num_batches = 4
# Whole number of examples per batch: 4 batches, 383 examples in each batch.
batch_size = len(valid_dataset) // num_batches
print("Num of examples in one batch:", batch_size)
valid_tf_dataset = (
    tf.data.Dataset
    .from_tensor_slices((valid_dataset, valid_labels))
    .batch(batch_size)
)

Num of examples in one batch: 383


In [101]:
# Weight each batch's accuracy by its number of examples so the final figure is
# the exact percentage of correctly classified validation examples. Dividing a
# plain sum of batch accuracies by num_batches is only right when the set size
# is an exact multiple of num_batches; otherwise an extra, smaller batch
# appears and the result is inflated.
weighted_acc_sum = 0.0
num_examples = 0

for (batch_x, batch_y) in valid_tf_dataset:
    valid_prediction = tf.nn.softmax(braintumor_classification_model(batch_x))
    examples_in_batch = int(batch_x.shape[0])
    weighted_acc_sum += accuracy(valid_prediction, batch_y) * examples_in_batch
    num_examples += examples_in_batch

# An empty dataset reports 0 instead of dividing by zero.
valid_acc = weighted_acc_sum / num_examples if num_examples else 0.0
print("Validation accuracy: ", valid_acc)

Validation accuracy:  59.39947780678851


### Test on batches

In [102]:
num_batches = 4
# Whole number of examples per batch: 4 batches, 383 examples in each batch.
batch_size = len(test_dataset) // num_batches
print("Num of examples in one batch:", batch_size)
test_tf_dataset = (
    tf.data.Dataset
    .from_tensor_slices((test_dataset, test_labels))
    .batch(batch_size)
)

Num of examples in one batch: 383


In [103]:
# Weight each batch's accuracy by its number of examples so the final figure is
# the exact percentage of correctly classified test examples. Dividing a plain
# sum of batch accuracies by num_batches is only right when the set size is an
# exact multiple of num_batches; otherwise an extra, smaller batch appears and
# the result is inflated.
weighted_acc_sum = 0.0
num_examples = 0

for (batch_x, batch_y) in test_tf_dataset:
    test_prediction = tf.nn.softmax(braintumor_classification_model(batch_x))
    examples_in_batch = int(batch_x.shape[0])
    weighted_acc_sum += accuracy(test_prediction, batch_y) * examples_in_batch
    num_examples += examples_in_batch

# An empty dataset reports 0 instead of dividing by zero.
test_acc = weighted_acc_sum / num_examples if num_examples else 0.0
print("Test accuracy: ", test_acc)

Test accuracy:  60.96605744125326
