In [4]:
import numpy as np 
import pandas as pd 
import os
from os import listdir
import tensorflow as tf 
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, Input, ZeroPadding2D, MaxPooling2D, BatchNormalization, Activation
from keras import backend as K
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle   #shuffling the data improves the model
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model,load_model
from PIL import Image
import cv2
import imutils

Using TensorFlow backend.


In [13]:
# Root folder of the raw dataset. The augmentation cell appends 'yes' / 'no' to this
# string by plain concatenation, so the trailing slash is required.
image_dir= 'C:/Users/Shiv/Downloads/brain_tumor_dataset/'

In [14]:
# Generate augmented copies of every image found in a directory
def data_augmention(file_dir, total_samples, final_dir):
    """Write randomly transformed copies of each image in ``file_dir`` to ``final_dir``.

    Parameters
    ----------
    file_dir : str
        Directory whose entries are read with ``cv2.imread``.
    total_samples : int
        Controls how many augmented images are produced per source image.
        NOTE: the loop breaks only once the counter exceeds ``total_samples``,
        so ``total_samples + 1`` images are written per source image. This is
        kept as-is so that existing dataset sizes do not change.
    final_dir : str
        Directory the augmented JPEG files are saved to; created if missing.

    Entries that OpenCV cannot decode (non-image files, sub-folders) are
    skipped with a message instead of aborting the whole run.
    """
    aug_data = ImageDataGenerator(rotation_range=10, width_shift_range=0.1, height_shift_range=0.1,
                                  shear_range=0.1, brightness_range=(0.3, 1.0), horizontal_flip=True,
                                  vertical_flip=True, fill_mode='nearest')

    # Saving into a missing folder fails, so make sure the target exists up front.
    os.makedirs(final_dir, exist_ok=True)

    for filename in listdir(file_dir):
        image = cv2.imread(os.path.join(file_dir, filename))
        if image is None:
            # cv2.imread signals "could not read" by returning None rather than raising.
            print(f'Skipping unreadable file: {filename}')
            continue

        image = image.reshape((1,) + image.shape)     # add the batch axis expected by flow()
        # splitext handles extensions of any length ('.jpeg', '.png', ...).
        save_prefix = 'aug_' + os.path.splitext(filename)[0]
        i = 0
        for batch in aug_data.flow(x=image, batch_size=1, save_to_dir=final_dir,
                                   save_prefix=save_prefix, save_format='jpg'):
            i += 1
            if i > total_samples:
                break

In [15]:
# Generated augmented data is saved under augmented_data_path (in 'yes' and 'no' sub-folders)
augmented_data_path = 'C:/Users/Shiv/Downloads/brain_tumor_dataset/Aug_data/'
# Augment the examples labelled 'yes', i.e. the tumorous examples
data_augmention(file_dir=image_dir+'yes', total_samples=6, final_dir=augmented_data_path+'yes')
# Augment the examples labelled 'no', i.e. the non-tumorous examples
# (a larger total_samples is used here - presumably to balance the two classes; TODO confirm)
data_augmention(file_dir=image_dir+'no', total_samples=9, final_dir=augmented_data_path+'no')

In [6]:
# Crop an image to its largest bright region, resize it and scale it to [0, 1]
def extract_feature(image, size):
    """Return a cropped, resized and normalised copy of ``image``.

    The image is converted to grayscale, blurred and thresholded; the largest
    external contour of the resulting mask is used to crop the original image
    to that contour's extreme points. The crop is resized to ``size`` and
    divided by 255.

    Parameters
    ----------
    image : numpy.ndarray
        BGR image as returned by ``cv2.imread``.
    size : tuple
        ``(width, height)`` of the output.

    Returns
    -------
    numpy.ndarray
        Float array of the resized image with values divided by 255.

    Raises
    ------
    ValueError
        If ``image`` is None (``cv2.imread`` returns None for unreadable files).

    Notes
    -----
    When no contour is found (e.g. an all-dark image) or the contour is
    degenerate and would give an empty crop, the whole image is resized instead
    of raising.
    """
    if image is None:
        raise ValueError('extract_feature received None instead of an image')

    width, height = size

    # Grayscale + slight blur so the threshold is less sensitive to noise
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)

    # Binary mask, then erode/dilate to remove small specks
    thresh = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.erode(thresh, None, iterations=2)
    thresh = cv2.dilate(thresh, None, iterations=2)

    # Find contours in the mask; grab_contours hides OpenCV version differences
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    cropped = image
    if len(cnts) > 0:
        c = max(cnts, key=cv2.contourArea)
        # Extreme coordinates of the largest contour (points are stored as [x, y])
        left, right = int(c[:, :, 0].min()), int(c[:, :, 0].max())
        top, bottom = int(c[:, :, 1].min()), int(c[:, :, 1].max())
        candidate = image[top:bottom, left:right]
        # A one-pixel-wide/high contour gives an empty slice that cv2.resize rejects
        if candidate.size > 0:
            cropped = candidate

    final_image = cv2.resize(cropped, dsize=(width, height), interpolation=cv2.INTER_CUBIC)
    feature = final_image / 255.         # normalize values

    return feature

In [19]:
def load_data(dir_list, image_size):
    """Load, preprocess and label every readable image in the given directories.

    Parameters
    ----------
    dir_list : list of str
        Directories to read. A directory whose final path component ends with
        'yes' is labelled 'Yes'; every other directory is labelled 'No'.
    image_size : tuple
        ``(width, height)`` passed on to ``extract_feature``.

    Returns
    -------
    x : numpy.ndarray
        Stacked feature arrays, shuffled.
    y : list of str
        'Yes' / 'No' labels, shuffled consistently with ``x``.

    Files that ``cv2.imread`` cannot decode are skipped with a message.
    """
    x = []
    y = []

    for directory in dir_list:
        # normpath drops a trailing separator, so '.../yes/' and '.../yes' get the same label
        folder_name = os.path.basename(os.path.normpath(directory))
        label = 'Yes' if folder_name.endswith('yes') else 'No'

        for filename in listdir(directory):
            path = os.path.join(directory, filename)
            image = cv2.imread(path)
            if image is None:
                # cv2.imread returns None (it does not raise) for unreadable files
                print(f'Skipping unreadable file: {path}')
                continue
            x.append(extract_feature(image, image_size))    # cropped, resized, normalised image
            y.append(label)

    x = np.array(x)
    x, y = shuffle(x, y)      # shuffle features and labels together

    print(f'Number of examples is: {len(x)}')
    print(f'x shape is: {x.shape}')

    return x, y

In [20]:
# Every image is resized to this (width, height) before it is fed to the network
IMG_WIDTH = IMG_HEIGHT = 150

# Folders holding the augmented 'yes' and 'no' examples
augmented_yes = augmented_data_path + 'yes'
augmented_no = augmented_data_path + 'no'

x, y = load_data(dir_list=[augmented_yes, augmented_no], image_size=(IMG_WIDTH, IMG_HEIGHT))

Number of examples is: 2064
x shape is: (2064, 150, 150, 3)


In [21]:
# Two-step encoding of the string labels: strings -> integer ids -> one-hot rows
label_encoder = LabelEncoder()
onehot_encoder = OneHotEncoder(sparse=False)

values = np.array(y)
# OneHotEncoder expects a 2-D column, hence the reshape to (n_samples, 1)
integer_encoded = label_encoder.fit_transform(values).reshape(-1, 1)
y = onehot_encoder.fit_transform(integer_encoded)

In [22]:
from keras.utils import to_categorical
# y is already one-hot encoded at this point, so passing it to to_categorical again would
# produce a 3-D (n, 2, 2) tensor. One-hot the integer class ids instead.
label = to_categorical(integer_encoded, num_classes=len(label_encoder.classes_))
print(label.shape[0])

2064


In [23]:
def split_data(x, y, test_size=0.25, random_state=None):
    """Split the data into training, validation and test sets.

    ``test_size`` is the fraction held out from training; that held-out part is
    then split in half into the test and validation sets.

    Parameters
    ----------
    x, y : array-like
        Features and labels with the same number of samples.
    test_size : float, default 0.25
        Fraction of the data that goes to validation + test combined.
    random_state : int or None, default None
        Seed forwarded to both ``train_test_split`` calls. Pass an integer to
        get the same split on every run; None keeps the previous unseeded
        behaviour.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_val, y_val, x_test, y_test)``
    """
    x_train, x_test_val, y_train, y_test_val = train_test_split(
        x, y, test_size=test_size, random_state=random_state)
    # Halve the held-out portion: first half -> test, second half -> validation
    x_test, x_val, y_test, y_val = train_test_split(
        x_test_val, y_test_val, test_size=0.5, random_state=random_state)

    return x_train, y_train, x_val, y_val, x_test, y_test

In [24]:
# 70% training; the remaining 30% is halved into validation and test sets by split_data
trainData, trainLabel, valData, valLabel, testData, testLabel = split_data(x, y, test_size=0.3)
# cv2.resize(dsize=(width, height)) yields arrays shaped (height, width, channels),
# so the network input shape must list the height (rows) first.
INPUT_SHAPE = (IMG_HEIGHT, IMG_WIDTH, 3)

In [30]:
# Small CNN: two conv / batch-norm / max-pool stages followed by a dense classifier head
model = Sequential([
    # Stage 1
    Conv2D(32, (7, 7), activation='relu', input_shape=INPUT_SHAPE, use_bias=False),
    BatchNormalization(axis=3, name='bn0'),
    MaxPooling2D(pool_size=(4, 4)),
    # Stage 2
    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(4, 4)),
    Dropout(0.25),
    # Classifier head: two softmax outputs, one per class
    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.25),
    Dense(2),
    Activation('softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [31]:
# Print the layer-by-layer architecture with output shapes and parameter counts
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 144, 144, 32)      4704      
_________________________________________________________________
bn0 (BatchNormalization)     (None, 144, 144, 32)      128       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 36, 36, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 34, 34, 32)        9248      
_________________________________________________________________
batch_normalization_2 (Batch (None, 34, 34, 32)        128       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 8, 8, 32)          0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 8, 8, 32)         

In [32]:
# Train for 22 epochs in mini-batches of 32, evaluating on the validation split after each epoch
model.fit(x=trainData, y=trainLabel, batch_size=32, epochs=22, validation_data=(valData, valLabel))

Instructions for updating:
Use tf.cast instead.
Train on 1444 samples, validate on 310 samples
Epoch 1/22
Epoch 2/22
Epoch 3/22
Epoch 4/22
Epoch 5/22
Epoch 6/22
Epoch 7/22
Epoch 8/22
Epoch 9/22
Epoch 10/22
Epoch 11/22
Epoch 12/22
Epoch 13/22
Epoch 14/22
Epoch 15/22
Epoch 16/22
Epoch 17/22
Epoch 18/22
Epoch 19/22
Epoch 20/22
Epoch 21/22
Epoch 22/22


<keras.callbacks.callbacks.History at 0x2740102f160>

In [33]:
# Predict for the test set: one row per test image holding the two softmax outputs.
# The single-image prediction cell treats column 1 as the "tumor" score.
Y_newpred=model.predict(testData)
print(Y_newpred)

[[9.99555409e-01 4.44570818e-04]
 [1.32125959e-01 8.67874026e-01]
 [9.99996662e-01 3.38232189e-06]
 [9.98530030e-01 1.46991306e-03]
 [1.11216992e-04 9.99888778e-01]
 [1.79525506e-09 1.00000000e+00]
 [8.83772771e-07 9.99999166e-01]
 [9.99996424e-01 3.58932334e-06]
 [9.99993205e-01 6.77384878e-06]
 [9.99960542e-01 3.94454437e-05]
 [9.99997020e-01 2.98975556e-06]
 [9.99988794e-01 1.11522922e-05]
 [9.99996543e-01 3.48292861e-06]
 [3.81269347e-04 9.99618649e-01]
 [9.99256313e-01 7.43666373e-04]
 [9.98884857e-01 1.11509871e-03]
 [9.99989748e-01 1.02758040e-05]
 [3.22121414e-05 9.99967813e-01]
 [9.99955773e-01 4.42583587e-05]
 [2.30969101e-01 7.69030869e-01]
 [9.94276702e-01 5.72332554e-03]
 [9.54734348e-03 9.90452647e-01]
 [9.97981250e-01 2.01878720e-03]
 [3.29650100e-03 9.96703446e-01]
 [9.99762833e-01 2.37150234e-04]
 [6.13062759e-04 9.99386907e-01]
 [2.73153946e-05 9.99972701e-01]
 [1.06846727e-07 9.99999881e-01]
 [9.99386430e-01 6.13524811e-04]
 [5.76016068e-01 4.23983991e-01]
 [1.680347

In [34]:
# Evaluate on the held-out test set. scores[1] is the 'accuracy' metric requested in
# model.compile (scores[0] is the loss), reported here as a percentage.
scores = model.evaluate(testData, testLabel, verbose=0)
print('Accuracy: {}% \n Error: {}%'.format(scores[1]*100, 100 - scores[1]*100))

Accuracy: 97.09677696228027% 
 Error: 2.9032230377197266%


In [36]:
# Persist the trained model; model_prediction() loads it back from this file name
model.save('ShivModel.h5')

In [7]:
def model_prediction(input_image, model_path='ShivModel.h5', image_size=(150, 150)):
    """Run the saved model on a single image file.

    Parameters
    ----------
    input_image : str
        Path of the image to classify.
    model_path : str, default 'ShivModel.h5'
        File the trained model is loaded from.
    image_size : tuple, default (150, 150)
        ``(width, height)`` the image is resized to by ``extract_feature``;
        it has to match the size the model was trained with.

    Returns
    -------
    The output of ``model.predict`` for a batch containing this one image.

    Raises
    ------
    ValueError
        If the image cannot be read from ``input_image``.

    Notes
    -----
    The model is loaded from disk on every call; when classifying many images,
    load it once and call ``predict`` directly.
    """
    image = cv2.imread(input_image)
    if image is None:
        # cv2.imread returns None instead of raising for a missing/unreadable file
        raise ValueError(f'Could not read an image from: {input_image}')

    feature = extract_feature(image, image_size)
    batch = np.expand_dims(feature, axis=0)      # add the leading batch axis

    model = load_model(model_path)
    return model.predict(batch)

In [12]:
# Path of the single image to classify with model_prediction()
image = 'C:/Users/Shiv/Desktop/test_file/No.jpg'

In [13]:
# Classify the image and report which of the two class scores is larger
result = model_prediction(image)
answer = result[0]                      # scores of the only image in the batch
tumor_detected = answer[1] > answer[0]  # index 1 is the "tumor" score
print('Yes, there is a tumor.' if tumor_detected else 'No, there is not.')

No, there is not.
