In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv
from PIL import Image
import os

# Data preparation

Declaring constants

In [23]:
SIZE = 224
PATH = r"D:\College\Capstone Project 2\data"

In [24]:
SIZE = 224

path = PATH + r"\brain_tumor_dataset"

yes_tumor_path = path + r"/yes"
no_tumor_path = path + r"/no"

yes_tumor = os.listdir(yes_tumor_path)
no_tumor = os.listdir(no_tumor_path)

len(os.listdir(no_tumor_path)), len(os.listdir(yes_tumor_path))

(1500, 1500)

Resizing all images in `224x224` pixel format

In [4]:
# Resize yes_tumor images
for p in yes_tumor:
    path = yes_tumor_path+"/"+p
    # resize_save(path)
    try:
        im = Image.open(path)
        im = im.resize((SIZE, SIZE))
        im.save(path)
    except:
        print(path)
        break
# Resize no_tumor
for p in no_tumor:
    path = no_tumor_path+"/"+p
    # resize_save(path)
    try:
        im = Image.open(path)
        im = im.resize((SIZE, SIZE))
        im.save(path)
    except:
        print(path)
        break

In [5]:
len(os.listdir(yes_tumor_path)), len(os.listdir(no_tumor_path))

(1500, 1500)

Creating `data` - `label` pairs
> yes_tumor -> 1
>>
> no_tumor -> 0

In [6]:
data = []
label = []

for p in yes_tumor:
  path = yes_tumor_path+"/"+p
  image = cv.imread(path)
  data.append(image)
  label.append(1)

for p in no_tumor:
  path = no_tumor_path+"/"+p
  image = cv.imread(path)
  data.append(image)
  label.append(0)

len(data) == len(label) == 3000
# data[0]

True

Normalizing data

In [7]:
data = np.array(data)/255.0
label = np.array(label)

data.shape, label.shape

((3000, 224, 224, 3), (3000,))

Splitting the data in `train` and `test` in **80-20** ratio

In [8]:
from sklearn.model_selection import train_test_split as tts
x_train, x_test, y_train, y_test = tts(data, label, test_size = 0.20)

x_train.shape, x_test.shape, y_train.shape, y_test.shape

((2400, 224, 224, 3), (600, 224, 224, 3), (2400,), (600,))

# Training the model

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy

In [None]:
model_1 = Sequential()

model_1.add(Conv2D(64, kernel_size=3, activation='relu'))
model_1.add(MaxPooling2D(pool_size = (2,2)))

model_1.add(Conv2D(64, kernel_size=3, activation='relu', kernel_initializer="he_uniform"))
model_1.add(Conv2D(64, kernel_size=3, activation='relu'))

model_1.add(Conv2D(64, kernel_size=3, activation='relu', kernel_initializer="he_uniform"))
model_1.add(Conv2D(64, kernel_size=3, activation='relu'))

model_1.add(Flatten())

model_1.add(Dense(1, activation='sigmoid'))


model_1.compile(optimizer=Adam(), loss = BinaryCrossentropy(),  metrics = ["accuracy"])


history = model_1.fit(x_train, y_train, epochs = 10, validation_data=(x_test, y_test), batch_size = 16)


model_1.save("Big_Brain_tumor_model_1.h5")

In [15]:
model_1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 64)      1792      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 111, 111, 64)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 64)      36928     
                                                                 
 conv2d_2 (Conv2D)           (None, 107, 107, 64)      36928     
                                                                 
 conv2d_3 (Conv2D)           (None, 105, 105, 64)      36928     
                                                                 
 conv2d_4 (Conv2D)           (None, 103, 103, 64)      36928     
                                                        

# Generating reports

In [25]:
from  keras.models import load_model
model_1 = load_model(PATH + r"\Trained_models\Big_Brain_tumor_model_1.h5")

In [12]:
y_p = (model_1.predict(x_test) > 0.5).astype("int32")
# y_p = y_p.squeeze()

In [20]:
from sklearn.metrics import confusion_matrix, classification_report

In [17]:
y_p = y_p.squeeze()

In [18]:
y_p.shape, y_test.shape

((600,), (600,))

Confusion matrix showing result in form of
<br>
[[`TN`, `FP`], <br>
 [`FN`, `TP`]]

In [19]:
confusion_matrix(y_test, y_p)

array([[286,   0],
       [  5, 309]], dtype=int64)

So, there were `5` incorrect predictions out of `600` total images

In [22]:
print(classification_report(y_test, y_p))

              precision    recall  f1-score   support

           0       0.98      1.00      0.99       286
           1       1.00      0.98      0.99       314

    accuracy                           0.99       600
   macro avg       0.99      0.99      0.99       600
weighted avg       0.99      0.99      0.99       600



The `confusion matrics'reports` show that the `accuracy` of model on `test dataset` is `99.000%`

# Experimenting with shuffled data in same dataset

In [36]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from keras.preprocessing.image import array_to_img
import matplotlib.axes as ax

Experiment 1

In [54]:
incorrect = 0

for i in range(len(y_p)):
    if y_p[i] != y_test[i]:
        print(i)
        incorrect += 1
incorrect, len(y_test)

257
312
342
396
520


(5, 600)

Experiment 2

In [44]:
incorrect = 0

for i in range(len(y_p)):
    if y_p[i] != y_test[i]:
        print(i)
        incorrect += 1
incorrect, len(y_test)

47
88
127
216
286
305
316
329
334
539


(10, 600)

Experiment 3

In [47]:
incorrect = 0

for i in range(len(y_p)):
    if y_p[i] != y_test[i]:
        print(i)
        incorrect += 1
incorrect, len(y_test)

168
219
229
230
427
533
534
567


(8, 600)

Experiment 4

In [50]:
incorrect = 0

for i in range(len(y_p)):
    if y_p[i] != y_test[i]:
        print(i)
        incorrect += 1
incorrect, len(y_test)

5
16
188
260
399
470
511


(7, 600)

Experiment 5

In [74]:
incorrect = 0

for i in range(len(y_p)):
    if y_p[i] != y_test[i]:
        print(i)
        incorrect += 1
incorrect, len(y_test)

76
112
206
295
329
397
415


(7, 600)

Experiment 6

In [58]:
incorrect = 0

for i in range(len(y_p)):
    if y_p[i] != y_test[i]:
        print(i)
        incorrect += 1
incorrect, len(y_test)

31
100
176
275
296
329
367
418
557


(9, 600)

Experiment 7

In [61]:
incorrect = 0

for i in range(len(y_p)):
    if y_p[i] != y_test[i]:
        print(i)
        incorrect += 1
incorrect, len(y_test)

64
170
205
273
303
420
448
518


(8, 600)

Experiment 8

In [64]:
incorrect = 0

for i in range(len(y_p)):
    if y_p[i] != y_test[i]:
        print(i)
        incorrect += 1
incorrect, len(y_test)

50
161
256
314
363
589


(6, 600)

Experiment 9

In [68]:
incorrect = 0

for i in range(len(y_p)):
    if y_p[i] != y_test[i]:
        print(i)
        incorrect += 1
incorrect, len(y_test)

39
196
247
321
397
507


(6, 600)

Experiment 10

In [71]:
incorrect = 0

for i in range(len(y_p)):
    if y_p[i] != y_test[i]:
        print(i)
        incorrect += 1
incorrect, len(y_test)

143
163
181
214
249
298
310
371


(8, 600)

So after various tests around 5 to 10 incorrect reports in 600 which gives an estimate of accuracy i.e. **above `98% `**

In [26]:
accuracies =  {"Experiment 1" : 99.600, 
               "Experiment 2" : 98.333,
               "Experiment 3" : 98.667,
               "Experiment 4" : 98.833,
               "Experiment 5" : 98.833,
               "Experiment 6" : 98.500,
               "Experiment 7" : 98.667,
               "Experiment 8" : 99.000,
               "Experiment 9" : 99.000,
               "Experiment 10" : 98.667}

In [78]:
average_accuracy = 0
s = 0
for key in accuracies.keys():
    s += accuracies[key]

average_accuracy = s / 10

In [79]:
print(f"After 10 experiments the average accuracy of this model for detecting brain tumor successfully is {average_accuracy}")

After 10 experiments the average accuracy of this model for detecting brain tumor successfully is 98.81
