In [46]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D,MaxPooling2D, Flatten, Dense, Dropout
import os, io,zipfile
from PIL import Image
import tqdm
import cv2
from pathlib import Path

In [3]:
# load data - data has training (tumor,no-tumor) and test (tumor,no-tumor) datasets

zip_file_path=("/content/MRIBrainTumor.zip")
extracted_dir=("/content/extracted_data")
!mkdir -p $extracted_dir
with zipfile.ZipFile(zip_file_path,'r') as zip_ref:
  zip_ref.extractall(extracted_dir)
!ls $extracted_dir

Testing  Training


In [103]:
from pathlib import Path
from keras.preprocessing import image
import numpy as np

# path_to_no_tumor_directory and "path_to_pituitary_tumor_directory"
no_tumor_path = Path("/content/extracted_data/Training/no_tumor")
pituitary_tumor_path = Path("/content/extracted_data/Training/pituitary_tumor")

images = []  # List for images
labels = []  # Each time load the image and put in image array, add labels 1 tumor / 0 no-tumor to the labels array

for img_path in no_tumor_path.glob("*jpg"):
    img = image.load_img(img_path, target_size=(224, 224))  # Load img from disk - load img files' contents to memory
    img_array = image.img_to_array(img)  # Convert img to list of images

    images.append(img_array)  # Add img to list of images
    labels.append(0)  # For each not-tumor: value is 0

for img_path in pituitary_tumor_path.glob("*jpg"):
    img = image.load_img(img_path, target_size=(224, 224))  # Load img from disk - load img files' contents to memory
    img_array = image.img_to_array(img)  # Convert img to list of images
    images.append(img_array)  # Add img to list of images
    labels.append(1)  # For each tumor: value is 1

# create a single numpy array (not a python list) with all the images we loaded
x_train=np.array(images)
#convert labels to a numpy array
y_train=np.array(labels)


In [104]:
np.unique(y_train)  # 2 classes 0 and 1

array([0, 1])

In [105]:
x_train.shape  #(1222, 224, 224, 3)
#y_train.shape #(1222,)

(1222, 224, 224, 3)

In [106]:
pd.Series(y_train).value_counts()

1    827
0    395
dtype: int64

In [107]:
xtrain, xtest, ytrain, ytest = train_test_split(x_train, y_train, random_state=10, test_size=0.2)


In [108]:
xtest.min(), xtest.max(),

(0.0, 255.0)

In [109]:
# feature scaling  between 0 to 1
xtrain=xtrain.astype('float32')
xtest=xtest.astype("float32")
xtrain/=255
xtest/=255
print(xtrain.min(), xtrain.max())
print(xtest.min(), xtest.max())


0.0 1.0
0.0 1.0


In [110]:
# one-hot encode the labels #no-tumor 0 and tumor 1 -> convert label index to categorical encoding
# convert class vectors to binary class metrices - array with an elemnt set to 1 and the rest 0

from tensorflow.keras.utils import to_categorical
ytrain=to_categorical(ytrain, 2) # number of classes is 2
ytest=to_categorical(ytest, 2)
ytest


array([[0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.

In [111]:
# Train Model Convolutional Neural Network # VGG Shorten
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Dense, Flatten

# maxpool: scale down convolution laers - keep largest values # dropout usually after maxpool # extra pixels left over an edge (ima size is not exactly /3) : add padding to img 0 3xtrat 0 added to edge : same padding
model=Sequential()

model.add(Conv2D(32,(3,3),padding='same', activation='relu',input_shape=(224,224,3)))  # image 2D # each filter is capable of detecting 1 pattern - sinze of window 3x3 : use when creating tiles from each image -> org to 3x3 tiles
model.add(Conv2D(32,(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25)) # % of nn to cut (bet 25 to 50)

model.add(Conv2D(64,(3,3), padding='same', activation='relu'))
model.add(Conv2D(64,(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25)) # % of nn to cut (bet 25 to 50

# transition from convolution layer to dense layer : no work with D data -> flatten layer
model.add(Flatten())  # to output
model.add(Dense(512,activation='relu'))
model.add(Dropout(0.5))  # 50 : workharder to get the las answer correct

model. add(Dense(2, activation='sigmoid')) # if was mroe classes : softmax

model.compile(optimizer='adam',loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_32 (Conv2D)          (None, 224, 224, 32)      896       
                                                                 
 conv2d_33 (Conv2D)          (None, 222, 222, 32)      9248      
                                                                 
 max_pooling2d_16 (MaxPooli  (None, 111, 111, 32)      0         
 ng2D)                                                           
                                                                 
 dropout_24 (Dropout)        (None, 111, 111, 32)      0         
                                                                 
 conv2d_34 (Conv2D)          (None, 111, 111, 64)      18496     
                                                                 
 conv2d_35 (Conv2D)          (None, 109, 109, 64)      36928     
                                                      

In [112]:
# train
history=model.fit(xtrain,ytrain,epochs=5,shuffle=True,validation_split=0.1)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [114]:
# batch_size=32,epochs=1,shuffle=True
# batch size: how many at once fit in nn during training -32-128 images
# epochs how mant times trainig data during the process
# validation_data=(x_test,y_test), validate our training - this is data that model will never see dring training and test the acc of the training data
# randomize the order of the training data : shuffle True

In [115]:
model_structure=model.to_json() # save NN -write json to text
f=Path("model_structure.json") # path object
f.write_text(model_structure)

#save weights
model.save_weights('braintumor.weights.h5')

In [124]:
from pathlib import Path
from keras.preprocessing import image
import numpy as np
import matplotlib.pyplot as plt

# Test Model Prediction
class_labels = ["no_tumor", "tumor"]

f=Path("/content/model_structure.json")
model_structure=f.read_text()
model=model_from_json(model_structure)
model.load_weights("/content/braintumor.weights.h5")

img=image.load_img("/content/brain.jpg", target_size=(224,224))  # resize the image to size the nn expect 224x224
# 3D numpy array -> nn
image_to_test=image.img_to_array(img)/255  # RGB scale 0-1


In [126]:
# batch of images as once- create batches even 1 img
# batches as 4D array  1: list of images,  3:image data
# 4D add new axis to it with numpy: np.expand_dims()
list_of_images=np.expand_dims(image_to_test,axis=0)  #axis=0 :new axis is the 1st dimension
results=model.predict(list_of_images)

single_result=results[0] # check 1st result - 1 img we have -array with 10 element - instaed of returniong 10 values - grab the array element with highest value -probability
most_likely_class_index=int(np.argmax(single_result))

# grab liklihood value
class_likelihood=single_result[most_likely_class_index]
class_labels=class_labels[most_likely_class_index]




In [127]:
# VGG : 16 layers
from keras.preprocessing import image
from keras.applications import vgg16

model=vgg16.VGG16()
img = image.load_img("/content/brain.jpg", target_size=(224, 224)) # vgg image size

x = image.img_to_array(img) #convert img to array to feed to nn
import numpy as np
x = np.expand_dims(x, axis=0) # 4D
x=vgg16.preprocess_input(x)


predictions=model.predict(x)  # get back a 1000 element array of floating point numbers - each element in array tell us how likely our picture contains each of the 1000 object the model was training to recognize
# decode tell us name of the most likely matches -> give top 5 most likely matches
predicted_classes=vgg16.decode_predictions(predictions, top=9) # get top 9 mathces

for imagenetid,name, likelihood in predicted_classes[0]:
  print("prediction:{}-{:2f}".format(name,likelihood))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/imagenet_class_index.json
prediction:pitcher-0.639047
prediction:bolo_tie-0.058239
prediction:vase-0.034219
prediction:water_jug-0.034005
prediction:buckle-0.030014
prediction:whiskey_jug-0.029362
prediction:mask-0.023460
prediction:hook-0.023370
prediction:goblet-0.014760
