In [1]:
!pip install tensorflow==2.10.1

Collecting tensorflow==2.10.1
  Downloading tensorflow-2.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (578.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m578.1/578.1 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting keras<2.11,>=2.10.0 (from tensorflow==2.10.1)
  Downloading keras-2.10.0-py2.py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m71.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting keras-preprocessing>=1.1.1 (from tensorflow==2.10.1)
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting protobuf<3.20,>=3.9.2 (from tensorflow==2.10.1)
  Downloading protobuf-3.19.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m 

In [39]:
#importing libraries
import os
import cv2
import itertools
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.utils import shuffle
from glob import glob
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import*
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from tensorflow.keras.applications import MobileNet

In [2]:
#parameters
H, W = 224, 224
channel = 3
IMAGE_SHAPE = [224, 224]
num_class = 3
batch_size = 64
class_names = ["brain_glioma", "brain_menin", "brain_tumor"]

In [3]:
model_path = "/kaggle/working//models/vgg_for_brain_tumor-v2.h5"
path = "/kaggle/input/multi-cancer/Multi Cancer/Brain Cancer"

# Functions

In [4]:
#create folder for save augmented images
def create_dir(path):
    if not os.path.exists(path):
        os.makedirs(path)

In [5]:
def load_data(path, split=0.1):
    images = shuffle(glob(os.path.join(path, "*", "*.jpg")))
    split_rate = int(len(images) * split)
    train, valid = train_test_split(images, test_size=split_rate, random_state=42)
    train, test = train_test_split(train, test_size=split_rate, random_state=42)
    return train, valid, test

In [6]:
def process_image(path):
    #decode the path
    path = path.decode()
    #read image
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    #resize the image
    image = cv2.resize(image, [224, 224])
    #scale the image
    image = image / 255.0
    #change the data type of image
    image = image.astype(np.float32)

    #labeling the image
    class_name = path.split("/")[-2]
    class_idx = class_names.index(class_name)
    class_idx = np.array(class_idx, dtype=np.int32)

    return image, class_idx

In [7]:
def parse(path):
    image, labels = tf.numpy_function(process_image, [path], (tf.float32, tf.int32))
    labels = tf.one_hot(labels, 3)
    image.set_shape([224, 224, 3])
    labels.set_shape(3)
  
    return image, labels

In [8]:
#tensorflow dataset
def tf_dataset(images, batch=8):
    dataset = tf.data.Dataset.from_tensor_slices((images))
    dataset = dataset.map(parse)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(8)
    return dataset

# Model | MobileNet

In [40]:
mob = MobileNet(input_shape=IMAGE_SHAPE+[channel], weights='imagenet', include_top=False)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_224_tf_no_top.h5


In [41]:
for layer in mob.layers:
    layer.trainable = False

In [42]:
x = layers.Flatten()(mob.output)

In [43]:
prediction_layer = layers.Dense(num_class, activation='softmax' )(x)
#declare the model
model = Model(inputs=mob.input, outputs=prediction_layer)

In [16]:
create_dir("/kaggle/working/models")

In [44]:
callbacks = [
    ModelCheckpoint(model_path, verbose=1, save_best_only=True),
    ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, patience=5, min_lr=1e-6, verbose=1)
]

In [45]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [46]:
model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv1 (Conv2D)              (None, 112, 112, 32)      864       
                                                                 
 conv1_bn (BatchNormalizatio  (None, 112, 112, 32)     128       
 n)                                                              
                                                                 
 conv1_relu (ReLU)           (None, 112, 112, 32)      0         
                                                                 
 conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)     288       
                                                                 
 conv_dw_1_bn (BatchNormaliz  (None, 112, 112, 32)     128       
 ation)                                                    

# Data Preprocessing

In [34]:
train, valid, test = load_data(path)

In [35]:
print(f" Train:{len(train)} Valid:{len(valid)}, Test:{len(test)}")

 Train:12000 Valid:1500, Test:1500


In [36]:
train_df = tf_dataset(train)
valid_df = tf_dataset(valid)
test_df = tf_dataset(test)

In [37]:
for i, j in train_df.take(1):
    print(i.numpy().shape)

(64, 224, 224, 3)


In [47]:
model.fit(
    train_df,
    validation_data=test_df,
    epochs=20,
    callbacks=callbacks
)

Epoch 1/20
Epoch 1: val_loss improved from inf to 0.17790, saving model to /kaggle/working//models/vgg_for_brain_tumor-v2.h5
Epoch 2/20

KeyboardInterrupt: 

In [None]:
model.evaluate(test_df)

In [None]:
import itertools
#plot confusion matrix
def plt_confusion_matrix(cm, classes, normalize=False, title="Confusion Matrix", cmap=plt.cm.Blues):
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_mark = np.arange(len(classes))
    plt.xticks(tick_mark, classes, rotation=45)
    plt.yticks(tick_mark, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.axis]
        print("normalized confusion matrix")

    else:
        print("confusion matrix without normalization")

    thresh = cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j], horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.xlabel("predicted label")
    plt.ylabel("True label")

In [None]:
#prediction
prediction = model.predict(test_df, verbose=0)

In [None]:
np.around(prediction)

In [None]:
y_pred_classes = np.argmax(prediction, axis=1)

In [None]:
#function for get labels of test set
def get_test_data_class(test_path):
    names = []
    for i in test_path:
        name = i.split("/")[-2]
        name_idx = class_names.index(name)
        names.append(name_idx)
    names = np.array(names, dtype=np.int32)
    return names

In [None]:
classes = get_test_data_class(x_test)

In [None]:
cm = confusion_matrix(y_true=classes, y_pred=y_pred_classes)

In [None]:
plt_confusion_matrix(cm=cm, classes=class_names, title="confusion matrix", )