In [None]:
!pip install tensorflow==2.10.1

In [None]:
#importing libraries
import os
import cv2
import itertools
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.utils import shuffle
from glob import glob
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import*
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from tensorflow.keras.applications import InceptionV3

In [None]:
# Shared configuration: label set, input geometry and batch size
class_names = ["brain_glioma", "brain_menin", "brain_tumor"]
num_class = len(class_names)  # one output unit per entry of class_names
H = W = 224                   # input height / width in pixels
channel = 3
IMAGE_SHAPE = [H, W]
batch_size = 64

In [None]:
# Root folder of the dataset; load_data() globs "<path>/<class folder>/*.jpg"
path = "/kaggle/input/multi-cancer/Multi Cancer/Brain Cancer"
# Checkpoint file handed to ModelCheckpoint.
# NOTE(review): the file name says "vgg" but the network built below is InceptionV3.
model_path = "/kaggle/working//models/vgg_for_brain_tumor-v2.h5"

# Functions

In [None]:
def create_dir(path):
    """Create directory ``path`` (including missing parents) if it is absent.

    The call is idempotent: an already existing directory is left untouched.

    Args:
        path: directory path to create.

    Raises:
        FileExistsError: if ``path`` exists but is not a directory.
    """
    # exist_ok=True removes the check-then-create race of
    # `if not exists: makedirs` and surfaces the "path is a file" case
    # instead of silently skipping it.
    os.makedirs(path, exist_ok=True)

In [None]:
def load_data(path, split=0.1, seed=42):
    """Split the image files under ``path`` into train / validation / test lists.

    Files are discovered with the pattern ``<path>/*/*.jpg``. The validation and
    the test list each receive ``int(len(images) * split)`` files; the rest is
    the training list.

    Args:
        path: root folder that contains one sub-folder per class.
        split: fraction of all images used for validation and, again, for test.
        seed: random seed used for the shuffle and both splits, so that the
            same files land in the same subset on every run.

    Returns:
        (train, valid, test): three lists of file paths.

    Raises:
        ValueError: if no ``.jpg`` file is found under ``path`` (or if the
            resulting split size is rejected by ``train_test_split``).
    """
    # glob order depends on the filesystem; sort first so the seeded shuffle
    # below is reproducible across runs and machines.
    images = sorted(glob(os.path.join(path, "*", "*.jpg")))
    if not images:
        raise ValueError(f"No .jpg images found under {path!r}")
    images = shuffle(images, random_state=seed)

    split_rate = int(len(images) * split)
    train, valid = train_test_split(images, test_size=split_rate, random_state=seed)
    train, test = train_test_split(train, test_size=split_rate, random_state=seed)
    return train, valid, test

In [None]:
def process_image(path):
    """Load one image from disk and derive its class index from its folder name.

    Args:
        path: image file path, as ``bytes`` (the form delivered by
            ``tf.numpy_function``) or ``str``.

    Returns:
        (image, class_idx): ``image`` is a float32 array of shape (H, W, 3)
        in RGB channel order with values scaled to [0, 1]; ``class_idx`` is an
        int32 scalar array holding the position of the parent folder name in
        ``class_names``.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file.
        ValueError: if the parent folder name is not listed in ``class_names``.
    """
    # tf.numpy_function hands strings over as bytes
    if isinstance(path, bytes):
        path = path.decode()

    image = cv2.imread(path, cv2.IMREAD_COLOR)
    # cv2.imread signals failure by returning None instead of raising
    if image is None:
        raise FileNotFoundError(f"Could not read image: {path}")

    # OpenCV decodes to BGR; the ImageNet-pretrained backbone expects RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # cv2.resize takes the target size as (width, height)
    image = cv2.resize(image, (W, H))
    # scale to [0, 1] and store as float32
    image = (image / 255.0).astype(np.float32)

    # the label is the name of the folder that directly contains the file
    class_name = os.path.basename(os.path.dirname(path))
    class_idx = np.array(class_names.index(class_name), dtype=np.int32)

    return image, class_idx

In [None]:
def parse(path):
    """Map a file-path tensor to an ``(image, one_hot_label)`` pair.

    Wraps ``process_image`` with ``tf.numpy_function`` so it can be used inside
    a ``tf.data`` pipeline.

    Args:
        path: scalar string tensor holding one image path.

    Returns:
        (image, labels): float32 tensor of static shape (H, W, channel) and a
        one-hot float tensor of static shape (num_class,).
    """
    image, labels = tf.numpy_function(process_image, [path], (tf.float32, tf.int32))
    # Use the shared constants instead of literals so that the label depth and
    # image shape follow the configuration cell.
    labels = tf.one_hot(labels, num_class)
    # numpy_function outputs carry no static shape; declare it explicitly
    image.set_shape([H, W, channel])
    labels.set_shape([num_class])

    return image, labels

In [None]:
def tf_dataset(images, batch=None):
    """Build a batched, prefetching ``tf.data.Dataset`` from image paths.

    Args:
        images: sequence of image file paths.
        batch: batch size. When omitted, the notebook-level ``batch_size`` is
            used (this is what every call effectively got before, because the
            argument used to be ignored).

    Returns:
        Dataset yielding ``(image_batch, one_hot_label_batch)`` pairs in the
        same order as ``images``.
    """
    if batch is None:
        batch = batch_size

    dataset = tf.data.Dataset.from_tensor_slices(images)
    # Decode images in parallel; map() stays deterministic by default, so the
    # element order (relied upon when matching predictions to paths) is kept.
    dataset = dataset.map(parse, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch)
    dataset = dataset.prefetch(8)
    return dataset

# Model | Inception-V3

In [None]:
# ImageNet-pretrained InceptionV3 used as feature extractor (no classifier top)
incp = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=IMAGE_SHAPE + [channel],
)

In [None]:
# Freeze every backbone layer so its pretrained weights are not updated
for backbone_layer in incp.layers:
    backbone_layer.trainable = False

In [None]:
# Flatten the backbone output so it can feed the dense classification layer
x = layers.Flatten()(incp.output)

In [None]:
# Classification head: softmax over num_class outputs on the flattened features
prediction_layer = layers.Dense(units=num_class, activation='softmax')(x)
# Full network from the backbone input to the new head
model = Model(inputs=incp.input, outputs=prediction_layer)

In [None]:
# Create the folder that will hold the checkpoint; derived from model_path so
# the directory and the checkpoint location cannot drift apart
create_dir(os.path.dirname(model_path))

In [None]:
callbacks = [
    # Write the model to model_path only when the validation loss improves
    ModelCheckpoint(filepath=model_path, monitor='val_loss', save_best_only=True, verbose=1),
    # Multiply the learning rate by 0.1 after 5 epochs without val_accuracy gain
    ReduceLROnPlateau(monitor='val_accuracy', patience=5, factor=0.1, min_lr=1e-6, verbose=1),
]

In [None]:
# Categorical cross-entropy pairs with the one-hot labels and softmax output
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer overview of the assembled model
model.summary()

# Data Preprocessing

In [None]:
# Lists of image file paths for the three subsets (default split fraction 0.1)
train, valid, test = load_data(path)

In [None]:
# Sanity check: number of files in each subset
print(f" Train:{len(train)} Valid:{len(valid)}, Test:{len(test)}")

In [None]:
# Wrap each list of paths in a tf.data pipeline (built in train, valid, test order)
train_df, valid_df, test_df = (
    tf_dataset(subset) for subset in (train, valid, test)
)

In [None]:
# Pull a single batch and show the shape of its image tensor
for image_batch, label_batch in train_df.take(1):
    print(image_batch.numpy().shape)

In [None]:
# Validate on the dedicated validation split. The test split must stay unseen
# during training: validation metrics drive ModelCheckpoint and
# ReduceLROnPlateau, so validating on test_df would leak test information into
# model selection and make the final test score optimistic.
model.fit(
    train_df,
    validation_data=valid_df,
    epochs=20,
    callbacks=callbacks
)

In [None]:
# Loss and accuracy of the current model on the test split
model.evaluate(test_df)

In [None]:
import itertools
#plot confusion matrix
def plt_confusion_matrix(cm, classes, normalize=False, title="Confusion Matrix", cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: square array of counts, rows = true class, columns = predicted class.
        classes: tick labels, one per row/column of ``cm``.
        normalize: when True, every row is divided by its sum so that cells
            show per-class fractions instead of raw counts.
        title: figure title.
        cmap: matplotlib colormap used for the heat map.
    """
    cm = np.asarray(cm)

    # Normalise BEFORE drawing so the image, the colour bar and the cell texts
    # all show the same values.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # rows without any sample stay at 0 instead of producing NaN
        cm = np.divide(
            cm.astype(float),
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )
        print("normalized confusion matrix")

    else:
        print("confusion matrix without normalization")

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_mark = np.arange(len(classes))
    plt.xticks(tick_mark, classes, rotation=45)
    plt.yticks(tick_mark, classes)

    # cells darker than half of the maximum get white text for contrast
    thresh = cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        cell_text = format(cm[i, j], ".2f") if normalize else cm[i, j]
        plt.text(j, i, cell_text, horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.xlabel("predicted label")
    plt.ylabel("True label")

In [None]:
#prediction
# Model outputs for the whole test dataset (progress bar suppressed)
prediction = model.predict(test_df, verbose=0)

In [None]:
# Display the predictions rounded to the nearest integer
np.around(prediction)

In [None]:
# Index of the highest-scoring class for every test sample
y_pred_classes = np.argmax(prediction, axis=1)

In [None]:
def get_test_data_class(test_path, class_list=None):
    """Return the integer class index of every image path.

    The class of a file is the name of the folder that directly contains it.

    Args:
        test_path: iterable of image file paths.
        class_list: ordered class names; the returned index is the position in
            this list. Defaults to the notebook-level ``class_names``.

    Returns:
        1-D int32 array with one class index per path, in input order.

    Raises:
        ValueError: if a folder name is not contained in ``class_list``.
    """
    if class_list is None:
        class_list = class_names

    # basename(dirname(p)) is the parent folder name, independent of the
    # platform's path separator
    indices = [
        class_list.index(os.path.basename(os.path.dirname(file_path)))
        for file_path in test_path
    ]
    return np.array(indices, dtype=np.int32)

In [None]:
# True labels of the test split, in the same order as the predictions.
# `test` is the path list returned by load_data(); `x_test` was never defined.
classes = get_test_data_class(test)

In [None]:
# Pass the full label set so the matrix keeps one row/column per class even
# when a class is missing from the test labels and predictions; otherwise its
# shape would no longer match class_names in the plot.
cm = confusion_matrix(
    y_true=classes,
    y_pred=y_pred_classes,
    labels=list(range(len(class_names))),
)

In [None]:
# Render the raw-count confusion matrix of the test split
plt_confusion_matrix(cm, class_names, title="confusion matrix")

In [None]:
from sklearn.metrics import accuracy_score, f1_score

# Define a function to evaluate the model on a given dataset
def evaluate_model(model, dataset):
    """Compute accuracy and per-class F1 of ``model`` over a batched dataset.

    Args:
        model: Keras model whose ``predict_on_batch`` returns one row of
            per-class scores per sample.
        dataset: iterable of ``(images, labels)`` batches with one-hot labels.

    Returns:
        (accuracy, f1_scores): overall accuracy and an array with one F1 value
        per output class of the model, in class-index order.
    """
    y_true = []
    y_pred = []
    n_classes = None

    for images, labels in dataset:
        # predict_on_batch avoids the per-call overhead and the progress-bar
        # output that model.predict() produces when called inside a loop
        scores = model.predict_on_batch(images)
        n_classes = scores.shape[-1]
        y_pred.extend(np.argmax(scores, axis=1))
        y_true.extend(np.argmax(labels, axis=1))

    accuracy = accuracy_score(y_true, y_pred)
    # Fix the label set so the result always has one entry per class, even if
    # a class never occurs in y_true / y_pred (callers index it by class id).
    label_ids = None if n_classes is None else np.arange(n_classes)
    f1_scores = f1_score(y_true, y_pred, labels=label_ids, average=None)

    return accuracy, f1_scores

# Evaluate the model on the test dataset
test_accuracy, test_f1_scores = evaluate_model(model, test_df)

# Evaluate the model on the train dataset (optional)
train_accuracy, train_f1_scores = evaluate_model(model, train_df)

# Print the results (F1 entries follow the order of class_names)
print("Test Accuracy:", test_accuracy)
print("F1-Score (Glioma):", test_f1_scores[0])
print("F1-Score (Menin):", test_f1_scores[1])
print("F1-Score (Tumor):", test_f1_scores[2])

# If you want to print the results for the train dataset as well
# print("Train Accuracy:", train_accuracy)
# print("Train F1-Score (Glioma):", train_f1_scores[0])
# print("Train F1-Score (Menin):", train_f1_scores[1])
# print("Train F1-Score (Tumor):", train_f1_scores[2])