In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub # type: ignore
# Fetch the Kaggle dataset and keep the value returned by kagglehub, which later
# cells treat as the local path of the downloaded files.
arjunbasandrai_medical_scan_classification_dataset_path = kagglehub.dataset_download(
    'arjunbasandrai/medical-scan-classification-dataset'
)

print('Data source import complete.')


Downloading from https://www.kaggle.com/api/v1/datasets/download/arjunbasandrai/medical-scan-classification-dataset?dataset_version_number=10...


100%|██████████| 15.0G/15.0G [03:04<00:00, 87.8MB/s]


Extracting files...
Data source import complete.


In [None]:
import matplotlib.pyplot as plt
import numpy as np

import os

import tensorflow as tf
from tensorflow import keras
from keras.utils import image_dataset_from_directory
from keras import Sequential, Input
from keras.layers import RandomFlip, RandomContrast, RandomRotation, RandomZoom, Rescaling
from keras.layers import GlobalAveragePooling2D, Dense, Conv2D, BatchNormalization, Dropout, Flatten
from keras.applications import EfficientNetV2L as base
from keras.optimizers import Adam
from keras.losses import SparseCategoricalCrossentropy as scc
from keras.callbacks import EarlyStopping, LearningRateScheduler

from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

# Last expression of the cell, so the notebook displays the installed TensorFlow version.
f'Tensorflow version: {tf.__version__}'

'Tensorflow version: 2.17.1'

In [None]:
# Create the distribution strategy up front; the model is later built inside its scope.
strategy = tf.distribute.MirroredStrategy()

# Report how many replicas the strategy keeps in sync.
print(
    'DEVICES AVAILABLE: {}'.format(strategy.num_replicas_in_sync)
)

DEVICES AVAILABLE: 1


In [None]:
import matplotlib.pyplot as plt
import numpy as np

import os

import tensorflow as tf
from tensorflow import keras
from keras.utils import image_dataset_from_directory
from keras import Sequential, Input
from keras.layers import RandomFlip, RandomContrast, RandomRotation, RandomZoom, Rescaling
from keras.layers import GlobalAveragePooling2D, Dense, Conv2D, BatchNormalization, Dropout, Flatten
from keras.applications import EfficientNetV2L as base
from keras.optimizers import Adam
from keras.losses import SparseCategoricalCrossentropy as scc
from keras.callbacks import EarlyStopping, LearningRateScheduler

from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

import kagglehub # Importing kagglehub to access the downloaded dataset path

# NOTE: this bare expression is not the last statement of its cell, so nothing is displayed for it.
f'Tensorflow version: {tf.__version__}'

# ... (other code) ...

# Hyper-parameters shared by the data-loading, model-building and training cells.
image_size = 256   # images are loaded as image_size x image_size
batch_size = 32    # batch size passed to the dataset loader
epochs = 100       # maximum number of epochs passed to model.fit
lr_init = 0.003    # Adam's starting learning rate and the target of the warm-up schedule

# Assuming 'arjunbasandrai_medical_scan_classification_dataset_path'
# from the first cell contains the path to the downloaded dataset:

# Search for the 'Kidney Cancer' folder within the downloaded dataset
import os
def find_kidney_cancer_path(root_path, folder_name='Kidney Cancer'):
    """Locate the first directory called ``folder_name`` underneath ``root_path``.

    Args:
        root_path: Directory at which the top-down ``os.walk`` search starts.
        folder_name: Name of the directory to look for. Defaults to
            ``'Kidney Cancer'`` so existing single-argument calls behave as before.

    Returns:
        The joined path of the first matching directory met during the walk,
        or ``None`` when no directory with that name exists under ``root_path``.
    """
    for current_dir, subdirs, _files in os.walk(root_path):
        if folder_name in subdirs:
            return os.path.join(current_dir, folder_name)
    return None  # no directory with the requested name was found

# Start the search from the full dataset path. Later cells treat this variable as a path
# string; indexing a string with [0] yields only its first character (typically '/'),
# which made os.walk crawl from the filesystem root instead of the downloaded dataset.
kidney_cancer_path = find_kidney_cancer_path(arjunbasandrai_medical_scan_classification_dataset_path)

if kidney_cancer_path is None:
    # Fail here with a clear message instead of letting later cells hit a NameError on train_ds.
    raise FileNotFoundError(
        "Error: 'Kidney Cancer' folder not found within the downloaded dataset.")


def _subdirectories(path):
    """Return the sorted names of the directories directly inside ``path``."""
    return sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry)))


# NOTE(review): the recorded run reported "1 classes" (['Kidney Cancer']), which suggests
# the located folder merely wraps another folder that holds the real class directories.
# Confirm against the dataset layout. Step into a lone wrapper only while it has
# sub-folders of its own, so a flat single-class layout is left untouched.
_children = _subdirectories(kidney_cancer_path)
while len(_children) == 1:
    _candidate = os.path.join(kidney_cancer_path, _children[0])
    _candidate_children = _subdirectories(_candidate)
    if not _candidate_children:
        break
    kidney_cancer_path, _children = _candidate, _candidate_children


def _load_split(subset):
    """Load one side of the 80/20 split; both sides share the seed and the fraction."""
    return tf.keras.utils.image_dataset_from_directory(
        kidney_cancer_path,
        validation_split=0.2,
        subset=subset,
        seed=123,
        image_size=(image_size, image_size),
        batch_size=batch_size)


train_ds = _load_split("training")
val_ds = _load_split("validation")

if len(train_ds.class_names) < 2:
    # With a single class the cross-entropy over one logit is always 0 (the recorded
    # run shows loss: 0.0000e+00), so training cannot learn anything useful.
    print("Warning: fewer than 2 classes found under {!r}: {}".format(
        kidney_cancer_path, train_ds.class_names))

Found 10000 files belonging to 1 classes.
Using 8000 files for training.
Found 10000 files belonging to 1 classes.
Using 2000 files for validation.


In [None]:
# Label names as reported by the training dataset.
class_names = train_ds.class_names

# Split the validation batches in two: the first half becomes the test set and the
# remaining batches stay in the validation set. val_ds is rebound here, so running
# this cell a second time on its own would halve the validation set again.
val_batches = val_ds.cardinality()
test_ds, val_ds = val_ds.take(val_batches // 2), val_ds.skip(val_batches // 2)
class_names

['Kidney Cancer']

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Add prefetching, with the buffer size left to tf.data, to all three pipelines.
train_ds, val_ds, test_ds = (
    split.prefetch(buffer_size=AUTOTUNE)
    for split in (train_ds, val_ds, test_ds)
)

In [None]:
# Early stopping on validation accuracy: an improvement must be at least 0.001,
# patience is 10 epochs, and the best weights are restored when training stops.
es = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.001,
    patience=10,
    restore_best_weights=True,
)

def create_model(base_model):
    """Build and compile the classifier: augmentation -> ``base_model`` -> dense head.

    Args:
        base_model: Callable feature extractor applied to the augmented
            ``(image_size, image_size, 3)`` input tensor.

    Returns:
        A compiled ``tf.keras`` model that outputs one raw logit per entry of
        ``class_names`` (no softmax layer; the loss is built with ``from_logits=True``).
    """
    # Named `inputs` so the builtin `input` is not shadowed.
    inputs = Input(shape=(image_size, image_size, 3))

    # Augmentation layers applied to the input before the feature extractor.
    x = RandomFlip('horizontal')(inputs)
    x = RandomRotation(0.2)(x)
    x = RandomZoom(0.2, 0.2)(x)
    x = RandomContrast(0.2)(x)

    x = base_model(x)
    x = Flatten()(x)

    # Dense head in which every stage feeds the next one. Previously the 512-unit layer
    # was wired to the flattened features again, which left the 1024-unit layer and its
    # BatchNormalization disconnected from the model output.
    x = Dense(1024, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dense(512, activation='swish')(x)
    x = BatchNormalization()(x)
    x = Dense(128, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)

    # One logit per class.
    logits = Dense(len(class_names))(x)
    model = tf.keras.models.Model(inputs=inputs, outputs=logits)

    model.compile(optimizer=Adam(lr_init),
                  loss=scc(from_logits=True),
                  metrics=['accuracy'])
    return model

def schedule(epoch, lr):
    """Learning-rate schedule: linear warm-up, plateau, then exponential decay.

    Args:
        epoch: Zero-based epoch index supplied by the scheduler callback.
        lr: Learning rate currently in use.

    Returns:
        A Python float: ``(epoch + 1) * lr_init / 5`` for epochs 0-4, ``lr``
        unchanged for epochs 5-14, and ``lr * exp(-0.1)`` from epoch 15 onwards.
    """
    import math  # local import keeps this function self-contained

    if epoch < 5:
        return (epoch + 1) * lr_init / 5
    if epoch < 15:
        return lr
    # math.exp keeps the result a plain float. The previous tf.math.exp produced a
    # tf.Tensor, while LearningRateScheduler expects the schedule to return a float.
    return lr * math.exp(-0.1)
# Callback that applies schedule(); verbose=1 makes it log the learning rate it sets.
lr_scheduler = LearningRateScheduler(schedule=schedule, verbose=1)

def plot_history(history):
    """Draw training-vs-validation accuracy and loss curves side by side.

    Args:
        history: Object whose ``history`` dict holds the per-epoch lists
            ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and ``'val_loss'``.
    """
    logs = history.history

    # Read every series up front, one panel description per subplot:
    # (training series, training label, validation series, validation label,
    #  legend location, title)
    panels = [
        (logs['accuracy'], 'Training Accuracy',
         logs['val_accuracy'], 'Validation Accuracy',
         'lower right', 'Training and Validation Accuracy'),
        (logs['loss'], 'Training Loss',
         logs['val_loss'], 'Validation Loss',
         'upper right', 'Training and Validation Loss'),
    ]

    plt.figure(figsize=(12, 6))
    for position, panel in enumerate(panels, start=1):
        train_series, train_label, val_series, val_label, legend_loc, title = panel
        plt.subplot(1, 2, position)
        plt.plot(train_series, label=train_label)
        plt.plot(val_series, label=val_label)
        plt.legend(loc=legend_loc)
        plt.title(title)
    plt.show()

In [None]:
# Create the backbone and the full model inside the distribution strategy's scope.
with strategy.scope():
    base_model = base(
        include_top=False,
        weights='imagenet',
        input_shape=(image_size, image_size, 3),
    )
    # Mark the backbone as non-trainable before it is wrapped by create_model.
    base_model.trainable = False
    model = create_model(base_model)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/efficientnetv2-l_notop.h5
[1m473176280/473176280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 0us/step


In [None]:
# Train with early stopping and the learning-rate schedule; keep the returned history for plotting.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
    callbacks=[es, lr_scheduler],
)


Epoch 1: LearningRateScheduler setting learning rate to 0.0006000000000000001.
Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8473s[0m 34s/step - accuracy: 0.6244 - loss: 0.0000e+00 - val_accuracy: 0.7282 - val_loss: 0.0000e+00 - learning_rate: 6.0000e-04

Epoch 2: LearningRateScheduler setting learning rate to 0.0012000000000000001.
Epoch 2/100
[1m106/250[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m1:11:56[0m 30s/step - accuracy: 0.6318 - loss: 0.0000e+00

In [None]:
# The recorded run of model.save('kidney_cancer.h5') failed with
# "TypeError: cannot pickle 'module' object" while writing the legacy HDF5 format.
# NOTE(review): switched to the native Keras format (.keras), which the Keras saving
# guide recommends over legacy HDF5 — confirm that this resolves the error here.
model.save('kidney_cancer.keras')



TypeError: cannot pickle 'module' object

In [None]:
# Plot the accuracy and loss curves recorded by model.fit.
plot_history(history=history)

In [None]:
# Evaluate on the test batches that were split off from the validation set.
model.evaluate(x=test_ds)

In [None]:
# Evaluate on the validation batches that remain after the test split.
model.evaluate(x=val_ds)