In [1]:
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds
import numpy as np
import kagglehub

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Download Dataset
path = kagglehub.dataset_download("andrewmvd/medical-mnist")

print("Path to dataset files:", path)

Path to dataset files: /Users/allenavila/.cache/kagglehub/datasets/andrewmvd/medical-mnist/versions/1


In [4]:
# Create dataset
BATCHSIZE = 64
IMG_HEIGHT = 48
IMG_WIDTH = 48

train_ds = keras.utils.image_dataset_from_directory(
    path,
    validation_split = 0.2,
    subset = "training",
    seed = 123,
    image_size = (IMG_HEIGHT, IMG_WIDTH),
    batch_size = BATCHSIZE,
    color_mode='grayscale'
)

validation_ds = keras.utils.image_dataset_from_directory(
    path,
    validation_split = 0.2,
    subset = "validation",
    seed = 123,
    image_size = (IMG_HEIGHT, IMG_WIDTH),
    batch_size = BATCHSIZE,
    color_mode='grayscale'
)

Found 58954 files belonging to 6 classes.
Using 47164 files for training.
Found 58954 files belonging to 6 classes.
Using 11790 files for validation.


In [5]:
train_ds

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 48, 48, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>

In [6]:
class_names = train_ds.class_names
class_names

['AbdomenCT', 'BreastMRI', 'CXR', 'ChestCT', 'Hand', 'HeadCT']

In [7]:
# convert train and validation datasets into NumPy arrays
def dataset_to_numpy(dataset):
    images = []
    labels = []

    for image_batch, label_batch in dataset:
        images.append(image_batch.numpy())
        labels.append(label_batch.numpy())

    images = np.concatenate(images, axis=0)
    labels = np.concatenate(labels, axis=0)

    return images, labels

In [8]:
X_train, y_train = dataset_to_numpy(train_ds)
X_valid, y_valid = dataset_to_numpy(validation_ds)

2024-12-12 14:01:29.584894: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-12-12 14:01:30.104545: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [9]:
X_train.shape

(47164, 48, 48, 1)

In [10]:
y_train

array([1, 0, 4, ..., 1, 5, 1], dtype=int32)

In [12]:
# Create Model
# CNN Model
model = keras.models.Sequential()
model.add(keras.layers.Conv2D(filters=32, kernel_size=3, activation="relu", padding="same", input_shape = [48, 48, 1]))
model.add(keras.layers.MaxPool2D(pool_size=2, strides=2))
model.add(keras.layers.Conv2D(filters=64, kernel_size=3, activation="relu", padding="same"))
model.add(keras.layers.MaxPool2D(pool_size=2, strides=2))

model.add(keras.layers.Flatten())

model.add(keras.layers.Dense(units= 64, activation="relu"))
model.add(keras.layers.Dense(units = 6, activation="softmax"))

In [13]:
model.summary()

In [14]:
model.compile(
    optimizer = "adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

In [15]:
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))

Epoch 1/10
[1m1474/1474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 16ms/step - accuracy: 0.9442 - loss: 1.3340 - val_accuracy: 0.9959 - val_loss: 0.0157
Epoch 2/10
[1m1474/1474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 16ms/step - accuracy: 0.9948 - loss: 0.0173 - val_accuracy: 0.9961 - val_loss: 0.0121
Epoch 3/10
[1m1474/1474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 16ms/step - accuracy: 0.9951 - loss: 0.0151 - val_accuracy: 0.9953 - val_loss: 0.0190
Epoch 4/10
[1m1474/1474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 17ms/step - accuracy: 0.9971 - loss: 0.0104 - val_accuracy: 0.9978 - val_loss: 0.0169
Epoch 5/10
[1m1474/1474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 17ms/step - accuracy: 0.9975 - loss: 0.0084 - val_accuracy: 0.9984 - val_loss: 0.0087
Epoch 6/10
[1m1474/1474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 16ms/step - accuracy: 0.9980 - loss: 0.0096 - val_accuracy: 0.9978 - val_loss: 0.0184
Epoc

In [16]:
score = model.evaluate(X_valid, y_valid)

[1m369/369[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9992 - loss: 0.0042


In [21]:
# predicting model
y_predict_prob = model.predict(X_valid)
y_predict = np.argmax(y_predict_prob, axis=1)

print(f'Real label from test set: {class_names[y_valid[0]]}, {class_names[y_valid[1]]}, {class_names[y_valid[2]]}')
print(f'Predicted label from test set: {class_names[y_predict[0]]} {class_names[y_predict[1]]} {class_names[y_predict[2]]}')

[1m369/369[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
Real label from test set: HeadCT, AbdomenCT, AbdomenCT
Predicted label from test set: HeadCT AbdomenCT AbdomenCT
