# Expand the dataset by classifying using MobileNetv2

2024-08-19


The MobileNetV2 was trained on 1000 labelled images from Cytopix and 

In [1]:
import cv2
from pathlib import Path

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns

plt.rc("font", size=14)
plt.rc("axes", labelsize=14, titlesize=14)
plt.rc("legend", fontsize=14)
plt.rc("xtick", labelsize=10)
plt.rc("ytick", labelsize=10)

In [3]:
import tensorflow as tf
import keras

keras.backend.clear_session()
tf.random.set_seed(42)

2024-08-20 10:56:36.635431: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-20 10:56:36.651787: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-20 10:56:36.673387: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-20 10:56:36.673413: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-20 10:56:36.688041: I tensorflow/core/platform/cpu_feature_gua

In [None]:
SAMPLE_NAME = "24-Survive-017"
LOCAION_IN = "resources/out/cytpix"
LOCAION_OUT = "resources/out/cytpix"

folder_class_names = ["discocyte", "echinocyte", "granular", "holly_leaf", "sickle"]
sorted_dir = SAMPLE_NAME + "_sorted"
path_out = Path.cwd().parent / LOCAION_OUT / sorted_dir

try:
    path_out.mkdir(parents=False, exist_ok=False)
except FileExistsError as e:
    print(f"{e}")
finally:
    for item in folder_class_names:
        path_folder = path_out / item
        try:
            path_folder.mkdir()
        except FileExistsError as e:
            print(f"{e}")

[Errno 17] File exists: '/home/t.afanasyeva/deep_learning_anaemias/resources/out/cytpix/24-Survive-017_sorted'
[Errno 17] File exists: '/home/t.afanasyeva/deep_learning_anaemias/resources/out/cytpix/24-Survive-017_sorted/discocyte'
[Errno 17] File exists: '/home/t.afanasyeva/deep_learning_anaemias/resources/out/cytpix/24-Survive-017_sorted/echinocyte'
[Errno 17] File exists: '/home/t.afanasyeva/deep_learning_anaemias/resources/out/cytpix/24-Survive-017_sorted/granular'
[Errno 17] File exists: '/home/t.afanasyeva/deep_learning_anaemias/resources/out/cytpix/24-Survive-017_sorted/holly_leaf'
[Errno 17] File exists: '/home/t.afanasyeva/deep_learning_anaemias/resources/out/cytpix/24-Survive-017_sorted/sickle'


Prepare data

In [None]:
from keras.layers import (
    Rescaling,
    RandomFlip,
    RandomRotation,
    RandomTranslation,
    RandomContrast,
    RandomBrightness,
)

AUTOTUNE = tf.data.AUTOTUNE
IMG_SIZE = (96, 96)
BATCH_SIZE = 24
path_in = Path.cwd().parent / LOCAION_IN / SAMPLE_NAME
file_names = [image.stem for image in path_in.iterdir()]

test_ds = keras.utils.image_dataset_from_directory(
    path_in,
    labels=None,
    color_mode="rgb",
    batch_size=None,
    image_size=IMG_SIZE,
    shuffle=True,
    seed=93,
    data_format="channels_last",
    verbose=True,
)
class_names = test_ds.class_names

rescale = tf.keras.Sequential([Rescaling(1.0 / 255)])
data_augmentation = tf.keras.Sequential(
    [
        RandomFlip("horizontal_and_vertical"),
        RandomRotation(0.2),
        RandomTranslation(0.2, 0.2),
        RandomContrast(0.2),
        RandomBrightness(0.2),
    ]
)


def prepare(ds, batch_size):

    ds = ds.map(lambda x: rescale(x), num_parallel_calls=AUTOTUNE)
    ds = ds.batch(batch_size, num_parallel_calls=AUTOTUNE)

    return ds.prefetch(buffer_size=AUTOTUNE)


test_ds = prepare(
    test_ds,
    batch_size=BATCH_SIZE,
)

for data in test_ds.take(1):
    print(f"Test data shape: {data.shape}")

Found 23655 files.


2024-08-20 10:56:38.859855: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1928] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13764 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:3b:00.0, compute capability: 7.5


Test data shape: (24, 96, 96, 3)


2024-08-20 10:56:40.173667: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Initiate model and load weights


In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Dense,
    BatchNormalization,
    ReLU,
    GlobalAveragePooling2D,
)
from tensorflow.keras.regularizers import L2
from tensorflow.keras.applications import MobileNetV2


mobile_net = keras.applications.MobileNetV2(
    input_shape=(96, 96, 3),
    alpha=1.0,
    include_top=False,
    weights=None,
)

model = Sequential()

model.add(mobile_net)

model.add(GlobalAveragePooling2D())
model.add(Dense(5, activation="softmax"))

model.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

model.build((None, 96, 96, 3))
model_path = Path.cwd().parent / "model_zoo" / "mobile_net_v04.weights.h5"
model.load_weights(
    model_path,
    skip_mismatch=False,
)
model.summary()

  trackable.load_own_variables(weights_store.get(inner_path))


Predict new images from test_ds

In [9]:
y_pred = model.predict(test_ds)
y_pred = y_pred.argmax(axis=1)
len(y_pred)

I0000 00:00:1724144201.963117  636726 service.cc:145] XLA service 0x7fbf7c0027d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1724144201.963175  636726 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2024-08-20 10:56:42.003857: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-08-20 10:56:42.346100: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907


[1m 27/986[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5s[0m 6ms/step

I0000 00:00:1724144204.192995  636726 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m986/986[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 9ms/step


23655

Get class names from integers

In [10]:
name_dict = {
    0: "discocyte",
    1: "holly_leaf",
    2: "granular",
    3: "sickle",
    4: "echinocyte",
}
y_pred = [name_dict[k] for k in y_pred]

Write images to folder

In [14]:
extension = ".png"

for name, prediction in zip(file_names, y_pred):
    image_path_in = str(path_in / (name + extension))
    img = cv2.imread(image_path_in)
    image_path_out = str(path_out / prediction / (name + extension))
    cv2.imwrite(image_path_out, img)