In [79]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import os
from sklearn.model_selection import train_test_split
from PIL import Image, UnidentifiedImageError

In [None]:
# Constants
IMG_HEIGHT, IMG_WIDTH = 144, 144
BATCH_SIZE = 32
EPOCHS = 20
DATASET_DIR = '/kaggle/input/microscopic-peripheral-blood-cell-images-mendeley/PBC_dataset_normal_DIB'
SEED = 42 # Seed for reproducibility

In [None]:
# Convert an image (read with OpenCV) to RGB, resize it using bicubic interpolation, and add a batch dimension.
def preprocess_image(image, img_height, img_width):
    image = image.convert("RGB")
    image = image.resize((img_width, img_height), resample=Image.BICUBIC)
    image = tf.keras.preprocessing.image.img_to_array(image)
    return image

In [None]:
# Programmatically create file lists for train, validation, and test sets
def create_data_splits(dataset_dir, test_split=0.1, val_split=0.1):
    train_files, val_files, test_files = [], [], []
    for class_dir in os.listdir(dataset_dir):
        class_path = os.path.join(dataset_dir, class_dir)
        if os.path.isdir(class_path):
            files = [os.path.join(class_dir, f) for f in os.listdir(class_path) if f.endswith(('.png', '.jpg', '.jpeg'))]
            train, temp = train_test_split(files, test_size=(val_split + test_split), random_state=SEED)
            val, test = train_test_split(temp, test_size=test_split / (val_split + test_split), random_state=SEED)
            train_files.extend(train)
            val_files.extend(val)
            test_files.extend(test)
    return train_files, val_files, test_files

In [83]:
train_files, val_files, test_files = create_data_splits(DATASET_DIR)

In [None]:
# Custom Data Generators to load images from specific file lists
class_indices = {class_name: idx for idx, class_name in enumerate(sorted(os.listdir(DATASET_DIR)))}

In [93]:
def custom_generator(file_list, dataset_dir, batch_size, img_height, img_width, shuffle=False):
    file_list = file_list.copy()
    if shuffle:
        np.random.shuffle(file_list)
    while True:
        batch_paths = file_list[:batch_size]
        file_list = file_list[batch_size:] + batch_paths
        
        batch_input, batch_output = [], []
        for file_path in batch_paths:
            full_path = os.path.join(dataset_dir, file_path)
            try:
                img = tf.keras.preprocessing.image.load_img(full_path)
                img_array = preprocess_image(img, img_height, img_width)
                class_name = file_path.split(os.sep)[0]
                label = class_indices[class_name]
                batch_input.append(img_array)
                batch_output.append(label)
            except Exception as e:
                print(f"Skipping file {file_path} due to error: {e}")
        if batch_input and batch_output:
            batch_x = np.array(batch_input)
            batch_y = tf.keras.utils.to_categorical(batch_output, num_classes=len(class_indices))
            yield batch_x, batch_y

In [None]:
# Initialize generators
train_generator = custom_generator(train_files, DATASET_DIR, BATCH_SIZE, IMG_HEIGHT, IMG_WIDTH)
validation_generator = custom_generator(val_files, DATASET_DIR, BATCH_SIZE, IMG_HEIGHT, IMG_WIDTH)

In [None]:
# Model architecture
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),

    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),

    Flatten(),
    Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    BatchNormalization(),
    Dropout(0.5),
    
    Dense(len(class_indices), activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [None]:
# Calculate steps per epoch based on available data
train_steps_per_epoch = len(train_files) // BATCH_SIZE
val_steps_per_epoch = len(val_files) // BATCH_SIZE
test_steps = len(test_files) // BATCH_SIZE

In [None]:
# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_steps_per_epoch,
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=val_steps_per_epoch,
    callbacks=[early_stopping]
)

Epoch 1/20
[1m386/427[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m5s[0m 144ms/step - accuracy: 0.5323 - loss: 1.9177
Skipping unreadable file: neutrophil/.DS_169665.jpg. Error: cannot identify image file <_io.BytesIO object at 0x7b9880724590>
[1m427/427[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 171ms/step - accuracy: 0.5481 - loss: 1.8686 - val_accuracy: 0.3909 - val_loss: 3.1152
Epoch 2/20
[1m246/427[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m22s[0m 127ms/step - accuracy: 0.8558 - loss: 0.9219
Skipping unreadable file: neutrophil/.DS_169665.jpg. Error: cannot identify image file <_io.BytesIO object at 0x7b9880409e40>
[1m362/427[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m8s[0m 125ms/step - accuracy: 0.8591 - loss: 0.9054
Skipping unreadable file: neutrophil/.DS_169665.jpg. Error: cannot identify image file <_io.BytesIO object at 0x7b9838125080>
[1m427/427[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 138ms/step - accuracy: 0.8610 - loss: 0.8961 

In [None]:
# Re-create the test generator without shuffling to match the true labels order
test_generator = custom_test_generator(test_files, DATASET_DIR, BATCH_SIZE, IMG_HEIGHT, IMG_WIDTH, shuffle=False)

In [None]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(test_generator, steps=test_steps)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 145ms/step - accuracy: 0.8314 - loss: 0.9221
Test Loss: 0.7813481688499451
Test Accuracy: 0.8891509175300598


In [None]:
from sklearn.metrics import classification_report

total_test_samples = len(test_files)
steps = (total_test_samples // BATCH_SIZE) + 1

# Predict on the test data, ensuring the prediction array length matches the test set
predictions = model.predict(test_generator, steps=steps)
predicted_classes = np.argmax(predictions, axis=1)
predicted_classes = predicted_classes[:total_test_samples]

true_labels = []
for i, (x_batch, y_batch) in enumerate(test_generator):
    true_labels.extend(np.argmax(y_batch, axis=1))
    if i + 1 >= steps:
        break
true_labels = np.array(true_labels)[:total_test_samples]

report = classification_report(true_labels, predicted_classes, target_names=list(class_indices.keys()))
print(report)

[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 110ms/step
              precision    recall  f1-score   support

    basophil       0.60      0.25      0.36       122
  eosinophil       0.84      0.84      0.84       312
erythroblast       0.69      0.62      0.65       156
          ig       0.61      0.87      0.72       290
  lymphocyte       0.56      0.47      0.51       122
    monocyte       0.60      0.44      0.50       142
  neutrophil       0.78      0.83      0.81       333
    platelet       0.78      0.79      0.79       235

    accuracy                           0.71      1712
   macro avg       0.68      0.64      0.65      1712
weighted avg       0.71      0.71      0.70      1712



In [None]:
# Save the model
model.save('/kaggle/working/model_v2.keras')

In [None]:
model = tf.keras.models.load_model('/kaggle/working/model_v2.keras')

In [None]:
from PIL import Image

In [None]:
# Test model on a patient

test_dir = "/kaggle/input/testdata2/BHR"
test_files = [f for f in os.listdir(test_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg','tif'))]

inv_class_indices = {v: k for k, v in class_indices.items()}
counts = {class_name: 0 for class_name in class_indices.keys()}

for file in test_files:
    full_path = os.path.join(test_dir, file)
    try:
        img = Image.open(full_path)
        processed_img = preprocess_image(img, IMG_HEIGHT, IMG_WIDTH)
        prediction = model.predict(processed_img)
        predicted_idx = np.argmax(prediction, axis=1)[0]
        predicted_class = inv_class_indices[predicted_idx]
        counts[predicted_class] += 1
    except Exception as e:
        print(f"Skipping file {file} due to error: {e}")

print("Total count of images per predicted category:")
for category, count in counts.items():
    print(f"{category}: {count}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 327ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1

In [None]:
test_dir = "/kaggle/input/testdata/ALA"
test_files = [f for f in os.listdir(test_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg','tif'))]
inv_class_indices = {v: k for k, v in class_indices.items()}

counts = {class_name: 0 for class_name in class_indices.keys()}

for file in test_files:
    full_path = os.path.join(test_dir, file)
    try:
        img = Image.open(full_path)
        processed_img = preprocess_image(img, IMG_HEIGHT, IMG_WIDTH)
        prediction = model.predict(processed_img)
        predicted_idx = np.argmax(prediction, axis=1)[0]
        predicted_class = inv_class_indices[predicted_idx]
        counts[predicted_class] += 1
    except Exception as e:
        print(f"Skipping file {file} due to error: {e}")

print("Total count of images per predicted category:")
for category, count in counts.items():
    print(f"{category}: {count}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15