In [3]:
import os
import tensorflow as tf

# Define paths to training and testing datasets
path = os.getcwd()
TRAIN_PATH = os.path.join(path, 'archive/Train')
TEST_PATH = os.path.join(path, 'archive/Test-A')

In [4]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

IMAGE_SIZE = 224
BATCH_SIZE = 32

data_gen = ImageDataGenerator(rescale=1./255,
    width_shift_range=0.05,
    height_shift_range=0.05,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.2)


train_gen = data_gen.flow_from_directory(directory = TRAIN_PATH,subset='training',batch_size = BATCH_SIZE,shuffle = True,class_mode = 'categorical',target_size = (IMAGE_SIZE, IMAGE_SIZE))

valid_gen = data_gen.flow_from_directory(directory = TRAIN_PATH,subset='validation',batch_size = BATCH_SIZE,shuffle = False,class_mode = 'categorical',target_size = (IMAGE_SIZE, IMAGE_SIZE))

test_gen = ImageDataGenerator(rescale=1./255).flow_from_directory(directory = TEST_PATH,batch_size = BATCH_SIZE,shuffle = False,class_mode = 'categorical',target_size = (IMAGE_SIZE, IMAGE_SIZE))

Found 8142 images belonging to 5 classes.
Found 2033 images belonging to 5 classes.
Found 4339 images belonging to 5 classes.


In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Reshape

# Get number of classes
num_classes = len(train_gen.class_indices)

# Pure LSTM model - reshape image to sequence
model = Sequential([
    Reshape((IMAGE_SIZE, IMAGE_SIZE * 3), input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),
    LSTM(256, return_sequences=True),
    Dropout(0.3),
    LSTM(128, return_sequences=True),
    Dropout(0.3),
    LSTM(64),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

  super().__init__(**kwargs)


In [6]:
history = model.fit(
    train_gen,
    validation_data=valid_gen,
    epochs=10
)

Epoch 1/10
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 557ms/step - accuracy: 0.6096 - loss: 1.1072 - val_accuracy: 0.6129 - val_loss: 1.0738
Epoch 2/10
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 771ms/step - accuracy: 0.6123 - loss: 1.0918 - val_accuracy: 0.6129 - val_loss: 1.0746
Epoch 3/10
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m246s[0m 966ms/step - accuracy: 0.6123 - loss: 1.0929 - val_accuracy: 0.6129 - val_loss: 1.0733
Epoch 4/10
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m327s[0m 1s/step - accuracy: 0.6123 - loss: 1.0868 - val_accuracy: 0.6129 - val_loss: 1.0763
Epoch 5/10
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m297s[0m 1s/step - accuracy: 0.6123 - loss: 1.0836 - val_accuracy: 0.6129 - val_loss: 1.0787
Epoch 6/10
[1m255/255[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m267s[0m 1s/step - accuracy: 0.6123 - loss: 1.0819 - val_accuracy: 0.6129 - val_loss: 1.0785
Epoch 7/10
[

In [7]:
# Evaluate on test data
print("\n" + "="*50)
print("EVALUATING ON TEST SET...")
print("="*50)

test_loss, test_accuracy = model.evaluate(test_gen, verbose=1)

print("\n" + "="*50)
print("TEST SET RESULTS")
print("="*50)
print(f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print(f"Test Loss: {test_loss:.4f}")
print("="*50)

# Compare all three sets
print("\n" + "="*50)
print("COMPARISON: TRAIN vs VALIDATION vs TEST")
print("="*50)
print(f"Training Accuracy:   {history.history['accuracy'][-1]:.4f} ({history.history['accuracy'][-1]*100:.2f}%)")
print(f"Validation Accuracy: {history.history['val_accuracy'][-1]:.4f} ({history.history['val_accuracy'][-1]*100:.2f}%)")
print(f"Test Accuracy:       {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print("="*50)


EVALUATING ON TEST SET...
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 217ms/step - accuracy: 0.6130 - loss: 1.0732

TEST SET RESULTS
Test Accuracy: 0.6130 (61.30%)
Test Loss: 1.0732

COMPARISON: TRAIN vs VALIDATION vs TEST
Training Accuracy:   0.6123 (61.23%)
Validation Accuracy: 0.6129 (61.29%)
Test Accuracy:       0.6130 (61.30%)


In [8]:
test_loss, test_accuracy = model.evaluate(test_gen, verbose=1)

print("\n" + "="*50)
print("TEST SET EVALUATION")
print("="*50)
print(f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print(f"Test Loss: {test_loss:.4f}")
print("="*50)

[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 223ms/step - accuracy: 0.6130 - loss: 1.0732

TEST SET EVALUATION
Test Accuracy: 0.6130 (61.30%)
Test Loss: 1.0732


In [10]:
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score, f1_score

# Get predictions on test set
print("\n" + "="*50)
print("GENERATING PREDICTIONS...")
print("="*50)

test_gen.reset()  # Reset generator to start from beginning
predictions = model.predict(test_gen, verbose=1)
predicted_classes = np.argmax(predictions, axis=1)

# Get true labels
true_classes = test_gen.classes
class_labels = list(test_gen.class_indices.keys())

# Calculate overall metrics
precision = precision_score(true_classes, predicted_classes, average='weighted', zero_division=0)
recall = recall_score(true_classes, predicted_classes, average='weighted', zero_division=0)
f1 = f1_score(true_classes, predicted_classes, average='weighted', zero_division=0)
print("\n" + "="*50)
print("OVERALL METRICS")
print("="*50)
print(f"Precision: {precision:.4f} ({precision*100:.2f}%)")
print(f"Recall:    {recall:.4f} ({recall*100:.2f}%)")
print(f"F1-Score:  {f1:.4f} ({f1*100:.2f}%)")
print("="*50)

# Detailed classification report (per-class metrics)
print("\n" + "="*50)
print("CLASSIFICATION REPORT (Per-Class Metrics)")
print("="*50)
print(classification_report(true_classes, predicted_classes, target_names=class_labels, zero_division=0))

import numpy as np

# See which classes the model predicts
unique, counts = np.unique(predicted_classes, return_counts=True)
class_labels = list(test_gen.class_indices.keys())

print("\n" + "="*50)
print("PREDICTION DISTRIBUTION")
print("="*50)
for class_idx, count in zip(unique, counts):
    print(f"{class_labels[class_idx]}: {count} predictions ({count/len(predicted_classes)*100:.2f}%)")
print("="*50)

# See true class distribution
unique_true, counts_true = np.unique(true_classes, return_counts=True)
print("\n" + "="*50)
print("TRUE CLASS DISTRIBUTION")
print("="*50)
for class_idx, count in zip(unique_true, counts_true):
    print(f"{class_labels[class_idx]}: {count} images ({count/len(true_classes)*100:.2f}%)")
print("="*50)


GENERATING PREDICTIONS...
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 250ms/step

OVERALL METRICS
Precision: 0.3758 (37.58%)
Recall:    0.6130 (61.30%)
F1-Score:  0.4660 (46.60%)

CLASSIFICATION REPORT (Per-Class Metrics)
              precision    recall  f1-score   support

    Basophil       0.00      0.00      0.00        89
  Eosinophil       0.00      0.00      0.00       322
  Lymphocyte       0.00      0.00      0.00      1034
    Monocyte       0.00      0.00      0.00       234
  Neutrophil       0.61      1.00      0.76      2660

    accuracy                           0.61      4339
   macro avg       0.12      0.20      0.15      4339
weighted avg       0.38      0.61      0.47      4339


PREDICTION DISTRIBUTION
Neutrophil: 4339 predictions (100.00%)

TRUE CLASS DISTRIBUTION
Basophil: 89 images (2.05%)
Eosinophil: 322 images (7.42%)
Lymphocyte: 1034 images (23.83%)
Monocyte: 234 images (5.39%)
Neutrophil: 2660 images (61.30%)
