In [1]:
# Step 1: Clone repo and install packages
!git clone https://github.com/niaz1971/MALARIAL_CELL_CLASSIFICATION_USING_CNN.git
%cd MALARIAL_CELL_CLASSIFICATION_USING_CNN/
!pip install lime kaggle
!pip install -U git+https://github.com/qubvel/classification_models.git

Cloning into 'MALARIAL_CELL_CLASSIFICATION_USING_CNN'...
remote: Enumerating objects: 318, done.[K
remote: Counting objects: 100% (318/318), done.[K
remote: Compressing objects: 100% (314/314), done.[K
remote: Total 318 (delta 19), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (318/318), 5.52 MiB | 15.15 MiB/s, done.
Resolving deltas: 100% (19/19), done.
/content/MALARIAL_CELL_CLASSIFICATION_USING_CNN
Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: lime
  Building wheel for lime (setup.py) ... [?25l[?25hdone
  Created wheel for lime: filename=lime-0.2.0.1-py3-none-any.whl size=283834 sha256=e0e69be80b44852a37c5e5a416e6e29946122acbb45c1b2c18ed89f60b7f096f
  Stored in directory: /root/.cache/pip/wheels/e7/5d/0e/4b4fff9a47468fed56332

In [2]:
# Step 2: Set up Kaggle API for dataset download
# Copies kaggle.json from the current working directory into ~/.kaggle and
# restricts its permissions to the owner (mode 600).
# NOTE(review): the working directory here is the cloned repository (see the
# %cd in Step 1), so kaggle.json is presumably expected to live in the repo or
# be uploaded there manually — confirm that no real API token is committed.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [3]:
# Step 3: Download and unzip malaria cell images dataset
!kaggle datasets download -d iarunava/cell-images-for-detecting-malaria
!unzip -q cell-images-for-detecting-malaria.zip

Dataset URL: https://www.kaggle.com/datasets/iarunava/cell-images-for-detecting-malaria
License(s): unknown
Downloading cell-images-for-detecting-malaria.zip to /content/MALARIAL_CELL_CLASSIFICATION_USING_CNN
 84% 566M/675M [00:10<00:03, 31.9MB/s]
100% 675M/675M [00:10<00:00, 65.7MB/s]


In [4]:
# Step 4: Imports
import os
import random
import shutil
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import tensorflow as tf
from tensorflow.keras.utils import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc, accuracy_score
import matplotlib.image as mpimg

import classification_models.tfkeras
from classification_models.tfkeras import Classifiers

# Seed the Python, NumPy and TensorFlow random number generators with the same
# value so that shuffles and weight initialisation are repeatable.
SEED = 42
for seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    seed_rng(SEED)

In [5]:
# Step 5: Prepare dataset folders
# Raw images are read from <original_dataset>/<class name>/.
original_dataset = 'cell_images'
parasitized_dir = os.path.join(original_dataset, 'Parasitized')
uninfected_dir = os.path.join(original_dataset, 'Uninfected')

# The train/valid/test copies are written below <base_dir>/<split>/<class name>/.
base_dir = 'malaria_data'
train_dir, valid_dir, test_dir = (
    os.path.join(base_dir, split_name) for split_name in ('train', 'valid', 'test')
)

classes = ['Parasitized', 'Uninfected']
# os.makedirs also creates missing parent folders, so creating each leaf
# directory is enough to build base_dir and the three split folders too.
for split_dir in (train_dir, valid_dir, test_dir):
    for cls in classes:
        os.makedirs(os.path.join(split_dir, cls), exist_ok=True)

In [6]:
# Step 6: Split data function
def split_data(source_dir, train_dir, valid_dir, test_dir,
               train_ratio=0.8, valid_ratio=0.1, test_ratio=0.1):
    """Randomly copy the files of ``source_dir`` into train/valid/test folders.

    The file names are sorted and then shuffled with the global ``random``
    module, so the resulting split depends only on the names and on the state
    of ``random`` (seed it beforehand for a repeatable split), not on the
    order in which the filesystem happens to list the directory.

    Args:
        source_dir: Directory whose regular files are split. Sub-directories
            are ignored; every regular file is copied, whatever its extension.
        train_dir: Destination for the first ``int(total * train_ratio)`` files.
        valid_dir: Destination for the next ``int(total * valid_ratio)`` files.
        test_dir: Destination for all remaining files.
        train_ratio: Fraction of files assigned to the training split.
        valid_ratio: Fraction of files assigned to the validation split.
        test_ratio: Accepted for interface compatibility only; the test split
            always receives whatever is left after the other two splits.

    Returns:
        None. Files are copied (not moved); ``source_dir`` is left untouched.
    """
    # Sort first: os.listdir returns entries in arbitrary order, which would
    # make the seeded shuffle below differ from one machine to another.
    files = sorted(
        name for name in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, name))
    )
    random.shuffle(files)
    total = len(files)
    train_end = int(total * train_ratio)
    valid_end = train_end + int(total * valid_ratio)

    partitions = (
        (train_dir, files[:train_end]),
        (valid_dir, files[train_end:valid_end]),
        (test_dir, files[valid_end:]),
    )
    for destination, names in partitions:
        # Make sure the destination exists so the function can be used alone.
        os.makedirs(destination, exist_ok=True)
        for name in names:
            shutil.copy(os.path.join(source_dir, name),
                        os.path.join(destination, name))

# Split each class folder in turn (Parasitized first, then Uninfected), sending
# its files to the matching class sub-folder of every split directory.
for class_name, class_source in (('Parasitized', parasitized_dir),
                                 ('Uninfected', uninfected_dir)):
    split_data(class_source,
               os.path.join(train_dir, class_name),
               os.path.join(valid_dir, class_name),
               os.path.join(test_dir, class_name))

In [7]:
# Step 7: Data augmentation and generators with ResNet preprocessing
# Classifiers.get returns the model constructor together with its matching
# input preprocessing function.
ResNet18, preprocess_input = Classifiers.get('resnet18')

# Random flips, rotations, shears and shifts are applied to training images only.
augmentation = dict(
    horizontal_flip=True, vertical_flip=True,
    rotation_range=40, shear_range=0.2,
    width_shift_range=0.4, height_shift_range=0.4,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input, **augmentation)
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Options shared by all three directory iterators.
shared_flow = dict(target_size=(224,224), class_mode='categorical')

train_generator = train_datagen.flow_from_directory(
    train_dir, batch_size=32, shuffle=True, seed=42, **shared_flow)
valid_generator = valid_datagen.flow_from_directory(
    valid_dir, batch_size=32, shuffle=True, seed=42, **shared_flow)
# The test iterator yields one image per batch and is not shuffled, so its
# output order matches test_generator.filenames / test_generator.classes.
test_generator = test_datagen.flow_from_directory(
    test_dir, batch_size=1, shuffle=False, **shared_flow)

# class_labels maps class name -> index; class_names is the inverse mapping.
class_labels = train_generator.class_indices
class_names = dict((index, name) for name, index in class_labels.items())
print("Class labels:", class_labels)

Found 22048 images belonging to 2 classes.
Found 2755 images belonging to 2 classes.
Found 2755 images belonging to 2 classes.
Class labels: {'Parasitized': 0, 'Uninfected': 1}


In [8]:
# Step 8: Build model with pretrained ResNet18 base (exclude top)
base_model = ResNet18(input_shape=(224,224,3), weights='imagenet', include_top=False)

# Freeze base layers so that only the new classification head is trained.
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

# Classification head: flatten -> dense -> dropout -> dense -> softmax with one
# output unit per class.
features = Flatten()(base_model.output)
hidden = Dense(4608, activation='relu')(features)
hidden = Dropout(0.2)(hidden)
hidden = Dense(1152, activation='relu')(hidden)
output = Dense(len(class_names), activation='softmax')(hidden)

model = Model(inputs=base_model.input, outputs=output)
model.summary()

Downloading data from https://github.com/qubvel/classification_models/releases/download/0.0.1/resnet18_imagenet_1000_no_top.h5
[1m44920640/44920640[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 0us/step


In [9]:
# Step 9: Compile model
# SGD with Nesterov momentum; categorical cross-entropy matches the one-hot
# labels produced by the generators (class_mode='categorical').
optimizer = SGD(learning_rate=1e-4, momentum=0.9, nesterov=True)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Step 10: Callbacks
# Stop training once val_loss has not improved for 4 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1)
# Keep only the weights of the epoch with the lowest val_loss.
checkpoint = ModelCheckpoint('best_malaria_model_resnet18.keras', monitor='val_loss',
                             save_best_only=True, verbose=1)
# Halve the learning rate after 3 epochs without val_accuracy improvement.
# NOTE(review): this callback monitors val_accuracy while the other two
# monitor val_loss — confirm the mix of metrics is intentional.
lr_schedule = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=3,
                                verbose=1, min_lr=1e-5)
callbacks = [early_stopping, checkpoint, lr_schedule]

In [None]:
# Step 11: Train model
# steps_per_epoch=50 with the generator's batch size of 32 means each epoch
# draws 1,600 training images rather than a full pass over the training set.
history = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=10,
    steps_per_epoch=50,
    callbacks=callbacks,
)

  self._warn_if_super_not_called()


Epoch 1/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.5844 - loss: 0.8447
Epoch 1: val_loss improved from inf to 0.39024, saving model to best_malaria_model_resnet18.keras
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m304s[0m 6s/step - accuracy: 0.5855 - loss: 0.8422 - val_accuracy: 0.8301 - val_loss: 0.3902 - learning_rate: 1.0000e-04
Epoch 2/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.7617 - loss: 0.5275

In [None]:
# Step 12: Plot training curves
def plot_training_curves(history):
    """Plot training vs. validation accuracy and loss side by side.

    Args:
        history: Object whose ``history`` attribute is a dict holding the
            per-epoch lists 'accuracy', 'val_accuracy', 'loss' and 'val_loss'
            (the value returned by ``model.fit``).
    """
    metrics = history.history
    epochs = range(1, len(metrics['accuracy']) + 1)

    # One entry per panel: train key, val key, train legend, val legend,
    # panel title, y-axis label.
    panels = (
        ('accuracy', 'val_accuracy', 'Train Accuracy', 'Val Accuracy',
         'Training and Validation Accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Train Loss', 'Val Loss',
         'Training and Validation Loss', 'Loss'),
    )

    plt.figure(figsize=(12,5))
    for position, panel in enumerate(panels, start=1):
        train_key, val_key, train_legend, val_legend, panel_title, y_label = panel
        plt.subplot(1, 2, position)
        plt.plot(epochs, metrics[train_key], 'b-', label=train_legend)
        plt.plot(epochs, metrics[val_key], 'r-', label=val_legend)
        plt.title(panel_title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()
    plt.show()

# Draw the accuracy and loss curves for the History object returned by model.fit.
plot_training_curves(history)

In [None]:
# Step 13: Load best saved model
# Replaces the in-memory model with the checkpoint file written by the
# ModelCheckpoint callback (the epoch with the lowest val_loss), so every
# later cell evaluates the checkpointed weights rather than the last epoch's.
model = tf.keras.models.load_model('best_malaria_model_resnet18.keras')

In [None]:
# Step 14: Evaluate on validation and test sets
# evaluate() returns the loss followed by the compiled metric (accuracy).
validation_metrics = model.evaluate(valid_generator)
test_metrics = model.evaluate(test_generator)
val_loss, val_acc = validation_metrics
test_loss, test_acc = test_metrics
print(f'Validation Acc: {val_acc:.4f}, Loss: {val_loss:.4f}')
print(f'Test Acc: {test_acc:.4f}, Loss: {test_loss:.4f}')

In [None]:
# Step 15: Predictions and classification report
# The test generator uses batch_size=1 and shuffle=False, so one step per file
# yields exactly one prediction row per entry of test_generator.classes.
nb_samples = len(test_generator.filenames)
preds = model.predict(test_generator, steps=nb_samples, verbose=1)
pred_labels = preds.argmax(axis=1)

true_labels = test_generator.classes
# Class names ordered by class index, as expected by classification_report.
target_names = [class_names[idx] for idx in range(len(class_names))]
print(classification_report(true_labels, pred_labels, target_names=target_names))

accuracy = accuracy_score(true_labels, pred_labels)
print(f'Overall Test Accuracy: {accuracy:.4f}')

In [None]:
# Step 16: Plot classification metrics
# Class names ordered by class index.
target_names = [class_names[i] for i in range(len(class_names))]
report_dict = classification_report(test_generator.classes, pred_labels,
                                    target_names=target_names,
                                    output_dict=True)
report_df = pd.DataFrame(report_dict).transpose()
# Drop the last three rows (the summary rows of the report) and the last
# column (support), leaving one row of precision/recall/f1-score per class.
# .copy() gives an independent frame, so adding a column below does not write
# into a slice of report_df (which raises pandas' SettingWithCopyWarning).
report_classes = report_df.iloc[:-3, :-1].copy()

cm = confusion_matrix(test_generator.classes, pred_labels)
# Correct predictions divided by the number of true samples of each class.
# NOTE: this is numerically the same quantity as per-class recall, so the
# 'accuracy' bars will match the 'recall' bars.
class_acc = cm.diagonal() / cm.sum(axis=1)
report_classes['accuracy'] = class_acc

ax = report_classes[['precision', 'recall', 'f1-score', 'accuracy']].plot(kind='bar', figsize=(10,6))
plt.title('Classification Metrics per Class Including Accuracy')
plt.xlabel('Classes')
plt.ylabel('Score')
plt.ylim([0,1])
plt.xticks(rotation=0)
# Write each bar's value just above it.
for p in ax.patches:
    height = p.get_height()
    ax.annotate(f'{height:.2f}', (p.get_x()+p.get_width()/2, height),
                ha='center', va='bottom', fontsize=9, fontweight='bold')
plt.legend(loc='lower right')
plt.grid(axis='y')
plt.show()

print(f"\nOverall Accuracy: {accuracy:.4f}")

In [None]:
# Step 17: Confusion matrix
# Rows are true labels and columns are predicted labels; the same class names
# label both axes.
tick_labels = list(class_names.values())
plt.figure(figsize=(6,6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=tick_labels,
            yticklabels=tick_labels)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Step 18: ROC curve
# Column 1 of preds is the predicted probability of class index 1, so the
# curve treats that class as the positive one. In the recorded run the mapping
# printed in Step 7 was {'Parasitized': 0, 'Uninfected': 1}.
positive_scores = preds[:,1]
fpr, tpr, _ = roc_curve(test_generator.classes, positive_scores)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8,6))
plt.plot(fpr, tpr, 'b-', label=f'ROC curve (AUC={roc_auc:.3f})')
# Diagonal reference line: the ROC of a random classifier.
plt.plot([0,1], [0,1], 'k--')
plt.title('ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.show()

In [None]:
# Step 19: Visualize test images with predictions
# Show up to 10 randomly chosen test images in a 2x5 grid. The cap keeps
# random.sample from raising when the test set holds fewer than 10 files.
n_preview = min(10, len(test_generator.filenames))
sample_files = random.sample(test_generator.filenames, n_preview)
plt.figure(figsize=(20,8))
for i, fname in enumerate(sample_files):
    plt.subplot(2, 5, i+1)
    # Generator file names are relative to test_dir ("<class>/<file>").
    img_path = os.path.join(test_dir, fname)
    img = mpimg.imread(img_path)
    plt.imshow(img)
    plt.axis('off')
    img_obj = load_img(img_path, target_size=(224,224))
    # Apply the same preprocessing function the data generators use, so the
    # model receives inputs prepared exactly as during training/evaluation.
    img_arr = preprocess_input(img_to_array(img_obj))
    img_exp = np.expand_dims(img_arr, axis=0)
    # verbose=0 suppresses one progress bar per image.
    pred = model.predict(img_exp, verbose=0)
    pred_label = class_names[np.argmax(pred)]
    # The true label is the name of the folder that contains the image.
    true_label = os.path.basename(os.path.dirname(img_path))
    plt.title(f'Pred: {pred_label}\nTrue: {true_label}', fontsize=10)
plt.tight_layout()
plt.show()