This project uses the MobileNetV2 deep learning model for efficient and accurate classification of malaria-infected and healthy cells. It takes an image of a blood cell as input and performs binary classification into two classes: Parasitized (infected) and Uninfected (healthy). The model produces a single sigmoid score that is used as parasitized_confidence, and uninfected_confidence is 1 - parasitized_confidence. The predicted class is Parasitized if parasitized_confidence > uninfected_confidence; otherwise it is Uninfected.

#Import Required Libraries

In [None]:
!pip install opendatasets



In [None]:
import opendatasets as od
import pandas as pd
import tensorflow as tf
import cv2
import numpy as np
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
from sklearn.model_selection import KFold
from tensorflow.keras.callbacks import EarlyStopping

#Load and Preprocess Dataset

In [None]:
# Download the malaria cell-image dataset from Kaggle via opendatasets.
# Per the captured log, this prompts for Kaggle credentials and downloads into
# ./cell-images-for-detecting-malaria (relative to the working directory).
od.download('https://www.kaggle.com/datasets/iarunava/cell-images-for-detecting-malaria')

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: shabrinaazadirach
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/iarunava/cell-images-for-detecting-malaria
Downloading cell-images-for-detecting-malaria.zip to ./cell-images-for-detecting-malaria


100%|██████████| 675M/675M [00:30<00:00, 23.1MB/s]





In [None]:
# Folder that holds one sub-directory of images per class.
data_dir = "/content/cell-images-for-detecting-malaria/cell_images/cell_images"

# Number of samples requested per generator batch.
batch_size = 32

# Target size passed to cv2.resize in custom_preprocess; it matches the
# (224, 224, 3) default input shape of build_model.
image_size = (224, 224)

In [None]:
def custom_preprocess(image):
    """Turn a BGR image into the normalized 3-channel array fed to the model.

    The image is converted to grayscale, histogram-equalized, divided by
    255.0, replicated three times along a new last axis and finally resized
    to the module-level ``image_size``.

    Args:
        image: BGR image array (for example the result of ``cv2.imread``),
            or ``None``.

    Returns:
        The resized 3-channel array produced by ``cv2.resize``.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    if image is None:
        raise ValueError("Image is None. Skipping.")

    # Equalize on the single-channel image; equalizeHist works on grayscale.
    equalized = cv2.equalizeHist(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
    scaled = equalized / 255.0

    # Replicate the gray channel so the array has the 3 channels the
    # network input expects.
    three_channel = np.repeat(scaled[..., np.newaxis], 3, axis=-1)
    return cv2.resize(three_channel, image_size)

In [None]:
class CustomDataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that reads and preprocesses image batches from disk.

    Every batch is built by loading the files with ``cv2.imread`` and passing
    them through ``custom_preprocess``. Files that fail to load or preprocess
    are reported with ``print`` and dropped, so a batch can contain fewer
    than ``batch_size`` samples.

    Args:
        file_paths: Sequence of image file paths.
        labels: Sequence of numeric labels, aligned with ``file_paths``.
        batch_size: Maximum number of samples per batch.
        shuffle: If true, reorder the samples at construction time and each
            time ``on_epoch_end`` is called.
        **kwargs: Forwarded unchanged to the Keras base-class constructor.

    Raises:
        ValueError: If ``file_paths`` and ``labels`` differ in length.
    """

    def __init__(self, file_paths, labels, batch_size, shuffle=True, **kwargs):
        # Keras emits a warning (visible in the training log) when a Sequence
        # subclass does not run the base-class constructor.
        super().__init__(**kwargs)

        if len(file_paths) != len(labels):
            raise ValueError(
                f"file_paths and labels must have the same length "
                f"(got {len(file_paths)} and {len(labels)})."
            )

        # Private copies: shuffling must never reorder the caller's lists.
        self.file_paths = list(file_paths)
        self.labels = list(labels)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches: ceil(sample count / batch size)."""
        return int(np.ceil(len(self.file_paths) / self.batch_size))

    def __getitem__(self, index):
        """Load batch ``index`` and return ``(images, labels)`` float32 arrays.

        Raises:
            ValueError: If no file in the requested slice could be processed.
        """
        start = index * self.batch_size
        end = start + self.batch_size

        batch_images = []
        valid_labels = []
        for file_path, label in zip(self.file_paths[start:end], self.labels[start:end]):
            try:
                batch_images.append(custom_preprocess(cv2.imread(file_path)))
            except Exception as e:
                # Best effort: one unreadable file must not abort training.
                print(f"Error processing file {file_path}: {e}")
            else:
                # Record the label only when its image was kept, so images
                # and labels stay aligned.
                valid_labels.append(label)

        if not batch_images:
            raise ValueError("No valid images found in this batch.")

        return (
            np.array(batch_images, dtype="float32"),
            np.array(valid_labels, dtype="float32"),
        )

    def on_epoch_end(self):
        """Reorder paths and labels together when shuffling is enabled."""
        if not self.shuffle or not self.file_paths:
            # Nothing to do; this also keeps an empty generator from crashing.
            return

        order = list(range(len(self.file_paths)))
        np.random.shuffle(order)
        # Apply the same order to both lists so each path keeps its label.
        self.file_paths = [self.file_paths[i] for i in order]
        self.labels = [self.labels[i] for i in order]

In [None]:
# Collect image paths and labels: 'Uninfected' -> 0, 'Parasitized' -> 1.
categories = ['Parasitized', 'Uninfected']
# Only files with these extensions are treated as images. The class folders
# also contain non-image entries (Thumbs.db shows up in the training log),
# which cv2.imread cannot decode.
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp')
file_paths = []
labels = []

for category in categories:
    category_path = os.path.join(data_dir, category)
    label = 0 if category == 'Uninfected' else 1
    # os.listdir returns entries in arbitrary order; sorting makes the
    # listing, and therefore the seeded split that follows, reproducible.
    for img_file in sorted(os.listdir(category_path)):
        if not img_file.lower().endswith(image_extensions):
            continue
        file_paths.append(os.path.join(category_path, img_file))
        labels.append(label)

In [None]:
# Stage 1: keep 70% for training and set 30% aside, preserving class balance.
train_paths, temp_paths, train_labels, temp_labels = train_test_split(
    file_paths,
    labels,
    stratify=labels,
    test_size=0.3,
    random_state=42,
)

# Stage 2: cut the held-out 30% in half to get the validation and test sets.
val_paths, test_paths, val_labels, test_labels = train_test_split(
    temp_paths,
    temp_labels,
    stratify=temp_labels,
    test_size=0.5,
    random_state=42,
)

In [None]:
# One batch generator per split. Train and validation keep the generator's
# default shuffle=True; the test generator is created with shuffle=False.
train_gen = CustomDataGenerator(
    file_paths=train_paths, labels=train_labels, batch_size=batch_size
)
val_gen = CustomDataGenerator(
    file_paths=val_paths, labels=val_labels, batch_size=batch_size
)
test_gen = CustomDataGenerator(
    file_paths=test_paths, labels=test_labels, batch_size=batch_size, shuffle=False
)

#Model Architecture

In [None]:
# Function to build the model with Dropout and L2 Regularization
def build_model(input_shape=(224, 224, 3)):
    """Build and compile the binary classifier on a frozen MobileNetV2 base.

    The head is global average pooling followed by two L2-regularized ReLU
    dense layers (128 and 64 units), each followed by 50% dropout, and a
    single sigmoid output unit.

    Args:
        input_shape: Shape of one input image passed to MobileNetV2.

    Returns:
        A Keras ``Model`` compiled with Adam (learning rate 0.001),
        binary cross-entropy loss and the accuracy metric.
    """
    backbone = MobileNetV2(weights='imagenet', include_top=False, input_shape=input_shape)
    backbone.trainable = False  # only the new head is trained

    features = GlobalAveragePooling2D()(backbone.output)
    for units in (128, 64):
        features = Dense(units, activation='relu', kernel_regularizer=l2(0.01))(features)
        features = Dropout(0.5)(features)
    probability = Dense(1, activation='sigmoid')(features)

    classifier = Model(inputs=backbone.input, outputs=probability)
    classifier.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=0.001),
        metrics=['accuracy'],
    )
    return classifier

#Model Training

In [None]:
# K-Fold Cross-Validation
def train_with_kfold(file_paths, labels, batch_size=32, num_folds=5):
    """Train and validate a fresh model on each split of a K-fold partition.

    For every fold, the samples selected by the fold's train/validation
    indices are wrapped in their own ``CustomDataGenerator`` instances, a new
    model is built with ``build_model`` and trained for up to 10 epochs with
    early stopping on ``val_loss``.

    Args:
        file_paths: Indexable sequence of image file paths.
        labels: Indexable sequence of labels aligned with ``file_paths``.
        batch_size: Batch size used by the per-fold generators.
        num_folds: Number of folds passed to ``KFold``.

    Returns:
        List with one Keras ``History`` object per fold, in fold order.
    """
    kfold = KFold(n_splits=num_folds, shuffle=True, random_state=42)
    histories = []

    for fold_no, (train_idx, val_idx) in enumerate(kfold.split(file_paths), start=1):
        print(f"\nTraining for fold {fold_no}...")

        # Build the generators from this fold's indices. Reusing fixed global
        # generators would make every fold train and validate on the same data.
        fold_train_gen = CustomDataGenerator(
            [file_paths[i] for i in train_idx],
            [labels[i] for i in train_idx],
            batch_size,
        )
        fold_val_gen = CustomDataGenerator(
            [file_paths[i] for i in val_idx],
            [labels[i] for i in val_idx],
            batch_size,
            shuffle=False,
        )

        # A new model per fold so no weights carry over between folds.
        model = build_model()

        early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

        history = model.fit(
            fold_train_gen,
            validation_data=fold_val_gen,
            epochs=10,
            callbacks=[early_stopping]
        )
        histories.append(history)

        val_loss, val_accuracy = model.evaluate(fold_val_gen, verbose=0)
        print(f"Fold {fold_no} - Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

    return histories

In [None]:
# Run the K-fold training routine and keep the per-fold History objects.
histories = train_with_kfold(
    file_paths=file_paths,
    labels=labels,
    batch_size=batch_size,
    num_folds=5,
)


Training for fold 1...
Epoch 1/10


  self._warn_if_super_not_called()


[1m 80/603[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m27s[0m 52ms/step - accuracy: 0.6627 - loss: 3.1795Error processing file /content/cell-images-for-detecting-malaria/cell_images/cell_images/Uninfected/Thumbs.db: Image is None. Skipping.
[1m603/603[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - accuracy: 0.7972 - loss: 1.6482Error processing file /content/cell-images-for-detecting-malaria/cell_images/cell_images/Parasitized/Thumbs.db: Image is None. Skipping.
[1m603/603[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 122ms/step - accuracy: 0.7973 - loss: 1.6470 - val_accuracy: 0.8918 - val_loss: 0.3978
Epoch 2/10
[1m514/603[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m5s[0m 62ms/step - accuracy: 0.8748 - loss: 0.4350Error processing file /content/cell-images-for-detecting-malaria/cell_images/cell_images/Uninfected/Thumbs.db: Image is None. Skipping.
[1m602/603[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 62ms/step - accuracy: 0.8752 - l

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

# Plot results
def plot_kfold_results(histories):
    """Plot per-fold loss and accuracy curves in two side-by-side panels.

    Training curves are drawn with the default line style and validation
    curves dashed; every curve is labelled with its 1-based fold number.

    Args:
        histories: Sequence of objects exposing a ``history`` mapping with
            the keys 'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
    """
    plt.figure(figsize=(14, 6))

    # (history key, display name) for the left and right panel respectively.
    panels = (('loss', 'Loss'), ('accuracy', 'Accuracy'))
    for position, (metric, pretty_name) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for fold_number, fold_history in enumerate(histories, start=1):
            curves = fold_history.history
            plt.plot(curves[metric], label=f'Fold {fold_number} Train {pretty_name}')
            plt.plot(
                curves[f'val_{metric}'],
                linestyle="--",
                label=f'Fold {fold_number} Val {pretty_name}',
            )
        plt.title(f'{pretty_name} Across Folds')
        plt.xlabel('Epochs')
        plt.ylabel(pretty_name)
        plt.legend()

    plt.tight_layout()
    plt.show()

# Draw the loss/accuracy curves for the histories returned by train_with_kfold.
plot_kfold_results(histories)

#Model Evaluation

In [None]:
# No module-level `model` exists before this cell: the models created during
# cross-validation are locals of train_with_kfold. Fit a final model on the
# training split (early-stopped on the validation split) so there is a model
# to score on the held-out test split.
model = build_model()
model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=10,
    callbacks=[EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)],
)

# test_gen is only used here, never for fitting or early stopping.
test_loss, test_accuracy = model.evaluate(test_gen)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

In [None]:
# Save the model to a .h5 file in the shared Google Drive folder.
# NOTE(review): assumes Drive is already mounted under /content/drive; no
# mount step is visible in this notebook, so confirm.
model.save("/content/drive/Shareddrives/TKTB1 PROYEK AKHIR/malaria_classification_model2.h5")



#Try

In [None]:
# Path of the sample image used below to try out the model.
input_image = '/content/WhatsApp Image 2024-11-25 at 17.15.16_f4285b01.jpg'

In [None]:
from tensorflow.keras.models import load_model

In [None]:
# Load the saved model from Drive and bind it to the global `model` that
# predict() reads.
model = load_model('/content/drive/Shareddrives/TKTB1 PROYEK AKHIR/malaria_classification_model2.h5')



In [None]:
# Run the training-time preprocessing on the sample image.
# NOTE(review): `resized_image` is not read by any later cell shown here;
# predict() preprocesses the image again through preprocess_image().
resized_image = custom_preprocess(cv2.imread(input_image))

In [None]:
def preprocess_image(image_path):
    """Load an image file and turn it into a single-sample model input.

    The pixel pipeline is delegated to ``custom_preprocess``, the same helper
    the training generator uses, so inference cannot drift from training.
    As a consequence the resize target is the module-level ``image_size``.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        The array returned by ``custom_preprocess`` with an extra leading
        axis of length 1 (a batch of one).

    Raises:
        ValueError: If ``cv2.imread`` returns ``None`` for ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # Raise here, before delegating, so the message names the path.
        raise ValueError(f"Could not read the image at {image_path}")

    return np.expand_dims(custom_preprocess(image), axis=0)

In [None]:

# Prediction function
def predict(image_path):
    """Classify one image file as "Parasitized" or "Uninfected".

    The global ``model`` is run on the output of ``preprocess_image``. Its
    first output value is used as the Parasitized confidence and one minus
    that value as the Uninfected confidence.

    Args:
        image_path: Path of the image to classify.

    Returns:
        ``{"prediction": <label>, "confidence": {"Parasitized": float,
        "Uninfected": float}}`` on success, or ``{"error": <message>}`` if
        any exception is raised along the way (this function never raises).
    """
    try:
        batch = preprocess_image(image_path)
        raw_score = model.predict(batch)[0][0]

        # Subtract on the raw model output and convert afterwards, so the
        # arithmetic is done in whatever precision the output scalar has.
        confidence = {
            "Parasitized": float(raw_score),
            "Uninfected": float(1 - raw_score),
        }

        if confidence["Parasitized"] > confidence["Uninfected"]:
            label = "Parasitized"
        else:
            label = "Uninfected"

        return {"prediction": label, "confidence": confidence}

    except Exception as e:
        # Callers get the failure as data rather than as an exception.
        return {"error": str(e)}

In [None]:
# Classify the sample image and print the dict returned by predict().
prediction = predict(input_image)
print(prediction)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
{'prediction': 'Uninfected', 'confidence': {'Parasitized': 0.0009809107286855578, 'Uninfected': 0.9990190892713144}}
