In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Conv2D, MaxPool2D, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Set parameters
img_width, img_height = 64, 64
batch_size = 16
num_folds = 7  # K in K-Fold

In [3]:
# Load dataset
datagen = ImageDataGenerator(rescale=1/255.0)
data_generator = datagen.flow_from_directory(directory='malaria_dataset_2',
                                             target_size=(img_width, img_height),
                                             class_mode='binary',
                                             batch_size=batch_size,
                                             shuffle=True)


Found 27558 images belonging to 2 classes.


In [4]:
# Get image paths and labels
X, y = [], []
for i in range(len(data_generator)):
    batch_x, batch_y = data_generator.next()
    X.extend(batch_x)
    y.extend(batch_y)


In [5]:
X = np.array(X)
y = np.array(y)

In [6]:
# Initialize K-Fold
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

In [7]:
fold_results = []

In [8]:
for fold, (train_idx, val_idx) in enumerate(kf.split(X)):
    print(f"\nTraining on Fold {fold+1}/{num_folds}...")



Training on Fold 1/7...

Training on Fold 2/7...

Training on Fold 3/7...

Training on Fold 4/7...

Training on Fold 5/7...

Training on Fold 6/7...

Training on Fold 7/7...


In [9]:
for fold, (train_idx, val_idx) in enumerate(kf.split(X)):
    print(f"\nTraining on Fold {fold+1}/{num_folds}...")

    # Split data
    X_train, X_val = X[train_idx], X[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]


Training on Fold 1/7...

Training on Fold 2/7...

Training on Fold 3/7...

Training on Fold 4/7...

Training on Fold 5/7...

Training on Fold 6/7...

Training on Fold 7/7...


In [10]:
model = Sequential([
        Conv2D(16, (3,3), input_shape=(img_width, img_height, 3), activation='relu'),
        MaxPool2D(2,2),
        Dropout(0.2),

        Conv2D(32, (3,3), activation='relu'),
        MaxPool2D(2,2),
        Dropout(0.3),

        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])

2025-07-04 07:11:32.811888: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-07-04 07:11:32.813426: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1


In [11]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 62, 62, 16)        448       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 31, 31, 16)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 31, 31, 16)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 29, 29, 32)        4640      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 14, 14, 32)       0         
 2D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 14, 14, 32)        0

In [12]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics = ['accuracy'])

In [13]:
# Train model
model.fit(X_train, y_train, epochs=5, batch_size=batch_size, validation_data=(X_val, y_val), verbose=1)

Epoch 1/5


2025-07-04 07:11:44.862450: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2025-07-04 07:11:45.575434: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2025-07-04 07:12:04.290000: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x15f80d870>

In [14]:
 # Evaluate model
y_pred_probs = model.predict(X_val)
y_pred = (y_pred_probs > 0.5).astype(int)

accuracy = accuracy_score(y_val, y_pred)
fold_results.append(accuracy)
    
print(f"Fold {fold+1} Accuracy: {accuracy:.2f}")
print(classification_report(y_val, y_pred))

 12/123 [=>............................] - ETA: 0s 

2025-07-04 07:13:17.768023: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Fold 7 Accuracy: 0.95
              precision    recall  f1-score   support

         0.0       0.96      0.94      0.95      1997
         1.0       0.94      0.96      0.95      1939

    accuracy                           0.95      3936
   macro avg       0.95      0.95      0.95      3936
weighted avg       0.95      0.95      0.95      3936



In [15]:
# Final cross-validation accuracy
print(f"\nMean Accuracy over {num_folds} folds: {np.mean(fold_results):.2f}")


Mean Accuracy over 7 folds: 0.95
