# **DETECTING MALARIA USING CONVOLUTIONAL NEURAL NETWORKS: A TENSORFLOW APPROACH**

*Author: Maureen Kitang'a*

**Introduction:**

Malaria remains a significant public health concern worldwide, particularly in regions with limited access to healthcare resources. Rapid and accurate diagnosis of malaria is crucial for effective treatment and disease management. In recent years, deep learning techniques, particularly Convolutional Neural Networks (CNNs), have shown promising results in medical image analysis tasks, including malaria detection. In this project, we leverage the power of CNNs implemented using TensorFlow to create a robust model for malaria detection.

### *IMPORTING LIBRARIES*

I began by importing numpy, pandas, and matplotlib. I decided to use `Keras` with the `TensorFlow` backend to implement the CNN model, so I imported a number of layers from `tensorflow.keras.layers`, including `Conv2D`, `MaxPool2D`, `Flatten`, `Dense`, `BatchNormalization`, `Dropout`, and `InputLayer`.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D,MaxPool2D,Dropout,Flatten,Dense,BatchNormalization,InputLayer
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import BinaryCrossentropy
from google.colab import drive

### *Getting the Data*

In [None]:
# Load the full 'train' split as (image, label) pairs together with the
# dataset metadata. File shuffling is kept off so the element order is the
# same on every pass: the train/validation/test subsets are carved out by
# position (take/skip), and a changing read order would let examples move
# between those subsets from one epoch to the next.
dataset, dataset_info = tfds.load('malaria',
                                  with_info = True,
                                  as_supervised= True,
                                  shuffle_files= False,
                                  split=['train'])

In [None]:
# Define a function to split the dataset into training, validation, and test sets
def splits(dataset, TRAIN_RATIO, VAL_RATIO, TEST_RATIO):
  """Partition `dataset` by position into train, validation and test subsets.

  Args:
    dataset: Object supporting `len()`, `take(n)` and `skip(n)`.
    TRAIN_RATIO: Fraction of the elements placed in the training subset.
    VAL_RATIO: Fraction of the elements placed in the validation subset.
    TEST_RATIO: Fraction of the elements placed in the test subset.

  Returns:
    A `(train_dataset, val_dataset, test_dataset)` tuple of consecutive,
    non-overlapping slices of `dataset`.
  """
  DATASET_SIZE = len(dataset)
  train_size = int(TRAIN_RATIO * DATASET_SIZE)
  val_size = int(VAL_RATIO * DATASET_SIZE)

  train_dataset = dataset.take(train_size)

  val_test_dataset = dataset.skip(train_size)
  val_dataset = val_test_dataset.take(val_size)

  test_dataset = val_test_dataset.skip(val_size)
  # When the ratios cover the whole dataset, the test subset keeps every
  # remaining element so integer truncation never drops examples. Otherwise
  # it is capped at TEST_RATIO instead of silently absorbing the leftover.
  if TRAIN_RATIO + VAL_RATIO + TEST_RATIO < 1 - 1e-9:
    test_dataset = test_dataset.take(int(TEST_RATIO * DATASET_SIZE))
  return train_dataset, val_dataset, test_dataset

In [None]:
# Fractions of the data assigned to the train, validation, and test subsets
TRAIN_RATIO, VAL_RATIO, TEST_RATIO = 0.8, 0.1, 0.1

# Carve the loaded split into the three subsets
train_dataset, val_dataset, test_dataset = splits(
    dataset[0], TRAIN_RATIO, VAL_RATIO, TEST_RATIO)

# Print the first element of each subset as NumPy values
first_elements = [list(subset.take(1).as_numpy_iterator())
                  for subset in (train_dataset, val_dataset, test_dataset)]
print(*first_elements)

### *DATA VISUALIZATION*

In [None]:
# Visualize a sample of the dataset: the first 16 training images in a 4x4
# grid, each titled with its class name
for i, (image, label) in enumerate(train_dataset.take(16)):
  ax = plt.subplot(4, 4, i + 1)
  plt.imshow(image)
  plt.title(dataset_info.features['label'].int2str(label))
  # Hide the axes on every panel; placed after the loop this call would only
  # affect the last subplot drawn.
  plt.axis('off')

### *DATA PREPROCESSING*

In [None]:
# Preprocessing: bring every image to a fixed square size and divide by 255
IM_SIZE = 224
def resize_rescale(image, label):
  """Return `image` resized to IM_SIZE x IM_SIZE and divided by 255.0, with `label` unchanged."""
  resized = tf.image.resize(image, (IM_SIZE, IM_SIZE))
  return resized / 255.0, label

In [None]:
# Run resize_rescale over each of the three subsets
train_dataset, val_dataset, test_dataset = (
    subset.map(resize_rescale)
    for subset in (train_dataset, val_dataset, test_dataset)
)
train_dataset

In [None]:
# Define batch size: number of examples grouped into each batch when the
# training and validation datasets are batched
BATCH_SIZE = 32

In [None]:
# Verify preprocessing: print the first (image, label) pair of the training
# dataset. The pair is unpacked in the loop header so the printed values come
# from this dataset rather than from variables left over from an earlier cell.
for image, label in train_dataset.take(1):
  print(image, label)

In [None]:
# Shuffle, batch, and prefetch the training and validation datasets
train_dataset = (
    train_dataset
    .shuffle(buffer_size=8, reshuffle_each_iteration=True)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
val_dataset = (
    val_dataset
    .shuffle(buffer_size=8, reshuffle_each_iteration=True)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Print the dataset objects for the training and validation pipelines
for heading, pipeline in (("Train Dataset:", train_dataset),
                          ("Validation Dataset:", val_dataset)):
  print(heading, pipeline)

### *DATA MODELING*

In [None]:
# Convolutional part: two Conv2D -> BatchNormalization -> MaxPool2D stages
# applied to IM_SIZE x IM_SIZE x 3 inputs
conv_stages = [
    InputLayer(input_shape=(IM_SIZE, IM_SIZE, 3)),
    Conv2D(filters=6, kernel_size=6, strides=1, padding='valid', activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=2, strides=2),
    Conv2D(filters=16, kernel_size=3, strides=1, padding='valid', activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=2, strides=2),
]

# Dense part: flatten, two batch-normalized ReLU layers, and a single sigmoid
# output unit
dense_head = [
    Flatten(),
    Dense(100, activation='relu'),
    BatchNormalization(),
    Dense(10, activation='relu'),
    BatchNormalization(),
    Dense(1, activation='sigmoid'),
]

cnn_model = tf.keras.Sequential(conv_stages + dense_head)

cnn_model.summary()


In [None]:
# Small worked example of the loss: four hand-written labels and predicted
# probabilities passed through a BinaryCrossentropy instance
y_true = [0,1,0,0]
y_pred = [0.6, 0.51, 0.94, 1]
bce = tf.keras.losses.BinaryCrossentropy()
bce(y_true, y_pred)

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# tracking. `metrics` is passed as a list, the form documented for
# Model.compile; a bare string is not accepted by newer Keras releases.
cnn_model.compile(optimizer = Adam(learning_rate=0.01),
                    loss = BinaryCrossentropy(),
                    metrics = ['accuracy'])

In [None]:
# Train for 20 epochs with the validation dataset supplied for per-epoch
# validation; the returned object is kept so its metric history can be plotted
history = cnn_model.fit(train_dataset, validation_data=val_dataset, epochs = 20, verbose = 1)

In [None]:
# Training and validation loss curves on one set of axes
for curve in ('loss', 'val_loss'):
  plt.plot(history.history[curve])
plt.title('Model_loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train_loss', 'val_loss'])
plt.show()

In [None]:
# Training and validation accuracy curves on one set of axes
for curve in ('accuracy', 'val_accuracy'):
  plt.plot(history.history[curve])
plt.title('Model_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['train_accuracy', 'val_accuracy'])
plt.show()

### *MODEL EVALUATION AND TESTING*

In [None]:
# Batch the test set with one example per batch
test_dataset = test_dataset.batch(1)

In [None]:
# Display the batched test dataset object
test_dataset

In [None]:
# Evaluate the trained model on the test set
cnn_model.evaluate(test_dataset)

In [None]:
def parasite_or_not(x):
  """Return 'P' when `x` is below 0.5, otherwise 'U'."""
  return 'P' if x < 0.5 else 'U'

In [None]:
# Predict on the first test batch and convert its first score to a 'P'/'U' tag
parasite_or_not(cnn_model.predict(test_dataset.take(1))[0][0])

In [None]:
# 3x3 grid of the first nine test images, each titled "<actual>:<predicted>"
for i, (image, label) in enumerate(test_dataset.take(9)):
  ax = plt.subplot(3, 3, i + 1)
  plt.imshow(image[0])
  actual_tag = parasite_or_not(label.numpy()[0])
  predicted_tag = parasite_or_not(cnn_model.predict(image)[0][0])
  plt.title(actual_tag + ":" + predicted_tag)
  plt.axis('off')

In [None]:
# Save the model to the path "CNNSavedModel"
cnn_model.save("CNNSavedModel")

In [None]:
# Save the model a second time, to the file "lenet.hdf5"
cnn_model.save("lenet.hdf5")

In [None]:
# Save only the model's weights, under the path prefix "weights/cnn_weights"
cnn_model.save_weights("weights/cnn_weights")