<a href="https://colab.research.google.com/github/naafey-aamer/Malaria_be_gone/blob/main/CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [112]:
import tensorflow_datasets as tfds
import tensorflow as tf

In [113]:
#from tensorflow.org
(raw_train, raw_validation, raw_test), metadata = tfds.load(
    'malaria',
    split=['train[:80%]', 'train[80%:90%]', 'train[90%:]'],
    with_info=True,
    as_supervised=True,)

In [114]:
IMG_SIZE = 65

#from tensorflow.org
def format_example(image, label):
  image = tf.cast(image, tf.float32)
  image = tf.image.random_flip_left_right(image)  # Model overfits after 3-4 EPOCHS so Randomized Data augmentation applied
  image = image / 255
  image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
  return image, label


train = raw_train.map(format_example)
validation = raw_validation.map(format_example)
test = raw_test.map(format_example)


In [115]:
print(len(train))
print(len(validation))
print(len(test))

22046
2756
2756


In [116]:
BATCH_SIZE = 128 #Lower sized batches are overfitting
SHUFFLE_SIZE = 1000
train_ds = train.shuffle(SHUFFLE_SIZE).batch(BATCH_SIZE)
validation_ds = validation.batch(BATCH_SIZE)
test_ds = test.batch(BATCH_SIZE)

In [117]:
#settled on this shape keeping computational costs in mind and preventing overfitting
#average pooling not working well
model = tf.keras.models.Sequential([
  tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)),
  tf.keras.layers.MaxPooling2D(2, 2),

  tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
  tf.keras.layers.MaxPooling2D(2,2),

  tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
  tf.keras.layers.MaxPooling2D(2,2),

  tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
  tf.keras.layers.MaxPooling2D(2,2),

  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation='relu'),
  tf.keras.layers.Dropout(0.5), #Dropout significantly preventing overfitting
  tf.keras.layers.Dense(2, activation='sigmoid')
])

In [118]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
model.summary()

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_56 (Conv2D)          (None, 63, 63, 32)        896       
                                                                 
 max_pooling2d_55 (MaxPoolin  (None, 31, 31, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_57 (Conv2D)          (None, 29, 29, 64)        18496     
                                                                 
 max_pooling2d_56 (MaxPoolin  (None, 14, 14, 64)       0         
 g2D)                                                            
                                                                 
 conv2d_58 (Conv2D)          (None, 12, 12, 128)       73856     
                                                                 
 max_pooling2d_57 (MaxPoolin  (None, 6, 6, 128)      

In [119]:
from tensorflow.keras.callbacks import EarlyStopping #early stopping to prevent overfitting

EPOCHS = 9
early_stop = EarlyStopping(monitor='val_loss', patience=2)

model.fit(train_ds,
          validation_data=validation_ds,
          epochs=EPOCHS,
          callbacks=[early_stop])


Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9


<keras.callbacks.History at 0x7997c47d7010>

In [120]:
loss, accuracy = model.evaluate(test_ds)
print(f"Loss: {loss}")
print(f"Accuracy: {accuracy}")

Loss: 0.14663340151309967
Accuracy: 0.9535558819770813
