<a href="https://colab.research.google.com/github/thesis17/Afaan-Oromoo-chatGPT/blob/main/%5BCNN%5D_Malaria_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Download the malaria cell-image dataset through kagglehub and keep the
# value it returns (named here as the dataset path) for later cells.
dataset_handle = 'iarunava/cell-images-for-detecting-malaria'
iarunava_cell_images_for_detecting_malaria_path = kagglehub.dataset_download(dataset_handle)

print('Data source import complete.')


In [None]:
import tensorflow as tf
import numpy as np
import cv2
from tqdm import tqdm
import os

# Data Preparation

In [None]:
def load_data_(path_data,categories):
    """Load every readable image of each category, resized to 64x64.

    Args:
        path_data: Directory containing one sub-directory per category.
        categories: Sequence of sub-directory names. The position of a name
            in this sequence becomes the integer label of its images.

    Returns:
        A list of ``[image, label]`` pairs, where ``image`` is the result of
        ``cv2.resize(..., (64, 64))`` and ``label`` is the category index.
        Files that cannot be read or resized are reported and skipped.

    Raises:
        OSError: If a category directory cannot be listed.
    """
    data = []
    for label, category in enumerate(categories):
        path = os.path.join(path_data, category)

        for files in tqdm(os.listdir(path), desc=f'Load : {category}'):
            img = os.path.join(path, files)
            try:
                image_read = cv2.imread(img)
                if image_read is None:
                    # imread returned nothing for this file (e.g. it is not
                    # an image): report it and skip it explicitly instead
                    # of letting resize() fail on None.
                    print(f"Failed {img}")
                    continue
                image_read = cv2.resize(image_read, (64, 64))
            except Exception as exc:
                # Loading stays best-effort, but the failure is now reported
                # and KeyboardInterrupt/SystemExit are no longer swallowed.
                print(f"Failed {img}: {exc}")
                continue
            data.append([image_read, label])
    return data

In [None]:
# Prefer the location reported by the kagglehub download cell so the notebook
# does not depend on Kaggle's mount layout; fall back to the Kaggle input
# directory when that cell was removed.
dataset_root = globals().get(
    'iarunava_cell_images_for_detecting_malaria_path',
    '/kaggle/input/cell-images-for-detecting-malaria',
)
jalur = os.path.join(dataset_root, 'cell_images')

# Order matters: the list index is the label (0 = Parasitized, 1 = Uninfected).
cate = ["Parasitized","Uninfected"]
malaria_data = load_data_(jalur,cate)

# Preprocessing

In [None]:
import matplotlib.pyplot as plt
import random

# Shuffle the [image, label] pairs in place with a fixed seed so the order
# is reproducible between runs.
random.seed(42)
random.shuffle(malaria_data)


# Human-readable names for the integer labels.
labs = {0:'Parasitized',
        1:'Uninfected'}

# NOTE: the seed above only affects the `random` module; this index comes
# from NumPy's generator and therefore changes from run to run.
rands = np.random.randint(1,100)

# cv2.imread yields BGR channel order while matplotlib expects RGB, so
# convert before displaying to get the true colours.
plt.imshow(cv2.cvtColor(malaria_data[rands][0], cv2.COLOR_BGR2RGB))
plt.title(f'Label = {labs[malaria_data[rands][1]]}')
plt.show()

In [None]:
# Separate the [image, label] pairs into two parallel lists.
x = [gambar for gambar, _ in malaria_data]
y = [label_ for _, label_ in malaria_data]

In [None]:
# Separate the [image, label] pairs into two parallel lists.
x = [gambar for gambar, _ in malaria_data]
y = [label_ for _, label_ in malaria_data]

# Convert to arrays: images become float32 scaled by 1/255, labels stay as
# the integers 0 or 1.
x = np.array(x).astype('float32') / 255.0
y = np.array(y)

# Print with 10 decimal digits and no suppression of small values, then show
# the largest value of one sample as a check on the scaling.
np.set_printoptions(precision=10,suppress=False)
print(x[10].max())

# Split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; stratify on the labels and fix
# the seed so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Report the size of each partition.
for subset in (x_train, x_test, y_train, y_test):
    print(len(subset))

# Modelling

In [None]:
# Binary classifier: two Conv2D(150, 4x4, same, relu) + 2x2 max-pool stages,
# then two Dense(200, relu) layers with 50% dropout each and a single sigmoid
# output unit.
models = tf.keras.Sequential([
    # Declare the input with an explicit Input object instead of passing
    # `input_shape=` to the first layer, which newer Keras versions warn about.
    tf.keras.Input(shape=(64,64,3)),

    tf.keras.layers.Conv2D(150,kernel_size=(4,4),
                           padding='same',
                           activation='relu'),

    tf.keras.layers.MaxPooling2D(pool_size=(2,2)),

    tf.keras.layers.Conv2D(150,kernel_size=(4,4),
                           padding='same',
                           activation='relu'),

    tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(200,activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(200,activation='relu'),
    tf.keras.layers.Dropout(0.5),
    # One sigmoid unit: the output is thresholded at 0.5 in the evaluation
    # cells to pick label 0 or 1.
    tf.keras.layers.Dense(1,activation='sigmoid')
])

In [None]:
# Print the layer-by-layer summary of the network defined above.
models.summary()

In [None]:
# Configure training: RMSProp optimizer, binary cross-entropy loss (matching
# the single sigmoid output unit) and accuracy as the reported metric.
models.compile(optimizer='RMSProp',
               loss='binary_crossentropy',
               metrics=['accuracy'])

In [None]:
# Multiply the learning rate by 0.1 when val_loss has not improved for
# 2 consecutive epochs.
lrpletau = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                factor=0.1,
                                                patience=2,
                                                verbose=1)

# Stop training when val_loss has not improved for 3 consecutive epochs.
# restore_best_weights=True rolls the model back to the best epoch; without
# it the later evaluation cells would use the weights of the last (worse)
# epoch.
earlystp = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                            patience=3,
                                            verbose=1,
                                            restore_best_weights=True)

# Training

In [None]:
# Train for at most 15 epochs in batches of 64, holding out 20% of the
# training data for validation (validation_split=0.2). The two callbacks
# monitor val_loss to lower the learning rate and to stop early.
models.fit(x_train,
           y_train,
           epochs=15,
           validation_split=0.2,
           batch_size=64,
           shuffle=True,
           callbacks=[lrpletau,earlystp]
           )

In [None]:
# Per-epoch accuracy recorded by the last fit() call.
plt.plot(models.history.history['accuracy'])
plt.plot(models.history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
# The val_* curve comes from the validation_split hold-out used during
# fit(), not from the test set, so it is labelled accordingly.
plt.legend(['Train','Validation'],loc='upper left')

In [None]:
# Per-epoch loss recorded by the last fit() call.
plt.plot(models.history.history['loss'])
plt.plot(models.history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('loss')
plt.xlabel('Epoch')
# The val_* curve comes from the validation_split hold-out used during
# fit(), not from the test set, so it is labelled accordingly.
plt.legend(['Train','Validation'],loc='upper left')

# Evaluating

In [None]:
# Compute the compiled loss and accuracy on the held-out test split.
models.evaluate(x_test,y_test,batch_size=64)

In [None]:
# Show a 3x3 grid of random test images with their actual and predicted labels.
fig, axs = plt.subplots(3,3,figsize=(10,10))

# Draw all sample indices up front so the model runs once on a small batch
# instead of once per image.
samples = np.random.randint(0, len(x_test), size=axs.size)
probs = models.predict(x_test[samples]).ravel()

for ax, samp, prob in zip(axs.flat, samples, probs):
    # Threshold the sigmoid output to obtain the predicted label (0 or 1).
    predicted_label = 1 if prob > 0.5 else 0
    # The images were read with cv2.imread (BGR order); reverse the channel
    # axis so matplotlib, which expects RGB, shows the true colours.
    ax.imshow(x_test[samp][..., ::-1])
    ax.set_title(f'Actual = {labs[y_test[samp]]} \n Pred = {labs[predicted_label]}')

plt.tight_layout() # Adjust layout to prevent overlapping titles
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
# Threshold the sigmoid outputs at 0.5 to obtain hard class predictions,
# then count them against the true test labels.
predicted_mask = models.predict(x_test) > 0.5
conf = confusion_matrix(y_test, predicted_mask)

# Render the counts as an annotated heat map.
sns.heatmap(conf, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()