In [2]:
import warnings
warnings.filterwarnings('ignore')

In [10]:
import os
import numpy as np 
import pandas as pd 

from sklearn.model_selection import train_test_split 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras import Sequential
from keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense
from keras.callbacks import EarlyStopping 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg19 import VGG19
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras import models, layers
from keras.applications import resnet50
from keras.models import Model
from keras.layers import Input

import matplotlib.pyplot as plt
import cv2

In [4]:
for dirname, _, filenames in os.walk('./data'):
    print(dirname)

./data
./data/Uninfected
./data/Parasitized
./data/cell_images
./data/cell_images/Uninfected
./data/cell_images/Parasitized


In [5]:
def data_prep(parasitized, uninfected):
    
    parasitized = ["./data/Parasitized" + '/' +  parasitize for parasitize in parasitized]
    uninfected = ["./data/Uninfected" + '/' +  uninfect for uninfect in uninfected]

    labels = len(parasitized) * ['parasitized'] + len(uninfected) * ['uninfected']
    data = parasitized + uninfected

    return pd.DataFrame({'Image_Path': data , 'Labels': labels})

In [6]:
df = data_prep(os.listdir("./data/Parasitized/"), os.listdir("./data/Uninfected/"))

In [7]:
print("Shape of the DataFrame:", df.shape)
print("\nHead of the DataFrame:")
print(df.head())
print("\nTail of the DataFrame:")
print(df.tail())

Shape of the DataFrame: (27558, 2)

Head of the DataFrame:
                                          Image_Path       Labels
0  ./data/Parasitized/C137P98ThinF_IMG_20151005_1...  parasitized
1  ./data/Parasitized/C133P94ThinF_IMG_20151004_1...  parasitized
2  ./data/Parasitized/C39P4thinF_original_IMG_201...  parasitized
3  ./data/Parasitized/C70P31_ThinF_IMG_20150819_1...  parasitized
4  ./data/Parasitized/C132P93ThinF_IMG_20151004_1...  parasitized

Tail of the DataFrame:
                                              Image_Path      Labels
27553  ./data/Uninfected/C145P106ThinF_IMG_20151016_1...  uninfected
27554  ./data/Uninfected/C72P33_ThinF_IMG_20150815_10...  uninfected
27555  ./data/Uninfected/C101P62ThinF_IMG_20150918_15...  uninfected
27556  ./data/Uninfected/C7NthinF_IMG_20150611_105444...  uninfected
27557  ./data/Uninfected/C183P144NThinF_IMG_20151201_...  uninfected


In [11]:

# Function to load and preprocess images
def load_images(file_paths, target_size=(64, 64)):
    images = []
    for path in file_paths:
        img = cv2.imread(path)
        img = cv2.resize(img, target_size)
        images.append(img)
    return np.array(images)

# Load and preprocess images
X = load_images(df['Image_Path'])
y = df['Labels']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build an improved CNN model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(BatchNormalization())  
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(BatchNormalization())
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(256, activation='relu'))  
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))  
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))  
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the improved CNN model
model.fit(X_train, (y_train == 'parasitized').astype(int), epochs=30, batch_size=32, validation_split=0.2)

# Extract features from the CNN model
cnn_features_train = model.predict(X_train)
cnn_features_test = model.predict(X_test)

# Reshape features for KNN
cnn_features_train = cnn_features_train.reshape(cnn_features_train.shape[0], -1)
cnn_features_test = cnn_features_test.reshape(cnn_features_test.shape[0], -1)

# Build a KNN model
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(cnn_features_train, (y_train == 'parasitized'))

# Predict using the hybrid model
knn_predictions = knn_model.predict(cnn_features_test)

# Evaluate the hybrid model
accuracy = accuracy_score((y_test == 'parasitized'), knn_predictions)
print()
print(f"Accuracy of the hybrid model: {accuracy*100}")

# Save models
if not os.path.exists('cnn-knn-models'):
    os.makedirs('cnn-knn-models')

# Save CNN model
knn_model.save('cnn-knn-models/knn_model.h5')

# Save KNN model
import joblib
joblib.dump(knn_model, 'cnn-knn-models/knn_model.joblib')

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30

Accuracy of the hybrid model: 95.17416545718433


['cnn-knn-models/knn_model.joblib']