# Image Classification with CNN for Malaria Data

<img src='https://anmdecolombia.org.co/wp-content/uploads/2021/12/paludismo-malaria.jpg'>
<a href='https://www.kaggle.com/datasets/iarunava/cell-images-for-detecting-malaria' target=_blank>Data dosyasini burada bulabilirsiniz</a>

* Malaria veriseti Infected - Uninfected sınıflarından yaklaşık 27 000 resim içerir, bu data setle geliştireceğimiz CNN - Keras modeliyle enfekte olup olmayanları resim üzerinden tespit edeceğiz, daha sonra modelimizi huggingface yükleyip modele veriğimiz resimlerin % 97 oranında doğru tahminde bulunmasını göreceğiz, çünkü modelimiz %97 başarı kaydetti, bir sonraki aşama olarak ta app inventor ile modelin ceptelefonuna uygulamasını yapacağız.  

In [1]:
#!pip install tensorflow

In [6]:
#!pip install opencv-python

In [2]:
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
from keras.models import Sequential
from keras.layers import Conv2D, Dense, Flatten, Input, MaxPooling2D, Dropout,BatchNormalization, Reshape

In [3]:
labels=['Uninfected','Parasitized'] #resimleri etiketledim
img_path='cell_images/'

In [4]:
img_list=[]                                            # img list ve etiketlerin olduğu bir fonksiyon yazıyoruz
label_list=[]
for label in labels:                                    #cell klasörüne gider
    for img_file in os.listdir(img_path+label):         #listdir ile img klasörüne gidip alt dosyaları bulur
        img_list.append(img_path+label+'/'+img_file)    #data dosyasındaki resimleri img dosyasına ekler
        label_list.append(label)

In [5]:
df=pd.DataFrame({'img':img_list,'label':label_list})

In [6]:
df.head(5)

Unnamed: 0,img,label
0,cell_images/Uninfected/C100P61ThinF_IMG_201509...,Uninfected
1,cell_images/Uninfected/C100P61ThinF_IMG_201509...,Uninfected
2,cell_images/Uninfected/C100P61ThinF_IMG_201509...,Uninfected
3,cell_images/Uninfected/C100P61ThinF_IMG_201509...,Uninfected
4,cell_images/Uninfected/C100P61ThinF_IMG_201509...,Uninfected


In [7]:
d={'Uninfected':0, 'Parasitized':1}
#sözlük oluşturup dosya adlarına sayısal değerler atadı.

In [8]:
df['class']=df['label'].map(d)

In [9]:
df.head(5)

Unnamed: 0,img,label,class
0,cell_images/Uninfected/C100P61ThinF_IMG_201509...,Uninfected,0
1,cell_images/Uninfected/C100P61ThinF_IMG_201509...,Uninfected,0
2,cell_images/Uninfected/C100P61ThinF_IMG_201509...,Uninfected,0
3,cell_images/Uninfected/C100P61ThinF_IMG_201509...,Uninfected,0
4,cell_images/Uninfected/C100P61ThinF_IMG_201509...,Uninfected,0


x=[]
for img in df['img']: 
    img=cv2.imread(str(img))
    img=cv2.resize(img,(170,170)) 
    img=img/255.0 
    x.append(img)

* datada okunmayan hatalı resimler vardı, onları atlayan bir kod yazdık poeyle, fakat bu sfer train test yaparken datada eşitsizlik oldu dimension farklı göründü,

In [11]:
x=[]
for img_path in df['img']:
    img = cv2.imread(str(img_path))
    if img is not None:
        img = cv2.resize(img, (128, 128))
        img = img / 255.0
        x.append(img)

In [12]:
x=np.array(x)

In [13]:
y=df['class']

In [14]:
print(len(x))
print(len(y))
# x ve y sayısına bak

18007
18008


In [15]:
# xvey deki örnek sayısı al ikisini eşitle ve train test yap

In [16]:
from sklearn.utils import resample

n_x_samples = len(x)
y_resampled = resample(y, replace=True, n_samples=n_x_samples, random_state=42)

In [17]:
x_train, x_test, y_train, y_test = train_test_split(x, y_resampled, test_size=0.2, random_state=42)

In [18]:
model = Sequential()
model.add(Input(shape=(128, 128, 3)))

# Daha az filtre ve daha küçük kernel boyutu kullanarak modeli basitleştirebiliriz
model.add(Conv2D(16, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3)) # Dropout oranını biraz azalttık
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(2, activation='softmax'))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [19]:
history=model.fit(x_train,y_train,validation_data=(x_test,y_test), epochs=50, verbose=1)

Epoch 1/50
[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 55ms/step - accuracy: 0.7522 - loss: 0.5740 - val_accuracy: 0.7651 - val_loss: 0.5459
Epoch 2/50
[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 54ms/step - accuracy: 0.7710 - loss: 0.5423 - val_accuracy: 0.7651 - val_loss: 0.5470
Epoch 3/50
[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 52ms/step - accuracy: 0.7655 - loss: 0.5473 - val_accuracy: 0.7651 - val_loss: 0.5467
Epoch 4/50
[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 51ms/step - accuracy: 0.7630 - loss: 0.5482 - val_accuracy: 0.7651 - val_loss: 0.5462
Epoch 5/50
[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 51ms/step - accuracy: 0.7662 - loss: 0.5446 - val_accuracy: 0.7651 - val_loss: 0.5470
Epoch 6/50
[1m451/451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 52ms/step - accuracy: 0.7671 - loss: 0.5400 - val_accuracy: 0.7651 - val_loss: 0.5501
Epoch 7/50
[1m4

In [20]:
model.save('malaria_cnn_model.h5')



In [4]:
#!pip install opencv-python

In [None]:
#!pip install tensorflow