# Pneumonia detection with CNN and pneumonia highlighting.

# We have a dataset with 4 classes:
## 3270 - Normal   
## 3001 - Pneumonia-Bacterial   
## 1656 - Pneumonia-Viral    
## 1281 - COVID-19   

# split data into various folders

In [1]:
# Colab only: mount Google Drive at /content/drive so the dataset paths under
# /content/drive/MyDrive used by the following cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import cv2
import datetime
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, recall_score

from tensorflow.keras import backend as K
from tensorflow.keras import layers
from tensorflow.keras import Model, Sequential
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator


print(tf.__version__)

2.15.0


In [None]:
# List every directory under the dataset root (one sub-folder per class).
dataset_root = '/content/drive/MyDrive/pneumonia_detection/3-kinds-of-pneumonia/Curated-X-Ray-Dataset/'
for dirname, _, filenames in os.walk(dataset_root):
    print(dirname)

/content/drive/MyDrive/pneumonia_detection/3-kinds-of-pneumonia/Curated-X-Ray-Dataset/
/content/drive/MyDrive/pneumonia_detection/3-kinds-of-pneumonia/Curated-X-Ray-Dataset/COVID-19
/content/drive/MyDrive/pneumonia_detection/3-kinds-of-pneumonia/Curated-X-Ray-Dataset/Normal
/content/drive/MyDrive/pneumonia_detection/3-kinds-of-pneumonia/Curated-X-Ray-Dataset/Pneumonia-Bacterial
/content/drive/MyDrive/pneumonia_detection/3-kinds-of-pneumonia/Curated-X-Ray-Dataset/Pneumonia-Viral


# All filenames

In [None]:
# Collect every file that sits one level below the dataset root (i.e. inside a
# class folder). The glob result is sorted because its order is not guaranteed
# to be stable between runs, and the seeded shuffle / train-val-test splits
# further down are only reproducible when they start from the same ordering.
filenames = sorted(
    tf.io.gfile.glob('/content/drive/MyDrive/pneumonia_detection/3-kinds-of-pneumonia/Curated-X-Ray-Dataset/*/*')
)
print(len(filenames))
filenames[:3]



9208


['/content/drive/MyDrive/pneumonia_detection/3-kinds-of-pneumonia/Curated-X-Ray-Dataset/COVID-19/COVID-19 (1251).jpg',
 '/content/drive/MyDrive/pneumonia_detection/3-kinds-of-pneumonia/Curated-X-Ray-Dataset/COVID-19/COVID-19 (1252).jpg',
 '/content/drive/MyDrive/pneumonia_detection/3-kinds-of-pneumonia/Curated-X-Ray-Dataset/COVID-19/COVID-19 (1253).jpg']

# To DataFrame

In [None]:
# Build the (filename, class) table in a single construction instead of
# enlarging an empty DataFrame one cell at a time, which re-allocates on every
# assignment. The class label is the name of the folder that holds the file.
# With an empty file list this still yields both columns (just zero rows).
data = pd.DataFrame({
    'filename': list(filenames),
    'class': [path.split('/')[-2] for path in filenames],
})

print(data['class'].value_counts(dropna=False))
data

Normal                 3270
Pneumonia-Bacterial    3001
Pneumonia-Viral        1656
COVID-19               1281
Name: class, dtype: int64


Unnamed: 0,filename,class
0,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19
1,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19
2,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19
3,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19
4,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19
...,...,...
9203,/content/drive/MyDrive/pneumonia_detection/3-k...,Pneumonia-Viral
9204,/content/drive/MyDrive/pneumonia_detection/3-k...,Pneumonia-Viral
9205,/content/drive/MyDrive/pneumonia_detection/3-k...,Pneumonia-Viral
9206,/content/drive/MyDrive/pneumonia_detection/3-k...,Pneumonia-Viral


# Shuffle Data

In [None]:
# Shuffle the rows with a fixed seed (42), then renumber them from 0 so that
# the index no longer reflects the original, class-grouped order.
data = shuffle(data, random_state=42).reset_index(drop=True)
data

Unnamed: 0,filename,class
0,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19
1,/content/drive/MyDrive/pneumonia_detection/3-k...,Normal
2,/content/drive/MyDrive/pneumonia_detection/3-k...,Pneumonia-Bacterial
3,/content/drive/MyDrive/pneumonia_detection/3-k...,Pneumonia-Bacterial
4,/content/drive/MyDrive/pneumonia_detection/3-k...,Normal
...,...,...
9203,/content/drive/MyDrive/pneumonia_detection/3-k...,Pneumonia-Bacterial
9204,/content/drive/MyDrive/pneumonia_detection/3-k...,Pneumonia-Bacterial
9205,/content/drive/MyDrive/pneumonia_detection/3-k...,Pneumonia-Bacterial
9206,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19


# Drop non-.jpg files (trash)

In [None]:
# Flag every row whose path does not end in '.jpg' (case-sensitive, exactly as
# before). A boolean mask finds all offending rows in one pass, including
# repeated paths, instead of looking each one up in the whole frame.
# Missing paths (NaN) are treated as "not a .jpg" and are dropped as well.
not_jpg = ~data['filename'].str.endswith('.jpg', na=False)

# Index labels of the rows that are about to be removed.
indexes = data.index[not_jpg].tolist()

# Report what is being dropped: "<index label> <path>".
for idx, bad_path in data.loc[not_jpg, 'filename'].items():
    print(idx, bad_path)

print(data.shape)
data.drop(index=indexes, inplace=True)
data.reset_index(drop=True, inplace=True)
print(data.shape)

(9208, 2)
(9208, 2)


# Images shape

In [None]:
# Print the array shape of a small sample of images (rows 100..119) to show
# how much the resolution varies across the dataset. Positional slicing keeps
# this working (with fewer or no rows) when the frame is shorter than 120 rows.
for path in data['filename'].iloc[100:120]:
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising, so guard before touching `.shape`.
        print(f'could not read image: {path}')
        continue
    print(img.shape)

(890, 1300, 3)
(1024, 1024, 3)
(1179, 1434, 3)
(440, 880, 3)
(1024, 1024, 3)
(219, 546, 3)
(300, 400, 3)
(640, 1112, 3)
(592, 1120, 3)
(1479, 1793, 3)
(584, 1224, 3)
(832, 1096, 3)
(1024, 1024, 3)
(300, 400, 3)
(1456, 1800, 3)
(1164, 1538, 3)
(1024, 1024, 3)
(1024, 1024, 3)
(1028, 1348, 3)
(300, 400, 3)


# SPLIT train_data, test_data

In [None]:
# Hold out 20% of the rows as the test set; stratifying on 'class' keeps the
# class proportions the same in both parts. Seed fixed at 42.
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
    stratify=data['class'],
)

# Per-class row counts: train first, then test.
for split in (train_data, test_data):
    print(split['class'].value_counts(dropna=False))

Normal                 2616
Pneumonia-Bacterial    2400
Pneumonia-Viral        1325
COVID-19               1025
Name: class, dtype: int64
Normal                 654
Pneumonia-Bacterial    601
Pneumonia-Viral        331
COVID-19               256
Name: class, dtype: int64


In [None]:
# Notebook display of the test split (index labels come from the shuffled frame).
test_data

Unnamed: 0,filename,class
2267,/content/drive/MyDrive/pneumonia_detection/3-k...,Pneumonia-Bacterial
6484,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19
2682,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19
2618,/content/drive/MyDrive/pneumonia_detection/3-k...,Normal
1425,/content/drive/MyDrive/pneumonia_detection/3-k...,Pneumonia-Bacterial
...,...,...
1210,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19
4419,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19
55,/content/drive/MyDrive/pneumonia_detection/3-k...,Normal
6762,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19


In [None]:
# Persist the test split (filename, class) without the index column.
test_csv_path = '/content/drive/MyDrive/pneumonia_detection/3-kinds-of-pneumonia/test_data.csv'
test_data.to_csv(test_csv_path, index=False)


# SPLIT train_data, val_data

In [None]:
# Carve 10% of the remaining training rows off as the validation set, again
# stratified on 'class' and seeded with 42.
train_data, val_data = train_test_split(
    train_data,
    test_size=0.1,
    random_state=42,
    stratify=train_data['class'],
)

# Per-class row counts: train first, then validation.
for subset in (train_data, val_data):
    print(subset['class'].value_counts(dropna=False))

Normal                 2354
Pneumonia-Bacterial    2160
Pneumonia-Viral        1192
COVID-19                923
Name: class, dtype: int64
Normal                 262
Pneumonia-Bacterial    240
Pneumonia-Viral        133
COVID-19               102
Name: class, dtype: int64


In [None]:
# Persist the validation split (filename, class) without the index column.
val_csv_path = '/content/drive/MyDrive/pneumonia_detection/3-kinds-of-pneumonia/val_data.csv'
val_data.to_csv(val_csv_path, index=False)


In [None]:
# Notebook display of the final training split (after the validation rows were removed).
train_data

Unnamed: 0,filename,class
806,/content/drive/MyDrive/pneumonia_detection/3-k...,Pneumonia-Bacterial
4547,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19
814,/content/drive/MyDrive/pneumonia_detection/3-k...,Normal
3067,/content/drive/MyDrive/pneumonia_detection/3-k...,Normal
5839,/content/drive/MyDrive/pneumonia_detection/3-k...,Pneumonia-Viral
...,...,...
7385,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19
7168,/content/drive/MyDrive/pneumonia_detection/3-k...,Pneumonia-Bacterial
3846,/content/drive/MyDrive/pneumonia_detection/3-k...,Pneumonia-Bacterial
2071,/content/drive/MyDrive/pneumonia_detection/3-k...,Normal


In [None]:
# Notebook display of the validation split.
val_data

Unnamed: 0,filename,class
7133,/content/drive/MyDrive/pneumonia_detection/3-k...,Pneumonia-Viral
923,/content/drive/MyDrive/pneumonia_detection/3-k...,Normal
1552,/content/drive/MyDrive/pneumonia_detection/3-k...,Normal
7931,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19
8821,/content/drive/MyDrive/pneumonia_detection/3-k...,Normal
...,...,...
1369,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19
3331,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19
2644,/content/drive/MyDrive/pneumonia_detection/3-k...,COVID-19
3744,/content/drive/MyDrive/pneumonia_detection/3-k...,Normal


In [None]:
from PIL import Image

def generate_folders(dataframe,
                     dataname="train",
                     destination_folder='/content/drive/MyDrive/pneumonia_detection/3-kinds-of-pneumonia/train'):
    """Copy every image listed in ``dataframe`` into per-class sub-folders.

    Each file ends up at ``<destination_folder>/<class>/<original file name>``.
    Files are copied byte-for-byte rather than decoded and re-saved, so JPEGs
    are not re-compressed and no image handles are left open.

    Args:
        dataframe: DataFrame whose first column holds the source image path and
            whose second column holds the class label. Columns are read by
            position, so their names do not matter; extra columns are ignored.
        dataname: Name of the split ("train", "val", "test", ...). Accepted for
            backward compatibility; it does not influence where files are
            written -- only ``destination_folder`` does.
        destination_folder: Root folder that receives the class sub-folders.
            It and the class sub-folders are created when missing.

    Raises:
        OSError: If a source file cannot be read or a destination cannot be
            created or written.
    """
    import shutil  # local import keeps this cell self-contained

    # Remember which class folders already exist so each is created only once.
    prepared_folders = set()

    # Plain tuples give positional access without the deprecated integer
    # indexing of a label-indexed row Series.
    for source_path, label, *_ in dataframe.itertuples(index=False, name=None):
        class_folder = os.path.join(destination_folder, label)
        if class_folder not in prepared_folders:
            # Saving into a missing folder fails, so create it up front.
            os.makedirs(class_folder, exist_ok=True)
            prepared_folders.add(class_folder)

        target_path = os.path.join(class_folder, os.path.basename(source_path))
        shutil.copyfile(source_path, target_path)


In [None]:
# Materialise the test split on disk, one sub-folder per class.
test_folder = '/content/drive/MyDrive/pneumonia_detection/3-kinds-of-pneumonia/test'
generate_folders(
    dataframe=test_data,
    dataname="test",
    destination_folder=test_folder,
)