In [None]:
# Libraries

import pandas as pd
import time
import numpy as np
import random
import os
from tqdm import tqdm

# Data Analysis
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns

#TensorFlow
import tensorflow as tf
from tensorflow.keras.preprocessing.image import array_to_img

# Sklearn
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

# Classes Tensorflow
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
from tensorflow.keras.preprocessing import image

%matplotlib inline

In [None]:
## Preprocessing

In [None]:
# Root folder of the dataset; each class lives in '<train_dir>/<class folder>/images'
train_dir = 'D:/documents/GitHub/Radios_COVID19/COVID-19_Radiography_Dataset'


def _list_png(class_folder):
    """Return the names of the '.png' files found in '<train_dir>/<class_folder>/images'."""
    images_dir = f'{train_dir}/{class_folder}/images'
    return [name for name in os.listdir(images_dir) if name.endswith('.png')]


normal_imgs = _list_png('Normal')
covid_imgs = _list_png('COVID')
pneumonia_imgs = _list_png('Viral Pneumonia')
lung_opacity_imgs = _list_png('Lung_Opacity')

In [None]:
# Randomly select `random_sample` images of each class
random_sample = 400

# Seeded generator: without a fixed seed every "Restart & Run All" would draw a
# different subset, making the training results impossible to reproduce.
rng = np.random.default_rng(42)

# replace = False -> no image is picked twice; this raises ValueError if a class
# holds fewer than `random_sample` images.
select_norm = rng.choice(normal_imgs, random_sample, replace = False)
select_covid = rng.choice(covid_imgs, random_sample, replace = False)
select_pneumonia = rng.choice(pneumonia_imgs, random_sample, replace = False)
select_lung_opacity = rng.choice(lung_opacity_imgs, random_sample, replace = False)

In [None]:
# Selected file names, keyed by the class folder they were listed from
selected_by_class = {
    'Normal': select_norm,
    'COVID': select_covid,
    'Viral Pneumonia': select_pneumonia,
    'Lung_Opacity': select_lung_opacity,
}

# Build [filepath, class name] pairs directly from the folder name. The label is
# no longer recovered with a fixed index into the split path, which only worked
# for one particular depth of `train_dir`.
liste = [
    [f'{train_dir}/{class_name}/images/{fn}', class_name]
    for class_name, file_names in selected_by_class.items()
    for fn in file_names
]

In [None]:
# Create a pandas DataFrame with one row per selected image
df = pd.DataFrame(liste, columns = ['filepath', 'nameLabel'])

# Encode each class name as '0', '1', ... in order of first appearance.
# factorize does this in one pass and avoids the str -> int list `replace`.
# The codes are kept as strings, as in the original notebook, because they feed the
# y_col of flow_from_dataframe (class_mode = "sparse") further down.
df['label'] = pd.factorize(df['nameLabel'])[0].astype(str)

print('DataFrame size : ', df.shape)
print(df['nameLabel'].value_counts())
df.head()

In [None]:
# Load and display one example image
filepath = df.filepath[9]

im = tf.io.read_file(filepath)
# Only '.png' files are listed from the dataset, so decode with the PNG decoder
im = tf.image.decode_png(im, channels = 1)
# Single-channel image: use a grayscale colormap instead of the default false colours
plt.imshow(im, cmap = 'gray')
plt.axis('off');

In [None]:
size = 256 # Image side in pixels (must match the model's input shape)
ratio_val = 0.2 # Fraction of the dataset held out as the test set
batch_size = 210

# Scale pixel values from [0, 255] to [0, 1]; the same scaling is applied to the
# train and test data so the model sees consistent inputs.
train_data_generator = ImageDataGenerator(rescale = 1./255)
test_data_generator = ImageDataGenerator(rescale = 1./255)

# Split the DataFrame, then create the iterators that will load batches of images.
# stratify keeps the class proportions identical in both sets and random_state
# makes the split reproducible across runs.
train, test = train_test_split(df,
                               test_size = ratio_val,
                               stratify = df['label'],
                               random_state = 42)

print(f"Number of rows in train set: {len(train)}")

train_generator = train_data_generator.flow_from_dataframe(dataframe = train,
                                                           x_col="filepath",
                                                           y_col="label",
                                                           class_mode = "sparse",
                                                           target_size = (size,size),
                                                           batch_size = batch_size,
                                                           color_mode = "grayscale")

print(f"Number of rows in test set: {len(test)}")
# shuffle=False keeps the test batches in DataFrame order, so that predictions can
# be compared with test_generator.classes later on.
test_generator = test_data_generator.flow_from_dataframe(dataframe = test,
                                                         x_col="filepath",
                                                         y_col="label",
                                                         class_mode = "sparse",
                                                         target_size = (size,size),
                                                         batch_size = batch_size,
                                                         color_mode = "grayscale",
                                                         shuffle=False)

In [None]:
## LeNet Model

In [None]:
# Build the sequential model by handing the whole layer stack to the constructor
lenet = Sequential([
    # First convolution / pooling stage
    Conv2D(filters = 30, kernel_size = (5,5), input_shape = [256,256,1], activation = "relu"),
    MaxPooling2D(pool_size = (2,2)),

    # Second convolution / pooling stage
    Conv2D(filters = 16, kernel_size = (3,3), activation = "relu"),
    MaxPooling2D(pool_size = (2,2)),

    # Classifier head: flatten, dropout, then two dense layers (4 softmax outputs)
    Flatten(),
    Dropout(rate = 0.2),
    Dense(units = 128, activation = "relu"),
    Dense(units = 4, activation = "softmax"),
])

lenet.summary()

In [None]:
# Compile the model: sparse categorical cross-entropy loss, Adam optimizer, accuracy metric
lenet.compile(
    optimizer = "Adam",
    loss = "sparse_categorical_crossentropy",
    metrics = ["accuracy"],
)

In [None]:
# Train the model
train_size = train_generator.samples
test_size = test_generator.samples

epochs = 5

# Use Model.fit: fit_generator is deprecated and no longer exists in recent Keras.
# No explicit step counts are passed; Keras takes them from the length of the
# iterators, so every train and test image is used in each epoch. The previous
# `size // batch_size` floor division dropped the last partial batch, which meant
# validation ran on a single batch of the test set.
history = lenet.fit(train_generator,
                    epochs = epochs,
                    validation_data = test_generator)

In [None]:
# Loss and accuracy curves, side by side
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize = (16,5))

# Left panel: training vs. test loss per epoch
ax_loss.plot(history.history["loss"], label = "Train")
ax_loss.plot(history.history["val_loss"], label = "Test")
ax_loss.legend()
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.set_ylim(0, 2)

# Right panel: training vs. test accuracy per epoch
ax_acc.plot(history.history["accuracy"], label = "Train")
ax_acc.plot(history.history["val_accuracy"], label = "Test")
ax_acc.legend()
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')

plt.show();

In [None]:
## Model Performance

In [None]:
# Predictions (class probabilities) on the test set
# Reset the test iterator before predicting
test_generator.reset()
test_pred_lenet = lenet.predict(
    x = test_generator,
    verbose = 1,
)

In [None]:
# Recover the predicted class of each sample: index of the highest probability per row
test_pred_lenet_class = np.argmax(test_pred_lenet, axis = 1)

# Confusion matrix (true classes from the generator vs. predicted classes)
conf_matrix = confusion_matrix(y_true = test_generator.classes,
                               y_pred = test_pred_lenet_class)
print(conf_matrix)

In [None]:
# The generator's class keys are the encoded labels ('0', '1', ...). Map them back
# to the class names stored in df so the report rows are readable.
label_to_name = df.drop_duplicates('label').set_index('label')['nameLabel']

# Order the labels by their class index so the names line up with the predicted indices
ordered_labels = sorted(test_generator.class_indices, key = test_generator.class_indices.get)
target_names = [label_to_name[label] for label in ordered_labels]

print(classification_report(test_generator.classes, test_pred_lenet_class, target_names = target_names))