In [1]:
# Import des 

# Import des packages 
import pandas as pd
import numpy as np
import pathlib
import os
import gc
#from scipy import sparse

# Visualisation
import seaborn as sns
import matplotlib.pyplot as plt

import cv2

# Keras et tensorflow
import tensorflow as tf

from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dropout 
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D 
from tensorflow.keras.layers import MaxPooling2D

from tensorflow.keras.optimizers import Adam

from tensorflow.keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay



# Root folder of the project data (machine-specific absolute path).
path_to_data = r"C:\Users\Inrae\Documents\Projet_Data_Science"

# Raw dataset folder and folder of the processed images, both under the root.
data_folder_path, output_path = (
    os.path.join(path_to_data, sub_folder)
    for sub_folder in ("COVID-19_Radiography_Dataset", "processed")
)

# Target image size and batch size used when loading the data; the data are
# read from the processed folder.
size = final_size = (224, 224)
batch_size = 16
data_dir = output_path

# Folder named "divided" and its train / val / test sub-folders
# (presumably a pre-split copy of the dataset - confirm).
div_dir = os.path.join(path_to_data, "divided")
train_dir, val_dir, test_dir = (
    os.path.join(div_dir, split_name) for split_name in ("train", "val", "test")
)



In [None]:
# Query the GPUs visible to TensorFlow, then report the TensorFlow version
# followed by the list of detected GPU devices.
gpu_devices = tf.config.list_physical_devices('GPU')

print(tf.__version__)
print(gpu_devices)



# En repartant du notebook de MME pour l'import des données
## Preprocessing et export des données masquées

## On importe les images préprocessées et masquées

In [None]:

# Paths and sizes, repeated from the first cell of the notebook.
path_to_data = r"C:\Users\Inrae\Documents\Projet_Data_Science"
data_folder_path = os.path.join(path_to_data, "COVID-19_Radiography_Dataset")
output_path = os.path.join(path_to_data, "processed")
final_size = (224, 224)

batch_size = 16
data_dir = output_path
size = final_size

# Options shared by the two loaders. The colour mode is left at its default
# (not grayscale) so that pre-trained models can be used on the images.
# Using the same seed and validation_split for both calls is what makes the
# "training" and "validation" subsets complementary.
loader_options = dict(
    validation_split=0.5,
    seed=42,
    image_size=size,
    batch_size=batch_size,
)

# Half of the images go to training, the other half to validation.
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="training", **loader_options
)
val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="validation", **loader_options
)



In [4]:
# Build X_train, X_test, y_train, y_test as in-memory NumPy arrays.


def _dataset_to_arrays(dataset):
    """Materialise a batched ``(images, labels)`` dataset as two NumPy arrays.

    Parameters
    ----------
    dataset : iterable of ``(images, labels)`` batches whose elements expose
        ``.numpy()`` (here: the datasets created with
        ``image_dataset_from_directory``).

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        All image batches concatenated along the first axis, and all label
        batches concatenated along the first axis.

    The per-batch lists are local to the function, so they are released as
    soon as the concatenated arrays are returned instead of staying alive in
    the notebook namespace next to the full arrays.
    """
    image_batches, label_batches = [], []
    for images, labels in dataset:  # iterating the dataset visits every batch once
        image_batches.append(images.numpy())
        label_batches.append(labels.numpy())
    return np.concatenate(image_batches), np.concatenate(label_batches)


X_train, y_train = _dataset_to_arrays(train_ds)
X_test, y_test = _dataset_to_arrays(val_ds)

# Normalisation: scale pixel values by 1/255.
X_train = X_train / 255
X_test = X_test / 255

# One-hot encoding. The number of classes is taken from the dataset itself so
# that both label matrices have the same width even if one subset happens to
# miss the highest class index.
num_classes = len(train_ds.class_names)
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)


In [7]:
# Image generators feeding the in-memory arrays to the model.

# Random augmentations applied to the training images only: shear, zoom and
# brightness. Flips and width/height shifts are not used.
# NOTE(review): X_train was already divided by 255 in the previous cell;
# confirm that the brightness augmentation behaves as intended on images in
# that value range.
augmentation_options = {
    "shear_range": 0.1,
    "zoom_range": 0.1,
    "brightness_range": (0.4, 0.6),
}
train_datagen = ImageDataGenerator(**augmentation_options)

# The test generator applies no transformation at all.
test_datagen = ImageDataGenerator()

# Batch iterators over the training and test arrays (64 images per batch).
train_dataset = train_datagen.flow(X_train, y_train, batch_size=64)
test_dataset = test_datagen.flow(X_test, y_test, batch_size=64)



# Modèle de DL
## Test d'un premier modèle from scratch

In [None]:
# Baseline convolutional network trained from scratch.

model = Sequential()

# Input: 224x224 images with 3 channels (RGB).
model.add(Input(shape=(224, 224, 3)))

# Four convolution blocks with 16, 32, 64 and 128 filters; each block is a
# 3x3 'same'-padded ReLU convolution followed by a 2x2 max pooling.
for n_filters in (16, 32, 64, 128):
    model.add(Conv2D(n_filters, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D((2, 2)))

# Flatten the feature maps into a vector, then classify with two dense
# layers; the last one outputs the 4 class probabilities.
for head_layer in (
    Flatten(),
    Dense(60, activation='relu'),
    Dense(4, activation='softmax'),
):
    model.add(head_layer)

# Adam optimiser with an explicit learning rate.
learning_rate = 0.001
optimizer = Adam(learning_rate=learning_rate)

model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=["accuracy"],
)

# Training: augmented batches for training, untransformed batches for
# validation (test_datagen applies no augmentation).
batch_size = 64
epochs = 10
fit_options = dict(
    steps_per_epoch=X_train.shape[0] // batch_size,
    validation_data=test_dataset,
    epochs=epochs,
    verbose=True,
)
model_history = model.fit(train_dataset, **fit_options)

## ResNet50 préentraîné sur ImageNet
### En utilisant directement le modèle

In [None]:
# Load a pre-trained model: ResNet50 with ImageNet weights.
# Reference: https://keras.io/api/applications/#finetune-inceptionv3-on-a-new-set-of-classes
from tensorflow.keras.applications.resnet50 import (
    ResNet50,
    decode_predictions,
    preprocess_input,
)

base_model = ResNet50(weights="imagenet")

# Number of layers of the pre-trained network (shown as the cell output).
len(base_model.layers)


#### Sans data augmentation
##### Avec un learning rate de 0.001


In [None]:
# Release what is left over from earlier experiments before building a new
# model. `history` and `cm` are only assigned further down in the notebook, so
# a plain `del` raises NameError on a fresh top-to-bottom run; popping from the
# notebook namespace tolerates names that do not exist yet.
for stale_name in ("history", "model", "base_model", "cm"):
    globals().pop(stale_name, None)
tf.keras.backend.clear_session()
gc.collect()

# Split parameters.
train_ratio = 0.7  # share of training data (informational; not passed to Keras)
validation_ratio = 0.3  # share of validation data, used as validation_split
batch_size = 64

# No augmentation in this experiment: the generator only applies the ResNet50
# preprocessing function and reserves `validation_ratio` of the files for the
# validation subset.
datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=validation_ratio,
)

# TODO(review): open question from the original author - is the validation
# subset really independent from the training subset when both iterators are
# created from the same generator?
train_dataset = datagen.flow_from_directory(
    data_dir,
    batch_size=batch_size,
    class_mode="categorical",
    target_size=(224, 224),
    subset='training',
    seed=42,
    shuffle=True,
)

# shuffle=False keeps the validation files in a fixed order.
test_dataset = datagen.flow_from_directory(
    data_dir,
    batch_size=batch_size,
    class_mode="categorical",
    target_size=(224, 224),
    subset='validation',
    seed=42,
    shuffle=False,
)

learning_rate = 0.001

# Rebuild the network from scratch so that weights fitted in a previous
# experiment are not carried over.
# NOTE(review): ResNet50 is instantiated with its default arguments, so the
# new dense head is presumably stacked on top of the ImageNet classification
# output - confirm whether `include_top=False` was intended.
base_model = ResNet50(weights='imagenet')
x = base_model.output
x = Flatten()(x)
x = Dense(60, activation='relu')(x)
predictions = Dense(4, activation='softmax')(x)

# Freeze every layer of the pre-trained ResNet50; only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

model = Model(inputs=base_model.input, outputs=predictions)

# Adam optimiser with the learning rate chosen for this experiment.
optimizer = Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Train the new head for a few epochs; both iterators deliver preprocessed,
# non-augmented images.
history = model.fit(
    train_dataset,
    steps_per_epoch=train_dataset.n // batch_size,
    validation_data=test_dataset,
    epochs=10,
    verbose=True,
)

In [None]:
# Training curves: loss on the left panel, accuracy on the right panel, each
# showing the training series and the validation series (labelled 'test').
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

for ax, metric, short_name in ((loss_ax, 'loss', 'loss'), (acc_ax, 'accuracy', 'acc')):
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title('Model ' + short_name + ' by epoch')
    ax.set_ylabel(short_name)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'test'], loc='right')

plt.show()

In [None]:
# Recreate the validation iterator so that prediction starts from its first
# batch; shuffle=False keeps the predictions aligned with `.classes`.
test_dataset = datagen.flow_from_directory(
    data_dir,
    batch_size=64,
    class_mode="categorical",
    target_size=(224, 224),
    subset='validation',
    seed=42,
    shuffle=False,
)
labels = list(test_dataset.class_indices.keys())

# The second positional parameter of `Model.predict` is `batch_size`, not the
# number of steps, so the step count is passed by keyword. `len(test_dataset)`
# is the number of batches needed to see every image exactly once, whereas
# `n // batch_size + 1` asks for one batch too many when `n` is a multiple of
# the batch size.
Y_pred = model.predict(test_dataset, steps=len(test_dataset))
y_pred = np.argmax(Y_pred, axis=1)  # predicted class index for each image

print('Confusion Matrix')
cm = confusion_matrix(test_dataset.classes, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=labels)
disp.plot(cmap='Blues')

print('Classification Report')
print(classification_report(test_dataset.classes, y_pred, target_names=labels))

##### Avec un learning rate de 0.0001

In [None]:
# Reset the Keras state and collect garbage before building a new model.
tf.keras.backend.clear_session()
gc.collect()

# Split parameters.
train_ratio = 0.7  # share of training data
validation_ratio = 0.3  # share of validation data
batch_size = 64

# The generator applies the ResNet50 preprocessing function and reserves
# `validation_ratio` of the files for the validation subset.
datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=validation_ratio,
)

# Options shared by the training and validation iterators.
flow_options = dict(
    batch_size=64,
    class_mode="categorical",
    target_size=(224, 224),
    seed=42,
)
train_dataset = datagen.flow_from_directory(
    data_dir, subset='training', shuffle=True, **flow_options
)
test_dataset = datagen.flow_from_directory(
    data_dir, subset='validation', shuffle=False, **flow_options
)

learning_rate = 0.0001

# Rebuild the network from scratch so that previously fitted weights are not
# carried over, and freeze every layer of the pre-trained ResNet50.
base_model = ResNet50(weights='imagenet')
for layer in base_model.layers:
    layer.trainable = False

# New head: flatten, a 60-unit ReLU layer, and a 4-way softmax output.
x = Flatten()(base_model.output)
x = Dense(60, activation='relu')(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Adam optimiser with the learning rate chosen for this experiment.
optimizer = Adam(learning_rate=learning_rate)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=["accuracy"],
)

# Train for a few epochs, validating on the validation subset.
history = model.fit(
    train_dataset,
    steps_per_epoch=train_dataset.n // batch_size,
    validation_data=test_dataset,
    epochs=5,
    verbose=True,
)

##### Avec un learning rate de 0.01

In [None]:
# Reset the Keras state and collect garbage before building a new model.
tf.keras.backend.clear_session()
gc.collect()

# Split parameters.
train_ratio = 0.7  # share of training data
validation_ratio = 0.3  # share of validation data
batch_size = 64

# The generator applies the ResNet50 preprocessing function and reserves
# `validation_ratio` of the files for the validation subset.
datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=validation_ratio,
)

# Options shared by the training and validation iterators.
flow_options = dict(
    batch_size=64,
    class_mode="categorical",
    target_size=(224, 224),
    seed=42,
)
train_dataset = datagen.flow_from_directory(
    data_dir, subset='training', shuffle=True, **flow_options
)
test_dataset = datagen.flow_from_directory(
    data_dir, subset='validation', shuffle=False, **flow_options
)

learning_rate = 0.01

# Rebuild the network from scratch so that previously fitted weights are not
# carried over, and freeze every layer of the pre-trained ResNet50.
base_model = ResNet50(weights='imagenet')
for layer in base_model.layers:
    layer.trainable = False

# New head: flatten, a 60-unit ReLU layer, and a 4-way softmax output.
x = Flatten()(base_model.output)
x = Dense(60, activation='relu')(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Adam optimiser with the learning rate chosen for this experiment.
optimizer = Adam(learning_rate=learning_rate)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=["accuracy"],
)

# Train for a few epochs, validating on the validation subset.
history = model.fit(
    train_dataset,
    steps_per_epoch=train_dataset.n // batch_size,
    validation_data=test_dataset,
    epochs=10,
    verbose=True,
)

In [None]:
# Training curves: loss on the left panel, accuracy on the right panel, each
# showing the training series and the validation series (labelled 'test').
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

for ax, metric, short_name in ((loss_ax, 'loss', 'loss'), (acc_ax, 'accuracy', 'acc')):
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title('Model ' + short_name + ' by epoch')
    ax.set_ylabel(short_name)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'test'], loc='right')

plt.show()

In [None]:
# Recreate the validation iterator so that prediction starts from its first
# batch; shuffle=False keeps the predictions aligned with `.classes`.
# The iterator must come from `datagen`, the generator the model was trained
# with: when the notebook is run top to bottom, `test_datagen` is still the
# plain `ImageDataGenerator()` created for the from-scratch model, which has
# neither a validation split nor the ResNet50 preprocessing function.
test_dataset = datagen.flow_from_directory(
    data_dir,
    batch_size=64,
    class_mode="categorical",
    target_size=(224, 224),
    subset='validation',
    seed=42,
    shuffle=False,
)
labels = list(test_dataset.class_indices.keys())

# The second positional parameter of `Model.predict` is `batch_size`, not the
# number of steps, so the step count is passed by keyword. `len(test_dataset)`
# is the number of batches needed to see every image exactly once, whereas
# `n // batch_size + 1` asks for one batch too many when `n` is a multiple of
# the batch size.
Y_pred = model.predict(test_dataset, steps=len(test_dataset))
y_pred = np.argmax(Y_pred, axis=1)  # predicted class index for each image

print('Confusion Matrix')
cm = confusion_matrix(test_dataset.classes, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=labels)
disp.plot(cmap='Blues')

print('Classification Report')
print(classification_report(test_dataset.classes, y_pred, target_names=labels))

##### ReduceLROnPlateau et early stopping

In [None]:
# Release the previous model before building a new one. Popping from the
# notebook namespace (instead of `del`) keeps the cell runnable even when the
# names do not exist, e.g. right after a kernel restart.
for stale_name in ("model", "base_model"):
    globals().pop(stale_name, None)
tf.keras.backend.clear_session()
gc.collect()

# Split parameters.
train_ratio = 0.7  # share of training data (informational; not passed to Keras)
validation_ratio = 0.3  # share of validation data, used as validation_split
batch_size = 64

# No augmentation: the generator only applies the ResNet50 preprocessing
# function and reserves `validation_ratio` of the files for validation.
datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=validation_ratio,
)

train_dataset = datagen.flow_from_directory(
    data_dir,
    batch_size=batch_size,
    class_mode="categorical",
    target_size=(224, 224),
    subset='training',
    seed=42,
    shuffle=True,
)
test_dataset = datagen.flow_from_directory(
    data_dir,
    batch_size=batch_size,
    class_mode="categorical",
    target_size=(224, 224),
    subset='validation',
    seed=42,
    shuffle=False,
)

# Rebuild the network from scratch so that previously fitted weights are not
# carried over.
base_model = ResNet50(weights='imagenet')
x = base_model.output
x = Flatten()(x)
x = Dense(60, activation='relu')(x)
predictions = Dense(4, activation='softmax')(x)

# Freeze every layer of the pre-trained ResNet50; only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

model = Model(inputs=base_model.input, outputs=predictions)

# Start from a high learning rate; the callback below lowers it on plateaus.
optimizer = Adam(learning_rate=0.01)

# Divide the learning rate by 10 (down to 1e-4 at most) when val_loss stops
# improving by at least 0.05. The patience is set explicitly to 1 epoch: with
# the Keras default (10 epochs) the early stopping below, whose patience is 2,
# would normally end the training before the learning rate is ever reduced.
lr_callback = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=1,
    min_lr=0.0001,
    min_delta=0.05,
    verbose=1,
)

# Stop when val_loss has not improved by at least 0.01 for 2 epochs in a row.
stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    mode="min",
    min_delta=0.01,
    verbose=1,
)

my_callbacks = [
    lr_callback,
    stop_callback,
]

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Train for up to 20 epochs. `callbacks` receives the flat list of callbacks
# (not a list wrapped in another list).
history = model.fit(
    train_dataset,
    steps_per_epoch=train_dataset.n // batch_size,
    validation_data=test_dataset,
    epochs=20,
    verbose=True,
    callbacks=my_callbacks,
)

##### LR scheduler et early stop

In [None]:
# Release the previous model before building a new one. Popping from the
# notebook namespace (instead of `del`) keeps the cell runnable even when the
# names do not exist, e.g. right after a kernel restart.
for stale_name in ("model", "base_model"):
    globals().pop(stale_name, None)
tf.keras.backend.clear_session()
gc.collect()

# Split parameters.
train_ratio = 0.7  # share of training data (informational; not passed to Keras)
validation_ratio = 0.3  # share of validation data, used as validation_split
batch_size = 64

# No augmentation: the generator only applies the ResNet50 preprocessing
# function and reserves `validation_ratio` of the files for validation.
datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=validation_ratio,
)

train_dataset = datagen.flow_from_directory(
    data_dir,
    batch_size=batch_size,
    class_mode="categorical",
    target_size=(224, 224),
    subset='training',
    seed=42,
    shuffle=True,
)
test_dataset = datagen.flow_from_directory(
    data_dir,
    batch_size=batch_size,
    class_mode="categorical",
    target_size=(224, 224),
    subset='validation',
    seed=42,
    shuffle=False,
)

# Rebuild the network from scratch so that previously fitted weights are not
# carried over.
base_model = ResNet50(weights='imagenet')
x = base_model.output
x = Flatten()(x)
x = Dense(60, activation='relu')(x)
predictions = Dense(4, activation='softmax')(x)

# Freeze every layer of the pre-trained ResNet50; only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

model = Model(inputs=base_model.input, outputs=predictions)

# Initial learning rate; the scheduler below decays it after the first epochs.
optimizer = Adam(learning_rate=0.01)


def scheduler(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    The current rate ``lr`` is returned unchanged while ``epoch < 5``; for
    every later epoch it is multiplied by ``exp(-0.1)``.
    """
    if epoch < 5:
        return lr
    return lr * np.exp(-0.1)


scheduler_callback = tf.keras.callbacks.LearningRateScheduler(scheduler)

# Stop when val_loss has not improved by at least 0.01 for 2 epochs in a row.
stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    mode="min",
    min_delta=0.01,
    verbose=1,
)

my_callbacks = [
    scheduler_callback,
    stop_callback,
]

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Train for up to 20 epochs. `callbacks` receives the flat list of callbacks
# (not a list wrapped in another list).
history = model.fit(
    train_dataset,
    steps_per_epoch=train_dataset.n // batch_size,
    validation_data=test_dataset,
    epochs=20,
    verbose=True,
    callbacks=my_callbacks,
)


In [None]:
# Training curves: loss on the left panel, accuracy on the right panel, each
# showing the training series and the validation series (labelled 'test').
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

for ax, metric, short_name in ((loss_ax, 'loss', 'loss'), (acc_ax, 'accuracy', 'acc')):
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title('Model ' + short_name + ' by epoch')
    ax.set_ylabel(short_name)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'test'], loc='right')

plt.show()

In [None]:
# Recreate the validation iterator so that prediction starts from its first
# batch; shuffle=False keeps the predictions aligned with `.classes`.
test_dataset = datagen.flow_from_directory(
    data_dir,
    batch_size=64,
    class_mode="categorical",
    target_size=(224, 224),
    subset='validation',
    seed=42,
    shuffle=False,
)
labels = list(test_dataset.class_indices.keys())

# The second positional parameter of `Model.predict` is `batch_size`, not the
# number of steps, so the step count is passed by keyword. `len(test_dataset)`
# is the number of batches needed to see every image exactly once, whereas
# `n // batch_size + 1` asks for one batch too many when `n` is a multiple of
# the batch size.
Y_pred = model.predict(test_dataset, steps=len(test_dataset))
y_pred = np.argmax(Y_pred, axis=1)  # predicted class index for each image

print('Confusion Matrix')
cm = confusion_matrix(test_dataset.classes, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=labels)
disp.plot(cmap='Blues')

print('Classification Report')
print(classification_report(test_dataset.classes, y_pred, target_names=labels))

#### Avec data augmentation, LR scheduler et early stop

In [None]:
# Split parameters.
train_ratio = 0.7  # share of training data (informational; not passed to Keras)
validation_ratio = 0.3  # share of validation data, used as validation_split
batch_size = 64

# Training generator: ResNet50 preprocessing plus random augmentations
# (shear, zoom, brightness, width/height shifts; no horizontal flip).
# NOTE(review): brightness_range=(0.4, 0.6) only produces factors below 1, so
# training images are presumably always darkened while validation images are
# not - confirm this is intended.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=False,
    brightness_range=(0.4, 0.6),
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode="nearest",
    validation_split=validation_ratio,
)

# Validation generator: same preprocessing and same split ratio, but no
# augmentation.
test_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=validation_ratio,
)

# TODO(review): open question from the original author - is the validation
# subset really independent from the training subset in this setup?
train_dataset = train_datagen.flow_from_directory(
    data_dir,
    batch_size=batch_size,
    class_mode="categorical",
    target_size=(224, 224),
    subset='training',
    seed=42,
    shuffle=True,
)
test_dataset = test_datagen.flow_from_directory(
    data_dir,
    batch_size=batch_size,
    class_mode="categorical",
    target_size=(224, 224),
    subset='validation',
    seed=42,
    shuffle=False,
)

# Rebuild the network from scratch so that previously fitted weights are not
# carried over.
base_model = ResNet50(weights='imagenet')
x = base_model.output
x = Flatten()(x)
x = Dense(60, activation='relu')(x)
predictions = Dense(4, activation='softmax')(x)

# Freeze every layer of the pre-trained ResNet50; only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

model = Model(inputs=base_model.input, outputs=predictions)

# Initial learning rate; the scheduler below decays it after the first epochs.
optimizer = Adam(learning_rate=0.01)


def scheduler(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    The current rate ``lr`` is returned unchanged while ``epoch < 3``; for
    every later epoch it is multiplied by ``exp(-0.1)``.
    """
    if epoch < 3:
        return lr
    return lr * np.exp(-0.1)


scheduler_callback = tf.keras.callbacks.LearningRateScheduler(scheduler)

# Stop when val_loss has not improved by at least 0.005 for 2 epochs in a row.
stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    mode="min",
    min_delta=0.005,
    verbose=1,
)

my_callbacks = [
    scheduler_callback,
    stop_callback,
]

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Train for up to 20 epochs on augmented images, validating on non-augmented
# images. `callbacks` receives the flat list of callbacks (not a list wrapped
# in another list).
history = model.fit(
    train_dataset,
    steps_per_epoch=train_dataset.n // batch_size,
    validation_data=test_dataset,
    epochs=20,
    verbose=True,
    callbacks=my_callbacks,
)


In [None]:
# Training curves: loss on the left panel, accuracy on the right panel, each
# showing the training series and the validation series (labelled 'test').
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

for ax, metric, short_name in ((loss_ax, 'loss', 'loss'), (acc_ax, 'accuracy', 'acc')):
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title('Model ' + short_name + ' by epoch')
    ax.set_ylabel(short_name)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'test'], loc='right')

plt.show()


In [None]:

# Recreate the (non-augmented) validation iterator so that prediction starts
# from its first batch; shuffle=False keeps the predictions aligned with
# `.classes`.
test_dataset = test_datagen.flow_from_directory(
    data_dir,
    batch_size=64,
    class_mode="categorical",
    target_size=(224, 224),
    subset='validation',
    seed=42,
    shuffle=False,
)
labels = list(test_dataset.class_indices.keys())

# The second positional parameter of `Model.predict` is `batch_size`, not the
# number of steps, so the step count is passed by keyword. `len(test_dataset)`
# is the number of batches needed to see every image exactly once, whereas
# `n // batch_size + 1` asks for one batch too many when `n` is a multiple of
# the batch size.
Y_pred = model.predict(test_dataset, steps=len(test_dataset))
y_pred = np.argmax(Y_pred, axis=1)  # predicted class index for each image

print('Confusion Matrix')
cm = confusion_matrix(test_dataset.classes, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=labels)
disp.plot(cmap='Blues')

print('Classification Report')
print(classification_report(test_dataset.classes, y_pred, target_names=labels))

### Dégel des paramètres des dernières couches
#### On dégèle les 7 dernières couches

In [None]:
# Reset the Keras state and collect garbage before building a new model.
tf.keras.backend.clear_session()
gc.collect()

# Split parameters.
train_ratio = 0.7  # share of training data (informational; not passed to Keras)
validation_ratio = 0.3  # share of validation data, used as validation_split
batch_size = 64

# Training generator: ResNet50 preprocessing plus random augmentations
# (shear, zoom, brightness, width/height shifts; no horizontal flip).
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=False,
    brightness_range=(0.4, 0.6),
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode="nearest",
    validation_split=validation_ratio,
)

# Validation generator: same preprocessing and same split ratio, but no
# augmentation.
test_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=validation_ratio,
)

# TODO(review): open question from the original author - is the validation
# subset really independent from the training subset in this setup?
train_dataset = train_datagen.flow_from_directory(
    data_dir,
    batch_size=batch_size,
    class_mode="categorical",
    target_size=(224, 224),
    subset='training',
    seed=42,
    shuffle=True,
)
test_dataset = test_datagen.flow_from_directory(
    data_dir,
    batch_size=batch_size,
    class_mode="categorical",
    target_size=(224, 224),
    subset='validation',
    seed=42,
    shuffle=False,
)

# Rebuild the network from scratch so that previously fitted weights are not
# carried over.
base_model = ResNet50(weights='imagenet')
x = base_model.output
x = Flatten()(x)
x = Dense(60, activation='relu')(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Partial fine-tuning: layers of the full model with index below 170 stay
# frozen, every layer from index 170 onwards (the end of the ResNet50 plus the
# new head) is trainable.
first_trainable_index = 170
for layer_index, layer in enumerate(model.layers):
    layer.trainable = layer_index >= first_trainable_index

# Initial learning rate; the scheduler below decays it after the first epochs.
optimizer = Adam(learning_rate=0.01)


def scheduler(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    The current rate ``lr`` is returned unchanged while ``epoch < 3``; for
    every later epoch it is multiplied by ``exp(-0.1)``.
    """
    if epoch < 3:
        return lr
    return lr * np.exp(-0.1)


scheduler_callback = tf.keras.callbacks.LearningRateScheduler(scheduler)

# Stop when val_loss has not improved by at least 0.005 for 2 epochs in a row.
stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    mode="min",
    min_delta=0.005,
    verbose=1,
)

my_callbacks = [
    scheduler_callback,
    stop_callback,
]

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Train for up to 20 epochs on augmented images, validating on non-augmented
# images. `callbacks` receives the flat list of callbacks (not a list wrapped
# in another list).
history = model.fit(
    train_dataset,
    steps_per_epoch=train_dataset.n // batch_size,
    validation_data=test_dataset,
    epochs=20,
    verbose=True,
    callbacks=my_callbacks,
)


In [None]:
# Training curves: loss on the left panel, accuracy on the right panel, each
# showing the training series and the validation series (labelled 'test').
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

for ax, metric, short_name in ((loss_ax, 'loss', 'loss'), (acc_ax, 'accuracy', 'acc')):
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title('Model ' + short_name + ' by epoch')
    ax.set_ylabel(short_name)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'test'], loc='right')

plt.show()


In [None]:

# Recreate the (non-augmented) validation iterator so that prediction starts
# from its first batch; shuffle=False keeps the predictions aligned with
# `.classes`.
test_dataset = test_datagen.flow_from_directory(
    data_dir,
    batch_size=64,
    class_mode="categorical",
    target_size=(224, 224),
    subset='validation',
    seed=42,
    shuffle=False,
)
labels = list(test_dataset.class_indices.keys())

# The second positional parameter of `Model.predict` is `batch_size`, not the
# number of steps, so the step count is passed by keyword. `len(test_dataset)`
# is the number of batches needed to see every image exactly once, whereas
# `n // batch_size + 1` asks for one batch too many when `n` is a multiple of
# the batch size.
Y_pred = model.predict(test_dataset, steps=len(test_dataset))
y_pred = np.argmax(Y_pred, axis=1)  # predicted class index for each image

print('Confusion Matrix')
cm = confusion_matrix(test_dataset.classes, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=labels)
disp.plot(cmap='Blues')

print('Classification Report')
print(classification_report(test_dataset.classes, y_pred, target_names=labels))

In [None]:
## Sauvegarde du model



#### Dégel des 15 dernières couches

In [None]:
# Reset the Keras state and collect garbage before building a new model.
tf.keras.backend.clear_session()
gc.collect()

# Split parameters.
train_ratio = 0.7  # share of training data (informational; not passed to Keras)
validation_ratio = 0.3  # share of validation data, used as validation_split
batch_size = 64

# Training generator: ResNet50 preprocessing plus random augmentations
# (shear, zoom, brightness, width/height shifts; no horizontal flip).
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=False,
    brightness_range=(0.4, 0.6),
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode="nearest",
    validation_split=validation_ratio,
)

# Validation generator: same preprocessing and same split ratio, but no
# augmentation.
test_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=validation_ratio,
)

# TODO(review): open question from the original author - is the validation
# subset really independent from the training subset in this setup?
train_dataset = train_datagen.flow_from_directory(
    data_dir,
    batch_size=batch_size,
    class_mode="categorical",
    target_size=(224, 224),
    subset='training',
    seed=42,
    shuffle=True,
)
test_dataset = test_datagen.flow_from_directory(
    data_dir,
    batch_size=batch_size,
    class_mode="categorical",
    target_size=(224, 224),
    subset='validation',
    seed=42,
    shuffle=False,
)

# Rebuild the network from scratch so that previously fitted weights are not
# carried over.
base_model = ResNet50(weights='imagenet')
x = base_model.output
x = Flatten()(x)
x = Dense(60, activation='relu')(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Partial fine-tuning: layers of the full model with index below 160 stay
# frozen, every layer from index 160 onwards (the end of the ResNet50 plus the
# new head) is trainable.
first_trainable_index = 160
for layer_index, layer in enumerate(model.layers):
    layer.trainable = layer_index >= first_trainable_index

# Initial learning rate; the scheduler below decays it after the first epochs.
optimizer = Adam(learning_rate=0.01)


def scheduler(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    The current rate ``lr`` is returned unchanged while ``epoch < 5``; for
    every later epoch it is multiplied by ``exp(-0.1)``.
    """
    if epoch < 5:
        return lr
    return lr * np.exp(-0.1)


scheduler_callback = tf.keras.callbacks.LearningRateScheduler(scheduler)

# Stop when val_loss has not improved by at least 0.005 for 2 epochs in a row.
stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    mode="min",
    min_delta=0.005,
    verbose=1,
)

my_callbacks = [
    scheduler_callback,
    stop_callback,
]

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Train for up to 20 epochs on augmented images, validating on non-augmented
# images. `callbacks` receives the flat list of callbacks (not a list wrapped
# in another list).
history = model.fit(
    train_dataset,
    steps_per_epoch=train_dataset.n // batch_size,
    validation_data=test_dataset,
    epochs=20,
    verbose=True,
    callbacks=my_callbacks,
)


In [None]:
# Training curves: loss on the left panel, accuracy on the right panel, each
# showing the training series and the validation series (labelled 'test').
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

for ax, metric, short_name in ((loss_ax, 'loss', 'loss'), (acc_ax, 'accuracy', 'acc')):
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title('Model ' + short_name + ' by epoch')
    ax.set_ylabel(short_name)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'test'], loc='right')

plt.show()


In [None]:
# NOTE(review): the original comment of this cell said the batch size was
# lowered to 32 because of memory limits, but the iterator below still uses
# 64 - confirm which value is intended.

# Recreate the (non-augmented) validation iterator so that prediction starts
# from its first batch; shuffle=False keeps the predictions aligned with
# `.classes`.
test_dataset = test_datagen.flow_from_directory(
    data_dir,
    batch_size=64,
    class_mode="categorical",
    target_size=(224, 224),
    subset='validation',
    seed=42,
    shuffle=False,
)
labels = list(test_dataset.class_indices.keys())

# The second positional parameter of `Model.predict` is `batch_size`, not the
# number of steps, so the step count is passed by keyword. `len(test_dataset)`
# is the number of batches needed to see every image exactly once, whereas
# `n // batch_size + 1` asks for one batch too many when `n` is a multiple of
# the batch size.
Y_pred = model.predict(test_dataset, steps=len(test_dataset))
y_pred = np.argmax(Y_pred, axis=1)  # predicted class index for each image

print('Confusion Matrix')
cm = confusion_matrix(test_dataset.classes, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=labels)
disp.plot(cmap='Blues')

print('Classification Report')
print(classification_report(test_dataset.classes, y_pred, target_names=labels))

### On dégèle la moitié des couches et on teste un cosine decay

In [None]:
# Remove the objects left over from the previous experiment so their memory
# can be reclaimed. Each entry pairs a notebook-level name with the message
# printed when that name is not defined.
stale_objects = [
    ("model", "Error: model No value detected"),
    ("base_model", "Error: base_model No value detected"),
    ("preprocess_input", "Error: preprocess_input No value detected"),
    ("history", "Error:history No value detected"),
]

for object_name, missing_message in stale_objects:
    if object_name in globals():
        del globals()[object_name]
    else:
        print(missing_message)

# The data generators and iterators are deliberately kept alive here.

# Free memory
tf.keras.backend.clear_session()
gc.collect()

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import numpy as np
import gc
from tensorflow.keras.optimizers.schedules import CosineDecay

from sklearn.utils import class_weight
import numpy as np

#del history
#del model
#del base_model
#del cm
# Start from a clean Keras/TensorFlow state before building a new model.
tf.keras.backend.clear_session()
gc.collect()

# Data split and batching parameters.
train_ratio = 0.7        # share of training data (not read anywhere in this cell)
validation_ratio = 0.3   # share held out through `validation_split`
batch_size=48

# Options shared by both directory iterators.
flow_options = dict(class_mode="categorical", target_size=(224,224), seed=42)

# Random augmentations applied to the training images only.
augmentation_options = dict(
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=False,
    brightness_range=(0.4, 0.6),
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode="nearest",
)

# Not ideal: the validation images also go through an ImageDataGenerator,
# although this one only applies the model preprocessing.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=validation_ratio,
    **augmentation_options,
)
test_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=validation_ratio,
)

# NOTE(review): both generators split the same directory with the same
# `validation_split`; the two subsets are presumably disjoint — confirm.
train_dataset = train_datagen.flow_from_directory(
    data_dir, batch_size=batch_size, subset='training', shuffle=True, **flow_options)
test_dataset = test_datagen.flow_from_directory(
    data_dir, batch_size=batch_size, subset='validation', shuffle=False, **flow_options)

# Balanced class weights computed from the training labels.
class_ids = np.unique(train_dataset.classes)
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=class_ids,
    y=train_dataset.classes,
)
class_weightDICT = {class_id: weight for class_id, weight in zip(class_ids, class_weights)}
print(class_weightDICT)

# Rebuild the model from scratch so that weights adjusted by earlier runs are
# not reused. The backbone has no classification top and ends with global
# max pooling; a 4-way softmax layer is attached directly to it.
base_model = ResNet50(weights='imagenet', include_top= False, pooling='max')
predictions = Dense(4, activation='softmax')(base_model.output)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the first 100 layers of the ResNet50-based model; train the others.
for position, layer in enumerate(model.layers):
    layer.trainable = position >= 100

# Cosine learning-rate decay spread over all optimisation steps
# (steps per epoch times the 20 epochs requested below).
initial_learning_rate = 0.001
decay_steps = (train_dataset.n // batch_size) * 20
cosine_decay = CosineDecay(initial_learning_rate, decay_steps)

# Keep the model with the best validation accuracy on disk.
checkpoint_filepath = '../models/checkpoint/model_resnet50_CHO_v1'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
)

# Stop when the validation loss has not improved by at least 0.001 for two epochs.
stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode="min",
    min_delta=0.001,
    patience=2,
    verbose=1,
)

my_callbacks = [stop_callback, model_checkpoint_callback]

# Adam driven by the cosine-decay schedule.
optimizer = Adam(learning_rate=cosine_decay)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Train on the augmented images, validate on the untouched validation subset,
# and weight the loss by the balanced class weights.
history = model.fit(
    train_dataset,
    steps_per_epoch=train_dataset.n // batch_size,
    validation_data=test_dataset,
    class_weight=class_weightDICT,
    epochs=20,
    verbose=True,
    callbacks=[my_callbacks],
)


In [None]:
# Learning curves of the cosine-decay run: loss (left) and accuracy (right),
# for the training data and for the validation data (legend entry 'test').
curve_panels = [
    (121, 'loss', 'val_loss', 'Model loss by epoch', 'loss'),
    (122, 'accuracy', 'val_accuracy', 'Model acc by epoch', 'acc'),
]

plt.figure(figsize=(12,4))
for position, train_key, val_key, panel_title, y_label in curve_panels:
    plt.subplot(position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='right')
plt.show()

In [None]:

# Evaluate the trained model on the validation subset. A fresh, unshuffled
# iterator keeps the prediction order aligned with `test_dataset.classes`.
test_dataset = test_datagen.flow_from_directory(data_dir, batch_size = 64, class_mode= "categorical", target_size=(224,224), subset='validation', seed=42, shuffle=False)
labels = list(test_dataset.class_indices.keys())

# Do not pass a step count positionally: the second positional parameter of
# `Model.predict` is `batch_size`. Without `steps`, Keras walks through every
# batch of the iterator exactly once, so there is one prediction per sample.
Y_pred = model.predict(test_dataset)
y_pred = np.argmax(Y_pred, axis=1)

print('Confusion Matrix')
cm = confusion_matrix(test_dataset.classes, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels= labels)
disp.plot(cmap='Blues')

print('Classification Report')
print(classification_report(test_dataset.classes, y_pred, target_names=labels))

In [None]:
# Reload the saved ResNet50 classifier from disk.
# NOTE(review): the training cell writes its best checkpoint under
# '../models/checkpoint/', whereas this path points directly into
# '../models/' — confirm that this is the intended file.
model = tf.keras.models.load_model(filepath="../models/model_resnet50_CHO_v1")

In [None]:
from PIL import Image
import cv2

# Grad-CAM configuration for the ResNet50 classifier.
# NOTE(review): `img_size` is not read by the code of this cell, which loads
# its image with target_size=(224, 224) — confirm whether it is still needed.
img_size = (299, 299)
# ResNet50 helpers: input preprocessing and ImageNet prediction decoding.
preprocess_input = tf.keras.applications.resnet50.preprocess_input
decode_predictions = tf.keras.applications.resnet50.decode_predictions

# Name of the layer whose activations feed Grad-CAM. This value is reassigned
# to "conv5_block3_out" further down in this cell before it is first used.
last_conv_layer_name = "conv5_block3_3_conv"

def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for one preprocessed image.

    Args:
        img_array: Input batch given to the model; only the first element is
            used to pick the class and to build the heatmap.
        model: Keras model containing a layer named `last_conv_layer_name`.
        last_conv_layer_name: Name of the layer whose activations are weighted
            by the gradients (assumed to output a 4-D tensor, since gradients
            are averaged over axes 0, 1 and 2 — TODO confirm for other layers).
        pred_index: Index of the class to explain. Defaults to the class with
            the highest score for the first image.

    Returns:
        A NumPy array with the heatmap, rescaled so its maximum is 1. When the
        layer has no positive contribution the heatmap is all zeros.
    """
    # Model mapping the input to both the chosen activations and the predictions.
    grad_model = Model([model.inputs], [model.get_layer(last_conv_layer_name).output, model.output])

    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # Gradient of the class score with respect to the chosen activations,
    # averaged into one weight per channel.
    grads = tape.gradient(class_channel, last_conv_layer_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight every channel by its importance for the class and sum them.
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Keep positive contributions only, then scale to [0, 1]. The maximum is
    # taken after clamping and the division is NaN-safe, so a map without any
    # positive value yields zeros instead of NaN or sign-flipped values.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()

def save_and_display_gradcam(img_path, heatmap, cam_path="cam.jpg", alpha=0.4):
    """Display a Grad-CAM heatmap next to its overlay on the source image.

    Args:
        img_path: Path of the image the heatmap was computed for.
        heatmap: 2-D heatmap with values expected in [0, 1].
        cam_path: Kept for interface compatibility; this function does not
            write any file.
        alpha: Weight applied to the source image before the heatmap is added.
    """
    # Load the image at its native size as a float array.
    img = tf.keras.preprocessing.image.load_img(img_path)
    img = tf.keras.preprocessing.image.img_to_array(img)

    # Resize the heatmap once to the image size (width, height order for
    # OpenCV). Non-finite values are zeroed and the range is clamped so the
    # conversion to 8 bits is always well defined.
    heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
    heatmap = np.clip(np.nan_to_num(heatmap), 0.0, 1.0)
    heatmap = np.uint8(255 * heatmap)

    # Add the single-channel heatmap to every colour channel of the dimmed image.
    heatmap = np.expand_dims(heatmap, axis=-1)
    superimposed_img = alpha * img + heatmap
    superimposed_img = np.clip(superimposed_img, 0, 255).astype("uint8")

    plt.figure(figsize=(20, 10))

    # Left panel: the heatmap alone.
    plt.subplot(1, 2, 1)
    plt.imshow(heatmap[..., 0], cmap='viridis')
    plt.title('Heatmap')
    plt.axis('off')

    # Right panel: the overlay (already a colour image, so no colormap applies).
    plt.subplot(1, 2, 2)
    plt.imshow(superimposed_img)
    plt.title('Superimposed Image')
    plt.axis('off')


# Load one masked COVID radiograph at the network input size and turn it
# into a preprocessed batch of one image.
img_path = os.path.join(data_dir,"COVID","COVID-30.png_masked.png")
img = tf.keras.preprocessing.image.load_img(img_path, target_size=(224, 224))
img_array = preprocess_input(
    tf.keras.preprocessing.image.img_to_array(img)[np.newaxis, ...]
)

# Compute the Grad-CAM heatmap from the output of the last ResNet50 block,
# then show it together with its overlay on the image.
last_conv_layer_name = "conv5_block3_out"
heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer_name)
save_and_display_gradcam(img_path,heatmap)


In [None]:
import random

# Take the first batch of validation images. The iterator is rewound first so
# that a position inside the batch is also a position in
# `test_dataset.filenames` (this relies on the iterator being unshuffled).
test_dataset.reset()
dataiter = iter(test_dataset)
images, labels = next(dataiter)

# Six random positions inside the batch. `randrange` excludes the upper
# bound, so every position is a valid index into `images`.
x = [random.randrange(len(images)) for p in range(0, 6)]

# Show the Grad-CAM heatmap of every selected image.
for i in x:
    img_array = np.expand_dims(images[i], axis=0)
    heatmap = make_gradcam_heatmap(img_array, model, "conv5_block3_out")
    img_path = os.path.join(data_dir, test_dataset.filenames[i])
    save_and_display_gradcam(img_path, heatmap)


In [None]:
import random

# On met -0.5 et pas 0.5 pour ne pas avoir le fond en rouge

def display_gradcam(img_path, heatmap, pred_label, true_label, alpha=-0.5):
    """Show an image next to its Grad-CAM overlay, titled with the labels.

    Args:
        img_path: Path of the image the heatmap was computed for.
        heatmap: 2-D heatmap with values expected in [0, 1].
        pred_label: Label predicted by the model, shown above the overlay.
        true_label: Ground-truth label, shown above the source image.
        alpha: Weight of the coloured heatmap added to the image.

    NOTE(review): `cv2.applyColorMap` presumably returns its colours in
    OpenCV's BGR order while `img` is converted to RGB here — confirm; this
    may be the reason a negative default `alpha` was needed.
    """
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Resize the heatmap to the image size and colour it with the JET map.
    heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    superimposed_img = heatmap * alpha + img
    superimposed_img = np.clip(superimposed_img, 0, 255).astype("uint8")

    # Both panels use the same 1 x 2 grid so that they sit side by side
    # instead of overlapping.
    plt.figure(figsize=(20, 10))

    # Left panel: the source image with its true label.
    plt.subplot(1, 2, 1)
    plt.imshow(img, cmap='viridis')
    plt.title(f'True: {true_label}')
    plt.axis('off')

    # Right panel: the Grad-CAM overlay with the predicted label.
    plt.subplot(1, 2, 2)
    plt.title(f' Pred: {pred_label}')
    plt.imshow(superimposed_img)
    plt.axis('off')
    plt.show()

# Take the first batch of validation images. `iter(test_dataset)` hands back
# the same iterator that earlier cells may already have advanced, so it is
# rewound first: a position inside the batch then matches the position in
# `test_dataset.filenames` (this relies on the iterator being unshuffled).
test_dataset.reset()
dataiter = iter(test_dataset)
images, labels = next(dataiter)
class_labels = list(test_dataset.class_indices.keys())

# Ten random positions inside the batch (upper bound included by randint,
# hence the -1).
x = [random.randint(0, (len(images)-1)) for p in range(0, 10)]

# Show, for every selected image, the true label and the predicted label
# together with the Grad-CAM overlay.
for i in x:
    img_array = np.expand_dims(images[i], axis=0)
    heatmap = make_gradcam_heatmap(img_array, model, "conv5_block3_out")

    # Plain Python integers are used to index the list of class names.
    pred_label = class_labels[int(np.argmax(model.predict(img_array)[0]))]
    true_label = class_labels[int(np.argmax(labels[i]))]

    img_path = os.path.join(data_dir, test_dataset.filenames[i])
    display_gradcam(img_path, heatmap, pred_label, true_label)



In [None]:
# Remove the objects left over from the previous experiment so their memory
# can be reclaimed. Each entry pairs a notebook-level name with the message
# printed when that name is not defined.
stale_objects = [
    ("model", "Error: model No value detected"),
    ("base_model", "Error: base_model No value detected"),
    ("preprocess_input", "Error: preprocess_input No value detected"),
    ("history", "Error:history No value detected"),
]

for object_name, missing_message in stale_objects:
    if object_name in globals():
        del globals()[object_name]
    else:
        print(missing_message)

# Free memory
tf.keras.backend.clear_session()
gc.collect()

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import numpy as np
import gc
from tensorflow.keras.optimizers.schedules import CosineDecay

#del history
#del model
#del base_model
#del cm
# Start from a clean Keras/TensorFlow state before building a new model.
tf.keras.backend.clear_session()
gc.collect()


# Data split and batching parameters.
train_ratio = 0.7 # share of training data (not read anywhere in this cell)
validation_ratio = 0.3 # share held out through `validation_split`
batch_size=64

# Not ideal: the validation images also go through an ImageDataGenerator,
# although this one only applies the model preprocessing.

train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    shear_range = 0.1, # random shearing
    zoom_range = 0.1,
    horizontal_flip = False,
    brightness_range = (0.4, 0.6),
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode="nearest",
    validation_split=validation_ratio,
    )

test_datagen = ImageDataGenerator(
     preprocessing_function=preprocess_input,
     validation_split=validation_ratio,
     )

# NOTE(review): both generators split the same directory with the same
# `validation_split`; the two subsets are presumably disjoint — confirm.
# The iterators use the `batch_size` variable so that the step computations
# below always match the batches actually produced.
train_dataset = train_datagen.flow_from_directory(data_dir,batch_size = batch_size, class_mode="categorical", target_size=(224,224), subset='training', seed=42, shuffle=True)

test_dataset = test_datagen.flow_from_directory(data_dir, batch_size = batch_size, class_mode= "categorical", target_size=(224,224), subset='validation', seed=42, shuffle=False)

# Rebuild the model from scratch so that weights adjusted by earlier runs are
# not reused.
base_model = ResNet50(weights='imagenet')
x = base_model.output
x = Flatten()(x)
x = Dense(60, activation='relu')(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)


# Freeze the first 150 layers of the ResNet50-based model; train the others.
for layer in model.layers[:150]:
    layer.trainable = False
for layer in model.layers[150:]:
    layer.trainable = True


# Cosine learning-rate decay spread over all optimisation steps
# (steps per epoch times the 20 epochs requested below).
initial_learning_rate = 0.001
decay_steps = (train_dataset.n // batch_size) * 20
cosine_decay = CosineDecay(initial_learning_rate, decay_steps)

# Keep the model with the best validation accuracy on disk.
# NOTE(review): this is the same path as the checkpoint of the previous
# cosine-decay experiment, which this run will overwrite — confirm.
checkpoint_filepath = '../models/checkpoint/model_resnet50_CHO_v1'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)


# Stop when the validation loss has not improved for two epochs.
stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    mode="min",
    min_delta =  0,
    verbose = 1 )

my_callbacks = [
    stop_callback,
    model_checkpoint_callback,
]

# Single optimizer definition: Adam driven by the cosine-decay schedule.
optimizer = Adam(learning_rate=cosine_decay)

# Compile with the custom optimizer.
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])


# Train on the augmented images and validate on the untouched validation
# subset. The callbacks are passed as a flat list.
history = model.fit(train_dataset,
            steps_per_epoch=train_dataset.n // batch_size,
            validation_data = test_dataset,
            epochs =20,
            verbose=True,
            callbacks=my_callbacks)


In [None]:
# Learning curves of the last run: loss on the left, accuracy on the right,
# for the training data and for the validation data (legend entry 'test').
curve_panels = [
    (121, 'loss', 'val_loss', 'Model loss by epoch', 'loss'),
    (122, 'accuracy', 'val_accuracy', 'Model acc by epoch', 'acc'),
]

plt.figure(figsize=(12,4))
for position, train_key, val_key, panel_title, y_label in curve_panels:
    plt.subplot(position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='right')
plt.show()

In [None]:

# Evaluate the trained model on the validation subset. A fresh, unshuffled
# iterator keeps the prediction order aligned with `test_dataset.classes`.
test_dataset = test_datagen.flow_from_directory(data_dir, batch_size = 64, class_mode= "categorical", target_size=(224,224), subset='validation', seed=42, shuffle=False)
labels = list(test_dataset.class_indices.keys())

# Do not pass a step count positionally: the second positional parameter of
# `Model.predict` is `batch_size`. Without `steps`, Keras walks through every
# batch of the iterator exactly once, so there is one prediction per sample.
Y_pred = model.predict(test_dataset)
y_pred = np.argmax(Y_pred, axis=1)

print('Confusion Matrix')
cm = confusion_matrix(test_dataset.classes, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels= labels)
disp.plot(cmap='Blues')

print('Classification Report')
print(classification_report(test_dataset.classes, y_pred, target_names=labels))

## EfficientNet B2
### Directement 


In [None]:
# Make room on the GPU again: drop the last model and confusion matrix,
# collect garbage, then reset the Keras session.
del model, cm
import gc
gc.collect()
tf.keras.backend.clear_session()


In [None]:

from tensorflow.keras.applications.efficientnet import EfficientNetB2
from tensorflow.keras.applications.efficientnet import preprocess_input, decode_predictions

# Instantiate EfficientNetB2 with its ImageNet weights.
base_model = EfficientNetB2(weights='imagenet')

# Uncomment to print the full architecture.
#base_model.summary()

# Number of layers in the network, shown as the cell output; useful when
# choosing how many layers to freeze.
len(base_model.layers)

In [None]:
#del history
#del model
#del base_model
#del cm
# Start from a clean Keras/TensorFlow state before building a new model.
tf.keras.backend.clear_session()
gc.collect()

# Data split and batching parameters.
train_ratio = 0.7        # share of training data (not read anywhere in this cell)
validation_ratio = 0.3   # share held out through `validation_split`
batch_size=64

# One generator serves both subsets: it only applies the model preprocessing
# and performs the training/validation split.
datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=validation_ratio,
)

# NOTE(review): both iterators split the same directory with the same
# `validation_split`; the two subsets are presumably disjoint — confirm.
flow_options = dict(batch_size=64, class_mode="categorical", target_size=(224,224), seed=42)
train_dataset = datagen.flow_from_directory(data_dir, subset='training', shuffle=True, **flow_options)
test_dataset = datagen.flow_from_directory(data_dir, subset='validation', shuffle=False, **flow_options)

learning_rate = 0.001

# Rebuild the model from scratch so that weights adjusted by earlier runs are
# not reused.
base_model = EfficientNetB2(weights='imagenet')

# Freeze every EfficientNetB2 layer: only the dense head added below is trained.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

head = Flatten()(base_model.output)
head = Dense(60, activation='relu')(head)
predictions = Dense(4, activation='softmax')(head)

model = Model(inputs=base_model.input, outputs=predictions)

# Adam optimizer with the custom learning rate.
optimizer = Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Train on the training subset and validate on the validation subset.
history = model.fit(
    train_dataset,
    steps_per_epoch=train_dataset.n // batch_size,
    validation_data=test_dataset,
    epochs=10,
    verbose=True,
)

In [None]:
# Plot how the loss (left) and the accuracy (right) evolved during training,
# for the training data and for the validation data (legend entry 'test').
curve_panels = [
    (121, 'loss', 'val_loss', 'Model loss by epoch', 'loss'),
    (122, 'accuracy', 'val_accuracy', 'Model acc by epoch', 'acc'),
]

plt.figure(figsize=(12,4))
for position, train_key, val_key, panel_title, y_label in curve_panels:
    plt.subplot(position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='right')
plt.show()

In [None]:

# Evaluate the trained model on the validation subset. The iterator is built
# from `datagen`, the generator this model was trained with, so the images
# receive the same preprocessing as during training. It is unshuffled to keep
# the prediction order aligned with `test_dataset.classes`.
test_dataset = datagen.flow_from_directory(data_dir, batch_size = 64, class_mode= "categorical", target_size=(224,224), subset='validation', seed=42, shuffle=False)
labels = list(test_dataset.class_indices.keys())

# Do not pass a step count positionally: the second positional parameter of
# `Model.predict` is `batch_size`. Without `steps`, Keras walks through every
# batch of the iterator exactly once, so there is one prediction per sample.
Y_pred = model.predict(test_dataset)
y_pred = np.argmax(Y_pred, axis=1)

print('Confusion Matrix')
cm = confusion_matrix(test_dataset.classes, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels= labels)
disp.plot(cmap='Blues')

print('Classification Report')
print(classification_report(test_dataset.classes, y_pred, target_names=labels))

### Dégel des dernières couches

In [None]:
# Data split parameters.
train_ratio = 0.7 # share of training data (not read anywhere in this cell)
validation_ratio = 0.3 # share held out through `validation_split`

# Training images are randomly augmented; validation images only receive the
# model preprocessing.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    shear_range = 0.1, # random shearing
    zoom_range = 0.1,
    horizontal_flip = False,
    brightness_range = (0.4, 0.6),
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode="nearest",
    validation_split=validation_ratio,
    )

test_datagen = ImageDataGenerator(
     preprocessing_function=preprocess_input,
     validation_split=validation_ratio,
     )

# NOTE(review): both generators split the same directory with the same
# `validation_split`; the two subsets are presumably disjoint — confirm.
train_dataset = train_datagen.flow_from_directory(data_dir,batch_size = 64, class_mode="categorical", target_size=(224,224), subset='training', seed=42)

test_dataset = test_datagen.flow_from_directory(data_dir, batch_size = 64, class_mode= "categorical", target_size=(224,224), subset='validation', seed=42)

learning_rate = 0.001

# Rebuild the backbone so this run really starts from the ImageNet weights:
# reusing the previous `base_model` would keep the weights and the frozen
# flags left by earlier cells.
base_model = EfficientNetB2(weights='imagenet')
x = base_model.output
x = Flatten()(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Unfreeze only the last ten backbone layers. The new classification head is
# not part of `base_model.layers`, so it stays trainable; freezing every layer
# of `model` would leave nothing to train.
for layer in base_model.layers[:-10]:
    layer.trainable = False
for layer in base_model.layers[-10:]:
    layer.trainable = True

# Adam optimizer using the learning rate defined above.
optimizer = Adam(learning_rate=learning_rate)

# Compile with the custom optimizer.
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Train on the augmented images and validate on the validation subset. The
# number of steps is derived from the batch size of the iterator itself.
history = model.fit(train_dataset,
            steps_per_epoch=train_dataset.n // train_dataset.batch_size,
            validation_data = test_dataset,
            epochs = 20,
            verbose=True)

In [None]:
# Plot how the loss (left) and the accuracy (right) evolved during training,
# for the training data and for the validation data (legend entry 'test').
curve_panels = [
    (121, 'loss', 'val_loss', 'Model loss by epoch', 'loss'),
    (122, 'accuracy', 'val_accuracy', 'Model acc by epoch', 'acc'),
]

plt.figure(figsize=(12,4))
for position, train_key, val_key, panel_title, y_label in curve_panels:
    plt.subplot(position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='right')
plt.show()

In [None]:
# Confusion matrix and classification report on the validation subset.

# A fresh, unshuffled iterator keeps the prediction order aligned with
# `test_dataset.classes`.
test_dataset = test_datagen.flow_from_directory(data_dir, batch_size = 64, class_mode= "categorical", target_size=(224,224), subset='validation', seed=42, shuffle=False)
labels = list(test_dataset.class_indices.keys())

# Do not pass a step count positionally: the second positional parameter of
# `Model.predict` is `batch_size`. Without `steps`, Keras walks through every
# batch of the iterator exactly once, so there is one prediction per sample.
Y_pred = model.predict(test_dataset)
y_pred = np.argmax(Y_pred, axis=1)

print('Confusion Matrix')
cm = confusion_matrix(test_dataset.classes, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels= labels)
disp.plot(cmap='Blues')

print('Classification Report')
print(classification_report(test_dataset.classes, y_pred, target_names=labels))

## EfficientNet B0

In [None]:

from tensorflow.keras.applications.efficientnet import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input, decode_predictions

# Instantiate EfficientNetB0 with its ImageNet weights.
base_model = EfficientNetB0(weights='imagenet')

# Uncomment to print the full architecture.
#base_model.summary()


# Number of layers in the network, shown as the cell output; useful when
# choosing how many layers to freeze.
len(base_model.layers)

In [None]:
# Data split and batching parameters.
train_ratio = 0.7 # share of training data (not read anywhere in this cell)
validation_ratio = 0.3 # share held out through `validation_split`

batch_size=64

# Training images are randomly augmented; validation images only receive the
# model preprocessing.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    shear_range = 0.1, # random shearing
    zoom_range = 0.1,
    horizontal_flip = False,
    brightness_range = (0.4, 0.6),
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode="nearest",
    validation_split=validation_ratio,
    )

test_datagen = ImageDataGenerator(
     preprocessing_function=preprocess_input,
     validation_split=validation_ratio,
     )

# NOTE(review): both generators split the same directory with the same
# `validation_split`; the two subsets are presumably disjoint — confirm.
train_dataset = train_datagen.flow_from_directory(data_dir,batch_size = batch_size, class_mode="categorical", target_size=(224,224), subset='training', seed=42)

test_dataset = test_datagen.flow_from_directory(data_dir, batch_size = batch_size, class_mode= "categorical", target_size=(224,224), subset='validation', seed=42)

learning_rate = 0.001

# Rebuild the backbone so this run really starts from the ImageNet weights,
# whatever earlier cells did to a previous `base_model`.
base_model = EfficientNetB0(weights='imagenet')
x = base_model.output
x = Flatten()(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Feature extraction: freeze the whole backbone and train only the new
# classification head. The head is not part of `base_model.layers`, so it
# stays trainable; freezing every layer of `model` would leave nothing to
# train.
for layer in base_model.layers:
    layer.trainable = False

# Adam optimizer with the custom learning rate.
optimizer = Adam(learning_rate=learning_rate)

# Compile with the custom optimizer.
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Train on the augmented images and validate on the validation subset.
history = model.fit(train_dataset,
            steps_per_epoch=train_dataset.n // batch_size,
            validation_data = test_dataset,
            epochs = 10,
            verbose=True)

In [None]:

# Learning curves of the last run: loss on the left, accuracy on the right,
# for the training data and for the validation data (legend entry 'test').
curve_panels = [
    (121, 'loss', 'val_loss', 'Model loss by epoch', 'loss'),
    (122, 'accuracy', 'val_accuracy', 'Model acc by epoch', 'acc'),
]

plt.figure(figsize=(12,4))
for position, train_key, val_key, panel_title, y_label in curve_panels:
    plt.subplot(position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='right')
plt.show()

In [None]:
# Evaluate the trained model on the validation subset. A fresh, unshuffled
# iterator keeps the prediction order aligned with `test_dataset.classes`.
test_dataset = test_datagen.flow_from_directory(data_dir, batch_size = 64, class_mode= "categorical", target_size=(224,224), subset='validation', seed=42, shuffle=False)
labels = list(test_dataset.class_indices.keys())

# Do not pass a step count positionally: the second positional parameter of
# `Model.predict` is `batch_size`. Without `steps`, Keras walks through every
# batch of the iterator exactly once, so there is one prediction per sample.
Y_pred = model.predict(test_dataset)
y_pred = np.argmax(Y_pred, axis=1)

print('Confusion Matrix')
cm = confusion_matrix(test_dataset.classes, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels= labels)
disp.plot(cmap='Blues')

print('Classification Report')
print(classification_report(test_dataset.classes, y_pred, target_names=labels))

In [None]:
# Data split parameters.
train_ratio = 0.7 # share of training data (not read anywhere in this cell)
validation_ratio = 0.3 # share held out through `validation_split`

# Training images are randomly augmented; validation images only receive the
# model preprocessing.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    shear_range = 0.1, # random shearing
    zoom_range = 0.1,
    horizontal_flip = False,
    brightness_range = (0.4, 0.6),
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode="nearest",
    validation_split=validation_ratio,
    )

test_datagen = ImageDataGenerator(
     preprocessing_function=preprocess_input,
     validation_split=validation_ratio,
     )

# NOTE(review): both generators split the same directory with the same
# `validation_split`; the two subsets are presumably disjoint — confirm.
train_dataset = train_datagen.flow_from_directory(data_dir,batch_size = 64, class_mode="categorical", target_size=(224,224), subset='training', seed=42)

test_dataset = test_datagen.flow_from_directory(data_dir, batch_size = 64, class_mode= "categorical", target_size=(224,224), subset='validation', seed=42)

learning_rate = 0.001

# Rebuild the backbone so this run really starts from the ImageNet weights:
# reusing the previous `base_model` would keep the weights and the frozen
# flags left by earlier cells.
base_model = EfficientNetB0(weights='imagenet')
x = base_model.output
x = Flatten()(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Unfreeze only the last ten backbone layers. Counting from the end works
# whatever the depth of the network: a fixed `[:300]` slice also covers the
# new head whenever the assembled model has fewer than 300 layers, and it
# never re-enables layers that an earlier cell froze.
for layer in base_model.layers[:-10]:
    layer.trainable = False
for layer in base_model.layers[-10:]:
    layer.trainable = True

# Adam optimizer with the custom learning rate.
optimizer = Adam(learning_rate=learning_rate)

# Compile with the custom optimizer.
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Train on the augmented images and validate on the validation subset. The
# number of steps is derived from the batch size of the iterator itself.
history = model.fit(train_dataset,
            steps_per_epoch=train_dataset.n // train_dataset.batch_size,
            validation_data = test_dataset,
            epochs = 10,
            verbose=True)

In [None]:

# Learning curves of the last run: loss on the left, accuracy on the right,
# for the training data and for the validation data (legend entry 'test').
curve_panels = [
    (121, 'loss', 'val_loss', 'Model loss by epoch', 'loss'),
    (122, 'accuracy', 'val_accuracy', 'Model acc by epoch', 'acc'),
]

plt.figure(figsize=(12,4))
for position, train_key, val_key, panel_title, y_label in curve_panels:
    plt.subplot(position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='right')
plt.show()

In [None]:

# Evaluate the trained model on the validation subset. A fresh, unshuffled
# iterator keeps the prediction order aligned with `test_dataset.classes`.
test_dataset = test_datagen.flow_from_directory(data_dir, batch_size = 64, class_mode= "categorical", target_size=(224,224), subset='validation', seed=42, shuffle=False)
labels = list(test_dataset.class_indices.keys())

# Do not pass a step count positionally: the second positional parameter of
# `Model.predict` is `batch_size`. Without `steps`, Keras walks through every
# batch of the iterator exactly once, so there is one prediction per sample.
Y_pred = model.predict(test_dataset)
y_pred = np.argmax(Y_pred, axis=1)

print('Confusion Matrix')
cm = confusion_matrix(test_dataset.classes, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels= labels)
disp.plot(cmap='Blues')

print('Classification Report')
print(classification_report(test_dataset.classes, y_pred, target_names=labels))

In [None]:
# Data split parameters.
train_ratio = 0.7 # share of training data (not read anywhere in this cell)
validation_ratio = 0.3 # share held out through `validation_split`

# Training images are randomly augmented; validation images only receive the
# model preprocessing.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    shear_range = 0.1, # random shearing
    zoom_range = 0.1,
    horizontal_flip = False,
    brightness_range = (0.4, 0.6),
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode="nearest",
    validation_split=validation_ratio,
    )

test_datagen = ImageDataGenerator(
     preprocessing_function=preprocess_input,
     validation_split=validation_ratio,
     )

# NOTE(review): both generators split the same directory with the same
# `validation_split`; the two subsets are presumably disjoint — confirm.
train_dataset = train_datagen.flow_from_directory(data_dir,batch_size = 64, class_mode="categorical", target_size=(224,224), subset='training', seed=42)

test_dataset = test_datagen.flow_from_directory(data_dir, batch_size = 64, class_mode= "categorical", target_size=(224,224), subset='validation', seed=42)

learning_rate = 0.001

# Rebuild the backbone so this run really starts from the ImageNet weights:
# reusing the previous `base_model` would continue from whatever weights the
# earlier cells left in it.
base_model = EfficientNetB0(weights='imagenet')
x = base_model.output
x = Flatten()(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Full fine-tuning: every layer of the model is trainable.
for layer in model.layers:
    layer.trainable = True

# Adam optimizer with the custom learning rate.
optimizer = Adam(learning_rate=learning_rate)

# Compile with the custom optimizer.
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Train on the augmented images and validate on the validation subset. The
# number of steps is derived from the batch size of the iterator itself.
history = model.fit(train_dataset,
            steps_per_epoch=train_dataset.n // train_dataset.batch_size,
            validation_data = test_dataset,
            epochs = 10,
            verbose=True)

## Pour simplifier les tests futurs : on divise directement le dataset en trois jeux de données distincts : 
## train, test, val

In [None]:
import os
import shutil
import random

# Destination directories for the three splits; each gets one sub-folder per class.
train_dir = os.path.join(div_dir, "train")
val_dir = os.path.join(div_dir, "val")
test_dir = os.path.join(div_dir, "test")

# Create the destination directories if they do not exist yet.
for split_dir in (train_dir, val_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

# Partition ratios.
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# Ratios must sum to 1. Compared with a tolerance because exact float equality
# fails for perfectly valid values (e.g. 0.7 + 0.2 + 0.1 != 1.0).
assert abs((train_ratio + val_ratio + test_ratio) - 1.0) < 1e-9, "Les ratios doivent totaliser 1"

# Dedicated, seeded RNG: together with the sorted listings below it makes the
# split reproducible. Without it, re-running this cell would copy a different
# random partition on top of the previous one and the same image could end up
# in several splits (train/val/test leakage).
split_rng = random.Random(42)

# Walk the class folders of the source directory in a stable order.
for class_name in sorted(os.listdir(data_dir)):
    class_path = os.path.join(data_dir, class_name)

    # Ignore anything that is not a class folder.
    if not os.path.isdir(class_path):
        continue

    # Create the class folder inside each split.
    for split_dir in (train_dir, val_dir, test_dir):
        os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)

    # Keep image files only; sort first because os.listdir order is arbitrary.
    images = sorted(
        img for img in os.listdir(class_path)
        if img.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    # Shuffle deterministically.
    split_rng.shuffle(images)

    # Cut points between the three splits.
    total_images = len(images)
    train_end = int(train_ratio * total_images)
    val_end = int((train_ratio + val_ratio) * total_images)

    train_images = images[:train_end]
    val_images = images[train_end:val_end]
    test_images = images[val_end:]

    # Sanity check: every image is assigned to exactly one split.
    assert (len(train_images) + len(val_images) + len(test_images) == total_images), "Le nombre total d images doit correspondre"

    # Copy (not move) the files: the source directory is left untouched.
    for split_dir, split_images in (
        (train_dir, train_images),
        (val_dir, val_images),
        (test_dir, test_images),
    ):
        for img in split_images:
            shutil.copy(os.path.join(class_path, img), os.path.join(split_dir, class_name, img))

    print(f"Class {class_name} partitionnée : {len(train_images)} train, {len(val_images)} val, {len(test_images)} test")


### ResNet50 avec CosineDecay

In [None]:
# Drop the references to the previous model so its memory can be reclaimed
# before the next architecture is built. `pop` keeps the cell re-runnable:
# a bare `del` raises NameError when the names are already gone.
for _stale_name in ("model", "base_model"):
    globals().pop(_stale_name, None)

# Reset the Keras global state, then collect the now-unreferenced objects.
tf.keras.backend.clear_session()
gc.collect()

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import numpy as np
import gc
from tensorflow.keras.optimizers.schedules import CosineDecay

from sklearn.utils import class_weight
import numpy as np


batch_size = 8
epochs = 10  # single source of truth for both the LR schedule and model.fit

# Training generator: ResNet50 preprocessing plus light augmentation.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    shear_range=0.1,  # random shearing
    zoom_range=0.1,
    horizontal_flip=False,
    # NOTE(review): brightness factors below 1.0 presumably darken every
    # training image while validation images stay untouched - confirm intended.
    brightness_range=(0.4, 0.6),
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode="nearest",
)

# Validation/test generator: preprocessing only, no augmentation.
test_val_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
)

# Each split is read from its own directory, so validation data is independent
# from training data as long as those directories do not share files.
train_dataset = train_datagen.flow_from_directory(
    train_dir, batch_size=batch_size, class_mode="categorical",
    target_size=(224, 224), seed=42, shuffle=True)

test_dataset = test_val_datagen.flow_from_directory(
    test_dir, batch_size=batch_size, class_mode="categorical",
    target_size=(224, 224), seed=42, shuffle=True)

# Not shuffled, so predictions can be compared with `val_dataset.classes`.
val_dataset = test_val_datagen.flow_from_directory(
    val_dir, batch_size=batch_size, class_mode="categorical",
    target_size=(224, 224), seed=42, shuffle=False)

# Balanced class weights computed from the training labels.
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_dataset.classes),
    y=train_dataset.classes)

class_weightDICT = dict(zip(np.unique(train_dataset.classes), class_weights))
print(class_weightDICT)

# Build the model from a fresh ImageNet-pretrained backbone so that weights
# from previous runs do not carry over.
base_model = ResNet50(weights='imagenet', include_top=False, pooling='max')
x = base_model.output
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the first 100 layers of the model and fine-tune the remaining ones.
for layer in model.layers[:100]:
    layer.trainable = False
for layer in model.layers[100:]:
    layer.trainable = True

# Cosine learning-rate schedule spanning the whole training run. The decay
# length is derived from `epochs` so the schedule ends when training ends
# (it was previously sized for 20 epochs while only 10 were run).
initial_learning_rate = 0.001
steps_per_epoch = train_dataset.n // batch_size
decay_steps = steps_per_epoch * epochs  # total number of optimizer steps
cosine_decay = CosineDecay(initial_learning_rate, decay_steps)

# Callback that keeps the best model according to validation accuracy.
checkpoint_filepath = '../models/checkpoint/model_resnet50_CHO'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

# Early stopping is defined but deliberately left out of `my_callbacks` below.
stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    mode="min",
    min_delta=0.001,
    verbose=1)

my_callbacks = [
    model_checkpoint_callback,
]

# Adam optimizer driven by the cosine schedule.
optimizer = Adam(learning_rate=cosine_decay)

model.compile(optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Train. Validation metrics (and therefore the checkpoint) are computed on
# `test_dataset`; `val_dataset` is built above but not used during fitting.
history = model.fit(
    train_dataset,                 # augmented images for training
    steps_per_epoch=steps_per_epoch,
    validation_data=test_dataset,  # non-augmented images
    class_weight=class_weightDICT,
    epochs=epochs,
    verbose=True,
    callbacks=my_callbacks)        # pass the list itself, not a nested list


## EfficientNetB4 avec cosine decay

In [None]:
# Drop the references to the previous model so its memory can be reclaimed
# before the next architecture is built. `pop` keeps the cell re-runnable:
# a bare `del` raises NameError when the names are already gone.
for _stale_name in ("model", "base_model"):
    globals().pop(_stale_name, None)

# Reset the Keras global state, then collect the now-unreferenced objects.
tf.keras.backend.clear_session()
gc.collect()

In [None]:
from tensorflow.keras.applications.efficientnet import EfficientNetB4, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import numpy as np
import gc
from tensorflow.keras.optimizers.schedules import CosineDecay

from sklearn.utils import class_weight
import numpy as np


batch_size = 8
epochs = 20  # single source of truth for both the LR schedule and model.fit

# Training generator: EfficientNet preprocessing plus light augmentation.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    shear_range=0.1,  # random shearing
    zoom_range=0.1,
    horizontal_flip=False,
    # NOTE(review): brightness factors below 1.0 presumably darken every
    # training image while validation images stay untouched - confirm intended.
    brightness_range=(0.4, 0.6),
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode="nearest",
)

# Validation/test generator: preprocessing only, no augmentation.
test_val_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
)

# Each split is read from its own directory, so validation data is independent
# from training data as long as those directories do not share files.
train_dataset = train_datagen.flow_from_directory(
    train_dir, batch_size=batch_size, class_mode="categorical",
    target_size=(224, 224), seed=42, shuffle=True)

test_dataset = test_val_datagen.flow_from_directory(
    test_dir, batch_size=batch_size, class_mode="categorical",
    target_size=(224, 224), seed=42, shuffle=True)

# Not shuffled, so predictions can be compared with `val_dataset.classes`.
val_dataset = test_val_datagen.flow_from_directory(
    val_dir, batch_size=batch_size, class_mode="categorical",
    target_size=(224, 224), seed=42, shuffle=False)

# Balanced class weights computed from the training labels.
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_dataset.classes),
    y=train_dataset.classes)

class_weightDICT = dict(zip(np.unique(train_dataset.classes), class_weights))
print(class_weightDICT)

# Build the model from a fresh ImageNet-pretrained backbone so that weights
# from previous runs do not carry over.
base_model = EfficientNetB4(weights='imagenet', include_top=False, pooling='max')
x = base_model.output
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the first 100 layers of the model and fine-tune the remaining ones.
for layer in model.layers[:100]:
    layer.trainable = False
for layer in model.layers[100:]:
    layer.trainable = True

# Cosine learning-rate schedule spanning the whole training run.
initial_learning_rate = 0.001
steps_per_epoch = train_dataset.n // batch_size
decay_steps = steps_per_epoch * epochs  # total number of optimizer steps
cosine_decay = CosineDecay(initial_learning_rate, decay_steps)

# Callback that keeps the best model according to validation accuracy.
checkpoint_filepath = '../models/checkpoint/model_efficientnetB4_CHO_cos'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

# Stop when validation accuracy has not improved by at least 0.001 for 2 epochs.
stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=2,
    mode="max",
    min_delta=0.001,
    verbose=1)

my_callbacks = [
    stop_callback,
    model_checkpoint_callback,
]

# Adam optimizer driven by the cosine schedule.
optimizer = Adam(learning_rate=cosine_decay)

model.compile(optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Train. Validation metrics (and therefore early stopping and the checkpoint)
# are computed on `test_dataset`; `val_dataset` is not used during fitting.
history = model.fit(
    train_dataset,                 # augmented images for training
    steps_per_epoch=steps_per_epoch,
    validation_data=test_dataset,  # non-augmented images
    class_weight=class_weightDICT,
    epochs=epochs,
    verbose=True,
    callbacks=my_callbacks)        # pass the list itself, not a nested list


In [None]:
# Side-by-side training curves: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

panels = (
    (loss_ax, 'loss', 'val_loss', 'Model loss by epoch', 'loss'),
    (acc_ax, 'accuracy', 'val_accuracy', 'Model acc by epoch', 'acc'),
)
for ax, train_key, val_key, title, y_label in panels:
    # First curve: training metric; second curve: metric on the validation data.
    ax.plot(history.history[train_key])
    ax.plot(history.history[val_key])
    ax.set_title(title)
    ax.set_ylabel(y_label)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'test'], loc='right')

plt.show()

In [None]:

# Class names in the index order used by the generator.
labels = list(val_dataset.class_indices.keys())

# The second positional argument of `Model.predict` is `batch_size`, not `steps`,
# so the step count must not be passed positionally. `val_dataset` is not
# shuffled and knows its own length: Keras walks it exactly once, keeping the
# predictions aligned with `val_dataset.classes`.
Y_pred = model.predict(val_dataset)
y_pred = np.argmax(Y_pred, axis=1)

print('Confusion Matrix')
# Rows are normalized over the true labels.
cm = confusion_matrix(val_dataset.classes, y_pred, normalize="true")
disp = ConfusionMatrixDisplay(cm, display_labels=labels)
disp.plot(cmap='Blues')

print('Classification Report')
print(classification_report(val_dataset.classes, y_pred, target_names=labels))

In [None]:
# Save the object currently bound to `model` under the EfficientNetB4 / cosine-decay name.
model.save('../models/model_CHO_efficientnetB4_cos')

## EfficientNetB4 avec expdecay

In [None]:
# Drop the references to the previous run so its memory can be reclaimed
# before the next model is built. `pop` keeps the cell re-runnable:
# a bare `del` raises NameError when the names are already gone.
for _stale_name in ("history", "model", "base_model"):
    globals().pop(_stale_name, None)

# Reset the Keras global state, then collect the now-unreferenced objects.
tf.keras.backend.clear_session()
gc.collect()

In [None]:
from tensorflow.keras.applications.efficientnet import EfficientNetB4, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import numpy as np
import gc
from tensorflow.keras.optimizers.schedules import CosineDecay

from sklearn.utils import class_weight
import numpy as np


batch_size = 8
epochs = 20

# Training generator: EfficientNet preprocessing plus light augmentation.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    shear_range=0.1,  # random shearing
    zoom_range=0.1,
    horizontal_flip=False,
    # NOTE(review): brightness factors below 1.0 presumably darken every
    # training image while validation images stay untouched - confirm intended.
    brightness_range=(0.4, 0.6),
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode="nearest",
)

# Validation/test generator: preprocessing only, no augmentation.
test_val_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
)

# Each split is read from its own directory, so validation data is independent
# from training data as long as those directories do not share files.
train_dataset = train_datagen.flow_from_directory(
    train_dir, batch_size=batch_size, class_mode="categorical",
    target_size=(224, 224), seed=42, shuffle=True)

test_dataset = test_val_datagen.flow_from_directory(
    test_dir, batch_size=batch_size, class_mode="categorical",
    target_size=(224, 224), seed=42, shuffle=True)

# Not shuffled, so predictions can be compared with `val_dataset.classes`.
val_dataset = test_val_datagen.flow_from_directory(
    val_dir, batch_size=batch_size, class_mode="categorical",
    target_size=(224, 224), seed=42, shuffle=False)

# Balanced class weights computed from the training labels.
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_dataset.classes),
    y=train_dataset.classes)

class_weightDICT = dict(zip(np.unique(train_dataset.classes), class_weights))
print(class_weightDICT)

# Build the model from a fresh ImageNet-pretrained backbone so that weights
# from previous runs do not carry over.
base_model = EfficientNetB4(weights='imagenet', include_top=False, pooling='max')
x = base_model.output
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the first 100 layers of the model and fine-tune the remaining ones.
for layer in model.layers[:100]:
    layer.trainable = False
for layer in model.layers[100:]:
    layer.trainable = True

# Exponential (staircase) learning-rate schedule: the rate is multiplied by
# `decay_rate` once per epoch. With staircase=True the rate only changes every
# `decay_steps` optimizer steps, so setting `decay_steps` to the length of the
# whole run (as before) kept the learning rate constant during training.
initial_learning_rate = 0.001
steps_per_epoch = train_dataset.n // batch_size
decay_steps = steps_per_epoch  # one decay per epoch
decay_rate = 0.96

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=decay_steps,
    decay_rate=decay_rate,
    staircase=True)

# Adam optimizer driven by the exponential-decay schedule.
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# Callback that keeps the best model according to validation accuracy.
checkpoint_filepath = '../models/checkpoint/model_efficientnetB4_CHO_exp'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)

# Early stopping is defined but deliberately left out of `my_callbacks` below.
stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=2,
    mode="max",
    min_delta=0.001,
    verbose=1)

my_callbacks = [
    model_checkpoint_callback,
]

model.compile(optimizer, loss='categorical_crossentropy', metrics=["accuracy"])

# Train. Validation metrics (and therefore the checkpoint) are computed on
# `test_dataset`; `val_dataset` is not used during fitting.
history = model.fit(
    train_dataset,                 # augmented images for training
    steps_per_epoch=steps_per_epoch,
    validation_data=test_dataset,  # non-augmented images
    class_weight=class_weightDICT,
    epochs=epochs,
    verbose=True,
    callbacks=my_callbacks)        # pass the list itself, not a nested list


In [None]:
# Side-by-side training curves: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

panels = (
    (loss_ax, 'loss', 'val_loss', 'Model loss by epoch', 'loss'),
    (acc_ax, 'accuracy', 'val_accuracy', 'Model acc by epoch', 'acc'),
)
for ax, train_key, val_key, title, y_label in panels:
    # First curve: training metric; second curve: metric on the validation data.
    ax.plot(history.history[train_key])
    ax.plot(history.history[val_key])
    ax.set_title(title)
    ax.set_ylabel(y_label)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'test'], loc='right')

plt.show()

In [None]:

# Class names in the index order used by the generator.
labels = list(val_dataset.class_indices.keys())

# The second positional argument of `Model.predict` is `batch_size`, not `steps`,
# so the step count must not be passed positionally. `val_dataset` is not
# shuffled and knows its own length: Keras walks it exactly once, keeping the
# predictions aligned with `val_dataset.classes`.
Y_pred = model.predict(val_dataset)
y_pred = np.argmax(Y_pred, axis=1)

print('Confusion Matrix')
# Rows are normalized over the true labels.
cm = confusion_matrix(val_dataset.classes, y_pred, normalize="true")
disp = ConfusionMatrixDisplay(cm, display_labels=labels)
disp.plot(cmap='Blues')

print('Classification Report')
print(classification_report(val_dataset.classes, y_pred, target_names=labels))