### Modèle de type LeNet avec ajout des classes blast et smudge

In [48]:
%matplotlib inline
import numpy as np

import matplotlib.pyplot as plt
import cv2
import seaborn as sns

import pandas as pd
from sklearn.model_selection import train_test_split

from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, Input, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn import metrics
import tensorflow as tf


#### Récupération de la base d'images et création des jeux d'entraînement, de test et d'évaluation

In [2]:
# Load the image index table; the first CSV column becomes the DataFrame index.
data = pd.read_csv('../base_apl_aml.csv',index_col=0)

In [3]:
# Integer label of each cell class: names are listed alphabetically and numbered from 0.
lst_classes = {
    name: idx
    for idx, name in enumerate([
        'basophil', 'blast', 'eosinophil', 'erythroblast', 'ig',
        'lymphocyte', 'monocyte', 'neutrophil', 'platelet', 'smudge',
    ])
}

In [4]:
# Encode each row's class name as its integer label (an unknown class name raises KeyError).
data['label'] = [lst_classes[class_name] for class_name in data['class']]

In [5]:
# Preview the first rows, including the integer `label` column.
data.head()

Unnamed: 0,Patient_ID,file_path,filename,class,size,label
0,Patient_00,../images_apl_aml/All/All/Patient_00/Signed sl...,BL_15336348.jpg,blast,"(363, 360, 3)",1
1,Patient_00,../images_apl_aml/All/All/Patient_00/Signed sl...,BL_15336349.jpg,blast,"(363, 360, 3)",1
2,Patient_00,../images_apl_aml/All/All/Patient_00/Signed sl...,BL_15336361.jpg,blast,"(363, 360, 3)",1
3,Patient_00,../images_apl_aml/All/All/Patient_00/Signed sl...,BL_15336369.jpg,blast,"(363, 360, 3)",1
4,Patient_00,../images_apl_aml/All/All/Patient_00/Signed sl...,BL_15336376.jpg,blast,"(363, 360, 3)",1


In [6]:
# Number of images per class (shows how imbalanced the classes are).
data['class'].value_counts()

lymphocyte      3404
blast           3296
smudge          2909
neutrophil      2071
monocyte        1299
ig               920
erythroblast     517
eosinophil       108
platelet          66
basophil          53
Name: class, dtype: int64

In [12]:
# Numeric patient number: everything after the first 8 characters of Patient_ID
# (e.g. 'Patient_00' -> 0).
data['id'] = [int(patient_id[8:]) for patient_id in data['Patient_ID']]

In [28]:
# Split by patient number so that a given patient's images end up in exactly one subset:
#   id < 75        -> training
#   75 <= id < 90  -> test (passed as validation data during training)
#   id >= 90       -> evaluation
data_train = data.loc[lambda df: df['id'] < 75]
data_test = data.loc[lambda df: (df['id'] >= 75) & (df['id'] < 90)]
data_eval = data.loc[lambda df: df['id'] >= 90]

#### Utilisation d'un ImageDataGenerator pour l'augmentation de données, surtout utile pour les classes de cellules peu représentées

In [32]:
def preprocess(x):
    """Scale pixel values by dividing them by 255.

    Args:
        x: Image data supporting true division by a float (a number or an array).

    Returns:
        ``x / 255.``
    """
    return x / 255.

In [42]:
# Import from tensorflow.keras, like every other Keras import in this notebook, so the
# generators and the model come from the same Keras implementation.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training generator: pixel scaling plus random augmentation
# (rotations up to 180 degrees, horizontal and vertical flips).
train_generator = ImageDataGenerator(
    preprocessing_function=preprocess,
    rotation_range=180,
    horizontal_flip=True,
    vertical_flip=True,
)
# Test generator: pixel scaling only, no augmentation.
test_generator = ImageDataGenerator(preprocessing_function=preprocess)

In [43]:
# Options shared by both iterators: file paths are read from the `file_path` column,
# labels from the `class` column as sparse integer labels, images are resized to
# 224x224 and served in batches of 32.
flow_options = dict(
    directory=None,
    x_col='file_path',
    y_col='class',
    class_mode='sparse',
    target_size=(224, 224),
    batch_size=32,
)

data_train_generator = train_generator.flow_from_dataframe(data_train, **flow_options)

data_test_generator = test_generator.flow_from_dataframe(data_test, **flow_options)

Found 10542 validated image filenames belonging to 10 classes.
Found 1988 validated image filenames belonging to 10 classes.


#### Modèle type LeNet
* Couches Dense 1024, 512, 256 
* Dropout pour éviter overfitting
* Compilation avec Adam

In [49]:
# Callbacks driven by the validation loss:
# - stop training after 6 epochs without improvement and restore the best weights;
# - halve the learning rate after 4 epochs without improvement.
cb_early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=6,
    restore_best_weights=True,
)
cb_lr = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.5,
    patience=4,
    verbose=2,
)

In [54]:
# LeNet-style model builder
def model_LeNet(input_shape=(224, 224, 3), n_classes=10):
    """Build and compile a LeNet-style convolutional classifier.

    Layers, in order: Conv2D(30, 5x5) -> MaxPooling2D(2x2) -> Conv2D(16, 3x3)
    -> MaxPooling2D(2x2) -> Dropout(0.2) -> Flatten -> Dense(1024) -> Dense(512)
    -> Dense(256) -> Dense(n_classes, softmax). All hidden layers use ReLU.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
            Defaults to (224, 224, 3), the value previously hard-coded.
        n_classes: Number of output classes. Defaults to 10.

    Returns:
        A compiled ``Sequential`` model using the 'adam' optimizer, the
        'sparse_categorical_crossentropy' loss (integer labels) and the
        'accuracy' metric.
    """
    model = Sequential()
    # Declare the input with an explicit Input object rather than an
    # `input_shape=` argument on the first layer.
    model.add(Input(shape=input_shape))
    # Convolutional feature extractor
    model.add(Conv2D(filters=30, kernel_size=(5, 5), padding='valid', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(filters=16, kernel_size=(3, 3), padding='valid', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(rate=0.2))
    # Fully connected classifier head
    model.add(Flatten())
    for units in (1024, 512, 256):
        model.add(Dense(units=units, activation='relu'))
    model.add(Dense(units=n_classes, activation='softmax'))  # one probability per class
    # Compilation
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [55]:
# Build a fresh, compiled (untrained) model.
model = model_LeNet()

In [56]:
# Training.
# `batch_size` is deliberately not passed to fit(): the generators already yield batches
# of 32, and Keras documents that `batch_size` must not be specified for generator input
# (some versions raise a ValueError when it is).
history = model.fit(
    data_train_generator,
    epochs=16,
    validation_data=data_test_generator,
    callbacks=[cb_early_stopping, cb_lr],
)

Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


#### Prédiction sur les données d'évaluation

In [60]:
# Evaluation iterator: pixel scaling only, no augmentation.
test_generator = ImageDataGenerator(preprocessing_function=preprocess)
# shuffle=False keeps the batches in DataFrame order, so predictions line up with
# `data_eval_generator.classes`.
data_eval_generator = test_generator.flow_from_dataframe(
    data_eval,
    directory=None,
    x_col='file_path',
    y_col='class',
    class_mode='sparse',
    target_size=(224, 224),
    batch_size=32,
    shuffle=False,
)

Found 2113 validated image filenames belonging to 10 classes.


In [62]:
# Returns [loss, accuracy] computed on the evaluation set.
model.evaluate(data_eval_generator)



[0.692894697189331, 0.7647894024848938]

 * Exactitude (accuracy) de 76 % sur le jeu d'évaluation

In [64]:
# Prediction: softmax outputs (one value per class) for each evaluation image.
predict = model.predict(data_eval_generator)

In [66]:
# Predicted classes: index of the highest softmax output for each image.
eval_predict_class = predict.argmax(axis=1)

# Class names ordered by their integer index. The order is derived from the index values
# in `class_indices` rather than from dict iteration order, and it is stored under its own
# name so the `lst_classes` dict defined earlier in the notebook is not overwritten
# (overwriting it breaks the label-encoding cell on re-run).
class_indices = data_eval_generator.class_indices
class_names = sorted(class_indices, key=class_indices.get)

eval_predict_class_name = [class_names[i] for i in eval_predict_class]
eval_class_name = [class_names[i] for i in data_eval_generator.classes]

# zero_division=0 reports 0.0 for classes that are never predicted, without emitting
# an undefined-metric warning for each of them.
print(metrics.classification_report(eval_class_name, eval_predict_class_name, zero_division=0))

              precision    recall  f1-score   support

    basophil       0.00      0.00      0.00         8
       blast       0.66      0.89      0.76       372
  eosinophil       0.00      0.00      0.00        16
erythroblast       0.79      0.94      0.86       202
          ig       0.18      0.01      0.03       142
  lymphocyte       0.87      0.90      0.89       448
    monocyte       0.48      0.37      0.41       189
  neutrophil       0.94      0.97      0.95       320
    platelet       0.00      0.00      0.00        22
      smudge       0.74      0.79      0.76       394

    accuracy                           0.76      2113
   macro avg       0.47      0.49      0.47      2113
weighted avg       0.71      0.76      0.73      2113



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
