### Premiers modèles convolutifs

> Premier modèle avec seulement 4000 images chargées en mémoire. Pas encore vu possibilité de charger les images pendant l'entrainement
> Pas encore vu les callbacks non plus

In [1]:
%matplotlib inline
import numpy as np

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout 
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D 
from keras.layers.convolutional import MaxPooling2D
from keras.utils import np_utils 

from sklearn import metrics

import matplotlib.pyplot as plt
from matplotlib import cm

import itertools

In [2]:
from sklearn.model_selection  import train_test_split
import pandas as pd

In [3]:
import utils_blood_cells as utils

**Classification  CNN images en gris**

In [4]:
# en gris non flatten
utils.change_path_root('../images_mendeley/dataset_redim/')
df_data_cells, data =  utils.get_mendeley_cells(size=4000, stratify_category = True,flatten=False)
data.shape

(4000, 256, 256, 1)

In [5]:
#Cible : le type de cellule
target = df_data_cells['category'] 
target.value_counts()

neutrophil      500
eosinophil      500
ig              500
platelet        500
erythroblast    500
monocyte        500
basophil        500
lymphocyte      500
Name: category, dtype: int64

In [6]:
# Mise en forme target /data 
y = pd.get_dummies(data=target) 
X = data/255

In [7]:
#Séparation données entrainement, données test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.20)

In [13]:
model =  Sequential()
first_layer = Conv2D(filters = 32 , kernel_size=(5,5),padding='valid',input_shape=(256,256,1),activation='relu')

second_layer = MaxPooling2D(pool_size = (2,2))

model.add(first_layer)
model.add(second_layer)


In [14]:
third_layer = Dropout(rate=0.2)
fourth_layer = Flatten()
fifth_layer = Dense(units=128,activation='relu')
output_layer = Dense(units=len(y.columns),activation='softmax')

model.add(third_layer)
model.add(fourth_layer)
model.add(fifth_layer)
model.add(output_layer)

In [15]:
model.compile(optimizer='adam', loss='categorical_crossentropy',metrics=['accuracy']) 

training_history = model.fit(X_train, y_train, epochs=10,batch_size=32, validation_split=0.2) 

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [17]:
model.evaluate(X_test,y_test)



[0.9973558187484741, 0.7350000143051147]

> Overfitting, score moins élevé que Random Forest (84%)

**Classification CNN avec les images en couleurs**

In [8]:
# en couleur non flatten
df_data_cells, data =  utils.get_mendeley_cells(size=4000, stratify_category = True,flatten=False, color=True)
data.shape

(4000, 256, 256, 3)

In [9]:
#Cible : le type de cellule
target = df_data_cells['category'] 
target.value_counts()

neutrophil      500
eosinophil      500
ig              500
platelet        500
erythroblast    500
monocyte        500
basophil        500
lymphocyte      500
Name: category, dtype: int64

In [10]:
# Mise en forme target /data 
y = pd.get_dummies(data=target) 
X = data/255

In [11]:
#Séparation données entrainement, données test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.20)

In [12]:
#CNN simple
model =  Sequential()
#Images en couleurs
first_layer = Conv2D(filters = 32 , kernel_size=(5,5),padding='valid',input_shape=(256,256,3),activation='relu')

second_layer = MaxPooling2D(pool_size = (2,2))

model.add(first_layer)
model.add(second_layer)
third_layer = Dropout(rate=0.2)
fourth_layer = Flatten()
fifth_layer = Dense(units=128,activation='relu')
output_layer = Dense(units=len(y.columns),activation='softmax')

model.add(third_layer)
model.add(fourth_layer)
model.add(fifth_layer)
model.add(output_layer)

In [13]:
model.compile(optimizer='adam', loss='categorical_crossentropy',metrics=['accuracy']) 

training_history = model.fit(X_train, y_train, epochs=10,batch_size=32, validation_split=0.2) 

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
model.evaluate(X_test,y_test)



[0.9235027432441711, 0.7425000071525574]

>Modèle CNN en overfitting , amélioration de la précision avec les images en couleurs, mais précision en dessous RandomForest
>
>Revoir les paramètres convolutifs ???

### LeNet

In [16]:
#Couches LeNet 
model =  Sequential()
model.add(Conv2D(filters = 30  , kernel_size=(5,5), padding='valid', input_shape=(256,256,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(filters = 16  , kernel_size=(3,3), padding='valid', activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(rate=0.2))
model.add(Flatten())
model.add(Dense(units=128,activation='relu'))
model.add(Dense(units=len(y.columns),activation='softmax'))
#Compilation
model.compile(optimizer='adam', loss='categorical_crossentropy',metrics=['accuracy']) 
#Entrainement
training_history_lenet = model.fit(X_train, y_train, epochs=16,batch_size=32, validation_split=0.2) 

Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


In [17]:
model.evaluate(X_test,y_test)



[0.5753348469734192, 0.8725000023841858]

In [19]:
test_pred = model.predict(X_test)

In [20]:
y.columns

Index(['basophil', 'eosinophil', 'erythroblast', 'ig', 'lymphocyte',
       'monocyte', 'neutrophil', 'platelet'],
      dtype='object')

In [21]:
test_predict_class = test_pred.argmax(axis = 1)
test_predict_class_name = [y.columns[i] for i in test_predict_class]

In [47]:
pd.crosstab(pd.Series(y_test.idxmax(axis=1), name='Réalité'),pd.Series(test_predict_class_name, name='Prédiction', index =y_test.index))

Prédiction,basophil,eosinophil,erythroblast,ig,lymphocyte,monocyte,neutrophil,platelet
Réalité,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
basophil,94,0,0,5,1,0,0,0
eosinophil,4,92,0,0,0,0,1,0
erythroblast,0,0,94,2,4,0,2,0
ig,14,4,3,81,7,6,7,0
lymphocyte,0,0,1,2,99,0,1,0
monocyte,2,0,0,18,2,73,0,0
neutrophil,2,2,1,9,0,0,74,0
platelet,0,1,0,0,0,0,1,91


In [48]:
print(metrics.classification_report( test_predict_class_name,y_test.idxmax(axis=1) ))

              precision    recall  f1-score   support

    basophil       0.94      0.81      0.87       116
  eosinophil       0.95      0.93      0.94        99
erythroblast       0.92      0.95      0.94        99
          ig       0.66      0.69      0.68       117
  lymphocyte       0.96      0.88      0.92       113
    monocyte       0.77      0.92      0.84        79
  neutrophil       0.84      0.86      0.85        86
    platelet       0.98      1.00      0.99        91

    accuracy                           0.87       800
   macro avg       0.88      0.88      0.88       800
weighted avg       0.88      0.87      0.87       800



> La catégorie IG pose vraiment problème car très souvent confondu avec une autre catégorie