In [1]:
import import_ipynb

In [2]:
%run iteration_0_imports.py

In [3]:
%run iteration_0_parameters.py

In [4]:
import iteration_0_utils as utils

importing Jupyter notebook from iteration_0_utils.ipynb


In [5]:
%whos

Variable                          Type        Data/Info
-------------------------------------------------------
HEALTHY                           str         H
IMG_IN_COLOR                      int         1
IMG_SIZE                          int         64
Image                             module      <module 'PIL.Image' from <...>packages\\PIL\\Image.py'>
NBR_SAMPLE                        int         20000
PATH_DATA                         str         ../data
PATH_DATA_EDA                     str         ../data/eda
PATH_DATA_LIST                    str         ../data/list
PATH_DATA_SAMPLE                  str         ../data/sample
PATH_MODEL                        str         ../model
PATH_ROOT                         str         ..
PATH_SOURCE_IMG                   str         ../cell_images
RandomForestClassifier            ABCMeta     <class 'sklearn.ensemble.<...>.RandomForestClassifier'>
SICK                              str         S
classification_report             function

# make X, y

In [6]:
DATE_TIME = "2021-11-18_20-39-00"

In [7]:
# Collect the path of every .pklz sample file stored under the DATE_TIME run folder
list_path_sample_img = glob.glob("/".join([PATH_DATA_SAMPLE, DATE_TIME, "*.pklz"]))

In [8]:
# Build a DataFrame with two columns:
##   path_sample_img: path of the image file inside the sample folder
##   label: first character of the file name ("H" = healthy, "S" = sick)
def make_dataFrame(list_path_sample_img: list, random_state=None) -> pd.DataFrame:
    """Index the sample images and their labels in a shuffled DataFrame.

    Parameters
    ----------
    list_path_sample_img : list
        Paths of the sample image files.
    random_state : int, optional
        Forwarded to ``DataFrame.sample``. Pass an int to make the shuffle
        reproducible; the default ``None`` keeps a non-deterministic shuffle.

    Returns
    -------
    pd.DataFrame
        Columns ``path_sample_img`` and ``label``, rows in random order,
        index reset to ``0..n-1``. Empty (but with both columns) when the
        input list is empty.
    """
    records = []
    for path_img in tqdm(list_path_sample_img):
        # The second item returned by split_file_info is used as the file name
        # (presumably without directory/extension -- verify against utils).
        _, file_name, _ = utils.split_file_info(path_img)
        records.append({"path_sample_img": path_img, "label": file_name[0]})

    # Build the frame once: concatenating inside the loop copies the whole
    # frame at every iteration (quadratic cost).
    df_sample_img = pd.DataFrame(records, columns=["path_sample_img", "label"])

    # randomise and reset index
    return df_sample_img.sample(frac=1, random_state=random_state).reset_index(drop=True)

In [9]:
# set options: never truncate column content, so the full image paths are displayed
pd.set_option('display.max_colwidth', None)
# to reset the display options to their defaults:
# pd.reset_option('^display.', silent=True)

In [10]:
df_sample_img = make_dataFrame(list_path_sample_img)

100%|██████████| 20000/20000 [00:28<00:00, 694.76it/s]


In [11]:
df_sample_img.sample(5)

Unnamed: 0,path_sample_img,label
10320,../data/sample/2021-11-18_20-39-00\S_C182P143NThinF_IMG_20151201_172257_cell_193.pklz,S
6379,../data/sample/2021-11-18_20-39-00\S_C180P141NThinF_IMG_20151201_165528_cell_185.pklz,S
740,../data/sample/2021-11-18_20-39-00\S_C88P49ThinF_IMG_20150820_153042_cell_212.pklz,S
13095,../data/sample/2021-11-18_20-39-00\S_C126P87ThinF_IMG_20151004_105100_cell_127.pklz,S
16877,../data/sample/2021-11-18_20-39-00\S_C97P58ThinF_IMG_20150917_152032_cell_180.pklz,S


In [12]:
# Stratified split into train / validation / test sets (fixed seed):
#   1) 20 % of all samples are held out as the test set;
#   2) 20 % of the remaining samples are held out as the validation set.
df_path_sample_img_train, df_path_sample_img_test = train_test_split(
    df_sample_img,
    test_size=0.2,
    random_state=1,
    stratify=df_sample_img["label"],
)

df_path_sample_img_train, df_path_sample_img_valid = train_test_split(
    df_path_sample_img_train,
    test_size=0.2,
    random_state=1,
    stratify=df_path_sample_img_train["label"],
)

In [13]:
# # split train, validation, test set
# list_path_sample_img_train, list_path_sample_img_test  = train_test_split(list_path_sample_img, test_size=0.2)
# list_path_sample_img_train, list_path_sample_img_valid = train_test_split(list_path_sample_img_train, test_size=0.2)

In [14]:
def make_Xy(list_path_sample_img: list) -> "tuple[np.ndarray, np.ndarray]":
    """Load the sample images and their labels into numpy arrays.

    Parameters
    ----------
    list_path_sample_img : list
        Sized iterable of sample file paths (a list or a pandas Series).

    Returns
    -------
    X : np.ndarray
        float16 array of shape ``(n, IMG_SIZE, IMG_SIZE, 3)``; ``X[i]`` holds
        what ``utils.pickle_read`` returns for the i-th path.
    y : np.ndarray
        One-character string array of shape ``(n,)``; ``y[i]`` is the first
        character of the i-th file name.
    """
    nbr_img = len(list_path_sample_img)
    X = np.empty((nbr_img, IMG_SIZE, IMG_SIZE, 3), dtype="float16")
    # Explicit dtype: labels are single characters, and an empty input must
    # still yield a string array (np.array([]) would default to float64).
    y = np.zeros(nbr_img, dtype="<U1")

    # Wrap the iterable itself (not enumerate) and pass the total so tqdm can
    # draw a real progress bar instead of a bare iteration counter.
    for i, path in enumerate(tqdm(list_path_sample_img, total=nbr_img)):
        # NOTE(review): pickle_read unpickles the file -- only use it on trusted data.
        X[i] = utils.pickle_read(path).astype("float16")

        _, file_name, _ = utils.split_file_info(path)
        y[i] = file_name[0]

    return X, y
    

In [15]:
X_train, y_train  = make_Xy(df_path_sample_img_train.path_sample_img)

12800it [00:11, 1075.62it/s]


In [16]:
X_valid, y_valid  = make_Xy(df_path_sample_img_valid.path_sample_img)

3200it [00:02, 1079.51it/s]


In [17]:
X_test, y_test  = make_Xy(df_path_sample_img_test.path_sample_img)

4000it [00:03, 1068.14it/s]


In [18]:
# Sanity check: distinct labels and their counts in each split (train, validation, test)
print(
    *(np.unique(labels, return_counts=True) for labels in (y_train, y_valid, y_test)),
    sep="\n",
)

(array(['H', 'S'], dtype='<U1'), array([6400, 6400], dtype=int64))
(array(['H', 'S'], dtype='<U1'), array([1600, 1600], dtype=int64))
(array(['H', 'S'], dtype='<U1'), array([2000, 2000], dtype=int64))


In [19]:
# Shape of the image array of each split (train, validation, test)
print(*(images.shape for images in (X_train, X_valid, X_test)), sep="\n")

(12800, 64, 64, 3)
(3200, 64, 64, 3)
(4000, 64, 64, 3)


# model_cnn

* Ref: https://towardsdatascience.com/detecting-malaria-with-deep-learning-9e45c1e34b60
* https://towardsdatascience.com/understanding-and-calculating-the-number-of-parameters-in-convolution-neural-networks-cnns-fc88790d530d

In [20]:
# Number of samples per batch drawn from the augmentation generator
BATCH_SIZE = 500
# Alternative kept for reference: size the batch as a fraction of the training set
#percentage = 0.10
#BATCH_SIZE = int(len(X_train) * percentage)

# Two labels ('H' and 'S'). NOTE(review): not referenced in the visible cells --
# the network ends with a single sigmoid unit.
NUM_CLASSES = 2
# Number of epochs passed to fit()
EPOCHS = 25
# Per-image input shape; matches the (IMG_SIZE, IMG_SIZE, 3) arrays built by make_Xy
INPUT_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

In [21]:
# Encoder y
from sklearn.preprocessing import LabelEncoder

In [22]:
le = LabelEncoder()

In [23]:
y_train_enc = le.fit_transform(y_train)

In [24]:
y_valid_enc = le.transform(y_valid)
y_test_enc  = le.transform(y_test)

In [25]:
le.classes_

array(['H', 'S'], dtype='<U1')

In [26]:
#!pip uninstall tensorflow

Créer un fichier .reg avec les instructions suivantes :

===

Windows Registry Editor Version 5.00

[HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\FileSystem]
"LongPathsEnabled"=dword:00000001

---

Exécuter le .reg

In [27]:
#!pip install tensorflow

In [28]:
import tensorflow as tf

In [29]:
from keras.models import Sequential

In [30]:
#assert False

In [31]:
def plot_history(history):
    """Plot the training and validation curves recorded during a fit.

    Draws two stacked panels in one 16x9 figure: the loss on top and the
    accuracy below, each with the train and validation series.

    Parameters
    ----------
    history
        Object exposing a ``history`` mapping with the keys ``loss``,
        ``val_loss``, ``accuracy`` and ``val_accuracy`` (e.g. the value
        returned by ``model.fit``).
    """
    # (subplot position, panel title, train key, validation key)
    panels = (
        (211, 'binary_crossentropy', 'loss', 'val_loss'),
        (212, 'Classification Accuracy', 'accuracy', 'val_accuracy'),
    )

    plt.figure(figsize=(16, 9))
    for position, title, train_key, valid_key in panels:
        plt.subplot(position)
        plt.grid()
        plt.title(title)
        plt.plot(history.history[train_key], "o-", color='blue', label='train')
        plt.plot(history.history[valid_key], "o-", color='orange', label='validation')
        plt.legend()
    plt.show()

In [32]:
# CNN: three Conv2D + MaxPooling2D stages (32, 64, 128 filters), a flatten
# layer, two dense hidden layers and a single sigmoid output unit.
model_cnn_5 = Sequential()
model_cnn_5.add(tf.keras.layers.Input(name="input", shape=INPUT_SHAPE))

# (convolution layer name, pooling layer name, number of filters) per stage
for conv_name, pool_name, nbr_filters in (
    ("conv2D", "pooling", 32),
    ("conv2D_2", "pooling_2", 64),
    ("conv2D_3", "pooling_3", 128),
):
    model_cnn_5.add(
        tf.keras.layers.Conv2D(
            name=conv_name,
            filters=nbr_filters,
            kernel_size=(3, 3),
            activation="LeakyReLU",
            padding="same",
            kernel_initializer="he_uniform",
        )
    )
    # Each 2x2 pooling halves the spatial size (see the summary below)
    model_cnn_5.add(tf.keras.layers.MaxPooling2D(name=pool_name, pool_size=(2, 2)))

model_cnn_5.add(tf.keras.layers.Flatten(name="flatten"))

# (dense layer name, number of units) for the fully connected hidden layers
for dense_name, nbr_units in (("dense_hidden", 1024), ("dense_hidden_2", 128)):
    model_cnn_5.add(
        tf.keras.layers.Dense(
            name=dense_name,
            units=nbr_units,
            activation="LeakyReLU",
            kernel_initializer="he_uniform",
        )
    )

# One sigmoid unit for the binary decision; the layer name "ouput" (sic) is
# kept unchanged so that layer names stay identical.
model_cnn_5.add(tf.keras.layers.Dense(name="ouput", units=1, activation="sigmoid"))


In [33]:
# Compile the model: Adam optimizer (learning rate 1e-3), binary cross-entropy
# loss, accuracy as the tracked metric
model_cnn_5.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [34]:
model_cnn_5.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2D (Conv2D)              (None, 64, 64, 32)        896       
_________________________________________________________________
pooling (MaxPooling2D)       (None, 32, 32, 32)        0         
_________________________________________________________________
conv2D_2 (Conv2D)            (None, 32, 32, 64)        18496     
_________________________________________________________________
pooling_2 (MaxPooling2D)     (None, 16, 16, 64)        0         
_________________________________________________________________
conv2D_3 (Conv2D)            (None, 16, 16, 128)       73856     
_________________________________________________________________
pooling_3 (MaxPooling2D)     (None, 8, 8, 128)         0         
_________________________________________________________________
flatten (Flatten)            (None, 8192)              0

In [35]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [36]:
# Random image augmentation: rotations, zoom, shifts, shear and flips.
# Pixels created by a transformation are filled from the nearest existing pixel.
aug = ImageDataGenerator(
    rotation_range=359,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="nearest",
)

In [37]:
len(X_train) // BATCH_SIZE

25

In [38]:
tf.__version__

'2.6.0'

In [None]:
# Train on augmented batches of X_train; the validation set is passed
# as-is (no augmentation) and evaluated during training.
history_5 = model_cnn_5.fit(
    aug.flow(X_train, y_train_enc, batch_size=BATCH_SIZE, subset=None),
    validation_data=(X_valid, y_valid_enc),
    steps_per_epoch=len(X_train) // BATCH_SIZE,
    epochs=EPOCHS,
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25

In [None]:
plot_history(history_5)

# TODO

* Ajouter des couches Dropout
* Écrire un équivalent de predict_classes(), qui n'existe plus dans les versions récentes de TensorFlow :
    * On pourra utiliser la probabilité retournée par model.predict : si elle est > 0.50, classe 1 ; sinon, classe 0
* Évaluer le modèle avec X_test et une matrice de confusion
* Reprendre les lignes de suivi des métriques avec MLflow, comme on l'a fait pour RandomForest

In [None]:
# y_pred = model_cnn_5.predict_classes(X_test)

In [None]:
# # matrice de confusion
# from sklearn import metrics
# metrics.confusion_matrix(y_test_enc, y_pred, normalize="true")

In [None]:
plot_history(history_5)

In [None]:
# Increase the batch size: BATCH_SIZE = int(len(X_train) * percentage) + 1
# Reduce the learning rate

In [None]:
#model_cnn_4.save(PATH_MODEL + "/model_cnn_4.h5")

Keras avec GPU AMD
https://medium.com/@Vatsal410/keras-without-nvidia-gpus-with-plaidml-and-amd-gpu-4ba6f60025ce