<h1> Classification de poissons </h1>



## Import des bibliothèques et lecture des classes depuis le dossier local

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import preprocessing
from tensorflow.keras.preprocessing import image_dataset_from_directory



In [2]:
# Side length (in pixels) to which every image is resized on load.
IMG_SIZE = 224

# First pass over the training folder: labels are inferred from the directory
# structure and one-hot encoded ('categorical'); shuffling is switched off.
train_ds = tf.keras.utils.image_dataset_from_directory(
    '/home/lucien/Documents/data_fish/Salmonidae_split/train',
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=16,
    shuffle=False,
    label_mode='categorical',
    labels='inferred',
)

Found 3000 files belonging to 12 classes.


In [3]:
# Class names as exposed by the dataset loader, and how many there are.
class_names = train_ds.class_names
nb_classes = len(class_names)

# Display the label list first, then the class count.
print(class_names)
print(nb_classes)

['Grand_Corégone', 'Ombre_Commun', 'Saumon_Atlantique', 'Saumon_Chinook', 'Saumon_Coho', 'Saumon_Kéta', 'Saumon_Rose', 'Saumon_Rouge', 'Touladi', 'Truite_Arc-En-Ciel', 'Truite_Commune_Européenne', 'Truite_Fardée']
12


## Chargement des données

In [4]:
from tensorflow import keras
from tensorflow.keras import layers

In [5]:
# Parameters
IMG_SIZE = 224 # image side length, chosen in order to use ResNet

In [6]:
# Datasets used during training (train and val splits).
# The training split is created with shuffle=False so that every image can be
# reached through its path via train_ds.file_paths.
train_ds = keras.utils.image_dataset_from_directory(
    '/home/lucien/Documents/data_fish/Salmonidae_split/train/',
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=4,
    shuffle=False,
    label_mode='categorical',
    labels='inferred',
)

# Validation split: same image size, batch size and label encoding; the
# shuffle argument is left at the loader's default.
validation_ds = keras.utils.image_dataset_from_directory(
    '/home/lucien/Documents/data_fish/Salmonidae_split/val/',
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=4,
    label_mode='categorical',
    labels='inferred',
)

Found 3000 files belonging to 12 classes.
Found 371 files belonging to 12 classes.


## Augmentation de données : Sequence et Albumentations

In [7]:
from albumentations import (Compose, Rotate, HorizontalFlip, VerticalFlip, Affine, RandomBrightnessContrast, ChannelShuffle)
import albumentations as A

# Augmentation pipeline used on training images; each transform below is
# configured with p=0.5.
AUGMENTATIONS_TRAIN = A.Compose(
    [
        A.Rotate(limit=[0, 100], p=0.5),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Affine(shear=[-45, 45], p=0.5),
        A.RandomBrightnessContrast(p=0.5),
    ]
)

In [8]:
from tensorflow.keras.utils import Sequence
import numpy as np
import cv2 as cv

class PoissonSequence(Sequence):
    """Keras ``Sequence`` that reads fish images from disk and augments them.

    Batches are built from image file paths: each image is read with OpenCV,
    converted from BGR to RGB, resized to ``IMG_SIZE`` x ``IMG_SIZE``, passed
    through the augmentation pipeline and finally through the ResNet
    ``preprocess_input`` function.

    Relies on the notebook-level globals ``IMG_SIZE`` and ``class_names``.
    """

    def __init__(self, x_train, y_train, batch_size, augmentations):
        """Store the data and draw an initial random sample order.

        Args:
            x_train: NumPy array of image file paths.
            y_train: NumPy array of labels, indexed with the same indices
                as ``x_train``.
            batch_size: Number of samples per batch.
            augmentations: Callable invoked as ``augmentations(image=img)``
                whose result exposes the transformed image under ``'image'``.
        """
        super().__init__()
        self.x_train = x_train
        self.y_train = y_train
        self.classes = class_names
        self.batch_size = batch_size
        self.augment = augmentations
        # Access indices; reshuffled at the end of every epoch so that the
        # composition of the batches varies during training.
        self.indices1 = np.arange(len(x_train))
        np.random.shuffle(self.indices1)

    def __len__(self):
        """Return the number of batches (gradient steps) per epoch."""
        return int(np.ceil(len(self.x_train) / float(self.batch_size)))

    def apply_augmentation(self, bx, by):
        """Load and augment every image of a batch.

        Args:
            bx: Sequence of image file paths for the batch.
            by: Labels of the batch; returned unchanged.

        Returns:
            Tuple ``(batch_x, by)`` where ``batch_x`` has shape
            ``(len(bx), IMG_SIZE, IMG_SIZE, 3)``.

        Raises:
            OSError: If an image cannot be read by ``cv.imread``.
        """
        batch_x = np.zeros((len(bx), IMG_SIZE, IMG_SIZE, 3))

        for i, path in enumerate(bx):
            img = cv.imread(path)
            # cv.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            if img is None:
                raise OSError(
                    f"cv.imread could not read image (missing or undecodable file): {path}"
                )
            img = cv.cvtColor(img, cv.COLOR_BGR2RGB)

            # Resize to the same side length as the batch buffer.
            img = cv.resize(img, (IMG_SIZE, IMG_SIZE))

            batch_x[i] = self.augment(image=img)['image']

        return batch_x, by

    def __getitem__(self, idx):
        """Build batch number ``idx``: select, augment and normalise the data.

        Args:
            idx: Position of the batch within the epoch.

        Returns:
            Tuple ``(batch_x, batch_y)`` ready to be fed to the model.
        """
        selection = self.indices1[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_x, batch_y = self.apply_augmentation(
            self.x_train[selection], self.y_train[selection]
        )

        # Normalise the pixel values the way the ResNet weights expect.
        batch_x = tf.keras.applications.resnet.preprocess_input(batch_x)

        return batch_x, batch_y

    def on_epoch_end(self):
        """Reshuffle the access indices at the end of an epoch."""
        np.random.shuffle(self.indices1)

In [9]:
# Training images are referenced by their file paths; labels go into y_train.
import numpy as np
import os
print(nb_classes)

# Take the sample counts from the files the dataset loaders actually indexed
# rather than from os.listdir(): a raw directory listing counts every entry in
# the class folders, so it can disagree with the loader and leave y_train with
# the wrong number of rows.
taille_train = len(train_ds.file_paths)
print(taille_train)
taille_val = len(validation_ds.file_paths)

x_train = np.array(train_ds.file_paths)
y_train = np.zeros((taille_train, nb_classes))

# train_ds was built with shuffle=False, so the labels are copied batch by
# batch in the same order as train_ds.file_paths.
ind_data = 0
for bx, by in train_ds.as_numpy_iterator():
    y_train[ind_data:ind_data + bx.shape[0]] = by
    ind_data += bx.shape[0]

assert ind_data == taille_train, "label count does not match the number of training files"

12
3000


In [10]:
# Build the augmented training Sequence.
train_ds_aug = PoissonSequence(
    x_train,
    y_train,
    batch_size=4,
    augmentations=AUGMENTATIONS_TRAIN,
)

# Materialise the validation data as arrays, then normalise it.
import numpy as np
import tensorflow as tf

# Buffers pre-allocated with the validation sample count (taille_val).
y_val = np.zeros((taille_val, nb_classes))
x_val = np.zeros((taille_val, IMG_SIZE, IMG_SIZE, 3))

# Copy each validation batch into the next free rows of the buffers.
ind_data = 0
for bx, by in validation_ds.as_numpy_iterator():
    n_batch = bx.shape[0]
    x_val[ind_data:ind_data + n_batch] = bx
    y_val[ind_data:ind_data + n_batch] = by
    ind_data += n_batch

# Same ResNet preprocessing as the one applied to the training batches.
x_val = tf.keras.applications.resnet.preprocess_input(x_val)

## Création du modèle

In [11]:
from tensorflow.keras import regularizers
from tensorflow.keras import optimizers
import tensorflow as tf

### Poids d'imagenet

In [12]:
# ResNet50 convolutional base with ImageNet weights (transfer learning).
# include_top=False leaves out the network's own classification head, so the
# `classes` argument does not apply here and is not passed; the number of
# output classes is set by the Dense layer below.
conv_base = keras.applications.resnet.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)

# Classification head: global average pooling followed by a softmax layer with
# one unit per class and L2 regularisation on its kernel.
model = keras.Sequential(
    [
        conv_base,
        layers.GlobalAveragePooling2D(),
        layers.Dense(
            nb_classes,
            kernel_regularizer=regularizers.L2(1e-4),
            activation='softmax',
        ),
    ]
)

In [13]:
# Print the layer-by-layer summary of the model (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 7, 7, 2048)        23587712  
                                                                 
 global_average_pooling2d (G  (None, 2048)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dense (Dense)               (None, 12)                24588     
                                                                 
Total params: 23,612,300
Trainable params: 23,559,180
Non-trainable params: 53,120
_________________________________________________________________


In [14]:
# Loss object passed to model.compile in the training section.
loss = keras.losses.CategoricalCrossentropy()

## Entraînement du modèle

In [15]:
# Attach the optimiser, the loss function and the metrics to the model.
lr = 1e-3
model.compile(
    optimizer=optimizers.SGD(learning_rate=lr, momentum=0.9),
    loss=loss,
    metrics=[
        'categorical_accuracy',
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ],
)

In [16]:
# Callbacks: this is where the network weights get saved.

# Checkpoint: `filepath` is where the weights are written; with
# save_best_only=True a save happens only when the monitored metric improves.
model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath='/home/lucien/Documents/final_project_Essec/salmon_ai/model_poisson/model',
    monitor='val_categorical_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# Early stopping on the validation accuracy (min_delta 0.01, patience 8).
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor="val_categorical_accuracy",
    mode="auto",
    min_delta=0.01,
    patience=8,
    verbose=1,
)

# Learning-rate reduction on plateau: the rate is chosen so that the model can
# still move a little when it is stuck in a local extremum.
reduce_lr_cb = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_categorical_accuracy',
    factor=0.1,
    patience=5,
    min_lr=0.00001,
    verbose=1,
)

In [17]:
# Train for at most 30 epochs on the augmented Sequence, validating on the
# in-memory validation arrays. early_stopping_cb is passed together with the
# checkpoint and learning-rate callbacks so that training can stop once
# val_categorical_accuracy stops improving.
history = model.fit(
    train_ds_aug,
    epochs=30,
    validation_data=(x_val, y_val),
    callbacks=[model_checkpoint_cb, reduce_lr_cb, early_stopping_cb],
)

Epoch 1/30
Epoch 1: val_categorical_accuracy improved from -inf to 0.31536, saving model to /home/lucien/Documents/final_project_Essec/salmon_ai/model_poisson/model
Epoch 2/30
Epoch 2: val_categorical_accuracy improved from 0.31536 to 0.36658, saving model to /home/lucien/Documents/final_project_Essec/salmon_ai/model_poisson/model
Epoch 3/30
Epoch 3: val_categorical_accuracy did not improve from 0.36658
Epoch 4/30
Epoch 4: val_categorical_accuracy did not improve from 0.36658
Epoch 5/30
Epoch 5: val_categorical_accuracy improved from 0.36658 to 0.43666, saving model to /home/lucien/Documents/final_project_Essec/salmon_ai/model_poisson/model
Epoch 6/30
Epoch 6: val_categorical_accuracy did not improve from 0.43666
Epoch 7/30
Epoch 7: val_categorical_accuracy did not improve from 0.43666
Epoch 8/30
Epoch 8: val_categorical_accuracy did not improve from 0.43666
Epoch 9/30
Epoch 9: val_categorical_accuracy improved from 0.43666 to 0.44205, saving model to /home/lucien/Documents/final_proje