# Téléchargement des données

In [1]:
import tensorflow as tf

In [2]:
# Image side length (pixels) used for the square network input, and batch size
IMG_SIZE = 224
BATCH_SIZE = 16

In [3]:
# Download the image dataset repository into ./abeillesgenres
!git clone https://github.com/fabiopereira59/abeillesgenres

Cloning into 'abeillesgenres'...
remote: Enumerating objects: 4336, done.
remote: Total 4336 (delta 0), reused 0 (delta 0), pack-reused 4336
Receiving objects: 100% (4336/4336), 44.69 MiB | 30.61 MiB/s, done.
Resolving deltas: 100% (4/4), done.


In [4]:
# Load the training and validation datasets from their directories.
# Settings shared by both splits: labels inferred from the directory layout,
# categorical label encoding, fixed batch size and square image size.
_common_loader_args = dict(
    labels='inferred',
    label_mode='categorical',
    batch_size=BATCH_SIZE,
    image_size=(IMG_SIZE, IMG_SIZE),
)

# Training split: shuffling is switched off so that the dataset keeps a fixed order.
train_ds = tf.keras.utils.image_dataset_from_directory(
    directory='abeillesgenres/Andrena/train',
    shuffle=False,
    **_common_loader_args
)

# Validation split: loader defaults apart from the shared settings.
validation_ds = tf.keras.utils.image_dataset_from_directory(
    directory='abeillesgenres/Andrena/val',
    **_common_loader_args
)

Found 3446 files belonging to 17 classes.
Found 423 files belonging to 17 classes.


In [5]:
# Summary figures of the dataset: class names, class count and split sizes
CLASS_NAMES = train_ds.class_names
NB_CLASSES = len(CLASS_NAMES)
NB_IMAGES_TRAIN = len(train_ds.file_paths)
NB_IMAGES_VAL = len(validation_ds.file_paths)

In [6]:
# Materialise the validation set as NumPy arrays and apply the ResNet preprocessing
import numpy as np

# float32 buffers: NumPy's default float64 would double the memory footprint of
# the image array without adding any information to the loaded batches.
x_val = np.zeros((NB_IMAGES_VAL, IMG_SIZE, IMG_SIZE, 3), dtype=np.float32)
y_val = np.zeros((NB_IMAGES_VAL, NB_CLASSES), dtype=np.float32)

# Copy the batches one after the other; ind_data is the write cursor.
ind_data = 0
for bx, by in validation_ds.as_numpy_iterator():
  x_val[ind_data:ind_data+bx.shape[0]] = bx
  y_val[ind_data:ind_data+bx.shape[0]] = by
  ind_data += bx.shape[0]

# Every row of the pre-allocated buffers must have been written.
assert ind_data == NB_IMAGES_VAL, "validation set size does not match NB_IMAGES_VAL"

# Same normalisation as the one applied to the training batches
x_val = tf.keras.applications.resnet.preprocess_input(x_val)

# Création du modèle

In [7]:
# Mount Google Drive at /content/drive (Colab only)
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Architecture du modèle et optimiseur

In [8]:
# Hyperparameters
lr = 1e-2  # learning rate given to the SGD optimizer
eta = 0.9  # momentum given to the SGD optimizer

In [9]:
# ResNet50 backbone with ImageNet weights and without its classification head.
# `classes` is intentionally not passed: Keras only uses it when include_top=True,
# so it had no effect here and wrongly suggested the backbone depends on NB_CLASSES.
conv_base = tf.keras.applications.resnet50.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3)
)

# Classification head: global average pooling followed by a softmax layer
# with one unit per class and L2 regularisation on its kernel.
model = tf.keras.Sequential(
    [
      conv_base,
      tf.keras.layers.GlobalAveragePooling2D(),
      tf.keras.layers.Dense(
          NB_CLASSES,
          activation='softmax',
          kernel_regularizer=tf.keras.regularizers.L2(1e-4)
      )
    ]
)

# Optimizer, loss function and metrics
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=lr, momentum=eta),
    loss='categorical_crossentropy',
    metrics=[
        'categorical_accuracy',
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall()
    ]
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


## Augmentation de données

In [10]:
!pip uninstall opencv-python-headless==4.5.5.62
!pip install opencv-python-headless==4.1.2.30
!pip install -q -U albumentations
!echo "$(pip freeze | grep albumentations) is successfully installed"

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting opencv-python-headless==4.1.2.30
  Downloading opencv_python_headless-4.1.2.30-cp37-cp37m-manylinux1_x86_64.whl (21.8 MB)
[K     |████████████████████████████████| 21.8 MB 1.7 MB/s 
Installing collected packages: opencv-python-headless
Successfully installed opencv-python-headless-4.1.2.30
[K     |████████████████████████████████| 113 kB 5.4 MB/s 
[?25halbumentations==1.2.0 is successfully installed


In [11]:
# Définition des augmentations de données à réaliser
from albumentations import (Compose, Rotate, HorizontalFlip, VerticalFlip, Affine, RandomBrightnessContrast, ChannelShuffle)
import albumentations as A

# Augmentation pipeline for the training images; every transform is given p=0.5.
AUGMENTATIONS_TRAIN = A.Compose(
    [
        A.Rotate(limit=[0, 100], p=0.5),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Affine(shear=[-45, 45], p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.ChannelShuffle(p=0.5),
    ]
)

In [12]:
import numpy as np
import cv2 as cv

class AbeillesSequence(tf.keras.utils.Sequence):
    """Keras ``Sequence`` serving augmented training batches read from disk.

    For each batch the image files are read with OpenCV, the augmentation
    pipeline is applied image by image, and the ResNet ``preprocess_input``
    normalisation is applied to the whole batch. The sample order is
    reshuffled at the end of every epoch.

    Relies on the notebook-level constants ``IMG_SIZE`` and ``CLASS_NAMES``.
    """

    def __init__(self, x_train, y_train, batch_size, augmentations):
        """Store the data and draw a first random sample order.

        Args:
            x_train: NumPy array of image file paths.
            y_train: NumPy array of labels; row ``i`` belongs to ``x_train[i]``.
            batch_size: Number of samples per batch.
            augmentations: Callable invoked as ``augmentations(image=img)`` and
                returning a mapping holding the result under ``'image'``.
        """
        super().__init__()
        self.x_train = x_train
        self.y_train = y_train
        self.classes = CLASS_NAMES
        self.batch_size = batch_size
        self.augment = augmentations
        # Access indices, shuffled here and after each epoch so that the
        # composition of the batches changes during training.
        self.indices1 = np.arange(len(x_train))
        np.random.shuffle(self.indices1)

    def __len__(self):
        """Return the number of batches (gradient steps) per epoch."""
        # Use the instance's own data rather than a notebook global, so the
        # result is correct whatever array the sequence was built from.
        return int(np.ceil(len(self.x_train) / float(self.batch_size)))

    def apply_augmentation(self, bx, by):
        """Read and augment every image of a batch.

        Args:
            bx: Array of image file paths for the batch.
            by: Labels of the batch; returned unchanged.

        Returns:
            Tuple ``(batch_x, batch_y)`` where ``batch_x`` has shape
            ``(len(bx), IMG_SIZE, IMG_SIZE, 3)``.

        Raises:
            OSError: If OpenCV cannot read one of the image files.
        """
        batch_x = np.zeros((len(bx), IMG_SIZE, IMG_SIZE, 3))
        batch_y = by

        for i, path in enumerate(bx):
            img = cv.imread(str(path))
            # cv.imread signals failure by returning None instead of raising.
            if img is None:
                raise OSError(f"cv.imread could not read image file: {path}")
            # OpenCV loads images as BGR; the rest of the pipeline uses RGB.
            img = cv.cvtColor(img, cv.COLOR_BGR2RGB)

            # The batch buffer has a fixed size; bring any other image size to it.
            if img.shape[:2] != (IMG_SIZE, IMG_SIZE):
                img = cv.resize(img, (IMG_SIZE, IMG_SIZE))

            batch_x[i] = self.augment(image=img)['image']

        return batch_x, batch_y

    def __getitem__(self, idx):
        """Build batch number ``idx`` (0-based): select, augment, normalise.

        Returns:
            Tuple ``(batch_x, batch_y)`` ready to be fed to the model.
        """
        batch_indices = self.indices1[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_x = self.x_train[batch_indices]
        batch_y = self.y_train[batch_indices]

        batch_x, batch_y = self.apply_augmentation(batch_x, batch_y)

        # Normalisation expected by the ResNet backbone
        batch_x = tf.keras.applications.resnet.preprocess_input(batch_x)

        return batch_x, batch_y

    def on_epoch_end(self):
        """Reshuffle the access indices at the end of an epoch."""
        np.random.shuffle(self.indices1)

In [None]:
import numpy as np

# Training inputs are the image file paths; the images themselves are read later, batch by batch.
x_train = np.array(train_ds.file_paths)
y_train = np.zeros((NB_IMAGES_TRAIN, NB_CLASSES))

# Copy the labels batch after batch, in dataset iteration order.
# NOTE(review): pairing row i with x_train[i] assumes train_ds iterates in
# file_paths order, i.e. that it was created with shuffle=False — confirm.
ind_data = 0
for _, batch_labels in train_ds.as_numpy_iterator():
    batch_end = ind_data + batch_labels.shape[0]
    y_train[ind_data:batch_end] = batch_labels
    ind_data = batch_end

## Définition des callbacks

In [None]:
# Training callbacks

# Save the weights only, and only when the validation accuracy reaches a new maximum.
model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath='./drive/MyDrive/Stage2A/Genres/Andrena/ResNet50/ResNet50_V1/Poids/best_model_1',
    monitor='val_categorical_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# Stop training once the validation accuracy has stopped improving
# (min_delta=0.01, patience of 8 epochs).
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor="val_categorical_accuracy",
    mode="auto",
    min_delta=0.01,
    patience=8,
    verbose=1,
)

# Multiply the learning rate by 0.1 when the validation loss plateaus
# (patience of 5 epochs), with a lower bound of 1e-5.
reduce_lr_cb = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=5,
    min_lr=0.00001,
    verbose=1,
)

# Entraînement du modèle

In [None]:
# Training batch generator applying the data augmentation pipeline
train_ds_aug = AbeillesSequence(x_train, y_train, batch_size=BATCH_SIZE, augmentations=AUGMENTATIONS_TRAIN)

In [None]:
# Training without class weighting
model.fit(
    train_ds_aug,
    epochs=150,
    validation_data=(x_val, y_val),
    callbacks=[model_checkpoint_cb, early_stopping_cb, reduce_lr_cb],
)

Epoch 1/150
  5/216 [..............................] - ETA: 37:30 - loss: 3.2176 - categorical_accuracy: 0.0875 - precision: 0.2222 - recall: 0.0500

KeyboardInterrupt: ignored