# Projet Deep Learning
by Thomas ODIN, Maïa JOUENNE et Benoit CATEZ

## Import et téléchargement des paquets nécessaires

In [1]:
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import random
import cv2
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
import tensorflow_hub as hub

# For drawing onto the image.
import numpy as np
from PIL import Image
from PIL import ImageColor
from PIL import ImageDraw
from PIL import ImageFont
from PIL import ImageOps

# For measuring the inference time.
import time

# augmenting dataset
import imblearn
from collections import Counter
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

2024-03-01 10:37:00.018623: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-01 10:37:00.248255: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-01 10:37:00.248330: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-01 10:37:00.281223: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-01 10:37:00.359026: I tensorflow/core/platform/cpu_feature_guar

## 1) Classification

### Crop and annotation

In [None]:
# Load the bounding-box annotations of the training split.
df_train = pd.read_csv('data/train/_annotations.csv')

# One entry per annotation row: the cropped region as an array, or None when
# the clamped bounding box is empty.
cropped_image_list = []

for index, row in df_train.iterrows():
    image_path = row['filename']

    # Open the picture in a context manager so its file handle is released as
    # soon as the crop has been copied into a NumPy array.
    with Image.open('data/train/' + image_path) as image:
        # Clamp the box to the picture boundaries.
        xmin, ymin, xmax, ymax = (
            max(0, row['xmin']),
            max(0, row['ymin']),
            min(image.width, row['xmax']),
            min(image.height, row['ymax'])
        )

        # Keep the crop only when the clamped box still has a positive area.
        if xmin < xmax and ymin < ymax:
            cropped_image = np.array(image.crop((xmin, ymin, xmax, ymax)))
            cropped_image_list.append(cropped_image)
        else:
            # Placeholder so the list stays aligned with the DataFrame rows.
            cropped_image_list.append(None)

# Attach the crops to their annotation rows.
df_train['cropped_image'] = cropped_image_list

# Drop the rows whose bounding box was invalid (placeholder None).
df_valid_crops = df_train.dropna(subset=['cropped_image'])

# Display general information
print("Number of valid cropped images:", len(df_valid_crops))
print("Number of proposed images:", len(df_train))
print("Image sizes:")
print(df_valid_crops['cropped_image'].apply(lambda x: x.shape).value_counts())

# Raw class labels present in the annotations.
unique_classes = df_valid_crops['class'].unique()

# Number of random images to plot for each class
images_to_plot = 2

# Preview a couple of random crops for every raw class label.
for class_name in unique_classes:
    # Rows belonging to the current class.
    df_same_class = df_valid_crops[df_valid_crops['class'] == class_name]

    num_images = len(df_same_class)

    if num_images >= images_to_plot:
        # Draw distinct index labels at random.
        random_indices = random.sample(df_same_class.index.tolist(), images_to_plot)

        fig, axes = plt.subplots(1, images_to_plot, figsize=(10, 5))
        fig.suptitle(f'Two Random Images of Class: {class_name}')

        for i, ax in enumerate(axes):
            ax.imshow(df_same_class.loc[random_indices[i]]['cropped_image'])
            ax.set_title(f"Index: {random_indices[i]}")
            ax.axis('off')

        plt.show()

    elif num_images == 1:
        # Only one crop exists for this class: show it on its own.
        plt.figure(figsize=(5, 5))
        plt.imshow(df_same_class.iloc[0]['cropped_image'])
        plt.title(f'Image of Class: {class_name} (Index: {df_same_class.index[0]}), only one sample')
        plt.axis('off')
        plt.show()
    else:
        print(f"No images of class '{class_name}' for plotting.")


We can see that some classes are the same but appear under different names, and that some of the pictures are very pixelated. We are therefore going to map the class names onto a common set and write a function that automates loading the data. Because of the cropping, the images all have different sizes, so we will then resize them to a single common size (chosen below from the observed crop sizes). We can also see that some images are duplicated, but we will not do anything about it.

In [None]:
# Display the raw class labels found in the annotations.
unique_classes

In [None]:
# Map every raw annotation label to its canonical short class name.
# Short names map to themselves; the taxonomic "Family -Common name-" labels
# (and the parenthesised Moorish Idol variant) collapse onto the same names.
old_class_to_actual_class = {
    **{
        short_name: short_name
        for short_name in (
            'tuna', 'surgeon', 'shark', 'jack', 'grouper', 'parrot',
            'snapper', 'damsel', 'trigger', 'angel', 'wrasse', 'spade',
        )
    },
    'Acanthuridae -Surgeonfishes-': 'surgeon',
    'Balistidae -Triggerfishes-': 'trigger',
    'Carangidae -Jacks-': 'jack',
    'Ephippidae -Spadefishes-': 'spade',
    'Labridae -Wrasse-': 'wrasse',
    'Lutjanidae -Snappers-': 'snapper',
    'Pomacanthidae -Angelfishes-': 'angel',
    'Pomacentridae -Damselfishes-': 'damsel',
    'Scaridae -Parrotfishes-': 'parrot',
    'Scombridae -Tunas-': 'tuna',
    'Serranidae -Groupers-': 'grouper',
    'Shark -Selachimorpha-': 'shark',
    'Zanclidae (Moorish Idol)': 'moorish idol',
    'Zanclidae -Moorish Idol-': 'moorish idol',
}

In [None]:
# Work on an independent copy so the raw-label frame stays untouched.
df_new_class = df_valid_crops.copy()

# Collapse the raw annotation labels onto their canonical class names.
df_new_class['class'] = df_new_class['class'].replace(old_class_to_actual_class)

# Canonical class names present after relabelling.
unique_classes = df_new_class['class'].unique()

# How many random crops to display per class.
images_to_plot = 2

# Preview random crops class by class.
for class_name in unique_classes:
    df_same_class = df_new_class.loc[df_new_class['class'] == class_name]
    num_images = len(df_same_class)

    if num_images >= images_to_plot:
        # Enough crops: draw distinct index labels and show them side by side.
        random_indices = random.sample(df_same_class.index.tolist(), images_to_plot)
        fig, axes = plt.subplots(1, images_to_plot, figsize=(10, 5))
        fig.suptitle(f'Two Random Images of Class: {class_name}')

        for ax, picked in zip(axes, random_indices):
            ax.imshow(df_same_class.loc[picked]['cropped_image'])
            ax.set_title(f"Index: {picked}")
            ax.axis('off')

        plt.show()
        continue

    if num_images == 1:
        # A single crop: show it on its own figure.
        only_row = df_same_class.iloc[0]
        plt.figure(figsize=(5, 5))
        plt.imshow(only_row['cropped_image'])
        plt.title(f'Image of Class: {class_name} (Index: {df_same_class.index[0]}), only one sample')
        plt.axis('off')
        plt.show()
        continue

    print(f"No images of class '{class_name}' for plotting.")

In [None]:
# Number of crops per canonical class after relabelling.
df_new_class['class'].value_counts()

We can see that some classes are underrepresented in the data, as mentioned on the dataset's website.

We are now going to look at the size of the images and reshape them

In [None]:
# How many crops share each array shape, listed in sorted shape order.
df_valid_crops['cropped_image'].apply(lambda x: x.shape).value_counts().sort_index()

In [None]:
# Estimate a common image size from the crop shapes.
# NOTE: the mean below runs over the *distinct* shapes (each counted once);
# it is not weighted by how many crops share a given shape.
index = df_valid_crops['cropped_image'].apply(lambda crop: crop.shape).value_counts().sort_index().index

# One row per distinct shape, one column per array dimension.
index_as_arrays = np.array([list(shape) for shape in index])

# One row per array dimension, one column per distinct shape.
index_transposed = index_as_arrays.T

# Mean of every dimension across the distinct shapes.
average_size = index_transposed.mean(axis=1)

print(average_size)

In [None]:
# Target crop size as (height, width), i.e. in NumPy shape order. This matches
# the 138x171 arrays that read_and_crop produces for the models.
target_size = (138, 171)

# cv2.resize takes its size argument as (width, height), so the two values are
# swapped when passed in; otherwise the crops would come out 171 high and
# 138 wide.
df_new_class['cropped_image'] = df_new_class['cropped_image'].apply(
    lambda x: cv2.resize(x, (target_size[1], target_size[0])) if x is not None else None
)

# Canonical class names present in the frame.
unique_classes = df_new_class['class'].unique()

# Number of random images to plot for each class
images_to_plot = 2

# Preview random resized crops for every class.
for class_name in unique_classes:
    # Rows belonging to the current class.
    df_same_class = df_new_class[df_new_class['class'] == class_name]

    num_images = len(df_same_class)

    if num_images >= images_to_plot:
        # Draw distinct index labels at random.
        random_indices = random.sample(df_same_class.index.tolist(), images_to_plot)

        fig, axes = plt.subplots(1, images_to_plot, figsize=(10, 5))
        fig.suptitle(f'Two Random Images of Class: {class_name}')

        for i, ax in enumerate(axes):
            ax.imshow(df_same_class.loc[random_indices[i]]['cropped_image'])
            ax.set_title(f"Index: {random_indices[i]}")
            ax.axis('off')

        plt.show()
    elif num_images == 1:
        # Only one crop exists for this class: show it on its own.
        plt.figure(figsize=(5, 5))
        plt.imshow(df_same_class.iloc[0]['cropped_image'])
        plt.title(f'Image of Class: {class_name} (Index: {df_same_class.index[0]}), only one sample')
        plt.axis('off')
        plt.show()
    else:
        print(f"No images of class '{class_name}' for plotting.")

### Functions for crop 

In [28]:
def read_and_crop(path):
    """Load one dataset split and return a resized crop per valid annotation.

    Reads ``_annotations.csv`` from ``path``, cuts every annotated bounding
    box out of its picture, resizes the crop to 171x138 pixels
    (width x height) and maps the raw class labels to canonical names.

    Parameters
    ----------
    path : str
        Directory holding the pictures and ``_annotations.csv``. A trailing
        '/' is optional; an empty string means the current directory.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``class`` (canonical label) and ``cropped_image``
        (array of shape (138, 171, 3)). Rows whose bounding box is empty
        after clamping to the picture are dropped; the original row index is
        kept.
    """
    # Normalise the directory prefix once instead of re-testing it per file.
    base_dir = path if (not path or path.endswith('/')) else path + '/'

    df = pd.read_csv(base_dir + '_annotations.csv')

    # One entry per annotation row: the resized crop, or None when invalid.
    cropped_image_list = []

    boxes = df[['filename', 'xmin', 'ymin', 'xmax', 'ymax']]
    for filename, xmin, ymin, xmax, ymax in boxes.itertuples(index=False, name=None):
        # The context manager releases the file handle after each picture.
        with Image.open(base_dir + filename) as image:
            # Clamp the box to the picture boundaries.
            xmin, ymin = max(0, xmin), max(0, ymin)
            xmax, ymax = min(image.width, xmax), min(image.height, ymax)

            if xmin < xmax and ymin < ymax:
                # Force three channels so every crop has the same shape,
                # whatever the mode of the source picture.
                crop = np.array(image.crop((xmin, ymin, xmax, ymax)).convert('RGB'))

                # cv2.resize takes (width, height): result is 138 rows x 171 cols.
                cropped_image_list.append(cv2.resize(crop, (171, 138)))
            else:
                # Placeholder keeps the list aligned with the DataFrame rows.
                cropped_image_list.append(None)

    df['cropped_image'] = cropped_image_list

    # Drop rows with an invalid box and keep only what the models need.
    df_final = df.dropna(subset=['cropped_image'])[['class', 'cropped_image']].copy()

    # Raw annotation label -> canonical short class name.
    old_class_to_actual_class = {
        'tuna' : 'tuna',
        'surgeon': 'surgeon',
        'shark': 'shark',
        'jack': 'jack',
        'grouper': 'grouper',
        'parrot': 'parrot',
        'snapper': 'snapper',
        'damsel': 'damsel',
        'trigger': 'trigger',
        'Zanclidae (Moorish Idol)': 'moorish idol',
        'Scaridae -Parrotfishes-': 'parrot',
        'Carangidae -Jacks-': 'jack',
        'Scombridae -Tunas-': 'tuna',
        'Shark -Selachimorpha-': 'shark',
        'Serranidae -Groupers-': 'grouper',
        'Lutjanidae -Snappers-': 'snapper',
        'Acanthuridae -Surgeonfishes-': 'surgeon',
        'Pomacentridae -Damselfishes-': 'damsel',
        'Labridae -Wrasse-': 'wrasse',
        'angel': 'angel',
        'wrasse': 'wrasse',
        'Zanclidae -Moorish Idol-': 'moorish idol',
        'Ephippidae -Spadefishes-': 'spade',
        'Pomacanthidae -Angelfishes-': 'angel',
        'Balistidae -Triggerfishes-': 'trigger',
        'spade': 'spade'
    }

    # Replace old class names with new class names
    df_final['class'] = df_final['class'].replace(old_class_to_actual_class)

    return df_final


def print_image_by_classes(df, images_to_plot=2):
    """Show random sample crops for every class present in ``df``.

    For each distinct value of ``df['class']``, ``images_to_plot`` rows are
    drawn at random and displayed side by side. A class with fewer rows than
    that shows its first crop alone.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``class`` and ``cropped_image``.
    images_to_plot : int, default 2
        Number of random crops displayed per class.

    Raises
    ------
    ValueError
        If ``images_to_plot`` is smaller than 1.
    """
    if images_to_plot < 1:
        raise ValueError("images_to_plot must be at least 1")

    for class_name in df['class'].unique():
        # Rows belonging to the current class.
        df_same_class = df[df['class'] == class_name]
        num_images = len(df_same_class)

        if num_images >= images_to_plot:
            # Sample row positions rather than index labels so the lookup
            # stays unambiguous even when index labels repeat.
            positions = random.sample(range(num_images), images_to_plot)

            # squeeze=False keeps `axes` two-dimensional, so a single panel
            # (images_to_plot=1) can be iterated like several panels.
            fig, axes = plt.subplots(1, images_to_plot, figsize=(10, 5), squeeze=False)
            fig.suptitle(f'{images_to_plot} Random Images of Class: {class_name}')

            for ax, position in zip(axes[0], positions):
                ax.imshow(df_same_class.iloc[position]['cropped_image'])
                ax.set_title(f"Index: {df_same_class.index[position]}")
                ax.axis('off')

            plt.show()

        elif num_images >= 1:
            # Fewer crops than requested: show the first one on its own.
            plt.figure(figsize=(5, 5))
            plt.imshow(df_same_class.iloc[0]['cropped_image'])
            plt.title(f'Image of Class: {class_name} (Index: {df_same_class.index[0]})')
            plt.axis('off')
            plt.show()

        else:
            # Reached when the label matches no row (e.g. a missing label).
            print(f"No images of class '{class_name}' for plotting.")
    

In [29]:
# Turn a cropped-image DataFrame into the arrays the models consume.
def to_work(df):
    """Convert a frame of crops into model inputs and labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``cropped_image`` (arrays sharing one shape) and
        ``class`` (canonical class names).

    Returns
    -------
    X : numpy.ndarray
        float32 array stacking all crops along a new first axis, with pixel
        values divided by 255 and shifted by -0.5.
    y : numpy.ndarray
        Integer class id of every row.
    y_cat : numpy.ndarray
        One-hot encoding of ``y`` over the 13 classes.

    Raises
    ------
    ValueError
        If ``df['class']`` contains a label that has no class id.
    """
    class_to_number = {
        'tuna': 0,
        'surgeon': 1,
        'shark': 2,
        'jack': 3,
        'grouper': 4,
        'parrot': 5,
        'snapper': 6,
        'damsel': 7,
        'trigger': 8,
        'moorish idol': 9,
        'wrasse': 10,
        'angel': 11,
        'spade': 12
    }

    # map() turns unknown labels into NaN, which is reported explicitly
    # instead of letting a stray string reach the one-hot encoder.
    labels = df['class'].map(class_to_number)
    if labels.isna().any():
        unknown = sorted(df.loc[labels.isna(), 'class'].astype(str).unique())
        raise ValueError(f"Unknown class label(s): {unknown}")
    y = labels.to_numpy(dtype='int64')

    X = np.stack(df['cropped_image'].to_numpy(), axis=0)

    # Scale to [-0.5, 0.5]. Casting to float32 first halves the memory of the
    # result compared with the float64 array that dividing the raw pixels by a
    # Python int would create.
    X = X.astype(np.float32) / 255 - 0.5

    y_cat = to_categorical(y, num_classes=len(class_to_number))

    return X, y, y_cat

In [30]:
# Build the cropped, resized and relabelled frame for each dataset split.
df_train = read_and_crop('data/train/')
df_test = read_and_crop('data/test/')
df_valid = read_and_crop('data/valid/')

### Classification

#### Model creation

In [None]:
def init_model():
    """Build and compile the baseline CNN for the 13-class classifier.

    The network takes 138x171 RGB inputs and stacks two convolution +
    max-pooling stages, a 10-unit dense layer with dropout and a 13-way
    softmax output.

    Returns
    -------
    keras.Model
        Compiled with Adam and categorical cross-entropy; reports accuracy,
        precision and recall.
    """
    # Start by creating a sequential model
    model = models.Sequential()

    ### First Convolution & MaxPooling
    model.add(layers.Conv2D(8, (4, 4), activation='relu', padding='same', input_shape=(138, 171, 3)))
    model.add(layers.MaxPooling2D((2, 2)))

    ### Second Convolution & MaxPooling
    model.add(layers.Conv2D(16, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))

    ### Flattening
    model.add(layers.Flatten())

    ### One Fully Connected layer
    model.add(layers.Dense(10, activation='relu'))
    # Dropout to limit overfitting
    model.add(layers.Dropout(0.3))
    ### Last layer - Classification Layer
    model.add(layers.Dense(13, activation='softmax')) # softmax for multiclass classification

    ### Model compilation
    # Explicit metric names keep the history keys stable ('precision',
    # 'recall') when several models are built in one session; auto-generated
    # names would pick up a numeric suffix on later models.
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy',
                           tf.keras.metrics.Precision(name='precision'),
                           tf.keras.metrics.Recall(name='recall')])

    return model
    

#### Model Training and Evaluation

In [34]:
# Plot the training curves recorded by model.fit
def plot_history(history):
    """Plot train vs. validation curves for loss, accuracy, precision, recall.

    Parameters
    ----------
    history : object
        Anything exposing a ``history`` dict that maps metric names to
        per-epoch values, such as the object returned by ``model.fit``.
        A metric stored with a numeric suffix (``precision_1``,
        ``val_precision_1``) is accepted as well.

    Raises
    ------
    KeyError
        If one of the four metrics, or its ``val_`` counterpart, is missing.
    """
    logs = history.history

    def _series(metric):
        # Exact key first, then the same name followed by '_<number>'.
        if metric in logs:
            return logs[metric]
        prefix = metric + '_'
        for key in logs:
            if key.startswith(prefix) and key[len(prefix):].isdigit():
                return logs[key]
        raise KeyError(metric)

    # (history key, panel title), one panel per metric, top to bottom.
    panels = [
        ('loss', 'Loss'),
        ('accuracy', 'Accuracy '),
        ('precision', 'Precision '),
        ('recall', 'Recall'),
    ]

    fig, axs = plt.subplots(len(panels), 1, figsize=(10, 10))

    for ax, (metric, title) in zip(axs, panels):
        ax.plot(_series(metric), color='red', label='train')
        ax.plot(_series('val_' + metric), color='blue', label='val')
        ax.set_title(title)
        ax.legend(['train', 'validation'], loc="upper right")
        ax.set(xlabel='Epoch', ylabel='')

    # Hide x labels and tick labels on every panel but the bottom one.
    for ax in axs:
        ax.label_outer()

    plt.show()

In [31]:
# Turn each split into model inputs: scaled image tensor, integer labels and
# one-hot labels.
X_train, y_train, y_train_cat = to_work(df_train)
X_test, y_test, y_test_cat = to_work(df_test)
X_val, y_val, y_val_cat = to_work(df_valid)

Warning: this cell takes around 20 minutes to run.

In [None]:
# Trained model for every batch size tried, keyed by that batch size.
models_cnn = {}

# Early-stopping callback; the same instance is handed to every run.
es = EarlyStopping(patience=20, restore_best_weights=True)

for batch in (4, 16, 32, 64, 128, 256):

    # Fresh, untrained network for this batch size.
    model = init_model()

    fit_options = dict(
        validation_data=(X_val, y_val_cat),
        epochs=100,
        batch_size=batch,
        verbose=0,
        callbacks=[es],
    )
    history = model.fit(X_train, y_train_cat, **fit_options)

    print(f'------------------------------------------Batch Size {batch}------------------------------------------')

    # Keep the trained model for the later comparison.
    models_cnn[batch] = model

    # Learning curves of this run.
    plot_history(history)

    # Final scores: [loss, accuracy, precision, recall] on each split.
    trainEval = model.evaluate(X_train, y_train_cat, verbose=0)
    valEval = model.evaluate(X_val, y_val_cat, verbose=0)

    print("         Model Evaluation on Training :")
    training_labels = (
        "     Training Loss:    ",
        "   Training Accuracy:  ",
        "  Training Precision:  ",
    )
    for label, score in zip(training_labels, trainEval):
        print(label, score)
    print("    Training Recall:   ", trainEval[3], '\n')

    print("         Model Evaluation on Validation :")
    validation_labels = (
        "    Validation Loss:   ",
        "  Validation Accuracy: ",
        " Validation Precision: ",
        "   Validation Recall:  ",
    )
    for label, score in zip(validation_labels, valEval):
        print(label, score)


For all of the runs, the validation loss goes up as if the model were overfitting, yet most of the time precision, recall and accuracy keep going up; for batch sizes 128, 16 and 4, the precision goes down while the recall goes up over the epochs. The overfitting may be caused by the class imbalance in the training and validation data. The model with the best results is the one trained with a batch size of 16: it has the best validation loss, and the second-best validation accuracy, precision and recall, by a margin of 0.01.

In [None]:
# The run with batch size 16 is kept as the reference classifier.
modelClassif = models_cnn[16]

# Scores on the held-out test split: [loss, accuracy, precision, recall].
testEval = modelClassif.evaluate(X_test, y_test_cat, verbose=0)

print("         Model Evaluation on Test :")
test_labels = (
    "    Test Loss:   ",
    "  Test Accuracy: ",
    " Test Precision: ",
    "   Test Recall:  ",
)
for label, score in zip(test_labels, testEval):
    print(label, score)

## 2) Transfer Learning

In [26]:
def init_model_with_pretrained_base(base_model):
    """Wrap a pre-trained feature extractor with a 13-class classifier head.

    Parameters
    ----------
    base_model : keras.Model
        Pre-trained network used as feature extractor. It is frozen in place
        (``trainable`` is set to False on the object that is passed in).

    Returns
    -------
    keras.Model
        ``base_model`` followed by Flatten, Dense(256, relu), Dropout(0.5)
        and a 13-way softmax; compiled with Adam and categorical
        cross-entropy, reporting accuracy, precision and recall.
    """
    base_model.trainable = False  # Freeze the layers of the pre-trained model

    model = models.Sequential([
        base_model,
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(13, activation='softmax')  # One output per class
    ])

    # Explicit metric names keep the history keys stable ('precision',
    # 'recall') when several models are built in one session; auto-generated
    # names would pick up a numeric suffix on later models.
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy',
                           tf.keras.metrics.Precision(name='precision'),
                           tf.keras.metrics.Recall(name='recall')])

    return model


### VGG16

In [None]:
from tensorflow.keras.applications import VGG16

# Load VGG16 (ImageNet weights, no classification head) as the base.
base_model_vgg16 = VGG16(include_top=False, input_shape=(138, 171, 3), weights='imagenet')

# Freeze the base and put the classification head on top of it.
model_vgg16 = init_model_with_pretrained_base(base_model_vgg16)

# Training. The validation curves come from a 20% split of the training
# arrays (validation_split), not from X_val.
history_vgg16 = model_vgg16.fit(
    X_train, y_train_cat,
    validation_split=0.2,
    epochs=10,
    batch_size=32,
    verbose=1,
    callbacks=[EarlyStopping(patience=5, restore_best_weights=True)]
)

# Learning curves
plot_history(history_vgg16)

# Final scores on each split: [loss, accuracy, precision, recall].
trainEval = model_vgg16.evaluate(X_train,y_train_cat, verbose=0)
valEval = model_vgg16.evaluate(X_val,y_val_cat, verbose=0)
valTest = model_vgg16.evaluate(X_test,y_test_cat, verbose=0)

print("         Model Evaluation on Training :")
print("     Training Loss:    ", trainEval[0])
print("   Training Accuracy:  ", trainEval[1])
print("  Training Precision:  ", trainEval[2])
print("    Training Recall:   ", trainEval[3], '\n')
print("         Model Evaluation on Validation :")
print("    Validation Loss:   ", valEval[0])
print("  Validation Accuracy: ", valEval[1])
print(" Validation Precision: ", valEval[2])
print("   Validation Recall:  ", valEval[3])
# The test scores are labelled "Test" so they cannot be mistaken for the
# validation scores printed just above.
print("         Model Evaluation on Test :")
print("    Test Loss:   ", valTest[0])
print("  Test Accuracy: ", valTest[1])
print(" Test Precision: ", valTest[2])
print("   Test Recall:  ", valTest[3])

Avantages

- Haute précision et performance: atteignant une précision d'entraînement de 95.66% et une précision de validation de 66.97%.
- Amélioration constante des métriques: Les métriques telles que l'exactitude (accuracy), la précision et le rappel ont toutes montré des améliorations constantes, indiquant que le modèle apprend efficacement à partir des données.

Inconvénients

- Temps d'entraînement plus long: Avec des temps d'étape d'environ 70 à 77 secondes, VGG16 peut être considéré comme ayant un temps d'entraînement relativement long par rapport aux autres modèles.


### ResNet50

In [27]:
from tensorflow.keras.applications import ResNet50

# Load ResNet50 (ImageNet weights, no classification head) as the base.
# The base and the model are built once only; building them twice loads the
# weights a second time and throws the first model away.
base_model_resnet50 = ResNet50(include_top=False, input_shape=(138, 171, 3), weights='imagenet')

# Freeze the base and put the classification head on top of it.
model_resnet50 = init_model_with_pretrained_base(base_model_resnet50)

# Training. The validation curves come from a 20% split of the training
# arrays (validation_split), not from X_val.
history_resnet50 = model_resnet50.fit(
    X_train, y_train_cat,
    validation_split=0.2,
    epochs=10,
    batch_size=32,
    verbose=1,
    callbacks=[EarlyStopping(patience=5, restore_best_weights=True)]
)

# Learning curves
plot_history(history_resnet50)

# Final scores on each split: [loss, accuracy, precision, recall].
trainEval = model_resnet50.evaluate(X_train,y_train_cat, verbose=0)
valEval = model_resnet50.evaluate(X_val,y_val_cat, verbose=0)
valTest = model_resnet50.evaluate(X_test,y_test_cat, verbose=0)

print("         Model Evaluation on Training :")
print("     Training Loss:    ", trainEval[0])
print("   Training Accuracy:  ", trainEval[1])
print("  Training Precision:  ", trainEval[2])
print("    Training Recall:   ", trainEval[3], '\n')
print("         Model Evaluation on Validation :")
print("    Validation Loss:   ", valEval[0])
print("  Validation Accuracy: ", valEval[1])
print(" Validation Precision: ", valEval[2])
print("   Validation Recall:  ", valEval[3])
# The test scores are labelled "Test" so they cannot be mistaken for the
# validation scores printed just above.
print("         Model Evaluation on Test :")
print("    Test Loss:   ", valTest[0])
print("  Test Accuracy: ", valTest[1])
print(" Test Precision: ", valTest[2])
print("   Test Recall:  ", valTest[3])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


NameError: name 'X_train' is not defined

Avantages

- Architecture résiduelle: L'utilisation de connexions résiduelles aide à lutter contre le problème de disparition du gradient, permettant d'entraîner des réseaux plus profonds.
- Efficacité du temps d'entraînement: Avec des temps d'étape autour de 39 à 41 secondes, ResNet50 est plus de 2X plus rapide à l'entraînement que VGG16.

Inconvénients

- Faible performance: Le modèle a montré une faible performance sur les métriques d'évaluation, avec une précision d'entraînement de seulement 25.73% et une précision de validation encore plus faible.
- Absence d'apprentissage: Le modèle n'a pas amélioré ses prédictions au-delà du hasard, comme en témoignent les précisions et rappels nuls sur les données de validation.

=> Ne convient pas à nos données

### EfficientNet

In [None]:
from tensorflow.keras.applications import EfficientNetB5

def init_model_with_efficientnet():
    """Build and compile a 13-class classifier on a frozen EfficientNetB5.

    Returns
    -------
    keras.Model
        EfficientNetB5 (ImageNet weights, no top, 138x171 RGB input) followed
        by global average pooling, Dense(1024, relu), Dropout(0.2) and a
        13-way softmax; compiled with Adam and categorical cross-entropy,
        reporting accuracy, precision and recall.
    """
    # NOTE(review): the Keras documentation states that EfficientNet models
    # expect raw pixel values in [0, 255]; confirm that the arrays fed to this
    # model are not already rescaled.
    base_model = EfficientNetB5(include_top=False, weights='imagenet', input_shape=(138, 171, 3))
    base_model.trainable = False  # Freeze the base model

    # Classification head on top of the frozen base
    model = models.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dense(1024, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(13, activation='softmax')
    ])

    # Explicit metric names keep the history keys stable ('precision',
    # 'recall') when several models are built in one session; auto-generated
    # names would pick up a numeric suffix on later models.
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy',
                           tf.keras.metrics.Precision(name='precision'),
                           tf.keras.metrics.Recall(name='recall')])

    return model


In [None]:
# Build the EfficientNet-based model
model_efficientnet = init_model_with_efficientnet()

# Training. The validation curves come from a 20% split of the training
# arrays (validation_split), not from X_val.
history_efficientnet = model_efficientnet.fit(
    X_train, y_train_cat,
    validation_split=0.2,
    epochs=10,  # Adjust as needed
    batch_size=32,
    verbose=1,
    callbacks=[EarlyStopping(patience=5, restore_best_weights=True)]
)

# Learning curves
plot_history(history_efficientnet)

# Final scores on each split: [loss, accuracy, precision, recall].
trainEval = model_efficientnet.evaluate(X_train,y_train_cat, verbose=0)
valEval = model_efficientnet.evaluate(X_val,y_val_cat, verbose=0)
valTest = model_efficientnet.evaluate(X_test,y_test_cat, verbose=0)

print("         Model Evaluation on Training :")
print("     Training Loss:    ", trainEval[0])
print("   Training Accuracy:  ", trainEval[1])
print("  Training Precision:  ", trainEval[2])
print("    Training Recall:   ", trainEval[3], '\n')
print("         Model Evaluation on Validation :")
print("    Validation Loss:   ", valEval[0])
print("  Validation Accuracy: ", valEval[1])
print(" Validation Precision: ", valEval[2])
print("   Validation Recall:  ", valEval[3])
# The test scores are labelled "Test" so they cannot be mistaken for the
# validation scores printed just above.
print("         Model Evaluation on Test :")
print("    Test Loss:   ", valTest[0])
print("  Test Accuracy: ", valTest[1])
print(" Test Precision: ", valTest[2])
print("   Test Recall:  ", valTest[3])

Avantages

- Équilibrage des dimensions du modèle: EfficientNet utilise une approche systématique pour équilibrer la largeur, la profondeur et la résolution du réseau, ce qui peut conduire à une efficacité accrue.
- Amélioration progressive: Bien que partant de performances initiales basses, le modèle montre une amélioration au fil des époques, suggérant une capacité d'apprentissage.

Inconvénients

- Performances basses: Le modèle commence avec des performances relativement basses, ce qui peut nécessiter plus de temps ou des ajustements pour atteindre une performance acceptable.
- Complexité de l'architecture: L'équilibrage des facteurs de dimensionnement peut rendre l'architecture plus complexe à ajuster et à optimiser par rapport à des modèles plus simples.

=> Le modèle ne convenait pas bien à notre jeu de données ou, à cause de sa complexité, nous n'avons pas trouvé comment le régler.

### MobileNetV2

In [32]:
from tensorflow.keras.applications import MobileNetV2

def init_model_with_mobilenetV2():
    """Build and compile a 13-class classifier on a frozen MobileNetV2.

    Returns
    -------
    keras.Model
        MobileNetV2 (ImageNet weights, no top, 138x171 RGB input) followed by
        global average pooling, Dense(1024, relu), Dropout(0.2) and a 13-way
        softmax; compiled with Adam and categorical cross-entropy, reporting
        accuracy, precision and recall.
    """
    base_model = MobileNetV2(include_top=False, weights='imagenet', input_shape=(138, 171, 3))
    base_model.trainable = False  # Freeze the layers of the pre-trained model

    model = models.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dense(1024, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(13, activation='softmax')  # One output per class
    ])

    # Explicit metric names keep the history keys stable ('precision',
    # 'recall') when several models are built in one session; auto-generated
    # names would pick up a numeric suffix on later models.
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy',
                           tf.keras.metrics.Precision(name='precision'),
                           tf.keras.metrics.Recall(name='recall')])

    return model


In [36]:
# Build the MobileNetV2-based model
model_mobilenetV2 = init_model_with_mobilenetV2()

# Training. The validation curves come from a 20% split of the training
# arrays (validation_split), not from X_val.
history_mobilenetV2 = model_mobilenetV2.fit(
    X_train, y_train_cat,
    validation_split=0.2,
    epochs=10,  # Adjust as needed
    batch_size=32,  # Or any other value that suits the hardware
    verbose=1,
    callbacks=[EarlyStopping(patience=5, restore_best_weights=True)]
)

# Learning curves
plot_history(history_mobilenetV2)


# Final scores on each split: [loss, accuracy, precision, recall].
trainEval = model_mobilenetV2.evaluate(X_train,y_train_cat, verbose=0)
valEval = model_mobilenetV2.evaluate(X_val,y_val_cat, verbose=0)
valTest = model_mobilenetV2.evaluate(X_test,y_test_cat, verbose=0)

print("         Model Evaluation on Training :")
print("     Training Loss:    ", trainEval[0])
print("   Training Accuracy:  ", trainEval[1])
print("  Training Precision:  ", trainEval[2])
print("    Training Recall:   ", trainEval[3], '\n')
print("         Model Evaluation on Validation :")
print("    Validation Loss:   ", valEval[0])
print("  Validation Accuracy: ", valEval[1])
print(" Validation Precision: ", valEval[2])
print("   Validation Recall:  ", valEval[3])
# The test scores are labelled "Test" so they cannot be mistaken for the
# validation scores printed just above.
print("         Model Evaluation on Test :")
print("    Test Loss:   ", valTest[0])
print("  Test Accuracy: ", valTest[1])
print(" Test Precision: ", valTest[2])
print("   Test Recall:  ", valTest[3])



2024/02/28 22:28:58 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'd7ea7dc1691d43759edcd2ff7ac6ce37', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: /tmp/tmpcc64_572/model/data/model/assets


INFO:tensorflow:Assets written to: /tmp/tmpcc64_572/model/data/model/assets


<Figure size 1000x1000 with 4 Axes>

         Model Evaluation on Training :
     Training Loss:     0.035399071872234344
   Training Accuracy:   0.9884337186813354
  Training Precision:   0.9913001656532288
    Training Recall:    0.9884337186813354 

         Model Evaluation on Validation :
    Validation Loss:    0.9832377433776855
  Validation Accuracy:  0.7526881694793701
 Validation Precision:  0.7719298005104065
   Validation Recall:   0.7434715628623962
         Model Evaluation on Test :
    Validation Loss:    0.884783148765564
  Validation Accuracy:  0.7681159377098083
 Validation Precision:  0.7727272510528564
   Validation Recall:   0.739130437374115


Avantages:

- Haute efficacité et performance: MobileNetV2 affiche des performances impressionnantes avec une exactitude (accuracy), une précision et un rappel élevés sur les données d'entraînement, tout en conservant une bonne performance sur les données de validation. Cela indique une capacité de généralisation élevée malgré sa légèreté.
- Rapidité d'entraînement: Le modèle est rapide à entraîner (environ 2 min au total pour 10 époques).
- Amélioration constante: Les scores d'exactitude (accuracy), de précision et de rappel s'améliorent régulièrement au fil des époques, montrant une bonne capacité d'apprentissage du modèle.

Inconvénients:

- Augmentation de la perte de validation: Bien que la précision de validation reste élevée, il y a une augmentation de la perte de validation dans les dernières époques, ce qui pourrait indiquer un début de surajustement.
- Possibilité d'un sur-apprentissage: sur le jeu d'entraînement, la précision ainsi que l'exactitude (accuracy) étaient très proches de 100%.

### Comparaison entre les modèles

In [None]:
def compare_models(histories, names):
    """Plot the learning curves of several models on one two-panel figure.

    The left panel shows accuracy and the right panel shows loss. For every
    model the validation curve is drawn as a solid line and the training
    curve as a dashed line.

    Args:
        histories: Iterable of objects exposing a ``history`` mapping with the
            keys ``accuracy``, ``val_accuracy``, ``loss`` and ``val_loss``.
        names: Iterable of display names, paired positionally with
            ``histories``.
    """
    plt.figure(figsize=(14, 6))

    # (key in history.history, axis label) for the left and right panels.
    panels = [('accuracy', 'Accuracy'), ('loss', 'Loss')]

    for position, (metric, axis_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for history, name in zip(histories, names):
            curves = history.history
            plt.plot(curves[f'val_{metric}'], label=f'{name} val_{metric}')
            plt.plot(curves[metric], '--', label=f'{name} train_{metric}')
        plt.title(f'{axis_label} Comparison')
        plt.xlabel('Epochs')
        plt.ylabel(axis_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

# Compare the four backbones. This requires the history objects of VGG16, EfficientNet,
# ResNet50 and MobileNetV2 to already exist in the kernel (they are defined outside this cell).
compare_models([history_vgg16, history_efficientnet, history_resnet50, history_mobilenetV2], ['VGG16','EfficientNet', 'ResNet50', 'MobileNetV2'])

VGG16 excelle en termes de performance brute, atteignant une haute précision et un rappel, mais au prix de temps d'entraînement plus longs et d'une plus grande consommation de ressources.
ResNet50 et EfficientNet ont montré des performances initiales moins impressionnantes, potentiellement dues à des besoins de réglage fin des hyperparamètres ou à des caractéristiques spécifiques de l'ensemble de données.
MobileNetV2 se distingue par son équilibre entre haute performance et efficacité, offrant une excellente option pour des applications nécessitant à la fois précision et rapidité, tout en restant économe en ressources. Sa capacité à maintenir une haute précision avec moins de ressources le rend particulièrement attrayant pour des applications à réaliser rapidement.

## 3) Data Augmentation

### Training set augmentation

In [None]:
# Count how many training samples each class label has (displayed as the cell output).
Counter(y_train)

We identify 7 classes whose number of samples needs to be adjusted: one is over-represented and six are severely under-represented. These classes are the targets of our "augmentation".

In [None]:
def over_undersampling(X, y, target_count=376, random_state=None):
    """Rebalance an image dataset with random over- and under-sampling.

    Classes 7 to 12 are randomly over-sampled and class 1 is randomly
    under-sampled so that each of them ends up with ``target_count`` samples.
    Classes that are not listed in the sampling strategies are not targeted.

    Args:
        X: Array of images; the first axis indexes the samples. Each image is
            flattened for the samplers and restored to its original shape
            afterwards, so any per-image shape is accepted.
        y: Class labels aligned with the first axis of ``X``.
        target_count: Number of samples requested for every resampled class
            (defaults to the value tuned for the fish training set).
        random_state: Optional seed forwarded to both samplers so the
            resampling can be reproduced. ``None`` keeps it random.

    Returns:
        Tuple ``(X_resampled, y_resampled)`` where ``X_resampled`` has the
        same per-image shape as ``X``.
    """
    # Remember the per-image shape instead of hard-coding (138, 171, 3).
    sample_shape = X.shape[1:]

    # The samplers work on 2-D data: flatten each image to a vector.
    X_flat = X.reshape(X.shape[0], -1)

    # Summarize the class distribution before resampling.
    print(f"Original class distribution: {Counter(y)}")
    st = target_count

    oversample = RandomOverSampler(
        sampling_strategy={7: st, 8: st, 9: st, 10: st, 11: st, 12: st},
        random_state=random_state,
    )
    undersample = RandomUnderSampler(
        sampling_strategy={1: st},
        random_state=random_state,
    )

    # Over-sample the minority classes first, then trim the majority class.
    X_over, y_over = oversample.fit_resample(X_flat, y)
    X_under, y_under = undersample.fit_resample(X_over, y_over)

    # Summarize the class distribution after resampling.
    print(f"Class distribution after sampling: {Counter(y_under)}")

    # Restore the original image shape.
    X_resampled = X_under.reshape(-1, *sample_shape)

    return X_resampled, y_under

This function is tailored to the original fish training dataset: it randomly over-samples classes 7 to 12 and under-samples class 1 so that each of these classes ends up with 376 samples.

In [None]:
# Build the rebalanced ("augmented") training set from the original training data.
X_resampled, y_resampled = over_undersampling(X_train, y_train)

In [None]:
# One-hot encode the resampled labels over 13 classes so they can be passed to model.fit below.
y_resampled_cat=to_categorical(y_resampled, num_classes=13)

### Original model

In [None]:
# Trained model for every tested batch size, keyed by batch size
models_cnn = {}

# Early-stopping callback passed to every run of the loop
es = EarlyStopping(patience=20, restore_best_weights=True)

for batch in (4, 16, 32, 64, 128, 256):

    # Fresh model for each batch size, trained on the resampled set
    model = init_model()
    fit_options = dict(
        validation_data=(X_val, y_val_cat),
        epochs=100,
        batch_size=batch,
        verbose=0,
        callbacks=[es],
    )
    history = model.fit(X_resampled, y_resampled_cat, **fit_options)

    # Keep the trained model for later use
    models_cnn[batch] = model

    print(f'------------------------------------------Batch Size {batch}------------------------------------------')

    # Loss / accuracy curves of this run
    plot_history(history)

    # Metrics on the (resampled) training set and on the validation set
    trainEval = model.evaluate(X_resampled, y_resampled_cat, verbose=0)
    valEval = model.evaluate(X_val, y_val_cat, verbose=0)

    print("         Model Evaluation on Training :")
    train_captions = ("     Training Loss:    ",
                      "   Training Accuracy:  ",
                      "  Training Precision:  ")
    for position, caption in enumerate(train_captions):
        print(caption, trainEval[position])
    # The last training line carries an extra newline to separate the two reports
    print("    Training Recall:   ", trainEval[3], '\n')

    print("         Model Evaluation on Validation :")
    val_captions = ("    Validation Loss:   ",
                    "  Validation Accuracy: ",
                    " Validation Precision: ",
                    "   Validation Recall:  ")
    for position, caption in enumerate(val_captions):
        print(caption, valEval[position])

### VGG16

In [None]:
# Load VGG16 with ImageNet weights and without its classification head
base_model_vgg16 = VGG16(include_top=False, input_shape=(138, 171, 3), weights='imagenet')

# Build the classifier on top of the VGG16 base
model_vgg16 = init_model_with_pretrained_base(base_model_vgg16)

# Train on the resampled set.
# The held-out validation set has to be passed through `validation_data`:
# `validation_split` expects a float fraction of the training data, not a tuple.
history_vgg16 = model_vgg16.fit(
    X_resampled, y_resampled_cat,
    validation_data=(X_val, y_val_cat),
    epochs=10,
    batch_size=32,
    verbose=1,
    callbacks=[EarlyStopping(patience=5, restore_best_weights=True)]
)

# Learning curves
plot_history(history_vgg16)

# Evaluate on the (resampled) training set, the validation set and the test set
trainEval = model_vgg16.evaluate(X_resampled,y_resampled_cat, verbose=0)
valEval = model_vgg16.evaluate(X_val,y_val_cat, verbose=0)
valTest = model_vgg16.evaluate(X_test,y_test_cat, verbose=0)

print("         Model Evaluation on Training :")
print("     Training Loss:    ", trainEval[0])
print("   Training Accuracy:  ", trainEval[1])
print("  Training Precision:  ", trainEval[2])
print("    Training Recall:   ", trainEval[3], '\n')
print("         Model Evaluation on Validation :")
print("    Validation Loss:   ", valEval[0])
print("  Validation Accuracy: ", valEval[1])
print(" Validation Precision: ", valEval[2])
print("   Validation Recall:  ", valEval[3])
# The test metrics were previously printed under "Validation" labels
print("         Model Evaluation on Test :")
print("       Test Loss:      ", valTest[0])
print("     Test Accuracy:    ", valTest[1])
print("    Test Precision:    ", valTest[2])
print("      Test Recall:     ", valTest[3])

### MobileNetV2

In [None]:
# Build the MobileNetV2-based classifier
model_mobilenetV2 = init_model_with_mobilenetV2()

# Train on the resampled set.
# The held-out validation set has to be passed through `validation_data`:
# `validation_split` expects a float fraction of the training data, not a tuple.
history_mobilenetV2 = model_mobilenetV2.fit(
    X_resampled, y_resampled_cat,
    validation_data=(X_val, y_val_cat),
    epochs=10,
    batch_size=32,
    verbose=1,
    callbacks=[EarlyStopping(patience=5, restore_best_weights=True)]
)

# Learning curves
plot_history(history_mobilenetV2)


# Evaluate on the data the model was actually trained on (the resampled set,
# as in the other cells of this section), then on the validation and test sets
trainEval = model_mobilenetV2.evaluate(X_resampled,y_resampled_cat, verbose=0)
valEval = model_mobilenetV2.evaluate(X_val,y_val_cat, verbose=0)
valTest = model_mobilenetV2.evaluate(X_test,y_test_cat, verbose=0)

print("         Model Evaluation on Training :")
print("     Training Loss:    ", trainEval[0])
print("   Training Accuracy:  ", trainEval[1])
print("  Training Precision:  ", trainEval[2])
print("    Training Recall:   ", trainEval[3], '\n')
print("         Model Evaluation on Validation :")
print("    Validation Loss:   ", valEval[0])
print("  Validation Accuracy: ", valEval[1])
print(" Validation Precision: ", valEval[2])
print("   Validation Recall:  ", valEval[3])
# The test metrics were previously printed under "Validation" labels
print("         Model Evaluation on Test :")
print("       Test Loss:      ", valTest[0])
print("     Test Accuracy:    ", valTest[1])
print("    Test Precision:    ", valTest[2])
print("      Test Recall:     ", valTest[3])

## 4) Detection

### Yolo model

In [1]:
from ultralytics import YOLO
# NOTE(review): this rebinds the name `Image`, which the first cell of the notebook bound to
# PIL's `Image` module. On a top-to-bottom run, code that expects PIL's `Image`
# (e.g. `Image.fromarray`) will fail after this cell unless it re-imports PIL itself.
from IPython.display import display, Image



In [2]:
# Load the YOLOv8 nano checkpoint (plain string: the f-prefix had no placeholder).
# NOTE: this rebinds `model`, which the batch-size loop of section 3 used for the Keras CNN.
model = YOLO('yolov8n.pt')

In [3]:
# First, very short run: train for a single epoch on the dataset described by dataYolo/data.yaml,
# with batch size 16 and the Adam optimizer. The training log is captured below.
results = model.train(data='dataYolo/data.yaml', epochs=1, batch=16, patience=10, optimizer='Adam', lr0=0.01, lrf=0.01)

  return torch._C._cuda_getDeviceCount() > 0


Ultralytics YOLOv8.1.19 🚀 Python-3.10.12 torch-2.2.1+cu121 CPU (Intel Core(TM) i5-10300H 2.50GHz)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=dataYolo/data.yaml, epochs=1, time=None, patience=10, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=Adam, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_co

[34m[1mtrain: [0mScanning /home/todin/OneDrive/DeepLearning/Projet_DeepLearning/dataYolo/train/labels.cache... 944 images, 0 backgrounds, 0 corrupt: 100%|██████████| 944/944 [00:00<?, ?it/s]
[34m[1mval: [0mScanning /home/todin/OneDrive/DeepLearning/Projet_DeepLearning/dataYolo/valid/labels.cache... 270 images, 0 backgrounds, 0 corrupt: 100%|██████████| 270/270 [00:00<?, ?it/s]


Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m Adam(lr=0.01, momentum=0.937) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)


2024/02/28 12:14:33 INFO mlflow.tracking.fluent: Experiment with name '/Shared/YOLOv8' does not exist. Creating a new experiment.
2024/02/28 12:14:33 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2024/02/28 12:14:33 INFO mlflow.tracking.fluent: Autologging successfully enabled for statsmodels.


[34m[1mMLflow: [0mlogging run_id(8496a51abbf0438bbb79d36f35e7f9f1) to runs/mlflow
[34m[1mMLflow: [0mview at http://127.0.0.1:5000 with 'mlflow server --backend-store-uri runs/mlflow'
[34m[1mMLflow: [0mdisable with 'yolo settings mlflow=False'
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 1 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/1         0G       1.84      3.995      1.941        100        640: 100%|██████████| 59/59 [06:20<00:00,  6.44s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/9 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  11%|█         | 1/9 [00:09<01:13,  9.18s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  22%|██▏       | 2/9 [00:18<01:06,  9.56s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  33%|███▎      | 3/9 [00:28<00:56,  9.39s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  44%|████▍     | 4/9 [00:36<00:44,  8.83s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  56%|█████▌    | 5/9 [00:43<00:33,  8.35s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  67%|██████▋   | 6/9 [00:50<00:23,  7.88s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  78%|███████▊  | 7/9 [00:58<00:15,  7.75s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  89%|████████▉ | 8/9 [01:05<00:07,  7.57s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [01:09<00:00,  7.71s/it]


                   all        270        651    0.00133     0.0383   0.000892    0.00031

1 epochs completed in 0.128 hours.
Optimizer stripped from runs/detect/train/weights/last.pt, 6.3MB
Optimizer stripped from runs/detect/train/weights/best.pt, 6.3MB

Validating runs/detect/train/weights/best.pt...
Ultralytics YOLOv8.1.19 🚀 Python-3.10.12 torch-2.2.1+cu121 CPU (Intel Core(TM) i5-10300H 2.50GHz)
Model summary (fused): 168 layers, 3010718 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/9 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  11%|█         | 1/9 [00:08<01:09,  8.73s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  22%|██▏       | 2/9 [00:15<00:53,  7.70s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  33%|███▎      | 3/9 [00:23<00:45,  7.58s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  44%|████▍     | 4/9 [00:29<00:36,  7.23s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  56%|█████▌    | 5/9 [00:36<00:28,  7.20s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  67%|██████▋   | 6/9 [00:44<00:22,  7.43s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  78%|███████▊  | 7/9 [00:51<00:14,  7.10s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  89%|████████▉ | 8/9 [00:58<00:07,  7.02s/it]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [01:03<00:00,  7.09s/it]


                   all        270        651     0.0022     0.0405    0.00126   0.000468
Acanthuridae -Surgeonfishes-        270        104    0.00229     0.0288    0.00125   0.000288
    Carangidae -Jacks-        270         46    0.00173       0.13    0.00106   0.000405
     Labridae -Wrasse-        270          1          0          0          0          0
 Lutjanidae -Snappers-        270         62   0.000474     0.0161    0.00025   0.000175
Scaridae -Parrotfishes-        270         21          0          0          0          0
    Scombridae -Tunas-        270         23    0.00148       0.13   0.000995   0.000516
 Serranidae -Groupers-        270         30          0          0          0          0
 Shark -Selachimorpha-        270         33    0.00103     0.0909   0.000732   0.000262
Zanclidae (Moorish Idol)        270          1          0          0          0          0
Zanclidae -Moorish Idol-        270          1          0          0          0          0
          

In [15]:
# Same settings as the previous run, but with 10 epochs.
# NOTE(review): patience=10 with epochs=10 presumably means early stopping can never trigger — confirm.
# NOTE(review): this reuses the `model` object from the 1-epoch run while the log reports
# model=yolov8n.pt — TODO confirm which weights the training actually starts from.
result2 = model.train(data='dataYolo/data.yaml', epochs=10, batch=16, patience=10, optimizer='Adam', lr0=0.01, lrf=0.01)

Ultralytics YOLOv8.1.19 🚀 Python-3.10.12 torch-2.2.1+cu121 CPU (Intel Core(TM) i5-10300H 2.50GHz)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=dataYolo/data.yaml, epochs=10, time=None, patience=10, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=0, project=None, name=train4, exist_ok=False, pretrained=True, optimizer=Adam, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_

[34m[1mtrain: [0mScanning /home/todin/OneDrive/DeepLearning/Projet_DeepLearning/dataYolo/train/labels.cache... 944 images, 0 backgrounds, 0 corrupt: 100%|██████████| 944/944 [00:00<?, ?it/s]
[34m[1mval: [0mScanning /home/todin/OneDrive/DeepLearning/Projet_DeepLearning/dataYolo/valid/labels.cache... 270 images, 0 backgrounds, 0 corrupt: 100%|██████████| 270/270 [00:00<?, ?it/s]

Plotting labels to runs/detect/train4/labels.jpg... 





[34m[1moptimizer:[0m Adam(lr=0.01, momentum=0.937) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)


2024/02/28 15:50:42 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2024/02/28 15:50:42 INFO mlflow.tracking.fluent: Autologging successfully enabled for statsmodels.


[34m[1mMLflow: [0mlogging run_id(60210fa95ee3468887dc2aba2272d2ce) to runs/mlflow
[34m[1mMLflow: [0mview at http://127.0.0.1:5000 with 'mlflow server --backend-store-uri runs/mlflow'
[34m[1mMLflow: [0mdisable with 'yolo settings mlflow=False'
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train4[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10         0G      1.627      2.891      1.933         27        640: 100%|██████████| 59/59 [04:10<00:00,  4.25s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:30<00:00,  3.40s/it]

                   all        270        651     0.0819      0.162     0.0726     0.0368






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10         0G      1.608      2.818      1.904         68        640: 100%|██████████| 59/59 [04:30<00:00,  4.58s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:29<00:00,  3.23s/it]

                   all        270        651      0.153      0.152     0.0525     0.0218






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10         0G      1.592      2.788      1.882         28        640: 100%|██████████| 59/59 [04:30<00:00,  4.59s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:25<00:00,  2.87s/it]

                   all        270        651      0.107      0.184     0.0815     0.0398






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10         0G      1.488      2.639      1.772         39        640: 100%|██████████| 59/59 [04:04<00:00,  4.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:29<00:00,  3.27s/it]

                   all        270        651     0.0889      0.203     0.0844     0.0432






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10         0G      1.528      2.651      1.806         33        640: 100%|██████████| 59/59 [04:07<00:00,  4.20s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:26<00:00,  2.94s/it]

                   all        270        651     0.0748      0.186     0.0716     0.0369






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10         0G      1.532      2.561      1.778         22        640: 100%|██████████| 59/59 [04:30<00:00,  4.58s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:37<00:00,  4.14s/it]

                   all        270        651      0.144      0.289      0.123     0.0698






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10         0G      1.437      2.414      1.705         73        640: 100%|██████████| 59/59 [04:52<00:00,  4.96s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:29<00:00,  3.23s/it]

                   all        270        651      0.166      0.282       0.14     0.0784






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10         0G      1.396      2.353      1.648         48        640: 100%|██████████| 59/59 [05:29<00:00,  5.58s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:36<00:00,  4.04s/it]

                   all        270        651      0.147      0.306      0.158     0.0938






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10         0G      1.348      2.284        1.6         52        640: 100%|██████████| 59/59 [06:12<00:00,  6.32s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:40<00:00,  4.53s/it]

                   all        270        651       0.14      0.308      0.138      0.081






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10         0G      1.266      2.154      1.555         47        640: 100%|██████████| 59/59 [06:13<00:00,  6.33s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:47<00:00,  5.31s/it]

                   all        270        651      0.168      0.302      0.168      0.103






10 epochs completed in 0.906 hours.
Optimizer stripped from runs/detect/train4/weights/last.pt, 6.3MB
Optimizer stripped from runs/detect/train4/weights/best.pt, 6.3MB

Validating runs/detect/train4/weights/best.pt...
Ultralytics YOLOv8.1.19 🚀 Python-3.10.12 torch-2.2.1+cu121 CPU (Intel Core(TM) i5-10300H 2.50GHz)
Model summary (fused): 168 layers, 3010718 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:34<00:00,  3.82s/it]


                   all        270        651      0.169      0.301      0.167      0.102
Acanthuridae -Surgeonfishes-        270        104      0.324      0.359      0.279      0.145
    Carangidae -Jacks-        270         46      0.169      0.326      0.184      0.127
     Labridae -Wrasse-        270          1          0          0          0          0
 Lutjanidae -Snappers-        270         62      0.208      0.323      0.174      0.108
Scaridae -Parrotfishes-        270         21      0.147     0.0476     0.0847     0.0665
    Scombridae -Tunas-        270         23      0.255      0.957      0.387      0.271
 Serranidae -Groupers-        270         30      0.281      0.233      0.219     0.0933
 Shark -Selachimorpha-        270         33      0.216      0.485      0.236      0.159
Zanclidae (Moorish Idol)        270          1          0          0          0          0
Zanclidae -Moorish Idol-        270          1          0          0          0          0
          

We can see that the model performs very poorly after one epoch (mAP50 ≈ 0.001) and, although it improves, it is still weak after 10 epochs (mAP50 ≈ 0.17).

### Association of a pre-trained detector & our classification model

In [43]:
def display_image(image):
  """Show ``image`` on a new 20x15-inch matplotlib figure with the grid disabled."""
  plt.figure(figsize=(20, 15))
  plt.grid(False)
  plt.imshow(image)

  
def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               color,
                               font,
                               thickness=4,
                               display_str_list=()):
  """Draw one bounding box and its text labels onto a PIL image, in place.

  Args:
    image: PIL image to draw on (modified in place).
    ymin, xmin, ymax, xmax: Box corners as fractions of the image height and
      width (they are multiplied by the image size below).
    color: Color of the box outline and of the label background.
    font: Font object providing ``getbbox``; used to measure and draw labels.
    thickness: Width of the box outline, in pixels.
    display_str_list: Sequence of label strings, stacked vertically with the
      first string on top.
  """
  draw = ImageDraw.Draw(image)
  im_width, im_height = image.size
  (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                ymin * im_height, ymax * im_height)
  draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
             (left, top)],
            width=thickness,
            fill=color)

  # If the stacked labels would not fit between the top of the image and the
  # top of the box, draw them just inside the box, starting from its top edge,
  # instead of above it.
  display_str_heights = [font.getbbox(ds)[3] for ds in display_str_list]
  # Each display_str has a top and bottom margin of 0.05x.
  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

  if top > total_display_str_height:
    text_bottom = top
  else:
    text_bottom = top + total_display_str_height
  # Reverse list and print from bottom to top.
  for display_str in display_str_list[::-1]:
    bbox = font.getbbox(display_str)
    text_width, text_height = bbox[2], bbox[3]
    margin = np.ceil(0.05 * text_height)
    draw.rectangle([(left, text_bottom - text_height - 2 * margin),
                    (left + text_width, text_bottom)],
                   fill=color)
    draw.text((left + margin, text_bottom - text_height - margin),
              display_str,
              fill="black",
              font=font)
    # Move up by the full height of the label just drawn (text plus both
    # margins). Subtracting only `text_height - 2 * margin` made consecutive
    # labels overlap by 4 * margin.
    text_bottom -= text_height + 2 * margin


def draw_boxes(image, boxes, class_info):
    """Draw labelled bounding boxes on an image array.

    Args:
        image: Image as a NumPy array; it is overwritten in place with the
            annotated picture and also returned.
        boxes: Array whose first axis indexes the boxes; each row is
            ``(ymin, xmin, ymax, xmax)`` in coordinates relative to the image
            size.
        class_info: Indexable sequence giving one hashable label per box; the
            label is printed next to the box and selects its color.

    Returns:
        The same ``image`` array, with the boxes drawn on it.
    """
    # Import PIL's Image locally under an alias: the YOLO section of this
    # notebook runs `from IPython.display import display, Image`, which rebinds
    # the global name `Image` and would break `Image.fromarray` here.
    from PIL import Image as PILImage

    colors = list(ImageColor.colormap.values())

    try:
        font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf", 25)
    except IOError:
        print("Font not found, using default font.")
        font = ImageFont.load_default()

    image_pil = PILImage.fromarray(np.uint8(image)).convert("RGB")

    for i in range(boxes.shape[0]):
        ymin, xmin, ymax, xmax = boxes[i]
        # Same label -> same color within one Python process.
        color = colors[hash(class_info[i]) % len(colors)]

        # Draw bounding box on the image
        draw_bounding_box_on_image(
            image_pil,
            ymin,
            xmin,
            ymax,
            xmax,
            color,
            font,
            display_str_list=[f'Class: {class_info[i]}'],
        )

    # Copy the annotated picture back once, after all boxes are drawn, instead
    # of after every single box. Nothing is copied when there is no box.
    if boxes.shape[0] > 0:
        np.copyto(image, np.array(image_pil))

    return image

def load_img(path):
  """Read the file at ``path`` and decode it as a 3-channel JPEG image tensor."""
  raw_bytes = tf.io.read_file(path)
  return tf.image.decode_jpeg(raw_bytes, channels=3)

def run_detector(detector, path):
  """Run ``detector`` on the image at ``path`` and display all detected boxes.

  Args:
    detector: Callable taking a float32 image batch and returning a mapping
      of tensors that contains at least the keys ``detection_boxes``,
      ``detection_class_entities`` and ``detection_scores``.
    path: Path of the JPEG image to analyse.
  """
  img = load_img(path)

  converted_img  = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]
  start_time = time.time()
  result = detector(converted_img)
  end_time = time.time()

  result = {key:value.numpy() for key,value in result.items()}

  print("Found %d objects." % len(result["detection_scores"]))
  print("Inference time: ", end_time-start_time)

  # Decode byte labels so the drawn text reads "Fish" rather than "b'Fish'".
  class_names = [
      name.decode("utf-8") if isinstance(name, bytes) else name
      for name in result["detection_class_entities"]
  ]

  # draw_boxes takes (image, boxes, class_info). The scores were previously
  # passed as a fourth positional argument, which raised a TypeError.
  image_with_boxes = draw_boxes(
      img.numpy(), result["detection_boxes"], class_names)

  display_image(image_with_boxes)

def detect_img_2(image_path):
  """Run the notebook-level ``detector`` on ``image_path`` and print the total elapsed time.

  The printed duration covers the whole ``run_detector`` call, not only the
  detector invocation itself.
  """
  started_at = time.time()
  run_detector(detector, image_path)
  elapsed = time.time() - started_at
  print("Inference time:",elapsed)
  
def resort_fish_boxes(boxes, class_names, scores, threshold=0.5):
    """Keep the boxes that were detected as "Fish" with enough confidence.

    Despite the name, the boxes are not re-sorted: they are returned in the
    order in which the detector produced them.

    Args:
        boxes: Array-like exposing ``shape[0]`` and integer indexing; one row
            per detection.
        class_names: Indexable sequence with one label per detection. A label
            may be a ``str``, ``bytes`` or an object exposing ``.numpy()``
            that yields one of those.
        scores: Indexable sequence with one confidence score per detection.
        threshold: Minimum score (inclusive) for a detection to be kept.

    Returns:
        NumPy array of the selected boxes, or ``None`` when no detection
        qualifies.
    """
    def _label_text(label):
        # Normalise the label so that b"Fish" and "Fish" are treated alike.
        # A plain `b"Fish" == "Fish"` is False and silently dropped every box.
        if hasattr(label, "numpy"):
            label = label.numpy()
        if isinstance(label, bytes):
            label = label.decode("utf-8", errors="replace")
        return label

    fish_boxes = [
        boxes[i]
        for i in range(boxes.shape[0])
        if scores[i] >= threshold and _label_text(class_names[i]) == "Fish"
    ]

    if not fish_boxes:
        return None
    return np.array(fish_boxes)


In [None]:
# TF-Hub handle of the detector to load. The trailing `#@param` annotation lists the two
# alternatives (SSD MobileNet V2 and Faster R-CNN Inception-ResNet V2, both Open Images V4).
module_handle = "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1" #@param ["https://tfhub.dev/google/openimages_v4/ssd/mobilenet_v2/1", "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1"]

# Load the module and keep its 'default' signature as the callable `detector` used by the cells below.
detector = hub.load(module_handle).signatures['default']

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore
2024-02-28 21:29:13.893423: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_UNKNOWN: unknown error


In [38]:
# Read the training annotations again (same CSV as in the classification section);
# only the 'filename' column is used by the cells below.
df_train = pd.read_csv('data/train/_annotations.csv')

In [39]:
# Detect the fish in one training image, classify every detected crop with
# MobileNetV2 and collect one result row per fish.
array = list()

img = load_img('data/train/' + df_train['filename'][1])

converted_img  = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]
start_time = time.time()
result = detector(converted_img)
end_time = time.time()

fish_boxes = resort_fish_boxes(
        result["detection_boxes"],
        result["detection_class_entities"],
        result["detection_scores"],
        threshold=0.5)

if fish_boxes is None:
    print("No fish detected.")

# resort_fish_boxes returns None when nothing is found. Iterate over an empty
# list in that case instead of raising "TypeError: 'NoneType' object is not iterable".
for coord in ([] if fish_boxes is None else fish_boxes):
    # Extract coordinates
    ymin, xmin, ymax, xmax = coord

    # Crop the image (the box coordinates are fractions of the image size)
    cropped_img = img[int(ymin * img.shape[0]):int(ymax * img.shape[0]),
                      int(xmin * img.shape[1]):int(xmax * img.shape[1])]

    # cv2.resize takes (width, height): the crop becomes 138 rows x 171 columns
    img_np = np.array([cv2.resize(cropped_img.numpy(), (171, 138))])

    a = model_mobilenetV2.predict(img_np, verbose=0)

    array.append([df_train['filename'][1], ymin, xmin, ymax, xmax, np.argmax(a)])

df_pred = pd.DataFrame(array, columns=['image','ymin', 'xmin', 'ymax', 'xmax', 'class'])

df_pred

Unnamed: 0,image,ymin,xmin,ymax,xmax,class
0,FishDataset232_png.rf.CC8AfGJBFvbo9JTlNPe4.jpg,0.187828,0.082099,0.85232,0.676879,1
1,FishDataset232_png.rf.CC8AfGJBFvbo9JTlNPe4.jpg,0.550713,0.627715,0.93842,0.959714,1


In [46]:
def detect_and_classify_fish(image_path, detector, classification_model, threshold=0.5):
    """Detect the fish in an image, classify each one and display the result.

    Args:
        image_path: Path of the JPEG image to analyse.
        detector: Callable taking a float32 image batch and returning a
            mapping with the keys ``detection_boxes``,
            ``detection_class_entities`` and ``detection_scores``.
        classification_model: Model exposing ``predict``; it receives each
            crop resized with ``cv2.resize(..., (171, 138))``.
        threshold: Minimum detection score for a fish box to be kept.

    Returns:
        DataFrame with one row per detected fish (columns ``image``, ``ymin``,
        ``xmin``, ``ymax``, ``xmax``, ``class``), or ``None`` when no fish is
        detected.
    """
    img = load_img(image_path)

    # The detector expects a float32 batch: add a leading batch axis.
    batch = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]
    detections = detector(batch)

    fish_boxes = resort_fish_boxes(
        detections["detection_boxes"],
        detections["detection_class_entities"],
        detections["detection_scores"],
        threshold=threshold,
    )
    if fish_boxes is None:
        print("No fish detected.")
        return None

    rows = []
    for ymin, xmin, ymax, xmax in fish_boxes:
        # Box coordinates are fractions of the image size: convert to pixels.
        row_start, row_stop = int(ymin * img.shape[0]), int(ymax * img.shape[0])
        col_start, col_stop = int(xmin * img.shape[1]), int(xmax * img.shape[1])
        crop = img[row_start:row_stop, col_start:col_stop]

        # Resize the crop and wrap it in a batch of one for the classifier.
        model_input = np.array([cv2.resize(crop.numpy(), (171, 138))])
        class_scores = classification_model.predict(model_input, verbose=0)

        rows.append([image_path, ymin, xmin, ymax, xmax, np.argmax(class_scores)])

    # The last column of the rows holds the predicted class of each box.
    labels = np.array(rows)[:, -1]
    display_image(draw_boxes(img.numpy(), fish_boxes, labels))

    return pd.DataFrame(rows, columns=['image', 'ymin', 'xmin', 'ymax', 'xmax', 'class'])

# Example usage: run the detector + MobileNetV2 classifier on the training image at index 1
# of the annotations file and print the resulting table (None is printed if no fish is found).
image_path = 'data/train/' + df_train['filename'][1]
df_results = detect_and_classify_fish(image_path, detector, model_mobilenetV2)
print(df_results)

                                               image      ymin      xmin  \
0  data/train/FishDataset232_png.rf.CC8AfGJBFvbo9...  0.187828  0.082099   
1  data/train/FishDataset232_png.rf.CC8AfGJBFvbo9...  0.550713  0.627715   

      ymax      xmax  class  
0  0.85232  0.676879      1  
1  0.93842  0.959714      1  
