In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import tarfile

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Conv2D, MaxPooling2D, Dense, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix

2024-10-28 17:43:52.666056: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [12]:
# Path of the gzip-compressed archive
tgz_path = 'ships24.tar.gz'

# Step 1: unpack the .tar.gz into the current directory.
# NOTE(review): extractall() writes every member path as-is; only run this on a trusted archive.
with tarfile.open(tgz_path, mode='r:gz') as tar_gz:
    tar_gz.extractall('./')

# Step 2: unpack 'ships24.tar' into the current directory as well.
# NOTE(review): presumably this inner tar is produced by step 1 — confirm against the archive layout.
with tarfile.open('ships24.tar', mode='r') as tar:
    tar.extractall('./')

In [None]:
# --- Configuration -----------------------------------------------------------
SEED = 16
directory_path = "./ships_gray/"
batch_size = 32
image_size = (128, 192)

# Seed NumPy and TensorFlow with the same value.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# --- Training / validation split ---------------------------------------------
# subset='both' returns the two splits from one call; 20% of the images are held out.
ds_train, ds_val = tf.keras.utils.image_dataset_from_directory(
    directory_path,
    labels='inferred',
    color_mode='grayscale',
    image_size=image_size,
    batch_size=batch_size,
    shuffle=True,
    seed=SEED,
    validation_split=0.2,
    subset='both',
)

# --- Validation / test split -------------------------------------------------
# The first half of the held-out batches becomes the test set, the rest stays as validation.
val_batches = ds_val.cardinality()
test_batch_count = val_batches // 2
ds_test = ds_val.take(test_batch_count)
ds_val = ds_val.skip(test_batch_count)

# The reported size is (number of test batches) x batch_size.
test_size = ds_test.cardinality().numpy() * batch_size
print("Number of elements in ds_test:", test_size)

# Helper that one-hot encodes the labels of a dataset
def process_dataset(dataset, num_classes=10):
    """Return `dataset` with its integer labels replaced by one-hot vectors.

    Args:
        dataset: a dataset yielding `(x, y)` pairs and exposing `.map`;
            `y` is passed to `tf.one_hot`.
        num_classes: length of the one-hot vectors (the `depth` given to
            `tf.one_hot`). Defaults to 10, the value previously hard-coded.

    Returns:
        The mapped dataset yielding `(x, one_hot(y))` pairs; `x` is untouched.
    """
    def _encode(x, y):
        return x, tf.one_hot(y, depth=num_classes)

    # Parallel map calls keep element order by default, so results are unchanged.
    return dataset.map(_encode, num_parallel_calls=tf.data.AUTOTUNE)

AUTOTUNE = tf.data.AUTOTUNE

# One-hot encode the labels of the three splits.
ds_train, ds_val, ds_test = map(process_dataset, (ds_train, ds_val, ds_test))

# Cache every split and prefetch with an auto-tuned buffer.
# Only the training split is shuffled (buffer of 1000 elements).
ds_train = (
    ds_train
    .cache()
    .shuffle(1000)
    .prefetch(buffer_size=AUTOTUNE)
)
ds_val = ds_val.cache().prefetch(AUTOTUNE)
ds_test = ds_test.cache().prefetch(AUTOTUNE)

Nous avons choisi de répartir nos données comme suit :
* training:   80%
* validation: 10%
* test:       10%

# Création du modèle et définition d'un callback

In [None]:
def _conv_stage(filters, **first_conv_kwargs):
    """Return the layers of one convolutional stage, in order.

    A stage is two 3x3 'same'-padded convolutions, each followed by
    LeakyReLU(alpha=0.1), then 2x2 max pooling and Dropout(0.3).
    Extra keyword arguments go to the first convolution only
    (used to declare the input shape of the network).
    """
    return [
        Conv2D(filters, (3, 3), padding='same', **first_conv_kwargs),
        LeakyReLU(alpha=0.1),
        Conv2D(filters, (3, 3), padding='same'),
        LeakyReLU(alpha=0.1),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.3),
    ]


# Four stages with 64, 128, 256 and 512 filters; only the first declares the input shape.
feature_layers = (
    _conv_stage(64, input_shape=(128, 192, 1))
    + _conv_stage(128)
    + _conv_stage(256)
    + _conv_stage(512)
)

# Global average pooling, one hidden dense layer, then a 10-way softmax.
classifier_layers = [
    GlobalAveragePooling2D(),
    Dense(256, activation=None),
    LeakyReLU(alpha=0.1),
    BatchNormalization(),
    Dropout(0.5),

    Dense(10, activation='softmax'),
]

model = Sequential(feature_layers + classifier_layers)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Callback: stop when val_accuracy has not improved for 10 epochs and restore the best weights.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
)

model.summary()

In [None]:
# Report how many layers the model holds.
layer_count = len(model.layers)
print("Nombre de couches dans le modèle :", layer_count)

In [None]:
# Pull a single batch from each split before training; the batch itself is not used.
# NOTE(review): presumably a pipeline smoke test / warm-up — confirm it is still needed.
for split in (ds_train, ds_val, ds_test):
    for images, labels in split.take(1):
        pass

# Train for at most 75 epochs, validating on ds_val with the early-stopping callback.
history = model.fit(
    ds_train,
    validation_data=ds_val,
    epochs=75,
    callbacks=[early_stopping],
)

# Etude des résultats

In [None]:
# Evaluate on the test split; the model was compiled with a loss and one metric (accuracy).
test_metrics = model.evaluate(ds_test)
loss, accuracy = test_metrics
print("Test loss:", loss)
print("Test Accuracy:", accuracy)

# Génération du fichier de résultat

In [None]:
# Charger les données de test pour la prédiction
X_test = np.load('./test.npy', allow_pickle=True)

# Ajouter une dimension de canal pour correspondre à l'entrée du modèle
X_test = np.expand_dims(X_test, axis=-1)

res = model.predict(X_test).argmax(axis=1)
df = pd.DataFrame({"Category": res})
df.to_csv("submission.csv", index_label="ID")

# Afficher les premières lignes du fichier CSV
!head submission.csv

import os
os.chdir(r'./')
from IPython.display import FileLink
FileLink(r'submission.csv')

# Matrice de confusion

In [None]:
import seaborn as sns

def plot_confusion_matrix(model, ds_test, class_names=None):
    """Plot the row-normalized confusion matrix of `model` on `ds_test`.

    Args:
        model: model exposing `predict_on_batch`, returning one score vector
            per sample.
        ds_test: iterable of `(x_batch, y_batch)` pairs whose labels are
            one-hot encoded tensors (converted with `.numpy()`).
        class_names: optional list of class names, in class-index order.
            Defaults to the ten ship classes used in this notebook.

    Raises:
        ValueError: if `ds_test` yields no batch.
    """
    if class_names is None:
        class_names = ["coastguard", "container", "corvette", "cruiser", "cv", "destroyer", "methanier", "smallfish", "submarine", "tug"]
    class_ids = np.arange(len(class_names))

    # Collect labels and predictions in the SAME pass so they stay aligned
    # even if the dataset does not yield batches in the same order twice.
    true_batches, pred_batches = [], []
    for x_batch, y_batch in ds_test:
        true_batches.append(np.argmax(y_batch.numpy(), axis=1))  # one-hot -> class index
        pred_batches.append(np.argmax(model.predict_on_batch(x_batch), axis=1))
    if not true_batches:
        raise ValueError("ds_test yielded no batches; cannot build a confusion matrix")
    y_true = np.concatenate(true_batches, axis=0)
    y_pred = np.concatenate(pred_batches, axis=0)

    # Passing `labels` keeps the matrix at len(class_names) x len(class_names)
    # even when a class is absent, so it always matches the tick labels.
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)

    # Normalize each row by its number of true samples; rows with no sample stay at 0
    # (this avoids a division by zero).
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_normalized = np.divide(
        cm,
        row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals > 0,
    )

    # Draw the normalized confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm_normalized, annot=True, cmap="Blues", cbar=False,
                xticklabels=class_names, yticklabels=class_names)
    plt.title("Normalized Confusion Matrix")
    plt.xlabel("Predicted Labels")
    plt.ylabel("True Labels")
    plt.xticks(rotation=45)
    plt.yticks(rotation=0)
    plt.show()

plot_confusion_matrix(model, ds_test)


# Courbes

In [None]:
# Keys recorded in the training history, mapped to their legend labels (plotted in this order).
curve_labels = {
    'loss': 'Training Loss',
    'val_loss': 'Validation Loss',
    'accuracy': 'Training Accuracy',
    'val_accuracy': 'Validation Accuracy',
}

# All four curves share the same axes, one point per epoch.
for metric_key, curve_label in curve_labels.items():
    plt.plot(history.history[metric_key], label=curve_label)

plt.title('Training Metrics')
plt.xlabel('Epoch')
plt.ylabel('Metrics')
plt.legend()
plt.show()