In [None]:
import os
import glob
import numpy as np
import pandas as pd
import tifffile
import matplotlib.pyplot as plt
from tensorflow import keras

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from tensorflow.keras.layers import MaxPooling1D
from pathlib import Path
from sklearn.metrics import f1_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, Conv1D, GlobalAveragePooling1D, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.models import Sequential
from sklearn.metrics import classification_report

In [None]:
# Classification summary
def cpt_mal_classes(y_test_func, result_func):
    """Print the number of samples and how many of them are misclassified.

    The two inputs are compared element by element, by position.

    Parameters
    ----------
    y_test_func : array-like
        Ground-truth labels.
    result_func : array-like
        Predicted labels, in the same order as ``y_test_func``.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    # Converting to arrays makes the comparison purely positional; indexing a
    # pandas Series with obj[i] is label-based and fails when its index is not
    # 0..n-1.
    truth = np.asarray(y_test_func)
    predicted = np.asarray(result_func)
    if truth.shape != predicted.shape:
        raise ValueError(
            f'y_test_func and result_func must have the same shape, '
            f'got {truth.shape} and {predicted.shape}'
        )

    nb_func = int(np.count_nonzero(truth != predicted))
    print (f'Taille des données {len(y_test_func)} mal classés {nb_func}\n')

# Plot confusion matrix
def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """Draw a confusion matrix as an annotated heat map and display it.

    Parameters
    ----------
    cm : array-like, shape (n, n)
        Confusion matrix of counts. Rows are drawn on the 'True label' axis
        and columns on the 'Predicted label' axis.
    target_names : sequence or None
        Tick labels, one per class. ``None`` keeps matplotlib's default ticks.
    title : str
        Figure title.
    cmap : matplotlib colormap or None
        Colormap of the heat map; ``None`` selects 'Blues'.
    normalize : bool
        If True, every row is divided by its sum and each cell is annotated
        with a 4-decimal fraction; otherwise cells show the raw counts.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    cm = np.asarray(cm)

    # Accuracy always comes from the raw counts, before any normalisation.
    total = cm.sum()
    accuracy = np.trace(cm) / float(total) if total else float('nan')
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    # Normalise BEFORE drawing so that the heat-map colours, the printed
    # values and the text-colour threshold all refer to the same matrix.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True).astype('float')
        # Rows without any sample stay at 0 instead of turning into NaN.
        cm = np.divide(cm, row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells darker than the threshold get white text so it stays readable.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for i, j in np.ndindex(*cm.shape):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    # Called after the labels exist so that they are included in the layout.
    plt.tight_layout()
    plt.show()

In [None]:
# Make sure the output directory exists (no error if it is already there).
output_dir = Path('Output')
output_dir.mkdir(parents=True, exist_ok=True)

### **Recharger les données d'entraînement, de validation et de test depuis les fichiers numpy**

In [None]:
# Reload the data after having freed the memory.

# Index applied to every feature array: keep axis 0, take index 2 on axes 1
# and 2, keep the last two axes (same as arr[:, 2, 2, :, :]).
# NOTE(review): presumably this picks the centre pixel of a 5x5 patch — confirm.
PIXEL_INDEX = (slice(None), 2, 2, slice(None), slice(None))

# Training set
train_X = np.load('Data/train_valid_test/train_X.npy')[PIXEL_INDEX]
train_y = np.load('Data/train_valid_test/train_y.npy')

# Validation set (valid_id holds one identifier per sample)
valid_X = np.load('Data/train_valid_test/valid_X.npy')[PIXEL_INDEX]
valid_y = np.load('Data/train_valid_test/valid_y.npy')
valid_id = np.load('Data/train_valid_test/valid_id.npy')

# Test set (no label array is loaded here)
test_X = np.load('Data/train_valid_test/test_X.npy')[PIXEL_INDEX]
test_id = np.load('Data/train_valid_test/test_id.npy')

### **Encoder les labels entre 0 et 4 de sorte à matcher les prédictions des réseaux de neurones**

In [None]:
# Learn the label -> integer code mapping on the training labels only, then
# apply that same fitted mapping to the validation labels.
encoder = LabelEncoder()
train_y_enc = encoder.fit_transform(train_y)
valid_y_enc = encoder.transform(valid_y)

In [None]:
# 1D convolutional classifier: three Conv1D layers over inputs of shape
# (8, 4), followed by pooling, dropout and a 5-unit softmax output.
cnn_layers = [
    Conv1D(32, kernel_size=5, activation='relu', input_shape=(8, 4)),
    Conv1D(64, kernel_size=3, activation='relu'),
    Conv1D(128, kernel_size=1, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.5),
    Flatten(),
    Dense(5, activation='softmax'),
]
model = Sequential(cnn_layers)

model.summary()

In [None]:
# Training hyper-parameters.
OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=0.01)
BATCH_SIZE = 256
EPOCHS = 25

# Checkpoint callback: writes only the weights, and only when the monitored
# validation accuracy improves.
best_weights_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='Model/model',
    monitor='val_accuracy',   # metric to watch
    save_best_only=True,      # keep only the best model
    save_weights_only=True,   # store the weights only
    verbose=1,                # log level
)
callbacks = [best_weights_checkpoint]

model.compile(
    optimizer=OPTIMIZER,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

history = model.fit(
    x=train_X,
    y=train_y_enc,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(valid_X, valid_y_enc),
    callbacks=callbacks,
)
# Figures noted from earlier runs (the number in parentheses is presumably the
# epoch count — confirm):
# loss: 0.2481 - accuracy: 0.9138 - val_loss: 0.2786 - val_accuracy: 0.9017 (10)
# loss: 0.0982 - acc: 0.9655 - val_loss: 0.4826 - val_acc: 0.8791 (100)

In [None]:
# Restore the checkpointed weights (best validation accuracy) BEFORE exporting
# the model: saving first would write the last-epoch weights to 'Output/Best'.
# load_weights() does not return a model; the value kept in model_loaded1 is
# only its load status. The name is preserved for backward compatibility.
model_loaded1 = model.load_weights('Model/model')

# Export the full model, which now carries the best weights.
model.save('Output/Best')

# Read the exported model back from disk.
model_loaded2 = keras.models.load_model('Output/Best')

In [None]:
# Accuracy per epoch: training curve first, validation curve second.
for curve_key in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[curve_key])
plt.title('model accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['accuracy', 'val_accuracy'], loc='upper left')
plt.show()

In [None]:
# Loss per epoch: training curve first, validation curve second.
for curve_key in ('loss', 'val_loss'):
    plt.plot(history.history[curve_key])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['loss', 'val_loss'], loc='upper left')
plt.show()

In [None]:
# Evaluate on the validation set; the code below reads score[0] as the loss
# and score[1] as the accuracy.
score = model.evaluate(valid_X, valid_y_enc, batch_size=256)
valid_loss, valid_accuracy = score[0], score[1]

print(f'Loss : {valid_loss:.2f}')
print(f'Accuracy : {valid_accuracy:.2f}')

### **Prédiction des classes sur le jeu de validation et évaluation en agrégeant au niveau objet**

In [None]:
# Predicted class probabilities on the validation set.
valid_prob = model.predict(valid_X, batch_size=256)

# Class with the highest probability (axis 1: one decision per row).
valid_pred = np.argmax(valid_prob, axis=1)

# Map the encoded predictions back to the original labels
# (1 to 5 according to the original author's note).
valid_pred_enc = encoder.inverse_transform(valid_pred)

# Predictions, labels and object ids are combined by position below, so they
# must have the same length; otherwise the aggregation would be silently wrong.
assert len(valid_pred_enc) == len(valid_y) == len(valid_id), (
    'valid_X, valid_y and valid_id must describe the same samples'
)

# Object-level aggregation
out_pred = []
unique_id = np.unique(valid_id)
for ID in unique_id:
    # Samples that belong to the current object (mask computed once).
    in_object = valid_id == ID
    pred = valid_pred_enc[in_object]
    y_true = valid_y[in_object]
    # Majority value over the samples of the object:
    # (ground truth, majority prediction).
    out_pred.append([np.bincount(y_true).argmax(), np.bincount(pred).argmax()])
out_pred = np.vstack(out_pred)

In [None]:
# Column 0 of out_pred is the object-level ground truth, column 1 the
# majority-vote prediction; every metric receives them in that order.
obj_truth, obj_majority = out_pred[:, 0], out_pred[:, 1]

cpt_mal_classes(obj_truth, obj_majority)

weighted_f1 = f1_score(obj_truth, obj_majority, average="weighted")
print(f'F1 score : {weighted_f1:.2f}\n')

object_accuracy = accuracy_score(obj_truth, obj_majority)
print(f'Accuracy : {object_accuracy:.2f}\n')

object_cm = confusion_matrix(obj_truth, obj_majority)
print(f'Matrice de confusion :\n{object_cm}\n')

object_report = classification_report(obj_truth, obj_majority)
print(f'Classification report :\n{object_report}')

In [None]:
# Object-level confusion matrix on the validation set (ground truth first,
# majority prediction second), drawn without class names on the ticks.
valid_cm = confusion_matrix(out_pred[:, 0], out_pred[:, 1])
plot_confusion_matrix(valid_cm, target_names=None)

### **Prédire sur le jeu de test et préparer une soumission**

In [None]:
# Predicted class probabilities on the test set.
test_prob = model.predict(test_X, batch_size=256)

# Class with the highest probability (axis 1: one decision per row).
test_pred = np.argmax(test_prob, axis=1)

# Map the encoded predictions back to the original labels
# (1 to 5 according to the original author's note).
test_pred_enc = encoder.inverse_transform(test_pred)

# Predictions and object ids are combined by position below, so they must
# have the same length; otherwise the aggregation would be silently wrong.
assert len(test_pred_enc) == len(test_id), (
    'test_X and test_id must describe the same samples'
)

# Object-level aggregation
agg_pred = []
unique_id = np.unique(test_id)
for ID in unique_id:
    # Predictions of the samples that belong to the current object.
    pred = test_pred_enc[test_id == ID]
    # Majority value over the samples of the object: (ID, majority prediction).
    agg_pred.append([ID, np.bincount(pred).argmax()])
agg_pred = np.vstack(agg_pred)

In [None]:
# Prediction table: first column of agg_pred is the object ID, second column
# the majority-vote class.
object_ids, object_classes = agg_pred.T
df = pd.DataFrame({'ID': object_ids, 'Class': object_classes})

# Labels read from disk, used in the following cells as the reference.
df_test = pd.read_csv('Data/Test_id_Label.csv')

In [None]:
# df_test.Class is used as the reference and df.Class as the prediction.
# NOTE(review): the two columns are compared row by row, by position. This
# assumes the CSV lists the objects in the same order as df (IDs sorted by
# np.unique) — confirm, or align both frames on the ID before scoring.
reference_classes, predicted_classes = df_test.Class, df.Class

cpt_mal_classes(reference_classes, predicted_classes)

weighted_f1 = f1_score(reference_classes, predicted_classes, average="weighted")
print(f'F1 score : {weighted_f1:.2f}\n')

object_accuracy = accuracy_score(reference_classes, predicted_classes)
print(f'Accuracy : {object_accuracy:.2f}\n')

object_cm = confusion_matrix(reference_classes, predicted_classes)
print(f'Matrice de confusion :\n{object_cm}\n')

object_report = classification_report(reference_classes, predicted_classes)
print(f'Classification report :\n{object_report}')

In [None]:
# Confusion matrix on the test objects (reference labels first, predictions
# second), drawn without class names on the ticks.
test_cm = confusion_matrix(df_test.Class, df.Class)
plot_confusion_matrix(test_cm, target_names=None)