Nous avons tenté de transformer les images directement en matrice de valeurs représentant les pixels.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
from functools import partial
import os

In [None]:
# Build the image array (X) and the one-hot category array (Y) for the
# "raw pixels" experiment.

# Load the training metadata: one row per image with its file name and label.
data = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')

PATH = '../input/cassava-leaf-disease-classification/train_images'

# Only the first MAX_IMAGES samples are used: the authors report an error
# when loading more images than this.
MAX_IMAGES = 1000
listeIMG = data["image_id"][:MAX_IMAGES]
listeCat = data["label"][:MAX_IMAGES]

# Read every selected image as an 8-bit grayscale ("L" mode) matrix.
images = []
for x in listeIMG:
    # The context manager closes the underlying file handle after each image.
    with Image.open(f'{PATH}/{x}') as img:
        images.append(np.array(img.convert("L")))

# Normalisation to [0, 1].
# `np.float` was removed in NumPy 1.24; float32 is used explicitly instead
# (it also halves the memory footprint compared to float64).
images = np.array(images, dtype=np.float32) / 255.0

# One-hot encode the labels: the index of the 1 is the category of the image.
categories = keras.utils.to_categorical(listeCat, 5)


In [None]:
# Sanity check: open the first training image from disk and display it.
imgFILE = '../input/cassava-leaf-disease-classification/train_images/' + listeIMG[0]
imgSRC = Image.open(imgFILE)
# A bare expression on the last line of a cell makes the notebook render it.
imgSRC

In [None]:
# Création du modèle 
def build_and_train_model1(x_train, y_train, x_test, y_test, batch_nb, epok, lr=None):
    """Build, train and evaluate a dense network on raw pixel matrices.

    Args:
        x_train: Training images as a NumPy array.
        y_train: One-hot encoded training labels (5 classes).
        x_test: Held-out images, used both as validation data during
            training and for the final evaluation.
        y_test: One-hot encoded labels matching ``x_test``.
        batch_nb: Batch size passed to ``model.fit``.
        epok: Number of training epochs.
        lr: SGD learning rate. When ``None`` (default) the notebook-level
            ``learning_rate`` variable is used, as the original code did.

    Returns:
        The value returned by ``model.evaluate`` on the test data
        (loss followed by the categorical accuracy).
    """
    if lr is None:
        # Backward-compatible fallback on the global defined in the run cell.
        lr = learning_rate

    model = keras.models.Sequential()
    model.add(keras.layers.Flatten())
    for units in (1800, 1800, 1400, 1200):
        model.add(keras.layers.Dense(units, activation=keras.activations.relu))
    # NOTE(review): sigmoid + MSE is kept as in the original experiment; for a
    # single-label 5-class problem softmax + categorical cross-entropy is the
    # usual choice (and would need a much smaller learning rate).
    model.add(keras.layers.Dense(5, activation="sigmoid"))

    model.compile(
        loss=keras.losses.mse,
        optimizer=keras.optimizers.SGD(learning_rate=lr),
        # `metrics` must be a list (recent Keras versions reject a bare function).
        metrics=[keras.metrics.categorical_accuracy],
    )

    model.fit(
        x_train,
        y_train,
        validation_data=(x_test, y_test),
        epochs=epok,
        batch_size=batch_nb,
    )
    model.summary()

    # Return the evaluation instead of silently discarding it.
    score = model.evaluate(x_test, y_test, verbose=0)
    return score




In [None]:
# Hyper-parameters of the raw-pixel experiment.
epok = 100
nbBatch = 300
learning_rate = 0.95

# Hold out 33% of the samples for testing; the seed keeps the split reproducible.
IMG_train, IMG_test, Categories_train, Categories_test = train_test_split(
    images,
    categories,
    test_size=0.33,
    random_state=42,
)

# Train and evaluate the dense model on the split.
build_and_train_model1(
    x_train=IMG_train,
    y_train=Categories_train,
    x_test=IMG_test,
    y_test=Categories_test,
    batch_nb=nbBatch,
    epok=epok,
)

Nous avons abandonné cette méthode pour essayer d'exploiter les TFRecords.

In [None]:
# Use a TPU when one is reachable, otherwise fall back to the default strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print("Device:", tpu.master())
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except Exception as tpu_error:
    # `Exception` (instead of a bare `except:`) keeps the best-effort fallback
    # but no longer swallows KeyboardInterrupt/SystemExit, and the reason for
    # the fallback is reported instead of being hidden.
    print("TPU not available, using the default strategy:", tpu_error)
    strategy = tf.distribute.get_strategy()
print("Number of replicas:", strategy.num_replicas_in_sync)
# NOTE(review): `strategy` is never entered via `strategy.scope()` in the
# cells visible here, so models are not actually distributed — confirm intent.

# Configuration of the TFRecord experiment.
AUTOTUNE = tf.data.experimental.AUTOTUNE
PATH = "../input/cassava-leaf-disease-classification"  # NOTE: rebinds the PATH used by the image-loading cell
BATCH_SIZE = 300
IMAGE_SIZE = [512, 512,3]  # not referenced by the cells visible here
HEIGHT = 100  # images are resized to HEIGHT x WIDTH before training
WIDTH = 100
CHANNELS = 3
EPOCH = 100
learning_rate = 0.001

In [None]:
# List the training TFRecord shards. The list is sorted because the order
# returned by glob is not guaranteed, and the train/validation split below
# depends on it: sorting makes the split reproducible across runs.
FILENAMES = sorted(tf.io.gfile.glob(PATH + "/train_tfrecords/*.tfrec"))

# First 80% of the shards for training, the remaining ones for validation.
split_ind = int(0.8 * len(FILENAMES))
TRAINING_FILENAMES, VALID_FILENAMES = FILENAMES[:split_ind], FILENAMES[split_ind:]

TEST_FILENAMES = sorted(tf.io.gfile.glob(PATH + "/test_tfrecords/*.tfrec"))
print("Train TFRecord Files:", len(TRAINING_FILENAMES))
print("Validation TFRecord Files:", len(VALID_FILENAMES))
print("Test TFRecord Files:", len(TEST_FILENAMES))

In [None]:
def resize(x):
    """Return `x` resized to (HEIGHT, WIDTH) with `tf.image.resize`."""
    target_size = (HEIGHT, WIDTH)
    return tf.image.resize(x, target_size)

def decode_image(image):
    """Decode a JPEG byte string into a float32 RGB tensor of reduced size.

    The image is decoded with 3 channels, cast to float32 and resized to
    [HEIGHT, WIDTH, 3] so that the models work on smaller images.
    Pixel values are not rescaled by this function.
    """
    decoded = tf.image.decode_jpeg(image, channels=3)
    as_float = tf.cast(decoded, tf.float32)
    return resize(as_float)


In [None]:
def read_tfrecord(example, labeled):
    """Parse one serialized TFRecord example.

    Args:
        example: Serialized example read from a TFRecord file.
        labeled: Whether the record carries a "target" feature.

    Returns:
        ``(image, label)`` when `labeled` is true (label cast to int32),
        otherwise just the decoded image.
    """
    feature_spec = {"image": tf.io.FixedLenFeature([], tf.string)}
    if labeled:
        feature_spec["target"] = tf.io.FixedLenFeature([], tf.int64)

    parsed = tf.io.parse_single_example(example, feature_spec)
    image = decode_image(parsed["image"])

    if not labeled:
        return image
    return image, tf.cast(parsed["target"], tf.int32)


In [None]:
def load_dataset(filenames, labeled=True):
    """Create a dataset of decoded samples from TFRecord files.

    Args:
        filenames: TFRecord file paths to read.
        labeled: Forwarded to `read_tfrecord`; selects whether labels are parsed.

    Returns:
        A `tf.data.Dataset` of ``(image, label)`` pairs when `labeled` is
        true, or of images only otherwise.
    """
    # Ordering is deliberately given up in exchange for throughput.
    ignore_order = tf.data.Options()
    ignore_order.experimental_deterministic = False

    # Without `num_parallel_reads` the files are read one after the other;
    # passing it is what actually interleaves reads from several files.
    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTOTUNE)
    dataset = dataset.with_options(ignore_order)
    dataset = dataset.map(
        partial(read_tfrecord, labeled=labeled),
        num_parallel_calls=AUTOTUNE,
    )
    return dataset

In [None]:
def get_dataset(filenames, labeled=True):
    """Return a shuffled, batched and prefetched dataset for `filenames`.

    Args:
        filenames: TFRecord file paths to read.
        labeled: Whether the records contain labels (see `load_dataset`).

    Returns:
        A `tf.data.Dataset` yielding batches of BATCH_SIZE samples.
    """
    dataset = load_dataset(filenames, labeled=labeled)
    dataset = dataset.shuffle(2048)
    dataset = dataset.batch(BATCH_SIZE)
    # Prefetch is the last step so that whole batches (not single samples)
    # are prepared in the background while the model is training.
    dataset = dataset.prefetch(buffer_size=AUTOTUNE)
    return dataset

In [None]:
# Input pipelines: train/validation carry labels, the test set does not.
train_dataset = get_dataset(filenames=TRAINING_FILENAMES, labeled=True)
valid_dataset = get_dataset(filenames=VALID_FILENAMES, labeled=True)
test_dataset = get_dataset(filenames=TEST_FILENAMES, labeled=False)



In [None]:
# Show the first image of one test batch.
# The loop variable is deliberately NOT called `images`: that name holds the
# NumPy array built by the image-loading cell, and overwriting it here would
# break any re-run of the train/test split cell.
for test_images in test_dataset.take(1):
    plt.imshow(test_images[0].numpy().astype("uint8"))
    plt.axis("off")

In [None]:
# Display images together with their categories.

def show_batch(data):
    """Plot up to 25 images of the first batch of `data` with their labels.

    Args:
        data: A dataset yielding ``(images, labels)`` batches.
    """
    plt.figure(figsize=(10, 10))

    for batch_images, batch_labels in data.take(1):
        batch_images = batch_images.numpy()
        batch_labels = batch_labels.numpy()
        # The grid is 5 x 5; a smaller batch must not cause an IndexError.
        shown = min(25, len(batch_images))
        for i in range(shown):
            plt.subplot(5, 5, i + 1)
            plt.imshow(batch_images[i].astype("uint8"))
            # The category is shown as the title of each thumbnail.
            plt.title(str(int(batch_labels[i])))
            plt.axis("off")


show_batch(train_dataset)


In [None]:
def build_and_train_model(addlayers, train_dataset, valid_dataset):
    """Build a Sequential model, train it and return its training history.

    Args:
        addlayers: Callable receiving the (empty) Sequential model and adding
            the architecture-specific layers to it.
        train_dataset: Batched dataset of ``(image, integer label)`` pairs
            used for training.
        valid_dataset: Batched dataset of the same structure used for
            validation and for the final evaluation.

    Returns:
        The `History` object returned by ``model.fit``.
    """
    model = keras.models.Sequential()
    # The input pipeline delivers raw pixel values in [0, 255]; scale them to
    # [0, 1] so that gradient descent does not saturate or diverge.
    model.add(keras.layers.Lambda(lambda pixels: pixels / 255.0))
    addlayers(model)

    model.add(keras.layers.Flatten())
    # Single softmax head over the 5 classes (the former extra 5-unit sigmoid
    # layer in front of it only acted as an information bottleneck).
    model.add(keras.layers.Dense(5, activation="softmax"))

    model.compile(
        # Labels are integer class ids, not one-hot vectors, so the sparse
        # categorical loss is required (binary cross-entropy does not fit a
        # 5-way single-label problem).
        loss=keras.losses.sparse_categorical_crossentropy,
        optimizer=keras.optimizers.SGD(learning_rate),
        metrics=['accuracy'],
    )

    # The datasets are already batched: passing `batch_size` to `fit` together
    # with a tf.data.Dataset raises a ValueError.
    logs = model.fit(
        train_dataset,
        epochs=EPOCH,
        validation_data=valid_dataset,
    )
    model.summary()

    # Report generalisation on the validation data instead of re-scoring the
    # training set and discarding the result.
    score = model.evaluate(valid_dataset, verbose=0)
    print("Validation [loss, accuracy]:", score)

    return logs




In [None]:
def pmc(model):
    """Add a multi-layer perceptron body to `model`.

    The input is flattened, then passed through four ReLU dense layers of
    800, 800, 400 and 200 units.
    """
    model.add(keras.layers.Flatten())
    for units in (800, 800, 400, 200):
        model.add(keras.layers.Dense(units, activation='relu'))
    

In [None]:
def convnet(model):
    """Add a small convolutional body to `model`.

    Three Conv2D(4x4, ReLU, "same" padding) + MaxPool2D stages with 64, 32
    and 16 filters, followed by one dropout layer and a flatten.

    Authors' observation: with this model the loss and the accuracy kept the
    same value at every epoch.
    """
    for filters in (64, 32, 16):
        model.add(keras.layers.Conv2D(filters, (4, 4), padding="same", activation=keras.activations.relu))
        model.add(keras.layers.MaxPool2D())

    # A single Dropout(0.5): the original stacked two of them back to back,
    # which compounds to dropping about 75% of the activations.
    model.add(keras.layers.Dropout(0.5))

    model.add(keras.layers.Flatten())

    

In [None]:
def resNet(model):
    """Add three Conv2D(3x3, ReLU) + BatchNormalization stages to `model`.

    The stages use 32, 64 and 64 filters with "same" padding.
    NOTE(review): despite the name, no residual (skip) connection is created
    here; the layers are stacked sequentially.
    """
    for filters in (32, 64, 64):
        conv = keras.layers.Conv2D(
            filters,
            (3, 3),
            padding="same",
            activation=keras.activations.relu,
        )
        model.add(conv)
        model.add(keras.layers.BatchNormalization())



In [None]:
#import torchvision.models as models
#vgg16 = models.vgg16(pretrained=True)

In [None]:
#def vgg16_pytorch(model):
    #model = vgg16

In [None]:
# Authors' note: the initial number of layers was cut down because of a memory problem.

def vgg16(model):
    """Add a VGG-16 style body to `model`.

    Five convolutional stages, each made of several 3x3 ReLU convolutions
    ("same" padding) followed by max-pooling and Dropout(0.5), then a flatten
    and three ReLU dense layers of 4096, 4096 and 1000 units.
    """
    relu = keras.activations.relu

    # (number of filters, number of stacked convolutions) for each stage.
    conv_stages = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    for filters, conv_count in conv_stages:
        for _ in range(conv_count):
            model.add(keras.layers.Conv2D(filters, (3, 3), padding="same", activation=relu))
        model.add(keras.layers.MaxPool2D())
        model.add(keras.layers.Dropout(0.5))

    model.add(keras.layers.Flatten())

    # Fully-connected layers, each followed by a ReLU activation.
    for units in (4096, 4096, 1000):
        model.add(keras.layers.Dense(units, activation='relu'))

    

In [None]:

# Architectures to compare, in training order, with the label used in the plots.
model_builders = [
    (pmc, "modèle pmc"),
    (convnet, "modèle convnet"),
    (resNet, "modèle resnet"),
    (vgg16, "modèle vgg-16"),
]

# Train each architecture in turn and keep (training history, label) pairs.
all_logs = [
    (build_and_train_model(builder, train_dataset, valid_dataset), label)
    for builder, label in model_builders
]


In [None]:
# Plotting helpers

def _plot_history_metric(all_logs, candidate_keys, title):
    """Draw one curve per model for the first history key found in `candidate_keys`."""
    plotted = False
    for history_obj, label in all_logs:
        recorded = history_obj.history
        key = next((name for name in candidate_keys if name in recorded), None)
        if key is None:
            raise KeyError(
                f"none of {list(candidate_keys)} found in the history of "
                f"'{label}' (available: {list(recorded)})"
            )
        values = recorded[key]
        plt.plot(range(len(values)), values, label=label)
        plotted = True

    plt.title(title)
    plt.xlabel("epoch")
    if plotted:
        # Avoids the "no artists with labels" warning on an empty list.
        plt.legend()
    plt.show()


def plot_all_logs(all_logs):
    """Plot the training loss and accuracy curves of several models.

    Args:
        all_logs: Iterable of ``(history, label)`` pairs, where `history`
            exposes a ``history`` dict (as returned by ``model.fit``) and
            `label` is the legend entry of the model.

    Raises:
        KeyError: If a history contains none of the expected metric keys.
    """
    _plot_history_metric(all_logs, ("loss",), "Loss")
    # The key depends on how the metric was given at compile time:
    # metrics=['accuracy'] records "accuracy", whereas
    # keras.metrics.categorical_accuracy records "categorical_accuracy".
    _plot_history_metric(
        all_logs,
        ("categorical_accuracy", "accuracy", "sparse_categorical_accuracy"),
        "Accuracy",
    )


In [None]:
# Draw the loss and accuracy curves for every (history, label) pair in all_logs.
plot_all_logs(all_logs)