In [None]:
import os
import time
import json
import keras
import numpy as np
import pandas as pd
import seaborn as sn
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
from keras.utils import plot_model
from keras.optimizers import RMSprop, Adam
from keras.models import Sequential, load_model
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB3, EfficientNetB7
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers import GlobalAveragePooling2D, Flatten, Dense, Dropout, BatchNormalization

# Dados
## Explorando dados

In [None]:
# Load the competition's training label table from its CSV file.
data = pd.read_csv(
    '../input/cassava-leaf-disease-classification/train.csv',
    sep = ',',
)

In [None]:
# Preview the first five rows of the label table.
data.head(5)

In [None]:
# Read the JSON file and build the label-number -> class-name mapping.
# JSON object keys are strings, so each key is cast to int here.
with open('../input/cassava-leaf-disease-classification/label_num_to_disease_map.json') as f:
    mapping = {
        int(label_num): disease_name
        for label_num, disease_name in json.load(f).items()
    }

mapping

In [None]:
# Add the readable class name for each numeric label, then keep only the
# three columns used downstream, in a fixed order.
data = data.assign(label_name = data['label'].map(mapping))[
    ['image_id', 'label_name', 'label']
]

In [None]:
# Preview the table again, now with the label_name column.
data.head(5)

In [None]:
# Number of images per class.
data['label_name'].value_counts()

In [None]:
# Share of each class, expressed as a percentage.
data['label_name'].value_counts(normalize = True).mul(100)

# Preparando os dados
## HoldOut
Separando dados de treino e teste

In [None]:
# Directories holding the training images and the competition test images.
train_path, test_path = (
    '../input/cassava-leaf-disease-classification/train_images/',
    '../input/cassava-leaf-disease-classification/test_images/',
)

In [None]:
# Stratified hold-out split: 10% of the rows go to `test`, the rest to
# `train`, stratified on label_name with a fixed random_state.
train, test = train_test_split(
    data,
    test_size = 0.10,
    stratify = data['label_name'],
    shuffle = True,
    random_state = 0,
)

In [None]:
# Class counts in the training split.
train['label_name'].value_counts()

In [None]:
# Class counts in the hold-out split.
test['label_name'].value_counts()

## Tratamento das imagens
Nossas imagens não estão no formato adequado para usarmos em uma rede neural; por isso usaremos a classe **ImageDataGenerator** com o método **flow_from_dataframe** do Keras. Ele irá percorrer nosso CSV e o diretório de dados, carregar as imagens e retornar a entrada (matrizes de pixels) e a saída (classe codificada em one-hot, pois usamos `class_mode = 'categorical'`).

### Definindo parâmetros para tratamento das imagens

In [None]:
# Training generator: random geometric augmentation on top of the
# EfficientNet input preprocessing function.
# NOTE(review): tf.keras documents efficientnet.preprocess_input as a
# pass-through (the model rescales internally) - confirm for the TF version used.
train_generator = ImageDataGenerator(
    # geometric augmentation
    rotation_range = 40,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    # random mirroring on both axes
    horizontal_flip = True,
    vertical_flip = True,
    # pixels created by the transforms are filled with the nearest pixel value
    fill_mode = 'nearest',
    preprocessing_function = tf.keras.applications.efficientnet.preprocess_input,
)

# Hold-out generator: same preprocessing function, no augmentation.
test_generator = ImageDataGenerator(
    preprocessing_function = tf.keras.applications.efficientnet.preprocess_input,
)

In [None]:
# Options shared by both iterators, so training and hold-out images are
# read, resized and encoded in exactly the same way.
flow_options = dict(
    directory = train_path,        # the hold-out rows also point into train_images/
    x_col = 'image_id',
    y_col = 'label_name',
    target_size = (456, 456),
    class_mode = 'categorical',    # one-hot targets, matching the softmax output
    interpolation = 'nearest',
    batch_size = 15,
    seed = 42,
)

# Training batches are shuffled.
train_set = train_generator.flow_from_dataframe(
    dataframe = train,
    shuffle = True,
    **flow_options
)

# Hold-out batches keep a fixed order: with shuffling enabled, a truncated
# number of validation steps would score a different subset of images each
# epoch and add noise to val_loss.
test_set = test_generator.flow_from_dataframe(
    dataframe = test,
    shuffle = False,
    **flow_options
)

# Modelo

In [None]:
# Number of batches per epoch for training and validation.
# Training uses floor division (only full batches are counted).
STEP_SIZE_TRAIN = train_set.n // train_set.batch_size
# Validation rounds up so the last, partial batch is also evaluated;
# floor division would leave up to batch_size - 1 hold-out images out of val_loss.
STEP_SIZE_TEST = int(np.ceil(test_set.n / test_set.batch_size))

# Loss function: categorical cross-entropy on probabilities (the model ends
# in a softmax), with a very small amount of label smoothing.
loss = tf.keras.losses.CategoricalCrossentropy(from_logits = False, label_smoothing = 0.0001, name = 'categorical_crossentropy')
# Stop training when val_loss has not improved for 3 epochs.
es = EarlyStopping(monitor = 'val_loss', patience = 3, verbose = 1, mode = 'min', restore_best_weights = True)
# Save the model whenever val_loss reaches a new minimum.
checkpoint = ModelCheckpoint("CassavaLeafDisease.h5", monitor = 'val_loss', save_best_only = True, mode = 'min')
# Multiply the learning rate by 0.2 when val_loss stalls for 2 epochs (floor at 1e-6).
reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience = 2, min_lr = 1e-6, mode = 'min', verbose = 1)

In [None]:
# Build the classifier: ImageNet-pretrained EfficientNetB3 backbone followed
# by a small dense head with 5 softmax outputs (one per class).
# EfficientNetB7 accepts the same arguments and can be swapped in as the backbone.
modelo = Sequential()
modelo.add(EfficientNetB3(input_shape = (456, 456, 3), include_top = False, weights = 'imagenet', drop_connect_rate = 0.6))
# Global average pooling already yields a flat (batch, channels) tensor, so
# no Flatten layer is needed after it.
modelo.add(GlobalAveragePooling2D())
# NOTE(review): the regularizer is applied to the bias only; if weight
# regularization was intended, kernel_regularizer would be the argument - confirm.
modelo.add(Dense(256, activation = 'relu', bias_regularizer = tf.keras.regularizers.L1L2(l1 = 0.01, l2 = 0.001)))
modelo.add(Dropout(0.5))
modelo.add(Dense(5, activation = 'softmax'))
# Compile the network. `learning_rate` replaces the deprecated `lr` alias.
# NOTE(review): `decay` is a legacy optimizer argument as well; it is kept
# because a LearningRateSchedule cannot be combined with ReduceLROnPlateau.
modelo.compile(optimizer = Adam(learning_rate = 0.001, decay = 0.0001, clipvalue = 0.5), loss = loss, metrics = ['categorical_accuracy'])

In [None]:
# Informações de parâmetros do modelo 
modelo.summary()

In [None]:
# Wall-clock start of the training run.
inicio = time.time()

# Train on the augmented generator and validate on the hold-out generator.
# `batch_size` is deliberately not passed: the generators already yield
# batches, and Keras documents that it must not be given for generator input.
results = modelo.fit(
    train_set,
    validation_data = test_set,
    epochs = 15,
    steps_per_epoch = STEP_SIZE_TRAIN,
    validation_steps = STEP_SIZE_TEST,
    callbacks = [es, checkpoint, reduce_lr],
)

print()
fim = time.time()
# Elapsed training time in seconds.
print(fim - inicio)

# Save the end-of-training model under its own file name.
# 'CassavaLeafDisease.h5' is written by the ModelCheckpoint callback with the
# lowest-val_loss model; saving over it could replace that model with
# last-epoch weights when early stopping never triggers.
modelo.save('CassavaLeafDisease_last.h5')

In [None]:
# Load the saved model from disk for inference.
leaf_model = load_model('CassavaLeafDisease.h5')

In [None]:
# Predict a class index for every image in the competition test folder.
TEST_DIR = '../input/cassava-leaf-disease-classification/test_images/'
# os.listdir returns entries in arbitrary order; sorting makes the
# submission row order reproducible.
test_images = sorted(os.listdir(TEST_DIR))
predictions = []

# NOTE(review): argmax returns the generator's class index, which
# flow_from_dataframe assigns by sorting the label_name values. That order is
# assumed to match the numeric competition labels - verify with
# train_set.class_indices.
for image in test_images:
    # The context manager closes the file handle; convert('RGB') guarantees
    # the 3 channels the model expects, whatever the file's colour mode.
    with Image.open(os.path.join(TEST_DIR, image)) as raw_img:
        img = raw_img.convert('RGB').resize((456, 456), resample = Image.NEAREST)
    # Add the leading batch dimension: (456, 456, 3) -> (1, 456, 456, 3).
    batch = np.expand_dims(np.asarray(img), axis = 0)
    # verbose = 0 avoids one progress bar per image in the cell output.
    predictions.extend(leaf_model.predict(batch, verbose = 0).argmax(axis = 1))

In [None]:
# Build the submission table (one row per test image), write it to
# submission.csv without the index column, and show it.
submit = pd.DataFrame(
    {
        'image_id': test_images,
        'label': predictions,
    }
)
submit.to_csv(path_or_buf = 'submission.csv', index = False)
display(submit)