In [None]:
import numpy as np
import pandas as pd

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))
        
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from keras.layers import GlobalAveragePooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.applications import EfficientNetB3
# from efficientnet.keras import EfficientNetB3
from keras.preprocessing.image import ImageDataGenerator
from keras import models, optimizers

from PIL import Image 

import tensorflow as tf
import tensorflow_addons as tfa
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Root folder of the competition data. Later cells build paths by plain string
# concatenation (dirr + 'train.csv'), so the trailing slash is required.
dirr = '../input/cassava-leaf-disease-classification/'

In [None]:
# Read the training table; its `image_id` and `label` columns are used by the cells below.
df = pd.read_csv(dirr + 'train.csv')
# Bare last expression so the notebook renders the frame.
df

In [None]:
# Number of rows per label value, ordered by label.
df['label'].value_counts().sort_index()

In [None]:
# The same counts divided by the total number of rows, i.e. each label's share of the data.
df['label'].value_counts().sort_index() / df.shape[0]

In [None]:
# Mapping loaded from label_num_to_disease_map.json and converted to a plain dict.
# It is used below to replace the values of df['label'] and to title the preview images.
d = pd.read_json(dirr + 'label_num_to_disease_map.json', typ='series').to_dict()
d

In [None]:
# Replace the numeric label ids with the names stored in `d`.
# Guarded on dtype so the cell is idempotent: once the column holds names,
# mapping it through `d` a second time would turn every value into NaN.
if pd.api.types.is_numeric_dtype(df['label']):
    df['label'] = df['label'].map(d)

In [None]:
# Show the frame again now that 'label' has been mapped through `d`.
df

In [None]:
# Hold out 20% of the rows for validation. Stratifying on the label keeps the
# class proportions the same in both parts; the fixed random_state makes the
# split repeatable.
X_train, X_valid = train_test_split(
    df,
    test_size=0.2,
    stratify=df['label'],
    random_state=42,
)

In [None]:
def label_share(frame):
    """Return each label's share of the rows in `frame`, ordered by label."""
    return frame['label'].value_counts().sort_index() / frame.shape[0]


# Shown side by side to check that the stratified split kept the class balance.
label_share(X_train), label_share(X_valid)

In [None]:
# image_size = 600
image_size = 512
batch_size = 12

# Class order is pinned to the numeric ids in `d`, so the class index produced by
# the generators (and therefore the argmax of a prediction) equals the label id,
# instead of depending on how the class names happen to sort.
class_names = [d[label_id] for label_id in sorted(d)]

# Training generator: random geometric augmentation on every draw.
datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
    rotation_range=90,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest'
)

# Validation generator: same preprocessing, no augmentation. Augmenting the
# held-out images makes val_loss / val_categorical_accuracy noisy, and those are
# the quantities that checkpointing and early stopping rely on.
valid_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
)

# Arguments shared by both flows, so the two cannot drift apart.
flow_kwargs = dict(
    directory=dirr + 'train_images',
    x_col='image_id',
    y_col='label',
    classes=class_names,
    target_size=(image_size, image_size),
    class_mode='categorical',
    interpolation='nearest',
    batch_size=batch_size,
)

X_train2 = datagen.flow_from_dataframe(X_train, seed=42, shuffle=True, **flow_kwargs)

# No shuffling for validation: evaluation order then matches the row order of X_valid.
X_valid2 = valid_datagen.flow_from_dataframe(X_valid, shuffle=False, **flow_kwargs)

In [None]:
# Draw one augmented training batch and show it as a grid of titled images.
imgs, labels = next(X_train2)
rows = 3
# imshow needs integer pixels in 0-255.
# NOTE(review): this assumes the preprocessing function leaves the pixel range untouched — confirm.
imgs = np.array(imgs).astype(np.uint8)

# Ceiling division: enough columns for every image, whatever the batch size.
# (The previous test on `% 2` under-allocated the grid for sizes such as 10.)
cols = (len(imgs) + rows - 1) // rows

# Titles come from the generator's own class index -> name mapping.
index_to_name = {idx: name for name, idx in X_train2.class_indices.items()}

fig = plt.figure(figsize=(15, 10))
for position, (img, one_hot) in enumerate(zip(imgs, labels), start=1):
    ax = fig.add_subplot(rows, cols, position)
    ax.axis('off')
    ax.set_title(index_to_name[int(np.argmax(one_hot))], fontsize=12)
    ax.imshow(img)

In [None]:
# Convolutional backbone: EfficientNetB3 without its classification head,
# initialised from ImageNet weights.
backbone = EfficientNetB3(
    include_top=False,
    weights='imagenet',
    input_shape=(image_size, image_size, 3),
    drop_connect_rate=0.6,
)

# Classification head on top of the backbone: pooled features -> small dense
# layer with dropout -> one softmax unit per entry of `d`.
model = models.Sequential([
    backbone,
    GlobalAveragePooling2D(),
    Flatten(),
    Dense(72, activation='relu'),
    Dropout(0.4),
    Dense(len(d), activation='softmax'),
])
model.summary()

In [None]:
# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
model.compile(
    optimizer=optimizers.Adam(learning_rate=2e-3),
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [None]:
checkpoint_path = 'bestmodel.hdf5'


def halve_lr(epoch, lr):
    """Schedule for LearningRateScheduler: return half of the current learning rate."""
    return lr * 0.5


# Write the model to checkpoint_path only when validation accuracy improves.
checkpoint = ModelCheckpoint(
    checkpoint_path,
    monitor='val_categorical_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# NOTE(review): Keras applies the schedule at the start of every epoch, the first
# one included, so the first epoch already runs at half the compiled rate — confirm
# this is intended.
scheduler = LearningRateScheduler(halve_lr, verbose=1)

# Stop as soon as val_loss fails to improve for one epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0,
    patience=1,
    verbose=0,
)

# Single overall progress bar instead of one bar per epoch.
tqdm_callback = tfa.callbacks.TQDMProgressBar(
    show_overall_progress=True,
    leave_overall_progress=True,
    show_epoch_progress=False,
    leave_epoch_progress=False,
)

callbacks_list = [checkpoint, scheduler, tqdm_callback, early_stop]

In [None]:
# Number of full batches in the training set; the training call passes the same
# expression as steps_per_epoch.
X_train2.n // batch_size

In [None]:
# `Model.fit` accepts generators directly; `fit_generator` is deprecated and has
# been removed from recent Keras releases.
# steps_per_epoch counts only full batches, so a trailing partial batch is not
# part of an epoch.
history = model.fit(
    X_train2,
    validation_data=X_valid2,
    epochs=6,
    steps_per_epoch=X_train2.n // batch_size,
    callbacks=callbacks_list,
    verbose=1,
)

In [None]:
def graph_plot(history, typ=False):
    """Plot every logged training series, one panel per series.

    Each key of ``history.history`` that does not start with ``'val_'`` gets
    its own panel. If a matching ``'val_<key>'`` series exists it is drawn on
    the same panel; series without a validation counterpart (for example the
    learning rate, under whatever key it was logged) are drawn alone.

    Args:
        history: object exposing a ``history`` dict that maps a series name to
            a list of per-epoch values (as returned by ``model.fit``).
        typ: when true, print the ``[min; max]`` range of every series first.

    Returns:
        None. The figure is drawn on the current matplotlib backend.
    """
    logs = history.history

    if typ:
        for name, values in logs.items():
            print(f'{name} = [{min(values)}; {max(values)}]\n')

    # One panel per training-side series: (train, val) pairs plus unpaired
    # series such as the learning rate.
    train_keys = [key for key in logs if not key.startswith('val_')]
    if not train_keys:
        # Nothing was logged, so there is nothing to draw.
        return

    fig = plt.figure(figsize=(30, 5))
    for position, key in enumerate(train_keys, start=1):
        fig.add_subplot(1, len(train_keys), position)
        plt.plot(logs[key], marker='o', markersize=5, label='train')

        # Look the validation series up rather than assuming it exists; this
        # avoids a KeyError for any unpaired series.
        val_key = 'val_' + key
        if val_key in logs:
            plt.plot(logs[val_key], marker='o', markersize=5, label='valid')

        plt.title(key, fontsize=10)
        plt.ylabel(key)
        plt.xlabel('epoch')
        plt.grid()

        plt.yticks(fontsize=10, rotation=30)
        plt.xticks(fontsize=10, rotation=30)
        # Labels come from the plotted lines, so a single-line panel shows only 'train'.
        plt.legend(loc='upper left', fontsize=10)

In [None]:
# Draw the training curves for the run above (typ left at False, so no ranges are printed).
graph_plot(history)

In [None]:
# Reload the model from checkpoint_path, the file the ModelCheckpoint callback
# writes whenever val_categorical_accuracy improves (save_best_only=True), so
# this is the best checkpoint rather than the state after the last epoch.
cassava_model = models.load_model(checkpoint_path)

In [None]:
# Loss and accuracy on the training flow. X_train2 draws from the augmenting
# generator, so this is measured on randomly augmented images.
cassava_model.evaluate(X_train2)

In [None]:
# Loss and accuracy of the reloaded model on the held-out validation flow.
cassava_model.evaluate(X_valid2)

In [None]:
dirr2 = '../input/cassava-leaf-disease-classification/test_images/'
# Sorted so the submission rows come out in the same order on every run
# (os.listdir makes no ordering guarantee).
test = sorted(os.listdir(dirr2))

# Translate a predicted class index back to the numeric label id explicitly:
# generator class index -> class name -> key of `d`.
index_to_name = {idx: name for name, idx in X_train2.class_indices.items()}
name_to_label = {name: label_id for label_id, name in d.items()}

preds = []
for link in test:
    # The context manager closes the file handle. Forcing RGB guarantees three
    # channels, and nearest-neighbour resizing matches interpolation='nearest'
    # used by the training and validation flows.
    with Image.open(dirr2 + link) as raw:
        img = raw.convert('RGB').resize((image_size, image_size), resample=Image.NEAREST)

    batch = np.expand_dims(np.asarray(img), axis=0)
    # verbose=0 avoids printing one progress bar per image.
    class_idx = int(cassava_model.predict(batch, verbose=0).argmax(axis=1)[0])
    preds.append(name_to_label[index_to_name[class_idx]])

In [None]:
# Pair every test file name with its predicted label; `test` and `preds` were
# filled in the same loop, so they line up row by row.
df_test = pd.DataFrame({'image_id': test, 'label': preds})
df_test

In [None]:
# Write the predictions to submission.csv without the index column.
df_test.to_csv('submission.csv', index=False)