# Library


In [None]:
#glob
from glob import glob

#logging
from logging import DEBUG
from logging import Formatter
from logging import getLogger
from logging import StreamHandler

#matplotlib
from matplotlib import pyplot

#numpy
from numpy import arange
from numpy import argmax
from numpy import asarray

#os
from os import path

#pandas
from pandas import DataFrame
from pandas import read_csv

#tensorflow
from tensorflow import random

#tensorflow.keras
from tensorflow.keras import callbacks
from tensorflow.keras import initializers
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras import utils

#tensorflow.keras.preprocessing.image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import smart_resize


# Constant


## IO path


In [None]:
#Folder paths (train/test image folders live under the root folder)
FOLDER_PATH_ROOT = '../input/cassava-leaf-disease-classification/'
FOLDER_PATH_TRAIN = ''.join((FOLDER_PATH_ROOT, 'train_images/'))
FOLDER_PATH_TEST = ''.join((FOLDER_PATH_ROOT, 'test_images/'))

#File names (label csv to read, submission csv to write)
FILE_NAME_TRAIN = 'train.csv'
FILE_NAME_SUBMISSION = 'submission.csv'


## Param


In [None]:
#Number of training epochs (upper bound passed to fit)
EPOCHS = 12

#Number of images per batch produced by the image data flows
BATCH_SIZE = 100

#Edge length of the square images fed to the model
IMAGE_SIZE = 256


# Log


In [None]:
#Get logger
def get_logger():
    """Return this module's logger, configured to emit DEBUG records to a stream.

    The logger is looked up by ``__name__``, so repeated calls return the same
    object. The stream handler is attached only when the logger has no handler
    yet; attaching one on every call (e.g. when a notebook cell is re-run)
    would print every message once per attached handler.

    Returns:
        The configured logger, with propagation to ancestor loggers disabled.
    """
    #Get logger
    logger = getLogger(__name__)
    logger.setLevel(DEBUG)
    logger.propagate = False

    #Get handler (only once per logger, see docstring)
    if not logger.handlers:
        fmt = '%(asctime)s'
        fmt += ' - %(levelname)s'
        fmt += ' - %(name)s'
        fmt += ' - %(lineno)d'
        fmt += ' - %(funcName)s'
        fmt += ' - %(message)s'
        handler = StreamHandler()
        handler.setLevel(DEBUG)
        handler.setFormatter(Formatter(fmt))
        logger.addHandler(handler)

    #Return logger
    return logger

#Kill logger
def kill_logger(logger):
    """Detach and close every handler attached to *logger*.

    Args:
        logger: Logger whose handlers are removed.
    """
    #Iterate over a copy: removeHandler mutates logger.handlers, and removing
    #from the live list while iterating it skips every other handler
    for h in list(logger.handlers):
        logger.removeHandler(h)
        h.close()


# Initialize


In [None]:
#Initialize
def initialize():
    """Fix the random seeds so that runs are repeatable.

    Seeds Python's and NumPy's global generators in addition to TensorFlow's.
    The Keras image data generators used for training draw their shuffling
    and augmentation randomness from NumPy, so seeding TensorFlow alone
    leaves the input pipeline unseeded.
    """
    #Imported locally under aliases because the module-level name `random`
    #is bound to tensorflow.random
    import random as py_random
    from numpy import random as np_random

    #Fix random seed
    py_random.seed(0)
    np_random.seed(0)
    random.set_seed(0)


# Terminate


In [None]:
#Terminate
def terminate():
    """Placeholder for end-of-run cleanup; currently does nothing."""
    return None


# Model


In [None]:
#Get model
def get_model():
    """Build, compile and summarize the model.

    The model is compiled with categorical cross-entropy loss, a Nadam
    optimizer (learning rate 0.01) and the accuracy metric; its summary is
    printed before it is returned.

    Returns:
        The compiled model, named 'Model'.
    """
    #Build the model between the input and output tensors
    inputs, outputs = get_layers()
    net = models.Model(inputs=inputs, outputs=outputs, name='Model')

    #Compile model
    nadam = optimizers.Nadam(learning_rate=0.01)
    net.compile(
        optimizer=nadam,
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    #Show model summary
    net.summary()

    return net

#Get layers
def get_layers():
    """Wire up all layers and return the input and output tensors.

    Three hidden groups share one structure: two 3x3 convolutions, a
    concatenation with the group's own input, 4x4 max pooling, dropout (0.1)
    and batch normalization. They differ only in layer names and filter
    count. A global average pooling layer and a 5-unit output layer follow.

    Returns:
        Tuple ``(layer_in, layer_out)``.
    """
    #Filter count and layer names of hidden layer groups 1-3
    groups = (
        (16, ('H11', 'H12', 'H13', 'H14', 'H15', 'H16')),
        (32, ('H21', 'H22', 'H23', 'H24', 'H25', 'H26')),
        (64, ('H31', 'H32', 'H33', 'H34', 'H35', 'H36')),
    )

    #Get input layer
    layer_in = get_input_layer('I01', IMAGE_SIZE, IMAGE_SIZE, 3)
    tensor = layer_in

    #Get hidden layer groups 1-3
    for filters, (conv_a, conv_b, concat, pool, drop, norm) in groups:
        #Keep the group input so it can be concatenated after the convolutions
        shortcut = tensor
        tensor = get_conv_layer(tensor, conv_a, 3, 3, filters)
        tensor = get_conv_layer(tensor, conv_b, 3, 3, filters)
        tensor = get_concatenate_layer([tensor, shortcut], concat)
        tensor = get_pooling_layer(tensor, pool, 4, 4)
        tensor = get_dropout_layer(tensor, drop, 0.1)
        tensor = get_batchnormalization_layer(tensor, norm)

    #Get hidden layer group4
    tensor = get_global_pooling_layer(tensor, 'H41')

    #Get output layer
    layer_out = get_output_layer(tensor, 'O01', 5)

    return layer_in, layer_out


## Input layer


In [None]:
#Get input layer
def get_input_layer(name, width, height, channel):
    """Create the model input.

    Args:
        name: Layer name.
        width: Input width.
        height: Input height.
        channel: Number of channels.

    Returns:
        The input tensor with shape ``(height, width, channel)`` per sample.
    """
    input_shape = (height, width, channel)
    return layers.Input(shape=input_shape, name=name)


## Convolutional layer


In [None]:
#Get convolutional layer
def get_conv_layer(layer, name, width, height, filters):
    """Apply a ReLU 2D convolution with 'same' padding to *layer*.

    The kernel is initialized from a truncated normal distribution whose
    standard deviation is ``sqrt(1 / (height * width * input channels))``.

    Args:
        layer: Input tensor; its last shape entry is used as channel count.
        name: Layer name.
        width: Kernel width.
        height: Kernel height.
        filters: Number of output filters.

    Returns:
        The output tensor of the convolution.
    """
    #Get initializer (division order kept as-is so the float result is identical)
    in_channels = layer.shape[-1]
    stddev = (1 / height / width / in_channels) ** 0.5

    #Build the layer, then apply it
    conv = layers.Conv2D(
        filters=filters,
        kernel_size=(height, width),
        padding='same',
        activation='relu',
        kernel_initializer=initializers.TruncatedNormal(stddev=stddev),
        name=name
    )
    return conv(layer)


## Pooling layer


In [None]:
#Get pooling layer
def get_pooling_layer(layer, name, width, height):
    """Apply 2D max pooling to *layer*.

    Args:
        layer: Input tensor.
        name: Layer name.
        width: Pool width.
        height: Pool height.

    Returns:
        The pooled tensor.
    """
    pool = layers.MaxPooling2D(pool_size=(height, width), name=name)
    return pool(layer)

#Get global pooling layer
def get_global_pooling_layer(layer, name):
    """Apply 2D global average pooling to *layer*.

    Args:
        layer: Input tensor.
        name: Layer name.

    Returns:
        The pooled tensor.
    """
    pool = layers.GlobalAveragePooling2D(name=name)
    return pool(layer)


## Dropout layer


In [None]:
#Get dropout layer
def get_dropout_layer(layer, name, rate):
    """Apply dropout to *layer*.

    Args:
        layer: Input tensor.
        name: Layer name.
        rate: Dropout rate passed to the layer.

    Returns:
        The output tensor of the dropout layer.
    """
    dropout = layers.Dropout(rate=rate, name=name)
    return dropout(layer)


## Batch normalization layer


In [None]:
#Get batchnormalization layer
def get_batchnormalization_layer(layer, name):
    """Apply batch normalization (default settings) to *layer*.

    Args:
        layer: Input tensor.
        name: Layer name.

    Returns:
        The normalized tensor.
    """
    norm = layers.BatchNormalization(name=name)
    return norm(layer)


## Concatenate layer


In [None]:
#Get concatenate layer
def get_concatenate_layer(layer, name):
    """Concatenate several tensors with the layer's default axis.

    Args:
        layer: List of tensors to concatenate.
        name: Layer name.

    Returns:
        The concatenated tensor.
    """
    concat = layers.Concatenate(name=name)
    return concat(layer)


## Flatten layer


In [None]:
#Get flatten layer
def get_flatten_layer(layer, name):
    """Flatten *layer*.

    Args:
        layer: Input tensor.
        name: Layer name.

    Returns:
        The flattened tensor.
    """
    flatten = layers.Flatten(name=name)
    return flatten(layer)


## Fully connected layer


In [None]:
#Get full connection layer
def get_full_connect_layer(layer, name, units):
    """Apply a ReLU dense layer to *layer*.

    The kernel is initialized from a truncated normal distribution whose
    standard deviation is ``sqrt(1 / n)``, with ``n`` the last shape entry of
    *layer*.

    Args:
        layer: Input tensor.
        name: Layer name.
        units: Number of output units.

    Returns:
        The output tensor of the dense layer.
    """
    #Get initializer
    fan_in = layer.shape[-1]
    kernel_init = initializers.TruncatedNormal(stddev=(1 / fan_in) ** 0.5)

    #Build the layer, then apply it
    dense = layers.Dense(
        units=units,
        activation='relu',
        kernel_initializer=kernel_init,
        name=name
    )
    return dense(layer)


## Output layer


In [None]:
#Get output layer
def get_output_layer(layer, name, units):
    """Apply the softmax output layer to *layer*.

    The kernel is initialized from a truncated normal distribution whose
    standard deviation is ``sqrt(1 / n)``, with ``n`` the last shape entry of
    *layer*.

    Args:
        layer: Input tensor.
        name: Layer name.
        units: Number of output units.

    Returns:
        The output tensor of the softmax layer.
    """
    #Get initializer
    fan_in = layer.shape[-1]
    kernel_init = initializers.TruncatedNormal(stddev=(1 / fan_in) ** 0.5)

    #Build the layer, then apply it
    dense = layers.Dense(
        units=units,
        activation='softmax',
        kernel_initializer=kernel_init,
        name=name
    )
    return dense(layer)


# Extract


## CSV file


In [None]:
#Read csv file(y)
def read_csv_y():
    """Read the image ids and labels from the training csv file.

    Only the 'image_id' and 'label' columns are loaded, both with dtype
    ``object``.

    Returns:
        DataFrame with the two columns.
    """
    columns = ['image_id', 'label']
    csv_path = FOLDER_PATH_ROOT + FILE_NAME_TRAIN
    return read_csv(
        csv_path,
        usecols=columns,
        dtype={column: object for column in columns}
    )


## Image file


### Image data generator


In [None]:
#Get image data generator(train)
def get_img_gen_train():
    """Create the augmenting image data generator used for training.

    Pixel values are rescaled by 1/255 and 10% of the data is reserved as
    the validation split; the remaining settings configure random
    augmentation.

    Returns:
        The configured ImageDataGenerator.
    """
    #Random augmentation settings
    augmentation = dict(
        rotation_range=360,
        width_shift_range=0.1,
        height_shift_range=0.1,
        brightness_range=[0.1, 0.9],
        shear_range=25,
        zoom_range=0.1,
        channel_shift_range=0.1,
        horizontal_flip=True,
        vertical_flip=True
    )

    return ImageDataGenerator(
        rescale=1 / 255,
        validation_split=0.1,
        **augmentation
    )

#Get image data generator(validate)
def get_img_gen_vali():
    """Create the image data generator used for validation.

    Uses the same 1/255 rescale and 10% validation split as the training
    generator, with no augmentation settings.

    Returns:
        The configured ImageDataGenerator.
    """
    gen = ImageDataGenerator(validation_split=0.1, rescale=1 / 255)
    return gen


### Image data flow


In [None]:
#Get image data flow(train)
def get_img_flow_train():
    """Create the shuffled flow over the 'training' subset.

    Images are looked up in FOLDER_PATH_TRAIN by the 'image_id' column of the
    training csv, resized to IMAGE_SIZE x IMAGE_SIZE and batched by
    BATCH_SIZE; labels come from the 'label' column in categorical mode.

    Returns:
        The image data flow.
    """
    #Get image data generator
    generator = get_img_gen_train()

    #Get labels and target size
    labels = read_csv_y()
    side = (IMAGE_SIZE, IMAGE_SIZE)

    return generator.flow_from_dataframe(
        dataframe=labels,
        directory=FOLDER_PATH_TRAIN,
        x_col='image_id',
        y_col='label',
        target_size=side,
        class_mode='categorical',
        batch_size=BATCH_SIZE,
        subset='training',
        shuffle=True
    )

#Get image data flow(validate)
def get_img_flow_vali():
    """Create the unshuffled flow over the 'validation' subset.

    Images are looked up in FOLDER_PATH_TRAIN by the 'image_id' column of the
    training csv, resized to IMAGE_SIZE x IMAGE_SIZE and batched by
    BATCH_SIZE; labels come from the 'label' column in categorical mode.

    Returns:
        The image data flow.
    """
    #Get image data generator
    generator = get_img_gen_vali()

    #Get labels and target size
    labels = read_csv_y()
    side = (IMAGE_SIZE, IMAGE_SIZE)

    return generator.flow_from_dataframe(
        dataframe=labels,
        directory=FOLDER_PATH_TRAIN,
        x_col='image_id',
        y_col='label',
        target_size=side,
        class_mode='categorical',
        batch_size=BATCH_SIZE,
        subset='validation',
        shuffle=False
    )


# Train


In [None]:
#Train
def train(model):
    """Train *model* on the training flow and plot the training history.

    Training runs for at most EPOCHS epochs and stops early when 'val_loss'
    has not improved by at least 0.001 for 3 consecutive epochs, in which
    case the best weights are restored.

    Args:
        model: Compiled model to train (modified in place).
    """
    #Get image data flow
    flow_train = get_img_flow_train()
    flow_vali = get_img_flow_vali()

    #Get early stop
    early_stop = callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0.001,
        patience=3,
        mode='min',
        verbose=1,
        restore_best_weights=True
    )

    #Train
    #- batch_size is not passed: the flows already yield batches of
    #  BATCH_SIZE, and fit's batch_size must not be given for such inputs
    #- len(flow) counts batches including the final partial one, so no sample
    #  is dropped and the step count is never 0 for a small non-empty dataset
    hist = model.fit(
        flow_train,
        steps_per_epoch=len(flow_train),
        validation_data=flow_vali,
        validation_steps=len(flow_vali),
        epochs=EPOCHS,
        callbacks=[early_stop]
    )

    #Show plots
    show_plots(hist)

#Show plots
def show_plots(hist):
    """Show a 2x2 grid of training-history plots.

    Top row: 'loss' and 'accuracy'; bottom row: 'val_loss' and
    'val_accuracy'.

    Args:
        hist: History object returned by fit.
    """
    #Metric shown in each grid cell
    layout = (
        ('loss', 'accuracy'),
        ('val_loss', 'val_accuracy'),
    )

    #Create plots
    fig, axes = pyplot.subplots(nrows=2, ncols=2, figsize=(16, 8))
    fig.subplots_adjust(hspace=0.25)
    for row, names in enumerate(layout):
        for col, name in enumerate(names):
            set_plot(axes[row][col], hist, name)

    #Show plots
    pyplot.show()

#Set plot
def set_plot(ax, hist, name):
    """Draw one training-history metric on *ax*.

    Loss metrics ('loss', 'val_loss') get a y-axis starting at 0; any other
    metric gets fixed y ticks from 0.4 to 1.0 in steps of 0.06.

    Args:
        ax: Axes to draw on.
        hist: History object; ``hist.epoch`` gives the number of epochs and
            ``hist.history[name]`` the values to plot.
        name: Metric key, also used as title and y-axis label.
    """
    #Epoch numbers on the x-axis start at 1
    epochs = arange(len(hist.epoch)) + 1

    #Set data
    ax.plot(epochs, hist.history[name])

    #Set title, label
    ax.set_title(name)
    ax.set_xlabel('epoch')
    ax.set_ylabel(name)

    #Set ticks, limit
    ax.set_xticks(epochs)
    is_loss = name == 'loss' or name == 'val_loss'
    if not is_loss:
        ax.set_yticks(arange(0.4, 1.01, 0.06))
    else:
        ax.set_ylim(0)

    #Set grid
    ax.grid(True)


# Predict


In [None]:
#Predict
def predicts(model):
    """Predict the class index of every '*.jpg' file in FOLDER_PATH_TEST.

    Args:
        model: Trained model.

    Returns:
        List of predicted class indices, in the order glob returns the files.
    """
    #Predict the files one by one, in glob order
    results = []
    for file_path in glob(FOLDER_PATH_TEST + '*.jpg'):
        results.append(predict(model, file_path))

    return results

#Predict
def predict(model, file_path):
    """Predict the class index of a single image file.

    Args:
        model: Trained model.
        file_path: Path of the image file.

    Returns:
        Index of the largest value in the model's prediction.
    """
    #Read image file
    #NOTE(review): the training flows resize via target_size while this path
    #uses smart_resize - confirm both preprocess images identically
    pixels = img_to_array(load_img(file_path))
    resized = smart_resize(pixels, (IMAGE_SIZE, IMAGE_SIZE))

    #Transform image: add the batch axis and divide by 255
    batch = resized.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 3) / 255.0

    #Predict
    probabilities = model.predict(batch)
    return argmax(probabilities)


# Output


In [None]:
#Write csv file
def write_csv(result):
    """Write the submission csv with one row per test image.

    The image ids are the base names of the '*.jpg' files that glob finds in
    FOLDER_PATH_TEST, so *result* must be in that same glob order (predicts
    builds it from an identical glob call).

    Args:
        result: Predicted labels, one per test image.
    """
    #Get image ids from the file paths
    image_ids = [path.basename(f) for f in glob(FOLDER_PATH_TEST + '*.jpg')]

    #Create dataframe
    df = DataFrame({'image_id': image_ids, 'label': result})

    #Write csv file
    df.to_csv(FILE_NAME_SUBMISSION, index=False)


# Main


In [None]:
#Main flow: initialize, build and train the model, predict the test images
#and write the submission file. `logger`, `model` and `result` stay
#module-level names.

#Get logger
logger = get_logger()
logger.debug('Start')

try:
    #Initialize
    initialize()

    #Get model
    model = get_model()

    #Train
    train(model)

    #Predict
    result = predicts(model)

    #Write csv file
    write_csv(result)

    #Terminate
    terminate()

    logger.debug('End')
except Exception:
    #Log the failure with its traceback, then let it surface unchanged
    logger.exception('Failed')
    raise
finally:
    #Kill logger (always, so a failed run leaves no handler attached)
    kill_logger(logger)
