In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input mount.
input_files = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_file in input_files:
    print(input_file)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Contents
1. Import Libraries
2. Load data
3. Data Pre-processing
4. Structure Model
5. Setup Fitting conditions
6. Evaluate Model (See from Here, if you don't have time to train model)

> # 1. Import Libraries
    * system           : os, glob, shutil, json
    * data handling    : itertools, collections, numpy, pandas, seaborn, PIL, matplotlib, sklearn
    * machine learning : tensorflow, keras

In [None]:
# import system libraries
import os, glob,shutil,json

# import data handling libraries
import itertools
from collections import Counter
import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
%matplotlib inline

# import Machine Learning libraries
import keras
from keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf

> ## 2. Data load
    * [images] are all in the 'train_images' folder; they are not divided into train/validation subsets
    * [labels] are in 'train.csv'
    * the image file name (`image_id`) is the key that links each image to its label

In [None]:
# directories of files
# Root folder of the competition dataset (relative to the notebook's working directory)
base_dir = '../input/cassava-leaf-disease-classification'
# Sub-folder of base_dir holding the labelled training images
imgs_dir = 'train_images'
# CSV inside base_dir with one row per training image and its label
labels_file = 'train.csv'
# Sub-folder name for the test images
test_img_dir = 'test_images'
# JSON inside base_dir mapping label numbers to disease names
json_file = 'label_num_to_disease_map.json'

In [None]:
# Summarise the missing values of a dataframe, column by column
def check_null_values(df):
    """Print a per-column summary of the null values in ``df``.

    The printed table has one row per column of ``df`` and two columns:
    ``exist_null`` (True when the column holds at least one null) and
    ``num_of_null`` (how many nulls the column holds).

    Nothing is returned; the table is only written to stdout.
    """
    null_mask = df.isnull()
    summary = pd.concat(
        [
            null_mask.any(axis=0).to_frame('exist_null'),
            null_mask.sum(axis=0).to_frame('num_of_null'),
        ],
        axis=1,
    )
    print(summary)

In [None]:
# load json file & check classification target

print('[Classification Target]')
# The context manager closes the file on exit, so no explicit close() is needed.
with open(os.path.join(base_dir,json_file)) as j:
    classes = json.load(j)

# `classes` is keyed by the label number as a string; keep an integer-keyed
# copy so the numeric `label` column of train.csv can be mapped to names.
classes_set = {int(k):v for k,v in classes.items()}
for k,v in classes.items():
    print(k, ":", v)

# check useless value

print('\n[check null values]')
data = pd.read_csv(os.path.join(base_dir,labels_file))
data_cnt = Counter(data['label'])
print('Number of labels : ', data.shape[0])
check_null_values(data)

# check the data balance with graph

for key,value in data_cnt.items():
    plt.barh(classes[str(key)], value)
# Title and axis label only need to be set once, after all bars are drawn.
plt.title('Distribution of Labels')
plt.xlabel('Number of Labels')

# add more column with class names

data['class_name'] = data.label.map(classes_set)
# A label missing from the map would become NaN here; fail early with a clear message.
assert data['class_name'].notna().all(), 'train.csv contains labels missing from the label map'

> # 3. Data Pre-processing
* Split the data into train/validation sets, stratified by class
* Set parameters : image size, number of classes, batch size
* Define the data-generation (augmentation) rules

In [None]:
# Hold out 10% of the labelled rows for validation.
# The split is stratified on the class name and seeded for repeatability.

train, val = train_test_split(
    data,
    test_size=0.1,
    random_state=42,
    stratify=data['class_name'],
)

In [None]:
# Set parameters

# Side length, in pixels, of the square images fed to the network
IMG_SIZE = 240
# Square size tuple used wherever images are resized
size = (IMG_SIZE,IMG_SIZE)
# Number of target classes = number of entries in the label map `classes`
N_CLASS = len(classes)
# Number of images per generated batch
BATCH_SIZE = 15

In [None]:
# Setup data augmentations and generate the datasets

# Setup data augmentations
# Training images receive random geometric augmentation; validation images are
# only passed through the EfficientNet preprocessing function.

datagen_train = ImageDataGenerator(
                    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
                    rotation_range=40,
                    width_shift_range=0.2,
                    height_shift_range=0.2,
                    shear_range=0.2,
                    zoom_range=0.2,
                    horizontal_flip=True,
                    vertical_flip=True,
                    fill_mode='nearest')

datagen_val = ImageDataGenerator(
                preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,)

# Generate data with above conditions

img_path = os.path.join(base_dir,imgs_dir)

# Pin the one-hot column order to the numeric label order so that output
# index i of the network always means label i, instead of relying on the
# generator's inferred ordering of the class names.
class_order = [classes_set[label] for label in sorted(classes_set)]


def _flow_from_labels(datagen, frame, shuffle):
    """Build a batch iterator over ``frame`` with the shared generator settings.

    Args:
        datagen: the ``ImageDataGenerator`` that augments/preprocesses the images.
        frame: dataframe with ``image_id`` (file name) and ``class_name`` columns.
        shuffle: whether the iterator reshuffles the rows.

    Returns:
        The iterator produced by ``flow_from_dataframe``.
    """
    return datagen.flow_from_dataframe(frame,
                    directory = img_path,
                    seed=42,
                    x_col='image_id',
                    y_col='class_name',
                    classes=class_order,
                    target_size=size,
                    class_mode='categorical',
                    interpolation='nearest',
                    shuffle=shuffle,
                    batch_size=BATCH_SIZE)


train_set = _flow_from_labels(datagen_train, train, shuffle=True)

# Validation order is kept fixed so the same images are scored every epoch,
# which keeps val_loss comparable between epochs.
val_set = _flow_from_labels(datagen_val, val, shuffle=False)

> # 4. Structure Model

In [None]:
# Stacking Model Layer Architecture

from keras.models import Sequential
from keras.layers import GlobalAveragePooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.optimizers import RMSprop, Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications import EfficientNetB3

def create_model():
    """Build the uncompiled classifier: EfficientNetB3 backbone plus a dense head.

    Reads the module-level ``IMG_SIZE`` and ``N_CLASS``.

    Returns:
        A ``Sequential`` model whose last layer is a softmax over ``N_CLASS`` units.
    """
    # ImageNet-initialised backbone without its classification top
    backbone = EfficientNetB3(input_shape = (IMG_SIZE, IMG_SIZE, 3),
                              include_top = False,
                              weights = 'imagenet',
                              drop_connect_rate=0.6)

    # The L1/L2 penalty is applied to the bias of the hidden Dense layer
    bias_penalty = tf.keras.regularizers.L1L2(l1=0.01, l2=0.001)

    head = [
        GlobalAveragePooling2D(),
        Flatten(),
        Dense(256, activation = 'relu', bias_regularizer=bias_penalty),
        Dropout(0.5),
        Dense(N_CLASS, activation = 'softmax'),
    ]

    return Sequential([backbone] + head)

# Build one instance of the network and print its layer-by-layer summary
leaf_model = create_model()
leaf_model.summary()

# Model layer diagram 
# keras.utils.plot_model(leaf_model)

> # 5. Setup Fitting conditions & Train Model
* loss function : categorical_crossentropy
* learning rate : 1e-3
* compile with : Adam optimizer, categorical_accuracy metric

In [None]:
# Number of passes over the training data
EPOCHS = 5
# Training: whole batches only per epoch
STEP_SIZE_TRAIN = train_set.n//train_set.batch_size
# Validation: round up (ceiling division) so the final partial batch is
# evaluated too; floor division would leave up to batch_size-1 images unscored
STEP_SIZE_VALID = -(-val_set.n//val_set.batch_size)

# Setup fitting parameters

def Model_fit():
    """Build, compile and train a fresh model, saving it to disk.

    Reads the module-level ``train_set``, ``val_set``, ``EPOCHS``,
    ``STEP_SIZE_TRAIN`` and ``STEP_SIZE_VALID``.

    Side effects:
        * ``Cassava_best_model.h5`` is written by the checkpoint callback
          (``save_best_only`` on ``val_loss``).
        * ``Cassava_model.h5`` is written once ``fit`` has returned.

    Returns:
        The history object returned by ``fit``.
    """
    leaf_model = create_model()

    # Select loss function
    # The generators yield one-hot targets (class_mode='categorical') and the
    # model ends in a softmax, hence from_logits=False.
    loss = tf.keras.losses.CategoricalCrossentropy(from_logits = False,
                                                   label_smoothing=0.0001,
                                                   name='categorical_crossentropy' )

    # Compile the model
    leaf_model.compile(optimizer = Adam(learning_rate = 1e-3),
                        loss = loss,
                        metrics = ['categorical_accuracy'])

    # Stop training when val_loss has not improved for 3 epochs and roll the
    # weights back to the best epoch seen
    es = EarlyStopping(monitor='val_loss', mode='min', patience=3,
                       restore_best_weights=True, verbose=1)

    # Save the model with the minimum validation loss
    checkpoint_cb = ModelCheckpoint("Cassava_best_model.h5",
                                    save_best_only=True,
                                    monitor = 'val_loss',
                                    mode='min')

    # Multiply the learning rate by 0.2 after 2 epochs without val_loss
    # improvement, never going below 1e-6
    reduce_lr = ReduceLROnPlateau(monitor = 'val_loss',
                                  factor = 0.2,
                                  patience = 2,
                                  min_lr = 1e-6,
                                  mode = 'min',
                                  verbose = 1)

    # log model fit histories
    # batch_size is deliberately not passed: the generators already produce
    # batches, and Keras documents that it must not be given for such inputs.
    history = leaf_model.fit(train_set,
                             validation_data = val_set,
                             epochs= EPOCHS,
                             steps_per_epoch = STEP_SIZE_TRAIN,
                             validation_steps = STEP_SIZE_VALID,
                             callbacks=[es, checkpoint_cb, reduce_lr])

    # Save trained model
    leaf_model.save('Cassava_model.h5')

    return history

In [None]:
# Train the model. The following cells read `results`, and the evaluation
# section loads the 'Cassava_best_model.h5' file that training writes.
results = Model_fit()

In [None]:
# Report the best categorical accuracy reached on each split
for label, metric in (('Train_Cat-Acc: ', 'categorical_accuracy'),
                      ('Val_Cat-Acc: ', 'val_categorical_accuracy')):
    print(label, max(results.history[metric]))

In [None]:
# Plotting Results (Train vs Validation FOLDER 1)

def Train_Val_Plot(acc,val_acc,loss,val_loss):
    """Plot training-vs-validation curves for accuracy and loss side by side.

    Each argument is a per-epoch sequence of metric values; epochs are numbered
    from 1 on the x axis. The figure is shown and nothing is returned.
    """
    fig, axes = plt.subplots(1,2, figsize= (15,10))
    fig.suptitle(" Model Metrics Visualization ", fontsize=20)

    # (panel title, y-axis label, training series, validation series)
    panels = (
        ('History of Accuracy', 'Accuracy', acc, val_acc),
        ('History of Loss', 'Loss', loss, val_loss),
    )

    for ax, (title, ylabel, train_values, val_values) in zip(axes, panels):
        for series in (train_values, val_values):
            ax.plot(range(1, len(series) + 1), series)
        ax.set_title(title, fontsize=15)
        ax.set_xlabel('Epochs', fontsize=15)
        ax.set_ylabel(ylabel, fontsize=15)
        ax.legend(['training', 'validation'])

    plt.show()
    

# Draw the accuracy and loss curves recorded during training
Train_Val_Plot(
    acc=results.history['categorical_accuracy'],
    val_acc=results.history['val_categorical_accuracy'],
    loss=results.history['loss'],
    val_loss=results.history['val_loss'],
)

> # 6. Evaluate Model (See from Here, if you don't have time to train model)
* load model file
* do inference(prediction)
* save submission.csv file

In [None]:
# Evaluate the model
import keras 

# Load the model file written by the ModelCheckpoint callback during training
# (the file must already exist in the working directory)
final_model = keras.models.load_model('Cassava_best_model.h5')

In [None]:
# load test_image & pred the classification

# Reuse the dataset path constants instead of repeating the literal path;
# the trailing '' keeps the trailing path separator on TEST_DIR.
TEST_DIR = os.path.join(base_dir, test_img_dir, '')
# os.listdir returns entries in arbitrary order; sort for a reproducible submission
test_images = sorted(os.listdir(TEST_DIR))
# Random horizontal flips supply the test-time augmentation
datagen = ImageDataGenerator(horizontal_flip=True)


def pred(images, n_tta=10):
    """Predict one class index per image using test-time augmentation.

    Every file named in ``images`` is loaded from ``TEST_DIR`` and resized to
    ``size``; ``n_tta`` randomly augmented copies (via ``datagen``) are scored
    by ``final_model``, their probability vectors are summed, and the arg-max
    of the sum is the prediction for that image.

    Args:
        images: iterable of file names located in ``TEST_DIR``.
        n_tta: number of augmented copies scored per image (default 10).

    Returns:
        list[int]: the predicted class index for each entry of ``images``,
        in the same order (empty when ``images`` is empty).
    """
    labels = []
    for image in images:
        # The context manager releases the file handle once the pixels are read.
        # NOTE(review): the training generators resize with interpolation='nearest';
        # PIL's default resample filter here may differ - confirm whether they should match.
        with Image.open(os.path.join(TEST_DIR, image)) as img:
            pixels = np.asarray(img.convert('RGB').resize(size))

        # Stack n_tta copies so that a single batch holds n_tta independently
        # augmented views; this does not rely on the iterator being cycled
        # past its own length.
        samples = np.repeat(np.expand_dims(pixels, axis=0), n_tta, axis=0)
        batch = next(datagen.flow(samples, batch_size=n_tta, shuffle=False))

        yhats = final_model.predict(batch, verbose=0)
        summed = np.sum(yhats, axis=0)
        # Collect the result for *this* image rather than overwriting it
        labels.append(int(np.argmax(summed)))
    return labels

predictions = pred(test_images)

In [None]:
# Assemble the submission table (one row per test image) and write it to disk

sub = pd.DataFrame({'image_id': test_images}).assign(label=predictions)
display(sub)
sub.to_csv('submission.csv', index = False)