In [None]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
import tensorflow as tf
import keras
import albumentations as A
from sklearn import model_selection, preprocessing 
import cv2
import tensorflow as tf
import numpy as np 
from matplotlib import pyplot as plt
from PIL import Image

from albumentations import (
    Compose, RandomBrightness, JpegCompression, HueSaturationValue, RandomContrast, HorizontalFlip,
    Rotate
)
# Shorthand for TensorFlow's tf.data AUTOTUNE constant.
# NOTE(review): no cell visible here references AUTOTUNE — confirm it is still needed.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Loading Paths, Directory and Data-Folders 

In [None]:
# Dataset locations; every path below is derived from INPUT_PATH.
INPUT_PATH = "../input/cassava-leaf-disease-classification/"
train_images_path = f"{INPUT_PATH}train_images/"
test_images_path = f"{INPUT_PATH}test_images/"
sample = f"{INPUT_PATH}sample_submission.csv"

In [None]:
# Load the training annotations (train.csv) and preview the first rows.
df = pd.read_csv(f"{INPUT_PATH}train.csv")
df.head()

In [None]:
# Sanity check: display the first file found in the training image folder.
for file_name in os.listdir(train_images_path)[:1]:
    # Pass directory and file name as separate components so os.path.join
    # actually joins them, and close the file handle once the image is drawn.
    with Image.open(os.path.join(train_images_path, file_name)) as preview:
        plt.imshow(preview)
        plt.show()

In [None]:
# Load the sample submission file and preview its first rows.
sample_df = pd.read_csv(filepath_or_buffer=sample)
sample_df.head(n=5)

In [None]:
# Despite its name, num_classes holds the sorted list of distinct label values;
# the number of classes is len(num_classes).
num_classes = sorted(df.label.unique())
df.info()

# **Prepare Dataset For training**

In [None]:
# Labels are cast to strings before being handed to flow_from_dataframe
# (which is called with class_mode="sparse").
df["label"] = df["label"].astype(str)

batch_size = 8
# NOTE(review): input_size only appears in commented-out target_size arguments.
input_size = (300, 300)

**Data Augmentation**

In [None]:
# Augmenting generator for the training subset; 10% of the rows are held out
# for validation through validation_split.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    # geometric augmentation
    rotation_range=360,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.1,
    zoom_range=[0.5, 1.0],
    horizontal_flip=True,
    vertical_flip=True,
    # colour augmentation
    channel_shift_range=0.2,
    # pixel values are passed through without rescaling or extra preprocessing
    rescale=None,
    preprocessing_function=None,
    validation_split=0.1,
)

# target_size is not passed, so the Keras default image size applies.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=df,
    directory=train_images_path,
    x_col="image_id",
    y_col="label",
    class_mode="sparse",
    subset="training",
    batch_size=batch_size,
)

# The validation generator applies no augmentation, only the same 10% split.
valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(validation_split=0.1)
valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=df,
    directory=train_images_path,
    x_col="image_id",
    y_col="label",
    class_mode="sparse",
    subset="validation",
    batch_size=batch_size,
)

**Let's have a look at a few augmented samples**

In [None]:
# Show a 4x4 grid of augmented training images, taking the first image of
# each of 16 consecutive batches.
fig, axes = plt.subplots(4, 4, figsize=(12, 10))
for ax in axes.flat:
    # Use the iterator protocol rather than the generator's .next() method,
    # which is not available on every Keras version.
    images, _ = next(train_generator)
    # flow_from_dataframe yields RGB images by default, which is the channel
    # order imshow expects, so the channel axis must not be reversed.
    ax.imshow(images[0].astype("uint8"))
    ax.axis("off")
plt.show()
                        

# Prepare Model, Train and Evaluate

In [None]:
from sklearn.model_selection import KFold, StratifiedKFold
from keras import Model
from keras import optimizers
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import GlobalAveragePooling2D, Dense

def my_model(n_classes=None,
             weights="../input/tfkerasefficientnetimagenetnotop/efficientnetb5_notop.h5"):
    """Build an EfficientNetB5 backbone topped with a softmax classifier.

    All layers come from ``tf.keras`` so the backbone and the head are
    guaranteed to belong to the same Keras implementation (mixing the
    standalone ``keras`` package with ``tf.keras`` objects is rejected by
    some installations).

    Args:
        n_classes: Number of output units. Defaults to ``len(num_classes)``,
            the module-level list of distinct labels.
        weights: Path to the no-top EfficientNetB5 weight file (or any value
            accepted by ``EfficientNetB5(weights=...)``).

    Returns:
        The uncompiled ``tf.keras`` model. Its summary is printed as a side
        effect.
    """
    if n_classes is None:
        n_classes = len(num_classes)

    backbone = tf.keras.applications.EfficientNetB5(
        include_top=False,
        weights=weights,
    )
    model = tf.keras.Sequential([
        backbone,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(n_classes, activation="softmax"),
    ])
    model.summary()
    return model

**Trying stratified K-fold cross-validation**

In [None]:
# Training-curve helper: plots accuracy first, then loss
def plot_hist(hist):
    """Draw the training curves stored in a fit history.

    Two figures are shown one after the other: accuracy, then loss. Each
    figure overlays the training curve and its ``val_`` counterpart.

    Args:
        hist: Object exposing a ``history`` mapping with the keys
            ``accuracy``, ``val_accuracy``, ``loss`` and ``val_loss``.
    """
    curves = hist.history
    for metric in ("accuracy", "loss"):
        plt.plot(curves[metric])
        plt.plot(curves["val_" + metric])
        plt.title("model " + metric)
        plt.ylabel(metric)
        plt.xlabel("epoch")
        plt.legend(["train", "test"], loc="upper left")
        plt.show()


In [None]:
# Generators for the cross-validation run: the split is done on the dataframe,
# so no validation_split is configured here.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    # geometric augmentation
    rotation_range=360,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.1,
    zoom_range=[0.5, 1.0],
    horizontal_flip=True,
    vertical_flip=True,
    # colour augmentation
    channel_shift_range=0.2,
    # pixel values are passed through without rescaling or extra preprocessing
    rescale=None,
    preprocessing_function=None,
)

# Validation images are read without any augmentation.
valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator()


In [None]:
from sklearn.model_selection import KFold, StratifiedKFold
# Stratification target (the label column) and a seeded, shuffled 5-fold splitter.
Y = df[["label"]]
skf = StratifiedKFold(shuffle=True, random_state=101, n_splits=5)

In [None]:
def get_model_name(k):
    """Return the checkpoint file name for fold ``k``, i.e. ``model_<k>.h5``."""
    return "model_%s.h5" % (k,)

In [None]:
# Build the network, then attach optimiser, loss and the accuracy metric.
model = my_model()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:

# Stratified K-fold training. Every fold trains a freshly built model, keeps
# the checkpoint with the best validation accuracy, reloads it and records its
# validation metrics.
VALIDATION_ACCURACY = []
VALIDATION_LOSS = []
num_epochs = 20
save_dir = './'

# skf.split only needs X to have one entry per row of Y, so size the
# placeholder from the dataframe itself (counting unique image ids would
# disagree with Y if an id were ever repeated).
fold_placeholder = np.zeros(len(df))

for fold_var, (train_index, val_index) in enumerate(skf.split(fold_placeholder, Y), start=1):
    training_data = df.iloc[train_index]
    validation_data = df.iloc[val_index]

    print(len(training_data), len(validation_data))

    # Start every fold from scratch. Re-using one model instance would let the
    # weights learnt on earlier folds (which contain this fold's validation
    # images) leak into the validation score.
    tf.keras.backend.clear_session()
    model = my_model()
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
                  metrics=['accuracy'])

    train_generator = train_datagen.flow_from_dataframe(
        dataframe=training_data,
        directory=train_images_path,
        x_col="image_id",
        y_col="label",
        batch_size=batch_size,
        class_mode="sparse",
    )

    valid_generator = valid_datagen.flow_from_dataframe(
        dataframe=validation_data,
        directory=train_images_path,
        x_col="image_id",
        y_col="label",
        batch_size=batch_size,
        class_mode="sparse",
    )

    # Single source of truth for the checkpoint location of this fold.
    checkpoint_path = save_dir + get_model_name(fold_var)

    model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        save_weights_only=False,
        monitor='val_accuracy',
        mode='max',
        save_best_only=True,
        verbose=1)

    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=3)
    callbacks_list = [model_checkpoint, early_stopping]

    history = model.fit(train_generator,
                        epochs=num_epochs,
                        callbacks=callbacks_list,
                        validation_data=valid_generator)

    plot_hist(history)

    # Evaluate the best checkpoint of the fold, not the last epoch.
    model.load_weights(checkpoint_path)

    # return_dict avoids depending on the order/content of model.metrics_names.
    results = model.evaluate(valid_generator, return_dict=True)

    VALIDATION_ACCURACY.append(results['accuracy'])
    VALIDATION_LOSS.append(results['loss'])

# Cross-validation summary. `model` still holds the last fold's best weights.
print("mean validation accuracy:", np.mean(VALIDATION_ACCURACY))
print("mean validation loss:", np.mean(VALIDATION_LOSS))


**Predict and make Submission**

In [None]:
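# Optional: restore a specific fold's checkpoint before predicting.
# load_weights() updates the model in place, so do not assign its return value to `model`.
# model.load_weights("./model_3.h5")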

In [None]:

# The training generators were created without target_size, so the network was
# trained on Keras' default 256x256 images (nearest-neighbour resized). Test
# images are brought to the same size before prediction.
PREDICT_SIZE = (256, 256)

predictions = []
for image_id in sample_df.image_id:
    # Join directory and file name as separate components and release the file
    # handle as soon as the pixels have been copied into an array.
    with Image.open(os.path.join(test_images_path, image_id)) as test_image:
        resized = test_image.convert("RGB").resize(PREDICT_SIZE, Image.NEAREST)
        pixels = np.asarray(resized, dtype="float32")

    # The model expects a batch axis in front of (height, width, channels).
    batch = np.expand_dims(pixels, axis=0)

    # NOTE(review): the argmax index is used directly as the label, which assumes
    # the generator's class indices match the numeric labels — confirm against
    # train_generator.class_indices.
    predictions.append(np.argmax(model.predict(batch, verbose=0)))

sample_df["label"] = predictions
sample_df.head()

In [None]:
# Call head() so the cell displays the first rows rather than the bound-method repr.
sample_df.head()


In [None]:
# Write the predictions to submission.csv without the dataframe index column.
sample_df.to_csv(path_or_buf="submission.csv", index=False)