In [None]:
!ls /kaggle/input/library4/ImageDataAugmentor-master
# Put the bundled ImageDataAugmentor checkout (under /kaggle/input) on the
# import path so the package can be imported without installing it.
import sys
sys.path.append('/kaggle/input/library4/ImageDataAugmentor-master')  # repository root, not the inner package directory

# Star import; this notebook uses the ImageDataAugmentor class from it.
from ImageDataAugmentor.image_data_augmentor import *

In [None]:
# Run-mode switch checked further down in this notebook:
#   0 -> train the model over stratified shuffle splits and save checkpoints
#   1 -> load a saved model, predict the test images and write submission.csv
SUBMISSION_MODE = 1
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import random
import os
import cv2
import sys
from pylab import rcParams
from PIL import Image
warnings.filterwarnings('ignore')

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras import Input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Activation, Input, GlobalAveragePooling2D
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.applications import InceptionV3, Xception
from tensorflow.keras.mixed_precision import experimental as mixed_precision
from sklearn.model_selection import StratifiedShuffleSplit



# Install 'mixed_float16' as the global Keras dtype policy. It has to be set
# before any model is built so that the layers pick it up.
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_policy(policy)  # author's note: roughly halves training time (not measured here)

# Training metadata: the columns used below are `image_id` and `label`.
df_train = pd.read_csv("../input/cassava-leaf-disease-classification/train.csv")
df_train.head()

# The generators are created with class_mode='categorical', which works on
# string class labels, so store the label column as str.
df_train = df_train.assign(label=df_train["label"].astype(str))

# Square input resolution and mini-batch size shared by every generator/model.
image_size, batch_size = 300, 32

target_size = (image_size,) * 2      # (height, width)
input_shape = target_size + (3,)     # (height, width, channels)

# Keras-layer augmentation pipeline: random crop to the model resolution,
# random flips on both axes, random rotation and random zoom.
_preprocessing = tf.keras.layers.experimental.preprocessing

img_augmentation = tf.keras.Sequential()
img_augmentation.add(_preprocessing.RandomCrop(height=image_size, width=image_size))
img_augmentation.add(_preprocessing.RandomFlip(mode="horizontal_and_vertical"))
img_augmentation.add(_preprocessing.RandomRotation(factor=0.25))
img_augmentation.add(
    _preprocessing.RandomZoom(height_factor=(-0.25, 0.25), width_factor=(-0.25, 0.25))
)



# Directory that holds the competition's training images.
path = "../input/cassava-leaf-disease-classification/train_images/"
import tensorflow.keras.utils
from ImageDataAugmentor.image_data_augmentor import *
import albumentations as A

# Albumentations pipeline applied to training images. The crop and the final
# float conversion always run; every other transform fires with p=0.5.
# NOTE(review): the HueSaturationValue limits of 0.2 look very small if
# albumentations interprets them in 8-bit pixel units -- confirm the intent.
train_augmentations = A.Compose(
    transforms=[
        A.RandomCrop(height=image_size, width=image_size, p=1),
        A.CoarseDropout(p=0.5),
        A.Cutout(p=0.5),
        A.Flip(p=0.5),
        A.ShiftScaleRotate(p=0.5),
        A.HueSaturationValue(
            hue_shift_limit=0.2,
            sat_shift_limit=0.2,
            val_shift_limit=0.2,
            p=0.5,
        ),
        A.RandomBrightnessContrast(
            brightness_limit=(-0.2, 0.2),
            contrast_limit=(-0.2, 0.2),
            p=0.5,
        ),
        A.ToFloat(),
    ],
    p=1,
)

# Deterministic pipeline for validation images: centre crop to the model
# resolution followed by conversion to float.
val_augmentations = A.Compose(
    transforms=[
        A.CenterCrop(height=image_size, width=image_size, p=1),
        A.ToFloat(),
    ],
    p=1,
)

def TFDataGenerator(train_set, val_set):
    """Create the training and validation image iterators.

    Both iterators read files from the competition's ``train_images``
    directory, take the file name from the ``image_id`` column and the class
    from the ``label`` column, and yield one-hot ("categorical") targets in
    batches of ``batch_size`` at ``target_size`` resolution.

    Args:
        train_set: DataFrame of training rows; iterated in shuffled order
            through ``train_augmentations``.
        val_set: DataFrame of validation rows; iterated in file order
            through ``val_augmentations``.

    Returns:
        A ``(train_iterator, val_iterator)`` tuple.
    """
    train_augmentor = ImageDataAugmentor(augment=train_augmentations)
    val_augmentor = ImageDataAugmentor(augment=val_augmentations)

    # Everything the two iterators have in common.
    shared_options = dict(
        directory='../input/cassava-leaf-disease-classification/train_images',
        x_col='image_id',
        y_col='label',
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical',
        seed=2020,
    )

    train_iterator = train_augmentor.flow_from_dataframe(
        dataframe=train_set, shuffle=True, **shared_options
    )
    val_iterator = val_augmentor.flow_from_dataframe(
        dataframe=val_set, shuffle=False, **shared_options
    )
    return train_iterator, val_iterator

# Per-class row counts (handy for eyeballing class imbalance).
df = df_train.groupby('label').count()

# Ordered 80/20 hold-out split. A single cut index is used for both slices so
# the two subsets are always disjoint and together cover every row. Truncating
# the head and tail lengths independently can drop a row in between, and for
# very small frames `iloc[-0:]` would turn the validation set into the whole
# frame.
split_at = int(len(df_train) * 0.8)
train_set = df_train.iloc[:split_at]
val_set = df_train.iloc[split_at:]
train_datagen, val_datagen = TFDataGenerator(train_set, val_set)


def create_Inception():
    """Build and compile a 5-class classifier on an InceptionV3 backbone.

    The ImageNet-pretrained backbone (without its top) is followed by global
    average pooling, dropout (rate 0.2) and a 5-unit softmax layer. The model
    is compiled with Nesterov SGD and label-smoothed categorical
    cross-entropy, tracking accuracy.

    Returns:
        The compiled Keras ``Model``; its output is a probability vector.
    """
    backbone = InceptionV3(include_top=False, weights="imagenet", input_shape=input_shape)

    # Rebuild the classification head on top of the backbone.
    inputs = Input(shape=input_shape)
    features = backbone(inputs)
    pooled = GlobalAveragePooling2D()(features)
    regularized = Dropout(0.2)(pooled)

    # The output layer is pinned to float32 regardless of the global dtype policy.
    outputs = Dense(5, activation="softmax", name="dense", dtype='float32')(regularized)

    inception = Model(inputs=inputs, outputs=outputs)
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
    # The head already applies softmax, so the loss must consume probabilities.
    # from_logits=True would apply softmax a second time and distort the
    # loss and its gradients.
    loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.2, from_logits=False)

    inception.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    return inception

def create_Xception():
    """Build and compile a 5-class classifier on an Xception backbone.

    The ImageNet-pretrained backbone (without its top) is followed by global
    average pooling, dropout (rate 0.2) and a 5-unit softmax layer. The model
    is compiled with Nesterov SGD and label-smoothed categorical
    cross-entropy, tracking accuracy.

    Returns:
        The compiled Keras ``Model``; its output is a probability vector.
    """
    backbone = Xception(include_top=False, weights="imagenet", input_shape=input_shape)

    # Rebuild the classification head on top of the backbone.
    inputs = Input(shape=input_shape)
    features = backbone(inputs)
    pooled = GlobalAveragePooling2D()(features)
    regularized = Dropout(0.2)(pooled)

    # The output layer is pinned to float32 regardless of the global dtype policy.
    outputs = Dense(5, activation="softmax", name="dense", dtype='float32')(regularized)

    xception = Model(inputs=inputs, outputs=outputs)
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
    # The head already applies softmax, so the loss must consume probabilities.
    # from_logits=True would apply softmax a second time and distort the
    # loss and its gradients.
    loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.2, from_logits=False)

    xception.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    return xception


if SUBMISSION_MODE == 0:
    # Training mode: fit one InceptionV3 model per stratified shuffle split
    # and keep the checkpoint with the lowest validation loss for each fold.

    fold_number = 0
    n_splits = 3
    epochs = 8

    KFoldSplit = StratifiedShuffleSplit(n_splits=n_splits, test_size=0.1, random_state=2020)
    for train_index, val_index in KFoldSplit.split(df_train["image_id"], df_train["label"]):
        # Drop the previous fold's Keras state so finished models do not pile
        # up in memory across folds.
        tf.keras.backend.clear_session()

        # split() yields positional row numbers, so select with iloc; loc
        # would only be right while the frame keeps its default RangeIndex.
        train_set = df_train.iloc[train_index]
        val_set = df_train.iloc[val_index]
        train_datagen, val_datagen = TFDataGenerator(train_set, val_set)
        model = create_Inception()

        fold_number += 1
        print("Training fold no.: " + str(fold_number))

        # Checkpoint file name, e.g. "inception 1fold.h5".
        model_name = "inception "
        fold_name = "fold.h5"
        filepath = model_name + str(fold_number) + fold_name
        callbacks = [
            # Shrink the learning rate as soon as val_loss stops improving.
            ReduceLROnPlateau(monitor='val_loss', patience=1, verbose=1, factor=0.2),
            # Give up on the fold after 3 epochs without improvement.
            EarlyStopping(monitor='val_loss', patience=3),
            # Keep only the best-scoring weights for this fold.
            ModelCheckpoint(filepath=filepath, monitor='val_loss', save_best_only=True),
        ]

        history = model.fit(train_datagen, epochs=epochs, validation_data=val_datagen, callbacks=callbacks)

    print("Training finished!")
            
if SUBMISSION_MODE == 1:
    # Inference mode: predict a class for every test image and write
    # submission.csv.
    # NOTE(review): the checkpoint name refers to ResNet50V2 while this
    # notebook defines Inception/Xception builders -- confirm it expects
    # `image_size` inputs scaled to [0, 1].
    model = load_model("../input/models3/ResNet50V2 2fold.h5")

    data_dir = '../input/cassava-leaf-disease-classification'
    test_dir = os.path.join(data_dir, "test_images")

    # Only the column layout of the sample file is reused. Its placeholder
    # rows are discarded, so an image can never end up with two rows carrying
    # conflicting labels.
    SampleSubmit = pd.read_csv(os.path.join(data_dir, "sample_submission.csv"))
    preds = []    # predicted class index per test image
    results = []  # (image_id, predicted class) rows for the submission

    for image_id in sorted(os.listdir(test_dir)):
        # Close the file promptly and force 3 channels so grayscale/RGBA
        # inputs still match the model's input shape.
        with Image.open(os.path.join(test_dir, image_id)) as raw_image:
            image = raw_image.convert("RGB").resize((image_size, image_size))

        # Add the batch dimension and scale pixels to [0, 1].
        batch = np.expand_dims(np.asarray(image), axis=0) / 255.0
        label = int(np.argmax(model.predict(batch), axis=1).item())
        preds.append(label)
        results.append((image_id, label))

    # Build the frame in one go instead of growing it row by row.
    SampleSubmit = pd.DataFrame(results, columns=SampleSubmit.columns)
    SampleSubmit.to_csv('submission.csv', index=False)