In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import glob
import shutil
import json
import keras
import itertools
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt
from collections import Counter

# Working directories on the mounted Drive.
# `work_dir` keeps its trailing slash because other cells build file paths by
# plain string concatenation (e.g. `work_dir + 'train_folds_cassava.csv'`).
work_dir = '/content/drive/MyDrive/KaggleCassava/'

# Listing the folder raises immediately if it is not reachable; the returned
# listing itself is discarded because this is not the last line of the cell.
os.listdir(work_dir)

# Folder holding the unzipped training images.
train_path = work_dir + 'train_images_new'

In [None]:
# !unzip -u "/content/drive/MyDrive/KaggleCassava/train_images.zip" -d "/content/drive/MyDrive/KaggleCassava/train_images_new"

In [None]:
import tensorflow

In [None]:
# Load the fold assignments and discard the `image_path` column in one chained
# expression, so re-running the cell always starts from the file on disk.
data = pd.read_csv(work_dir + 'train_folds_cassava.csv').drop(columns="image_path")
data.head()

Unnamed: 0,image_id,label,kfold
0,2706759956.jpg,2,0
1,2720713385.jpg,3,0
2,403458333.jpg,1,0
3,4002244325.jpg,3,0
4,1370253627.jpg,3,0


# Read data 

In [None]:
# Load the label-number -> disease-name mapping. The context manager closes the
# file as soon as it has been parsed (a bare `open` left the handle open).
with open(work_dir + 'label_num_to_disease_map.json') as f:
    real_labels = json.load(f)
# JSON object keys are always strings; cast them to int so they can be matched
# against the numeric `label` column.
real_labels = {int(k):v for k,v in real_labels.items()}

# Defining the working dataset
# `class_name` is the column the Keras generators below use as `y_col`.
data['class_name'] = data.label.map(real_labels)

# Splitting the data (commented out below, as we are doing folds as of 18/1)
# from sklearn.model_selection import train_test_split

# train,val = train_test_split(data, test_size = 0.05, random_state = 42, stratify = data['class_name'])

# Importing the data using ImageDataGenerator

from keras.preprocessing.image import ImageDataGenerator

# Input resolution, number of classes and batch size used by the generators
# and by the model definition.
IMG_SIZE = 512 #456
size = (IMG_SIZE, IMG_SIZE)
n_CLASS = 5
BATCH_SIZE = 15

# Both generators pass every image through the EfficientNet preprocessing hook.
_effnet_preprocess = tf.keras.applications.efficientnet.preprocess_input

# Random geometric augmentation applied to training images only.
_train_augmentation = dict(
    rotation_range = 40,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True,
    vertical_flip = True,
    fill_mode = 'nearest',
)

datagen_train = ImageDataGenerator(preprocessing_function = _effnet_preprocess,
                                   **_train_augmentation)

# Validation images are only preprocessed, never augmented.
datagen_val = ImageDataGenerator(preprocessing_function = _effnet_preprocess)

def generate_train_val(train, train_path, val):
  """Build the training and validation image iterators for one fold.

  Args:
    train: DataFrame of training rows; must contain `image_id` (file name)
      and `class_name` (label) columns.
    train_path: Directory containing the image files named in `image_id`.
    val: DataFrame of validation rows with the same columns as `train`.

  Returns:
    `(train_set, val_set)`: the iterators returned by
    `datagen_train.flow_from_dataframe` and `datagen_val.flow_from_dataframe`.
  """
  # Settings shared by both iterators.
  common = dict(directory = train_path,
                seed = 42,
                x_col = 'image_id',
                y_col = 'class_name',
                target_size = size,
                #color_mode="rgb",
                class_mode = 'categorical',
                interpolation = 'nearest',
                batch_size = BATCH_SIZE)

  train_set = datagen_train.flow_from_dataframe(train, shuffle = True, **common)

  # Validation is NOT shuffled. Model_fit truncates validation to
  # n // batch_size steps, so a shuffled iterator may drop a different
  # remainder each epoch, which makes val_loss (the quantity monitored by the
  # callbacks) not comparable between epochs. A fixed order also keeps
  # predictions aligned with the rows of `val`.
  val_set = datagen_val.flow_from_dataframe(val, shuffle = False, **common)
  return train_set, val_set

In [None]:
from keras.models import Sequential
from keras.layers import GlobalAveragePooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.optimizers import RMSprop, Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications import EfficientNetB3, EfficientNetB4

def create_model():
    """Assemble the EfficientNetB4-based classifier.

    Returns:
        An uncompiled `Sequential` model: ImageNet-initialised EfficientNetB4
        backbone (no top), global average pooling, a 256-unit ReLU layer with
        an L1/L2 bias regulariser, dropout, and an `n_CLASS`-way softmax.
    """
    # Convolutional backbone, sized for IMG_SIZE x IMG_SIZE RGB input.
    backbone = EfficientNetB4(input_shape = (IMG_SIZE, IMG_SIZE, 3),
                              include_top = False,
                              weights = 'imagenet',
                              drop_connect_rate=0.6)

    # The whole backbone stays trainable: an earlier experiment that froze all
    # but the last 40 layers was noted as not improving performance.
    head = [
        GlobalAveragePooling2D(),
        Flatten(),
        Dense(256, activation = 'relu',
              bias_regularizer=tf.keras.regularizers.L1L2(l1=0.01, l2=0.001)),
        Dropout(0.5),
        Dense(n_CLASS, activation = 'softmax'),
    ]

    model = Sequential()
    for layer in [backbone] + head:
        model.add(layer)

    return model

leaf_model = create_model()
leaf_model.summary()

ValueError: ignored

In [None]:
EPOCHS = 15

In [None]:
def symmetric_cross_entropy(alpha, beta):
    """Create a loss combining cross-entropy with its reversed counterpart.

    Args:
        alpha: Weight of the forward term, -sum(y_true * log(y_pred)).
        beta: Weight of the reverse term, -sum(y_pred * log(y_true)).

    Returns:
        A `loss(y_true, y_pred)` callable that returns
        `alpha * mean(forward) + beta * mean(reverse)`, where the sums run
        over the last axis and the means over the remaining entries.
    """
    def loss(y_true, y_pred):
        # Only the arguments of the logarithms are clipped (predictions to
        # [1e-7, 1], targets to [1e-4, 1]) so log(0) cannot occur; the
        # multiplying factors are used unclipped.
        log_pred = tf.math.log(tf.clip_by_value(y_pred, 1e-7, 1.0))
        log_true = tf.math.log(tf.clip_by_value(y_true, 1e-4, 1.0))

        forward_ce = -tf.math.reduce_sum(y_true * log_pred, axis = -1)
        reverse_ce = -tf.math.reduce_sum(y_pred * log_true, axis = -1)

        return alpha*tf.math.reduce_mean(forward_ce) + beta*tf.math.reduce_mean(reverse_ce)
    return loss

In [None]:
def Model_fit(k):
    """Train the classifier from `create_model` with fold `k` held out.

    Args:
        k: Value of the `kfold` column selecting the validation rows of the
            global `data` frame; every other row is used for training.

    Returns:
        The History object returned by `leaf_model.fit`.

    Side effects:
        Writes the checkpoint with the lowest `val_loss` to
        `{work_dir}/Cassava_best_model_fold{k}.h5` and saves the model as it
        is when `fit` returns to `Cassava_model_fold{k}.h5` in the current
        working directory.
    """
    leaf_model = create_model()

    # Compiling the model
    loss = tf.keras.losses.CategoricalCrossentropy(from_logits = False,
                                                   label_smoothing=0.0001,
                                                   name='categorical_crossentropy' )

    leaf_model.compile(optimizer = Adam(learning_rate = 1e-3),
                        loss = loss, #'categorical_crossentropy'
                        metrics = ['categorical_accuracy']) #'acc'

    # Stop training when the val_loss has stopped decreasing for 3 epochs.
    es = EarlyStopping(monitor='val_loss', mode='min', patience=3,
                       restore_best_weights=True, verbose=1)

    # Save the model with the minimum validation loss
    checkpoint_cb = ModelCheckpoint(f"{work_dir}/Cassava_best_model_fold{k}.h5",
                                    save_best_only=True,
                                    monitor = 'val_loss',
                                    mode='min')

    # reduce learning rate
    reduce_lr = ReduceLROnPlateau(monitor = 'val_loss',
                                  factor = 0.2,
                                  patience = 2,
                                  min_lr = 1e-6,
                                  mode = 'min',
                                  verbose = 1)

    # with Folds as of 18/1
    # `.drop(columns=...)` returns new frames; the previous in-place drop on a
    # filtered slice of `data` triggered pandas' SettingWithCopyWarning.
    train = data[data['kfold'] != k].drop(columns='kfold')
    val = data[data['kfold'] == k].drop(columns='kfold')

    train_set, val_set = generate_train_val(train, train_path, val)
    # Whole batches only: the trailing partial batch is not counted.
    STEP_SIZE_TRAIN = train_set.n//train_set.batch_size
    STEP_SIZE_VALID = val_set.n//val_set.batch_size

    # history fit
    # `batch_size` is deliberately not passed: the iterators already yield
    # batches of BATCH_SIZE, and Keras documents that `batch_size` must not be
    # specified for generator / Sequence inputs.
    history = leaf_model.fit(train_set,
                             validation_data = val_set,
                             epochs= EPOCHS,
                             #class_weight = d_class_weights,
                             steps_per_epoch = STEP_SIZE_TRAIN,
                             validation_steps = STEP_SIZE_VALID,
                             callbacks=[es, checkpoint_cb, reduce_lr])

    leaf_model.save(f'Cassava_model_fold{k}'+'.h5')

    return history

In [None]:
data['kfold'].unique()

array([0, 1, 2, 3, 4])

In [None]:
# Train all five folds (0-4). An earlier note here said to resume from fold 4
# (26/1), but the range below covers every fold.
# Each fold's History is kept in `fold_histories`; previously only the last
# fold's result survived because `results` was overwritten on every pass.
fold_histories = {}
for fold in range(0,5):
  results = Model_fit(fold)
  fold_histories[fold] = results

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Found 17117 validated image filenames belonging to 5 classes.
Found 4280 validated image filenames belonging to 5 classes.
Epoch 1/15


ResourceExhaustedError: ignored

# Albumentations

In [None]:
!pip install git+https://github.com/mjkvaak/ImageDataAugmentor

In [None]:
image_size = 300

In [None]:
from ImageDataAugmentor.image_data_augmentor import *
import albumentations as A

# Per-channel mean / std handed to A.Normalize in both pipelines (these match
# the widely used ImageNet statistics).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: always crop to image_size, apply each random
# occlusion / flip / geometric / colour transform with probability 0.5, then
# normalise and convert to float.
_train_steps = [
    A.RandomCrop(image_size, image_size, p=1),
    A.CoarseDropout(p=0.5),
    A.Cutout(p=0.5),
    A.Flip(p=0.5),
    A.ShiftScaleRotate(p=0.5),
    A.HueSaturationValue(p=0.5, hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2),
    A.RandomBrightnessContrast(p=0.5, brightness_limit=(-0.2,0.2), contrast_limit=(-0.2, 0.2)),
    A.Normalize(mean=_NORM_MEAN, std=_NORM_STD, max_pixel_value=255.0, p=1.0),
    A.ToFloat(),
]
train_augmentations = A.Compose(_train_steps, p=1)

# Validation pipeline: deterministic centre crop plus the same normalisation.
_val_steps = [
    A.CenterCrop(image_size, image_size, p=1),
    A.Normalize(mean=_NORM_MEAN, std=_NORM_STD, max_pixel_value=255.0, p=1.0),
    A.ToFloat(),
]
val_augmentations = A.Compose(_val_steps, p=1)

In [None]:
input_shape = (image_size, image_size, 3)
target_size = (image_size, image_size)
batch_size = 16

In [None]:
import tensorflow as tf

In [None]:
def TFDataGenerator(train_set, val_set):
    """Create the albumentations-backed training and validation iterators.

    Args:
        train_set: DataFrame of training rows with `image_id` and `label`.
        val_set: DataFrame of validation rows with the same columns.

    Returns:
        `(train_datagen, val_datagen)`: iterators from
        `ImageDataAugmentor.flow_from_dataframe`; the training one is
        shuffled, the validation one is not.
    """
    # NOTE(review): featurewise_center / featurewise_std_normalization usually
    # need statistics computed by a prior `.fit(...)` call, and none is made
    # here - confirm they have the intended effect.
    train_generator = ImageDataAugmentor(augment=train_augmentations,featurewise_center=True, featurewise_std_normalization=True)
    val_generator = ImageDataAugmentor(augment=val_augmentations,featurewise_center=True, featurewise_std_normalization=True)

    # class_mode='categorical' requires string (or list/tuple) values in y_col,
    # but `label` holds numeric class ids. Cast on copies so the caller's
    # frames are untouched. Classes are ordered alphanumerically, so the
    # strings '0'..'4' keep class index == original label.
    train_set = train_set.assign(label=train_set['label'].astype(str))
    val_set = val_set.assign(label=val_set['label'].astype(str))

    # Arguments shared by both iterators.
    # NOTE(review): `preprocessing_function` is passed through unchanged from
    # the original call; verify ImageDataAugmentor.flow_from_dataframe accepts it.
    common = dict(
        preprocessing_function = tf.keras.applications.efficientnet.preprocess_input,
        directory='../input/cassava-leaf-disease-classification/train_images',
        x_col='image_id',
        y_col='label',
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical',
        seed=88)

    train_datagen = train_generator.flow_from_dataframe(
                  dataframe = train_set,
                  shuffle=True,
                  **common)

    val_datagen = val_generator.flow_from_dataframe(
                dataframe = val_set,
                shuffle=False,
                **common)

    return train_datagen, val_datagen

# EfficientNet and InceptionNet

In [None]:
!wget https://storage.googleapis.com/keras-applications/efficientnetb3_notop.h5

In [None]:
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import CategoricalCrossentropy

In [None]:
def create_Inception():
    """Build and compile an InceptionV3-based 5-class classifier.

    Returns:
        A compiled functional model: ImageNet-initialised InceptionV3 (no
        top), global average pooling, dropout 0.2 and a 5-way softmax layer
        computed in float32. Optimiser is Nesterov SGD (lr 0.01, momentum
        0.9); loss is categorical cross-entropy with label smoothing 0.2.
    """
    base_model = InceptionV3(include_top=False, weights="imagenet", input_shape=input_shape)

    # Rebuild top
    # `tf.keras.Input` / `tf.keras.Model` are referenced through `tf` because
    # bare `Input` is never imported and `Model` is only imported in a much
    # later (inference) cell, so a fresh top-to-bottom run failed here.
    inputs = tf.keras.Input(shape=input_shape)

    features = base_model(inputs)
    pooling = GlobalAveragePooling2D()(features)
    dropout = Dropout(0.2)(pooling)

    outputs = Dense(5, activation="softmax", name="dense", dtype='float32')(dropout)

    # Compile
    inception = tf.keras.Model(inputs=inputs, outputs=outputs)
    optimizer = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
    # The output layer already applies softmax, so the loss receives
    # probabilities: from_logits must be False (True would apply a second
    # softmax inside the loss).
    loss = CategoricalCrossentropy(label_smoothing=0.2, from_logits=False)

    inception.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    return inception

In [None]:
def create_EffnetB3():
    """Build and compile an EfficientNetB3-based 5-class classifier.

    Returns:
        A compiled functional model: ImageNet-initialised EfficientNetB3 (no
        top, drop_connect_rate 0.6), global average pooling, a 256-unit ReLU
        layer with an L1/L2 bias regulariser, dropout 0.5 and a 5-way softmax
        layer computed in float32. Optimiser is Nesterov SGD (lr 0.01,
        momentum 0.9); loss is categorical cross-entropy with label
        smoothing 0.2.
    """
    base_model = EfficientNetB3(include_top=False, weights="imagenet", input_shape=input_shape, drop_connect_rate=0.6)

    # Rebuild top
    # `tf.keras.Input` / `tf.keras.Model` are referenced through `tf` because
    # bare `Input` is never imported and `Model` is only imported in a much
    # later (inference) cell, so a fresh top-to-bottom run failed here.
    inputs = tf.keras.Input(shape=input_shape)

    features = base_model(inputs)
    gpool = GlobalAveragePooling2D()(features)
    # Flatten is kept from the original topology; the pooled tensor is
    # already 2-D, so it does not change the shape.
    flat = Flatten()(gpool)
    dense = Dense(256, activation = 'relu', bias_regularizer=tf.keras.regularizers.L1L2(l1=0.01, l2=0.001))(flat)
    drop = Dropout(0.5)(dense)
    outputs = Dense(5, activation = "softmax", dtype='float32')(drop)

    # Compile
    efficientnet = tf.keras.Model(inputs=inputs, outputs=outputs)
    optimizer = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
    # The output layer already applies softmax, so the loss receives
    # probabilities: from_logits must be False (True would apply a second
    # softmax inside the loss).
    loss = CategoricalCrossentropy(label_smoothing=0.2, from_logits=False)

    efficientnet.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    return efficientnet

# Model run with folds

In [None]:
epochs = 8

In [None]:
def run_efficientnet(fold, df):
    """Train an EfficientNetB3 model with `fold` held out for validation.

    Args:
        fold: Value of `df['kfold']` selecting the validation rows.
        df: DataFrame with a `kfold` column plus the columns required by
            `TFDataGenerator`.

    Returns:
        Always 1; the training history is not returned. The model with the
        lowest `val_loss` is written to "efficientnet<fold+1>fold.h5".
    """
    training_rows = df[df['kfold'] != fold]
    holdout_rows = df[df['kfold'] == fold]
    train_datagen, val_datagen = TFDataGenerator(training_rows, holdout_rows)
    model = create_EffnetB3()

    # Checkpoint file names are numbered from 1, hence fold + 1.
    filepath = f"efficientnet{fold+1}fold.h5"

    lr_schedule = ReduceLROnPlateau(monitor='val_loss', patience=1, verbose=1, factor=0.2)
    early_stop = EarlyStopping(monitor='val_loss', patience=3)
    checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_loss', save_best_only=True)

    model.fit(train_datagen,
              epochs=epochs,
              validation_data=val_datagen,
              callbacks=[lr_schedule, early_stop, checkpoint])
    return 1


In [None]:
def run_inceptionnet(fold, df):
    """Train an InceptionV3 model with `fold` held out for validation.

    Args:
        fold: Value of `df['kfold']` selecting the validation rows.
        df: DataFrame with a `kfold` column plus the columns required by
            `TFDataGenerator`.

    Returns:
        Always 1; the training history is not returned. The model with the
        lowest `val_loss` is written to "inceptionnet<fold+1>fold.h5".
    """
    training_rows = df[df['kfold'] != fold]
    holdout_rows = df[df['kfold'] == fold]
    train_datagen, val_datagen = TFDataGenerator(training_rows, holdout_rows)
    model = create_Inception()

    # Checkpoint file names are numbered from 1, hence fold + 1.
    filepath = f"inceptionnet{fold+1}fold.h5"

    lr_schedule = ReduceLROnPlateau(monitor='val_loss', patience=1, verbose=1, factor=0.2)
    early_stop = EarlyStopping(monitor='val_loss', patience=3)
    checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_loss', save_best_only=True)

    model.fit(train_datagen,
              epochs=epochs,
              validation_data=val_datagen,
              callbacks=[lr_schedule, early_stop, checkpoint])
    return 1


In [None]:
# Train both architectures on every fold.
# The frame passed in is `data` (the fold-annotated table loaded from
# train_folds_cassava.csv): the name `df` used here before is not defined in
# any visible cell, so a fresh top-to-bottom run raised NameError.
# NOTE(review): confirm `data` is the intended frame for this pipeline.
for fold in range(5):
    print(f"Training fold {fold}")
    run_efficientnet(fold, data)

for fold in range(5):
    print(f"Training fold {fold}")
    run_inceptionnet(fold, data)

# Inference

In [None]:
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

In [None]:
# Load every file found in the weights folder as a Keras model; together they
# form the ensemble averaged at prediction time.
weight_path = '../input/ensemblescassava'
my_model_list = os.listdir(weight_path)
models = [load_model(weight_path+"/"+my_model) for my_model in my_model_list]

In [None]:
SEED = 42

In [None]:
my_model_list

In [None]:
import pandas as pd
import glob
import numpy as np

In [None]:
!pwd

In [None]:
os.chdir("../input/imagedataaugmentor/ImageDataAugmentor-master")

In [None]:
!ls

In [None]:
from ImageDataAugmentor.image_data_augmentor import *
import albumentations as A

# Inference-time geometry / batching (same values as the training section).
image_size = 300
input_shape = (image_size, image_size, 3)
target_size = (image_size, image_size)
batch_size = 16

# Test-time augmentation pipeline. The random transforms mirror the training
# pipeline so that repeated prediction passes see different views of each image.
test_augmentations = A.Compose([
            A.RandomCrop(image_size, image_size, p=1),
            A.CoarseDropout(p=0.5),
            A.Cutout(p=0.5),
            A.Flip(p=0.5),
            A.ShiftScaleRotate(p=0.5),
            A.HueSaturationValue(p=0.5, hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2),
            A.RandomBrightnessContrast(p=0.5, brightness_limit=(-0.2,0.2), contrast_limit=(-0.2, 0.2)),
            # Same normalisation as the training and validation pipelines in
            # this notebook; without it the models would receive inputs on a
            # different scale than they were trained on.
            # NOTE(review): assumes the models in ../input/ensemblescassava
            # were trained with this notebook's pipelines - confirm.
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
            A.ToFloat()
            ], p=1)




def TFDataGenerator(test_set):
    """Wrap the test DataFrame in an unlabeled, unshuffled image iterator.

    Note: this one-argument definition replaces the two-argument
    `TFDataGenerator(train_set, val_set)` defined earlier in the notebook.

    Args:
        test_set: DataFrame whose `path` column holds the image file paths.

    Returns:
        The iterator from `ImageDataAugmentor.flow_from_dataframe`, applying
        `test_augmentations` and yielding images only (`class_mode=None`).
    """
    flow_kwargs = dict(
        dataframe = test_set,
        x_col='path',
        y_col = None,
        class_mode = None,
        target_size=target_size,
        batch_size=batch_size,
        # Keep file order so predictions line up with the rows of test_set.
        shuffle=False,
    )
    return ImageDataAugmentor(augment=test_augmentations).flow_from_dataframe(**flow_kwargs)

In [None]:
INPUT_DIR = '/kaggle/input/cassava-leaf-disease-classification/'
TEST_IMAGE_DIR = INPUT_DIR + 'test_images'

In [None]:
import glob
TEST_IMAGES = TEST_IMAGE_DIR + '/*.jpg'

In [None]:
def create_df(pattern=None):
    """Collect the test image paths into a one-column DataFrame.

    Args:
        pattern: Glob pattern selecting the image files. Defaults to the
            module-level `TEST_IMAGES` pattern.

    Returns:
        DataFrame with a single `path` column, sorted by path. `glob.glob`
        returns files in arbitrary order, so sorting keeps the row order (and
        therefore the submission file) reproducible between runs. The frame
        is empty if nothing matches.
    """
    if pattern is None:
        pattern = TEST_IMAGES
    test_images = sorted(glob.glob(pattern))
    test = pd.DataFrame(test_images, columns = ['path'])
    return test

In [None]:
test_set = create_df()

In [None]:
test_set

In [None]:
test_data = TFDataGenerator(test_set)

In [None]:
def predict_test(test_data, model, count=3):
    """Average a model's predictions over several passes of the test data.

    The test iterator applies random augmentations, so every pass sees a
    different view of each image; averaging the passes is test-time
    augmentation.

    Args:
        test_data: Iterator (or array) accepted by `model.predict`.
        model: Object exposing a Keras-style `predict` method.
        count: Number of prediction passes to average (default 3, the value
            that used to be hard-coded).

    Returns:
        Array of the element-wise mean of the `count` prediction arrays.

    Raises:
        ValueError: If `count` is smaller than 1.
    """
    if count < 1:
        raise ValueError("count must be at least 1")
    # `Model.predict` accepts generators directly; `predict_generator` is
    # deprecated in TF 2.x in favour of it.
    predictions = [model.predict(test_data) for _ in range(count)]
    return np.mean(predictions, axis=0)

In [None]:
# One averaged prediction array per ensemble member, in the order of `models`.
preds = [predict_test(test_data, model) for model in models]

In [None]:
np.mean(preds, axis=0)

In [None]:
# Average the predictions across models, then take the index of the largest
# value along the last axis as each image's predicted class.
ensemble_probs = np.mean(preds, axis=0)
mean_pred = np.argmax(ensemble_probs, axis=-1)

In [None]:
mean_pred

In [None]:
# The file name is whatever follows the last '/' of each path.
test_set['image_id'] = test_set['path'].str.rsplit('/', n=1).str[-1]
# Attach the ensemble's predicted class index to each image.
test_set['label'] = mean_pred
test_set

In [None]:
os.chdir("../../../working")

In [None]:
!pwd

In [None]:
test_set[['image_id','label']].to_csv('submission.csv', index=False)