In [1]:
import os
import glob
import shutil
import json
import keras
import itertools
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt
from collections import Counter

# Dataset locations. Every path is derived from the single `work_dir` root so
# the CSV, the label map and the image folder always refer to the same copy of
# the competition data.
work_dir = '../input/cassava-leaf-disease-classification/'

# The listing is not displayed (it is not the last expression of the cell);
# the call is kept because it raises immediately if the dataset is missing.
os.listdir(work_dir)
train_path = os.path.join(work_dir, 'train_images')

In [2]:
# Load the training table and report how many images carry each label.
data = pd.read_csv(work_dir + 'train.csv')
label_counts = Counter(data['label'])
print(label_counts)  # Checking the frequencies of the labels

Counter({3: 13158, 4: 2577, 2: 2386, 1: 2189, 0: 1087})


# Read data 

In [3]:
# Load the mapping from numeric label to disease name. The context manager
# closes the JSON file as soon as it has been parsed, so no handle is left
# open for the lifetime of the kernel.
with open(work_dir + 'label_num_to_disease_map.json') as f:
    real_labels = json.load(f)

# JSON object keys are always strings; convert them to int so they can be
# looked up with the integer values of the `label` column.
real_labels = {int(k): v for k, v in real_labels.items()}

# Defining the working dataset: attach the disease name to every row.
data['class_name'] = data.label.map(real_labels)

# Splitting the data
from sklearn.model_selection import train_test_split

# Hold out 5% of the rows for validation. Stratifying on the class name keeps
# the class proportions the same in both parts; the fixed random_state makes
# the split reproducible.
train, val = train_test_split(
    data,
    test_size=0.05,
    random_state=42,
    stratify=data['class_name'],
)

# Importing the data using ImageDataGenerator

from keras.preprocessing.image import ImageDataGenerator

# Image geometry, number of target classes and mini-batch size.
IMG_SIZE = 456
size = (IMG_SIZE, IMG_SIZE)
n_CLASS = 5
BATCH_SIZE = 15

# Both generators apply the same EfficientNet preprocessing function.
effnet_preprocess = tf.keras.applications.efficientnet.preprocess_input

# Random geometric augmentation, used for the training images only.
train_augmentation = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

datagen_train = ImageDataGenerator(
    preprocessing_function=effnet_preprocess,
    **train_augmentation,
)

# Validation images are only passed through the preprocessing function.
datagen_val = ImageDataGenerator(preprocessing_function=effnet_preprocess)

# Arguments shared by the training and validation iterators, kept in one place
# so the two pipelines cannot drift apart.
flow_kwargs = dict(
    directory=train_path,
    seed=42,
    x_col='image_id',
    y_col='class_name',
    target_size=size,
    #color_mode="rgb",
    class_mode='categorical',
    interpolation='nearest',
    batch_size=BATCH_SIZE,
)

# NOTE(review): with class_mode='categorical' the class indices are inferred
# from the values of `class_name`; confirm via `train_set.class_indices` that
# the resulting order matches the numeric labels expected in the submission.

# Training batches are drawn in shuffled order.
train_set = datagen_train.flow_from_dataframe(train, shuffle=True, **flow_kwargs)

# Validation batches keep the row order of `val`, so predictions stay aligned
# with the dataframe and a truncated pass (validation_steps = n // batch_size)
# always covers the same images.
val_set = datagen_val.flow_from_dataframe(val, shuffle=False, **flow_kwargs)

Found 20327 validated image filenames belonging to 5 classes.
Found 1070 validated image filenames belonging to 5 classes.


In [4]:
# from keras.models import Sequential
# from keras.layers import GlobalAveragePooling2D, Flatten, Dense, Dropout, BatchNormalization
# from keras.optimizers import RMSprop, Adam
# from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
# from tensorflow.keras.applications import EfficientNetB3

# def create_model():
    
#     model = Sequential()
#     # initialize the model with input shape
#     model.add(EfficientNetB3(input_shape = (IMG_SIZE, IMG_SIZE, 3), include_top = False,
#                              weights = 'imagenet',
#                              drop_connect_rate=0.6))
#     #for layer in model.layers[:-40]:  # Training only part of the architecture does not improve the performance
#     #    layer.trainable = False
#     model.add(GlobalAveragePooling2D())
#     model.add(Flatten())
#     model.add(Dense(256, activation = 'relu', bias_regularizer=tf.keras.regularizers.L1L2(l1=0.01, l2=0.001)))
#     model.add(Dropout(0.5))
#     model.add(Dense(n_CLASS, activation = 'softmax'))
    
#     return model

# leaf_model = create_model()
# leaf_model.summary()

In [5]:
# EPOCHS = 15
# STEP_SIZE_TRAIN = train_set.n//train_set.batch_size
# STEP_SIZE_VALID = val_set.n//val_set.batch_size

In [6]:
# def Model_fit():
    
#     #leaf_model = None
    
#     leaf_model = create_model()
    
#     '''Compiling the model'''
    
#     loss = tf.keras.losses.CategoricalCrossentropy(from_logits = False,
#                                                    label_smoothing=0.0001,
#                                                    name='categorical_crossentropy' )
    
#     leaf_model.compile(optimizer = Adam(learning_rate = 1e-3),
#                         loss = loss, #'categorical_crossentropy'
#                         metrics = ['categorical_accuracy']) #'acc'
    
#     # Stop training when the val_loss has stopped decreasing for 3 epochs.
#     es = EarlyStopping(monitor='val_loss', mode='min', patience=3,
#                        restore_best_weights=True, verbose=1)
    
#     # Save the model with the minimum validation loss
#     checkpoint_cb = ModelCheckpoint("Cassava_best_model.h5",
#                                     save_best_only=True,
#                                     monitor = 'val_loss',
#                                     mode='min')
    
#     # reduce learning rate
#     reduce_lr = ReduceLROnPlateau(monitor = 'val_loss',
#                                   factor = 0.2,
#                                   patience = 2,
#                                   min_lr = 1e-6,
#                                   mode = 'min',
#                                   verbose = 1)
    
#     history = leaf_model.fit(train_set,
#                              validation_data = val_set,
#                              epochs= EPOCHS,
#                              batch_size = BATCH_SIZE,
#                              #class_weight = d_class_weights,
#                              steps_per_epoch = STEP_SIZE_TRAIN,
#                              validation_steps = STEP_SIZE_VALID,
#                              callbacks=[es, checkpoint_cb, reduce_lr])
    
#     leaf_model.save('Cassava_model'+'.h5')  
    
#     return history

In [7]:
# results = Model_fit()

# Albumentation

In [8]:
# !pip install git+https://github.com/mjkvaak/ImageDataAugmentor

In [9]:
# image_size = 300

In [10]:
# from ImageDataAugmentor.image_data_augmentor import *
# import albumentations as A

# train_augmentations = A.Compose([
#             A.RandomCrop(image_size, image_size, p=1),
#             A.CoarseDropout(p=0.5),
#             A.Cutout(p=0.5),
#             A.Flip(p=0.5),
#             A.ShiftScaleRotate(p=0.5),
#             A.HueSaturationValue(p=0.5, hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2),
#             A.RandomBrightnessContrast(p=0.5, brightness_limit=(-0.2,0.2), contrast_limit=(-0.2, 0.2)),
#             A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
#             A.ToFloat()
#             ], p=1)

# val_augmentations = A.Compose([
#                 A.CenterCrop(image_size, image_size, p=1),
#                 A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
#                 A.ToFloat()
#                 ], p=1)

In [11]:
# input_shape = (image_size, image_size, 3)
# target_size = (image_size, image_size)
# batch_size = 16

In [12]:
# import tensorflow as tf

In [13]:
# def TFDataGenerator(train_set, val_set):
    
#     train_generator = ImageDataAugmentor(augment=train_augmentations,featurewise_center=True, featurewise_std_normalization=True)
#     val_generator = ImageDataAugmentor(augment=val_augmentations,featurewise_center=True, featurewise_std_normalization=True)
    
#     train_datagen = train_generator.flow_from_dataframe(
#                   preprocessing_function = tf.keras.applications.efficientnet.preprocess_input,
#                   dataframe = train_set,
#                   directory='../input/cassava-leaf-disease-classification/train_images',
#                   x_col='image_id',
#                   y_col='label',
#                   target_size=target_size,
#                   batch_size=batch_size,
#                   shuffle=True,
#                   class_mode='categorical',
#                   seed=88)

#     val_datagen = val_generator.flow_from_dataframe(
#                 preprocessing_function = tf.keras.applications.efficientnet.preprocess_input,
#                 dataframe = val_set,
#                 directory='../input/cassava-leaf-disease-classification/train_images',
#                 x_col='image_id',
#                 y_col='label',
#                 target_size=target_size,
#                 batch_size=batch_size,
#                 shuffle=False,
#                 class_mode='categorical',
#                 seed=88)
    
#     return train_datagen, val_datagen

# EfficientNet and InceptionNet

In [14]:
# !wget https://storage.googleapis.com/keras-applications/efficientnetb3_notop.h5

In [15]:
# from tensorflow.keras.applications import InceptionV3
# from tensorflow.keras.optimizers import Adam
# from tensorflow.keras.optimizers import SGD
# from tensorflow.keras.losses import CategoricalCrossentropy

In [16]:
# def create_Inception():
#     base_model = InceptionV3(include_top=False, weights="imagenet", input_shape=input_shape)

#     # Rebuild top
#     inputs = Input(shape=input_shape)

#     model = base_model(inputs)
#     pooling = GlobalAveragePooling2D()(model)
#     dropout = Dropout(0.2)(pooling)

#     outputs = Dense(5, activation="softmax", name="dense", dtype='float32')(dropout)

#     # Compile
#     inception = Model(inputs=inputs, outputs=outputs)
#     optimizer = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
#     loss = CategoricalCrossentropy(label_smoothing=0.2, from_logits=True)

#     inception.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
#     return inception

In [17]:
# def create_EffnetB3():
#     base_model = EfficientNetB3(include_top=False, weights="imagenet", input_shape=input_shape, drop_connect_rate=0.6)

#     # Rebuild top
#     inputs = Input(shape=input_shape)

#     model = base_model(inputs)
#     gpool = GlobalAveragePooling2D()(model)
#     flat = Flatten()(gpool)
#     dense = Dense(256, activation = 'relu', bias_regularizer=tf.keras.regularizers.L1L2(l1=0.01, l2=0.001))(flat)
#     drop = Dropout(0.5)(dense)
#     outputs = Dense(5, activation = "softmax", dtype='float32')(drop)
# #     model.add(Dense(256, activation = 'relu', bias_regularizer=tf.keras.regularizers.L1L2(l1=0.01, l2=0.001)))
# #     model.add(Dropout(0.5))
# #     model.add(Dense(n_CLASS, activation = 'softmax'))
# #     pooling = GlobalAveragePooling2D()(model)
# #     dropout = Dropout(0.2)(pooling)

# #    outputs = Dense(5, activation="softmax", name="dense", dtype='float32')(dropout)

#     # Compile
#     efficientnet = Model(inputs=inputs, outputs=outputs)
#     optimizer = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
#     loss = CategoricalCrossentropy(label_smoothing=0.2, from_logits=True)

#     efficientnet.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
#     return efficientnet

# Model run with folds

In [18]:
# epochs = 8

In [19]:
# def run_efficientnet(fold, df):
#     train_set = df[df['kfold'] != fold]
#     valid_set = df[df['kfold'] == fold]
#     train_datagen, val_datagen = TFDataGenerator(train_set, valid_set)
#     model = create_EffnetB3()
    
#     model_name = "efficientnet"
#     fold_name = "fold.h5"
#     filepath = model_name + str(fold+1) + fold_name
#     callbacks = [ReduceLROnPlateau(monitor='val_loss', patience=1, verbose=1, factor=0.2),
#                  EarlyStopping(monitor='val_loss', patience=3),
#                  ModelCheckpoint(filepath=filepath, monitor='val_loss', save_best_only=True)]
#     history = model.fit(train_datagen, epochs=epochs, validation_data=val_datagen, callbacks=callbacks)
#     return 1


In [20]:
# def run_inceptionnet(fold, df):
#     train_set = df[df['kfold'] != fold]
#     valid_set = df[df['kfold'] == fold]
#     train_datagen, val_datagen = TFDataGenerator(train_set, valid_set)
#     model = create_Inception()
    
#     model_name = "inceptionnet"
#     fold_name = "fold.h5"
#     filepath = model_name + str(fold+1) + fold_name
#     callbacks = [ReduceLROnPlateau(monitor='val_loss', patience=1, verbose=1, factor=0.2),
#                  EarlyStopping(monitor='val_loss', patience=3),
#                  ModelCheckpoint(filepath=filepath, monitor='val_loss', save_best_only=True)]
#     history = model.fit(train_datagen, epochs=epochs, validation_data=val_datagen, callbacks=callbacks)
#     return 1


In [21]:
# for fold in range(5):
#     print(f"Training fold {fold}")
#     run_efficientnet(fold, df)

# for fold in range(5):
#     print(f"Training fold {fold}")
#     run_inceptionnet(fold, df)

# Inferences

In [22]:
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

In [23]:
# weight_path = '../input/ensemblescassava'
# my_model_list = os.listdir(weight_path)
# models = []
# for my_model in my_model_list:
#     models.append(load_model(weight_path+"/"+my_model))

In [24]:
# model = load_model("../input/effnetcassava/Cassava_best_model.h5")

In [25]:
# SEED = 42

In [26]:
# my_model_list

In [27]:
# import pandas as pd
# import glob
# import numpy as np

In [28]:
# !pwd

In [29]:
# os.chdir("../input/imagedataaugmentor/ImageDataAugmentor-master")

In [30]:
# !ls

In [31]:
# from ImageDataAugmentor.image_data_augmentor import *
# import albumentations as A

# image_size = 300
# input_shape = (image_size, image_size, 3)
# target_size = (image_size, image_size)
# batch_size = 16

# test_augmentations = A.Compose([
#             A.RandomCrop(image_size, image_size, p=1),
#             A.CoarseDropout(p=0.5),
#             A.Cutout(p=0.5),
#             A.Flip(p=0.5),
#             A.ShiftScaleRotate(p=0.5),
#             A.HueSaturationValue(p=0.5, hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2),
#             A.RandomBrightnessContrast(p=0.5, brightness_limit=(-0.2,0.2), contrast_limit=(-0.2, 0.2)),
#             A.ToFloat()
#             ], p=1)




# def TFDataGenerator(test_set):
    
#     test_generator = ImageDataAugmentor(augment=test_augmentations)
    
#     test_datagen = test_generator.flow_from_dataframe(
#                   dataframe = test_set,
#                   x_col='path',
#                   y_col = None,
#                   class_mode = None,
#                   target_size=target_size,
#                   batch_size=batch_size,
#                   shuffle=False)
    
#     return test_datagen

In [32]:
# INPUT_DIR = '/kaggle/input/cassava-leaf-disease-classification/'
# TEST_IMAGE_DIR = INPUT_DIR + 'test_images'

In [33]:
# import glob
# TEST_IMAGES = TEST_IMAGE_DIR + '/*.jpg'

In [34]:
# def create_df():
#     test_images = glob.glob(TEST_IMAGES)
#     test = pd.DataFrame(test_images, columns = ['path'])
#     return test

In [35]:
# test_set = create_df()

In [36]:
# test_set

In [37]:
# def TFDataGenerator(test_set):
#     test_data = datagen_train.flow_from_dataframe(test_set,
#                              seed=42,
#                              x_col = 'path',
#                              y_col = None,
#                              class_mode = None,
#                              interpolation = 'nearest',
#                              target_size = size,
#                              shuffle = True,
#                              batch_size = BATCH_SIZE)
#     return test_data

In [38]:
# test_data = TFDataGenerator(test_set)

In [39]:
# def predict_test(test_data, model):
#     count = 3
#     predictions = []
#     for i in range(count):
#         preds = model.predict_generator(test_data)
#         predictions.append(preds)
#     predictions = np.mean(predictions, axis=0)
#     return predictions

In [40]:
# preds = []
# for model in models:
#     preds.append(predict_test(test_data, model))

In [41]:
# preds = []
# preds.append(predict_test(test_data, model))

In [42]:
# preds

In [43]:
# np.mean(preds, axis=0)

In [44]:
# mean_pred = np.mean(preds, axis=0)
# mean_pred = np.argmax(mean_pred, axis=-1)

In [45]:
# mean_pred

In [46]:
# test_set['image_id'] = test_set['path'].apply(lambda x: x.split('/')[-1])
# test_set['label'] = mean_pred
# test_set

In [47]:
# os.chdir("../../../working")

In [48]:
# !pwd

In [49]:
# test_set[['image_id','label']].to_csv('submission.csv', index=False)

# New inference

In [50]:
from PIL import Image

In [51]:
# Load every saved model found in the fold directory into the ensemble list.
model_list = os.listdir("../input/effnet-888-folds")
models = [
    keras.models.load_model(f'../input/effnet-888-folds/{weights_file}')
    for weights_file in model_list
]

In [52]:
# Add one more saved model to the ensemble.
# NOTE: this cell is not idempotent - running it again appends the same model
# a second time and gives it an extra vote. Re-run the cell that rebuilds
# `models` first.
best_model_path = r"../input/effnetcassava/Cassava_best_model.h5"
models.append(keras.models.load_model(best_model_path))

In [53]:
# TEST_DIR = '../input/cassava-leaf-disease-classification/test_images/'
# test_images = os.listdir(TEST_DIR)
# preds = []
# pred=[]
# for image in test_images:
#     img = Image.open(TEST_DIR + image)
#     img = img.resize(size)
#     img = np.expand_dims(img, axis=0)
#     for model in models:
#         preds.append(np.argmax(model.predict(img)))
#     pred.append(max(set(preds), key=preds.count))
# #     pred.extend(stats.mode(predictions)[0][0])

In [54]:
import tensorflow as tf

In [55]:
TEST_DIR = '../input/cassava-leaf-disease-classification/test_images/'

# One {'image_id', 'label'} record per test image. The DataFrame is built once
# after the loop: growing a frame row by row is quadratic, and
# DataFrame.append no longer exists in pandas >= 2.0.
submission_rows = []
for image_name in os.listdir(TEST_DIR):
    image_path = os.path.join(TEST_DIR, image_name)
    image = tf.keras.preprocessing.image.load_img(image_path)
    resized_image = image.resize(size)
    # Add a leading batch axis so the single image forms a batch of one.
    numpied_image = np.expand_dims(resized_image, 0)
    tensored_image = tf.cast(numpied_image, tf.float32)

    # Every model casts one vote: the index of its highest-scoring class.
    # argmax over predict() replaces predict_classes(), which was removed from
    # Keras and was never available on functional (non-Sequential) models.
    votes = [
        int(np.argmax(model.predict(tensored_image), axis=-1)[0])
        for model in models
    ]

    # Majority vote; on a tie np.argmax picks the lowest class index.
    counts = np.bincount(votes)
    submission_rows.append({'image_id': image_name,
                            'label': int(np.argmax(counts))})

# Passing `columns` keeps the expected header even when no test image was found.
submission = pd.DataFrame(submission_rows, columns=['image_id', 'label'])

In [56]:
# Display the finished submission table as the cell output.
submission

Unnamed: 0,image_id,label
0,2216849948.jpg,4


In [57]:
# Write the predictions to ./submission.csv; index=False leaves the DataFrame
# index out of the file.
submission.to_csv('./submission.csv', index = False)