In [None]:
#Importing relevant libraries
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.applications import EfficientNetB2
from tensorflow.keras.applications import EfficientNetB4

#from tensorflow.keras.applications.resnet101 import ResNet101
from tensorflow.keras.applications.vgg16 import VGG16
from keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.optimizers import SGD
from keras.callbacks import EarlyStopping
import tensorflow as tf
from PIL import Image
import os
import matplotlib.pyplot as plt

In [None]:
#%% IMPORTING DATA
# Root folder of the competition files; it keeps its trailing slash because
# other cells build paths by plain string concatenation.
general_path = '../input/cassava-leaf-disease-classification/'

# Read the image_id/label table and store the labels as strings in one step.
# NOTE(review): presumably the string cast is there because the Keras dataframe
# iterator wants string class labels in categorical mode -- confirm.
train = pd.read_csv(general_path + 'train.csv').astype({'label': 'string'})
train.head()

In [None]:
# Lookup table from numeric label to disease name, loaded as a pandas Series.
names_of_disease = pd.read_json(
    path_or_buf=general_path + 'label_num_to_disease_map.json',
    typ='series',
)
names_of_disease

In [None]:
# Bar chart of how many training images carry each label.
ax = train['label'].value_counts().plot(kind='bar',
                                        figsize=(14, 8),
                                        title="Count of disease",
                                        color='r')
# The bars sit at the class labels and their height is the number of images,
# so the axes are named accordingly (they were previously "Count"/"Frequency",
# which described the wrong axis).
ax.set_xlabel("Label")
ax.set_ylabel("Count")

In [None]:
# Sample Images
# Show the first nine training rows in a 3x3 grid, each titled with the
# disease name looked up from its numeric label.
plt.figure(figsize=(20, 16))
for slot in range(1, 10):
    sample = train.iloc[slot - 1]
    plt.subplot(3, 3, slot)
    picture = Image.open(general_path + 'train_images/' + sample['image_id'])
    plt.imshow(np.array(picture))
    label = sample['label']
    plt.title(f'{names_of_disease[int(label)]}')
plt.show()

In [None]:
# Probe the array shape of every 250th training image (starting at row 1)
# and print the distinct shapes that were found.
sizes = [
    np.array(
        Image.open(general_path + 'train_images/' + train.iloc[row]['image_id'])
    ).shape
    for row in range(1, len(train), 250)
]
print('Picture size', set(sizes))

In [None]:
# Resolution the images are resized to; used as `target_size` by both data
# generators and again when the test images are loaded for prediction.
img_width = 256
img_height = 256

In [None]:
# Training
# A single generator holds the random augmentation settings together with
# the 20% hold-out fraction; `subset` picks which side of the split a flow reads.
datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode='nearest',
    validation_split=0.2,
)

# Batches of 20 augmented images, read from disk via the image_id column.
train_datagen_flow = datagen.flow_from_dataframe(
    dataframe=train,
    directory=general_path + 'train_images',
    x_col='image_id',
    y_col='label',
    subset='training',
    target_size=(img_width, img_height),
    batch_size=20,
    seed=12345,
)

In [None]:
# Validation images must not be randomly rotated/shifted/zoomed/flipped:
# `datagen` applies those augmentations to every subset it serves, which makes
# val_loss (the quantity early stopping monitors) noisy and pessimistic.
# A separate generator with no augmentation is used instead. It keeps the same
# validation_split as `datagen`; the split is taken by row position in the
# dataframe, so the same fraction selects the same held-out rows.
valid_datagen = ImageDataGenerator(validation_split=0.2)
valid_datagen_flow = valid_datagen.flow_from_dataframe(
    dataframe=train,
    directory=general_path + 'train_images',
    x_col='image_id',
    y_col='label',
    target_size=(img_width, img_height),
    batch_size=20,
    subset='validation',
    seed=12345)

In [None]:
# Fraction of training rows per label (value_counts orders the result from
# the most frequent label to the least frequent one).
current_balance = train.loc[:, 'label'].value_counts(normalize=True)
current_balance

In [None]:
# Per-class loss weights: (1 - share of the class) / (1 - share of the rarest
# class), so the rarest class gets weight 1.0 and more frequent classes get
# smaller weights.
#
# `current_balance` is indexed by the *string* labels and ordered by frequency,
# so an integer lookup such as current_balance[0] is positional (it returns the
# most frequent class, not label '0') and is deprecated/removed in recent
# pandas. The shares are therefore looked up by label, and the dictionary is
# keyed by the position of each label in sorted order, which is how the Keras
# dataframe iterator numbers the classes.
rarest_share = current_balance.min()
class_weight = {
    class_index: (1 - current_balance[label]) / (1 - rarest_share)
    for class_index, label in enumerate(sorted(current_balance.index))
}

class_weight

# Custom Loss
import numpy as np
from keras import backend as K

# Kernel width for the MMD term; our_MMD passes beta = 1.0 / sigma to the kernel.
sigma = 0.05
# Scale factor applied to the averaged pairwise MMD inside our_MMD.
lamda_mmd = 0.1
def mmd(x1, x2, beta):
    """Return the (biased) squared MMD estimate between two sets of feature rows.

    Computes mean k(x1, x1) - 2 * mean k(x1, x2) + mean k(x2, x2), where k is
    ``gaussian_kernel`` with the given ``beta``.

    Args:
        x1: first set of feature rows (one row per sample).
        x2: second set of feature rows, same feature width as ``x1``.
        beta: kernel sharpness forwarded to ``gaussian_kernel``.

    Returns:
        A scalar backend tensor.
    """
    x1x1 = gaussian_kernel(x1, x1, beta)
    x1x2 = gaussian_kernel(x1, x2, beta)
    x2x2 = gaussian_kernel(x2, x2, beta)
    # K.mean works on any backend tensor; the tensor method `.mean()` only
    # exists on Theano variables and fails with the TensorFlow backend.
    diff = K.mean(x1x1) - 2 * K.mean(x1x2) + K.mean(x2x2)
    return diff

def gaussian_kernel(x1, x2, beta = 1.0):
    """Pairwise Gaussian kernel exp(-beta * ||a - b||^2) between rows of x1 and x2.

    Args:
        x1: rows ``a``; indexed as (sample, feature).
        x2: rows ``b``; same feature width as ``x1``.
        beta: multiplier on the squared distance inside the exponential.

    Returns:
        A backend tensor with one row per row of ``x1`` and one column per
        row of ``x2``.
    """
    # Insert a broadcast axis after the sample axis so that `r - x2` forms every
    # (row of x1, row of x2) difference. K.expand_dims / K.sum replace the
    # Theano-only `dimshuffle` / `.sum` tensor methods, which do not exist on
    # TensorFlow tensors or NumPy arrays.
    r = K.expand_dims(x1, 1)
    return K.exp(-beta * K.sum(K.square(r - x2), axis=-1))

def our_MMD(features, labels):
    """Average pairwise MMD between the feature groups of every pair of labels.

    Both arguments are assumed to be NumPy arrays (np.unique / np.where are used
    for grouping); inside a Keras graph the grouping would need backend ops.

    Args:
        features: array indexed by sample along the first axis
            (batch_size x embedding_size).
        labels: array with one label per sample (batch_size).

    Returns:
        ``lamda_mmd`` times the mean of ``mmd`` over all unordered label pairs,
        or 0.0 when fewer than two distinct labels are present (there is no
        pair to compare, and dividing by the pair count would fail).
    """
    unique_labels = np.unique(labels)  ## Find the unique labels.
    num_unique_labels = unique_labels.size  ## Number of unique labels.
    if num_unique_labels < 2:
        return 0.0

    # Slice out the features of each label once instead of once per pair.
    grouped = [features[np.where(labels == label)[0]] for label in unique_labels]

    loss = 0.0
    count = 0
    for i in range(num_unique_labels - 1):
        for j in range(i + 1, num_unique_labels):
            ## MMD between the features of the i^{th} and j^{th} label.
            loss += mmd(x1=grouped[i], x2=grouped[j], beta=1.0 / sigma)
            count += 1
    return lamda_mmd * loss / float(count)



In [None]:
# Stop once val_loss has not improved for 5 epochs. restore_best_weights makes
# an early stop hand back the weights of the best epoch rather than those of
# the last (already worse) epoch, so later predictions use the best model.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5,
                   restore_best_weights=True)

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
optimizer = RMSprop(learning_rate=0.001)

# ImageNet-pretrained EfficientNetB4 without its classifier head; global
# average pooling turns the final feature map into one vector per image.
backbone = EfficientNetB4(include_top=False,
                          weights='imagenet',
                          pooling='avg')

# Placeholders that no visible cell fills; kept so the names still exist.
features = []
labels = []

# Classification head: dropout -> 256-unit dense -> dropout -> 5-way softmax.
model = Sequential()
model.add(backbone)
model.add(Dropout(0.3))
# NOTE(review): this Dense layer has no activation, so it is a purely linear
# projection in front of the softmax layer -- confirm that is intended.
model.add(Dense(256))
model.add(Dropout(0.3))
model.add(Dense(5, activation='softmax'))


In [None]:
# Print the layer-by-layer overview of the assembled model.
model.summary()

In [None]:
# Print the name of each top-level layer of the model, one per line.
for layer_name in (each.name for each in model.layers):
    print(layer_name)

In [None]:
# TODO: the MMD regulariser (our_MMD) is not wired into training; doing so
# needs the intermediate layer output and the batch labels, which the plain
# categorical cross-entropy loss below does not use.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=["accuracy"])

# Model.fit accepts generators directly; fit_generator is deprecated and has
# been removed from recent Keras releases.
history = model.fit(train_datagen_flow,
                    validation_data=valid_datagen_flow,
                    epochs=50,
                    verbose=1,
                    class_weight=class_weight,  # re-weights the loss per class index
                    callbacks=[es])



In [None]:
# Report the best per-epoch accuracy reached on the training and validation data.
for caption, metric in (('Training_Cat-Acc: ', 'accuracy'),
                        ('Validation_Cat-Acc: ', 'val_accuracy')):
    print(caption, max(history.history[metric]))

In [None]:
def Plot(acc,val_acc,loss,val_loss):
    """Draw accuracy and loss curves side by side and show the figure.

    Args:
        acc: per-epoch training accuracy values.
        val_acc: per-epoch validation accuracy values.
        loss: per-epoch training loss values.
        val_loss: per-epoch validation loss values.

    Each curve is plotted against epoch numbers starting at 1.
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 10))
    fig.suptitle(" MODEL'S METRICS VISUALIZATION ", fontsize=20)

    # (axis, training curve, validation curve, panel title, y-axis label)
    panels = (
        (axes[0], acc, val_acc, 'History of Accuracy', 'Accuracy'),
        (axes[1], loss, val_loss, 'History of Loss', 'Loss'),
    )
    for axis, train_curve, valid_curve, heading, y_name in panels:
        for curve in (train_curve, valid_curve):
            axis.plot(range(1, len(curve) + 1), curve)
        axis.set_title(heading, fontsize=15)
        axis.set_xlabel('Epochs', fontsize=15)
        axis.set_ylabel(y_name, fontsize=15)
        axis.legend(['training', 'validation'])
    plt.show()

# Plot the curves recorded by the training run.
Plot(
    acc=history.history['accuracy'],
    val_acc=history.history['val_accuracy'],
    loss=history.history['loss'],
    val_loss=history.history['val_loss'],
)
    


In [None]:
# Predict a label for every test image and collect the results.
# - Rows are gathered in a list and turned into a DataFrame once:
#   DataFrame.append was removed in pandas 2 and re-copies the frame per row.
# - predict_classes no longer exists on tf.keras models; the class is the
#   argmax over the softmax output instead.
# - Images are resized by load_img with the same target_size the training
#   generator uses, so test images go through the same resizing path.
# - File names are sorted so the row order does not depend on os.listdir.
test_dir = os.path.join(general_path, 'test_images')
records = []
for image_name in sorted(os.listdir(test_dir)):
    image_path = os.path.join(test_dir, image_name)
    image = tf.keras.preprocessing.image.load_img(
        image_path, target_size=(img_width, img_height))
    # Add the batch axis: the model expects (batch, height, width, channels).
    batch = np.expand_dims(np.asarray(image, dtype=np.float32), 0)
    probabilities = model.predict(batch, verbose=0)
    records.append({'image_id': image_name,
                    'label': int(np.argmax(probabilities, axis=-1)[0])})

# Explicit columns keep the expected header even when no test image is found.
submission = pd.DataFrame(records, columns=['image_id', 'label'])

submission

In [None]:
# Write the predictions to submission.csv without the DataFrame index column.
submission.to_csv(path_or_buf='submission.csv', index=False)