## Importing Libraries

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from shutil import rmtree, copy

import warnings
warnings.filterwarnings('ignore')

from PIL import Image
from matplotlib import pyplot as plt

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import silhouette_score
from sklearn.cluster import KMeans, SpectralClustering

## Setting up the environment

In [None]:
# Reproducibility settings.
# The seed is fixed (it used to be drawn at random on every run) so that the
# K-Means model seeded with it below produces the same cluster assignment after
# every "Restart Kernel -> Run All". NumPy's global RNG is seeded as well.
seed = 42
np.random.seed(seed)

# Root directory of the competition data
path = '../input/cassava-leaf-disease-classification/'

In [None]:
# Load the training metadata and attach the on-disk location of every image.
data = pd.read_csv(path + 'train.csv')
image_dir = path + 'train_images/'
data['img_path'] = image_dir + data['image_id']

# Readable disease name for each numeric label (keys are the label as a string).
label_dict_ = {
    '0': 'Cassava Bacterial Blight (CBB)',
    '1': 'Cassava Brown Streak Disease (CBSD)',
    '2': 'Cassava Green Mottle (CGM)',
    '3': 'Cassava Mosaic Disease (CMD)',
    '4': 'Healthy'
}

# Translate every numeric label into its readable class name.
data['class_label'] = [label_dict_[key] for key in data['label'].astype(str)]

# Integer-encode the readable class names.
le = LabelEncoder().fit(data['class_label'])
data['class_'] = le.transform(data['class_label'])

## Defining Parameters

In [None]:
# Side length (pixels) of the images fed to the ResNet50 feature extractor.
IMAGE_SIZE = 224
# NOTE: reassigned to 4 further down, before any data generator is created.
BATCH_SIZE = 8
# Side length (pixels) of the images fed to the EfficientNet classifiers.
MODEL_IMAGE_SIZE = 512
# Upper bound on training epochs per classifier (EarlyStopping may stop sooner).
EPOCHS = 20

## Removing Duplicates and Mis-labelled Images

In [None]:
# Image ids excluded from the data set (see the section title: duplicates / mis-labelled images).
excluded_ids = ['1562043567.jpg', '3551135685.jpg', '2252529694.jpg']
keep_mask = ~data['image_id'].isin(excluded_ids)
data = data[keep_mask]

## Clustering Analysis

### 1. Feature Extraction Baseline

In [None]:
# ImageNet-pretrained ResNet50 without its classification head.
feature_base_model = tf.keras.applications.ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
)

# Average the final feature map over its spatial dimensions: one vector per image.
feature_model_pooling = tf.keras.layers.GlobalAveragePooling2D()(feature_base_model.output)

# Feature extractor: image in, pooled ResNet50 features out.
feature_model = tf.keras.models.Model(
    inputs=feature_base_model.input,
    outputs=feature_model_pooling,
)
# feature_model.summary()

### 2. Reading Data

In [None]:
# Image paths and integer-encoded classes, both in the row order of `data`.
im_paths, clas = data['img_path'], data['class_']

In [None]:
# Number of images left in the data set.
data['img_path'].shape[0]

### 3. Predicting

In [None]:
# Extract one pooled ResNet50 feature vector per image.
#
# Images go through the network in batches. Calling `predict` once per image
# pays the per-call overhead for every single image and, on recent TensorFlow
# versions, prints a progress bar for each call.
FEATURE_BATCH_SIZE = 32


def _load_image(im_path):
    """Read one image, resize it to IMAGE_SIZE x IMAGE_SIZE and scale it to [0, 1]."""
    # The context manager closes the underlying file once the pixels are read.
    with Image.open(im_path) as img:
        # convert('RGB') guarantees the 3 channels the model input expects.
        resized = img.convert('RGB').resize((IMAGE_SIZE, IMAGE_SIZE))
        return np.float32(resized) / 255.0


# NOTE(review): ImageNet ResNet50 weights are normally paired with
# tf.keras.applications.resnet50.preprocess_input rather than a plain /255
# rescale. The scaling is left unchanged so the features stay compatible with
# any K-Means model already fitted on them -- confirm before changing.

all_paths = list(im_paths)
y_pred = []
for start in range(0, len(all_paths), FEATURE_BATCH_SIZE):
    batch_paths = all_paths[start:start + FEATURE_BATCH_SIZE]
    x_images = np.stack([_load_image(p) for p in batch_paths])
    # `predict` returns one row per image; extend keeps y_pred a flat list of vectors.
    y_pred.extend(feature_model.predict(x_images, verbose=0))


In [None]:
# Turn the list of per-image feature vectors into a single array and report its shape.
y_pred = np.array(y_pred)
print(f'y: {y_pred.shape}')

### 4. Clustering using KMeans

In [None]:
"""max_clusters = 5

plt.figure(figsize=(10, 5))
plt.style.use('ggplot')

skip = 1
for K in range(2, max_clusters+1):
    KMC = KMeans(n_clusters=K).fit(y_pred)
    labels = KMC.labels_
    print('fitting for {} clusters completed..'.format(K))
    score = silhouette_score(y_pred, labels, metric='euclidean')
    print('silhouette_score for {} clusters: {}'.format(K, score))
    plt.plot(K, score, '^')

plt.xlabel('K')
plt.ylabel('Silhoutte Score')
plt.show()"""

### 5. Saving the Clusters

In [None]:
# Select the value of K based on silhouette_score
K = 2

# `n_jobs` is no longer passed: it was deprecated in scikit-learn 0.23 and
# removed in 1.0, where it raises a TypeError.
# `n_init` is pinned to 10 (the historical default) so the result does not
# change with the scikit-learn version; `random_state` makes the fit repeatable.
KMC = KMeans(n_clusters=K, n_init=10, random_state=seed)
KMC.fit(y_pred)

# Cluster id of every image, in the same order as the feature rows.
K_pred = KMC.predict(y_pred)

In [None]:
import pickle

# Persist the fitted K-Means model in the current working directory.
pkl_filename = "./Kmeans.pkl"
with open(pkl_filename, mode='wb') as model_file:
    pickle.dump(KMC, model_file)

In [None]:
# Start from an empty Clusters/ tree so files from an earlier run cannot leak in.
# Only the "directory does not exist yet" case is tolerated; any other failure
# (e.g. a permission error) is raised instead of being swallowed.
if os.path.isdir('Clusters/'):
    rmtree('Clusters/')

# Layout: Clusters/<cluster id>/<class name>/ (makedirs also creates missing parents).
for i in range(K):
    os.makedirs('Clusters/' + str(i))
    for class_ in label_dict_.values():
        os.makedirs('Clusters/{}/{}'.format(i, class_))

In [None]:
# Copy every image into Clusters/<cluster id>/<class name>/.
# Paths, class names and cluster ids are walked in lock-step (they share the
# row order of `data`), which avoids re-filtering the whole DataFrame for each
# image -- that lookup made the loop quadratic in the number of images.
assert len(im_paths) == len(data) == len(K_pred), \
    "image list, metadata and cluster predictions are out of sync"
for im_path, class_, cluster_id in zip(im_paths, data['class_label'], K_pred):
    copy(im_path, 'Clusters/{}/{}'.format(cluster_id, class_))

In [None]:
#!zip -r Clusters_ResNet.zip ./Clusters/

In [None]:
# Rebind the large intermediates to None so their previous values can be garbage-collected.
x_images = Y = y_pred = None

## Training Model for Cluster 0

### 1. Augmentation and Preprocessing

In [None]:
def preprocess(image):
    """Apply a 3x3 Gaussian blur to an image.

    Passed as `preprocessing_function` to the ImageDataGenerators in this notebook.

    Args:
        image: image array (presumably rank 3, height x width x channels --
            TODO confirm against the generator).

    Returns:
        The blurred image as a float64 NumPy array.
    """
    # OpenCV is handed an 8-bit copy of the pixels.
    pixels = np.array(image, dtype=np.uint8)
    # A sigma of 0 lets OpenCV derive the standard deviation from the kernel size.
    blurred = cv2.GaussianBlur(pixels, (3, 3), 0)
    return np.asarray(blurred, dtype=np.float64)

In [None]:
# Root of the cluster-0 image tree, read by the flow_from_directory calls that follow.
# NOTE: the name `dir` shadows Python's built-in dir(); it is kept because later cells read it.
dir = './Clusters/0'

In [None]:
# Drop the reference to the ResNet50 feature extractor; no later cell shown here uses it.
del feature_model

In [None]:
# Overrides the earlier BATCH_SIZE = 8; the data generators created after this cell use 4.
BATCH_SIZE = 4

In [None]:
# Settings shared by the training and validation directory iterators.
flow_kwargs = dict(
    target_size=(MODEL_IMAGE_SIZE, MODEL_IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
)

# Training: random augmentation, Gaussian-blur preprocessing and 1/255 rescaling.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=30,
    zoom_range=0.3,
    horizontal_flip=True,
    brightness_range=[0.6, 1.2],
    validation_split=0.2,
    fill_mode='nearest',
    preprocessing_function=preprocess,
)
train_datagen = datagen.flow_from_directory(dir, subset="training", **flow_kwargs)

# Validation: same preprocessing and rescaling, no augmentation.
validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0/255,
    validation_split=0.2,
    preprocessing_function=preprocess,
)
valid_datagen = validation_datagen.flow_from_directory(dir, subset="validation", **flow_kwargs)

### 2. Defining Model (EfficientNetB5)

In [None]:
# Install the third-party `efficientnet` package (no version pinned) and import
# its tf.keras implementation under the alias `efn`.
!pip install -q efficientnet
import efficientnet.tfkeras as efn

In [None]:
# Cluster-0 classifier:
# EfficientNetB5 backbone -> global average pooling -> dropout -> 5-way softmax.
inp = tf.keras.layers.Input(shape=(MODEL_IMAGE_SIZE, MODEL_IMAGE_SIZE, 3))

backbone = efn.EfficientNetB5(weights='noisy-student', include_top=False)
x = backbone(inp)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.2)(x)
output = tf.keras.layers.Dense(5, activation='softmax')(x)

model_0 = tf.keras.models.Model(inputs=[inp], outputs=[output])

opt = tf.keras.optimizers.Adam(learning_rate=0.0001)

# Categorical cross-entropy with label smoothing; accuracy is tracked as the metric.
model_0.compile(
    optimizer=opt,
    loss=[tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.4)],
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)

# model_0.summary()

In [None]:
# File that receives the weights with the lowest validation loss.
filepath = "model_0.h5"

# Shrink the learning rate by 5x after one epoch without val_loss improvement.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=1, verbose=1, factor=0.2)
# Stop training after three epochs without val_loss improvement.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
# Keep only the best model seen so far.
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=filepath, monitor='val_loss', save_best_only=True)

callbacks = [reduce_lr, early_stop, checkpoint]


In [None]:
# Train the cluster-0 classifier; `h` holds the per-epoch metrics used by the plots.
h = model_0.fit(
    train_datagen,
    validation_data=valid_datagen,
    epochs=EPOCHS,
    callbacks=callbacks,
)

In [None]:
# Training vs. validation accuracy of the cluster-0 model, per epoch.
plt.style.use("ggplot")
plt.figure()
accuracy_curves = (
    ("categorical_accuracy", "train_acc"),
    ("val_categorical_accuracy", "val_acc"),
)
for history_key, curve_label in accuracy_curves:
    plt.plot(h.history[history_key], label=curve_label)
plt.title("Accuracy")
plt.xlabel("Epoch ")
plt.ylabel("Accuracy")
plt.legend(loc="upper left")
plt.show()

In [None]:
# Training vs. validation loss of the cluster-0 model, per epoch.
plt.style.use("ggplot")
plt.figure()
loss_curves = (
    ("loss", "train_loss"),
    ("val_loss", "val_loss"),
)
for history_key, curve_label in loss_curves:
    plt.plot(h.history[history_key], label=curve_label)
plt.title("Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(loc="upper left")
plt.show()


### 3. Training Model for Cluster 1

In [None]:
# Re-point `dir` at the cluster-1 image tree for the generators that follow.
# NOTE: the name `dir` shadows Python's built-in dir().
dir = './Clusters/1'

In [None]:
# Settings shared by the cluster-1 training and validation directory iterators.
flow_kwargs = dict(
    target_size=(MODEL_IMAGE_SIZE, MODEL_IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
)

# Training: random augmentation, Gaussian-blur preprocessing and 1/255 rescaling.
datagen_1 = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=30,
    zoom_range=0.3,
    horizontal_flip=True,
    brightness_range=[0.6, 1.2],
    validation_split=0.2,
    fill_mode='nearest',
    preprocessing_function=preprocess,
)
train_datagen_1 = datagen_1.flow_from_directory(dir, subset="training", **flow_kwargs)

# Validation: same preprocessing and rescaling, no augmentation.
validation_datagen_1 = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0/255,
    validation_split=0.2,
    preprocessing_function=preprocess,
)
valid_datagen_1 = validation_datagen_1.flow_from_directory(dir, subset="validation", **flow_kwargs)

In [None]:
# Cluster-1 classifier:
# EfficientNetB5 backbone -> global average pooling -> dropout -> 5-way softmax.
inp = tf.keras.layers.Input(shape=(MODEL_IMAGE_SIZE, MODEL_IMAGE_SIZE, 3))

backbone = efn.EfficientNetB5(weights='noisy-student', include_top=False)
x = backbone(inp)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.2)(x)
output = tf.keras.layers.Dense(5, activation='softmax')(x)

model_1 = tf.keras.models.Model(inputs=[inp], outputs=[output])

opt = tf.keras.optimizers.Adam(learning_rate=0.0001)

# Categorical cross-entropy with label smoothing; accuracy is tracked as the metric.
model_1.compile(
    optimizer=opt,
    loss=[tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.4)],
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)

In [None]:
# File that receives the weights with the lowest validation loss.
filepath = "model_1.h5"

# Shrink the learning rate by 5x after one epoch without val_loss improvement.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=1, verbose=1, factor=0.2)
# Stop training after three epochs without val_loss improvement.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
# Keep only the best model seen so far.
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=filepath, monitor='val_loss', save_best_only=True)

callbacks = [reduce_lr, early_stop, checkpoint]

In [None]:
# Train the cluster-1 classifier on the cluster-1 generators.
# It must use train_datagen_1 / valid_datagen_1: the un-suffixed generators
# read ./Clusters/0, which would train and validate model_1 on the wrong cluster.
h2 = model_1.fit(
    train_datagen_1,
    epochs=EPOCHS,
    validation_data=valid_datagen_1,
    callbacks=callbacks,
)

In [None]:
# Training vs. validation accuracy of the cluster-1 model, per epoch.
plt.style.use("ggplot")
plt.figure()
accuracy_curves = (
    ("categorical_accuracy", "train_acc"),
    ("val_categorical_accuracy", "val_acc"),
)
for history_key, curve_label in accuracy_curves:
    plt.plot(h2.history[history_key], label=curve_label)
plt.title("Accuracy")
plt.xlabel("Epoch ")
plt.ylabel("Accuracy")
plt.legend(loc="upper left")
plt.show()

In [None]:
# Training vs. validation loss of the cluster-1 model, per epoch.
plt.style.use("ggplot")
plt.figure()
loss_curves = (
    ("loss", "train_loss"),
    ("val_loss", "val_loss"),
)
for history_key, curve_label in loss_curves:
    plt.plot(h2.history[history_key], label=curve_label)
plt.title("Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(loc="upper left")
plt.show()

In [None]:
# Export the cluster-0 model, optimizer state included, with save_format='tf' to 'model_0.tf'.
model_0.save('model_0.tf', include_optimizer=True, save_format='tf')

In [None]:
# Export the cluster-1 model, optimizer state included, with save_format='tf' to 'model_1.tf'.
model_1.save('model_1.tf', include_optimizer=True, save_format='tf')

In [None]:
# Shell command: recursively archive the exported 'model_0.tf' into model_0.zip.
!zip -r model_0.zip 'model_0.tf'

In [None]:
# Shell command: recursively archive the exported 'model_1.tf' into model_1.zip.
!zip -r model_1.zip 'model_1.tf'