#### Necessary Imports 

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load


import os
import matplotlib.pyplot as plt

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
from skimage import io


from sklearn.preprocessing import MultiLabelBinarizer 

#### Load Training Data

In [None]:
# Load the competition's training table; later cells read its 'image' and 'labels' columns.
train_csv_path = "../input/plant-pathology-2021-fgvc8/train.csv"
train_read = pd.read_csv(train_csv_path, sep=',')
print('dataframe shape: ', train_read.shape)
train_read.head(3)

#### Visualize Images with Different Labels 

1. Healthy

In [None]:
# Directory with the original training images; the other per-label preview
# cells reuse this path.
train_im_path = "../input/plant-pathology-2021-fgvc8/train_images"

fig = plt.figure(figsize=(15, 10))
npics = 6
ncols = 3
# add_subplot requires integer grid dimensions (the former `npics/2` was a
# float). Ceiling division gives exactly as many rows as the images need.
nrows = -(-npics // ncols)

# NOTE: 'labels' is still the raw string here, so the equality test selects
# images whose one and only label is 'healthy'.
healthy_rows = train_read[train_read['labels'] == 'healthy']
image_list = healthy_rows['image'].sample(frac=1)[:npics].to_list()
for position, img in enumerate(image_list, start=1):
    sample_img = io.imread(os.path.join(train_im_path, img))
    ax = fig.add_subplot(nrows, ncols, position, xticks=[], yticks=[])
    ax.imshow(sample_img)
fig.suptitle('All Healthy')
plt.tight_layout()
plt.show()

2. Scab

In [None]:
# Show a random handful of training images whose only label is 'scab'.
fig = plt.figure(figsize=(15, 10))
npics = 6
ncols = 3
# add_subplot requires integer grid dimensions (the former `npics/2` was a
# float). Ceiling division gives exactly as many rows as the images need.
nrows = -(-npics // ncols)

scab_rows = train_read[train_read['labels'] == 'scab']
image_list = scab_rows['image'].sample(frac=1)[:npics].to_list()
for position, img in enumerate(image_list, start=1):
    sample_img = io.imread(os.path.join(train_im_path, img))
    ax = fig.add_subplot(nrows, ncols, position, xticks=[], yticks=[])
    ax.imshow(sample_img)
fig.suptitle('All Scabs')
plt.tight_layout()
plt.show()

3. Rust

In [None]:
# Show a random handful of training images whose only label is 'rust'.
fig = plt.figure(figsize=(15, 10))
npics = 6
ncols = 3
# add_subplot requires integer grid dimensions (the former `npics/2` was a
# float). Ceiling division gives exactly as many rows as the images need.
nrows = -(-npics // ncols)

rust_rows = train_read[train_read['labels'] == 'rust']
image_list = rust_rows['image'].sample(frac=1)[:npics].to_list()
for position, img in enumerate(image_list, start=1):
    sample_img = io.imread(os.path.join(train_im_path, img))
    ax = fig.add_subplot(nrows, ncols, position, xticks=[], yticks=[])
    ax.imshow(sample_img)
fig.suptitle('All Rusty')
plt.tight_layout()
plt.show()

4. Complex

In [None]:
# Show a random handful of training images whose only label is 'complex'.
fig = plt.figure(figsize=(15, 10))
npics = 6
ncols = 3
# add_subplot requires integer grid dimensions (the former `npics/2` was a
# float). Ceiling division gives exactly as many rows as the images need.
nrows = -(-npics // ncols)

complex_rows = train_read[train_read['labels'] == 'complex']
image_list = complex_rows['image'].sample(frac=1)[:npics].to_list()
for position, img in enumerate(image_list, start=1):
    sample_img = io.imread(os.path.join(train_im_path, img))
    ax = fig.add_subplot(nrows, ncols, position, xticks=[], yticks=[])
    ax.imshow(sample_img)
fig.suptitle('All Complex')
plt.tight_layout()
plt.show()

5. Frog Eye Leaf Spot

In [None]:
# Show a random handful of training images whose only label is 'frog_eye_leaf_spot'.
fig = plt.figure(figsize=(15, 10))
npics = 6
ncols = 3
# add_subplot requires integer grid dimensions (the former `npics/2` was a
# float). Ceiling division gives exactly as many rows as the images need.
nrows = -(-npics // ncols)

frog_eye_rows = train_read[train_read['labels'] == 'frog_eye_leaf_spot']
image_list = frog_eye_rows['image'].sample(frac=1)[:npics].to_list()
for position, img in enumerate(image_list, start=1):
    sample_img = io.imread(os.path.join(train_im_path, img))
    ax = fig.add_subplot(nrows, ncols, position, xticks=[], yticks=[])
    ax.imshow(sample_img)
fig.suptitle('All Frog Eye Leaf Spot')
plt.tight_layout()
plt.show()

6. Powdery Mildew

In [None]:
# Show a random handful of training images whose only label is 'powdery_mildew'.
fig = plt.figure(figsize=(15, 10))
npics = 6
ncols = 3
# add_subplot requires integer grid dimensions (the former `npics/2` was a
# float). Ceiling division gives exactly as many rows as the images need.
nrows = -(-npics // ncols)

mildew_rows = train_read[train_read['labels'] == 'powdery_mildew']
image_list = mildew_rows['image'].sample(frac=1)[:npics].to_list()
for position, img in enumerate(image_list, start=1):
    sample_img = io.imread(os.path.join(train_im_path, img))
    ax = fig.add_subplot(nrows, ncols, position, xticks=[], yticks=[])
    ax.imshow(sample_img)
fig.suptitle('All Powdery Mildew')
plt.tight_layout()
plt.show()

##### Clean the Labels 

Split the original labels on the space character (' ') so that images carrying several labels are represented as a list of individual labels.

In [None]:
def _split_on_space(raw_labels):
    """Return the individual labels of one space-separated label string."""
    return raw_labels.split(' ')


# Replace every raw label string by the list of labels it contains.
train_read['labels'] = train_read['labels'].map(_split_on_space)
train_read.head(3)

##### How many training images have multiple labels?

In [None]:
### check how many images are actually multi-labelled
# Iterate over the column values directly: the former `train_read['labels'][i]`
# lookup is label-based and only works while the frame keeps its default
# 0..n-1 index.
multi_label_count = sum(len(labels) > 1 for labels in train_read['labels'])
print ('number of images having multi-labels: ', multi_label_count)

##### Multi-Label Classification 

We have seen that, out of 18632 images, only 1355 have more than one label. This raises the question of whether it is really necessary to treat this as a multi-label problem. For the specific objectives of this year's competition, we refer to the detailed objectives given in last year's competition.

>Specific Objectives: 
Objectives of ‘Plant Pathology Challenge’ are to train a model using images of training dataset to 1) Accurately classify a given image from testing dataset into different diseased category or a healthy leaf; 2) Accurately distinguish between many diseases, sometimes more than one on a single leaf; 3) Deal with rare classes and novel symptoms; 4) Address depth perception—angle, light, shade, physiological age of the leaf; and 5) Incorporate expert knowledge in identification, annotation, quantification, and guiding computer vision to search for relevant features during learning. 

The first 2 objectives clearly state that the problem at hand should be considered as a multi-label problem. 
>Distinguish many diseases, **sometimes more than one on a single leaf**.

From here onwards, we decide to go via multi-label classification route. 

In [None]:
# Flatten the per-image label lists into one long list, one entry per (image, label) pair.
flat_labels = []
for label_list in train_read['labels']:
    flat_labels.extend(label_list)

dfcheck = pd.DataFrame({'a': flat_labels})
dfcheck.head(3)

In [None]:
# Bar chart of how often each individual label occurs, most frequent first.
label_fig = plt.figure(figsize=(12, 8))

sns.set(font_scale=1.1)
labels_by_frequency = dfcheck['a'].value_counts().index
count_ax = sns.countplot(x='a', data=dfcheck, order=labels_by_frequency)
tick_labels = count_ax.get_xticklabels()
count_ax.set_xticklabels(tick_labels, rotation=80)
plt.xlabel('Labels', fontsize=12)
plt.ylabel('Count', fontsize=12)
plt.show()

#### 2 Main Characteristics of Multi-Label Classification 

Multi-Label: labels are **not** mutually exclusive; a single image can carry several labels at once.

1. The final activation function, irrespective of the neural-net structure, should be `sigmoid` (not `softmax`), so that every class gets its own independent probability.
2. Loss Function needs to be Binary Cross Entropy, and not categorical cross entropy.  It is independent for each vector component (class/label), meaning that the loss computed for every CNN output vector component is not affected by other component values. 

#### Binarize the Labels 

1. We will use Scikit-Learn [`MultiLabelBinarizer`](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MultiLabelBinarizer.html) to create the tabulated matrix 
2. Turn this matrix into a dataframe. 
3. Concatenate with the original dataframe, so our final training data is of the form images col and binarized labels cols

In [None]:
### it is important now to binarize the labels

# Fit the binarizer on the per-image label lists and wrap the resulting 0/1
# matrix in a frame that shares train_read's index, with one column per class.
mlb = MultiLabelBinarizer()
all_labels = train_read['labels'].tolist()
binary_matrix = mlb.fit_transform(all_labels)

train_labels_binary = pd.DataFrame(
    binary_matrix,
    index=train_read.index,
    columns=mlb.classes_,
)
train_labels_binary.head(3)

In [None]:
# Class names in the binarizer's column order; reused as the target columns later on.
class_labels = [label for label in mlb.classes_]
print(class_labels)

In [None]:
# Training table: the image file name followed by one 0/1 column per class.
image_column = train_read['image']
final_train_read = pd.concat(objs=[image_column, train_labels_binary], axis=1)
final_train_read.head(3)

In [None]:
# # calculate class weight 

# positive_weights = {}
# negative_weights = {}
# for c in mlb.classes_:
#     positive_weights[c] = final_train_read.shape[0]/(2*np.count_nonzero(final_train_read[c]==1))
#     negative_weights[c] = final_train_read.shape[0]/(2*np.count_nonzero(final_train_read[c]==0))

# print(positive_weights)
# print ('\n')
# print(negative_weights)    

In [None]:
# Directory of the original training images (same value as assigned in the
# first preview cell); the only code that used it in this cell is commented out below.
train_im_path = "../input/plant-pathology-2021-fgvc8/train_images" 

# # let's load some images 
# fig = plt.figure(figsize=(15, 10))
# npics= 16
# count = 1
# for i in range(npics):
# #   ipic = i # use this to see original and augmented image side by side
#   ipic = np.random.choice(train_read.shape[0])
#   sample = os.path.join(train_im_path, train_read['image'][ipic])
#   sample_img = io.imread(sample)   
#   ax = fig.add_subplot(npics/4 , 4, count, xticks=[],yticks=[])
#   title_string = ''
#   if len(train_read['labels'][ipic]) > 1:  
#       for x in train_read['labels'][ipic]:            
#         title_string = title_string + x +',' +f"{sample_img.shape}"
#   else: 
#     title_string += train_read['labels'][ipic][0] + f"{sample_img.shape}"
#   ax.set_title(title_string, fontsize=10)  
#   plt.imshow(sample_img)
#   count = count + 1  

# plt.tight_layout()
# plt.show()   

#### Problem with Original Image Size: Adding New Data-Set

The original images are huge. Wouldn't it be great to have infinite time and resources to feed these high-resolution images directly to our network? Let's discuss that some other time; for now we need to work with smaller versions of them.

I found resized images for this dataset, [resized-plant2021](https://www.kaggle.com/ankursingh12/resized-plant2021) by Ankur Singh. He has already downsampled the images to sizes of 256, 384, 512 & 640px. Let's get started with the 384 ones.

In [None]:
# Directory with the pre-resized copies of the training images.
resized_train_im_path = "../input/resized-plant2021/img_sz_384"

fig = plt.figure(figsize=(15, 10))
npics = 16
ncols = 4
# add_subplot requires integer grid dimensions (the former `npics/4` was a float).
nrows = -(-npics // ncols)
for position in range(1, npics + 1):
    # Use `ipic = position - 1` instead to walk the frame in order, e.g. to
    # compare original and augmented images side by side.
    ipic = np.random.choice(train_read.shape[0])
    # ipic is a row position, so select positionally rather than by index label.
    row = train_read.iloc[ipic]
    sample_img = io.imread(os.path.join(resized_train_im_path, row['image']))
    ax = fig.add_subplot(nrows, ncols, position, xticks=[], yticks=[])
    # All labels comma-separated, followed once by the image shape (the shape
    # used to be repeated after every label of a multi-label image).
    title_string = ','.join(row['labels']) + f"{sample_img.shape}"
    ax.set_title(title_string, fontsize=10)
    ax.imshow(sample_img)

plt.tight_layout()
plt.show()


Great ! so this will be our input image data-set. 


#### Augmentation and `ImageDataGenerator`
Next step is to include augmentation, for augmentation we can directly use [ImageDataGenerator](https://keras.io/api/preprocessing/image/) as this is not so complicated like facial keypoints data. I have also added blurring of images as a preprocessing function , because for one of the particular labels 'powdery-mildew' looked a bit blurry. 

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import cv2


def blur(img):
    """Return `img` smoothed with a 5x5 box filter (cv2.blur)."""
    return cv2.blur(img, (5, 5))


# Both generators must use the same split fraction: flow_from_dataframe carves
# the subsets out of the dataframe by row position, so equal fractions give
# complementary training / validation rows.
VALIDATION_SPLIT = 0.20
# Matches the (256, 256, 3) input the InceptionV3 model is built with; this is
# also the Keras default, spelled out here to make the coupling visible.
TARGET_SIZE = (256, 256)

# Training pipeline: rescale + blur + random geometric augmentation.
train_DataGen = ImageDataGenerator(rescale=1/255.0,
                                   zoom_range=0.2,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1,
                                   rotation_range=15,
                                   fill_mode="nearest",
                                   preprocessing_function=blur,
                                   validation_split=VALIDATION_SPLIT)

# Validation pipeline: same deterministic preprocessing (rescale + blur) but no
# random augmentation, so validation metrics are measured on unaltered images
# and are comparable from epoch to epoch.
valid_DataGen = ImageDataGenerator(rescale=1/255.0,
                                   preprocessing_function=blur,
                                   validation_split=VALIDATION_SPLIT)

# Options shared by both flows.
flow_options = dict(directory=resized_train_im_path,
                    x_col='image',
                    y_col=class_labels,
                    color_mode="rgb",
                    target_size=TARGET_SIZE,
                    batch_size=32,
                    class_mode="raw",
                    seed=40)

train_data_flow = train_DataGen.flow_from_dataframe(final_train_read,
                                                    subset="training",
                                                    shuffle=True,
                                                    **flow_options)

# No shuffling needed for validation: the metrics do not depend on batch order.
valid_data_flow = valid_DataGen.flow_from_dataframe(final_train_read,
                                                    subset="validation",
                                                    shuffle=False,
                                                    **flow_options)

#### Check Some Augmented Images

In [None]:
# Visualize Augmented Images 
# x,y = train_data_flow.next()
# for i in range(0,1):
#     image = x[i]
#     plt.imshow(image)
#     plt.show()
fig = plt.figure(figsize=(15, 10))
npics = 16
ncols = 4
# add_subplot requires integer grid dimensions (the former `npics/4` was a float).
nrows = -(-npics // ncols)

# One augmented batch is enough for the preview; drawing a fresh batch for
# every picture only to keep its first image was wasted work. The builtin
# next() is used because it works on any iterator, whereas the `.next()`
# method is not guaranteed to exist on every Keras version.
x, y = next(train_data_flow)
for position, (image, label) in enumerate(zip(x[:npics], y[:npics]), start=1):
    ax = fig.add_subplot(nrows, ncols, position, xticks=[], yticks=[])
    # The label is a 0/1 vector in class_labels order; pass it as text.
    ax.set_title(str(label), fontsize=10)
    ax.imshow(image)

plt.tight_layout()
plt.show()

### Deep Neural Net: 

First I started by building ['Inception Like'](https://www.kaggle.com/suvoooo/facial-key-points-runthrough) from scratch and training it. Even with 50 epochs (due to large training data this takes a lot of time) it wasn't possible to reach F1 score of 77%. 

I checked some other notebooks and realized almost every one of them used pre-trained networks and corresponding weights and biases. Based on these, I finally concluded on InceptionV3 network pre-trained with Imagenet. Added some dense layers at the bottom of the original structure and as discussed before I've used sigmoid activation in the final layer.   

Total params: 21,935,062

Trainable params: 21,900,630

Non-trainable params: 34,432

In [None]:
 
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dropout, \
     Flatten, BatchNormalization, Dense, Concatenate, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.activations import elu, relu
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
# from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2

from tensorflow.keras.applications import InceptionV3

In [None]:
#### build an inception like model

# def inception_like(input_layer, filter1, filter2, filter3):
#   # 1x1 conv
#   conv1 = Conv2D(filter1, (1,1), padding='same', activation='relu')(input_layer)
#   bn1 = BatchNormalization()(conv1)
#   # 3x3 conv
#   conv3 = Conv2D(filter2, (3,3), padding='same', activation='relu')(input_layer)
#   bn3 = BatchNormalization()(conv3)
#   # 5x5 conv
#   conv5 = Conv2D(filter3, (5,5), padding='same', activation='relu')(input_layer)
#   bn5 = BatchNormalization()(conv5)
#   # 3x3 max pooling
# #   pool = MaxPooling2D((3,3), strides=(1,1), padding='same')(input_layer)
#   pool = MaxPooling2D((2,2), strides=(1,1), padding='same')(input_layer)
#   # concatenate filters, assumes filters/channels last
#   layer_out = Concatenate(axis=-1)([ bn3, bn5, pool])
#   return layer_out

In [None]:
# input_im = Input(shape=(256, 384, 3))
# def model2():
#   x = Conv2D(64, (3, 3), padding='same', strides=(2, 2), activation='relu', )(input_im)
# #   x = MaxPooling2D((2, 2))(x)
# #   x = Conv2D(64, (1, 1), padding='same', strides=(1, 1), activation='relu', )(x)
# # #   x =  Conv2D(64, (3, 3), padding='same', strides=(1, 1), activation='relu', )(x) 
#   x = Conv2D(96, (3, 3), padding='same', strides=(1, 1), activation='relu', )(x)
#   x = MaxPooling2D((2, 2))(x)
# #   x = Conv2D(16, (3, 3), padding='same', activation='relu', )(input_im)
# #   x = Conv2D(32, (3, 3), padding='same', activation='relu', )(input_im)  
# #   x = Conv2D(64, (3, 3), padding='same', activation='relu', )(x)  
#   x1 = inception_like(x, 64, 64, 32)
#   x1 = MaxPooling2D((3, 3), padding='same', strides=(2, 2) )(x1)

#   x2 = inception_like(x1, 64, 64, 32)
#   x2 = MaxPooling2D((3, 3), padding='same', strides=(2, 2) )(x2)
  
#   x2_1 = inception_like(x2, 96, 96, 64)
#   x2_1 = MaxPooling2D((3, 3), padding='same', strides=(2, 2) )(x2_1)  

#   x3 = inception_like(x2_1, 96, 128, 64)
#   #x3 = MaxPooling2D((3, 3), padding='same', strides=(2, 2) )(x3)
#   x3 = MaxPooling2D()(x3)

#   x3_1 = inception_like(x3, 128, 192, 128)
#   x3_1 = MaxPooling2D((3, 3), padding='same', strides=(2, 2) )(x3_1) 
#   x4   = inception_like(x3_1, 128, 256, 128)  
#   x4 = GlobalAveragePooling2D()(x4)

#   x4 = Flatten()(x4)
#   x4 = Dense(256, kernel_regularizer=l2(l2=0.03))(x4)
#   x4 = Dropout(0.2)(x4)

#   #x5 = Dense(128, kernel_regularizer=l2(l2=0.02))(x4)
#   #x5 = Dropout(0.1)(x5)

#   pred = Dense(len(mlb.classes_), activation='sigmoid')(x4)
#   model = Model(inputs=input_im, outputs=pred, name='Inception_Like')

#   return model

In [None]:
# ImageNet-pretrained InceptionV3 backbone without its classification top.
pretrain_model = InceptionV3(weights="imagenet",
                             include_top=False,
                             input_shape=(256, 256, 3))

# Classification head, applied in order on top of the backbone output:
# global pooling, then the fully connected layers.
head_layers = [
    GlobalAveragePooling2D(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(16, activation='relu'),
]
x = pretrain_model.output
for head_layer in head_layers:
    x = head_layer(x)

# One sigmoid unit per class.
preds = Dense(len(mlb.classes_), activation='sigmoid')(x)

final_Inception_model = Model(inputs=pretrain_model.input, outputs=preds)

In [None]:
# plant_path_model2 = model2()
# plant_path_model2.summary()

In [None]:
# Print the layer-by-layer summary of the assembled model.
final_Inception_model.summary()

In [None]:
# tf.keras.utils.plot_model(final_Inception_model, show_shapes=True)

In [None]:
class customCallbacks(tf.keras.callbacks.Callback):
  """Print the train and validation loss after every second epoch."""

  def on_epoch_end(self, epoch, logs=None):
    # Keras may pass logs=None; fall back to an empty dict so lookups are safe.
    logs = logs or {}
    # Keras counts epochs from 0; report them 1-based.
    self.epoch = epoch + 1
    if self.epoch % 2 == 0:
      # Report the same 1-based number that the even/odd test uses.
      print(
          'epoch num {}, train loss: {}, validation loss: {}'.format(
              self.epoch, logs.get('loss'), logs.get('val_loss')))


# F1 should go UP. In the default mode='auto' Keras only treats monitors whose
# name contains 'acc' as "higher is better", so 'val_f1_score' would be
# minimised and the LR reduced at the wrong moments; state mode='max' explicitly.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_f1_score', mode='max',
                              factor=0.8, patience=5, min_lr=1e-5, verbose=1)

# With threshold=None the metric turns the arg-max prediction into a one-hot
# vector, which is single-label behaviour. For multi-label outputs every class
# probability has to be thresholded independently.
f1 = tfa.metrics.F1Score(num_classes=len(class_labels), average='macro',
                         threshold=0.5)
adam = Adam(learning_rate=4e-4)

In [None]:
# positive_weights = {}
# negative_weights = {}
# for c in class_labels:
#     positive_weights[c] = final_train_read.shape[0]/(2*np.count_nonzero(final_train_read[c]==1))
#     negative_weights[c] = final_train_read.shape[0]/(2*np.count_nonzero(final_train_read[c]==0))
# print(positive_weights)
# print(negative_weights)

In [None]:
# def loss_fn(y_true,y_pred):
#     loss = 0
#     loss -= (positive_weights['complex'])*y_true[0]*K.log(y_pred[0]) + negative_weights['complex']*(1-y_true[0])*K.log(1-y_pred[0]))
#     loss -= (positive_weights['frog_eye_leaf_spot']*y_true[1]*K.log(y_pred[1]) + negative_weights['frog_eye_leaf_spot']*(1-y_true[1])*K.log(1-y_pred[1]))
#     loss -= (positive_weights['healthy']*y_true[2]*K.log(y_pred[2]) + negative_weights['healthy']*(1-y_true[2])*K.log(1-y_pred[2]))
#     loss -= (positive_weights['powdery_mildew']*y_true[3]*K.log(y_pred[3]) + negative_weights['powdery_mildew']*(1-y_true[3])*K.log(1-y_pred[3]))
#     loss -= (positive_weights['rust']*y_true[4]*K.log(y_pred[4]) + negative_weights['rust']*(1-y_true[4])*K.log(1-y_pred[4]))
#     loss -= (positive_weights['scab']*y_true[5]*K.log(y_pred[5]) + negative_weights['scab']*(1-y_true[5])*K.log(1-y_pred[5]))    

#     return loss

In [None]:
def multi_category_focal_loss2(gamma=2., alpha=.25):
    """
    Build a focal-loss function for multi-class / multi-label targets.

    Each element is weighted by `alpha` where y_true is 1 and by `1 - alpha`
    where y_true is 0, and its cross-entropy is scaled by
    (1 - p_t) ** gamma, with p_t the probability assigned to the true value.
    The returned function yields the mean over all elements.

    Tuning hints:
      * the model under-fits / struggles to learn: try this loss;
      * the model predicts 1 too eagerly: lower alpha;
      * the model sticks to 0 (or a constant): raise alpha.

    Usage:
     model.compile(loss=[multi_category_focal_loss2(alpha=0.25, gamma=2)], metrics=["accuracy"], optimizer=adam)
    """
    epsilon = 1.e-7
    gamma = float(gamma)
    alpha = tf.constant(alpha, dtype=tf.float32)

    def multi_category_focal_loss2_fixed(y_true, y_pred):
        targets = tf.cast(y_true, tf.float32)
        # Keep probabilities away from 0 and 1 so the log below stays finite.
        probs = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)

        # Per-element class weight: alpha for positives, 1 - alpha for negatives.
        alpha_t = targets * alpha + (tf.ones_like(targets) - targets) * (1 - alpha)
        # Probability the model assigns to the true value of each element.
        p_true = targets * probs + (1 - targets) * (1 - probs)
        cross_entropy = -tf.math.log(p_true)
        # Down-weight elements that are already predicted well.
        modulating = tf.pow(1. - p_true, gamma)
        focal = modulating * cross_entropy * alpha_t
        return tf.reduce_mean(focal)

    return multi_category_focal_loss2_fixed
# plant_path_model2.compile(optimizer = adam, 
#               loss = "binary_crossentropy",
#               metrics = [tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.AUC()])
# plant_path_model2.compile(optimizer = adam, 
#               loss=[multi_category_focal_loss2(alpha=0.25, gamma=2)],
#               metrics = [tf.keras.metrics.AUC(), f1])

# plant_path_model2.compile(optimizer = adam, 
#               loss= "binary_crossentropy",
#               metrics = [tf.keras.metrics.AUC(), f1])

# Name the AUC metric explicitly: the plotting cell reads history.history['auc']
# and ['val_auc'], but an unnamed AUC() gets an auto-numbered name ('auc_1', ...)
# whenever this cell is executed again in the same session.
final_Inception_model.compile(optimizer = adam,
              loss= "binary_crossentropy",
              metrics = [tf.keras.metrics.AUC(name='auc'), f1])

In [None]:
# history = plant_path_model2.fit(train_data_flow, 
#                     epochs = 5, 
#                     verbose = 1, 
#                     validation_data = valid_data_flow,            
#                     shuffle = True, callbacks=[reduce_lr])

# Train for 30 epochs, validating after each one; `history` keeps the per-epoch metrics.
history = final_Inception_model.fit(
    x=train_data_flow,
    validation_data=valid_data_flow,
    epochs=30,
    callbacks=[reduce_lr],  # can also add the customcallback
    shuffle=True,
    verbose=1,
)

#### Plot Training Curves 

Plot all the metrics vs epochs for training and validation. 

In [None]:
fig = plt.figure(figsize=(12, 5))

# One panel per metric: (history key, legend suffix, y-axis label, log-scale?).
# The first panel shows the F1 score, so it is labelled as such (it used to be
# labelled 'Accuracy').
panels = [
    ('f1_score', 'f1-Score', 'F1 Score', False),
    ('loss', 'Loss', 'Loss', True),
    ('auc', 'AUC', 'AUC', False),
]
for position, (key, legend_name, y_label, log_scale) in enumerate(panels, start=1):
    ax = fig.add_subplot(1, len(panels), position)
    ax.plot(history.history[key], label='Train-' + legend_name)
    ax.plot(history.history['val_' + key], label='Valid-' + legend_name)
    if log_scale:
        ax.set_yscale('log')
    ax.set_ylabel(y_label)
    ax.set_xlabel('Epoch')
    ax.legend(fontsize=12)

plt.tight_layout()
plt.show()

In [None]:
# check the final scores on the validation data

# loss, auc, f1 = plant_path_model2.evaluate(valid_data_flow,verbose=1)

# Use dedicated names for the results: unpacking into `f1` would overwrite the
# F1 metric object that the compile cell passes to the model, breaking any
# later re-compile in the same session.
val_loss, val_auc, val_f1 = final_Inception_model.evaluate(valid_data_flow, verbose=1)
print ('check final loss, auc and f1 score: ', val_loss, val_auc, val_f1)

### Prepare for Submission  

In [None]:
# The sample submission lists the test image names; it doubles as the test manifest.
test_csv = "../input/plant-pathology-2021-fgvc8/sample_submission.csv"
test_df = pd.read_csv(filepath_or_buffer=test_csv)
test_df.head(3)

For Test Data we only do scaling and no other augmentation (of course!) 

In [None]:
# Test images are only rescaled, no augmentation.
# NOTE(review): the training/validation generators in this file also apply the
# `blur` preprocessing function, this one does not -- confirm that is intended.
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale = 1./255)
test_im_path = '../input/plant-pathology-2021-fgvc8/test_images/'

# shuffle MUST be False: the predictions are later written back to test_df row
# by row, so the generator has to yield the images in test_df order. With
# shuffle=True every image would receive another image's labels.
# `subset` is omitted because this generator has no validation split.
test_data_flow = test_datagen.flow_from_dataframe(test_df,
    directory = test_im_path,
    x_col = 'image',
    y_col = None,
    color_mode="rgb",
    batch_size=32,
    target_size = (256,256),
    class_mode=None,
    shuffle=False)

#### Get the Predictions on the Test Data

In [None]:
# predictions = plant_path_model2.predict(test_data_flow)
# Run the trained model over the test generator.
# NOTE(review): presumably one row of class scores per test image -- confirm
# against the printed shape.
predictions = final_Inception_model.predict(test_data_flow)

print ('num predictions: ', predictions.shape)

In [None]:
# Dump the raw model outputs for a quick visual sanity check.
print ('check predictions: ', predictions)

#### Predictions to Labels 

1. Set the threshold (like a hyperparameter) and get back indices for which the predictions are over this threshold for an image. 
2. From the indices get back the original labels. 
3. If there is more than one label, join them into a single string with a space in between.

In [None]:
# Minimum score for a class to be reported for an image.
set_threshold = 0.30
predictions_list = predictions.tolist()

#['complex', 'frog_eye_leaf_spot', 'healthy', 'powdery_mildew', 'rust', 'scab']

# For every image collect the positions of all classes scoring at or above the
# threshold. enumerate() supplies the true position of each score; the former
# `pred.index(score)` returned the FIRST position holding that value, which is
# wrong when two classes have the same score (and rescans the list each time).
indices = []
for pred in predictions_list:
    selected = [idx for idx, score in enumerate(pred) if score >= set_threshold]
    if not selected:
        # Nothing passed the threshold: fall back to the single best class.
        selected = [int(np.argmax(pred))]
    indices.append(selected)

print(indices)


# Position -> class name, in the same order as the model's output units.
class_labels_dict = dict(enumerate(class_labels))
print (class_labels_dict)

# Space-separated label string per image.
testlabels = [
    ' '.join(str(class_labels_dict[i]) for i in im)
    for im in indices
]

print(testlabels)

#### View the Test Images and Corresponding Predictions

In [None]:

fig = plt.figure(figsize=(9, 6))
# Never ask for more pictures than there are test rows.
npics = min(3, len(test_df))
ncols = 3
# add_subplot requires integer grid dimensions; the former `npics/1` was a
# float and also reserved one grid row per image although a row holds three.
nrows = max(1, -(-npics // ncols))
for ipic in range(npics):
    sample = os.path.join(test_im_path, test_df['image'].iloc[ipic])
    sample_img = io.imread(sample)
    ax = fig.add_subplot(nrows, ncols, ipic + 1, xticks=[], yticks=[])
    # Title each image with the labels predicted for it.
    ax.set_title(testlabels[ipic], fontsize=10)
    ax.imshow(sample_img)

plt.tight_layout()
plt.show()

In [None]:
### submission

# Store the predicted label strings in the submission frame. The list is
# assigned row by row, so testlabels has to be in the same order as test_df.
test_df['labels'] = testlabels
test_df.head()

In [None]:
# Write the submission file without the index column, then display the frame.
test_df.to_csv('submission.csv', index=False)
test_df