In [1]:
# Note: colab has changed the tensorflow version from 27 March 2020
# %tensorflow_version 1.x

import sys
import os
# from keras.utils import to_categorical
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
# from keras.optimizers import SGD
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.models import load_model
import glob
import numpy as np
import cv2
import time
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from sklearn.metrics import roc_curve, roc_auc_score, confusion_matrix
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report


img_size = 256

In [62]:
# load train and test dataset


# classes: 'Yellow_Leaf_Curl_Virus', 'Target_Spot', 'Spider_mites', 
#          'Septoria_leaf_spot', 'Mosaic_virus', 'Leaf_Mold', 'Late_blight', 
#          'Bacterial_spot', 'Early_blight', 'Healthy'

def prepare_data(path, img_size):
    X = []
    Y = []
    filenames = [img for img in glob.glob(path)]
    for i in tqdm(filenames):
        img = cv2.imread(i)
        img = cv2.resize(img, dsize=(img_size, img_size))
        X.append(img)
        split = i.split(os.sep)[-2]
        if split == "Tomato___Tomato_Yellow_Leaf_Curl_Virus":
            Y.append(0)
        elif split == "Tomato___Target_Spot":
            Y.append(1)
        elif split == "Tomato___Spider_mites Two-spotted_spider_mite":
            Y.append(2)
        elif split == "Tomato___Septoria_leaf_spot":
            Y.append(3)
        elif split == "Tomato___Tomato_mosaic_virus":
            Y.append(4)
        elif split == "Tomato___Leaf_Mold":
            Y.append(5)
        elif split == "Tomato___Late_blight":
            Y.append(6)
        elif split == "Tomato___Bacterial_spot":
            Y.append(7)
        elif split == "Tomato___Early_blight":
            Y.append(8)
        elif split == "Tomato___healthy":
            Y.append(9)  
    X = np.asarray(X)
    return X, Y

In [63]:
x_train, y_train = prepare_data(r'D:\Work\Projects\Meity\tomato-disease-prediction\model01\dataset\train\*\*', img_size)

100%|█████████████████████████████████████████████████████████| 1000/1000 [00:01<00:00, 515.47it/s]


In [64]:
print(len(y_train))

1000


In [65]:
x_val, y_val = prepare_data(r'D:\Work\Projects\Meity\tomato-disease-prediction\model01\dataset\val\*\*', img_size)

100%|███████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 235.79it/s]


In [66]:
print("x_train in MBs:", x_train.nbytes/(1024*1024))
print("x_val in MBs:", x_val.nbytes/(1024*1024))

x_train in MBs: 187.5
x_val in MBs: 37.5


In [67]:
on_save = 1
on_load = 1

In [68]:
from numpy import save

if on_save == 1:
  save(r'D:\Work\Projects\Meity\tomato-disease-prediction\model01\saved_arrays\tomato\trainX_'+str(img_size)+'.npy', x_train)
  save(r'D:\Work\Projects\Meity\tomato-disease-prediction\model01\saved_arrays\tomato\trainY_'+str(img_size)+'.npy', y_train)
  save(r'D:\Work\Projects\Meity\tomato-disease-prediction\model01\saved_arrays\tomato\valX_'+str(img_size)+'.npy', x_val)
  save(r'D:\Work\Projects\Meity\tomato-disease-prediction\model01\saved_arrays\tomato\valY_'+str(img_size)+'.npy', y_val)


In [69]:
from numpy import load

if on_load == 1:
  train_x = load(r'D:\Work\Projects\Meity\tomato-disease-prediction\model01\saved_arrays\tomato\trainX_'+str(img_size)+'.npy')
  train_y = load(r'D:\Work\Projects\Meity\tomato-disease-prediction\model01\saved_arrays\tomato\trainY_'+str(img_size)+'.npy')
  val_x   = load(r'D:\Work\Projects\Meity\tomato-disease-prediction\model01\saved_arrays\tomato\valX_'+str(img_size)+'.npy')
  val_y   = load(r'D:\Work\Projects\Meity\tomato-disease-prediction\model01\saved_arrays\tomato\valY_'+str(img_size)+'.npy')

In [70]:
# Scale up the pixels

def prep_pixels(trainX, valX):
	train_norm = trainX.astype('float32')
	val_norm = valX.astype('float32')

	train_norm = train_norm / 255.0
	val_norm = val_norm / 255.0

	return train_norm, val_norm

In [71]:
trainX, valX = prep_pixels(train_x, val_x)

# Hot Encoding
trainY = to_categorical(train_y)
valY = to_categorical(val_y)

In [72]:
print(trainX.shape)
print(trainY.shape)

print(valX.shape)
print(valY.shape)

(1000, 256, 256, 3)
(1000, 10)
(200, 256, 256, 3)
(200, 10)


In [73]:
print("Memory size of a NumPy array in GBs:", trainX.nbytes/(1024*1024*1024))

Memory size of a NumPy array in GBs: 0.732421875


In [74]:
#@title Default title text
# define cnn model
def define_model():
	kernel = (3, 3)
	first_conv  = 32
	second_conv = first_conv * 2
	third_conv  = second_conv * 2

	model = Sequential()
	model.add(Conv2D(first_conv, kernel, activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(img_size, img_size, 3)))
	model.add(BatchNormalization())
	
	model.add(Conv2D(first_conv, kernel, activation='relu', kernel_initializer='he_uniform', padding='same'))
	model.add(BatchNormalization())
	model.add(MaxPooling2D((2, 2)))
	
	model.add(Dropout(0.2))
	model.add(Conv2D(second_conv, kernel, activation='relu', kernel_initializer='he_uniform', padding='same'))
	model.add(BatchNormalization())
	
	model.add(Conv2D(second_conv, kernel, activation='relu', kernel_initializer='he_uniform', padding='same'))
	model.add(BatchNormalization())
	model.add(MaxPooling2D((2, 2)))
	
	model.add(Dropout(0.3))
	model.add(Conv2D(third_conv, kernel, activation='relu', kernel_initializer='he_uniform', padding='same'))
	model.add(BatchNormalization())
	
	model.add(Conv2D(third_conv, kernel, activation='relu', kernel_initializer='he_uniform', padding='same'))
	model.add(BatchNormalization())
	model.add(MaxPooling2D((2, 2)))
	
	model.add(Dropout(0.4))
	model.add(Flatten())
 
	model.add(Dense(third_conv, activation='relu', kernel_initializer='he_uniform'))
	model.add(BatchNormalization())
 
	model.add(Dropout(0.5))
	model.add(Dense(10, activation='softmax'))
 
	# compile model
	opt = SGD(lr=0.001, momentum=0.9)
	model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
	model.summary()
	return model

# define model
model = define_model()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 256, 256, 32)      896       
_________________________________________________________________
batch_normalization (BatchNo (None, 256, 256, 32)      128       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 256, 256, 32)      9248      
_________________________________________________________________
batch_normalization_1 (Batch (None, 256, 256, 32)      128       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 128, 128, 32)      0         
_________________________________________________________________
dropout (Dropout)            (None, 128, 128, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 128, 128, 64)      1

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [10]:
# datagen = ImageDataGenerator(width_shift_range=0.1, height_shift_range=0.1, horizontal_flip=True)
# it_train = datagen.flow(trainX, trainY, batch_size = 5)

In [75]:
pathModelSave = r'D:\Work\Projects\Meity\tomato-disease-prediction\model01\SavedModels\tomato_'+str(img_size)+'.hdf5'
pathToSaveCSV = r'D:\Work\Projects\Meity\tomato-disease-prediction\model01\SavedModels\csv\tomato_'+str(img_size)+'.csv'

In [None]:
from datetime import datetime
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

checkpoint = ModelCheckpoint(pathModelSave, monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)
# early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=20, verbose=1, mode='auto')
csv_logger = CSVLogger(pathToSaveCSV, append=False, separator=',')

start = datetime.now()

steps = int(trainX.shape[0] / 32)
history = model.fit(
          trainX, trainY,
          batch_size= 16, 
          epochs= 30,
          callbacks = [checkpoint, csv_logger],
          validation_data = (valX, valY),
          use_multiprocessing = True,
          verbose=1)

duration = datetime.now() - start

print("Total training time is:", duration)

In [None]:
# evaluate model
_, acc = model.evaluate(valX, valY, verbose=0)
print('without model loading, validation acc is> %.3f' % (acc * 100.0))

In [None]:
import tensorflow as tf

gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
session = tf.compat.v1.InteractiveSession(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))

print("gpu_options:", gpu_options)
print("session:", session)

new_model = load_model(pathModelSave)
_, acc_train = new_model.evaluate(trainX, trainY, verbose=1)
print('training acc is> %.3f' % (acc_train * 100.0))

In [None]:
_val, acc_val = new_model.evaluate(valX, valY, verbose=0)
print('validation acc is> %.3f' % (acc_val * 100.0))

In [None]:
# it is used in below intermediate visualization

path_healthy = r'D:\Work\Projects\Meity\tomato-disease-prediction\model01\dataset\train\Tomato___Late_blight\973b8b82-cccd-433c-a868-5dee559b2dce___GHLB Leaf 2.6 Day 12.JPG'
img_vis = cv2.imread(path_healthy)
img_vis = cv2.resize(img_vis, dsize=(img_size, img_size))
img_vis = cv2.cvtColor(img_vis, cv2.COLOR_BGR2RGB)
# plt.imshow(cv2.cvtColor(img_vis, cv2.COLOR_BGR2RGB))
plt.imshow(img_vis)
print(img_vis.shape)

In [84]:
model = load_model(pathModelSave)

In [None]:
# It is for intermediate conv2d layers visualization
from tensorflow.keras.models import Model

layer_outputs = [layer.output for layer in model.layers]
activation_model = Model(inputs = model.input, outputs=layer_outputs)
activations = activation_model.predict(img_vis.reshape(1,img_size,img_size,3))

def display_activation(activations, col_size, row_size, act_index): 
    activation = activations[act_index]
    activation_index = 0
    fig, ax = plt.subplots(row_size, col_size, figsize=(row_size*4.5, col_size*2.5)) # sharex=True, sharey=True
    for row in range(0,row_size):
        for col in range(0,col_size):
            ax[row][col].imshow(activation[0][:, :, activation_index], cmap='PuOr')
            ax[row,col].axis('off')
            activation_index += 1

display_activation(activations, 5, 5, 17)

In [86]:

import itertools

def convert_string_label(intLabel):
  Y = []
  length = len(intLabel)
    
  for i in range(length):
    if intLabel[i] == 0:
      Y.append("Yellow_Leaf_Curl_Virus")
    elif intLabel[i] == 1:
      Y.append("Target_Spot")
    elif intLabel[i] == 2:
      Y.append("Spider_mites")
    elif intLabel[i] == 3:
      Y.append("Septoria_leaf_spot")
    elif intLabel[i] == 4:
      Y.append("Mosaic_virus")
    elif intLabel[i] == 5:
      Y.append("Leaf_Mold")
    elif intLabel[i] == 6:
      Y.append("Late_blight")
    elif intLabel[i] == 7:
      Y.append("Bacterial_spot")
    elif intLabel[i] == 8:
      Y.append("Early_blight")
    elif intLabel[i] == 9:
      Y.append("Healthy")    
  return Y
  
    
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
        
    figure(num=None, figsize=(8, 6), dpi=120, facecolor='w', edgecolor='red')
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")
    
    plt.ylabel('True label')
    plt.xlabel('Predicted label')    

In [None]:
model = load_model(pathModelSave)

y_pred = model.predict(valX)
y_pred = np.rint(y_pred.argmax(axis=1))
y_pred = y_pred.tolist()

xyz = [np.argmax(y, axis=None, out=None) for y in val_y]
valY = np.rint(xyz)
valY = valY.tolist()

y_true      = convert_string_label(valY)
y_predicted = convert_string_label(y_pred)

# print("y_true:", y_true, "\n\n")
# print("y_predicted:", y_predicted)

cnf_matrix = confusion_matrix(y_true, y_predicted, labels=['Yellow_Leaf_Curl_Virus', 'Target_Spot', 'Spider_mites', 'Septoria_leaf_spot', 'Mosaic_virus', 'Leaf_Mold', 'Late_blight', 'Bacterial_spot', 'Early_blight', 'Healthy'])
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=['Yellow_Leaf_Curl_Virus', 'Target_Spot', 'Spider_mites', 'Septoria_leaf_spot', 'Mosaic_virus', 'Leaf_Mold', 'Late_blight', 'Bacterial_spot', 'Early_blight', 'Healthy'], title='Confusion matrix')

In [88]:
# plot diagnostic learning curves
def summarize_diagnostics(history):
	
  # plot loss
  plt.subplot(211)
  plt.title('Cross Entropy Loss')
  plt.plot(history.history['loss'], color='blue', label='train')
  plt.plot(history.history['val_loss'], color='orange', label='test')
  plt.show()

  # plot accuracy
  plt.subplot(212)
  plt.title('Classification Accuracy')
  plt.plot(history.history['accuracy'], color='blue', label='train')
  plt.plot(history.history['val_accuracy'], color='orange', label='test')
  plt.show()
  
  # save plot to file
  # filename = sys.argv[0].split('/')[-1]
  # pyplot.savefig(filename + '_plot.png')
  # pyplot.close()

  plt.plot(history.history["accuracy"])
  plt.plot(history.history['val_accuracy'])
  plt.plot(history.history['loss'])
  plt.plot(history.history['val_loss'])
  plt.title("model accuracy")

  plt.ylabel("Accuracy")
  plt.xlabel("Epoch")
  plt.legend(["Training Accuracy","Validation Accuracy","Training loss","Validation Loss"])
  plt.show()

  # Plot training & validation accuracy values
  plt.plot(history.history['accuracy'])
  plt.plot(history.history['val_accuracy'])
  plt.title('Model accuracy')
  plt.ylabel('Accuracy')
  plt.xlabel('Epoch')
  plt.legend(['Training Accuracy', 'Validation Accuracy'], loc='upper left')
  plt.show()

  # Plot training & validation loss values
  plt.plot(history.history['loss'])
  plt.plot(history.history['val_loss'])
  plt.title('Model loss')
  plt.ylabel('Loss')
  plt.xlabel('Epoch')
  plt.legend(['Training Loss', 'Validation Loss'], loc='upper left')
  plt.show()


In [None]:
# learning curves
summarize_diagnostics(history)

In [90]:
# this is the new prediction function <optimized> to calcultae prediction time per image
model = load_model(pathModelSave)

def new_prediction_fun(path):
  filenames = [img for img in glob.glob(path + '/*')]
  count = 0
  total_time = 0
  YLCV, TS, SM, SLS, MV, LM, LB, BS, EB, H = 0,0,0,0,0,0,0,0,0,0
  for i in filenames:
      count += 1
      tic = time.clock()
      img = cv2.imread(i)
      img = cv2.resize(img, dsize=(img_size, img_size))
      img = img.astype('float32')
      img = img / 255.0
      img = img[np.newaxis, :]
      p = model.predict(img)[0]
      max_val_index = p.argmax()
      toc = time.clock()
      time_t = toc-tic
      
      if (max_val_index == 0):
        YLCV += 1
      elif (max_val_index == 1):
        TS += 1
      elif (max_val_index == 2):
        SM += 1
      elif (max_val_index == 3):
        SLS += 1
      elif (max_val_index == 4):
        MV += 1
      elif (max_val_index == 5):
        LM += 1
      elif (max_val_index == 6):
        LB += 1
      elif (max_val_index == 7):
        BS += 1
      elif (max_val_index == 8):
        EB += 1
      elif (max_val_index == 9):
        H += 1  
      
    
      total_time += time_t
  
  print("\n")    
  print("Total prediction time for "+str(count)+" images is: ", total_time, "sec")
  print("Average prediction time is:", total_time/count, "sec")
  
  print("\n")
  print("YLCV predictions out of total "+str(count)+" "+str(os.path.split(path)[1])+" images are: ", YLCV)
  print("TS predictions out of total "+str(count)+" "+str(os.path.split(path)[1])+" images are: ", TS)
  print("SM predictions out of total "+str(count)+" "+str(os.path.split(path)[1])+" images are: ", SM)
  print("SLS predictions out of total "+str(count)+" "+str(os.path.split(path)[1])+" images are: ", SLS)
  print("MV predictions out of total "+str(count)+" "+str(os.path.split(path)[1])+" images are: ", MV)
  print("LM predictions out of total "+str(count)+" "+str(os.path.split(path)[1])+" images are: ", LM)
  print("LB predictions out of total "+str(count)+" "+str(os.path.split(path)[1])+" images are: ", LB)
  print("BS predictions out of total "+str(count)+" "+str(os.path.split(path)[1])+" images are: ", BS)
  print("EB predictions out of total "+str(count)+" "+str(os.path.split(path)[1])+" images are: ", EB)
  print("H predictions out of total "+str(count)+" "+str(os.path.split(path)[1])+" images are: ", H)

In [None]:
# augumented testing, more images
new_prediction_fun(r'D:\Work\Projects\Meity\tomato-disease-prediction\model01\dataset\train\Tomato___Late_blight')

In [None]:
print(type(history))
...
# list all data in history
print(history.history.keys())

In [None]:
print(history.history['loss'])

In [None]:
print(history.history['val_loss'])

In [None]:
print(history.history['accuracy'])

In [None]:
print(history.history['val_accuracy'])