# Plant disease detection

### Configuration

In [0]:
# auth
# Install google-drive-ocamlfuse (from the alessandro-strada PPA) and FUSE so
# Google Drive can be mounted as a local folder.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
# Note on `2>&1 > /dev/null`: stdout is discarded, but because `2>&1` comes
# first, stderr still reaches the cell output.
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
# Authenticate through Colab, then read the application-default credentials;
# their client id/secret are handed to google-drive-ocamlfuse below.
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# First headless run: only output lines containing "URL" are shown
# (presumably the authorization link to open in a browser).
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# Prompt for the verification code without echoing it, then pipe it into a
# second headless run.
vcode = getpass.getpass()
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

# mount google drive
# The mount point is ./drive; the training cell reads and writes its weights
# under drive/plant-disease-detection/.
!mkdir -p drive
!google-drive-ocamlfuse drive

### Dependencies


In [0]:
# install dependencies
# Keras is pinned to 2.1.6; later cells use that generation's API spelling
# (e.g. `fit_generator`, `Adam(lr=...)`, `keras.utils.np_utils`).
!pip install keras==2.1.6

In [0]:
# import dependencies
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam
from keras.utils import Sequence
from keras.utils import np_utils
from keras.applications.resnet50 import ResNet50

import numpy as np

import os

from PIL import Image

### Classes

List of classes:
*   Apple: Apple scab
*   Apple: Black rot
*   Apple: Cedar apple rust
*   Apple: healthy
*   Blueberry: healthy
*   Cherry (including sour): healthy
*   Cherry (including sour): Powdery mildew
*   Corn (maize): Cercospora leaf spot, Gray leaf spot
*   Corn (maize): Common rust
*   Corn (maize): healthy
*   Corn (maize): Northern Leaf Blight
*   Grape: Black rot
*   Grape: Esca (Black Measles)
*   Grape: healthy
*   Grape: Leaf blight (Isariopsis Leaf Spot)
*   Orange: Huanglongbing (Citrus greening)
*   Peach: Bacterial spot
*   Peach: healthy
*   Pepper bell: Bacterial spot
*   Pepper bell: healthy
*   Potato: Early blight
*   Potato: healthy
*   Potato: Late blight
*   Raspberry: healthy
*   Soybean: healthy
*   Squash: Powdery mildew
*   Strawberry: healthy
*   Strawberry: Leaf scorch
*   Tomato: Bacterial spot
*   Tomato: Early blight
*   Tomato: healthy
*   Tomato: Late blight
*   Tomato: Leaf Mold
*   Tomato: Septoria leaf spot
*   Tomato: Spider mites, Two-spotted spider mite
*   Tomato: Target Spot
*   Tomato: Tomato mosaic virus
*   Tomato: Tomato Yellow Leaf Curl Virus

In [0]:
# define classes
# Maps each dataset folder name to its integer class id. The id is simply the
# position of the folder name in the ordered tuple below (0..37).
classes = {
    folder: index
    for index, folder in enumerate((
        'Apple___Apple_scab',
        'Apple___Black_rot',
        'Apple___Cedar_apple_rust',
        'Apple___healthy',
        'Blueberry___healthy',
        'Cherry_(including_sour)___healthy',
        'Cherry_(including_sour)___Powdery_mildew',
        'Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot',
        'Corn_(maize)___Common_rust_',
        'Corn_(maize)___healthy',
        'Corn_(maize)___Northern_Leaf_Blight',
        'Grape___Black_rot',
        'Grape___Esca_(Black_Measles)',
        'Grape___healthy',
        'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)',
        'Orange___Haunglongbing_(Citrus_greening)',
        'Peach___Bacterial_spot',
        'Peach___healthy',
        'Pepper,_bell___Bacterial_spot',
        'Pepper,_bell___healthy',
        'Potato___Early_blight',
        'Potato___healthy',
        'Potato___Late_blight',
        'Raspberry___healthy',
        'Soybean___healthy',
        'Squash___Powdery_mildew',
        'Strawberry___healthy',
        'Strawberry___Leaf_scorch',
        'Tomato___Bacterial_spot',
        'Tomato___Early_blight',
        'Tomato___healthy',
        'Tomato___Late_blight',
        'Tomato___Leaf_Mold',
        'Tomato___Septoria_leaf_spot',
        'Tomato___Spider_mites Two-spotted_spider_mite',
        'Tomato___Target_Spot',
        'Tomato___Tomato_mosaic_virus',
        'Tomato___Tomato_Yellow_Leaf_Curl_Virus',
    ))
}

### Data


In [0]:
# fetch dataset
# NOTE(review): on a re-run the clone target already exists, so git reports an
# error and the existing checkout is reused.
!git clone https://github.com/lzoran/plant-disease-dataset.git
# generate train and test sets
%cd plant-disease-dataset
# Remove any previous split before rebuilding it.
!rm -rf train
!rm -rf test
# build_dataset comes from the cloned repository (not shown here); presumably
# it recreates the train/ and test/ folders read by the training cell.
!python3 -c 'from build_dataset import build_dataset; build_dataset()'
%cd ../
# display dataset structure
!ls plant-disease-dataset

In [0]:
# loading data functions

def load_data(path):
    """Load every image below ``path`` into memory as 224x224 RGB arrays.

    ``path`` is walked recursively. The name of the folder that directly
    contains an image is looked up in the module-level ``classes`` mapping to
    obtain its integer label. Folders and files are visited in sorted order so
    the result does not depend on the file system's listing order.

    Each image is resized to 224x224. Channels beyond the third (e.g. alpha)
    are dropped. Images that do not decode to at least three channels (2-D
    grayscale/palette arrays, or fewer than three channels) are skipped
    together with their label.

    :param path: root directory of the dataset split
    :return: tuple ``(items, labels)`` of numpy arrays; ``items`` holds the
        raw (not normalized) pixel data, ``labels`` the integer class ids
    :raises KeyError: if a folder that contains files is not a key of
        ``classes``
    """
    items = []
    labels = []

    counter = 0
    for subdir, dirs, files in os.walk(path):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirs.sort()
        if not files:
            continue

        label = classes[os.path.basename(subdir)]
        for file in sorted(files):
            # The context manager closes the file handle even if decoding or
            # resizing raises. LANCZOS is the same filter as the older
            # ANTIALIAS name.
            with Image.open(os.path.join(subdir, file)) as img:
                img_array = np.asarray(img.resize((224, 224), Image.LANCZOS))

            # sanitize dimensions
            if img_array.shape != (224, 224, 3):
                if img_array.ndim == 3 and img_array.shape[2] > 3:
                    img_array = img_array[:, :, :3]  # remove alpha channel
                else:
                    # 2-D array or fewer than three channels: not usable
                    continue

            items.append(img_array)
            labels.append(label)

        counter += 1
        print('Loading data: {}/{}'.format(counter, len(classes)), end='\r')

    print('\nData successfully loaded')
    return np.asarray(items), np.asarray(labels)


def load_raw_data(path):
    """Collect the image file paths below ``path`` together with their labels.

    No image is opened here; only paths are gathered. The name of the folder
    that directly contains a file is looked up in the module-level ``classes``
    mapping to obtain its integer label. Folders and files are visited in
    sorted order so the result (and therefore any seeded shuffle applied to
    it) does not depend on the file system's listing order.

    :param path: root directory of the dataset split
    :return: tuple ``(items, labels)`` of numpy arrays; ``items`` holds the
        file paths, ``labels`` the integer class ids
    :raises KeyError: if a folder that contains files is not a key of
        ``classes``
    """
    items = []
    labels = []

    counter = 0
    for subdir, dirs, files in os.walk(path):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirs.sort()
        if not files:
            continue

        label = classes[os.path.basename(subdir)]
        for file in sorted(files):
            items.append(os.path.join(subdir, file))
            labels.append(label)

        counter += 1
        print('Loading data: {}/{}'.format(counter, len(classes)), end='\r')

    print('\nData successfully loaded')
    return np.asarray(items), np.asarray(labels)

In [0]:
class DataSequence(Sequence):
    """Keras ``Sequence`` that reads and preprocesses images one batch at a time.

    Only file paths and labels are kept in memory; the images of a batch are
    opened, resized to 224x224, forced to RGB and scaled to ``[0, 1]`` when
    that batch is requested.
    """

    def __init__(self, file_paths, labels, batch_size):
        """
        :param file_paths: list of paths to the images
        :param labels: list of associated classes
        :param batch_size: batch size
        """

        self.file_paths = file_paths
        self.labels = labels
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        return int(np.ceil(len(self.file_paths) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(items, labels)``.

        :param idx: zero-based batch index
        :return: ``items`` is a float32 array with one 224x224x3 image per
            path in the batch, scaled to ``[0, 1]``; ``labels`` is the
            matching slice of ``self.labels`` as a numpy array
        """
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.file_paths[start:stop]
        batch_y = self.labels[start:stop]

        items = []
        for file_path in batch_x:
            # The context manager closes the file handle even if decoding or
            # resizing raises. LANCZOS is the same filter as the older
            # ANTIALIAS name.
            with Image.open(file_path) as img:
                resized = img.resize((224, 224), Image.LANCZOS)
                # Every path must yield exactly one sample: skipping an image
                # here would leave `items` shorter than `batch_y`. Converting
                # to RGB drops an alpha channel and expands grayscale/palette
                # images to three channels.
                if resized.mode != 'RGB':
                    resized = resized.convert('RGB')
                items.append(np.asarray(resized))

        # normalize items
        items = np.asarray(items, dtype='float32') / 255
        labels = np.asarray(batch_y)

        return items, labels


### Training

In [0]:
# config
batch_size = 64
nb_classes = 38
nb_epoch = 200
# True: stream batches from disk through DataSequence.
# False: load the whole dataset into memory with load_data.
data_seq = True
weights_path = 'drive/plant-disease-detection/weights.hdf5'

# model construction
model = ResNet50(weights=None, classes=nb_classes)

adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

# Create the checkpoint folder up front so a missing directory does not make
# the first save fail only after a full epoch has been trained.
os.makedirs(os.path.dirname(weights_path), exist_ok=True)
checkpointer = ModelCheckpoint(filepath=weights_path, verbose=0, save_weights_only=True)

# load weights (resume from a previous run if a checkpoint exists)
if os.path.exists(weights_path):
    model.load_weights(weights_path)

model.summary()

# load data: file paths only when streaming, decoded images otherwise
load_split = load_raw_data if data_seq else load_data
print('Loading training data')
x_train, y_train = load_split('plant-disease-dataset/train/')
print('Loading testing data')
x_test, y_test = load_split('plant-disease-dataset/test/')

if not data_seq:
    # normalize images (DataSequence does this per batch in streaming mode)
    x_train = x_train.astype('float32')
    x_train /= 255
    x_test = x_test.astype('float32')
    x_test /= 255

# convert class vectors to binary class matrices
y_train = np_utils.to_categorical(y_train, nb_classes)
y_test = np_utils.to_categorical(y_test, nb_classes)

# shuffle train data (one permutation applied to items and labels alike)
train_shuffle = np.arange(x_train.shape[0])
np.random.seed(4)
np.random.shuffle(train_shuffle)
x_train = x_train[train_shuffle]
y_train = y_train[train_shuffle]
# shuffle test data
test_shuffle = np.arange(x_test.shape[0])
np.random.seed(4)
np.random.shuffle(test_shuffle)
x_test = x_test[test_shuffle]
y_test = y_test[test_shuffle]

# train
if data_seq:
    training_seq = DataSequence(x_train, y_train, batch_size)
    validation_seq = DataSequence(x_test, y_test, batch_size)

    # Use the sequences' own lengths for the step counts: DataSequence.__len__
    # rounds up, whereas len(x) // batch_size would leave out the trailing
    # partial batch.
    model.fit_generator(generator=training_seq,
                        steps_per_epoch=len(training_seq),
                        epochs=nb_epoch,
                        verbose=1,
                        validation_data=validation_seq,
                        validation_steps=len(validation_seq),
                        callbacks=[checkpointer])
else:
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=nb_epoch,
              validation_data=(x_test, y_test),
              callbacks=[checkpointer])

### Testing

In [0]:
# test
if data_seq:
    # Use the sequence's own length: it counts the trailing partial batch, so
    # every test image is scored. len(x_test) // batch_size would leave out
    # the last len(x_test) % batch_size images.
    eval_steps = len(validation_seq)
    score = model.evaluate_generator(generator=validation_seq, steps=eval_steps)
    predictions = model.predict_generator(generator=validation_seq, steps=eval_steps)
else:
    score = model.evaluate(x_test, y_test, batch_size=batch_size)
    predictions = model.predict(x_test, batch_size=batch_size)

# score holds the loss followed by the compiled metrics; predictions holds one
# row of class probabilities per evaluated image.
print('Score: {}'.format(score))
print('Predictions: {}'.format(predictions))