In [27]:
#These are imports for models and utilities for working with models
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import BernoulliNB, GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import accuracy_score, auc, classification_report, confusion_matrix, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [5]:
#for array manipulations
import numpy as np
import pandas as pd
#for image processing
import cv2 
#for displaying images
import matplotlib.pyplot as plt
#to display images in this notebook, not in a separate window
%matplotlib inline
#to access system resources such as directories
import os

In [6]:
#set our base directory. This should point to the location of the plant-diseases folder
base_dir = 'C:\\Users\\Admin\\Documents\\plant-diseases'
data_folder = os.path.join(base_dir, 'data')
maize_data_folder = os.path.join(data_folder, 'maize')

In [141]:
#This function loads 32 images of a particular disease
def get_32(disease):
    '''
    disease:
        A string that could be common_rust, healthy, leaf_spot, nothern_leaf_blight
    ........
    disease_images:
        A list of images for the selected disease
    '''
    #this list will contain the 20 images returned
    disease_images = []
    #path to the images
    disease_images_path = os.path.join(maize_data_folder, disease)
    for image_path in os.listdir(disease_images_path):
        image_path = os.path.join(disease_images_path, image_path)
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
        disease_images.append(image)
    return disease_images

#This function will help us plot 10 images
def plot_images(images, title):
    '''
    images: List
        List of images
    title: String
        Title for each image i.e name of disease
    '''
    plt.figure(figsize=(15,6))
    for i in range(10):
        plt.subplot(2,5, i+1)
        plt.imshow(images[i])
        plt.title(title)
        plt.xticks([])
        plt.yticks([])
    plt.show()
    
#This function generates ORB features
def extract_features_orb(image, vector_size=32):
    try:
        feature_generator = cv2.ORB_create()
        orb_keypoints = feature_generator.detect(image)
        orb_keypoints = orb_keypoints[:32]
        orb_keypoints, orb_descriptors = feature_generator.compute(image, orb_keypoints)
        orb_descriptors = orb_descriptors.flatten()
        #The descriptor vector size is 128
        needed_size = (vector_size*128)
        if orb_descriptors.size < needed_size:
            #If we have less than 32 keypoints, add zeros to the end of our vector
            orb_descriptors = np.concatenate([orb_descriptors, np.zeros(needed_size - orb_descriptors.size)])
    except cv2.error as e:
        print(f'Error: {e}')
        return None
    return orb_descriptors

#This function generates KAZE features
def extract_features_kaze(image, vector_size=32):
    try:
        feature_generator = cv2.KAZE_create()
        kaze_keypoints = feature_generator.detect(image)
        kaze_keypoints = kaze_keypoints[:32]
        kaze_keypoints, kaze_descriptors = feature_generator.compute(image, kaze_keypoints)
        kaze_descriptors = kaze_descriptors.flatten()
        #The descriptor vector size is 128
        needed_size = (vector_size*128)
        if kaze_descriptors.size < needed_size:
            #If we have less than 32 keypoints, add zeros to the end of our vector
            kaze_descriptors = np.concatenate([kaze_descriptors, np.zeros(needed_size - kaze_descriptors.size)])
    except cv2.error as e:
        print(f'Error: {e}')
        return None
    return kaze_descriptors

def extract_features_hog(image, feature_size=4096):
    hog = cv2.HOGDescriptor()
    features = hog.compute(image)
    required_features = features[:feature_size].ravel()
    return required_features

#Let us extraxt KAZE features
def extract_features(algorithm=0):
    '''
    Algorithm:
        1 for ORB
        0 for KAZE
        2 for HOG
    '''
    #Now let us perform these steps for all the 32 images loaded
    #This will contain all our images
    all_images = []
    #This will contain all our labels
    all_labels = []
    labels = ['common_rust', 'healthy', 'leaf_spot', 'nothern_leaf_blight']
    for i, image_folder in enumerate([common_rust_images, healthy_images, leaf_spot_images, nothern_leaf_blight_images]):
        for image in image_folder:
            all_images.append(image)
            all_labels.append(labels[i])
    features, labels = [], []
    for i, image in enumerate(all_images):
        image_features = []
        try:
            if algorithm == 1:
                image_features = extract_features_orb(image)
            elif algorithm == 0:
                image_features = extract_features_kaze(image)
            else:
                image_features = extract_features_hog(image)
            image_label = all_labels[i]
            features.append(image_features)
            labels.append(image_label)
        except AttributeError as e:
            print(e)
    features = np.array(features)
    labels = np.array(labels)
    features = StandardScaler().fit_transform(features)
    labels = LabelEncoder().fit_transform(labels)
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3)
    
    return X_train, X_test, y_train, y_test

In [29]:
models = [
    RandomForestClassifier(n_estimators=100),
    LogisticRegression(solver='lbfgs', multi_class='auto'),
    AdaBoostClassifier(),
    BaggingClassifier(),
    GradientBoostingClassifier(),
    BernoulliNB(),
    GaussianNB(),
    KNeighborsClassifier(),
    MLPClassifier(),
    LinearSVC(),
    SVC(gamma='scale')
]
names = [
    'Random Forest',
    'Logistic Regression',
    'AdaBoost',
    'Bagging',
    'Gradient Boosting',
    'Bernoulli NB',
    'Gaussian NB',
    'K-Nearest Neighbors',
    'Neural Network',
    'Linear SVC',
    'Support Vector Machine'
]
def train_base_models(X_train, y_train, X_test, y_test):
    model_accuracy = []
    model_names = []
    for i, classifier in enumerate(models):
        try:
            classifier.fit(X_train, y_train)
            predictions = classifier.predict(X_test)
            accuracy = accuracy_score(y_test, predictions)
            model_accuracy.append(round(accuracy, 3))
            model_names.append(names[i])
            print(f'{names[i]}: {round(accuracy, 3)}')
        except Exception as e:
            print(f'Could not train {names[i]} because of {e}')
    df = pd.DataFrame({'Model':model_names, 'Accuracy':model_accuracy})
    df = df.sort_values(by=['Accuracy'], ascending=False)
    return df

In [89]:
common_rust_images = get_32('common_rust')
healthy_images = get_32('healthy')
leaf_spot_images = get_32('leaf_spot')
nothern_leaf_blight_images = get_32('nothern_leaf_blight')

In [102]:
#These fatures are generated using KAZE
X_train, X_test, y_train, y_test = extract_features() 

In [103]:
X_train.shape, y_train.shape

((89, 4096), (89,))

In [104]:
X_test.shape, y_test.shape

((39, 4096), (39,))

In [105]:
df = train_base_models(X_train, y_train, X_test, y_test)
df

Random Forest: 0.641
Logistic Regression: 0.615
AdaBoost: 0.513
Bagging: 0.59
Gradient Boosting: 0.641
Bernoulli NB: 0.615
Gaussian NB: 0.718
K-Nearest Neighbors: 0.615
Neural Network: 0.615
Linear SVC: 0.615
Support Vector Machine: 0.513


Unnamed: 0,Model,Accuracy
6,Gaussian NB,0.718
0,Random Forest,0.641
4,Gradient Boosting,0.641
1,Logistic Regression,0.615
5,Bernoulli NB,0.615
7,K-Nearest Neighbors,0.615
8,Neural Network,0.615
9,Linear SVC,0.615
3,Bagging,0.59
2,AdaBoost,0.513


In [106]:
#These features are gnerated using ORB
X_train2, X_test2, y_train2, y_test2 = extract_features(algorithm=1)

'NoneType' object has no attribute 'flatten'
'NoneType' object has no attribute 'flatten'
'NoneType' object has no attribute 'flatten'


In [107]:
df2 = train_base_models(X_train2, y_train2, X_test2, y_test2)
df2

Random Forest: 0.395
Logistic Regression: 0.316
AdaBoost: 0.263
Bagging: 0.526
Gradient Boosting: 0.342
Bernoulli NB: 0.395
Gaussian NB: 0.342
K-Nearest Neighbors: 0.263
Neural Network: 0.342
Linear SVC: 0.421
Support Vector Machine: 0.289


Unnamed: 0,Model,Accuracy
3,Bagging,0.526
9,Linear SVC,0.421
0,Random Forest,0.395
5,Bernoulli NB,0.395
4,Gradient Boosting,0.342
6,Gaussian NB,0.342
8,Neural Network,0.342
1,Logistic Regression,0.316
10,Support Vector Machine,0.289
2,AdaBoost,0.263


In [108]:
#The base models perform so badly with ORB.

In [109]:
d = np.zeros((X_train.shape[0], X_train.shape[1]))
for i in range(X_train2.shape[0]):
    for j in range( X_train2.shape[1]):
        d[i,j] = X_train2[i,j]
X_train3 = X_train + d

In [110]:
e = np.zeros((X_test.shape[0], X_test.shape[1]))
for i in range(X_test2.shape[0]):
    for j in range( X_test2.shape[1]):
        e[i,j] = X_train2[i,j]
X_test3 = X_test + e

In [111]:
df3 = train_base_models(X_train3, y_train, X_test3, y_test)
df3

Random Forest: 0.615
Logistic Regression: 0.641
AdaBoost: 0.513
Bagging: 0.641
Gradient Boosting: 0.538
Bernoulli NB: 0.667
Gaussian NB: 0.692
K-Nearest Neighbors: 0.538
Neural Network: 0.692
Linear SVC: 0.718
Support Vector Machine: 0.538


Unnamed: 0,Model,Accuracy
9,Linear SVC,0.718
6,Gaussian NB,0.692
8,Neural Network,0.692
5,Bernoulli NB,0.667
1,Logistic Regression,0.641
3,Bagging,0.641
0,Random Forest,0.615
4,Gradient Boosting,0.538
7,K-Nearest Neighbors,0.538
10,Support Vector Machine,0.538


In [112]:
scaler = StandardScaler()
X_train4 = scaler.fit_transform(X_train3)
X_test4 = scaler.transform(X_test3)

In [113]:
df4 = train_base_models(X_train4, y_train, X_test4, y_test)
df4

Random Forest: 0.692
Logistic Regression: 0.641
AdaBoost: 0.513
Bagging: 0.744
Gradient Boosting: 0.538
Bernoulli NB: 0.667
Gaussian NB: 0.692
K-Nearest Neighbors: 0.667
Neural Network: 0.667
Linear SVC: 0.667
Support Vector Machine: 0.513


Unnamed: 0,Model,Accuracy
3,Bagging,0.744
0,Random Forest,0.692
6,Gaussian NB,0.692
5,Bernoulli NB,0.667
7,K-Nearest Neighbors,0.667
8,Neural Network,0.667
9,Linear SVC,0.667
1,Logistic Regression,0.641
4,Gradient Boosting,0.538
2,AdaBoost,0.513


In [142]:
#These fatures are generated using HOG
X_train5, X_test5, y_train5, y_test5 = extract_features(algorithm=2) 

In [143]:
X_train5.shape, y_train5.shape

((89, 4096), (89,))

In [144]:
X_test5.shape, y_test5.shape

((39, 4096), (39,))

In [145]:
df5 = train_base_models(X_train5, y_train5, X_test5, y_test5)
df5

Random Forest: 0.692




Logistic Regression: 0.769
AdaBoost: 0.41
Bagging: 0.513
Gradient Boosting: 0.641
Bernoulli NB: 0.538
Gaussian NB: 0.564
K-Nearest Neighbors: 0.538
Neural Network: 0.744
Linear SVC: 0.718
Support Vector Machine: 0.667


Unnamed: 0,Model,Accuracy
1,Logistic Regression,0.769
8,Neural Network,0.744
9,Linear SVC,0.718
0,Random Forest,0.692
10,Support Vector Machine,0.667
4,Gradient Boosting,0.641
6,Gaussian NB,0.564
5,Bernoulli NB,0.538
7,K-Nearest Neighbors,0.538
3,Bagging,0.513


In [146]:
X_train6 = X_train + X_train5
X_test6 = X_test + X_test5
df6 = train_base_models(X_train6, y_train, X_test6, y_test)
df6 

Random Forest: 0.462




Logistic Regression: 0.564
AdaBoost: 0.308
Bagging: 0.564
Gradient Boosting: 0.462
Bernoulli NB: 0.462
Gaussian NB: 0.462
K-Nearest Neighbors: 0.359
Neural Network: 0.513
Linear SVC: 0.641
Support Vector Machine: 0.436


Unnamed: 0,Model,Accuracy
9,Linear SVC,0.641
1,Logistic Regression,0.564
3,Bagging,0.564
8,Neural Network,0.513
0,Random Forest,0.462
4,Gradient Boosting,0.462
5,Bernoulli NB,0.462
6,Gaussian NB,0.462
10,Support Vector Machine,0.436
7,K-Nearest Neighbors,0.359
