In [1]:
#for array manipulations
import numpy as np
import pandas as pd
#for image processing
import cv2 
#for displaying images
import matplotlib.pyplot as plt
#to display images in this notebook, not in a separate window
%matplotlib inline
import seaborn as sns
#to access system resources such as directories
import os
#This will allow us to measure the training time of each model
import time
import glob

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import (accuracy_score, auc, classification_report, confusion_matrix, recall_score,
            f1_score, precision_score, precision_recall_curve, precision_recall_fscore_support)
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from sklearn.feature_selection import RFECV
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score, cross_val_predict 
from sklearn.model_selection import GridSearchCV

from sklearn.ensemble import VotingClassifier

In [3]:
#Set our base directory. This should point to the location of the plant-diseases folder.
#The MAIZE_BASE_DIR environment variable, when set, overrides the original author's local
#checkout path below, so the notebook can run on another machine without editing this cell.
base_dir = os.environ.get(
    'MAIZE_BASE_DIR',
    'C:\\Users\\USER\\Documents\\GitHub\\maize-disease-detection'
)
#This is where our data is stored
data_folder = os.path.join(base_dir, 'data')
#Points to the folder containing the maize diseases data
maize_data_folder = os.path.join(data_folder, 'maize')
#Contains our test data in different folders
test_data_folder = os.path.join(data_folder, 'test')
#This will contain our models and other model related data
models_dir = os.path.join(base_dir, 'models')

In [4]:
#NOTE: an identical definition of this function appears again in a later cell;
#whichever cell runs last is the one in effect.
def extract_features_hog(image, feature_size=4096):
    """Return the leading HOG descriptor values of `image` as a flat array.

    A default-configured cv2.HOGDescriptor is computed over the image, the
    result is cut to its first `feature_size` entries along the first axis and
    then flattened to one dimension.
    """
    descriptor = cv2.HOGDescriptor()
    hog_values = descriptor.compute(image)
    return hog_values[:feature_size].ravel()

In [39]:
def extract_features_hog(image, feature_size=4096):
    """Compute a HOG descriptor for `image` and keep its first `feature_size` values.

    Uses cv2.HOGDescriptor with its default settings. The descriptor output is
    sliced along its first axis before being flattened, so the returned array is
    one-dimensional and holds at most `feature_size` values when the descriptor
    is a vector or a single-column array.
    """
    #slice first, then flatten, exactly as the descriptor output is delivered
    return cv2.HOGDescriptor().compute(image)[:feature_size].ravel()

#Class folder names. They are already in alphabetical order, which is the order
#LabelEncoder assigns integer codes in, so code k corresponds to _DISEASE_NAMES[k].
#The spelling presumably matches the folder names on disk -- do not "correct" it here.
_DISEASE_NAMES = ('common_rust', 'healthy', 'leaf_spot', 'nothern_leaf_blight')


def _load_hog_dataset(root_folder, images_per_class):
    """Read the .jpg images of every class folder under `root_folder` and HOG-encode them.

    Parameters
    ----------
    root_folder : str
        Directory that contains one sub-folder per entry of _DISEASE_NAMES.
    images_per_class : int
        Maximum number of images taken from each class folder.

    Returns
    -------
    (features, labels) : tuple of numpy arrays
        Unscaled HOG features (one row per image) and the class-name string of each row.

    Raises
    ------
    ValueError
        If not a single readable image was found.
    """
    import warnings

    features = []
    labels = []
    for disease_name in _DISEASE_NAMES:
        folder_path = os.path.join(root_folder, disease_name)
        #glob returns files in filesystem order; sorting makes the [:images_per_class]
        #subset the same on every run and every machine
        image_paths = sorted(glob.glob(folder_path+'/*.jpg'))
        for image_path in image_paths[:images_per_class]:
            image = cv2.imread(image_path, cv2.IMREAD_COLOR)
            if image is None:
                #cv2.imread signals an unreadable file by returning None instead of raising
                warnings.warn('Skipping unreadable image: %s' % image_path)
                continue
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            features.append(extract_features_hog(image))
            labels.append(disease_name)
    if not features:
        raise ValueError('No readable .jpg images found under %s' % root_folder)
    return np.array(features), np.array(labels)


def extract_features(images_per_class=300, return_transformers=False):
    """Build the training set: standardised HOG features and integer class labels.

    Parameters
    ----------
    images_per_class : int, default 300
        Maximum number of images read from each class folder of `maize_data_folder`.
    return_transformers : bool, default False
        When True, also return the fitted StandardScaler and LabelEncoder so that
        other data (for example the test set) can be transformed consistently.

    Returns
    -------
    (features, labels), or (features, labels, scaler, label_encoder) when
    `return_transformers` is True.

    NOTE(review): the scaler is fit on the whole training set before any
    cross-validation split, so fold statistics are not fully independent.
    """
    features, labels = _load_hog_dataset(maize_data_folder, images_per_class)
    scaler = StandardScaler()
    features = scaler.fit_transform(features)
    #fit on the full class list so the integer codes do not shift if a class folder is empty
    label_encoder = LabelEncoder().fit(list(_DISEASE_NAMES))
    labels = label_encoder.transform(labels)
    if return_transformers:
        return features, labels, scaler, label_encoder
    return features, labels


def extract_test_features(images_per_class=30, scaler=None, label_encoder=None):
    """Build the test set: standardised HOG features and integer class labels.

    Parameters
    ----------
    images_per_class : int, default 30
        Maximum number of images read from each class folder of `test_data_folder`.
    scaler : fitted StandardScaler or None, default None
        Scaler fitted on the training features. When None, a new scaler is fit on
        the test features themselves (the original behaviour).
    label_encoder : fitted LabelEncoder or None, default None
        Encoder to reuse; when None, one is fit on the full class list.

    Returns
    -------
    (test_features, test_labels)

    NOTE(review): with scaler=None the test features are standardised with their
    own mean/variance, not the training set's. Pass the training scaler when a
    model fitted on the training features is scored on these features.
    """
    test_features, test_labels = _load_hog_dataset(test_data_folder, images_per_class)
    if scaler is None:
        test_features = StandardScaler().fit_transform(test_features)
    else:
        test_features = scaler.transform(test_features)
    if label_encoder is None:
        label_encoder = LabelEncoder().fit(list(_DISEASE_NAMES))
    test_labels = label_encoder.transform(test_labels)
    return test_features, test_labels

#This method cross-validates a model on the given features and times it
def train_model(model, features, labels, cv=10):
    """Cross-validate `model` and report its mean accuracy and average time per fold.

    Parameters
    ----------
    model : estimator accepted by cross_val_score
    features, labels : training data passed straight to cross_val_score
    cv : default 10
        Cross-validation setting forwarded to cross_val_score.

    Returns
    -------
    (accuracy, seconds_per_fold) : mean fold accuracy rounded to 2 decimals and
        wall-clock seconds per fold rounded to 4 decimals. The time covers both
        fitting and scoring of each fold.
    """
    #perf_counter is a monotonic clock meant for measuring durations
    start = time.perf_counter()
    scores = cross_val_score(model, features, labels, scoring='accuracy', cv=cv)
    elapsed = time.perf_counter() - start
    #one score is returned per fold, so len(scores) is the fold count however cv was given
    seconds_per_fold = elapsed / len(scores)
    return round(scores.mean(), 2), round(seconds_per_fold, 4)

def test_model(model, features, labels):
    predictions = cross_val_predict(model, features, labels, cv=10)
    accuracy = accuracy_score(predictions, labels)
    return round(accuracy, 2)

In [47]:
#Only the tuned (non-default) hyperparameters are spelled out below. The previous
#full parameter dumps included `min_impurity_split`, which newer scikit-learn
#releases no longer accept, and `multi_class='auto'`, which is deprecated.

#Fixed seed so the stochastic estimators give repeatable scores between runs
RANDOM_STATE = 42

#The Random Forest classifier after hyperparameter tuning
hog_random_forest = RandomForestClassifier(
    n_estimators=300,
    criterion='entropy',
    max_features=100,
    bootstrap=False,
    random_state=RANDOM_STATE,
)
#The Logistic Regression classifier after hyperparameter tuning
hog_logistic_regression = LogisticRegression(
    C=0.5,
    penalty='l2',
    solver='lbfgs',
    max_iter=200,
    tol=0.1,
)
#The K-Nearest Neighbors classifier after hyperparameter tuning
hog_knearest_neighbors = KNeighborsClassifier(
    n_neighbors=2,
    p=1,
    weights='distance',
)
#The Linear Support Vector classifier after hyperparameter tuning
hog_linear_svc = LinearSVC(
    C=0.0001,
    penalty='l2',
    loss='squared_hinge',
    dual=True,  #kept explicit so the solver choice does not depend on the library default
    max_iter=1000,
    tol=0.01,
    random_state=RANDOM_STATE,
)
#The Support Vector classifier after hyperparameter tuning
hog_svc = SVC(
    C=0.1,
    kernel='linear',
    probability=True,  #exposes predict_proba
    tol=0.1,
    random_state=RANDOM_STATE,
)
#A list of our models
hog_models = [
    hog_random_forest,
    hog_logistic_regression,
    hog_knearest_neighbors,
    hog_linear_svc,
    hog_svc
]
#A list of the names of our models
model_names = [
    'Random Forest',
    'Logistic Regression',
    'K-Nearest Neighbor',
    'Linear SVC',
    'Support Vector Classifier'
]

#(name, model) pairs for the voting ensemble.
#Linear SVC is deliberately left out -- presumably because LinearSVC has no
#predict_proba, which a soft-voting ensemble needs from every member.
estimators = [
    ('Random Forest', hog_random_forest),
    ('Logistic Regression', hog_logistic_regression),
    ('K-Nearest Neighbor', hog_knearest_neighbors),
    ('Support Vector Classifier', hog_svc)
]

In [32]:
#Load the training images, HOG-encode them, then standardise the features and
#integer-encode the class labels
features, labels = extract_features()

In [33]:
#Sanity check: one feature row per image and one label per row
features.shape, labels.shape

((1200, 4096), (1200,))

In [40]:
#Load and HOG-encode the images from the separate test folder.
#NOTE(review): as called here, extract_test_features fits a new StandardScaler on the
#test images, so these features are not scaled with the training set's statistics.
test_features, test_labels = extract_test_features()

In [41]:
#Sanity check: one feature row per test image and one label per row
test_features.shape, test_labels.shape

((120, 4096), (120,))

In [34]:
#K-Nearest Neighbors: mean cross-validated accuracy and average seconds per fold
#on the training features
accuracy, train_time = train_model(hog_knearest_neighbors, features, labels)
accuracy, train_time

(0.63, 2.3112)

In [35]:
#Logistic Regression: mean cross-validated accuracy and average seconds per fold
#on the training features
accuracy, train_time = train_model(hog_logistic_regression, features, labels)
accuracy, train_time

(0.73, 2.3589)

In [36]:
#Linear SVC: mean cross-validated accuracy and average seconds per fold
#on the training features
accuracy, train_time = train_model(hog_linear_svc, features, labels)
accuracy, train_time

(0.72, 0.7747)

In [37]:
#Support Vector Classifier: mean cross-validated accuracy and average seconds per fold
#on the training features
accuracy, train_time = train_model(hog_svc, features, labels)
accuracy, train_time

(0.73, 23.5523)

In [38]:
#Random Forest: mean cross-validated accuracy and average seconds per fold
#on the training features
accuracy, train_time = train_model(hog_random_forest, features, labels)
accuracy, train_time

(0.77, 68.5912)

In [42]:
#K-Nearest Neighbors: accuracy of cross-validated predictions on the test images.
#NOTE(review): test_model refits the estimator on folds of the test data via
#cross_val_predict; it does not score a model trained on `features`.
test_model(hog_knearest_neighbors, test_features, test_labels)

0.52

In [43]:
#Logistic Regression: accuracy of cross-validated predictions on the test images
#(the estimator is refit on folds of the test data, not taken from the training run)
test_model(hog_logistic_regression, test_features, test_labels)

0.56

In [44]:
#Linear SVC: accuracy of cross-validated predictions on the test images
#(the estimator is refit on folds of the test data, not taken from the training run)
test_model(hog_linear_svc, test_features, test_labels)

0.57

In [45]:
#Support Vector Classifier: accuracy of cross-validated predictions on the test images
#(the estimator is refit on folds of the test data, not taken from the training run)
test_model(hog_svc, test_features, test_labels)

0.57

In [46]:
#Random Forest: accuracy of cross-validated predictions on the test images
#(the estimator is refit on folds of the test data, not taken from the training run)
test_model(hog_random_forest, test_features, test_labels)

0.67

In [48]:
#Soft-voting ensemble over every entry of `estimators`, fitted on the training features
#(fit returns the fitted ensemble itself, so construction and fitting are chained)
voting_classifier = VotingClassifier(estimators=estimators, voting='soft').fit(features, labels)
#Mean accuracy of the fitted ensemble on the separately loaded test images
accuracy = voting_classifier.score(test_features, test_labels)
round(accuracy, 2)

0.69

In [49]:
#Same soft-voting ensemble, this time without the K-Nearest Neighbor member
best_estimators = [
    ('Random Forest', hog_random_forest),
    ('Logistic Regression', hog_logistic_regression),
    ('Support Vector Classifier', hog_svc),
]
voting_classifier = VotingClassifier(estimators=best_estimators, voting='soft').fit(features, labels)
#Mean accuracy of the reduced ensemble on the separately loaded test images
accuracy = voting_classifier.score(test_features, test_labels)
round(accuracy, 2)

0.65