In [1]:
#for array manipulations
import numpy as np
#for image processing
import cv2 
#for displaying images
import matplotlib.pyplot as plt
#to display images in this notebook, not in a separate window
%matplotlib inline
#to access system resources such as directories
import os
import pandas as pd

In [2]:
#Set this to point to the project root; all paths will be relative to this one
project_dir = '/home/lyle/notebooks/maize-disease-detection/'

In [3]:
def set_up_directories(project_dir=project_dir):
    """Sets up the paths to important direcoties
    
    Parameters
    ----------
    project_dir : string; default is the current working directory
        The path to the project root i.e '/home/lyle/tutorials/AI/scikit-learn/maize-disease-detection/'
    
    returns
    -------
    base_dir : string
        The project directory path
    data_folder : string
        The data subfolder path
    maize_data_folder : 
        The path to the subdirectory containing the maize images
        
    example usage
    -------------
    base_dir, data_folder, maize_data_folder = set_up_directories()
    """
    
    #set our base directory. This should point to the location of the plant-diseases folder
    base_dir = project_dir
    #set the path to our data folder
    data_folder = os.path.join(base_dir, 'data')
    #set the path to the maize folder and list the various categories available
    maize_data_folder = os.path.join(data_folder, 'maize')

    return base_dir, data_folder, maize_data_folder

def get_32(disease):
    """Loads 32 images for a given maize disease
    
    parameters
    ----------
    disease: string
        A string that could be common_rust, healthy, leaf_spot, nothern_leaf_blight
    returns
    -------
    disease_images: list
        A list of images for the selected disease
    """
    
    #this list will contain the 20 images returned
    disease_images = []
    #path to the images
    disease_images_path = os.path.join(maize_data_folder, disease)
    for image_path in os.listdir(disease_images_path):
        image_path = os.path.join(disease_images_path, image_path)
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
        disease_images.append(image)
    return disease_images

#This function will help us plot 10 images
def plot_images(images, title):
    """Plots 10 images of a particular disease category
    
    parameters
    ----------
    images: list
        List of images(each image is an array)
    title: string
        Title for each image i.e name of disease
    """
    
    plt.figure(figsize=(12,8))
    for i in range(10):
        plt.subplot(2,5, i+1)
        plt.imshow(images[i])
        plt.title(title)
        plt.xticks([])
        plt.yticks([])
    plt.show()
    
#This function allows us to resize images
def resize(image, new_size=(600,600)):
    """Resize the given image
    
    parameters
    ----------
    image : numpy array
        The image to be resized
    new_size : tuple
        The new image size
    returns
    -------
    resized_image : numpy arra
        The resized image
    """
    
    resized_image = cv2.resize(image, new_size)
    return resized_image

#This function generates ORB features
def extract_features_orb(image, vector_size=32):
    """Extracts orb features for the given image
    
    parameters
    ----------
    image : numpy array
        The image whose features are to be extracted
    vector_size : int
        The number of keypoints to use
    returns
    -------
        orb_decriptors : 
        
    raises
    ------
    cv2.error
    """
    try:
        feature_generator = cv2.ORB_create()
        orb_keypoints = feature_generator.detect(image)
        orb_keypoints = orb_keypoints[:32]
        orb_keypoints, orb_descriptors = feature_generator.compute(image, orb_keypoints)
        orb_descriptors = orb_descriptors.flatten()
        #The descriptor vector size is 128
        needed_size = (vector_size*128)
        if orb_descriptors.size < needed_size:
            #If we have less than 32 keypoints, add zeros to the end of our vector
            orb_descriptors = np.concatenate([orb_descriptors, np.zeros(needed_size - orb_descriptors.size)])
    except cv2.error as e:
        print(f'Error: {e}')
        return None
    return orb_descriptors

#This function generates KAZE features
def extract_features_kaze(image, vector_size=32):
    """Extracts kaze features for the given image
    
    parameters
    ----------
    image : numpy array
        The image whose features are to be extracted
    vector_size : int
        The number of keypoints to use
    returns
    -------
        kaze_descriptors : 
        
    raises
    ------
    cv2.error
    """
    try:
        feature_generator = cv2.KAZE_create()
        kaze_keypoints = feature_generator.detect(image)
        kaze_keypoints = kaze_keypoints[:32]
        kaze_keypoints, kaze_descriptors = feature_generator.compute(image, kaze_keypoints)
        kaze_descriptors = kaze_descriptors.flatten()
        #The descriptor vector size is 128
        needed_size = (vector_size*128)
        if kaze_descriptors.size < needed_size:
            #If we have less than 32 keypoints, add zeros to the end of our vector
            kaze_descriptors = np.concatenate([kaze_descriptors, np.zeros(needed_size - kaze_descriptors.size)])
    except cv2.error as e:
        print(f'Error: {e}')
        return None
    return kaze_descriptors

def extract_hog_features(image, feature_size=4096):
    """Extracts hog features for the image
    
    parameters
    ----------
    image : numpy array
        The image whose features are to be extracted
    feature_size : int
        The number of features to generate
    returns
    -------
        hog_features : numpy array 
        
    raises
    ------
    cv2.error
    """
    hog = cv2.HOGDescriptor()
    features = hog.compute(common_rust_images[0])
    required_features = features[:feature_size].ravel()
    return required_features

In [4]:
#This function generates SIFT features
def extract_features_sift(image, vector_size=32):
    """Extracts sift features for the given image
    
    parameters
    ----------
    image : numpy array
        The image whose features are to be extracted
    vector_size : int
        The number of keypoints to use
    returns
    -------
        sift_descriptors : 
        
    raises
    ------
    cv2.error
    """
    try:
        feature_generator = cv2.SIFT_create()
        sift_keypoints = feature_generator.detect(image)
        sift_keypoints = sift_keypoints[:32]
        sift_keypoints, sift_descriptors = feature_generator.compute(image, sift_keypoints)
        sift_descriptors = sift_descriptors.flatten()
        #The descriptor vector size is 128
        needed_size = (vector_size*128)
        if sift_descriptors.size < needed_size:
            #If we have less than 32 keypoints, add zeros to the end of our vector
            sift_descriptors = np.concatenate([sift_descriptors, np.zeros(needed_size - sift_descriptors.size)])
    except cv2.error as e:
        print(f'Error: {e}')
        return None
    return sift_descriptors

#This function generates SURF features
def extract_features_surf(image, vector_size=32):
    """Extracts surf features for the given image
    
    parameters
    ----------
    image : numpy array
        The image whose features are to be extracted
    vector_size : int
        The number of keypoints to use
    returns
    -------
        surf_descriptors : 
        
    raises
    ------
    cv2.error
    """
    try:
        # Initiate FAST detector
        star = cv2.xfeatures2d.StarDetector_create()
        surf_keypoints = feature_generator.detect(image)
        surf_keypoints = surf_keypoints[:32]
        surf_keypoints, surf_descriptors = feature_generator.compute(image, surf_keypoints)
        surf_descriptors = surf_descriptors.flatten()
        #The descriptor vector size is 128
        needed_size = (vector_size*128)
        if surf_descriptors.size < needed_size:
            #If we have less than 32 keypoints, add zeros to the end of our vector
            sift_descriptors = np.concatenate([surf_descriptors, np.zeros(needed_size - surf_descriptors.size)])
    except cv2.error as e:
        print(f'Error: {e}')
        return None
    return surf_descriptors

#This function generates SIFT features
def extract_features_brief(image, vector_size=32, algorithm="star"):
    """Extracts features for the given image using BRIEF
    
    parameters
    ----------
    image : numpy array
        The image whose features are to be extracted
    vector_size : int
        The number of keypoints to use
    algorithm : string
        The algorithm to use; can be star or fast
    returns
    -------
        brief_descriptors : 
        
    raises
    ------
    cv2.error
    """
    try:
        alg = cv2.xfeatures2d.StarDetector_create()
        if algorithm == "fast":
            alg = cv2.FastFeatureDetector_create()
        brief = cv2.xfeatures2d.BriefDescriptorExtractor_create()
        
        kp = alg.detect(image, None)
        kp = kp[:32]
        kp, des = brief.compute(image, kp)
        des = des.flatten()
        #The descriptor vector size is 128
        needed_size = (vector_size*128)
        if des.size < needed_size:
            #If we have less than 32 keypoints, add zeros to the end of our vector
            des = np.concatenate([des, np.zeros(needed_size - des.size)])
    except cv2.error as e:
        print(f'Error: {e}')
        return np.array([])
    except AttributeError:
        return np.array([])
    return des

In [5]:
#Directories set up
base_dir, data_folder, maize_data_folder = set_up_directories()

In [6]:
common_rust_images = get_32('common_rust')
healthy_images = get_32('healthy')
leaf_spot_images = get_32('leaf_spot')
nothern_leaf_blight_images = get_32('nothern_leaf_blight')

In [7]:
common_rust_image_orb_features = extract_features_orb(common_rust_images[0])
common_rust_image_kaze_features = extract_features_kaze(common_rust_images[0])
common_rust_image_hog_features = extract_hog_features(common_rust_images[0])
common_rust_image_sift_features = extract_features_sift(common_rust_images[0])
common_rust_image_brief_features = extract_features_brief(common_rust_images[0])

In [8]:
healthy_image_orb_features = extract_features_orb(healthy_images[0])
healthy_image_kaze_features = extract_features_kaze(healthy_images[0])
healthy_image_hog_features = extract_hog_features(healthy_images[0])
healthy_image_brief_features = extract_features_brief(healthy_images[0])
healthy_image_sift_features = extract_features_sift(healthy_images[0])

In [9]:
leaf_spot_image_orb_features = extract_features_orb(leaf_spot_images[0])
leaf_spot_image_kaze_features = extract_features_kaze(leaf_spot_images[0])
leaf_spot_image_hog_features = extract_hog_features(leaf_spot_images[0])
leaf_spot_image_brief_features = extract_features_brief(leaf_spot_images[0])
leaf_spot_image_sift_features = extract_features_sift(leaf_spot_images[0])

In [10]:
nothern_leaf_blight_image_orb_features = extract_features_orb(nothern_leaf_blight_images[0])
nothern_leaf_blight_image_kaze_features = extract_features_kaze(nothern_leaf_blight_images[0])
nothern_leaf_blight_image_hog_features = extract_hog_features(nothern_leaf_blight_images[0])
nothern_leaf_blight_image_brief_features = extract_features_brief(nothern_leaf_blight_images[0])
nothern_leaf_blight_image_sift_features = extract_features_sift(nothern_leaf_blight_images[0])

In [11]:
#In this example, we generate a sample dataset, using the sample kaze features we just generated
#A dataset usually consists of an items features and the corresponding labels
#This will be our feature set
sample_kaze_features =[]
#This will contain our labels
sample_kaze_labels = []
sample_kaze_features.append(common_rust_image_kaze_features)
sample_kaze_labels.append('common_rust')
sample_kaze_features.append(healthy_image_kaze_features)
sample_kaze_labels.append('healthy')
sample_kaze_features.append(leaf_spot_image_kaze_features)
sample_kaze_labels.append('leaf_spot')
sample_kaze_features.append(nothern_leaf_blight_image_kaze_features)
sample_kaze_labels.append('nothern_leaf_blight')
sample_kaze_labels = np.array(sample_kaze_labels)
sample_kaze_features = np.array(sample_kaze_features)
#In our dataset, we have four instances and each instance has 4096 features
sample_kaze_features.shape

(4, 4096)

In [12]:
#We have four labels, each label just tells us the disease type
sample_kaze_labels.shape

(4,)

In [13]:
#Here, we create a sample dataset for the model. X and y are the standard variables used
X = sample_kaze_features
y = sample_kaze_labels

In [16]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

#Scaling is one of the preprocessing steps to enable our models work better
X = StandardScaler().fit_transform(X)
#We split into a train set and a test set
X_train, X_test = train_test_split(X, test_size=0.1, random_state=4)

In [17]:
print(f'Our train data set is of shape {X_train.shape}')
print(f'Our test dataset is of shape {X_test.shape}')

Our train data set is of shape (3, 4096)
Our test dataset is of shape (1, 4096)


In [18]:
from sklearn.preprocessing import LabelEncoder

#We encode our labels into numerical values since that is the kind of dataset that models work with
y = LabelEncoder().fit_transform(y)
#We then split into a train and test set
y_train, y_test = train_test_split(y, test_size=0.1, random_state=4)

In [19]:
y_train.shape, y_test.shape

((3,), (1,))

In [20]:
y_train

array([1, 3, 2])

In [21]:
#This will contain all our images
all_images = []
#This will contain all our labels
all_labels = []
labels = ['common_rust', 'healthy', 'leaf_spot', 'nothern_leaf_blight']
for i, image_folder in enumerate([common_rust_images, healthy_images, leaf_spot_images, nothern_leaf_blight_images]):
    for image in image_folder:
        all_images.append(image)
        all_labels.append(labels[i])

In [22]:
len(all_images), len(all_labels)

(128, 128)

In [23]:
#Let us extraxt KAZE features for all the images
def extract_features_kaze_all():
    """Extracts kaze features for all the images in our dataset
    
    returns
    -------
    X_train : numpy array
        An array of shape (n, 4096) containing the kaze features used in training 
    X_test : numpy array
        An array of shape (m, 4096) containing the kaze features used for testing
    y_train : numpy array
        An array of labels for the trainig set
    y_test : numpy array
        An array of labels for the test set
    """
    features, labels = [], []
    for i, image in enumerate(all_images):
        image_features = extract_features_kaze(image)
        image_label = all_labels[i]
        features.append(image_features)
        labels.append(image_label)
    features = np.array(features)
    labels = np.array(labels)
    features = StandardScaler().fit_transform(features)
    labels = LabelEncoder().fit_transform(labels)
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3)
    
    return X_train, X_test, y_train, y_test

In [24]:
X_train, X_test, y_train, y_test = extract_features_kaze_all()

In [25]:
X_train.shape, y_train.shape

((89, 4096), (89,))

In [26]:
X_test.shape, y_test.shape

((39, 4096), (39,))

In [27]:
X_test[0]

array([0.1275366 , 0.24601872, 0.19321055, ..., 0.        , 0.        ,
       0.        ])

In [28]:
X_train[0]

array([ 0.1095539 ,  0.12280111, -0.6038938 , ...,  0.        ,
        0.        ,  0.        ])

In [29]:
y_train[:10]

array([2, 0, 0, 1, 3, 1, 0, 1, 3, 3])

In [30]:
y_test[:10]

array([0, 2, 3, 3, 3, 1, 1, 3, 3, 1])

In [None]:
#Now these two, y_train and X_train can be fed into any classifier

In [33]:
#Let us extraxt ORB features for all the images
def extract_features_orb_all():
    """Extracts orb features for all the images in our dataset
    
    returns
    -------
    X_train : numpy array
        An array of shape (n, 4096) containing the orb features used in training 
    X_test : numpy array
        An array of shape (m, 4096) containing the orb features used for testing
    y_train : numpy array
        An array of labels for the trainig set
    y_test : numpy array
        An array of labels for the test set
    """
    features, labels = [], []
    for i, image in enumerate(all_images):
        try:
            image_features = extract_features_orb(image)
            image_label = all_labels[i]
            features.append(image_features)
            labels.append(image_label)
        except AttributeError:
            pass
    features = np.array(features)
    labels = np.array(labels)
    features = StandardScaler().fit_transform(features)
    labels = LabelEncoder().fit_transform(labels)
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3)
    
    return X_train, X_test, y_train, y_test

In [34]:
X_train, X_test, y_train, y_test = extract_features_orb_all()
X_train.shape, y_train.shape

((87, 4096), (87,))

In [35]:
X_test.shape, y_test.shape

((38, 4096), (38,))

In [36]:
#Let us extraxt hog features for all the images
def extract_features_hog_all():
    """Extracts hog features for all the images in our dataset
    
    returns
    -------
    X_train : numpy array
        An array of shape (n, 4096) containing the hog features used in training 
    X_test : numpy array
        An array of shape (m, 4096) containing the hog features used for testing
    y_train : numpy array
        An array of labels for the trainig set
    y_test : numpy array
        An array of labels for the test set
    """
    features, labels = [], []
    for i, image in enumerate(all_images):
        image_features = extract_hog_features(image)
        image_label = all_labels[i]
        features.append(image_features)
        labels.append(image_label)
    features = np.array(features)
    labels = np.array(labels)
    features = StandardScaler().fit_transform(features)
    labels = LabelEncoder().fit_transform(labels)
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3)
    
    return X_train, X_test, y_train, y_test

In [37]:
X_train, X_test, y_train, y_test = extract_features_hog_all()
X_train.shape, y_train.shape

((89, 4096), (89,))

In [38]:
X_test.shape, y_test.shape

((39, 4096), (39,))

In [39]:
#Let us extraxt SIFT features for all the images
def extract_features_sift_all():
    """Extracts sift features for all the images in our dataset
    
    returns
    -------
    X_train : numpy array
        An array of shape (n, 4096) containing the kaze features used in training 
    X_test : numpy array
        An array of shape (m, 4096) containing the kaze features used for testing
    y_train : numpy array
        An array of labels for the trainig set
    y_test : numpy array
        An array of labels for the test set
    """
    features, labels = [], []
    for i, image in enumerate(all_images):
        image_features = extract_features_sift(image)
        image_label = all_labels[i]
        features.append(image_features)
        labels.append(image_label)
    features = np.array(features)
    labels = np.array(labels)
    features = StandardScaler().fit_transform(features)
    labels = LabelEncoder().fit_transform(labels)
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3)
    
    return X_train, X_test, y_train, y_test

In [40]:
X_train, X_test, y_train, y_test = extract_features_sift_all()
X_train.shape, y_train.shape

((89, 4096), (89,))

In [41]:
X_test.shape, y_test.shape

((39, 4096), (39,))

In [44]:
#Let us extraxt BRIEF features for all the images
def extract_features_brief_all():
    """Extracts sift features for all the images in our dataset
    
    returns
    -------
    X_train : numpy array
        An array of shape (n, 4096) containing the kaze features used in training 
    X_test : numpy array
        An array of shape (m, 4096) containing the kaze features used for testing
    y_train : numpy array
        An array of labels for the trainig set
    y_test : numpy array
        An array of labels for the test set
    """
    features, labels = [], []
    for i, image in enumerate(all_images):
        image_features = extract_features_brief(image)
        if image_features.shape[0]:
            image_label = all_labels[i]
            features.append(image_features)
            labels.append(image_label)
        else:
            pass
    features = np.array(features)
    labels = np.array(labels)
    features = StandardScaler().fit_transform(features)
    labels = LabelEncoder().fit_transform(labels)
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3)
    
    return X_train, X_test, y_train, y_test

In [45]:
X_train, X_test, y_train, y_test = extract_features_brief_all()
X_train.shape, y_train.shape

((77, 4096), (77,))

In [46]:
X_test.shape, y_test.shape

((34, 4096), (34,))