In [1]:
import cv2
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [2]:
# path to training data
data_path = "Dataset"

In [3]:
# bins for histogram
bins = 8

In [4]:
# feature-1: Color Histogram
def fd_histogram(image, mask=None):
    # convert the image to HSV (Hue, Saturation, Value)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # compute the color histogram
    hist  = cv2.calcHist([image], [0, 1, 2], None, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    # normalize the histogram
    cv2.normalize(hist, hist)
    # return the histogram
    return hist.flatten()

In [5]:
# feature 2: ORB
def fd_orb(image):
    # Initiate STAR detector
    vector_size = 32
    orb = cv2.ORB_create()
   
    kps, des1 = orb.detectAndCompute(image, None)
    flat_des =  des1.flatten()
    
    # Descriptor vector size is 64
    needed_size = (vector_size * 64)
    flat_des = (flat_des)[:vector_size]
    if len(flat_des) < needed_size:
            # if we have less the 32 descriptors then just adding zeros at the
            # end of our feature vector
        flat_des = np.hstack([flat_des, np.zeros(needed_size - len(flat_des))])
    
    return flat_des

In [6]:
# get the labels
data_labels = os.listdir(data_path)

In [7]:
# empty lists to hold feature vectors and labels
feats = []
labels = []
# fixed-sizes for image
fixed_size = tuple((500, 500))

In [8]:
# loop over the data sub-folders
for data_name in data_labels:
    # join the data path and each species folder
    dir = os.path.join(data_path, data_name)
    # get the current training label
    current_label = data_name
    
   
    for img in os.listdir(dir):
        file = dir + "/" +img 
        image = cv2.imread(file)
        image = cv2.resize(image, fixed_size)

        ####################################
        #Feature extraction
        ####################################
    
        fv_histogram  = fd_histogram(image)
        fv_orb = fd_orb(image)
       
        ###################################
        # Concatenate global features
        ###################################
        feat = np.hstack([fv_histogram, fv_orb])

        # update the list of labels and feature vectors
        labels.append(current_label)
        feats.append(feat)

In [9]:
feat_vector = np.array(feats)
# get the overall feature vector size
print ("[STATUS] feature vector size {}".format(feat_vector.shape))

# get the overall label size
print ("[STATUS] Labels {}".format(np.array(labels).shape))

[STATUS] feature vector size (42, 2560)
[STATUS] Labels (42,)


In [10]:
# split the training and testing data
(X_train, X_tes, y_train, y_test) = train_test_split(np.array(feat_vector),np.array(labels), test_size=0.1, random_state=8)

In [11]:
# create all the machine learning models
models = []
models.append(('LR', LogisticRegression(random_state=9, max_iter=1000)))
models.append(('DT', DecisionTreeClassifier(random_state=9)))

In [12]:
for name, md in models:
    model = md.fit(X_train, y_train)
    pred = model.predict(X_tes)
    acc = accuracy_score(y_test, pred)
    f1 = f1_score(y_test, pred, average='macro')
    msg = "%s: Akurasi: %f F1-Score: %f" % (name, acc, f1)
    print(msg)

LR: Akurasi: 0.600000 F1-Score: 0.583333
DT: Akurasi: 0.400000 F1-Score: 0.400000
