In [73]:
# organize imports
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mahotas
import cv2
import os
import h5py

# target size handed to cv2.resize for every training image
fixed_size = (500, 500)

# root folder containing one sub-folder of images per class
train_path = "image-classification/dataset/train_tp"

# number of trees for the Random Forest classifier
num_trees = 100

# histogram bins per HSV channel (the colour histogram has bins**3 entries)
bins = 8

# fraction of the data reserved for the test split
test_size = 0.10

# random seed, fixed so results can be reproduced
seed = 9

In [74]:
# feature-descriptor-1: Hu Moments
def fd_hu_moments(image):
    """Shape descriptor: Hu moments of the grayscale version of ``image``.

    Args:
        image: colour image in OpenCV's BGR channel order (it is converted
            with ``cv2.COLOR_BGR2GRAY``).

    Returns:
        1-D NumPy array holding the flattened output of ``cv2.HuMoments``.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # raw image moments first, then the Hu invariants derived from them
    moments = cv2.moments(gray)
    return cv2.HuMoments(moments).flatten()

In [75]:
# feature-descriptor-2: Haralick Texture
def fd_haralick(image):
    """Texture descriptor: Haralick features averaged over all directions.

    Args:
        image: colour image in OpenCV's BGR channel order (it is converted
            with ``cv2.COLOR_BGR2GRAY``).

    Returns:
        1-D NumPy array: the column-wise mean (``axis=0``) of the feature
        matrix returned by ``mahotas.features.haralick``.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # mahotas yields one feature row per direction; collapse them into one
    per_direction = mahotas.features.haralick(grayscale)
    return per_direction.mean(axis=0)

In [76]:
# feature-descriptor-3: Color Histogram
def fd_histogram(image, mask=None):
    """Colour descriptor: normalized 3-D HSV histogram, flattened.

    Args:
        image: colour image in OpenCV's BGR channel order (it is converted
            with ``cv2.COLOR_BGR2HSV``).
        mask: optional mask forwarded to ``cv2.calcHist`` so that only the
            selected pixels are counted. ``None`` (the default) uses the
            whole image.

    Returns:
        1-D NumPy array with ``bins ** 3`` entries, where ``bins`` is the
        notebook-level setting.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # The mask argument used to be hard-coded to None, silently ignoring the
    # caller's mask; it is now passed through.
    # NOTE(review): OpenCV stores 8-bit hue as 0-179, so with a 0-256 range
    # the upper hue bins stay empty. Left unchanged so previously extracted
    # features remain comparable - confirm before narrowing the range.
    hist = cv2.calcHist([hsv], [0, 1, 2], mask, [bins, bins, bins],
                        [0, 256, 0, 256, 0, 256])
    # normalize in place (dst is the same array as src)
    cv2.normalize(hist, hist)
    return hist.flatten()


In [88]:
# Class labels are the names of the sub-folders of the training directory.
# Plain files that may sit next to them (e.g. OS metadata such as .DS_Store)
# are ignored so they cannot be mistaken for a class, and the names are
# sorted because os.listdir returns entries in arbitrary order.
train_labels = sorted(
    entry for entry in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, entry))
)
print(train_labels)

# accumulators filled by the extraction loop: one feature vector and one
# label per processed image, kept in the same order
global_features = []
labels = []

# counters updated by the extraction loop
i, j = 0, 0
k = 0

# number of images read from each class folder
images_per_class = 80

['bluebell', 'buttercup', 'coltsfoot', 'cowslip', 'crocus', 'daffodil', 'daisy', 'dandelion', 'fritillary', 'iris', 'lillyvalley', 'pansy', 'snowdrop', 'sunflower', 'tigerlily', 'tulip', 'windflower']


In [89]:

# Walk every class folder, extract the three global descriptors from each
# image and collect one feature vector + label per image.
for training_name in train_labels:
    # folder holding this class' images ("/" is kept literal so the printed
    # paths look the same on every platform); named class_dir rather than
    # `dir` so the builtin is not shadowed for the rest of the notebook
    class_dir = train_path + '/' + training_name

    # the sub-folder name doubles as the class label
    current_label = training_name
    print(current_label)
    k = 1
    # images are expected to be named "1 (1).jpg" ... "1 (<images_per_class>).jpg"
    for x in range(1, images_per_class + 1):
        image_path = class_dir + "/" + "1 (" + str(x) + ").jpg"
        print(image_path)

        # cv2.imread does not raise on a missing/unreadable file, it returns
        # None; fail here with the offending path instead of letting
        # cv2.resize die with an opaque assertion error
        image = cv2.imread(image_path)
        if image is None:
            raise OSError("[ERROR] could not read image: {}".format(image_path))

        # bring every image to the same size before describing it
        image = cv2.resize(image, fixed_size)

        ####################################
        # Global Feature extraction
        ####################################
        fv_hu_moments = fd_hu_moments(image)
        fv_haralick = fd_haralick(image)
        fv_histogram = fd_histogram(image)

        ###################################
        # Concatenate global features
        ###################################
        # order matters: histogram, then Haralick, then Hu moments
        global_feature = np.hstack([fv_histogram, fv_haralick, fv_hu_moments])

        # keep labels and feature vectors aligned index-by-index
        labels.append(current_label)
        global_features.append(global_feature)

        # i: images processed overall, k: position within the current folder
        i += 1
        k += 1
    print("[STATUS] processed folder: {}".format(current_label))
    # j: folders processed
    j += 1

print("[STATUS] completed Global Feature Extraction...")

bluebell
image-classification/dataset/train_tp/bluebell/1 (1).jpg
image-classification/dataset/train_tp/bluebell/1 (2).jpg
image-classification/dataset/train_tp/bluebell/1 (3).jpg
image-classification/dataset/train_tp/bluebell/1 (4).jpg
image-classification/dataset/train_tp/bluebell/1 (5).jpg
image-classification/dataset/train_tp/bluebell/1 (6).jpg
image-classification/dataset/train_tp/bluebell/1 (7).jpg
image-classification/dataset/train_tp/bluebell/1 (8).jpg
image-classification/dataset/train_tp/bluebell/1 (9).jpg
image-classification/dataset/train_tp/bluebell/1 (10).jpg
image-classification/dataset/train_tp/bluebell/1 (11).jpg
image-classification/dataset/train_tp/bluebell/1 (12).jpg
image-classification/dataset/train_tp/bluebell/1 (13).jpg
image-classification/dataset/train_tp/bluebell/1 (14).jpg
image-classification/dataset/train_tp/bluebell/1 (15).jpg
image-classification/dataset/train_tp/bluebell/1 (16).jpg
image-classification/dataset/train_tp/bluebell/1 (17).jpg
image-classifi

image-classification/dataset/train_tp/buttercup/1 (63).jpg
image-classification/dataset/train_tp/buttercup/1 (64).jpg
image-classification/dataset/train_tp/buttercup/1 (65).jpg
image-classification/dataset/train_tp/buttercup/1 (66).jpg
image-classification/dataset/train_tp/buttercup/1 (67).jpg
image-classification/dataset/train_tp/buttercup/1 (68).jpg
image-classification/dataset/train_tp/buttercup/1 (69).jpg
image-classification/dataset/train_tp/buttercup/1 (70).jpg
image-classification/dataset/train_tp/buttercup/1 (71).jpg
image-classification/dataset/train_tp/buttercup/1 (72).jpg
image-classification/dataset/train_tp/buttercup/1 (73).jpg
image-classification/dataset/train_tp/buttercup/1 (74).jpg
image-classification/dataset/train_tp/buttercup/1 (75).jpg
image-classification/dataset/train_tp/buttercup/1 (76).jpg
image-classification/dataset/train_tp/buttercup/1 (77).jpg
image-classification/dataset/train_tp/buttercup/1 (78).jpg
image-classification/dataset/train_tp/buttercup/1 (79).j

image-classification/dataset/train_tp/cowslip/1 (42).jpg
image-classification/dataset/train_tp/cowslip/1 (43).jpg
image-classification/dataset/train_tp/cowslip/1 (44).jpg
image-classification/dataset/train_tp/cowslip/1 (45).jpg
image-classification/dataset/train_tp/cowslip/1 (46).jpg
image-classification/dataset/train_tp/cowslip/1 (47).jpg
image-classification/dataset/train_tp/cowslip/1 (48).jpg
image-classification/dataset/train_tp/cowslip/1 (49).jpg
image-classification/dataset/train_tp/cowslip/1 (50).jpg
image-classification/dataset/train_tp/cowslip/1 (51).jpg
image-classification/dataset/train_tp/cowslip/1 (52).jpg
image-classification/dataset/train_tp/cowslip/1 (53).jpg
image-classification/dataset/train_tp/cowslip/1 (54).jpg
image-classification/dataset/train_tp/cowslip/1 (55).jpg
image-classification/dataset/train_tp/cowslip/1 (56).jpg
image-classification/dataset/train_tp/cowslip/1 (57).jpg
image-classification/dataset/train_tp/cowslip/1 (58).jpg
image-classification/dataset/tr

image-classification/dataset/train_tp/daffodil/1 (29).jpg
image-classification/dataset/train_tp/daffodil/1 (30).jpg
image-classification/dataset/train_tp/daffodil/1 (31).jpg
image-classification/dataset/train_tp/daffodil/1 (32).jpg
image-classification/dataset/train_tp/daffodil/1 (33).jpg
image-classification/dataset/train_tp/daffodil/1 (34).jpg
image-classification/dataset/train_tp/daffodil/1 (35).jpg
image-classification/dataset/train_tp/daffodil/1 (36).jpg
image-classification/dataset/train_tp/daffodil/1 (37).jpg
image-classification/dataset/train_tp/daffodil/1 (38).jpg
image-classification/dataset/train_tp/daffodil/1 (39).jpg
image-classification/dataset/train_tp/daffodil/1 (40).jpg
image-classification/dataset/train_tp/daffodil/1 (41).jpg
image-classification/dataset/train_tp/daffodil/1 (42).jpg
image-classification/dataset/train_tp/daffodil/1 (43).jpg
image-classification/dataset/train_tp/daffodil/1 (44).jpg
image-classification/dataset/train_tp/daffodil/1 (45).jpg
image-classifi

image-classification/dataset/train_tp/dandelion/1 (17).jpg
image-classification/dataset/train_tp/dandelion/1 (18).jpg
image-classification/dataset/train_tp/dandelion/1 (19).jpg
image-classification/dataset/train_tp/dandelion/1 (20).jpg
image-classification/dataset/train_tp/dandelion/1 (21).jpg
image-classification/dataset/train_tp/dandelion/1 (22).jpg
image-classification/dataset/train_tp/dandelion/1 (23).jpg
image-classification/dataset/train_tp/dandelion/1 (24).jpg
image-classification/dataset/train_tp/dandelion/1 (25).jpg
image-classification/dataset/train_tp/dandelion/1 (26).jpg
image-classification/dataset/train_tp/dandelion/1 (27).jpg
image-classification/dataset/train_tp/dandelion/1 (28).jpg
image-classification/dataset/train_tp/dandelion/1 (29).jpg
image-classification/dataset/train_tp/dandelion/1 (30).jpg
image-classification/dataset/train_tp/dandelion/1 (31).jpg
image-classification/dataset/train_tp/dandelion/1 (32).jpg
image-classification/dataset/train_tp/dandelion/1 (33).j

image-classification/dataset/train_tp/fritillary/1 (74).jpg
image-classification/dataset/train_tp/fritillary/1 (75).jpg
image-classification/dataset/train_tp/fritillary/1 (76).jpg
image-classification/dataset/train_tp/fritillary/1 (77).jpg
image-classification/dataset/train_tp/fritillary/1 (78).jpg
image-classification/dataset/train_tp/fritillary/1 (79).jpg
image-classification/dataset/train_tp/fritillary/1 (80).jpg
[STATUS] processed folder: fritillary
iris
image-classification/dataset/train_tp/iris/1 (1).jpg
image-classification/dataset/train_tp/iris/1 (2).jpg
image-classification/dataset/train_tp/iris/1 (3).jpg
image-classification/dataset/train_tp/iris/1 (4).jpg
image-classification/dataset/train_tp/iris/1 (5).jpg
image-classification/dataset/train_tp/iris/1 (6).jpg
image-classification/dataset/train_tp/iris/1 (7).jpg
image-classification/dataset/train_tp/iris/1 (8).jpg
image-classification/dataset/train_tp/iris/1 (9).jpg
image-classification/dataset/train_tp/iris/1 (10).jpg
image-

image-classification/dataset/train_tp/lillyvalley/1 (59).jpg
image-classification/dataset/train_tp/lillyvalley/1 (60).jpg
image-classification/dataset/train_tp/lillyvalley/1 (61).jpg
image-classification/dataset/train_tp/lillyvalley/1 (62).jpg
image-classification/dataset/train_tp/lillyvalley/1 (63).jpg
image-classification/dataset/train_tp/lillyvalley/1 (64).jpg
image-classification/dataset/train_tp/lillyvalley/1 (65).jpg
image-classification/dataset/train_tp/lillyvalley/1 (66).jpg
image-classification/dataset/train_tp/lillyvalley/1 (67).jpg
image-classification/dataset/train_tp/lillyvalley/1 (68).jpg
image-classification/dataset/train_tp/lillyvalley/1 (69).jpg
image-classification/dataset/train_tp/lillyvalley/1 (70).jpg
image-classification/dataset/train_tp/lillyvalley/1 (71).jpg
image-classification/dataset/train_tp/lillyvalley/1 (72).jpg
image-classification/dataset/train_tp/lillyvalley/1 (73).jpg
image-classification/dataset/train_tp/lillyvalley/1 (74).jpg
image-classification/dat

image-classification/dataset/train_tp/snowdrop/1 (46).jpg
image-classification/dataset/train_tp/snowdrop/1 (47).jpg
image-classification/dataset/train_tp/snowdrop/1 (48).jpg
image-classification/dataset/train_tp/snowdrop/1 (49).jpg
image-classification/dataset/train_tp/snowdrop/1 (50).jpg
image-classification/dataset/train_tp/snowdrop/1 (51).jpg
image-classification/dataset/train_tp/snowdrop/1 (52).jpg
image-classification/dataset/train_tp/snowdrop/1 (53).jpg
image-classification/dataset/train_tp/snowdrop/1 (54).jpg
image-classification/dataset/train_tp/snowdrop/1 (55).jpg
image-classification/dataset/train_tp/snowdrop/1 (56).jpg
image-classification/dataset/train_tp/snowdrop/1 (57).jpg
image-classification/dataset/train_tp/snowdrop/1 (58).jpg
image-classification/dataset/train_tp/snowdrop/1 (59).jpg
image-classification/dataset/train_tp/snowdrop/1 (60).jpg
image-classification/dataset/train_tp/snowdrop/1 (61).jpg
image-classification/dataset/train_tp/snowdrop/1 (62).jpg
image-classifi

image-classification/dataset/train_tp/tigerlily/1 (25).jpg
image-classification/dataset/train_tp/tigerlily/1 (26).jpg
image-classification/dataset/train_tp/tigerlily/1 (27).jpg
image-classification/dataset/train_tp/tigerlily/1 (28).jpg
image-classification/dataset/train_tp/tigerlily/1 (29).jpg
image-classification/dataset/train_tp/tigerlily/1 (30).jpg
image-classification/dataset/train_tp/tigerlily/1 (31).jpg
image-classification/dataset/train_tp/tigerlily/1 (32).jpg
image-classification/dataset/train_tp/tigerlily/1 (33).jpg
image-classification/dataset/train_tp/tigerlily/1 (34).jpg
image-classification/dataset/train_tp/tigerlily/1 (35).jpg
image-classification/dataset/train_tp/tigerlily/1 (36).jpg
image-classification/dataset/train_tp/tigerlily/1 (37).jpg
image-classification/dataset/train_tp/tigerlily/1 (38).jpg
image-classification/dataset/train_tp/tigerlily/1 (39).jpg
image-classification/dataset/train_tp/tigerlily/1 (40).jpg
image-classification/dataset/train_tp/tigerlily/1 (41).j

image-classification/dataset/train_tp/windflower/1 (10).jpg
image-classification/dataset/train_tp/windflower/1 (11).jpg
image-classification/dataset/train_tp/windflower/1 (12).jpg
image-classification/dataset/train_tp/windflower/1 (13).jpg
image-classification/dataset/train_tp/windflower/1 (14).jpg
image-classification/dataset/train_tp/windflower/1 (15).jpg
image-classification/dataset/train_tp/windflower/1 (16).jpg
image-classification/dataset/train_tp/windflower/1 (17).jpg
image-classification/dataset/train_tp/windflower/1 (18).jpg
image-classification/dataset/train_tp/windflower/1 (19).jpg
image-classification/dataset/train_tp/windflower/1 (20).jpg
image-classification/dataset/train_tp/windflower/1 (21).jpg
image-classification/dataset/train_tp/windflower/1 (22).jpg
image-classification/dataset/train_tp/windflower/1 (23).jpg
image-classification/dataset/train_tp/windflower/1 (24).jpg
image-classification/dataset/train_tp/windflower/1 (25).jpg
image-classification/dataset/train_tp/wi

In [90]:
# report the overall feature matrix and label sizes
# (converted once and reused below)
feature_matrix = np.array(global_features)
print("[STATUS] feature vector size {}".format(feature_matrix.shape))
print("[STATUS] training Labels {}".format(np.array(labels).shape))

# encode the string labels as integers
targetNames = np.unique(labels)
le = LabelEncoder()
target = le.fit_transform(labels)
print("[STATUS] training labels encoded...")

# rescale every feature column to the range (0-1)
# NOTE(review): the scaler is fitted on all rows; if a train/test split is
# made from this data later, the held-out rows have already influenced the
# scaling, and the fitted scaler itself is not saved here - confirm that
# this is intended.
scaler = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(feature_matrix)
print("[STATUS] feature vector normalized...")

print("[STATUS] target labels: {}".format(target))
print("[STATUS] target labels shape: {}".format(target.shape))

# save features and labels using HDF5
data_file = 'image-classification/output/data.h5'
label_file = 'image-classification/output/labels.h5'

# make sure the output folder exists; h5py does not create parent folders
for h5_path in (data_file, label_file):
    os.makedirs(os.path.dirname(h5_path), exist_ok=True)

# `with` closes each file even if writing the dataset raises
with h5py.File(data_file, 'w') as h5f_data:
    h5f_data.create_dataset('dataset_1', data=rescaled_features)

with h5py.File(label_file, 'w') as h5f_label:
    h5f_label.create_dataset('dataset_1', data=target)

print("[STATUS] end of training..")

[STATUS] feature vector size (1360, 532)
[STATUS] training Labels (1360,)
[STATUS] training labels encoded...
[STATUS] feature vector normalized...
[STATUS] target labels: [ 0  0  0 ... 16 16 16]
[STATUS] target labels shape: (1360,)
[STATUS] end of training..
