In [1]:
import h5py
import numpy as np
import os
import glob
import cv2
import mahotas as mt
import csv
from matplotlib import pyplot
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold, ShuffleSplit
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.externals import joblib
from itertools import combinations
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Names of the individual feature sets. Every non-empty combination of these,
# joined with '_', is later used as a dataset key when reading the features file.
features_name = ['glcm', 'lbp', 'hsv', 'correlogram', 'cld', 'ngtdm']

# Shared containers filled in by later cells.
features_name_perm = []
results = []
scoring = "accuracy"
global_features = []

# (display name, estimator) pairs evaluated for every feature combination.
# Estimators that accept a seed use random_state=9.
models = [
    ('Logistic Regression', LogisticRegression(random_state=9)),
    ('Linear Discriminant Analysis', LinearDiscriminantAnalysis()),
    ('KNeighbors', KNeighborsClassifier(n_neighbors=7)),
    ('Decision Tree', DecisionTreeClassifier(random_state=9)),
    ('RandomForest', RandomForestClassifier(n_estimators=100, random_state=9)),
    ('NB', GaussianNB()),
    ('SVM', SVC(random_state=9)),
    ('Neural', MLPClassifier(solver='lbfgs',
                             alpha=0.1,
                             random_state=9,
                             epsilon=1e-04,
                             hidden_layer_sizes=100,
                             activation='logistic')),
]

In [3]:
# Build every non-empty combination of feature names (sizes 1..len), each joined
# with '_' (e.g. 'glcm_lbp'). The list is rebuilt from scratch rather than
# appended to, so re-running this cell does not duplicate entries.
features_name_perm = [
    '_'.join(map(str, combo))
    for size in range(1, len(features_name) + 1)
    for combo in combinations(features_name, size)
]

In [4]:
# Entry names found directly inside the image directory, as a NumPy array so
# they can be indexed with the integer index arrays produced by the CV splitter.
# NOTE(review): os.listdir returns entries in arbitrary order — confirm this
# matches the row order of the stored features before relying on the pairing.
images_path = "./fruit_picture"
files = np.array(os.listdir(images_path))

In [5]:
# print(global_features[1].shape, global_labels.shape)

In [6]:
# for f in features_name_perm:
#     print(f)
#     h5f_data = h5py.File('./output/data.h5', 'r')
#     h5f_label = h5py.File('./output/labels.h5', 'r')

#     global_features_string = h5f_data[f]
#     global_labels_string = h5f_label['labels']

#     global_features = np.array(global_features_string)
#     global_labels = np.array(global_labels_string)

#     h5f_data.close()
#     h5f_label.close()
    
#     global_features

#     for name, model in models:
#         cv_results = cross_val_score(model, global_features, global_labels, cv=10, scoring=scoring)
#         results.append({'feature':f  , 'model': name, 'accuracy_mean':cv_results.mean()})
#         msg = "%s: %f " % (name, cv_results.mean())
#         print(msg)

In [7]:
# Evaluate every model on every feature combination with 5-fold stratified
# cross-validation and collect one result row per feature combination.
# The splitter shuffles with a fixed seed (random_state=9).
s = StratifiedKFold(n_splits=5,random_state=9,shuffle=True)
cnt = 0
targetName = ['banana', 'blueberry', 'cherry', 'chinese_pear', 'dragon_fruit',
              'green_apple', 'green_grape', 'guava', 'kiwi', 'longan', 'mango',
              'marian_plum', 'orange', 'passion_fruit', 'rambutan', 'red_apple',
              'rose_apple', 'salacca', 'strawberry', 'tamarind']

# Result column for each entry of `models`, in the same order. These keys are
# the CSV column names and intentionally differ from some display names in
# `models` ('RandomForest' -> 'Random Forest', 'NB' -> 'GaussianNB').
result_columns = ['Logistic Regression',
                  'Linear Discriminant Analysis',
                  'KNeighbors',
                  'Decision Tree',
                  'Random Forest',
                  'GaussianNB',
                  'SVM',
                  'Neural']
if len(models) != len(result_columns):
    raise ValueError(
        "models has %d entries but %d result columns are defined; "
        "update result_columns to match" % (len(models), len(result_columns)))

# Empty the shared list in place so re-running this cell does not append
# duplicate rows to what will be written to the CSV.
results.clear()

# Labels are the same for every feature combination, so read them once.
# The context manager closes the file even if the dataset lookup fails.
with h5py.File('./output/labels.h5', 'r') as h5f_label:
    global_labels = np.array(h5f_label['labels'])

for f in features_name_perm:
    avg = []
    print('---------' , f , '---------')
    with h5py.File('./output/data.h5', 'r') as h5f_data:
        global_features = np.array(h5f_data[f])

    # Sanity check: prints True if the feature matrix contains any NaN.
    print(np.any(np.isnan(global_features)))
    for name, model in models:
        accuracy = []
        msg = "%s: " % (name)
        for train_index, test_index in s.split(global_features,global_labels):
            x_train, x_test = global_features[train_index], global_features[test_index]
            y_train, y_test = global_labels[train_index], global_labels[test_index]
            model.fit(x_train,y_train)
            predict = model.predict(x_test)
            # Documented argument order is (y_true, y_pred); accuracy is
            # symmetric, so the numeric result is unchanged.
            accuracy.append(accuracy_score(y_test, predict))
        print(msg,np.mean(accuracy))
        avg.append(np.mean(accuracy))

    # One row per feature combination: per-model mean accuracy plus the mean
    # over all models.
    row = {'Feature': f}
    row.update(zip(result_columns, avg))
    row['Avg'] = np.mean(avg)
    results.append(row)
    cnt = cnt + 1

--------- glcm ---------
False
Logistic Regression:  0.666
Linear Discriminant Analysis:  0.76
KNeighbors:  0.716
Decision Tree:  0.6610000000000001
RandomForest:  0.765
NB:  0.677
SVM:  0.715
Neural:  0.821
--------- lbp ---------
False
Logistic Regression:  0.787
Linear Discriminant Analysis:  0.8049999999999999
KNeighbors:  0.653
Decision Tree:  0.5160000000000001
RandomForest:  0.6910000000000001
NB:  0.562
SVM:  0.713
Neural:  0.837
--------- hsv ---------
False
Logistic Regression:  0.983
Linear Discriminant Analysis:  0.982
KNeighbors:  0.9709999999999999
Decision Tree:  0.966
RandomForest:  0.9950000000000001
NB:  0.9720000000000001
SVM:  0.975
Neural:  0.9890000000000001
--------- correlogram ---------
False
Logistic Regression:  0.309
Linear Discriminant Analysis:  0.305
KNeighbors:  0.27799999999999997
Decision Tree:  0.197
RandomForest:  0.346
NB:  0.289
SVM:  0.33299999999999996
Neural:  0.313
--------- cld ---------
False
Logistic Regression:  0.867
Linear Discriminant An

SVM:  0.994
Neural:  0.9940000000000001
--------- lbp_correlogram_cld ---------
False
Logistic Regression:  0.9349999999999999
Linear Discriminant Analysis:  0.9879999999999999
KNeighbors:  0.679
Decision Tree:  0.889
RandomForest:  0.9719999999999999
NB:  0.9369999999999999
SVM:  0.8709999999999999
Neural:  0.961
--------- lbp_correlogram_ngtdm ---------
False
Logistic Regression:  0.86
Linear Discriminant Analysis:  0.874
KNeighbors:  0.584
Decision Tree:  0.5820000000000001
RandomForest:  0.796
NB:  0.688
SVM:  0.742
Neural:  0.8540000000000001
--------- lbp_cld_ngtdm ---------
False
Logistic Regression:  0.96
Linear Discriminant Analysis:  0.992
KNeighbors:  0.8309999999999998
Decision Tree:  0.8949999999999999
RandomForest:  0.975
NB:  0.959
SVM:  0.952
Neural:  0.983
--------- hsv_correlogram_cld ---------
False
Logistic Regression:  0.985
Linear Discriminant Analysis:  0.9810000000000001
KNeighbors:  0.907
Decision Tree:  0.954
RandomForest:  0.9950000000000001
NB:  0.97
SVM:  0

In [8]:
# s = StratifiedKFold(n_splits=5,random_state=9,shuffle=True)
# cnt = 0
# targetName = ['banana', 'blueberry', 'cherry', 'chinese_pear', 'dragon_fruit',
#               'green_apple', 'green_grape', 'guava', 'kiwi', 'longan', 'mango',
#               'marian_plum', 'orange', 'passion_fruit', 'rambutan', 'red_apple',
#               'rose_apple', 'salacca', 'strawberry', 'tamarind']

# for f in features_name_perm:
#     if f == "glcm_lbp_hsv_cld":
#         avg = []
#         print('---------' , f , '---------')
#         h5f_data = h5py.File('./output/data20-1000.h5', 'r')
#         h5f_label = h5py.File('./output/labels.h5', 'r')

#         global_features_string = h5f_data[f]
#         global_labels_string = h5f_label['labels']

#         global_features = np.array(global_features_string)
#         global_labels = np.array(global_labels_string)

#         h5f_data.close()
#         h5f_label.close()

#         print(np.any(np.isnan(global_features)))
#         for name, model in models:
#             print(name,"------------")
#             j = 0
#             accuracy = []
#             msg = "%s: " % (name)         
#             for train_index, test_index in s.split(global_features,global_labels):
#                 print(j)
#                 missMatch = []
#                 x_train, x_test = global_features[train_index], global_features[test_index]
#                 y_train, y_test = global_labels[train_index], global_labels[test_index]
#                 name_train, name_test = files[train_index], files[test_index]
#                 model.fit(x_train,y_train) 
#                 predict = model.predict(x_test)

#                 for i in range(len(predict)):
#                     if predict[i] != y_test[i]:
#                         miss = "%s : %s" % (targetName[y_test[i]] , targetName[predict[i]])
#                         missMatch.append(miss)
#                         print(name_test[i], miss)
#                 accuracy.append(accuracy_score(predict,y_test))
#                 j = j + 1            
# #             print(msg,np.mean(accuracy),accuracy)
#             avg.append(np.mean(accuracy))
#         results.append({'Feature': f,
#                       'Logistic Regression': avg[0],
#                       'Linear Discriminant Analysis': avg[1],
#                       'KNeighbors': avg[2],
#                       'Decision Tree': avg[3],
#                       'Random Forest': avg[4],
#                       'GaussianNB': avg[5],
#                       'SVM': avg[6],
#                       'Neural': avg[7],
#                       'Avg': np.mean(avg)})
#     else: pass
#     cnt = cnt + 1

In [9]:
# Export the collected cross-validation results, one CSV row per feature
# combination. The column list is passed explicitly so the header is fixed and
# the cell does not fail with IndexError when `results` is empty (it then
# writes a header-only file).
fieldnames = ['Feature',
              'Logistic Regression',
              'Linear Discriminant Analysis',
              'KNeighbors',
              'Decision Tree',
              'Random Forest',
              'GaussianNB',
              'SVM',
              'Neural',
              'Avg']

# newline='' lets the csv module control line endings itself.
with open('results-5fold.csv', mode='w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(results)