In [8]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from PIL import Image
from imutils import paths
import numpy as np
import os
import random
import cv2
import mahotas as mt
import matplotlib.pyplot as plt
import itertools
from sklearn.tree import export_graphviz

In [9]:
def extract_features(image):
    """Return the Haralick texture descriptor of ``image``, averaged over axis 0.

    ``mahotas.features.haralick`` yields one row of texture statistics per
    co-occurrence direction (presumably 4 directions x 13 features for a 2-D
    grayscale image -- confirm against the mahotas docs); collapsing axis 0
    gives a single direction-independent feature vector.
    """
    return mt.features.haralick(image).mean(axis=0)


In [10]:
def fd_histogram(image, mask=None):
    """Return a flattened, normalized 3-D HSV colour histogram of ``image``.

    Parameters
    ----------
    image : BGR image as loaded by ``cv2.imread``.
    mask : optional mask forwarded to ``cv2.calcHist``; ``None`` (the
        default) means every pixel contributes to the histogram.

    Returns
    -------
    1-D array of length ``bins ** 3``.

    Notes
    -----
    Reads the module-level ``bins`` value at call time, so ``bins`` must be
    defined before this function is first called.
    """
    # convert the image to the HSV color space
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # compute the color histogram over all three channels; the caller's mask
    # is now honoured (it was previously ignored in favour of a fixed None)
    # NOTE(review): 8-bit OpenCV hue spans [0, 180), so the [0, 256] hue range
    # leaves the upper hue bins empty -- kept as-is so feature values do not change.
    hist = cv2.calcHist([hsv], [0, 1, 2], mask, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    # normalize the histogram in place
    cv2.normalize(hist, hist)
    # return the histogram as a flat feature vector
    return hist.flatten()


In [11]:
def fd_hu_moments(image):
    """Return the Hu moment invariants of ``image`` as a flat 1-D array.

    The raw image moments are computed with ``cv2.moments`` and then passed
    to ``cv2.HuMoments``; the result is flattened so it can be concatenated
    with other feature vectors.
    """
    raw_moments = cv2.moments(image)
    hu = cv2.HuMoments(raw_moments)
    return hu.flatten()

In [12]:
# Collect every image path under "dataset" in a reproducible shuffled order,
# and set up the accumulators and constants used by the extraction cells.
print("[INFO] loading images...")

# sorting first makes the seeded shuffle independent of directory listing order
imagePaths = sorted(paths.list_images("dataset"))
random.seed(42)
random.shuffle(imagePaths)

# accumulators: one feature vector / one label per image
global_feature = []
data, labels = [], []

# constants read by the feature-extraction code
IMAGE_DIMS = (50, 50, 1)
bins = 8  # histogram bins per HSV channel (read by fd_histogram)

[INFO] loading images...


In [13]:
# Build one global feature vector and one class label per input image.
# The accumulators are reset here so that re-running this cell does not
# append a second copy of every sample.
data = []
labels = []
for imagePath in imagePaths:
    # cv2.imread reports an unreadable or corrupt file by returning None
    # rather than raising; skip such files instead of crashing in resize
    image = cv2.imread(imagePath)
    if image is None:
        print("[WARN] could not read image, skipping: {}".format(imagePath))
        continue
    image = cv2.resize(image, (IMAGE_DIMS[1], IMAGE_DIMS[0]))
    # the colour histogram needs the BGR image, so take it before grayscale
    fv_histogram = fd_histogram(image)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    fv_hu_moments = fd_hu_moments(image)
    features = extract_features(image)
    # concatenate colour, texture and shape descriptors into one vector
    global_feature = np.hstack([fv_histogram, features, fv_hu_moments])
    data.append(global_feature)
    # the class label is the name of the directory that contains the image
    label = imagePath.split(os.path.sep)[-2]
    l = label  # short alias retained from the original cell
    labels.append(label)


In [14]:
# define the dictionary of candidate models, where the key is the short
# name used to select a model and the value is the unfitted estimator
models = {
    "knn": KNeighborsClassifier(n_neighbors=1),
    "naive_bayes": GaussianNB(),
    # solver name is "lbfgs" (letter l); the former "1bfgs" (digit one) is
    # not a valid solver and is rejected by scikit-learn
    "logit": LogisticRegression(solver="lbfgs", multi_class="auto"),
    "svm": SVC(kernel="poly", degree=2),
    "decision_tree": DecisionTreeClassifier(),
    "random_forest": RandomForestClassifier(n_estimators=100),
    "mlp": MLPClassifier()
}

In [None]:
# Hold out 25% of the samples for testing and train on the remaining 75%;
# the fixed random_state keeps the partition reproducible across runs.
print("[INFO] constructing training/testing split...")
trainData, testData, trainLabels, testLabels = train_test_split(
    data,
    labels,
    test_size=0.25,
    random_state=42,
)
