In [1]:
# import the necessary packages
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.cross_validation import train_test_split
from imutils import paths
import numpy as np
import argparse
import imutils
import cv2
import os
import re




In [5]:
def image_to_feature_vector(image, size=(128, 128)):
	# resize the image to a fixed size, then flatten the image into
	# a list of raw pixel intensities
	return cv2.resize(image, size).flatten()

def extract_color_histogram(image, bins=(64, 64, 64)):
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins,
        [0, 180, 0, 256, 0, 256])

    if imutils.is_cv2():
        hist = cv2.normalize(hist)
    else:
        cv2.normalize(hist, hist)

    return hist.flatten()

In [6]:
from timeit import default_timer as timer
from sklearn.preprocessing import StandardScaler,MinMaxScaler
import matplotlib.pyplot as plt

print("[INFO] describing images...")

imagePaths = [os.path.join("C:\\Users\\Java\\Desktop\\knntest",x) for x in os.listdir("C:\\Users\\Java\\Desktop\\knntest")]

rawImages = []
features = []
labels = []


for (i, imagePath) in enumerate(imagePaths):
    image = cv2.imread(imagePath)
    label = re.findall('\D+',imagePath.split(os.path.sep)[-1].split(".")[0])
    
    pixels = image_to_feature_vector(image)
    hist = extract_color_histogram(image)
    
    rawImages.append(pixels)
    features.append(hist)
    labels.append(label)
    if i > 0 and i % 1000 == 0:
        print("[INFO] processed {}/{}".format(i, len(imagePaths)))


rawImages = np.array(rawImages)
features = np.array(features)
labels = np.array(labels)
print("[INFO] pixels matrix: {:.2f}MB".format(
	rawImages.nbytes / (1024 * 1000.0)))
print("[INFO] features matrix: {:.2f}MB".format(
	features.nbytes / (1024 * 1000.0)))

(trainRI, testRI, trainRL, testRL) = train_test_split(
	rawImages, labels, test_size=0.15, random_state=42)
(trainFeat, testFeat, trainLabels, testLabels) = train_test_split(
	features, labels, test_size=0.15, random_state=42)

# print("\n")
# print("[INFO] evaluating histogram accuracy...")
# start = timer()

# model = KNeighborsClassifier(n_neighbors=7,n_jobs=-1)
# model.fit(trainFeat, trainLabels)
# acc1 = model.score(trainFeat, trainLabels)
# acc = model.score(testFeat, testLabels)


# end = timer()
# print("Total time in {} minutes".format(0.1*round((end - start)/6)))
# print("[INFO] k-NN classifier: k=%d" % 7)
# print("[INFO] trainset histogram accuracy: {:.2f}%".format(acc1 * 100))
# print("[INFO] testset histogram accuracy: {:.2f}%".format(acc * 100))

# SVC
# scaler = StandardScaler()
# trainFeat = scaler.fit_transform(trainFeat)
# from sklearn.decomposition import PCA
# pca = PCA(n_components = 3000)
# X_train_pca = pca.fit_transform(trainFeat)
# X_test_pca = pca.transform(testFeat)
#pca.explained_variance_ratio_
# plt.bar(range(1,30), pca.explained_variance_ratio_, alpha=0.5, align='center')
# plt.step(range(1,30), np.cumsum(pca.explained_variance_ratio_), where='mid')
# plt.ylabel('Explained variance ratio')
# plt.xlabel('Principal components')
# plt.show()

print("\n[INFO] evaluating histogram accuracy...")

start = timer()
model = SVC(max_iter=500,class_weight='balanced')
model.fit(trainFeat, trainLabels)
acc1 = model.score(trainFeat, trainLabels)
acc = model.score(testFeat, testLabels)
end = timer()
print("Total time in {} minutes".format(0.1*round((end - start)/6)))
print("[INFO] SVM-SVC trainset histogram accuracy: {:.2f}%".format(acc1 * 100))
print("[INFO] SVM-SVC testset histogram accuracy: {:.2f}%".format(acc * 100))


[INFO] describing images...
[INFO] processed 1000/5167
[INFO] processed 2000/5167
[INFO] processed 3000/5167
[INFO] processed 4000/5167
[INFO] processed 5000/5167
[INFO] pixels matrix: 248.02MB
[INFO] features matrix: 5291.01MB

[INFO] evaluating histogram accuracy...


  y = column_or_1d(y, warn=True)


Total time in 197.70000000000002 minutes
[INFO] SVM-SVC trainset histogram accuracy: 8.06%
[INFO] SVM-SVC testset histogram accuracy: 10.95%


In [8]:
trainFeat.shape


(4391, 32768)