# Cat and dog classification with various classifier algorithms

## import req. libraries

In [48]:
# import the necessary packages
from sklearn.cross_validation import train_test_split
from imutils import paths
import numpy as np
import argparse
import imutils # by adrian
import cv2
import os

## define functions

In [49]:
def image_to_feature_vector(image, size=(32, 32)):
	# resize the image to a fixed size, then flatten the image into
	# a list of raw pixel intensities
	return cv2.resize(image, size).flatten()
	
	
def extract_color_histogram(image, bins=(8, 8, 8)):
	# extract a 3D color histogram from the HSV color space using
	# the supplied number of `bins` per channel
	hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
	hist = cv2.calcHist([hsv], [0, 1, 2], None, bins,
		[0, 180, 0, 256, 0, 256])
 
	# handle normalizing the histogram if we are using OpenCV 2.4.X
	if imutils.is_cv2():
		hist = cv2.normalize(hist)
 
	# otherwise, perform "in place" normalization in OpenCV 3 (I
	# personally hate the way this is done
	else:
		cv2.normalize(hist, hist)
 
	# return the flattened histogram as the feature vector
	return hist.flatten()

## read images 

In [50]:
# grab the list of images that we'll be describing
print("[INFO] describing images...")
imagePaths = list(paths.list_images("Cat_vs_Dog/"))
print(imagePaths)
# initialize the raw pixel intensities matrix, the features matrix,
# and labels list
rawImages = []
features = []
labels = []

[INFO] describing images...
['Cat_vs_Dog/cat.0.jpg', 'Cat_vs_Dog/cat.1.jpg', 'Cat_vs_Dog/cat.10.jpg', 'Cat_vs_Dog/cat.100.jpg', 'Cat_vs_Dog/cat.1000.jpg', 'Cat_vs_Dog/cat.1001.jpg', 'Cat_vs_Dog/cat.1002.jpg', 'Cat_vs_Dog/cat.1003.jpg', 'Cat_vs_Dog/cat.1004.jpg', 'Cat_vs_Dog/cat.1005.jpg', 'Cat_vs_Dog/cat.1006.jpg', 'Cat_vs_Dog/cat.1007.jpg', 'Cat_vs_Dog/cat.1008.jpg', 'Cat_vs_Dog/cat.1009.jpg', 'Cat_vs_Dog/cat.101.jpg', 'Cat_vs_Dog/cat.1010.jpg', 'Cat_vs_Dog/cat.1011.jpg', 'Cat_vs_Dog/cat.1012.jpg', 'Cat_vs_Dog/cat.1013.jpg', 'Cat_vs_Dog/cat.1014.jpg', 'Cat_vs_Dog/cat.1015.jpg', 'Cat_vs_Dog/cat.1016.jpg', 'Cat_vs_Dog/cat.1017.jpg', 'Cat_vs_Dog/cat.1018.jpg', 'Cat_vs_Dog/cat.1019.jpg', 'Cat_vs_Dog/cat.102.jpg', 'Cat_vs_Dog/cat.1020.jpg', 'Cat_vs_Dog/cat.1021.jpg', 'Cat_vs_Dog/cat.103.jpg', 'Cat_vs_Dog/cat.104.jpg', 'Cat_vs_Dog/cat.105.jpg', 'Cat_vs_Dog/cat.106.jpg', 'Cat_vs_Dog/cat.107.jpg', 'Cat_vs_Dog/cat.108.jpg', 'Cat_vs_Dog/cat.109.jpg', 'Cat_vs_Dog/cat.11.jpg', 'Cat_vs_Dog/cat.110

## convert to feature vectors

In [8]:

# loop over the input images
for (i, imagePath) in enumerate(imagePaths):
	# load the image and extract the class label (assuming that our
	# path as the format: /path/to/dataset/{class}.{image_num}.jpg
	image = cv2.imread(imagePath)
	label = imagePath.split(os.path.sep)[-1].split(".")[0]
 
	# extract raw pixel intensity "features", followed by a color
	# histogram to characterize the color distribution of the pixels
	# in the image
	pixels = image_to_feature_vector(image)
	hist = extract_color_histogram(image)
 
	# update the raw images, features, and labels matricies,
	# respectively
	rawImages.append(pixels)
	features.append(hist)
	labels.append(label)
 
	# show an update every 1,000 images
	if i > 0 and i % 1000 == 0:
		print("[INFO] processed {}/{}".format(i, len(imagePaths)))
		
		
		
# show some information on the memory consumed by the raw images
# matrix and features matrix
rawImages = np.array(rawImages)
features = np.array(features)
labels = np.array(labels)
print("[INFO] pixels matrix: {:.2f}MB".format(
	rawImages.nbytes / (1024 * 1000.0)))
print("[INFO] features matrix: {:.2f}MB".format(
	features.nbytes / (1024 * 1000.0)))

[INFO] processed 1000/1981
[INFO] pixels matrix: 5.94MB
[INFO] features matrix: 3.96MB


## partition the data into training and test sets 

In [None]:
# partition the data into training and testing splits, using 75%
# of the data for training and the remaining 25% for testing
(trainRI, testRI, trainRL, testRL) = train_test_split(
	rawImages, labels, test_size=0.25, random_state=42)
(trainFeat, testFeat, trainLabels, testLabels) = train_test_split(
	features, labels, test_size=0.25, random_state=42)


# kNN 

In [53]:
from sklearn.neighbors import KNeighborsClassifier

# train and evaluate a k-NN classifer on the raw pixel intensities
print("[INFO] evaluating raw pixel accuracy...")
model = KNeighborsClassifier(n_neighbors=5,
	n_jobs=10)
model.fit(trainRI, trainRL)
acc = model.score(testRI, testRL)
print("[INFO] raw pixel accuracy: {:.2f}%".format(acc * 100))

[INFO] evaluating raw pixel accuracy...
[INFO] raw pixel accuracy: 60.69%


In [25]:

# train and evaluate a k-NN classifer on the histogram
# representations
print("[INFO] evaluating histogram accuracy...")
model = KNeighborsClassifier(n_neighbors=5,
	n_jobs=10)
model.fit(trainFeat, trainLabels)
acc = model.score(testFeat, testLabels)
print("[INFO] histogram accuracy: {:.2f}%".format(acc * 100))

[INFO] evaluating histogram accuracy...
[INFO] histogram accuracy: 58.06%


# SVM

In [36]:
from sklearn import svm
# train and evaluate a k-NN classifer on the raw pixel intensities
print("[INFO] evaluating raw pixel accuracy...")
model = svm.SVC(C=1,gamma=0.001,kernel='poly')
model.fit(trainRI, trainRL)
acc = model.score(testRI, testRL)
print("[INFO] raw pixel accuracy: {:.2f}%".format(acc * 100))

[INFO] evaluating raw pixel accuracy...
[INFO] raw pixel accuracy: 63.71%


In [39]:
# train and evaluate a k-NN classifer on the raw pixel intensities
print("[INFO] evaluating raw pixel accuracy...")
model = svm.SVC(C=1,gamma=0.001,kernel='linear')
model.fit(trainFeat, trainLabels)
acc = model.score(testFeat, testLabels)
print("[INFO] raw pixel accuracy: {:.2f}%".format(acc * 100))

[INFO] evaluating raw pixel accuracy...
[INFO] raw pixel accuracy: 60.08%


# Decision tree

In [40]:
from sklearn import tree
# train and evaluate a k-NN classifer on the raw pixel intensities
print("[INFO] evaluating raw pixel accuracy...")
model = tree.DecisionTreeClassifier()
model.fit(trainRI, trainRL)
acc = model.score(testRI, testRL)
print("[INFO] raw pixel accuracy: {:.2f}%".format(acc * 100))

[INFO] evaluating raw pixel accuracy...
[INFO] raw pixel accuracy: 53.83%


In [41]:

# train and evaluate a k-NN classifer on the raw pixel intensities
print("[INFO] evaluating raw pixel accuracy...")
model = tree.DecisionTreeClassifier()
model.fit(trainFeat, trainLabels)
acc = model.score(testFeat, testLabels)
print("[INFO] raw pixel accuracy: {:.2f}%".format(acc * 100))

[INFO] evaluating raw pixel accuracy...
[INFO] raw pixel accuracy: 55.44%


# Random forest

In [60]:
from sklearn.ensemble import RandomForestClassifier
# train and evaluate a k-NN classifer on the raw pixel intensities
print("[INFO] evaluating raw pixel accuracy...")
model = RandomForestClassifier(n_estimators=75, max_depth=5,random_state=0)
model.fit(trainRI, trainRL)
acc = model.score(testRI, testRL)
print("[INFO] raw pixel accuracy: {:.2f}%".format(acc * 100))

[INFO] evaluating raw pixel accuracy...
[INFO] raw pixel accuracy: 63.10%


In [61]:
# train and evaluate a k-NN classifer on the raw pixel intensities
print("[INFO] evaluating raw pixel accuracy...")
model = RandomForestClassifier(n_estimators=75, max_depth=5,random_state=0)
model.fit(trainFeat, trainLabels)
acc = model.score(testFeat, testLabels)
print("[INFO] raw pixel accuracy: {:.2f}%".format(acc * 100))

[INFO] evaluating raw pixel accuracy...
[INFO] raw pixel accuracy: 67.34%


# Confusion matrix - accuracy - precision - recall 

In [82]:
from sklearn.metrics import confusion_matrix, precision_score , recall_score
#from sklearn import metrics

y = model.predict(testFeat)
confusion_matrix(y, testLabels)


array([[193,  90],
       [ 72, 141]], dtype=int64)

In [86]:
model.score(testFeat, testLabels)

0.67338709677419351

In [83]:
precision_score(y, testLabels, average='macro')  

0.66934574859103158

In [84]:
recall_score(y, testLabels, average='macro')  

0.67197531478624395