# Classifying the Pandora Dataset

In [1]:
import numpy as np # linear algebra
import json
from matplotlib import pyplot as plt
from skimage import color
from skimage.feature import hog
from sklearn import svm
from sklearn.metrics import classification_report,accuracy_score
from skimage import feature
import glob
import os
import cv2

## Importing Data

 - Import Images
 - Get Class Labels
 - Get Categories


In [2]:
#print(next(os.walk('F:\\datamining\\rawdata'))[1])
class_labels = []
categories = []
category_count = 0
images_total = []
raw_data = "F:\\datamining\\rawdata"

for item in os.listdir(raw_data):
    img_dir = os.path.join(raw_data, item)
    categories.append(item)
    data_path = os.path.join(img_dir,'*g')
    files = glob.glob(data_path)
    
    for f1 in files:
        img = cv2.imread(f1)
        images_total.append(img)
        class_labels.append(category_count)
    category_count+=1

## Processing the Data

### Feature Extraction
Convert images to grayscale

In [3]:
images_gray = [ color.rgb2gray(i) for i in images_total]

Extract HOG Features

In [4]:
ppc = 16
hog_images = []
hog_features = []
for image in images_gray:
    fd,hog_image = hog(image, orientations=8, pixels_per_cell=(ppc,ppc),cells_per_block=(4, 4),block_norm= 'L2',visualise=True)
    hog_images.append(hog_image)
    hog_features.append(fd)

 ### Creating the Test and Training Datasets
 Combining Extracted Features and Class Labels <br> Shuffling the data

In [5]:
class_labels=np.array(class_labels).reshape(len(class_labels),1)
data_total = np.array(hog_features)
data_frame = np.hstack((data_total,class_labels))
np.random.shuffle(data_frame)

Partitioning the Data into Test and Training sets

In [6]:
percentage = 80
partition = int(len(data_total)*percentage/100)
x_train, x_test = data_frame[:partition,:-1],  data_frame[partition:,:-1]
y_train, y_test = data_frame[:partition,-1:].ravel() , data_frame[partition:,-1:].ravel()

## Classifying the Data
### Random Forest Classifier
Create and train the classifier

In [7]:
from sklearn.ensemble import RandomForestClassifier

#Create a Gaussian Classifier
clf=RandomForestClassifier(n_estimators=100)

#Train the model using the training sets y_pred=clf.predict(X_test)
clf.fit(x_train,y_train)
y_pred=clf.predict(x_test)

Accuracy of the Random Forest Classifier

In [8]:
print("Accuracy: "+str(accuracy_score(y_test, y_pred)))
print('\n')
print(classification_report(y_test, y_pred,target_names=categories))


Accuracy: 0.5416666666666666


             precision    recall  f1-score   support

     Cubism       0.60      0.72      0.65       190
     Rococo       0.48      0.50      0.49       155
Romanticism       0.53      0.39      0.45       183

avg / total       0.54      0.54      0.53       528



In [9]:
from sklearn.metrics import accuracy_score, log_loss
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB

classifiers = [
    KNeighborsClassifier(3),
    NuSVC(probability=True),
    LinearSVC(C=100.0, random_state=42),
    DecisionTreeClassifier(),
    RandomForestClassifier(n_estimators=100),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    GaussianNB()]

In [10]:
for clf in classifiers:
    clf.fit(x_train, y_train)
    name = clf.__class__.__name__
    
    print("="*30)
    print(name)
    
    print('Results')
    print('\n')
    y_pred = clf.predict(x_test)
    acc = accuracy_score(y_test, y_pred)
    print("Accuracy: {:.4%}".format(acc))
    print(classification_report(y_test, y_pred,target_names=categories))
    
    

KNeighborsClassifier
Results


Accuracy: 46.2121%
             precision    recall  f1-score   support

     Cubism       0.47      0.73      0.57       190
     Rococo       0.44      0.39      0.41       155
Romanticism       0.46      0.25      0.32       183

avg / total       0.46      0.46      0.44       528

NuSVC
Results


Accuracy: 51.8939%
             precision    recall  f1-score   support

     Cubism       0.60      0.66      0.63       190
     Rococo       0.47      0.50      0.48       155
Romanticism       0.46      0.39      0.42       183

avg / total       0.51      0.52      0.51       528

LinearSVC
Results


Accuracy: 43.5606%
             precision    recall  f1-score   support

     Cubism       0.54      0.43      0.48       190
     Rococo       0.38      0.60      0.47       155
Romanticism       0.42      0.31      0.35       183

avg / total       0.45      0.44      0.43       528

DecisionTreeClassifier
Results


Accuracy: 42.8030%
             precisi

## Feature Extraction - Local Binary Patterns

In [11]:
class LocalBinaryPatterns:
    def __init__(self, numPoints, radius):
        # store the number of points and radius
        self.numPoints = numPoints
        self.radius = radius
 
    def describe(self, image, eps=1e-7):
        # compute the Local Binary Pattern representation
        # of the image, and then use the LBP representation
        # to build the histogram of patterns
        lbp = feature.local_binary_pattern(image, self.numPoints,
            self.radius, method="uniform")
        (hist, _) = np.histogram(lbp.ravel(),
            bins=np.arange(0, self.numPoints + 3),
            range=(0, self.numPoints + 2))
 
        # normalize the histogram
        hist = hist.astype("float")
        hist /= (hist.sum() + eps)
 
        # return the histogram of Local Binary Patterns
        return hist

Finding local binary patterns of numPoints 24 and radius 8 pixels

In [12]:
desc = LocalBinaryPatterns(24, 8)

In [13]:
lbp_features = []
for image in images_gray:
    hist = desc.describe(image)
    lbp_features.append(hist)

## Partitioning and Classifiying Data - LBP 

In [14]:
data_frame = np.hstack((lbp_features,class_labels))
np.random.shuffle(data_frame)

In [15]:
percentage = 80
partition = int(len(data_total)*percentage/100)
x_train, x_test = data_frame[:partition,:-1],  data_frame[partition:,:-1]
y_train, y_test = data_frame[:partition,-1:].ravel() , data_frame[partition:,-1:].ravel()

In [16]:
clf=RandomForestClassifier(n_estimators=100)

#Train the model using the training sets y_pred=clf.predict(X_test)
clf.fit(x_train,y_train)
y_pred=clf.predict(x_test)
print("Accuracy: "+str(accuracy_score(y_test, y_pred)))
print('\n')
print(classification_report(y_test, y_pred,target_names=categories))

Accuracy: 0.5265151515151515


             precision    recall  f1-score   support

     Cubism       0.65      0.71      0.68       177
     Rococo       0.51      0.45      0.48       177
Romanticism       0.41      0.41      0.41       174

avg / total       0.52      0.53      0.52       528



In [17]:
classifiers = [
    KNeighborsClassifier(3),
    NuSVC(probability=True),
    LinearSVC(C=100.0, random_state=42),
    DecisionTreeClassifier(),
    RandomForestClassifier(n_estimators=100),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    GaussianNB()]

for clf in classifiers:
    clf.fit(x_train, y_train)
    name = clf.__class__.__name__
    
    print("="*30)
    print(name)
    
    print('Results')
    print('\n')
    y_pred = clf.predict(x_test)
    acc = accuracy_score(y_test, y_pred)
    print("Accuracy: {:.4%}".format(acc))
    print(classification_report(y_test, y_pred,target_names=categories))

KNeighborsClassifier
Results


Accuracy: 46.5909%
             precision    recall  f1-score   support

     Cubism       0.52      0.63      0.57       177
     Rococo       0.43      0.36      0.39       177
Romanticism       0.42      0.41      0.42       174

avg / total       0.46      0.47      0.46       528

NuSVC
Results


Accuracy: 45.6439%
             precision    recall  f1-score   support

     Cubism       0.54      0.76      0.63       177
     Rococo       0.26      0.09      0.13       177
Romanticism       0.42      0.52      0.46       174

avg / total       0.41      0.46      0.41       528

LinearSVC
Results


Accuracy: 55.6818%
             precision    recall  f1-score   support

     Cubism       0.61      0.79      0.68       177
     Rococo       0.52      0.54      0.53       177
Romanticism       0.51      0.34      0.41       174

avg / total       0.55      0.56      0.54       528

DecisionTreeClassifier
Results


Accuracy: 43.5606%
             precisi