In [82]:
import os

import numpy as np
from PIL import Image
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

size_100 = (100,100)
path = "images"

def loadImages(path):

    '''
    :param path: absolute path of images
    :return: 2 arrays. The first one is an numpy array of numpy arrays
    30 rows of (10000,3). 100*100 pixels each one described
    by a tuple of 3 values (r,g,b)
    The second one dimensional array contains the image labels
    '''
    listOfImages = []
    labels = []
    for imageFileName in os.listdir(path):
        image = Image.open(path+"/"+imageFileName)
        image = image.resize(size_100)
        '''
        we reshape every image to an 1d array with size 100*100.
        each row contains a tuple with (r,g,b) values
        '''
        listOfImages.append(np.reshape(image, (100, 100*3)))
        if imageFileName.startswith('W'):
            labels.append(0)
        elif imageFileName.startswith('F'):
            labels.append(1)
        else:
            imageFileName.startswith('S')
            labels.append(2)
    return np.asarray(listOfImages),np.asarray(labels)

def PCA_ImageSpaceVisualization(arrayOfImages):
    arrayOfReducedSizeImages = []

    for image in arrayOfImages:
        pca = PCA(2).fit(image)
        imageCompressed = pca.transform(image)
        '''
        we inverse transform image to see the difference
        between the initial and the "pcaed" one. The PCAed
        image has size (100,2)
        '''
        imageReversed = pca.inverse_transform(imageCompressed)
        '''
        we reshape that image to the initial size
        at (100,100,3). FOr sure we have lost a lot of image
        information
        '''
        imageReversedToInitialSize = np.reshape(imageReversed, (100,100,3))
        arrayOfReducedSizeImages.append(imageReversedToInitialSize)


    # fig = plt.figure()
    # for n,image in enumerate(plotData):
    #     plt.imshow(image.astype('uint8'))
    #     # ax = fig.add_subplot(1,30,n+1)
    #     # plt.imshow(image.astype('uint8'),ax)
    #
    # # fig.set_size_inches(np.array(fig.get_size_inches()) * 30)
    # plt.show()
    return arrayOfReducedSizeImages

def classification(images,labels):
    """
    flatten images to size (100*100*3,1)
    so as to match the knn input data
    """
    images = [np.reshape(image,-1) for image in images]
    knn = KNeighborsClassifier(1)
    scores = cross_val_score(knn, images, labels, cv=5, scoring='accuracy')
    knn_mean = scores.mean()

    svm_clf = svm.SVC(kernel='linear', C=1)
    scores = cross_val_score(svm_clf, images, labels, cv=5, scoring='accuracy')
    svm_mean = scores.mean()
    print("1-NN Mean score: ",knn_mean)
    print("SVM Mean score: ",svm_mean)



if __name__ == '__main__':
   arrayOfImages, arrayOfLabels = loadImages(path)
   arrayOfReducedSizeImages = PCA_ImageSpaceVisualization(arrayOfImages)
   print("Run classifiers with initial size")
   classification(arrayOfImages,arrayOfLabels)
   print(80*"*")
   print("Run classifiers with pca size")
   classification(arrayOfReducedSizeImages,arrayOfLabels)

Run classifiers with initial size
1-NN Mean score:  0.6333333333333332
SVM Mean score:  0.7333333333333334
********************************************************************************
Run classifiers with pca size
1-NN Mean score:  0.5999999999999999
SVM Mean score:  0.7666666666666667


In [None]:
def RegNMF(X,k,l,epsilon):
    pass
