In [31]:
from __future__ import print_function
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
from os.path import isfile, isdir, join, split, splitext, exists
from os import listdir, makedirs

In [32]:
def calculate_SAD(arr1, arr2):
    """Calculate sum of absolute distance"""
    z = abs(arr1/100 - arr2/100)
    z = z * 100
    return np.sum(z)

def calculate_SSD(arr1, arr2):
    """Calculate sum of squared distance"""
    z = abs(arr1/100 - arr2/100)
    z = z * 100
    return np.sum(z**2)

In [33]:
# Require the following library:
# import matplotlib.pyplot as plt

# Receive np.array, show the image
def show_image(arr, title=""):
    fig, ax = plt.subplots(figsize=(10, 8))
    plt.imshow(np.asarray(arr))
    plt.title(title)

# Question 1
Read all images and convert to grayscale images

In [34]:
path = "CroppedYale/"

In [37]:
directories = [join(path, d)
               for d in listdir(path) 
               if isdir(join(path, d))]

image_files = [join(d, f)
                for d in directories
                for f in listdir(d)
                if isfile(join(d, f))]

print("Total image files found: {}".format(len(image_files)))
# print("{:^3} - {}".format("#", "Filename"))
# ["{:03} - {}".format(n+1, f) for n, f in enumerate(image_files)]

Total image files found: 2433


In [38]:
grayscale_path = "CroppedYale_Grayscale/"

for n, d in enumerate(directories):
    _d = d.split('/')
    _dir = join(grayscale_path, _d[1])
    if not exists(_dir):
#         print("{} Not exist".format(_dir))
#         print("Creating directory {}".format(_dir))
        makedirs(_dir)
        print("Created '{}' directory\n".format(_dir))
    else:
#         print("'{}' folder exist".format(_dir))
        continue

The following code converts the images into grayscale images<br>
Logical operations as following:
1. `Line 3~5`: Since the program may takes some times and computational resources, we define a timer for measurement
2. `Line 8~11`: Through the previous run, we found some images have .bad extension. We used `os.path.splitext` to filter out the extension of the file and `continue` the loop upon finding `.pgm` extension.
3. `Line 13~17`: Since the grayscale images may already exists, we did a check-up before actually converting the images and save them into grayscale image
4. `Line 19~22`: Open image, convert image into grayscale with `L` parameter, save it into `gray_img_name`, add `converted` counter by 1

In [39]:
import time

start = time.time()
converted = 0
file_existed = 0

for i in image_files:
    ext = splitext(i)
    if ext[-1] != ".pgm":
        print("Excluding image {}".format(i))
        continue

    img_name = i.split('/')
    gray_img_name = join(grayscale_path, img_name[1], img_name[2])
    if exists(gray_img_name):
        file_existed = file_existed + 1
        continue
    
    img = Image.open(i)
    img = img.convert("L")
    img.save(gray_img_name)
    converted = converted + 1
print("{} of grayscale images already existed.".format(file_existed))
print("Finished converting {} images to grayscale image in {:.3f}s".format(converted, time.time() - start))

Excluding image CroppedYale/yaleB17/yaleB17_P00A-010E+00.pgm.bad
Excluding image CroppedYale/yaleB18/yaleB18_P00A-010E+00.pgm.bad
Excluding image CroppedYale/yaleB16/yaleB16_P00A+095E+00.pgm.bad
Excluding image CroppedYale/yaleB16/yaleB16_P00A-010E+00.pgm.bad
Excluding image CroppedYale/yaleB11/yaleB11_P00A-050E-40.pgm.bad
Excluding image CroppedYale/yaleB11/yaleB11_P00A+050E-40.pgm.bad
Excluding image CroppedYale/yaleB11/yaleB11_P00A-110E+15.pgm.bad
Excluding image CroppedYale/yaleB11/yaleB11_P00A+095E+00.pgm.bad
Excluding image CroppedYale/yaleB13/yaleB13_P00A+095E+00.pgm.bad
Excluding image CroppedYale/yaleB13/yaleB13_P00A-110E+15.pgm.bad
Excluding image CroppedYale/yaleB13/yaleB13_P00A+050E-40.pgm.bad
Excluding image CroppedYale/yaleB13/yaleB13_P00A-050E-40.pgm.bad
Excluding image CroppedYale/yaleB12/yaleB12_P00A+095E+00.pgm.bad
Excluding image CroppedYale/yaleB12/yaleB12_P00A-110E+15.pgm.bad
Excluding image CroppedYale/yaleB12/yaleB12_P00A+050E-40.pgm.bad
Excluding image CroppedYa

# Question 2
Split the images into training set / test set
- First 35 images as training, the rest 30 images as testing

Slicing up the first 35 identical person images as training image and the rest 30 images as testing

In [86]:
train_image = image_files[:35]
test_image  = image_files[35:64]
print("len(train_image): {}\n" \
      "len(test_image) : {}".format(len(train_image), len(test_image)))

len(train_image): 35
len(test_image) : 29


Introduce two `np.array` variables as `train_arr` and `test_arr` to contain the array

In [83]:
train_arr = np.array([], ndmin=1, dtype=np.int64)
test_arr = np.array([], ndmin=1, dtype=np.int64)

for a_train_image in train_image:
    train_arr = np.append(train_arr,
                         np.array(Image.open(a_train_image)))

for a_test_image in test_image:
    test_arr = np.append(test_arr,
                         np.array(Image.open(a_test_image)))

print("train_arr shape : {}".format(train_arr.shape))
print("test_arr shape  : {}".format(test_arr.shape))

train_arr shape: (1128960,)
test_arr shape : (935424,)


Example above does not use `np.ravel` on its implementation because `np.append` append values to the end of an array.