In [1]:
import numpy as np
import cv2

In [2]:
def compute_pca(data):
    """Return unit-length principal directions of ``data``.

    The decomposition is done on the (n_obs x n_obs) Gram matrix of the
    centred observations rather than on the (n_features x n_features)
    covariance matrix, which is much cheaper when there are far fewer
    observations than features.

    Parameters
    ----------
    data : 2-D array-like, shape (n_obs, n_features)
        One observation per row.

    Returns
    -------
    numpy.ndarray, shape (n_obs, n_features)
        One direction per row, in the order ``np.linalg.svd`` returns the
        singular vectors of the Gram matrix. Rows have unit Euclidean norm,
        except directions whose norm is numerically zero: those are returned
        as all-zero rows instead of being divided by (almost) zero, which
        used to produce NaN or amplified round-off noise.
    """
    data = np.asarray(data)

    # Centre every observation on the per-feature mean (broadcast over rows).
    centred = data - np.mean(data, axis=0)

    gram = np.dot(centred, centred.T)
    u, _, _ = np.linalg.svd(gram)

    # The columns of u are the eigenvectors of the Gram matrix. Row i of this
    # product equals np.dot(centred.T, u[:, i]), i.e. eigenvector i mapped
    # into feature space.
    directions = np.dot(u.T, centred)

    # The mapped vectors are not unit length, so rescale them. Centring
    # removes one degree of freedom, so at least one direction is degenerate;
    # the cut-off mirrors the default tolerance of np.linalg.matrix_rank.
    norms = np.linalg.norm(directions, axis=1)
    largest = norms.max() if norms.size else 0.0
    tol = max(centred.shape) * np.finfo(directions.dtype).eps * largest
    keep = norms > tol

    pcs = np.zeros_like(directions)
    pcs[keep] = directions[keep] / norms[keep][:, np.newaxis]

    return pcs

In [3]:
def normalize(samples, maxs = None):
    """Flatten every sample and divide it by a common scale factor.

    Parameters
    ----------
    samples : sequence of array-likes
        Each element is flattened with ``np.ravel`` before scaling.
    maxs : scalar or array-like, optional
        Scale factor to divide by. A scalar applies to every value; an array
        is broadcast against each flattened sample (e.g. one factor per
        feature). When omitted, or when it contains no non-zero entry, the
        maximum over all of ``samples`` is used instead.

    Returns
    -------
    numpy.ndarray
        One flattened, scaled sample per row.
    """
    # Convert once to float: this avoids Python 2 integer floor division and
    # lets array-valued scales through (the old ``if not maxs`` raised
    # "truth value of an array is ambiguous" for them).
    scale = None if maxs is None else np.asarray(maxs, dtype=float)

    # A zero/empty scale keeps its historical meaning of "not supplied".
    if scale is None or not scale.any():
        scale = float(np.max(samples))

    return np.array([np.ravel(s) / scale for s in samples])

In [4]:
def find_same_img(test_sample, database, num_same_img = 1):
    """Return the index of the database entry closest to ``test_sample``.

    Closeness is the Euclidean distance between ``test_sample`` and each
    entry of ``database``; on ties ``np.argmin`` picks the first entry.
    ``num_same_img`` is accepted but not used by the implementation.
    """
    distances = []
    for candidate in database:
        offset = test_sample - candidate
        distances.append(np.sqrt(np.sum(offset ** 2)))

    return np.argmin(np.array(distances))

In [5]:
def _read_gray_flat(path):
    """Read the image at ``path`` and return it as a flat grayscale vector."""
    img = cv2.imread(path)
    # cv2.imread signals a missing/unreadable file by returning None rather
    # than raising, so fail here with the offending path instead of letting
    # cvtColor raise an opaque error.
    if img is None:
        raise IOError('Could not read image file: %s' % path)
    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).ravel()


def create_data(val, s, name):
    """Load the images and split them into training and test sets.

    For every subject number 0..99 one file per suffix in ``name`` is read
    from ``s + '<3-digit subject number>' + suffix + '.jpg'``, converted to
    grayscale and flattened.

    Parameters
    ----------
    val : int
        Split point in ``name``: the first ``val`` suffixes form the test set,
        the remaining ones the training set.
    s : str
        Path prefix prepended verbatim to every file name.
    name : list of str
        File-name suffixes, one per image of a subject.

    Returns
    -------
    (training_data, test_data) : tuple of numpy.ndarray
        One flattened image per row, grouped by subject in ascending order.

    Raises
    ------
    IOError
        If an expected image file cannot be read.
    """
    train_image = []
    test_image = []

    for i in range(100):
        prefix = s + '%03d' % i
        train_image.extend(_read_gray_flat(prefix + na + '.jpg') for na in name[val:])
        test_image.extend(_read_gray_flat(prefix + na + '.jpg') for na in name[:val])

    return np.array(train_image), np.array(test_image)

In [6]:
def compute_performance(num_test, num_train, num_samples, num_loop):
    """Run the PCA nearest-neighbour recognition experiment and print scores.

    The images are loaded once through ``create_data`` using the module-level
    ``s`` (path prefix) and ``name`` (file-name suffixes), which must be
    defined before this function is called. The data is then processed in
    ``num_loop`` consecutive batches of ``num_samples`` subjects. For each
    batch the training images define the principal directions, both sets are
    projected onto them, and every test image is matched to its nearest
    training image. A match counts as correct when the matched training image
    belongs to the same subject.

    Parameters
    ----------
    num_test : int
        Test images per subject.
    num_train : int
        Training images per subject.
    num_samples : int
        Subjects per batch.
    num_loop : int
        Number of batches to evaluate.

    Returns
    -------
    None
        Results are printed: one line per test image, then the batch accuracy.
    """
    training_data, test_data = create_data(num_test, s, name)

    # Images per batch; independent of the loop index.
    val_train = num_samples * num_train
    val_test = num_samples * num_test

    for loop in range(num_loop):
        print('Loop #%d' % (loop + 1))

        train_raw = training_data[val_train * loop: val_train * (loop + 1)]
        test_raw = test_data[val_test * loop: val_test * (loop + 1)]

        # Scale both sets by the SAME factor, taken from the training data.
        # Normalising each set by its own maximum would put them on different
        # scales and distort the distances used for matching.
        scale = float(np.max(train_raw))
        training = normalize(train_raw, scale)
        test = normalize(test_raw, scale)

        # Principal directions of the training batch.
        pcs = compute_pca(training)

        # Project both sets onto the principal directions (one row per image).
        train_Ap = np.dot(pcs, training.T).T
        test_Bp = np.dot(pcs, test.T).T

        # Batch-local index of the nearest training image for each test image.
        result = np.array([find_same_img(te, train_Ap) for te in test_Bp])

        # Subject n owns training rows [n * num_train, (n + 1) * num_train)
        # and test rows [n * num_test, (n + 1) * num_test) within the batch;
        # `offset` turns batch-local indices into global ones for reporting.
        offset = loop * val_train
        _sum = 0
        for subject in range(num_samples):
            left = subject * num_train + offset
            right = left + num_train
            for k in range(subject * num_test, (subject + 1) * num_test):
                predicted = result[k] + offset
                hit = int(left <= predicted < right)
                print('test %d in [%d, %d) is %d' % (predicted, left, right, hit))
                _sum += hit
        print('Performance is %d/%d = %.2f%%.' % (_sum, val_test, float(_sum)/val_test*100))
        print('-'*50)

In [7]:
# Path prefix of the image files. File names are built by plain string
# concatenation, so the trailing slash is required.
s = 'AMI_Ear_Database/'

# File-name suffixes, one per image of a subject. create_data() uses the
# first `num_test` entries for testing and the remaining ones for training.
name = [
    '_front_ear',
    '_left_ear',
    '_right_ear',
    '_up_ear',
    '_back_ear',
    '_down_ear',
    '_zoom_ear',
]

# One entry per experiment: [test images per subject, subjects per loop].
cases = [
    [1, 20],
    [2, 25],
    [3, 33],
    [4, 33],
    [5, 50],
]

In [8]:
#######################################################################

In [9]:
# Experiment cases[0]:
#   test images per subject     = cases[0][0] = 1
#   training images per subject = 7 - 1 = 6
#   subjects per loop           = cases[0][1] = 20
#   loops                       = 5
compute_performance(
    num_test=cases[0][0],
    num_train=7 - cases[0][0],
    num_samples=cases[0][1],
    num_loop=5
)

Loop #1
test 0 in [0, 6) is 1
test 10 in [6, 12) is 1
test 12 in [12, 18) is 1
test 20 in [18, 24) is 1
test 26 in [24, 30) is 1
test 30 in [30, 36) is 1
test 41 in [36, 42) is 1
test 44 in [42, 48) is 1
test 48 in [48, 54) is 1
test 54 in [54, 60) is 1
test 60 in [60, 66) is 1
test 70 in [66, 72) is 1
test 73 in [72, 78) is 1
test 78 in [78, 84) is 1
test 84 in [84, 90) is 1
test 6 in [90, 96) is 0
test 100 in [96, 102) is 1
test 106 in [102, 108) is 1
test 112 in [108, 114) is 1
test 116 in [114, 120) is 1
Performance is 19/20 = 95.00%.
--------------------------------------------------
Loop #2
test 125 in [120, 126) is 1
test 126 in [126, 132) is 1
test 134 in [132, 138) is 1
test 140 in [138, 144) is 1
test 145 in [144, 150) is 1
test 152 in [150, 156) is 1
test 239 in [156, 162) is 0
test 164 in [162, 168) is 1
test 173 in [168, 174) is 1
test 179 in [174, 180) is 1
test 180 in [180, 186) is 1
test 190 in [186, 192) is 1
test 193 in [192, 198) is 1
test 200 in [198, 204) is 1
test

In [10]:
# Experiment cases[1]:
#   test images per subject     = cases[1][0] = 2
#   training images per subject = 7 - 2 = 5
#   subjects per loop           = cases[1][1] = 25
#   loops                       = 4
compute_performance(
    num_test=cases[1][0],
    num_train=7 - cases[1][0],
    num_samples=cases[1][1],
    num_loop=4
)

Loop #1
test 1 in [0, 5) is 1
test 3 in [0, 5) is 1
test 8 in [5, 10) is 1
test 76 in [5, 10) is 0
test 10 in [10, 15) is 1
test 14 in [10, 15) is 1
test 116 in [15, 20) is 0
test 18 in [15, 20) is 1
test 21 in [20, 25) is 1
test 14 in [20, 25) is 0
test 29 in [25, 30) is 1
test 29 in [25, 30) is 1
test 34 in [30, 35) is 1
test 33 in [30, 35) is 1
test 36 in [35, 40) is 1
test 38 in [35, 40) is 1
test 43 in [40, 45) is 1
test 43 in [40, 45) is 1
test 118 in [45, 50) is 0
test 78 in [45, 50) is 0
test 50 in [50, 55) is 1
test 50 in [50, 55) is 1
test 58 in [55, 60) is 1
test 109 in [55, 60) is 0
test 60 in [60, 65) is 1
test 63 in [60, 65) is 1
test 65 in [65, 70) is 1
test 68 in [65, 70) is 1
test 114 in [70, 75) is 0
test 114 in [70, 75) is 0
test 76 in [75, 80) is 1
test 78 in [75, 80) is 1
test 83 in [80, 85) is 1
test 83 in [80, 85) is 1
test 88 in [85, 90) is 1
test 88 in [85, 90) is 1
test 93 in [90, 95) is 1
test 93 in [90, 95) is 1
test 96 in [95, 100) is 1
test 98 in [95, 100)

In [11]:
# Experiment cases[2]:
#   test images per subject     = cases[2][0] = 3
#   training images per subject = 7 - 3 = 4
#   subjects per loop           = cases[2][1] = 33
#   loops                       = 3
compute_performance(
    num_test=cases[2][0],
    num_train=7 - cases[2][0],
    num_samples=cases[2][1],
    num_loop=3
)

Loop #1
test 0 in [0, 4) is 1
test 2 in [0, 4) is 1
test 0 in [0, 4) is 1
test 6 in [4, 8) is 1
test 60 in [4, 8) is 0
test 124 in [4, 8) is 0
test 11 in [8, 12) is 1
test 11 in [8, 12) is 1
test 8 in [8, 12) is 1
test 92 in [12, 16) is 0
test 14 in [12, 16) is 1
test 10 in [12, 16) is 0
test 16 in [16, 20) is 1
test 11 in [16, 20) is 0
test 16 in [16, 20) is 1
test 23 in [20, 24) is 1
test 23 in [20, 24) is 1
test 104 in [20, 24) is 0
test 27 in [24, 28) is 1
test 118 in [24, 28) is 0
test 24 in [24, 28) is 1
test 28 in [28, 32) is 1
test 30 in [28, 32) is 1
test 118 in [28, 32) is 0
test 34 in [32, 36) is 1
test 34 in [32, 36) is 1
test 7 in [32, 36) is 0
test 94 in [36, 40) is 0
test 62 in [36, 40) is 0
test 104 in [36, 40) is 0
test 40 in [40, 44) is 1
test 42 in [40, 44) is 1
test 42 in [40, 44) is 1
test 46 in [44, 48) is 1
test 87 in [44, 48) is 0
test 55 in [44, 48) is 0
test 91 in [48, 52) is 0
test 50 in [48, 52) is 1
test 91 in [48, 52) is 0
test 55 in [52, 56) is 1
test 54 

In [12]:
# Experiment cases[3]:
#   test images per subject     = cases[3][0] = 4
#   training images per subject = 7 - 4 = 3
#   subjects per loop           = cases[3][1] = 33
#   loops                       = 3
compute_performance(
    num_test=cases[3][0],
    num_train=7 - cases[3][0],
    num_samples=cases[3][1],
    num_loop=3
)

Loop #1
test 1 in [0, 3) is 1
test 1 in [0, 3) is 1
test 13 in [0, 3) is 0
test 2 in [0, 3) is 1
test 4 in [3, 6) is 1
test 49 in [3, 6) is 0
test 94 in [3, 6) is 0
test 92 in [3, 6) is 0
test 8 in [6, 9) is 1
test 8 in [6, 9) is 1
test 8 in [6, 9) is 1
test 8 in [6, 9) is 1
test 10 in [9, 12) is 1
test 10 in [9, 12) is 1
test 7 in [9, 12) is 0
test 11 in [9, 12) is 1
test 13 in [12, 15) is 1
test 8 in [12, 15) is 0
test 13 in [12, 15) is 1
test 14 in [12, 15) is 1
test 17 in [15, 18) is 1
test 17 in [15, 18) is 1
test 88 in [15, 18) is 0
test 17 in [15, 18) is 1
test 20 in [18, 21) is 1
test 88 in [18, 21) is 0
test 20 in [18, 21) is 1
test 20 in [18, 21) is 1
test 65 in [21, 24) is 0
test 22 in [21, 24) is 1
test 88 in [21, 24) is 0
test 65 in [21, 24) is 0
test 25 in [24, 27) is 1
test 25 in [24, 27) is 1
test 5 in [24, 27) is 0
test 17 in [24, 27) is 0
test 70 in [27, 30) is 0
test 46 in [27, 30) is 0
test 92 in [27, 30) is 0
test 19 in [27, 30) is 0
test 31 in [30, 33) is 1
test 3

In [13]:
# Experiment cases[4]:
#   test images per subject     = cases[4][0] = 5
#   training images per subject = 7 - 5 = 2
#   subjects per loop           = cases[4][1] = 50
#   loops                       = 2
compute_performance(
    num_test=cases[4][0],
    num_train=7 - cases[4][0],
    num_samples=cases[4][1],
    num_loop=2
)

Loop #1
test 1 in [0, 2) is 1
test 0 in [0, 2) is 1
test 8 in [0, 2) is 0
test 1 in [0, 2) is 1
test 0 in [0, 2) is 1
test 2 in [2, 4) is 1
test 78 in [2, 4) is 0
test 62 in [2, 4) is 0
test 61 in [2, 4) is 0
test 76 in [2, 4) is 0
test 5 in [4, 6) is 1
test 5 in [4, 6) is 1
test 5 in [4, 6) is 1
test 5 in [4, 6) is 1
test 76 in [4, 6) is 0
test 6 in [6, 8) is 1
test 6 in [6, 8) is 1
test 4 in [6, 8) is 0
test 7 in [6, 8) is 1
test 18 in [6, 8) is 0
test 8 in [8, 10) is 1
test 5 in [8, 10) is 0
test 8 in [8, 10) is 1
test 9 in [8, 10) is 1
test 76 in [8, 10) is 0
test 11 in [10, 12) is 1
test 97 in [10, 12) is 0
test 58 in [10, 12) is 0
test 11 in [10, 12) is 1
test 76 in [10, 12) is 0
test 13 in [12, 14) is 1
test 58 in [12, 14) is 0
test 13 in [12, 14) is 1
test 13 in [12, 14) is 1
test 76 in [12, 14) is 0
test 43 in [14, 16) is 0
test 94 in [14, 16) is 0
test 58 in [14, 16) is 0
test 43 in [14, 16) is 0
test 76 in [14, 16) is 0
test 16 in [16, 18) is 1
test 16 in [16, 18) is 1
test 