In [17]:
from os import listdir
from os.path import isfile, join
import sys
import random
import numpy as np
import cv2
from sklearn.metrics import accuracy_score
from google.colab import drive
#drive.mount('drive')

In [3]:
# https://github.com/lobokoch/unsupervised-learning2/blob/main/pca-eigenfaces/src/br/furb/Person.java
class Person():
  """One dataset sample: an id, a class label and the sample's data."""

  def __init__(self, id, label, data):
    # Plain record: the three values are stored exactly as given.
    self.id, self.label, self.data = id, label, data

In [38]:
# https://github.com/lobokoch/unsupervised-learning2/blob/main/pca-eigenfaces/src/br/furb/PCA.java
class PCA():
  """Eigenfaces experiment over a directory of face images.

  Loads every ``.jpg`` in ``self.path``, splits the samples into train and
  test sets, and prints the recognition accuracy obtained with OpenCV's
  EigenFaceRecognizer for a range of principal-component counts.

  Image files are expected to be named ``<id>_<label>.jpg``.
  """

  # Default dataset location (a Google Drive folder as seen from Colab).
  DEFAULT_PATH = '/content/drive/MyDrive/Colab Notebooks/ORL'
  # Every image is resized to this (width, height) before being flattened.
  IMAGE_SIZE = (80, 80)
  # Number of consecutive samples (after sorting by id) treated as one group.
  SAMPLES_PER_PERSON = 10

  def __init__(self, path=DEFAULT_PATH):
    """Create the experiment.

    Args:
      path: directory that contains the ``.jpg`` images.
    """
    # Assigned here (not only in main) so that image_data, to_person and
    # load_dataset can be used on their own.
    self.path = path

  def image_data(self, filename):
    """Load one image as a flattened float64 row vector.

    Args:
      filename: image file name, resolved against ``self.path``.

    Returns:
      ``numpy.ndarray`` with shape ``(1, width * height)`` and dtype float64:
      the grayscale image resized to ``IMAGE_SIZE`` and flattened column by
      column.

    Raises:
      IOError: if the file is missing or cannot be decoded as an image.
    """
    full_path = join(self.path, filename)
    img = cv2.imread(full_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface later as an obscure cv2.resize assertion.
    if img is None:
      raise IOError('Could not read image: {}'.format(full_path))

    output = cv2.resize(img, self.IMAGE_SIZE, interpolation=cv2.INTER_AREA)

    # Transposing before the reshape yields a column-major flattening.
    dst = output.T.reshape((1, output.shape[1] * output.shape[0]))
    return np.float64(dst)

  def to_person(self, filename):
    """Build a Person from an image file named ``<id>_<label>.jpg``.

    Args:
      filename: image file name; any leading directories (``/`` separated)
        are ignored when parsing the id and label.

    Returns:
      Person whose ``id`` and ``label`` are the two integer fields of the
      file name and whose ``data`` is ``image_data(filename)``.

    Raises:
      ValueError: if a field is not an integer.
      IndexError: if the name has no ``_`` separated second field.
    """
    data_part = filename[filename.rfind('/') + 1:filename.rfind('.jpg')]
    data = data_part.split('_')

    return Person(int(data[0]), int(data[1]), self.image_data(filename))

  def load_dataset(self, train, test, p, seed=None):
    """Read every ``.jpg`` in ``self.path`` and split it into train and test.

    Samples are sorted by id (descending) and consumed in consecutive groups
    of ``SAMPLES_PER_PERSON``. Within each group, randomly chosen samples are
    moved to ``test`` until only ``p`` remain; the remainder goes to
    ``train``. A trailing group with fewer than ``SAMPLES_PER_PERSON``
    samples is ignored.

    Args:
      train: list extended in place with the training samples.
      test: list extended in place with the test samples.
      p: number of samples per group kept for training. When it equals
        ``SAMPLES_PER_PERSON`` the full group is added to both lists.
      seed: optional seed that makes the random split reproducible. When
        ``None`` the global ``random`` module state is used.

    Raises:
      ValueError: if ``p`` is negative.
    """
    if p < 0:
      # Fail before touching the output lists; a negative p would otherwise
      # crash in the middle of the split.
      raise ValueError('p must be non-negative, got {}'.format(p))

    # A private generator keeps a seeded split independent of global state.
    rng = random if seed is None else random.Random(seed)

    files = [f for f in listdir(self.path)
             if f.endswith('.jpg') and isfile(join(self.path, f))]
    people = sorted(map(self.to_person, files),
                    key=lambda person: person.id, reverse=True)

    samples = []
    for person in people:
      samples.append(person)
      if len(samples) < self.SAMPLES_PER_PERSON:
        continue

      # Move random samples to the test set until only p are left.
      while len(samples) > p:
        test.append(samples.pop(rng.randint(0, len(samples) - 1)))

      # With nothing held out, the training samples double as the test set.
      if p == self.SAMPLES_PER_PERSON:
        test.extend(samples)

      train.extend(samples)
      samples = []

  def main(self, path=None, p=7, start=2, end=30, seed=None):
    """Run the experiment and print one accuracy line per component count.

    Args:
      path: dataset directory; when ``None`` the instance's ``path`` is used.
      p: number of samples per group kept for training.
      start: first number of principal components to evaluate.
      end: upper bound (exclusive) of the component counts to evaluate.
      seed: optional seed forwarded to ``load_dataset``.

    Raises:
      ValueError: if the split produced an empty train or test set.
    """
    if path is not None:
      self.path = path

    train = []
    test = []
    self.load_dataset(train, test, p, seed=seed)
    if not train or not test:
      raise ValueError(
          'Empty train or test set loaded from: {}'.format(self.path))

    # The training matrix and the labels do not depend on k: build them once.
    src = [person_train.data for person_train in train]
    labels = np.asarray([person_train.label for person_train in train])
    test_labels = [person_test.label for person_test in test]

    for k in range(start, end):
      recognizer = cv2.face.EigenFaceRecognizer_create(k)
      recognizer.train(src, labels)

      # predict returns (label, confidence); only the label is scored.
      prediction = [recognizer.predict(person_test.data)[0]
                    for person_test in test]

      # accuracy_score expects (y_true, y_pred).
      accuracy = accuracy_score(test_labels, prediction)
      accuracy = round(accuracy * 100, 2)

      print('{} componentes principais, acurácia: {}%'.format(k,accuracy))
      

In [40]:
# Run the eigenfaces experiment; main() prints one accuracy line per
# number of principal components evaluated.
pca = PCA()
pca.main()

2 componentes principais, acurácia: 30.89%
3 componentes principais, acurácia: 59.35%
4 componentes principais, acurácia: 77.24%
5 componentes principais, acurácia: 82.11%
6 componentes principais, acurácia: 82.93%
7 componentes principais, acurácia: 84.55%
8 componentes principais, acurácia: 87.8%
9 componentes principais, acurácia: 89.43%
10 componentes principais, acurácia: 91.87%
11 componentes principais, acurácia: 91.06%
12 componentes principais, acurácia: 90.24%
13 componentes principais, acurácia: 89.43%
14 componentes principais, acurácia: 89.43%
15 componentes principais, acurácia: 89.43%
16 componentes principais, acurácia: 89.43%
17 componentes principais, acurácia: 89.43%
18 componentes principais, acurácia: 90.24%
19 componentes principais, acurácia: 91.06%
20 componentes principais, acurácia: 91.06%
21 componentes principais, acurácia: 91.06%
22 componentes principais, acurácia: 91.87%
23 componentes principais, acurácia: 92.68%
24 componentes principais, acurácia: 92.6