In [3]:
import numpy as np  
import cv2
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

In [4]:
# path to the database - change it if needed
path = "./yalefaces_data/"

ids = range(1, 16)  # 15 people
states = [
    "centerlight",
    "glasses",
    "happy",
    "leftlight",
    "noglasses",
    "normal",
    "rightlight",
    "sad",
    "sleepy",
    "surprised",
    "wink",
]
prefix = "subject"
surfix = ".png"  # file extension is png

# open one picture to get the image's size
fn = prefix + "01." + states[0] + surfix
im = cv2.imread(path + fn, 0)

h = im.shape[0]  # hight
w = im.shape[1]  # width

D = h * w
N = len(states) * 15
print(N, D, h, w)

X = np.zeros((D, N))
y = np.zeros(N) 

# collect all data
count = 0

# there are 15 people
for person_id in range(1, 16):
    for state in states:

        # get name of each image file
        fn = path + prefix + str(person_id).zfill(2) + "." + state + surfix

        # open the file and read as grey image
        tmp = cv2.imread(fn, cv2.IMREAD_GRAYSCALE)

        # then add image to dataset X
        X[:, count] = tmp.reshape(D)
        
        y[count] = person_id - 1
        count += 1

165 77760 243 320


In [5]:
pca = PCA(n_components=125)
X_reduced = pca.fit_transform(X.T)

print("Shape of X after PCA:", X_reduced.shape)

Shape of X after PCA: (165, 125)


In [6]:
def softmax(V):
    e_V = np.exp(V - np.max(V, axis=0, keepdims=True))
    Z = e_V / e_V.sum(axis=0)
    return Z

In [7]:
# cost or loss function 
def cost(Y, Yhat): 
    return -np.sum(Y*np.log(Yhat))/Y.shape[1]

In [8]:
## One-hot coding 
from scipy import sparse 
def convert_labels(y, C):
    Y = sparse.coo_matrix((np.ones_like(y),
                            (y, np.arange(len(y)))), shape=(C, len(y))).toarray()
    return Y

# Split data into train (70%) and test (30%)
X_train, X_test, y_train, y_test = train_test_split(X_reduced, y, test_size=0.3, random_state=42)

## 1. Multinomial Logistic Regression
logistic_model = LogisticRegression(solver='lbfgs', max_iter=1000)
logistic_model.fit(X_train, y_train)
y_pred_logistic = logistic_model.predict(X_test)
logistic_accuracy = accuracy_score(y_test, y_pred_logistic)
print(f'Logistic Regression Accuracy: {logistic_accuracy:.2f}')

## 2. Naïve Bayes (using GaussianNB for continuous features)
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)
y_pred_nb = nb_model.predict(X_test)
nb_accuracy = accuracy_score(y_test, y_pred_nb)
print(f'Naive Bayes Accuracy: {nb_accuracy:.2f}')

Logistic Regression Accuracy: 0.98
Naive Bayes Accuracy: 0.72


In [9]:
X_train = X_train.T
X_test = X_test.T 

# Các tham số mạng
d0 = X_train.shape[0]  
d1 = h = 165         
d2 = C = 15            

# Khởi tạo tham số ngẫu nhiên
W1 = 0.01 * np.random.randn(d0, d1)
b1 = np.zeros((d1, 1))
W2 = 0.01 * np.random.randn(d1, d2)
b2 = np.zeros((d2, 1))

# Chuyển đổi nhãn thành one-hot encoding
Y = convert_labels(y_train, C)
N = X_train.shape[1]  # Số mẫu trong tập huấn luyện
eta = 0.01  # Tốc độ học

# Huấn luyện mạng ANN
for i in range(10000):
    ## Feedforward
    Z1 = np.dot(W1.T, X_train) + b1
    A1 = np.maximum(Z1, 0)  # ReLU
    Z2 = np.dot(W2.T, A1) + b2
    Yhat = softmax(Z2)

    # In chi phí sau mỗi 1000 lần lặp
    if i % 1000 == 0:
        loss = cost(Y, Yhat)
        print("iter %d, loss: %f" % (i, loss))

    # Backpropagation
    E2 = (Yhat - Y) / N
    dW2 = np.dot(A1, E2.T)
    db2 = np.sum(E2, axis=1, keepdims=True)
    E1 = np.dot(W2, E2)
    E1[Z1 <= 0] = 0  # Gradient của ReLU
    dW1 = np.dot(X_train, E1.T)
    db1 = np.sum(E1, axis=1, keepdims=True)

    W1 += -eta * dW1
    b1 += -eta * db1
    W2 += -eta * dW2
    b2 += -eta * db2

Z1 = np.dot(W1.T, X_train) + b1
A1 = np.maximum(Z1, 0)
Z2 = np.dot(W2.T, A1) + b2
predicted_class = np.argmax(Z2, axis=0)
acc = 100 * np.mean(predicted_class == y_train)
print('Training accuracy: %.2f %%' % (acc))

Z1_test = np.dot(W1.T, X_test) + b1
A1_test = np.maximum(Z1_test, 0)
Z2_test = np.dot(W2.T, A1_test) + b2
predicted_class_test = np.argmax(Z2_test, axis=0)
test_acc = 100 * np.mean(predicted_class_test == y_test)
print('Test accuracy: %.2f %%' % (test_acc))

iter 0, loss: 38.301905


  e_V = np.exp(V - np.max(V, axis=0, keepdims=True))
  e_V = np.exp(V - np.max(V, axis=0, keepdims=True))


iter 1000, loss: nan
iter 2000, loss: nan
iter 3000, loss: nan
iter 4000, loss: nan
iter 5000, loss: nan
iter 6000, loss: nan
iter 7000, loss: nan
iter 8000, loss: nan
iter 9000, loss: nan
Training accuracy: 7.83 %
Test accuracy: 4.00 %


In [10]:
def load_and_resize_image(file_path, height=320, width=243):
    img = cv2.imread(file_path)
    if img is None:
        raise ValueError(f"Image not found at {file_path}")
    img_resized = cv2.resize(img, (width, height))
    return img_resized

image_paths = [
    './4/01.png',
    './4/02.png',
    './4/03.png',
    './4/04.png',
    './4/05.png',
]

images = [load_and_resize_image(path) for path in image_paths]

images_array = np.array(images).reshape(len(images), -1) 

def predict(images):
    Z1 = np.dot(W1.T, images.T) + b1 
    A1 = np.maximum(Z1, 0) 
    Z2 = np.dot(W2.T, A1) + b2  
    return np.argmax(Z2, axis=0)  


predicted_classes = predict(images_array)
print("Predicted classes for the new images:", predicted_classes) 

ValueError: shapes (165,125) and (233280,5) not aligned: 125 (dim 1) != 233280 (dim 0)