In [None]:
import cv2
import numpy as np
import os

# showing image
def show_image(image, title="Image", scale=5):
    """Display an image in an OpenCV window, enlarged by a constant factor.

    The call blocks until a key is pressed and then closes every OpenCV window.

    Args:
        image: Array whose first two axes are (height, width); grayscale
            (2-D) and multi-channel (3-D) images are both accepted.
        title: Title of the window.
        scale: Factor applied to both the width and the height.
    """
    # Only the first two axes are spatial, so slicing keeps colour images working.
    h, w = image.shape[:2]
    # cv2.resize takes the target size as (width, height); nearest-neighbour
    # interpolation keeps the enlarged pixels sharp.
    resized_image = cv2.resize(image, (w * scale, h * scale), interpolation=cv2.INTER_NEAREST)
    try:
        cv2.imshow(title, resized_image)
        cv2.waitKey(0)
    finally:
        # Close the window even if waiting for the key press is interrupted.
        cv2.destroyAllWindows()
    
# load data
data_path = "GEI"


def load_data(folder_paths, test_subject="010"):
    """Load grayscale images from a ``root/<subject>/<condition>/<file>`` tree.

    Directory entries are visited in sorted order so the order of the returned
    images is the same on every run and every platform (``os.listdir`` itself
    makes no ordering guarantee).

    Args:
        folder_paths: Root directory of the dataset.
        test_subject: Name of the subject folder whose images form the test
            split; every other subject goes to the training split.

    Returns:
        ``(train, test)``: two lists of images as returned by ``cv2.imread``.
    """
    import warnings

    train, test = [], []
    for subject in sorted(os.listdir(folder_paths)):
        subject_dir = os.path.join(folder_paths, subject)
        if not os.path.isdir(subject_dir):
            # Stray files next to the subject folders are not part of the dataset.
            continue
        for condition in sorted(os.listdir(subject_dir)):
            condition_dir = os.path.join(subject_dir, condition)
            if not os.path.isdir(condition_dir):
                continue
            for file_name in sorted(os.listdir(condition_dir)):
                file_path = os.path.join(condition_dir, file_name)
                img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
                if img is None:
                    # cv2.imread signals an unreadable file with None; keeping it
                    # would only fail later when the image is flattened.
                    warnings.warn(f"Skipping unreadable image: {file_path}")
                    continue
                if subject == test_subject:
                    test.append(img)
                else:
                    train.append(img)

    return train, test
                
# Read the dataset rooted at data_path and split it into two image lists
# (load_data puts the images of subject folder "010" into the test list).
real_train, real_test = load_data(data_path)

In [30]:
# Number of images in the training and test splits.
print(len(real_train))
print(len(real_test)) 

980
110


In [4]:
import math

# 3. Generate synthetic GEI templates (oke)
def synthetic_templates(real_imgs, iter = 4, k = 2):
    """Generate synthetic templates by cutting rows off the bottom of each image.

    For every image and every ``i`` in ``1..iter``:
      1. the bottom ``i * k`` rows are removed,
      2. the remainder is resized back to the original height, with the width
         enlarged by the same ratio,
      3. the left and right borders are trimmed equally so the result has the
         original height and width again.

    The height and width of the first image are used for all images.

    Args:
        real_imgs: Sequence of 2-D images of identical size.
        iter: Number of synthetic templates generated per image.
        k: Number of rows removed per step.

    Returns:
        List with ``iter`` templates per input image, grouped image by image.

    Raises:
        ValueError: If ``iter * k`` would remove every row of the image.
    """
    if len(real_imgs) == 0:
        return []

    x, y = real_imgs[0].shape[:2]
    if iter > 0 and iter * k >= x:
        raise ValueError(
            f"iter * k = {iter * k} rows cannot be removed from images with {x} rows"
        )

    templates = []
    for img in real_imgs:
        for i in range(1, iter + 1):
            kept_rows = x - i * k
            # Remove k*i rows from the bottom of the original image.
            cropped = img[:kept_rows, :y]
            # Width after stretching the remaining rows back to the full height.
            d = (x * y) // kept_rows
            # cv2.resize takes the target size as (width, height).
            stretched = cv2.resize(cropped, (d, x))
            # Trim the surplus columns equally on both sides; when the surplus is
            # odd the extra column is dropped on the left. Deriving the margin
            # from the actual surplus keeps the output exactly y columns wide
            # for any image size and any k.
            left = (d - y + 1) // 2
            templates.append(stretched[:, left:left + y])

    return templates
    

# Generate synthetic templates for both splits with the default settings
# (iter=4, k=2, i.e. four templates per real image) and report how many were produced.
synthetic_train_temp = synthetic_templates(real_train)
synthetic_test_temp = synthetic_templates(real_test)
print(len(synthetic_train_temp))
print(len(synthetic_test_temp))

3920
440


In [None]:
# 4. Implement PCA manually
from sklearn.decomposition import PCA   

def pca_transform(data, n_components):
    """Project samples onto their leading principal components.

    Every element of ``data`` is flattened first, so PCA works on a 2-D matrix
    with one row per sample (for n images of 64x64 pixels: n x 4096).

    Args:
        data: Iterable of arrays, one per sample.
        n_components: Number of principal components to keep.

    Returns:
        ``(projected, components)``: the centred samples expressed in the kept
        components, and the matrix whose columns are the kept eigenvectors of
        the covariance matrix, ordered by decreasing eigenvalue.
    """
    samples = np.array([sample.flatten() for sample in data])
    centered = samples - samples.mean(axis=0)

    # Columns are features, hence rowvar=False.
    values, vectors = np.linalg.eigh(np.cov(centered, rowvar=False))

    # eigh returns eigenvalues in ascending order; keep the largest ones first.
    order = np.argsort(values)[::-1][:n_components]
    components = vectors[:, order]

    return centered @ components, components

# Manual PCA on the real training images, keeping the 300 leading components.
pca_data , pca_eigen = pca_transform(real_train, 300)

In [27]:
# Shapes of the projected samples and of the eigenvector matrix.
pca_data.shape, pca_eigen.shape

((980, 300), (4096, 300))

In [28]:
# Cross-check with scikit-learn: flatten every training image into a vector.
pca_in = []
for d in real_train:
    pca_in.append(d.flatten())
    

# A float n_components asks scikit-learn to keep as many components as are
# needed to explain that fraction of the variance (here 95%).
# NOTE(review): this rebinds pca_data, replacing the result of the manual
# pca_transform call made in an earlier cell.
pca = PCA(n_components=0.95)
pca_data = pca.fit_transform(pca_in)

In [29]:
# Number of samples and number of components kept by scikit-learn.
pca_data.shape

(980, 47)

In [15]:
# 5. Implement MDA manually
def mda_transform(data, labels, n_components):
    """Project samples onto the leading multiple-discriminant-analysis directions.

    Builds the between-class scatter ``S_b`` and the within-class scatter
    ``S_w`` and keeps the eigenvectors of ``pinv(S_w) @ S_b`` that belong to
    the largest eigenvalues.

    Args:
        data: Array-like of shape (n_samples, n_features).
        labels: Array-like with one class label per sample.
        n_components: Number of discriminant directions to keep.

    Returns:
        ``(transformed_data, eigvecs)``: ``data @ eigvecs`` (the data is not
        centred before projection) and the (n_features, n_components) matrix
        whose columns are the kept directions.

    Raises:
        ValueError: If ``data`` is not 2-D or ``labels`` does not contain
            exactly one entry per sample.
    """
    data = np.asarray(data, dtype=float)
    labels = np.asarray(labels)
    if data.ndim != 2:
        raise ValueError(f"data must be 2-D, got shape {data.shape}")
    if labels.ndim != 1 or labels.shape[0] != data.shape[0]:
        raise ValueError(
            f"labels must hold one entry per sample: got shape {labels.shape} "
            f"for {data.shape[0]} samples"
        )

    n_features = data.shape[1]
    mean_total = data.mean(axis=0)
    S_b = np.zeros((n_features, n_features))
    S_w = np.zeros((n_features, n_features))

    for cls in np.unique(labels):
        class_data = data[labels == cls]
        mean_class = class_data.mean(axis=0)
        mean_offset = mean_class - mean_total
        S_b += len(class_data) * np.outer(mean_offset, mean_offset)
        # Scatter computed directly instead of np.cov(...) * (n - 1): same value
        # for n > 1, and a well-defined zero matrix for a single-sample class
        # where np.cov would produce NaN.
        centered = class_data - mean_class
        S_w += centered.T @ centered

    # pinv(S_w) @ S_b is generally NOT symmetric, so the symmetric solver eigh
    # must not be used here; the general solver eig is required.
    eigvals, eigvecs = np.linalg.eig(np.linalg.pinv(S_w) @ S_b)
    # Both scatter matrices are positive semi-definite, so the eigenvalues are
    # real in exact arithmetic; any imaginary part is numerical noise.
    sorted_indices = np.argsort(eigvals.real)[::-1]
    eigvecs = eigvecs[:, sorted_indices[:n_components]].real
    transformed_data = data @ eigvecs
    return transformed_data, eigvecs

In [None]:
def features_extract(real, synthetic, labels):
    """Extract PCA (40 components) + MDA (9 components) features.

    The real and the synthetic templates are processed independently: each set
    gets its own PCA basis and its own MDA projection.

    Args:
        real: Sequence of real templates.
        synthetic: Sequence of synthetic templates. Its length must be a whole
            multiple of ``len(labels)``; the templates derived from one real
            image are assumed to be stored consecutively (the order in which
            ``synthetic_templates`` appends them).
        labels: One class label per real template.

    Returns:
        ``(mda_features_real, mda_features_syn)``.

    Raises:
        ValueError: If ``labels`` is empty or ``len(synthetic)`` is not a
            positive multiple of ``len(labels)``.
    """
    labels = np.asarray(labels)
    if len(labels) == 0:
        raise ValueError("labels must not be empty")

    copies, remainder = divmod(len(synthetic), len(labels))
    if copies == 0 or remainder:
        raise ValueError(
            f"{len(synthetic)} synthetic templates cannot be matched to "
            f"{len(labels)} labels"
        )
    # Each real label is repeated for all synthetic templates derived from that
    # image; reusing `labels` unchanged would give a mask of the wrong length.
    synthetic_labels = np.repeat(labels, copies)

    # Only the first returned element (the projected features) is needed.
    # Indexing instead of unpacking keeps this independent of how many values
    # pca_transform returns (the definition in this notebook returns two, so
    # unpacking three names would raise).
    pca_features_real = pca_transform(real, 40)[0]
    mda_features_real = mda_transform(pca_features_real, labels, 9)[0]

    pca_features_syn = pca_transform(synthetic, 40)[0]
    mda_features_syn = mda_transform(pca_features_syn, synthetic_labels, 9)[0]

    return mda_features_real, mda_features_syn
    

(980, 40)

In [6]:
# Placeholder inputs for exercising mda_transform.
no_samples = pca_data.shape[0]
no_features = 40
no_classes = 10

# Seeded generator so the placeholder data is identical on every run.
dummy_data = np.random.default_rng(0).random((no_samples, no_features))

# One label per sample, assigned in contiguous runs. Identical to
# np.repeat(np.arange(no_classes), no_samples // no_classes) when no_samples is
# divisible by no_classes, but always exactly no_samples long, so the labels
# can be used as a boolean mask over the samples.
dummy_labels = np.arange(no_samples) * no_classes // no_samples


In [7]:
# Run the manual MDA on pca_data with the placeholder labels, keeping 9 components.
mda_data, mda_eigen = mda_transform(pca_data, dummy_labels,9)

In [8]:
# Shapes of the MDA projection and of its projection matrix.
print(mda_data.shape)
print(mda_eigen.shape)

(4900, 9)
(40, 9)


In [9]:
# 6. Feature extraction using PCA and MDA
def extract_features(train_geis, labels, pca_components=30, mda_components=10):
    """Run PCA followed by MDA on the training templates.

    Args:
        train_geis: Array-like of templates with one sample along the first
            axis; every sample is flattened to a vector.
        labels: One class label per sample.
        pca_components: Number of principal components to keep.
        mda_components: Number of MDA directions to keep.

    Returns:
        ``(mda_features, pca_eigenvectors, mda_eigenvectors, pca_mean)`` where
        ``pca_mean`` is the per-feature mean of the flattened templates.
    """
    # np.asarray lets plain lists of images be passed as well as arrays.
    flat_geis = np.asarray(train_geis)
    flat_geis = flat_geis.reshape(len(flat_geis), -1)

    # The mean is computed here because the pca_transform defined in this
    # notebook returns only (features, eigenvectors); indexing its result
    # instead of unpacking three names avoids a ValueError.
    pca_mean = flat_geis.mean(axis=0)
    pca_result = pca_transform(flat_geis, pca_components)
    pca_features, pca_eigenvectors = pca_result[0], pca_result[1]

    mda_result = mda_transform(pca_features, labels, mda_components)
    mda_features, mda_eigenvectors = mda_result[0], mda_result[1]

    return mda_features, pca_eigenvectors, mda_eigenvectors, pca_mean