In [None]:
# Key into `dataset_config` that selects which face dataset is loaded.
DATASET = 'lfwcrop'
# Number of archetypes, principal components and K-Means clusters to extract.
COMPONENTS = 25
# Rows and columns of the image grid drawn for each method.
PLOT_N_ROWS = 5
PLOT_N_COLS = 5

In [None]:
# Per-dataset settings: glob pattern of the image files followed by the two
# spatial dimensions of a single image.
# NOTE(review): reconstruct_img reshapes to (IMG_WIDTH, IMG_HEIGHT, 3), so the
# first number is used as the row count of the image array - confirm that the
# width/height names are not swapped relative to the files on disk.
dataset_config = dict(
    celeba=("../input/celeba-dataset/img_align_celeba/img_align_celeba/*.jpg", 218, 178),
    lfwcrop=("../input/lfwcrop/lfwcrop_color/lfwcrop_color/faces/*.ppm", 64, 64),
)

# Unpack the settings of the dataset chosen via DATASET.
(IMG_PATHS, IMG_WIDTH, IMG_HEIGHT) = dataset_config[DATASET]

In [None]:
import glob

# Sort the matches: glob returns files in arbitrary, filesystem-dependent order,
# and the row order of the data matrix built from this list changes the
# archetype initialisation (the first COMPONENTS rows are the starting archetypes).
images_paths = sorted(glob.glob(IMG_PATHS))

# Fail early with a clear message instead of an obscure error further down.
if not images_paths:
    raise FileNotFoundError(f"No images match the pattern {IMG_PATHS!r}")

In [None]:
import cv2

def process_img(img_path):
    """Load an image file as a flat, normalised RGB vector.

    :param img_path: path of the image file to read
    :return: 1-D float32 array of the pixel values divided by 255, in RGB
        channel order
    :raises FileNotFoundError: if OpenCV cannot read the file
    """
    img = cv2.imread(img_path)
    # cv2.imread signals a missing or undecodable file by returning None
    # instead of raising, so check explicitly to get a meaningful error.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path!r}")
    img = img.astype(np.float32)
    img /= 255.  # Normalize image
    # OpenCV loads channels as BGR; convert to the RGB order used for plotting.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img.flatten()

def reconstruct_img(img_array):
    """Turn a flat pixel vector back into a 3-channel image array.

    :param img_array: array holding IMG_WIDTH * IMG_HEIGHT * 3 values
    :return: the same values reshaped to (IMG_WIDTH, IMG_HEIGHT, 3)
    """
    target_shape = (IMG_WIDTH, IMG_HEIGHT, 3)
    return img_array.reshape(target_shape)

def map_to_01(img_array, a, b):
    """Linearly rescale values so that ``a`` maps to 0 and ``b`` maps to 1.

    :param img_array: value or array to rescale
    :param a: input value that is mapped to 0
    :param b: input value that is mapped to 1
    :return: ``(img_array - a) / (b - a)``
    """
    offset = img_array - a
    span = b - a
    return offset / span

In [None]:
import numpy as np
import tqdm

# One row per image, one column per colour value of the flattened image.
data = np.empty((len(images_paths), IMG_WIDTH*IMG_HEIGHT*3))

for i, path in enumerate(tqdm.tqdm(images_paths)):
    # Assign row i only. Slicing `data[i:]` here would broadcast the image into
    # every remaining row, doing quadratic work for the same final matrix.
    data[i] = process_img(path)

# Archetypes code

In [None]:
import numpy as np
from abc import ABC, abstractmethod


class AA_Abstract(ABC):
    """Base class for archetypal analysis by alternating optimisation.

    The data matrix ``X`` (n_samples x n_features) is approximated by
    ``A @ Z`` where the archetypes are ``Z = B @ X``. Subclasses supply the
    update rules for ``A`` and ``B`` through ``_computeA`` and ``_computeB``.
    """

    def __init__(self, n_archetypes, max_iter=100, tol=1e-6, verbose=False):
        """
        :param n_archetypes: number of archetypes (rows of ``Z``) to fit
        :param max_iter: maximum number of alternating A/B updates
        :param tol: relative change of the RSS below which fitting stops
        :param verbose: stored on the instance; not used by this class
        """
        self.n_archetypes = n_archetypes
        self.max_iter = max_iter
        self.tol = tol
        self.verbose = verbose
        self.Z = None
        self.n_samples, self.n_features = None, None
        self.RSS = None

    def fit(self, X):
        """Fit the archetypes to ``X``.

        :param X: 2-D data matrix, one sample per row
        :return: the fitted instance itself
        """
        self.n_samples, self.n_features = X.shape
        self._fit(X)
        return self

    def _fit(self, X):
        """Alternate A and B updates until convergence or ``max_iter``.

        Stores the archetypes in ``self.Z`` and the last residual sum of
        squares in ``self.RSS`` (``None`` if no iteration was run).

        :param X: 2-D data matrix, one sample per row
        :return: None
        """
        # Initialize the archetypes with the first n_archetypes samples
        B = np.eye(self.n_archetypes, self.n_samples)
        Z = B @ X

        A = np.eye(self.n_samples, self.n_archetypes)
        # Both stay None when max_iter is 0 and the loop body never runs.
        RSS = None
        prev_RSS = None

        for _ in range(self.max_iter):
            A = self._computeA(X, Z, A)
            B = self._computeB(X, A, B)
            Z = B @ X
            RSS = self._rss(X, A, Z)
            if prev_RSS is not None:
                # A previous RSS of exactly 0 is a perfect fit: stop instead
                # of dividing by zero in the relative-change test.
                if prev_RSS == 0 or abs(prev_RSS - RSS) / prev_RSS < self.tol:
                    break
            prev_RSS = RSS

        self.Z = Z
        self.RSS = RSS

    # The two hooks below are instance methods because they are invoked as
    # ``self._computeA(...)`` / ``self._computeB(...)``; subclasses may read
    # instance attributes in them.
    @abstractmethod
    def _computeA(self, X, Z, A=None):
        """Compute the coefficients that express ``X`` through ``Z``.

        :param X: 2-D data matrix, one sample per row
        :param Z: archetype matrix, one archetype per row
        :param A: previous coefficients, if any
        :return: matrix ``A`` such that ``A @ Z`` approximates ``X``
        """
        pass

    @abstractmethod
    def _computeB(self, X, A, B=None):
        """Compute the coefficients that express the archetypes through ``X``.

        :param X: 2-D data matrix, one sample per row
        :param A: current coefficient matrix returned by ``_computeA``
        :param B: previous coefficients, if any
        :return: matrix ``B`` such that the archetypes are ``B @ X``
        """
        pass

    def archetypes(self):
        """Return the fitted archetype matrix (``None`` before ``fit``)."""
        return self.Z

    def transform(self, X):
        """Express ``X`` in terms of the fitted archetypes.

        :param X: 2-D data matrix, one sample per row
        :return: the result of ``_computeA(X, self.Z)``
        :raises RuntimeError: if called before ``fit``
        """
        if self.Z is None:
            raise RuntimeError("fit() must be called before transform()")
        return self._computeA(X, self.Z)

    @staticmethod
    def _rss(X, A, Z):
        """Residual sum of squares of the approximation ``A @ Z`` of ``X``."""
        return np.linalg.norm(X - A @ Z)**2

    
# Code adapted from https://github.com/nichohelmut/football_results/blob/master/clustering/clustering.py

import numpy as np

class AA_Fast(AA_Abstract):
    """Archetypal analysis with Frank-Wolfe style updates.

    Each update of ``A`` or ``B`` starts from a matrix whose columns all
    select the first row and then takes ``derivative_max_iter`` steps towards
    the vertex with the smallest gradient entry in each column.
    """

    def __init__(self, n_archetypes, max_iter=100, tol=1e-6, verbose=False, derivative_max_iter=10):
        """
        :param n_archetypes: number of archetypes to fit
        :param max_iter: maximum number of alternating A/B updates
        :param tol: relative change of the RSS below which fitting stops
        :param verbose: forwarded to the base class
        :param derivative_max_iter: number of gradient steps per A or B update
        """
        super().__init__(n_archetypes, max_iter, tol, verbose)
        self.derivative_max_iter = derivative_max_iter

    def _computeA(self, X, Z, A=None):
        """Compute the coefficients that express ``X`` through ``Z``.

        :param X: data matrix, one sample per row
        :param Z: archetype matrix, one archetype per row
        :param A: ignored; the iteration always restarts from scratch
        :return: coefficient matrix with one row per sample of ``X`` and one
            column per archetype
        """
        X = X.T
        Z = Z.T
        # Take the sizes from the arguments rather than from the values stored
        # by fit(), so transform() also works on data whose number of rows
        # differs from the training set.
        n_archetypes = Z.shape[1]
        n_samples = X.shape[1]
        cols = np.arange(n_samples)

        A = np.zeros((n_archetypes, n_samples))
        A[0, :] = 1.0
        e = np.zeros(A.shape)
        # Loop-invariant parts of the gradient, computed once.
        # brackets are VERY important to save time
        ZtZ = Z.T @ Z
        ZtX = Z.T @ X
        for t in range(self.derivative_max_iter):
            # [G] ~  k x n
            G = 2.0 * (ZtZ @ A - ZtX)
            # Get the argument mins along each column
            argmins = np.argmin(G, axis=0)
            e[argmins, cols] = 1.0
            A += 2.0 / (t + 2.0) * (e - A)
            # Reset the indicator matrix for the next step
            e[argmins, cols] = 0.0
        return A.T

    def _computeB(self, X, A, B=None):
        """Compute the coefficients that build the archetypes from ``X``.

        :param X: data matrix, one sample per row
        :param A: coefficient matrix as returned by ``_computeA``
        :param B: ignored; the iteration always restarts from scratch
        :return: coefficient matrix with one row per archetype and one column
            per sample of ``X``
        """
        X = X.T
        A = A.T
        # Sizes come from the arguments so they always agree with X and A.
        n_samples = X.shape[1]
        n_archetypes = A.shape[0]
        cols = np.arange(n_archetypes)

        B = np.zeros((n_samples, n_archetypes))
        B[0, :] = 1.0
        e = np.zeros(B.shape)
        # Loop-invariant parts of the gradient, computed once.
        # brackets are VERY important to save time
        AAt = A @ A.T
        t2 = X.T @ (X @ A.T)
        for t in range(self.derivative_max_iter):
            t1 = X.T @ (X @ B) @ AAt
            G = 2.0 * (t1 - t2)
            argmins = np.argmin(G, axis=0)
            e[argmins, cols] = 1.0
            B += 2.0 / (t + 2.0) * (e - B)
            # Reset the indicator matrix for the next step
            e[argmins, cols] = 0.0
        return B.T



def archetypal_plot(ax, data, dp, epsilon=0.2):
    '''
    Scatter the data points and the numbered points ``dp`` on ``ax``.

    Rows 0 and 1 of ``data`` and ``dp`` are used as x and y coordinates.
    Every column of ``dp`` gets a 1-based text label shifted by ``epsilon``
    away from the point.

    Source: Dr. Luke Bovard

    :param ax: object providing ``scatter`` and ``text`` (e.g. matplotlib axes)
    :param data: 2-D array of points to draw
    :param dp: 2-D array of points to draw in orange and to label
    :param epsilon: size of the label offset in both directions
    :return: ``ax``
    '''
    ax.scatter(data[0, :], data[1, :], alpha=0.6, linewidths=10)
    ax.scatter(dp[0, :], dp[1, :], c='orange')

    for label, (x, y) in enumerate(zip(dp[0, :], dp[1, :]), start=1):
        # Shift left of x = 0.5 to the left, everything else to the right.
        dx = -epsilon if x < 0.5 else epsilon
        # Shift points in the lower half (relative to the largest y) downwards.
        dy = -epsilon if y < np.max(dp[1, :]) / 2.0 else epsilon
        ax.text(x + dx, y + dy, "{}".format(label))
    return ax

In [None]:
# Fit archetypal analysis with COMPONENTS archetypes on the image matrix.
# fit() returns the fitted model; its archetypes are stored in `archetypes.Z`.
archetypes = AA_Fast(COMPONENTS).fit(data)

In [None]:
# Save the archetype matrix (one archetype per row, same column layout as `data`)
np.save('archetypes.npy', archetypes.Z)

# PCA

In [None]:
from sklearn.decomposition import PCA

# Fix the seed (same value as the K-Means cell) so that a randomized SVD
# solver, if scikit-learn selects one for this data size, gives repeatable
# components on every run.
pca = PCA(COMPONENTS, random_state=0).fit(data)

In [None]:
# Save the PCA components (one component per row, same column layout as `data`)
np.save('PCA.npy', pca.components_)

# K-Means

In [None]:
from sklearn.cluster import KMeans

# Cluster the images into COMPONENTS groups; random_state=0 fixes the seed so
# the result is repeatable.
kmeans = KMeans(n_clusters=COMPONENTS, random_state=0).fit(data)

In [None]:
# Save the K-Means cluster centres (one centre per row, same column layout as `data`)
np.save('KMeans.npy', kmeans.cluster_centers_)

# Reconstruct

In [None]:
# Reshape the first COMPONENTS rows of each method's result back into images.
reconstructed_archetypes = [reconstruct_img(archetypes.Z[k]) for k in range(COMPONENTS)]
reconstructed_pca = [reconstruct_img(pca.components_[k]) for k in range(COMPONENTS)]
reconstructed_kmeans = [reconstruct_img(kmeans.cluster_centers_[k]) for k in range(COMPONENTS)]

In [None]:
import matplotlib.pyplot as plt

def plot_portraits(images, title, n_row, n_col):
    """Draw the first ``n_row * n_col`` images on a grid and save the figure.

    Every panel is titled ``"<title> <number>"`` with a 1-based number and has
    its axis ticks removed. The figure is written to ``"<title>.png"``.

    :param images: indexable collection with at least ``n_row * n_col`` images
    :param title: prefix of the panel titles and stem of the output file name
    :param n_row: number of grid rows
    :param n_col: number of grid columns
    """
    cell_size = 2.2  # figure size units per grid cell
    plt.figure(figsize=(cell_size * n_col, cell_size * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.20)

    no_ticks = ()
    for cell in range(1, n_row * n_col + 1):
        # subplot positions are 1-based, list indices 0-based
        plt.subplot(n_row, n_col, cell)
        plt.imshow(images[cell - 1])
        plt.title(f"{title} {cell}")
        plt.xticks(no_ticks)
        plt.yticks(no_ticks)

    plt.savefig(f"{title}.png")

In [None]:
# Show the archetype images on the grid and save the figure as Archetype.png
plot_portraits(reconstructed_archetypes, 'Archetype', PLOT_N_ROWS, PLOT_N_COLS)

In [None]:
# Rescale all PCA components with one shared mapping (global minimum -> 0,
# global maximum -> 1) before plotting them.
min_value, max_value = pca.components_.min(), pca.components_.max()
plot_portraits(
    [map_to_01(component_img, min_value, max_value) for component_img in reconstructed_pca],
    'PCA',
    PLOT_N_ROWS,
    PLOT_N_COLS,
)

In [None]:
# Show the K-Means cluster centres on the grid and save the figure as KMeans.png
plot_portraits(reconstructed_kmeans, 'KMeans', PLOT_N_ROWS, PLOT_N_COLS)

In [None]:
nan