Load images from the CelebA and LFW datasets, resize them to 128x128, and normalize pixel values to [0, 1].

In [None]:
import cv2
import numpy as np

def load_and_normalize_images(paths, size=(128, 128)):
    """Read images from disk, resize them, and scale pixel values by 1/255.

    Args:
        paths: Iterable of image file paths to pass to ``cv2.imread``.
        size: Target size handed to ``cv2.resize`` (OpenCV interprets it as
            ``(width, height)``). Defaults to ``(128, 128)``.

    Returns:
        A NumPy array built from the list of resized, scaled images, in the
        same order as ``paths``. An empty ``paths`` yields an empty array.

    Raises:
        OSError: If ``cv2.imread`` cannot load one of the paths.
    """
    images = []
    for path in paths:
        img = cv2.imread(path)
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising; fail here with the offending path instead of
        # letting cv2.resize die with an opaque assertion error.
        if img is None:
            raise OSError(f"Could not read image: {path!r}")
        img = cv2.resize(img, size)
        # Dividing by 255 maps 8-bit pixel values into [0, 1]
        # (assumes 8-bit input, which is what cv2.imread loads by default).
        images.append(img / 255.0)
    return np.array(images)

# Placeholder file lists for the two datasets; replace with real image paths.
celeba_paths = [
    'path/to/celeba1.jpg',
    'path/to/celeba2.jpg',
]
lfw_paths = [
    'path/to/lfw1.jpg',
    'path/to/lfw2.jpg',
]

# CelebA images come first, followed by LFW, in one combined list.
image_paths = [*celeba_paths, *lfw_paths]
normalized_data = load_and_normalize_images(image_paths)

Apply PCA to reduce the dimensionality of the image data.

In [None]:
from sklearn.decomposition import PCA

# Flatten every image into one feature vector: (n_samples, n_features).
flat_images = normalized_data.reshape(len(normalized_data), -1)

# scikit-learn's PCA rejects n_components larger than
# min(n_samples, n_features) with a ValueError, so cap the requested 10
# components to keep the cell working on small image sets.
n_components = min(10, *flat_images.shape)
pca_model = PCA(n_components=n_components)
pca_data = pca_model.fit_transform(flat_images)

Train a KMeans model on the PCA data to identify clusters.

In [None]:
from sklearn.cluster import KMeans

# KMeans raises ValueError when there are fewer samples than clusters, so cap
# the requested 5 clusters at the number of available samples.
n_clusters = min(5, len(pca_data))

# Pin n_init and random_state so cluster labels are reproducible across runs
# and do not depend on the scikit-learn version's default for n_init.
kmeans_model = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
kmeans_labels = kmeans_model.fit_predict(pca_data)

Visualize KMeans clusters in PCA space.

In [None]:
import matplotlib.pyplot as plt

# Plot each sample at its first two PCA coordinates, coloured by the
# KMeans label assigned to it.
first_component = pca_data[:, 0]
second_component = pca_data[:, 1]
plt.scatter(first_component, second_component, c=kmeans_labels)
plt.xlabel('PCA Component 1')
plt.ylabel('PCA Component 2')
plt.title('KMeans Clusters on PCA Data')
plt.show()