In [None]:
from time import time
import matplotlib.pyplot as plt
import sys

from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import fetch_lfw_people
from sklearn.metrics import classification_report
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn import random_projection
from sklearn.utils.fixes import loguniform
from tqdm import tqdm
import numpy as np
import pickle

In [1]:
# Acquire MNIST data
from tensorflow.keras.datasets import mnist
import numpy as np

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten every image to a 784-element row, then cast to float32 and divide
# by 255, one chained expression per split.
train_images = train_images.reshape(-1, 784).astype('float32') / 255
test_images = test_images.reshape(-1, 784).astype('float32') / 255

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
def get_data():
    """Load every batch of the training dataloader into one pair of arrays.

    Builds an ``ImageWeightsModule`` over ``lora_dataset_creator/lora_dataset``
    (augmentation off, ``val_split=0``), iterates its training dataloader and
    concatenates all ``x`` and all ``y`` batches along dimension 0. Each batch
    is included exactly once.

    Returns:
        tuple: ``(X, Y)`` where ``X`` is the concatenated ``torch.Tensor`` of
        the ``x`` batches and ``Y`` is the concatenated ``y`` batches converted
        to a NumPy array.

    Raises:
        ValueError: If the dataloader yields no batches.
    """
    from datamodule import ImageWeightsModule
    import torch

    all_data_loader = ImageWeightsModule(
        "lora_dataset_creator/lora_dataset", 1, augment_training=False, val_split=0
    ).train_dataloader()

    # Collect first, concatenate once: avoids re-copying the growing tensor on
    # every iteration and avoids counting the first batch twice.
    x_batches = []
    y_batches = []
    for x, y in tqdm(all_data_loader):
        x_batches.append(x)
        y_batches.append(y)

    if not x_batches:
        raise ValueError("dataloader yielded no batches; nothing to load")

    X = torch.cat(x_batches, dim=0)
    Y = torch.cat(y_batches, dim=0)
    return X, np.array(Y)

# Materialise the full dataset once; Y (a NumPy array) feeds the cells below.
X, Y = get_data()

In [None]:
# Prepare the PCA inputs: a 10-row validation slice and the remaining rows.
scaler = StandardScaler()
Y_scaled = scaler.fit_transform(Y)
# NOTE(review): the next line replaces the standardised array with the raw Y,
# so everything below operates on unscaled values despite the "_scaled" names
# (the fitted `scaler` itself is kept). Looks like scaling was switched off on
# purpose — confirm, or drop the override.
Y_scaled = Y
Y_scaled_val = Y_scaled[:10, :]  # first 10 rows, held out from the PCA fit
Y_scaled = Y_scaled[10:, :]  # remaining rows, used to fit the PCA
print(Y_scaled.shape)

In [None]:
@np.printoptions(suppress=True)
def test_pca(x):
    t0 = time()
    z = pca.transform(x)
    print("transform done in %0.3fs" % (time() - t0))
    print(f"out = {z.shape}")
    t0 = time()
    x_hat = pca.inverse_transform(z)
    print("inverse_transform done in %0.3fs" % (time() - t0))
    print(f"loss = {abs(x_hat - x).mean()}")

In [None]:
# Fit a whitened, randomized-solver PCA on the training rows and time it.
n_components = 100
t0 = time()
print(f"in = {Y_scaled.shape}")
pca = PCA(
    n_components=n_components,
    svd_solver="randomized",
    whiten=True,
    random_state=5,
)
pca.fit(Y_scaled)  # fit() returns the estimator itself, so `pca` is the fitted model
print("done in %0.3fs" % (time() - t0))

In [None]:
# Round-trip error on the rows the PCA was fitted on, then on the 10 held-out rows.
test_pca(Y_scaled)
test_pca(Y_scaled_val)

In [None]:
# Plot the leading components as a titled image gallery.
# NOTE(review): `eigenfaces`, `plot_gallery`, `h` and `w` are not defined in
# the visible part of this notebook — this cell looks carried over from another
# example; confirm they exist upstream or remove the cell.
eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])]
plot_gallery(eigenfaces, eigenface_titles, h, w)

plt.show()

In [None]:
# Randomized hyper-parameter search for an RBF-kernel SVC on the PCA features.
# NOTE(review): `X_train_pca` and `y_train` are not defined in the visible part
# of this notebook — confirm they are created upstream before running this cell.
print("Fitting the classifier to the training set")
t0 = time()
# Despite the name, these are distributions to sample from, not a fixed grid.
param_grid = {
    "C": loguniform(1e3, 1e5),
    "gamma": loguniform(1e-4, 1e-1),
}
clf = RandomizedSearchCV(
    SVC(kernel="rbf", class_weight="balanced"),
    param_grid,
    n_iter=10,
    # Seed the candidate sampling so reruns evaluate the same 10 settings;
    # 5 is the same seed the PCA fit in this notebook uses.
    random_state=5,
)
clf = clf.fit(X_train_pca, y_train)
print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_)

In [None]:
# Evaluate the fitted search object: time the predictions, print a per-class
# report and draw the confusion matrix.
# NOTE(review): `X_test_pca`, `y_test` and `target_names` are not defined in
# the visible part of this notebook — confirm they are created upstream.
print("Predicting people's names on the test set")
t0 = time()
y_pred = clf.predict(X_test_pca)
print("done in %0.3fs" % (time() - t0))

print(classification_report(y_test, y_pred, target_names=target_names))
ConfusionMatrixDisplay.from_estimator(
    clf, X_test_pca, y_test, display_labels=target_names, xticks_rotation="vertical"
)
plt.tight_layout()  # keep the vertically rotated tick labels inside the figure
plt.show()