In [1]:
# imports and paths for local venv
from tqdm.auto import tqdm

from operations import S21MnistOperations

In [2]:
# Locations of the gzipped MNIST training images and labels, relative to this notebook.
MNIST_IMAGES_PATH = '../../datasets/data/visualizations/train-images-idx3-ubyte.gz'
MNIST_LABELS_PATH = '../../datasets/data/visualizations/train-labels-idx1-ubyte.gz'

# Single helper object reused by the later cells for embedding and evaluation.
operator = S21MnistOperations(images_path=MNIST_IMAGES_PATH, labels_path=MNIST_LABELS_PATH)

# NOTE(review): presumably returns (images, labels) for the training split — confirm in operations.py.
X_train, y_train = operator.load_dataset()

In [3]:
# (label, bound method) pairs, executed in this order; each label becomes a key in `results`.
TRANSFORMS = [
    ("pca", operator.transform_pca),
    ("svd", operator.transform_svd),
    ("rand_svd", operator.transform_random_svd),
    ("tsne", operator.transform_tsne),
    ("umap", operator.transform_umap),
    ("lle", operator.transform_lle),
]

# Call every transform once (no arguments) behind a progress bar and keep its
# return value under the matching label.
results = {}
for name, func in tqdm(TRANSFORMS, desc="Transformations", unit="method"):
    results[name] = func()

# Flat aliases for the individual results.
# NOTE(review): the *_2d suffix suggests each value is a 2-D embedding — confirm in operations.py.
X_pca_2d      = results["pca"]
X_svd_2d      = results["svd"]
X_rand_svd_2d = results["rand_svd"]
X_tsne_2d     = results["tsne"]
X_umap_2d     = results["umap"]
X_lle_2d      = results["lle"]

Transformations:   0%|          | 0/6 [00:00<?, ?method/s]

In [4]:
# NOTE(review): n_samples presumably caps how many points are scored — confirm in operations.py.
N_EVAL_SAMPLES = 5000
metrics = operator.evaluate_2d(n_samples=N_EVAL_SAMPLES)

PCA:
  accuracy: 0.5890
  f1_macro: 0.5687
  silhouette: 0.0298

SVD:
  accuracy: 0.5094
  f1_macro: 0.4919
  silhouette: -0.0273

RAND_SVD:
  accuracy: 0.5094
  f1_macro: 0.4919
  silhouette: -0.0273

TSNE:
  accuracy: 0.9446
  f1_macro: 0.9441
  silhouette: 0.2777

UMAP:
  accuracy: 0.9704
  f1_macro: 0.9703
  silhouette: 0.5003

LLE:
  accuracy: 0.7722
  f1_macro: 0.7667
  silhouette: 0.0039



In [5]:
# Index of the stored image to project.
IDX = 0

# Take one image from the operator, flatten it into a single-row matrix, and
# pass it through operator.pca_model.
# NOTE(review): pca_model is presumably fitted by transform_pca in an earlier cell — confirm.
sample_row = operator._get_images()[IDX].reshape(1, -1)
z_train = operator.pca_model.transform(sample_row)[0]  # [0] unwraps the single output row
print("TRAIN:", z_train)
# Optional: persist the model for later use.
# operator.save_trained_pca("pca_model.pkl")

TRAIN: [0.48578763 1.2261804 ]
