In [23]:
import sys
sys.path.append("src")

import numpy as np
import torchvision
import matplotlib.pyplot as plt
import math
from tqdm.notebook import tqdm

%matplotlib inline

# Load the EMNIST "digits" split from ./dataset (download=True asks torchvision
# to fetch the files when needed): one dataset object with train=True and one
# with train=False, used below as the training and evaluation sets.
emnist_train = torchvision.datasets.EMNIST(
    root='./dataset/', split='digits', train=True, download=True
)
emnist_eval = torchvision.datasets.EMNIST(
    root='./dataset', split='digits', train=False, download=True
)

In [24]:
# Flatten the images into matrices of shape [n_images, dim] and collect the
# matching ground-truth labels for the train and eval sets.

dim = 28*28  # number of values in one flattened image

# Only a small subset of each split is used here; replace the constants with
# len(emnist_train) / len(emnist_eval) to process the full splits.
n_images_train = 1000
n_images_eval = 10


def _flatten_images(dataset, n_images, dim):
    """
    Return the first `n_images` samples of `dataset` as `(data, labels)`.

    `dataset[k]` must yield an `(image, label)` pair. Each image is converted
    to a NumPy array and flattened into one row of `data`, which has shape
    `[n_images, dim]`; `labels` is a list holding the label of each row.
    """
    data = np.empty([n_images, dim])
    labels = []
    for row in range(n_images):
        # Index the dataset once per sample and unpack both fields, instead of
        # fetching the same sample twice (once for the image, once for the label).
        image, label = dataset[row]
        data[row] = np.array(image).reshape(dim)
        labels.append(label)
    return data, labels


train_data, train_correct_labels = _flatten_images(emnist_train, n_images_train, dim)
eval_data, eval_correct_labels = _flatten_images(emnist_eval, n_images_eval, dim)

In [25]:
from src.VectorSpace import VectorSpace
from src.VectorSet import VectorSet

def cossine_similarity(vector:np.ndarray, subspace:"VectorSpace") -> np.ndarray:
    r"""
    Similarity between input vectors and the basis vectors of a subspace.

    For each row x of `vector`, with \phi_i the rows of `subspace.A`, returns
    S = \sum_{i=0}^{r-1} \frac{(x,\phi_i)^2}{\|x\|\|\phi_i\|}

    Parameters
    ----------
    vector : np.ndarray
        A single vector of shape (dim,) or a batch of shape (n, dim). It is
        cast to `subspace.dtype` before the computation.
    subspace : VectorSpace
        Object exposing `A` (basis vectors stored as rows, so `A` has `dim`
        columns), `dim` and `dtype`.

    Returns
    -------
    np.ndarray
        One similarity value per input vector, shape (n,); a 1-D input is
        treated as a batch of one and yields shape (1,).

    Raises
    ------
    AssertionError
        If `vector` has more than 2 dimensions, or if its second dimension
        differs from `subspace.dim`.

    Notes
    -----
    No guard is applied for zero-norm rows: the denominator is then zero and
    NumPy's division semantics decide the result.
    NOTE(review): the denominator uses the norms, not the squared norms, so
    this equals the sum of squared cosines only up to a factor
    \|x\|\|\phi_i\| per term -- confirm this is the intended definition.
    """
    if vector.ndim > 2:
        raise AssertionError("Cannot input tensor of ndim > 2")
    if vector.ndim == 1:
        vector = vector[np.newaxis, :]
    if vector.shape[1] != subspace.dim:
        raise AssertionError("Vector dimension must be the same as VectorSpace dimension")

    vector = vector.astype(subspace.dtype)
    basis = subspace.A

    # Squared inner products (x, phi_i)^2, shape (n, r).
    squared_products = np.matmul(vector, basis.transpose())**2

    # Row-wise norms. Summing the element-wise squares per row gives the same
    # values as the diagonal of the Gram matrix without building the full
    # n x n (or r x r) matrix first.
    vector_norms = np.sqrt(np.einsum('ij,ij->i', vector, vector))
    basis_norms = np.sqrt(np.einsum('ij,ij->i', basis, basis))

    # Outer product of the norms: entry (k, i) is ||x_k|| * ||phi_i||.
    norm_products = vector_norms[:, np.newaxis] * basis_norms[np.newaxis, :]

    return np.sum(np.divide(squared_products, norm_products), axis=1)

In [28]:
# Candidate min-energy thresholds for parameter tuning.
# NOTE(review): 21 points over [0.05, 1] give a step of 0.0475; if a 0.05 step
# was intended, 20 points would be needed -- confirm before changing.
min_energy_list = np.linspace(0.05, 1, 21)

# Build a single VectorSet from the training vectors and their labels.
# It is named `vector_set` (not `set`) so the built-in `set` type is not
# shadowed for the rest of the kernel session.
vector_set = VectorSet(dim=dim)
vector_set.populate(train_data, train_correct_labels)

for min_energy in tqdm(min_energy_list):
    # Generate the subspaces with PCA for the current threshold. Per the
    # original notebook comment, the N biggest eigenvectors are kept such that
    # energy(N) > energy(min_energy); the implementation lives in VectorSet.
    subset = vector_set.pca(min_energy=min_energy)

    # Per evaluation sample: the best similarity seen so far (`cs`) and the
    # label of the subspace that produced it (`max_likelihood_cs`). A sample
    # whose similarity never exceeds 0 keeps the label None.
    max_likelihood_cs = [None]*eval_data.shape[0]
    cs = [0]*eval_data.shape[0]

    # Classify the eval_data: score every sample against every subspace and
    # keep the label of the highest-scoring one (strict '>' keeps the first
    # subspace on ties).
    for subspace in subset:
        similarities = cossine_similarity(eval_data, subspace)
        for i, similarity in enumerate(similarities):
            if similarity > cs[i]:
                cs[i] = similarity
                max_likelihood_cs[i] = subspace.label

    # Fraction of evaluation samples whose predicted label matches the truth.
    correct_class = [
        predicted == expected
        for predicted, expected in zip(max_likelihood_cs, eval_correct_labels)
    ]
    prediction_ratio = correct_class.count(True) / len(correct_class)

    print(prediction_ratio)
    

  0%|          | 0/21 [00:00<?, ?it/s]

0.7
0.7
0.7
0.8
0.8
0.8
0.8
0.8
0.8
1.0
1.0
1.0
1.0
1.0
0.9
0.9
0.9
0.9
0.9
0.9
0.9
