# Practical Session 2

In [1]:
# Imports
import torch
from torch import Tensor
import random
import matplotlib.pyplot as plt

import dlc_practical_prologue as prologue

## Nearest neighbor

In [2]:
def nearest_classification(train_input, train_target, x):
    """Return the target of the training sample nearest to ``x``.

    Nearness is measured with the Euclidean (L2) distance, reduced over
    dimension 1 of ``train_input``.

    Args:
        train_input: tensor holding one training sample per row.
        train_target: tensor indexed by sample number along dimension 0.
        x: the sample to classify; must broadcast against ``train_input``.

    Returns:
        ``train_target[i]`` where ``i`` is the index of the closest row.
    """
    # The square root is monotonic, so the argmin of the squared distances
    # is the argmin of the distances: no need to compute the root.
    squared_distances = (train_input - x).pow(2).sum(dim=1)
    return train_target[squared_distances.argmin()]

## PCA

In [3]:
def PCA(x):
    """Compute the PCA basis of the samples stored as rows of ``x``.

    Args:
        x: 2d floating-point tensor, one sample per row.

    Returns:
        A pair ``(mean, basis)`` where ``mean`` is the per-column mean of
        ``x`` and ``basis`` holds one eigenvector of the (unnormalised)
        covariance matrix per row, sorted by decreasing eigenvalue
        magnitude. Eigenvectors are only defined up to their sign.
    """
    mean = x.mean(0)
    centered = x - mean
    # Unnormalised covariance matrix; symmetric by construction, which is
    # what allows the use of a symmetric eigen-solver below.
    Sigma = centered.t() @ centered

    # Tensor.eig() is gone from recent PyTorch releases. Both solvers below
    # return real eigenvalues and the eigenvectors as columns.
    if hasattr(torch, "linalg") and hasattr(torch.linalg, "eigh"):
        eigen_values, eigen_vectors = torch.linalg.eigh(Sigma)
    else:
        eigen_values, eigen_vectors = torch.symeig(Sigma, eigenvectors=True)

    right_order = eigen_values.abs().sort(descending=True)[1]
    # Transpose so that each row is an eigenvector, then reorder the rows.
    eigen_vectors = eigen_vectors.t()[right_order]
    return mean, eigen_vectors

## Error estimation

In [4]:
def compute_nb_errors(train_input, train_target, test_input, test_target, mean = None, proj = None):
    """Count the test samples misclassified by the nearest-neighbor rule.

    Args:
        train_input: training samples, one per row.
        train_target: targets of the training samples.
        test_input: test samples, one per row.
        test_target: expected targets of the test samples.
        mean: if not None, subtracted from both the training and test samples.
        proj: if not None, both sets are multiplied by ``proj.t()``, i.e.
            each sample is expressed on the rows of ``proj``.

    Returns:
        The number of test samples whose nearest training sample carries a
        target different from the expected one.
    """
    if mean is not None:
        train_input, test_input = train_input - mean, test_input - mean

    if proj is not None:
        projection = proj.t()
        train_input, test_input = train_input @ projection, test_input @ projection

    # One classification per test sample; every mismatch counts for one error.
    return sum(
        1
        for idx in range(test_input.size(0))
        if test_target[idx] != nearest_classification(train_input, train_target, test_input[idx])
    )

## Check that all this makes sense

In [5]:
# Cifar
train_input, train_target, test_input, test_target = prologue.load_data(cifar = True)

nb_test_samples = test_input.size(0)

# Baseline: nearest neighbor on the samples as loaded, no centering, no projection.
nb_errors = compute_nb_errors(train_input, train_target, test_input, test_target)
print('Baseline nb_errors {:d} error {:.02f}%'.format(nb_errors, 100 * nb_errors / nb_test_samples))

# Random projection: 100 Gaussian directions. The seed is fixed so that
# re-running the notebook reproduces the same basis, hence the same score.
torch.manual_seed(0)
basis = train_input.new_empty((100, train_input.size(1))).normal_()

nb_errors = compute_nb_errors(train_input, train_target, test_input, test_target, None, basis)
print('Random {:d}d nb_errors {:d} error {:.02f}%'.format(basis.size(0), nb_errors, 100 * nb_errors / nb_test_samples))

# PCA projection: keep the first d rows of the basis returned by PCA.
mean, basis = PCA(train_input)

for d in [ 100, 50, 10, 3 ]:
    nb_errors = compute_nb_errors(train_input, train_target, test_input, test_target, mean, basis[:d])
    print('PCA {:d}d nb_errors {:d} error {:.02f}%'.format(d, nb_errors, 100 * nb_errors / nb_test_samples))

* Using CIFAR
Files already downloaded and verified
Files already downloaded and verified
** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples
Baseline nb_errors 746 error 74.60%
Random 100d nb_errors 781 error 78.10%
PCA 100d nb_errors 745 error 74.50%
PCA 50d nb_errors 737 error 73.70%
PCA 10d nb_errors 763 error 76.30%
PCA 3d nb_errors 839 error 83.90%
