In [None]:
# load the data
import numpy as np
import matplotlib.pyplot as plt
import scipy.linalg

# NOTE(review): allow_pickle=True makes np.load unpickle arbitrary Python
# objects, which can execute code -- only load this file from a trusted source.
# .item() extracts the single Python object stored in the loaded array; the
# following cell indexes it by string key ("Xtrain", "Ytrain", ...).
data = np.load("digits389.npy", allow_pickle=True).item()


In [None]:
# Pull the four train/test arrays out of the loaded container in one step.
X_train, Y_train, X_test, Y_test = (
    data[key] for key in ("Xtrain", "Ytrain", "Xtest", "Ytest")
)

In [None]:
# Inspect the shape of the test feature array.
X_test.shape

In [None]:
def LDA_feature(X,Y):
    """Compute Fisher LDA directions for labelled data and project the data.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        One sample per row.
    Y : array-like, length n_samples
        Class label of each row of ``X``.

    Returns
    -------
    eigen_vectors : ndarray, shape (n_features, n_classes - 1)
        Discriminant directions as columns, ordered by decreasing eigenvalue
        of ``pinv(S_w) @ S_b``.
    projected : ndarray, shape (n_samples, n_classes - 1)
        ``X @ eigen_vectors``.

    With fewer than two distinct labels no discriminant direction exists and
    both returned arrays have zero columns.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y).ravel()

    # class_index[i] is the position of Y[i] within the sorted unique labels.
    classes, class_index = np.unique(Y, return_inverse=True)
    n_classes = len(classes)
    counts = np.bincount(class_index, minlength=n_classes)

    mu = X.mean(axis=0)
    mu_class = np.array([X[class_index == k].mean(axis=0) for k in range(n_classes)])

    # Within-class scatter: sum of outer products of class-centred samples.
    centered = X - mu_class[class_index]
    within = centered.T @ centered

    # Between-class scatter: class-size-weighted outer products of the
    # class-mean offsets. This must be an (n_features, n_features) matrix.
    offsets = mu_class - mu
    between_class = (offsets * counts[:, None]).T @ offsets

    # pinv equals inv when `within` is invertible, but stays defined when it is
    # singular (e.g. more features than samples, or constant features).
    eigen_values, eigen_vectors = np.linalg.eig(np.linalg.pinv(within) @ between_class)
    # The product is not symmetric, so eig may return complex dtype with
    # numerically negligible imaginary parts; keep the real parts.
    eigen_values = eigen_values.real
    eigen_vectors = eigen_vectors.real

    # np.linalg.eig stores eigenvectors as COLUMNS: select columns, largest
    # eigenvalue first, and keep at most n_classes - 1 of them.
    order = np.argsort(eigen_values)[::-1][: n_classes - 1]
    eigen_vectors = eigen_vectors[:, order]
    return eigen_vectors, X @ eigen_vectors

In [None]:
# Run LDA on the test split (the arrays are named X_test / Y_test).
vectors, values = LDA_feature(X_test, Y_test)

In [None]:
# LDA on pure Gaussian noise with fixed labels, for a growing number of
# features. The features are drawn independently of the labels, so any class
# separation visible in a panel comes from fitting noise.
dimensions = [5, 30, 60, 75]
samples=25
seed = 0  # fixed seed so the figure is reproducible under Restart & Run All
rng = np.random.default_rng(seed)

# The labels do not depend on the dimension, so build them once.
Y = np.repeat([1,2,3], samples)

# Size the figure once here instead of resetting it on every iteration.
fig, axs = plt.subplots(1, len(dimensions), figsize=(15, 4))
for ax, d in zip(axs, dimensions):
    X = rng.standard_normal((3*samples, d))
    vectors, values = LDA_feature(X, Y)
    ax.scatter(values[:,0], values[:,1], c=Y)
    ax.set_title(f"Dimensions: {d}")

In [None]:
# Fit LDA on the training split and plot the training data in the plane of
# the first two LDA features, coloured by label.
vecs, vals = LDA_feature(X_train, Y_train)
fig, ax = plt.subplots()
ax.scatter(vals[:, 0], vals[:, 1], c=Y_train)
ax.set_title("LDA projection (training set)")
ax.set_xlabel("LDA feature 1")
ax.set_ylabel("LDA feature 2");

In [None]:
# Project the test split with the directions fitted on the training split
# (`vecs`) and plot it in the same two LDA features, coloured by label.
vals = X_test @ vecs
fig, ax = plt.subplots()
ax.scatter(vals[:, 0], vals[:, 1], c=Y_test)
ax.set_title("LDA projection (test set)")
ax.set_xlabel("LDA feature 1")
ax.set_ylabel("LDA feature 2");

## 9 b)

Proof by induction

\begin{align*}
& \text{Let $C = \{\langle w,x\rangle + b\}$ be the set of all binary linear classifiers and $d$ their dimension} \\
& d = 1:\\
& \text{If we choose one data point of dimensionality 1 as the origin, at most one data point of dimensionality 1 can be classified. If $n = 2$, a classifier of dimensionality at least 3 is needed (XOR problem)}\\
& \Rightarrow n \leq d + 1 \\
& n + 1:\\
& \text{If we look at the bit code of the data points of dimensionality $n+1$, classifying the first $n$ bits correctly requires classifiers of dimensionality $n+1$. To classify the last bit correctly as well, the classifier needs one more dimension: $d + 2$}\\
& \text{With a linear classifier of dimension $d + 1$, at most $n$ points can be classified correctly.} \\
\end{align*}