In [53]:
import numpy as np
import sympy as sp
import pandas as pd

In [54]:
class LDA:
    def __init__(self, n_components):
        self.n_components = n_components
        self.X_train = None
        self.y_train = None
        self.Sw = None
        self.Sb = None
        self.eigen_values = None
        self.eigen_vectors = None

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train
        Sw = np.zeros((self.X_train.shape[1], self.X_train.shape[1]))
        Sb = np.zeros((self.X_train.shape[1], self.X_train.shape[1]))
        Sw = np.zeros((self.X_train.shape[1], self.X_train.shape[1]))
        for i in range(len(self.X_train)):
            x = self.X_train[i].reshape(self.X_train.shape[1], 1)
            m = np.mean(self.X_train[self.y_train == self.y_train[i]])
            Sw += np.dot((x - m), (x - m).T)
        m = np.mean(self.X_train)
        for i in range(len(self.X_train)):
            x = self.X_train[i].reshape(self.X_train.shape[1], 1)
            m_i = np.mean(self.X_train[self.y_train == self.y_train[i]])
            Sb += len(self.X_train[self.y_train == self.y_train[i]]) * np.dot((m_i - m), (m_i - m).T)
        self.Sw = Sw
        self.Sb = Sb
        A = np.dot(np.linalg.inv(Sw), Sb)
        eigen_values, eigen_vectors = np.linalg.eig(A)
        eigen_vectors = eigen_vectors.T
        idx = np.argsort(abs(eigen_values))[::-1]
        eigen_vectors = eigen_vectors[idx]
        eigen_values = eigen_values[idx]
        self.eigen_values = eigen_values[0:self.n_components]
        self.eigen_vectors = eigen_vectors[0:self.n_components]

    def transform(self, X):
        return np.dot(X, self.eigen_vectors.T)

In [55]:
def euclidean_distance(X_train, X_test):
    dist = np.zeros((len(X_test), len(X_train)))
    for i in range(len(X_test)):
        for j in range(len(X_train)):
            dist[i,j] = np.sqrt(np.sum((X_test[i] - X_train[j])**2))
    return dist

In [56]:
def covariance(X):
    mean = np.mean(X,axis=0)
    X = X - mean
    return np.dot(X.T, X)/(X.shape[0]-1)

In [57]:
class KNN:
    def __init__(self, k = 5):
        self.k = k
    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train
        
    def predict(self, X_test):
        self.distances = euclidean_distance(self.X_train, X_test)
        pred = []
        for dist in self.distances:
            k_nearest_indices = np.argsort(dist)[:self.k]
            k_nearest_labels = self.y_train[k_nearest_indices]
            pred.append(np.unique(k_nearest_labels)[np.argmax(np.unique(k_nearest_labels, return_counts=True)[1])])
        return np.array(pred)

In [58]:
df = pd.read_csv('face.csv')

In [59]:
classes = df['target'].unique()
train = pd.DataFrame()
test = pd.DataFrame()

for i in classes:
    train = pd.concat([train, df[df['target'] == i].iloc[1:]], ignore_index=True)
    test = pd.concat([test, df[df['target'] == i].iloc[:1]], ignore_index=True)

print(test)

           0         1         2         3         4         5         6  \
0   0.309917  0.367769  0.417355  0.442149  0.528926  0.607438  0.657025   
1   0.541322  0.586777  0.640496  0.661157  0.685950  0.685950  0.690083   
2   0.578512  0.603306  0.632231  0.665289  0.677686  0.710744  0.723140   
3   0.169422  0.264463  0.219008  0.280992  0.421488  0.549587  0.669422   
4   0.454545  0.429752  0.537190  0.611570  0.652893  0.702479  0.727273   
5   0.719008  0.727273  0.723140  0.714876  0.723140  0.731405  0.739669   
6   0.206612  0.280992  0.367769  0.392562  0.681818  0.714876  0.723140   
7   0.185950  0.194215  0.322314  0.524793  0.661157  0.772727  0.822314   
8   0.500000  0.487603  0.537190  0.574380  0.595041  0.595041  0.628099   
9   0.326446  0.483471  0.524793  0.599174  0.665289  0.702479  0.702479   
10  0.289256  0.239669  0.227273  0.528926  0.772727  0.826446  0.834711   
11  0.483471  0.520661  0.590909  0.632231  0.652893  0.706612  0.706612   
12  0.371901

In [60]:
X_train = train.drop(['target'], axis=1).values
y_train = train['target'].values
X_test = test.drop(['target'], axis=1).values
y_test = test['target'].values

In [61]:
lda = LDA(39)
lda.fit(X_train, y_train)

In [62]:
X_train = lda.transform(X_train)
X_test = lda.transform(X_test)
print(f"X_train: {len(X_train)}")
print(f"X_test: {len(X_test)}")
print(X_train)

X_train: 360
X_test: 40
[[-0.06934221+0.j          0.02001728+0.j         -0.02876857+0.j
  ...  0.09527241-0.03477702j  0.01238876+0.j
  -0.16388612+0.04211298j]
 [-0.06934221+0.j          0.03288809+0.j         -0.02099824+0.j
  ...  0.07803182-0.07746382j  0.04613435+0.j
  -0.09455975-0.00403091j]
 [-0.06934221+0.j          0.03673326+0.j         -0.03528519+0.j
  ...  0.06704287-0.04265419j  0.02799889+0.j
   0.01290651+0.05458445j]
 ...
 [-0.06509287+0.j         -0.00996599+0.j         -0.01762707+0.j
  ...  0.17846456+0.07167398j  0.15052054+0.j
  -0.15965584-0.0493058j ]
 [-0.06509287+0.j          0.0292918 +0.j         -0.01161092+0.j
  ...  0.01905974-0.06547205j  0.02042639+0.j
   0.02920411+0.06245979j]
 [-0.06509287+0.j         -0.00365166+0.j          0.01126837+0.j
  ...  0.23007053+0.05904044j  0.27063358+0.j
  -0.14224525+0.00017954j]]


In [63]:
knn = KNN(5)
knn.fit(X_train, y_train)

In [64]:
y_pred = knn.predict(X_test)
compared = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(compared)

    Actual  Predicted
0        0         18
1        1         26
2        2          3
3        3          1
4        4          1
5        5         13
6        6          2
7        7          7
8        8         22
9        9         29
10      10          3
11      11          3
12      12         12
13      13         13
14      14          6
15      15         35
16      16         22
17      17         17
18      18          2
19      19         29
20      20          3
21      21         37
22      22          8
23      23         26
24      24         14
25      25          6
26      26          1
27      27         36
28      28          8
29      29          3
30      30         13
31      31         26
32      32         32
33      33         26
34      34         34
35      35          2
36      36         36
37      37         26
38      38          3
39      39         12


  dist[i,j] = np.sqrt(np.sum((X_test[i] - X_train[j])**2))


In [65]:
def accuracy(y_pred,y_test):
    return np.sum(y_pred == y_test)/len(y_test)

print("Accuracy: ", accuracy(y_pred, y_test))
print("accuracy percentage: ", accuracy(y_pred, y_test)*100, "%")

Accuracy:  0.175
accuracy percentage:  17.5 %
