# Linear Discriminant Analysis

## Load Data

In [1]:
import numpy as np
from sklearn.datasets import load_iris

In [2]:
# Load the Iris dataset once and unpack the feature matrix and label vector together
# (the previous version called load_iris() twice, building the dataset bunch two times).
X, y = load_iris(return_X_y=True)

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
# Hold out 30% of the samples for evaluation; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

---

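## My LDA

A from-scratch LDA classifier. With class priors $\pi_k$, class means $\mu_k$ and the pooled within-class covariance $\Sigma$, a sample $x$ is assigned to the class with the largest linear discriminant score

$$\delta_k(x) = x^\top \Sigma^{-1} \mu_k - \tfrac{1}{2}\, \mu_k^\top \Sigma^{-1} \mu_k + \log \pi_k .$$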

In [5]:
class MyLDA:
    """Linear Discriminant Analysis classifier with a pooled covariance matrix.

    After ``fit`` the instance exposes:

    * ``classes_`` -- sorted unique labels seen during fitting,
    * ``pi``       -- list of class priors, ordered like ``classes_``,
    * ``mu``       -- list of per-class mean vectors, ordered like ``classes_``,
    * ``sigma``    -- pooled within-class covariance matrix.
    """

    def fit(self, X_train, y_train):
        """Estimate class priors, class means and the pooled covariance.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training samples.
        y_train : array-like of shape (n_samples,)
            Class label of every training sample. Labels may be arbitrary
            values (non-contiguous integers, strings, ...).

        Raises
        ------
        ValueError
            If ``X_train`` and ``y_train`` differ in length, or if there are
            not more samples than classes (the pooled covariance divides by
            ``n_samples - n_classes``).
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train)
        if len(X_train) != len(y_train):
            raise ValueError("X_train and y_train must contain the same number of samples")

        # Remember the labels so predict() never has to look at outside state.
        self.classes_ = np.unique(y_train)
        n_samples = len(X_train)
        n_classes = len(self.classes_)
        if n_samples <= n_classes:
            raise ValueError("need more samples than classes to estimate the pooled covariance")

        self.pi = []
        self.mu = []
        n_features = X_train.shape[1]
        scatter = np.zeros((n_features, n_features))
        # Parameters are stored by position in classes_, not by label value,
        # so labels do not have to be exactly 0..K-1.
        for label in self.classes_:
            X_k = X_train[y_train == label]
            mu_k = X_k.mean(axis=0)
            centered = X_k - mu_k
            self.pi.append(len(X_k) / n_samples)
            self.mu.append(mu_k)
            scatter += centered.T @ centered
        # Unbiased pooled within-class covariance.
        self.sigma = scatter / (n_samples - n_classes)

    def predict(self, X_test):
        """Assign each sample to the class with the largest discriminant score.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features) or (n_features,)
            Samples to classify.

        Returns
        -------
        numpy.ndarray or scalar
            Predicted class labels (taken from ``classes_``); a single label
            when ``X_test`` is one-dimensional.
        """
        X_test = np.asarray(X_test, dtype=float)
        means = np.asarray(self.mu)  # (n_classes, n_features)
        # Column k of coef is Sigma^{-1} mu_k; one linear solve replaces the
        # repeated matrix inversions done per class.
        coef = np.linalg.solve(self.sigma, means.T)  # (n_features, n_classes)
        # -0.5 * mu_k^T Sigma^{-1} mu_k + log(pi_k) for every class k.
        intercept = -0.5 * np.sum(means.T * coef, axis=0) + np.log(self.pi)
        scores = X_test @ coef + intercept
        # Map the winning position back to the actual class label.
        return self.classes_[np.argmax(scores, axis=-1)]

In [6]:
# Create an instance of the MyLDA class defined in this notebook.
model = MyLDA()

In [7]:
# Fit MyLDA on the training split only; the test split stays unseen until prediction.
model.fit(X_train, y_train)

In [8]:
# Predict with MyLDA on the held-out test samples.
y_pred = model.predict(X_test)

In [9]:
from sklearn.metrics import zero_one_loss

In [10]:
# zero_one_loss expects (y_true, y_pred); pass the ground truth first to match the documented signature.
print('0-1 Loss of My LDA:', zero_one_loss(y_test, y_pred))

0-1 Loss of My LDA: 0.022222222222222254


---

## Compare to Scikit-Learn

In [11]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [12]:
# Reference implementation from scikit-learn, constructed with its default arguments.
lda = LinearDiscriminantAnalysis()

In [13]:
# Fit the scikit-learn model on the same training split that MyLDA was fitted on.
lda.fit(X_train, y_train)

LinearDiscriminantAnalysis()

In [14]:
# NOTE: this rebinds y_pred, replacing the MyLDA predictions assigned in an earlier cell.
y_pred = lda.predict(X_test)

In [15]:
# zero_one_loss expects (y_true, y_pred); pass the ground truth first to match the documented signature.
print('0-1 Loss of sklearn LDA:', zero_one_loss(y_test, y_pred))

0-1 Loss of sklearn LDA: 0.022222222222222254
