In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [7]:
class LDA:
    """Linear discriminant analysis classifier with a pooled covariance matrix.

    Each class k is summarised by its mean vector and prior probability; all
    classes share one covariance matrix estimated from the within-class
    scatter.  A sample x is assigned to the class that maximises the linear
    discriminant

        delta_k(x) = x^T S^-1 mu_k - 1/2 mu_k^T S^-1 mu_k + log(pi_k)

    Attributes
    ----------
    full_stats : dict or None
        Maps each class label to ``{'pi_apriori': float, 'mean': ndarray}``.
        ``None`` until :meth:`fit` has been called.
    cov_mat : ndarray or None
        Pooled covariance matrix of shape (n_features, n_features).
    classes_ : ndarray or None
        Sorted unique class labels seen during :meth:`fit`; column ``j`` of
        the score/probability matrices corresponds to ``classes_[j]``.
    """

    def __init__(self):
        self.full_stats = None
        self.cov_mat = None
        self.classes_ = None

    def dis(self, X, cov_mat, mean, pi_apriori):
        """Evaluate the linear discriminant of one class.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features) or (n_features,)
            Samples to score.
        cov_mat : ndarray of shape (n_features, n_features)
            Shared covariance matrix (must be non-singular).
        mean : ndarray of shape (n_features,)
            Mean vector of the class.
        pi_apriori : float
            Prior probability of the class.

        Returns
        -------
        ndarray of shape (n_samples,) (or a scalar for a single 1-D sample)
            Discriminant value for every sample.

        Raises
        ------
        numpy.linalg.LinAlgError
            If ``cov_mat`` is singular.
        """
        # Solve S w = mu once instead of forming S^-1 explicitly: it is
        # cheaper and numerically more stable, and both terms below reuse w.
        weights = np.linalg.solve(cov_mat, mean)
        return np.dot(X, weights) + np.log(pi_apriori) - 0.5 * np.dot(mean, weights)

    def fit(self, X_train, y_train):
        """Estimate class priors, class means and the pooled covariance.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training samples.
        y_train : array-like of shape (n_samples,)
            Class label of every training sample (any comparable dtype).

        Raises
        ------
        ValueError
            If ``X_train`` and ``y_train`` disagree on the number of samples,
            or if there are not more samples than classes (the pooled
            covariance would require a division by zero).
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train).ravel()
        n_samples = X_train.shape[0]
        if n_samples != y_train.shape[0]:
            raise ValueError(
                "X_train and y_train must contain the same number of samples"
            )

        classes, counts = np.unique(y_train, return_counts=True)
        if n_samples <= len(classes):
            raise ValueError(
                "need more samples than classes to estimate the pooled covariance"
            )

        stats = dict()
        scatter = np.zeros((X_train.shape[1], X_train.shape[1]))
        for label, count in zip(classes, counts):
            x_k = X_train[y_train == label]
            mean_k = np.mean(x_k, axis=0)
            centered = x_k - mean_k
            # Accumulate the within-class scatter; it is normalised once below.
            scatter += (centered.T).dot(centered)
            stats[label] = {'pi_apriori': count / n_samples, 'mean': mean_k}

        self.full_stats = stats
        self.classes_ = classes
        # Unbiased pooled estimate: divide by (n_samples - n_classes).
        self.cov_mat = scatter / (n_samples - len(classes))

    def decision_function(self, X):
        """Return the raw discriminant score of every sample for every class.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or (n_features,)
            Samples to score; a single 1-D sample is treated as one row.

        Returns
        -------
        ndarray of shape (n_samples, n_classes)
            Column ``j`` holds the discriminant for ``classes_[j]``.
        """
        X = np.atleast_2d(np.asarray(X, dtype=float))
        per_class = [
            self.dis(X, self.cov_mat, value['mean'], value['pi_apriori'])
            for value in self.full_stats.values()
        ]
        return np.column_stack(per_class)

    def predict_proba(self, X):
        """Return posterior class probabilities.

        The discriminants are log-posteriors up to a per-sample constant, so
        a softmax over the classes yields the posterior probabilities.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or (n_features,)

        Returns
        -------
        ndarray of shape (n_samples, n_classes)
            Non-negative rows that sum to 1; column order follows ``classes_``.
        """
        scores = self.decision_function(X)
        # Subtract the row maximum before exponentiating to avoid overflow.
        scores = scores - scores.max(axis=1, keepdims=True)
        unnormalised = np.exp(scores)
        return unnormalised / unnormalised.sum(axis=1, keepdims=True)

    def predict(self, X):
        """Predict the class label of every sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or (n_features,)

        Returns
        -------
        ndarray of shape (n_samples,)
            Labels drawn from ``classes_`` (not positional indices).
        """
        best = np.argmax(self.decision_function(X), axis=1)
        # Map the winning column back to the actual label so that labels
        # other than 0..K-1 (e.g. strings) are handled correctly.
        return self.classes_[best]

    def score(self, x_test, y_test):
        """Return the accuracy of :meth:`predict` on the given data.

        Parameters
        ----------
        x_test : array-like of shape (n_samples, n_features)
        y_test : array-like of shape (n_samples,)

        Returns
        -------
        float
            Fraction of samples whose predicted label equals the true label.

        Raises
        ------
        ZeroDivisionError
            If ``y_test`` is empty.
        """
        y_test = np.asarray(y_test).ravel()
        # Compare by equality rather than subtraction so non-numeric labels work.
        n_correct = int(np.count_nonzero(self.predict(x_test) == y_test))
        return n_correct / y_test.shape[0]
       

In [8]:
# Load the iris feature matrix and label vector as plain arrays.
X, y = load_iris(return_X_y=True)

# Hold out a validation split; the fixed seed keeps the split reproducible.
x_train, x_val, y_train, y_val = train_test_split(
    X,
    y,
    random_state=24,
)

In [9]:
# Build the LDA classifier defined above and estimate its per-class
# statistics and pooled covariance from the training split.
lda = LDA()
lda.fit(x_train, y_train)

In [10]:
# Element-wise difference between predicted and true validation labels;
# a zero entry marks a correctly classified sample.
np.subtract(lda.predict(x_val), y_val)

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       -1,  0,  0,  0], dtype=int64)

In [11]:
# Report accuracy (fraction of correctly predicted labels) on the training
# split and on the held-out validation split.
print('Train score:', lda.score(x_train, y_train))
print('Test score: ', lda.score(x_val, y_val))

Train score: 0.9821428571428571
Test score:  0.9736842105263158
