In [1]:
import warnings

from matplotlib import pyplot as plt

warnings.filterwarnings("ignore")
from itertools import chain
import pandas as pd
import numpy as np
from numpy import array_split, random

import math
from math import sqrt
import sklearn
from sklearn.datasets import load_iris

#### Loading the iris dataset

In [2]:
data = load_iris()
data
# convert dataset into a pandas dataframe
df = pd.DataFrame(data = np.c_[data['data'], data['target']],columns = data['feature_names'] + ['target'])
df['species'] = pd.Categorical.from_codes(data.target, data.target_names)
df.columns = ['s_length', 's_width', 'p_length', 'p_width', 'target', 'species']

Replacing the categorical variable into integer

In [3]:
print(df['species'].unique())
# replacing categorical values to numeric values
df['species'].replace(['setosa', 'versicolor','virginica'],[0, 1,2], inplace=True)
df

['setosa', 'versicolor', 'virginica']
Categories (3, object): ['setosa', 'versicolor', 'virginica']


Unnamed: 0,s_length,s_width,p_length,p_width,target,species
0,5.1,3.5,1.4,0.2,0.0,0
1,4.9,3.0,1.4,0.2,0.0,0
2,4.7,3.2,1.3,0.2,0.0,0
3,4.6,3.1,1.5,0.2,0.0,0
4,5.0,3.6,1.4,0.2,0.0,0
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2.0,2
146,6.3,2.5,5.0,1.9,2.0,2
147,6.5,3.0,5.2,2.0,2.0,2
148,6.2,3.4,5.4,2.3,2.0,2


### Splitting the dataset 

In [4]:
def train_test_split(data):
    np.random.seed(0)
    split_ratio = np.random.rand(len(data))<0.8
    train_data = data[split_ratio]
    test_data = data[~split_ratio] 
    return train_data, test_data
train,test = train_test_split(df)

In [7]:
species = train['species']
Y = species['Y']
species = species.values.reshape(len(train),1)
# species.shape
Y = Y.values.reshape(len(Y),1)

KeyError: 'Y'

In [None]:
del train['species']
del train['target']
del test['species']
del test['target']

X = train
X

In [None]:
# species = df['species']
# Y = species[0:75]
# species = species.values.reshape(150,1)
# # species.shape
# Y = Y.values.reshape(75,1)

In [None]:
# del df['species']
# del df['target']

# X = df
# X

Unnamed: 0,s_length,s_width,p_length,p_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


## Defining the LDA class and performing the functions in it

In [None]:
class LDA:
    def __init__(self):

        self.classes = None
        self.cov_c = None
        self.means = None
        self.pi_k = None
        self.pooled_cov = None        

    def fit(self, X, categories):
        self.pi_k = dict()
        self.means = dict()
        self.cov_c = dict()
        self.pooled_cov = np.zeros((4,4))
        self.classes = np.unique(categories)

        for c in self.classes:
            X_c = X[categories == c]
            # slef.exs.append(X_c)
            self.pi_k[c] = X_c.shape[0] / X.shape[0]
            print("Pi's for ",c," is ", self.pi_k[c])
            self.means[c] = np.mean(X_c, axis=0)
            print(self.means[c])
            X_corrected = X_c-self.means[c]
            # print(X_corrected[c])
            self.cov_c[c] = self.pi_k[c]*np.dot(X_corrected.T,X_corrected)
            self.pooled_cov+=self.pi_k[c]*self.cov_c[c] 
    
    def predict(self,test):
        predictions = list()
        for x in range(len(test)):
            likelihood_list = list()
            for c in self.classes:
                # X_c = X[categories == c]
                # self.pi_k[c] = X_c.shape[0] / X.shape[0]
                pi = np.log(self.pi_k[c])
                #print(self.covariances_total)
                inv_cov = np.linalg.inv(self.pooled_cov)
                inv_cov_det = np.linalg.det(inv_cov)
                diff = x-self.means[c]
                MD = np.dot(diff.T,np.dot(inv_cov, diff))
                # print(MD)
                likelihood = 0.5*np.log(inv_cov_det) - 0.5*np.dot(diff.T,np.dot(inv_cov, diff))
                #print(likelihood)
                discriminant = pi + likelihood
                likelihood_list.append(discriminant)
            pred = self.classes[np.argmax(likelihood_list)]
            predictions.append(pred)
        return np.array(predictions)
        # return MD.shape

In [None]:
lda = LDA()
lda.fit(X, Y)
# pred_LDA = lda.predict(xtest)
# pred_LDA
print(" The predicted values for LDA :\n")
lda.predict(test)

Pi's for  0  is  0.6666666666666666
s_length    5.006
s_width     3.428
p_length    1.462
p_width     0.246
dtype: float64
Pi's for  1  is  0.3333333333333333
s_length    6.012
s_width     2.776
p_length    4.312
p_width     1.344
dtype: float64
 The predicted values for LDA :



array([0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1])

## Defining the LDA class and performing the functions in it

In [None]:
class QDA:
    def __init__(self):

        self.classes = None
        self.cov_c = None
        self.means = None
        self.pi_k = None
        self.pooled_cov = None        

    def fit(self, X, categories):
        self.pi_k = dict()
        self.means = dict()
        self.cov_c = dict()
        # self.pooled_cov = np.zeros((2,2))
        self.classes = np.unique(categories)

        for c in self.classes:
            X_c = X[categories == c]
            # exs.append(X_c)
            self.pi_k[c] = X_c.shape[0] / X.shape[0]
            self.means[c] = np.mean(X_c, axis=0)
            print(self.means[c])
            X_corrected = X_c-self.means[c]
            # print(X_corrected[c])
            self.cov_c[c] = self.pi_k[c]*np.dot(X_corrected.T,X_corrected)
            # self.pooled_cov+=self.pi_k[c]*self.cov_c[c] 
    
    def predict(self,X):
        predictions = list()
        for x in range(len(X)):
            likelihood_list = list()
            for c in self.classes:
                pi = np.log(self.pi_k[c])
                #print(self.covariances_total)
                inv_cov = np.linalg.inv(self.cov_c[c])
                inv_cov_det = np.linalg.det(inv_cov)
                diff = x-self.means[c]
                likelihood = 0.5*np.log(inv_cov_det) - 0.5*np.dot(diff.T,np.dot(inv_cov, diff))
                # print(likelihood)
                discriminant = pi + likelihood
                likelihood_list.append(discriminant)
            pred = self.classes[np.argmax(likelihood_list)]
            predictions.append(pred)
        return np.array(predictions)

In [None]:
qda = QDA()
qda.fit(X, Y)
# pred_LDA = lda.predict(xtest)
# pred_LDA
print(" The predicted values for QDA :\n")
qda.predict(test)

s_length    5.006
s_width     3.428
p_length    1.462
p_width     0.246
dtype: float64
s_length    6.012
s_width     2.776
p_length    4.312
p_width     1.344
dtype: float64
 The predicted values for QDA :



array([0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0])