In [1]:
import numpy as np
import pandas as pd

In [2]:
data = pd.read_csv('IRIS.csv')

In [3]:
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
data['species'].unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [5]:
X = data.drop(['species'],axis=1)
y = data['species'].map({'Iris-setosa':0, 'Iris-versicolor':1, 'Iris-virginica':2})

In [6]:
y.unique()

array([0, 1, 2])

In [13]:
class classifier:
    def __init__(self,X,y,dim):
        self.X = X          
        self.y = y          
        self.dim = dim
        self.n_class = y.value_counts()
        self.priors = []
        self.classes = y.unique()
    def prior_calc(self):
        for c in self.classes:
            self.priors.append(self.n_class[c]/sum(self.n_class))
        
    def projection(self):
        indices = []
        mean=[]
        for c in self.classes:
            indices.append(self.y[self.y==c].index)
        for m in self.classes:
            mean.append(np.mean(self.X.loc[indices[m],:],axis=0))
        #within covariance matrix
        Sw=[]
        for s in self.classes:
            Sw.append(np.dot((self.X.loc[indices[s],:]-mean[s]).T,(self.X.loc[indices[s],:]-mean[s])))
        total_m = np.mean(self.X,axis=0)
        Sb=0
        for b in self.classes:
            Sb += self.n_class[b]*np.dot((mean[b]-total_m),(mean[b]-total_m).T)
        eigenvalues,eigenvectors = np.linalg.eig(np.dot(np.linalg.inv(sum(Sw)),Sb))
        idx = eigenvalues.argsort()[::-1]
        eigenvalues = eigenvalues[idx]
        eigenvectors = eigenvectors[:,idx]
        if self.dim<=self.X.shape[1]:
            W = eigenvectors[:,:self.dim]
            return np.dot(self.X,W)
        else:
            print("Choose a dimension lower than"+(self.X.shape[1])) 
    
    def gaussian_distribution(self,x,u,cov):
        scalar = 1. / ((2 * np.pi) ** (x.shape[0] / 2.)) * (1 /(np.sqrt(np.linalg.det(cov))))
        x_sub_u = np.subtract(x, u)
        return scalar * np.exp(-np.dot(np.dot(x_sub_u, np.linalg.inv(cov)), x_sub_u.T) / 2.)
    def means(self,X,y):
        indices=[]
        means =[]
        for c in self.classes:
            indices.append([])
        for i in range(0,len(y)):
            z =y.loc[i,'species']
            indices[z].append(i)
         #mean of reduced class
        for c in self.classes:
            means.append(np.mean(X[indices[c],:],axis=0))
        return means
    def cov(self,X,y,mean):
        sw = []
        indices=[]
        for c in self.classes:
            indices.append([])
        for i in range(0,len(y)):
            z =y.loc[i,'species']
            indices[z].append(i)
        for c in self.classes:
            sw.append(np.dot((X[indices[c],:]-mean[c]).T,(X[indices[c],:]-mean[c])))
        return sw
    def posterior(self):
        X = self.projection()
        y = self.y.reset_index().drop('index',axis=1)
        mean = self.means(X,y)
        Sw = self.cov(X,y,mean)
        posterior = []
        self.prior_calc()
        priors = self.priors
        for x in X:
            row = []
            for c in self.classes:
                res =priors[c] * self.gaussian_distribution(x,mean[c],Sw[c]) 
                row.append(res)
            posterior.append(row)
        return posterior

        

In [34]:
c = classifier(X,y,2) 
pred = c.posterior()

In [35]:
norm = np.sum(pred,axis=0)

In [36]:
norm_pred = np.array(pred)/norm

In [37]:
predictions = np.argmax(norm_pred, axis=1)

In [38]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y, predictions)
print(cm)
accuracy_score(y, predictions)

[[50  0  0]
 [ 1 45  4]
 [ 0  2 48]]


0.9533333333333334