In [18]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets

# Interactive configuration of a synthetic problem used to exercise the LDA functions below.
# NOTE(review): input() blocks during "Run All", and int() raises ValueError on non-numeric text.
q_data=int(input('Enter the number of classes\n'))
# One less than the class count; not referenced again in the visible cells.
r_data=q_data-1
N_data=int(input('Enter the number of samples\n'))
d_data=int(input('Enter the number of dimensions\n'))
# Build the dataset and its labels: N_data samples, d_data features, q_data centers.
# NOTE(review): no random_state is passed, so the data presumably differs on every run — confirm.
X_data,y_data=datasets.make_blobs(n_samples=N_data,n_features=d_data,centers=q_data)
# Shift every label up by one; separateClass below iterates over class labels starting at 1.
y_data+=1

def meanX(X): # X is an Nxd (or N_ixd) matrix
    """Return the per-column mean of a 2-D sample matrix.

    Parameters
    ----------
    X : array-like with two dimensions (samples in rows, features in columns).

    Returns
    -------
    list
        A nested list with one single-element row per column of ``X``
        (a d x 1 "column vector"), each holding that column's mean.

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional (the shape unpacking fails).
    """
    samples=np.array(X)
    rowCount,colCount=np.shape(samples)
    # Sum each column and divide by the number of rows; wrap every mean in
    # its own list so the result has shape (d, 1).
    return [[np.sum(samples[:,col])/rowCount] for col in range(colCount)]

def separateClass(X,y):
    """Group the rows of ``X`` by their class label.

    Parameters
    ----------
    X : array-like with two dimensions (N samples in rows, d features).
    y : sequence of N integer class labels. Labels are expected to be
        consecutive integers; they may start at any value.

    Returns
    -------
    list
        A list with ``max(y) - min(y) + 1`` entries. Entry ``k`` is a list of
        the rows of ``X`` whose label equals ``min(y) + k``, in their original
        order. A label value that never occurs yields an empty list.
    """
    (N,d)=np.shape(X)
    labels=np.asarray(y)
    lowest=np.min(labels)
    q=int(np.max(labels)-lowest+1) # the number of classes
    x_separated=[[] for _ in range(q)]
    # Single pass over the samples: the offset from the smallest label is the
    # bucket index, so the labels do not have to start at 1.
    for i in range(N):
        x_separated[int(labels[i]-lowest)].append(X[i])
    return x_separated

def meanDiff(X,y):
    """Centre the class means and the samples on the overall mean.

    Parameters
    ----------
    X : array-like with two dimensions (N samples in rows, d features).
    y : sequence of N class labels, passed on to ``separateClass``.

    Returns
    -------
    tuple
        ``(classOffsets, sampleOffsets)`` where ``classOffsets`` has shape
        (d, q) and holds ``mu_g - mu`` for every class g, and
        ``sampleOffsets`` has shape (d, N) and holds ``x - mu`` for every
        sample.
    """
    samples=np.array(X)
    rowCount,colCount=np.shape(samples) # also rejects input that is not 2-D
    classTotal=np.max(y)-np.min(y)+1
    grouped=separateClass(samples,y)
    # meanX returns a d x 1 nested list; transpose and take row 0 to get a
    # flat length-d vector.
    overallMean=np.array(meanX(samples)).T[0]
    classMeans=[np.array(meanX(grouped[k])).T[0] for k in range(0,classTotal)]
    # Subtracting an array from the list of class means broadcasts to (q, d).
    classOffsets=classMeans-overallMean
    sampleOffsets=np.array(samples)-overallMean
    return classOffsets.T,sampleOffsets.T # shapes (d,q), (d,N)
    
def withinClassScatter(X,y): # input: x_separated
    """Compute the within-class scatter matrix of the samples.

    For every class the samples are centred on that class's own mean and the
    outer products of the centred samples are accumulated; the total is
    divided by the overall number of samples.

    Parameters
    ----------
    X : array-like with two dimensions (N samples in rows, d features).
    y : sequence of N class labels, passed on to ``separateClass``.

    Returns
    -------
    numpy.ndarray
        A d x d matrix.
    """
    sampleTotal,featureTotal=np.shape(X)
    classTotal=np.max(y)-np.min(y)+1
    groups=separateClass(X,y)
    scatter=np.zeros((featureTotal,featureTotal))
    for k in range(0,classTotal):
        members=np.array(groups[k]) # N_k x d matrix of this class's samples
        # members.T is d x N_k; the d x 1 class mean broadcasts across columns.
        centred=np.subtract(members.T,meanX(members))
        scatter+=np.matmul(centred,centred.T) # d x d contribution
    return scatter/sampleTotal

def betweenClassScatter(X,y):
    """Compute the between-class scatter matrix of the samples.

    Each class contributes the outer product of ``(mu_g - mu)`` with itself,
    weighted by the number of samples in that class; the total is divided by
    the overall number of samples. Weighting by the class size keeps this
    matrix on the same footing as the within-class scatter, which sums over
    every individual sample.

    Parameters
    ----------
    X : array-like with two dimensions (N samples in rows, d features).
    y : sequence of N class labels, passed on to ``separateClass``.

    Returns
    -------
    numpy.ndarray
        A d x d matrix.
    """
    (N,d)=np.shape(X)
    q=np.max(y)-np.min(y)+1
    x_separated=separateClass(X,y)
    x_mean=meanX(X) # dx1 matrix
    betweenScatter=np.zeros((d,d))
    for i in range(0,q):
        classCount=len(x_separated[i]) # number of samples in class i
        offset=np.subtract(meanX(x_separated[i]),x_mean) # dx1 matrix (mu_i-mu)
        # Weight the outer product by the class size.
        betweenScatter+=classCount*np.matmul(offset,offset.T)
    return betweenScatter/N # dxd matrix


def LDATrain(X,y):
    """Fit the discriminant directions and project the class means onto them.

    The directions are the eigenvectors of ``inv(S_w) @ S_b`` that belong to
    the ``r = q - 1`` largest eigenvalues, where ``S_w`` and ``S_b`` are the
    within- and between-class scatter matrices.

    Parameters
    ----------
    X : array-like with two dimensions (N samples in rows, d features).
    y : sequence of N class labels.

    Returns
    -------
    tuple
        ``(eigenVectors, globalAverage, m_c)``:

        * ``eigenVectors`` - d x r array, one discriminant direction per column,
          ordered by decreasing eigenvalue;
        * ``globalAverage`` - d x 1 nested list with the mean of ``X``;
        * ``m_c`` - r x q nested list; ``m_c[j][g]`` is ``(mu_g - mu)``
          projected onto direction ``j``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the within-class scatter matrix is singular.
    """
    (N,d)=np.shape(X)
    globalAverage=meanX(X)
    q=np.max(y)-np.min(y)+1
    r=q-1
    withinScatter=withinClassScatter(X,y)
    betweenScatter=betweenClassScatter(X,y)
    scatterRatio=np.matmul(np.linalg.inv(withinScatter),betweenScatter)
    eigenValues,eigenColumns=np.linalg.eig(scatterRatio) # eigenColumns[:,i] belongs to eigenValues[i]
    # np.linalg.eig on a non-symmetric matrix can return a complex dtype (tiny
    # imaginary parts on the near-zero eigenvalues), so rank by the real part.
    # argsort picks every column exactly once, even when eigenvalues repeat.
    order=np.argsort(-np.real(eigenValues),kind="stable")
    eigenVectors=np.real(eigenColumns[:,order[:r]]) # first r eigen vectors, d x r
    # computing (mu_g-mu).T x e_j
    difference,_=meanDiff(X,y) # d x q; the per-sample offsets are not needed here
    classOffsets=np.array(difference).T # q x d
    projected=np.matmul(classOffsets,eigenVectors) # q x r
    # Row j holds the projections of every class mean onto eigenvector j.
    m_c=[list(row) for row in projected.T]

    return eigenVectors,globalAverage,m_c # dxr ,dx1, rxq


def LDAClassify(X,y,X_test,y_test):
    """Train on ``(X, y)`` and predict a class label for every row of ``X_test``.

    Every test sample is centred on the *training* mean, projected onto the
    discriminant directions from ``LDATrain`` and scored against each class g
    with ``sum_j x_j*m_jg - 0.5*sum_j m_jg**2 + log(N_g)``, where ``N_g`` is
    the number of training samples in class g. The class with the highest
    score wins.

    Parameters
    ----------
    X : array-like with two dimensions (N training samples, d features).
    y : sequence of N integer training labels (consecutive integers).
    X_test : array-like with two dimensions (N_test samples, d features).
    y_test : unused; kept so existing calls keep working. Prediction does not
        look at the test labels.

    Returns
    -------
    numpy.ndarray
        Length-N_test array of predicted labels in the range of ``y``.
    """
    (N,d)=np.shape(X)
    testSamples=np.array(X_test)
    (N_test,d_test)=np.shape(testSamples)
    x_separated=separateClass(X,y)
    q=len(x_separated)
    r=q-1
    eigenVectors,globalAverage,m_c=LDATrain(X,y)
    # Centre with the mean learned from the training data, not the mean of the
    # test batch, so a prediction does not depend on what else is in the batch.
    trainMean=np.array(globalAverage).T[0] # length d
    x_difference=testSamples-trainMean # N_test x d
    directions=np.reshape(eigenVectors,(d,r)) # d x r
    classProjections=np.reshape(np.array(m_c),(r,q)) # r x q
    sampleProjections=np.matmul(x_difference,directions) # N_test x r
    classCounts=np.array([len(group) for group in x_separated]) # N_g per class
    # scores[i][g] is the discriminant score of test sample i for class g.
    scores=(np.matmul(sampleProjections,classProjections)
            -np.sum(classProjections**2,axis=0)/2
            +np.log(classCounts))
    pred=np.argmax(scores,axis=1)
    # Class indices are zero-based; shift them back onto the label range,
    # whose smallest value is the smallest training label.
    return pred+np.min(y)

# Train and predict on the same data: the generated set is passed as both the training and the test set.
predictedY=LDAClassify(X_data,y_data,X_data,y_data)

Enter the number of classes
4
Enter the number of samples
500
Enter the number of dimensions
7


In [19]:
predictedY

array([4, 2, 2, 2, 3, 1, 3, 3, 2, 3, 4, 3, 1, 2, 1, 2, 1, 3, 4, 2, 1, 2,
       1, 1, 3, 3, 1, 4, 4, 1, 3, 3, 1, 4, 1, 2, 3, 1, 1, 3, 1, 2, 3, 3,
       3, 4, 3, 2, 3, 1, 2, 3, 3, 2, 2, 2, 4, 2, 3, 1, 3, 2, 3, 3, 2, 3,
       2, 2, 2, 4, 2, 3, 4, 4, 4, 3, 4, 1, 2, 3, 3, 2, 4, 3, 4, 3, 2, 4,
       2, 2, 2, 4, 2, 3, 1, 4, 3, 2, 1, 3, 3, 1, 1, 4, 1, 4, 4, 1, 3, 2,
       4, 1, 4, 3, 4, 1, 4, 2, 1, 1, 1, 1, 4, 1, 2, 3, 1, 2, 1, 4, 4, 4,
       1, 4, 3, 4, 3, 4, 1, 2, 4, 2, 2, 1, 2, 2, 2, 1, 3, 3, 4, 4, 4, 3,
       4, 2, 1, 4, 1, 2, 4, 4, 1, 1, 3, 2, 1, 4, 4, 2, 1, 2, 2, 3, 1, 4,
       1, 4, 4, 4, 3, 2, 3, 4, 1, 4, 2, 4, 1, 3, 2, 1, 1, 2, 3, 3, 3, 2,
       4, 1, 1, 3, 1, 4, 4, 3, 3, 4, 3, 3, 2, 2, 2, 4, 3, 3, 2, 4, 3, 1,
       1, 1, 4, 3, 2, 3, 3, 2, 2, 3, 2, 3, 2, 2, 1, 3, 2, 4, 1, 3, 1, 4,
       1, 3, 4, 4, 2, 2, 3, 1, 1, 1, 2, 2, 1, 3, 4, 2, 1, 1, 3, 2, 1, 1,
       2, 1, 3, 4, 2, 1, 4, 1, 1, 1, 1, 3, 4, 4, 4, 4, 3, 2, 4, 1, 1, 3,
       4, 3, 4, 1, 1, 4, 4, 1, 2, 3, 2, 2, 3, 2, 1,

In [20]:
y_data

array([4, 2, 2, 2, 3, 1, 3, 3, 2, 3, 4, 3, 1, 2, 1, 2, 1, 3, 4, 2, 1, 2,
       1, 1, 3, 3, 1, 4, 4, 1, 3, 3, 1, 4, 1, 2, 3, 1, 1, 3, 1, 2, 3, 3,
       3, 4, 3, 2, 3, 1, 2, 3, 3, 2, 2, 2, 4, 2, 3, 1, 3, 2, 3, 3, 2, 3,
       2, 2, 2, 4, 2, 3, 4, 4, 4, 3, 4, 1, 2, 3, 3, 2, 4, 3, 4, 3, 2, 4,
       2, 2, 2, 4, 2, 3, 1, 4, 3, 2, 1, 3, 3, 1, 1, 4, 1, 4, 4, 1, 3, 2,
       4, 1, 4, 3, 4, 1, 4, 2, 1, 1, 1, 1, 4, 1, 2, 3, 1, 2, 1, 4, 4, 4,
       1, 4, 3, 4, 3, 4, 1, 2, 4, 2, 2, 1, 2, 2, 2, 1, 3, 3, 4, 4, 4, 3,
       4, 2, 1, 4, 1, 2, 4, 4, 1, 1, 3, 2, 1, 4, 4, 2, 1, 2, 2, 3, 1, 4,
       1, 4, 4, 4, 3, 2, 3, 4, 1, 4, 2, 4, 1, 3, 2, 1, 1, 2, 3, 3, 3, 2,
       4, 1, 1, 3, 1, 4, 4, 3, 3, 4, 3, 3, 2, 2, 2, 4, 3, 3, 2, 4, 3, 1,
       1, 1, 4, 3, 2, 3, 3, 2, 2, 3, 2, 3, 2, 2, 1, 3, 2, 4, 1, 3, 1, 4,
       1, 3, 4, 4, 2, 2, 3, 1, 1, 1, 2, 2, 1, 3, 4, 2, 1, 1, 3, 2, 1, 1,
       2, 1, 3, 4, 2, 1, 4, 1, 1, 1, 1, 3, 4, 4, 4, 4, 3, 2, 4, 1, 1, 3,
       4, 3, 4, 1, 1, 4, 4, 1, 2, 3, 2, 2, 3, 2, 1,

In [21]:
# Element-wise difference between predicted and true labels; 0 marks a correctly classified sample.
predictedY-y_data

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [22]:
np.shape(X_data)

(500, 7)