## Linear Discriminant Analysis(LDA) From Scratch

### Reading Data

In [1]:
import numpy as np
import pandas as pd
# reading data
df = pd.read_table('../dataset/occupancy/datatraining.txt', skiprows=1, names=('A', 'B', 'C', 'D', 'E', 'F', 'Occ'), sep=',')
test1 = pd.read_table('../dataset/occupancy/datatest.txt', skiprows=1, names=('A', 'B', 'C', 'D', 'E', 'F', 'Occ'), sep=',')
columns = ['B', 'C', 'D', 'E', 'F', 'Occ']

df = df[columns]
df.head(10)

  import sys
  


Unnamed: 0,B,C,D,E,F,Occ
1,23.18,27.272,426.0,721.25,0.004793,1
2,23.15,27.2675,429.5,714.0,0.004783,1
3,23.15,27.245,426.0,713.5,0.004779,1
4,23.15,27.2,426.0,708.25,0.004772,1
5,23.1,27.2,426.0,704.5,0.004757,1
6,23.1,27.2,419.0,701.0,0.004757,1
7,23.1,27.2,419.0,701.666667,0.004757,1
8,23.1,27.2,419.0,699.0,0.004757,1
9,23.1,27.2,419.0,689.333333,0.004757,1
10,23.075,27.175,419.0,688.0,0.004745,1


### Dividing data-set into different classes

In [2]:
# classifying datas
print("Occupied Parameters")
x0 = df[df['Occ'] == 0][['B', 'C', 'D', 'E', 'F']]
print(x0.head())

print("Unoccupied Parameters")
x1 = df[df['Occ'] == 1][['B', 'C', 'D', 'E', 'F']]
print(x1.head())

Occupied Parameters
         B      C    D      E         F
17  23.000  27.20  0.0  681.5  0.004728
18  22.945  27.29  0.0  685.0  0.004728
19  22.945  27.39  0.0  685.0  0.004745
20  22.890  27.39  0.0  689.0  0.004730
21  22.890  27.39  0.0  689.5  0.004730
Unoccupied Parameters
       B        C      D       E         F
1  23.18  27.2720  426.0  721.25  0.004793
2  23.15  27.2675  429.5  714.00  0.004783
3  23.15  27.2450  426.0  713.50  0.004779
4  23.15  27.2000  426.0  708.25  0.004772
5  23.10  27.2000  426.0  704.50  0.004757


### Determinig Mean of different classes and the over all Mean

In [3]:
mean_all = df[['B', 'C', 'D', 'E', 'F']].mean()
mean_x0 = x0.mean()
mean_x1 = x1.mean()

### Mean corrected data and Covariance matrix

In [4]:
# mean corrected datas
corrected_x0 = x0-mean_all
corrected_x1 = x1-mean_all

# correlation matrix
cov0 = corrected_x0.cov()
cov1 = corrected_x1.cov()

# group covarience matrix
Cov = ((len(x0)/len(df))*cov0 + (len(x1)/len(df))*cov1)
Cov.head()
Cov_inv = pd.DataFrame(np.linalg.pinv(Cov))
Cov_inv
Cov_inv = Cov_inv.dot(Cov_inv)
Cov_inv.T

Unnamed: 0,0,1,2,3,4
0,36672370000.0,24431170000.0,-26996660.0,32450720.0,-165022700000000.0
1,24431170000.0,16276070000.0,-17985200.0,21618700.0,-109938300000000.0
2,-26996660.0,-17985200.0,19873.81,-23888.85,121482800000.0
3,32450720.0,21618700.0,-23888.85,28715.05,-146025600000.0
4,-165022700000000.0,-109938300000000.0,121482800000.0,-146025600000.0,7.425892e+17


### Test data-set

In [5]:
# Preparing test data
test1_X = test1[['B', 'C', 'D', 'E', 'F']]
test1_Y = test1['Occ']
test1_X.head()

Unnamed: 0,B,C,D,E,F
140,23.7,26.272,585.2,749.2,0.004764
141,23.718,26.29,578.4,760.4,0.004773
142,23.73,26.23,572.666667,769.666667,0.004765
143,23.7225,26.125,493.75,774.75,0.004744
144,23.754,26.2,488.6,779.0,0.004767


### Discriminant function
<img src="assets/discriminant_function.png">

In [6]:
f0 = np.array(mean_x0) @ np.array(Cov_inv.T) @ np.array(test1_X.T) - (0.5*(np.array(mean_x0) @ np.array(Cov_inv.T) @ np.array(mean_x0.T) )) + np.log((len(x0)/len(df)))
# print(Cov_inv.shape, test1_X.T.shape)
f1 = np.array(mean_x1) @ np.array(Cov_inv.T) @ np.array(test1_X.T) - (0.5*(np.array(mean_x1) @ np.array(Cov_inv.T) @ np.array(mean_x1.T) )) + np.log((len(x1)/len(df)))

In [7]:
f0

array([7.31824417e+12, 7.32334345e+12, 7.33728914e+12, ...,
       7.36937735e+12, 7.36328373e+12, 7.36249089e+12])

In [8]:
f1

array([7.32157483e+12, 7.32664636e+12, 7.34051616e+12, ...,
       7.37242975e+12, 7.36636929e+12, 7.36558077e+12])

### Assign object k with class i such that f(i) is maximun for that object

In [9]:
cl = f0>f1
y_out = np.array([0 if f0[i]>f1[i] else 1 for i in range(len(f0))])
y_out

array([1, 1, 1, ..., 1, 1, 1])

### Calculating Accuracy

In [10]:
gr_val = np.array(test1_Y)

tp_bool = np.logical_and((y_out==1),(gr_val==1))
tn_bool = np.logical_and((y_out==0),(gr_val==0))
fp_bool = np.logical_and((y_out==1),(gr_val==0))
fn_bool = np.logical_and((y_out==0),(gr_val==1))
tp = len(y_out[tp_bool])
tn = len(y_out[tn_bool])
fp = len(y_out[fp_bool])
fn = len(y_out[fn_bool])
print(tp,tn,fp,fn)

accuracy = (tp+tn)/(tp+tn+fp+fn)
print("accuracy : "+str(accuracy))

# precision = tp/(total yes predictions)
precision = tp/(len(y_out[y_out==1]))
print("precision : "+str(precision))

specificity = tp/(tp+fn)
print("specificity : "+str(specificity))

sensitivity = tn/(tn+fp)
print("sensitivity : "+str(sensitivity))

972 1393 300 0
accuracy : 0.8874296435272045
precision : 0.7641509433962265
specificity : 1.0
sensitivity : 0.8227997637330183
