In [1]:
from google.colab import files
uploaded = files.upload()

Saving data4.xlsx to data4.xlsx


In [2]:
import pandas as pd
import numpy as np

In [3]:
## reading and shuffling data
data = pd.read_excel('data4.xlsx',header = None)
shuffled_data = data.sample(frac = 1, random_state = 7).reset_index(drop = True)

## separating X and Y
X = shuffled_data.iloc[:, :-1]
Y = shuffled_data.iloc[:, -1:]

In [4]:
###splitting the data
def split(X, Y, split_size = 0.6):
  train_size = int(len(X) * split_size)
  X_tr = X.iloc[:train_size, :]
  Y_tr = Y.iloc[:train_size, :]
  X_ts = X.iloc[train_size:, :].reset_index(drop = True)
  Y_ts = Y.iloc[train_size:, :].reset_index(drop = True)

  return X_tr, Y_tr, X_ts, Y_ts

In [14]:
X_train, Y_train, X_test, Y_test = split(X, Y, split_size = 0.7)
Y_train.set_axis([0], axis = 1, inplace = True)

In [15]:
### likelihood ratio function
def MAP(x, X, Y):
  n = X.shape[1]                                        #number of features
  number_of_labels = list(Y.groupby(0).size())          #[number of 1's, number of 2's]
  py1 = number_of_labels[0]/len(Y)                      #probability(Y==1)
  py2 = number_of_labels[1]/len(Y)                      #probability(Y==2)
  py3 = number_of_labels[2]/len(Y)                      #probability(Y==3)
  X1 = X[Y[0] == 1].reset_index(drop = True)            #rows of X where Y == 1
  X2 = X[Y[0] == 2].reset_index(drop = True)            #rows of X where Y == 2
  X3 = X[Y[0] == 3].reset_index(drop = True)            #rows of X where Y == 3
  e1, e2, e3 = len(X1)/len(X), len(X2)/len(X), len(X3)/len(X)
  m1 = list(X1.mean(axis = 0))                          #mean of features of X1
  m2 = list(X2.mean(axis = 0))                          #mean of features of X2
  m3 = list(X3.mean(axis = 0))                          #mean of features of X3
  cov1 = X1.cov()                                       #covariance matrix of X1
  cov2 = X2.cov()                                       #covariance matrix of X2
  cov3 = X3.cov()                                       #covariance matrix of X3
  c1 = 1/(np.power(6.28, n/2)*(np.linalg.det(cov1)**0.5))      #coeff of P1
  c2 = 1/(np.power(6.28, n/2)*(np.linalg.det(cov2)**0.5))      #coeff of P2
  c3 = 1/(np.power(6.28, n/2)*(np.linalg.det(cov2)**0.5))      #coeff of P3
  mat1 = x-m1                                          
  mat2 = x-m2
  mat3 = x-m3
  cov1_inv = np.linalg.inv(cov1)
  cov2_inv = np.linalg.inv(cov2)
  cov3_inv = np.linalg.inv(cov3)
  l1 = c1*np.exp(-0.5*np.dot(np.dot(mat1, cov1_inv), mat1.T)) #likelihood function
  l2 = c2*np.exp(-0.5*np.dot(np.dot(mat2, cov2_inv), mat2.T)) #likelihood function
  l3 = c3*np.exp(-0.5*np.dot(np.dot(mat3, cov3_inv), mat3.T)) #likelihood function

  prob1, prob2, prob3 = (l1*py1)/(e1), (l2*py2)/(e2), (l3*py3)/(e3)
  if(max(prob1, prob2, prob3) == prob1):
    pred = 1
  elif(max(prob1, prob2, prob3) == prob2):
    pred = 2
  else:
    pred = 3

  return pred

In [16]:
def predict(X_test):
  Y_pred = []
  for i in range(len(X_test)):
    pred = MAP(X_test.iloc[i,:], X_train, Y_train)
    Y_pred.append(pred)
  Y_pred = pd.DataFrame(Y_pred)
  return Y_pred

In [17]:
Y_predicted = predict(X_test)

In [18]:
df_to_compare = Y_test.copy()
df_to_compare[1] = Y_predicted
df_to_compare.columns = ['Y_test', 'Y_predicted']

In [19]:
confusion_matrix = pd.crosstab(df_to_compare['Y_test'], df_to_compare['Y_predicted'], rownames = ['Actual'], colnames = ['Predicted'])
confusion_matrix

Predicted,1,2,3
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,14,0,0
2,0,14,3
3,0,1,13


In [20]:
Acc_1 = confusion_matrix.iloc[0,0]/np.sum(confusion_matrix.iloc[0,:])
Acc_2 = confusion_matrix.iloc[1,1]/np.sum(confusion_matrix.iloc[1,:])
Acc_3 = confusion_matrix.iloc[2,2]/np.sum(confusion_matrix.iloc[2,:])
Acc_overall = (confusion_matrix.iloc[0,0]+confusion_matrix.iloc[1,1]+confusion_matrix.iloc[2,2])/np.sum(np.sum(confusion_matrix))

In [21]:
print("Accuracy of 1 is: %f" %(Acc_1*100)+' %')
print("Accuracy of 2 is: %f" %(Acc_2*100)+' %')
print("Accuracy of 3 is: %f" %(Acc_3*100)+' %')
print("Overall Accuracy is: %f" %(Acc_overall*100)+' %')

Accuracy of 1 is: 100.000000 %
Accuracy of 2 is: 82.352941 %
Accuracy of 3 is: 92.857143 %
Overall Accuracy is: 91.111111 %
