In [1]:
# Colab-only step: upload the dataset into the runtime's working directory.
# The recorded run saved the file as data3.xlsx, which the read_excel cell expects.
from google.colab import files
uploaded = files.upload()

Saving data3.xlsx to data3.xlsx


In [2]:
import pandas as pd
import numpy as np

In [3]:
### reading the data
# header=None tells pandas not to treat any row as a header, so the columns get
# integer labels (0..4 in the displayed output).
data = pd.read_excel('data3.xlsx', header = None)
data.head()

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,1
1,4.9,3.0,1.4,0.2,1
2,4.7,3.2,1.3,0.2,1
3,4.6,3.1,1.5,0.2,1
4,5.0,3.6,1.4,0.2,1


In [6]:
### shuffling the data
# frac=1 samples every row, i.e. a full permutation of the rows; the fixed
# random_state makes the permutation repeatable, and the index is rebuilt as 0..n-1.
shuffled_data = (
  data
  .sample(frac = 1, random_state = 7)
  .reset_index(drop = True)
)
shuffled_data.head()

Unnamed: 0,0,1,2,3,4
0,4.9,3.1,1.5,0.1,1
1,5.0,3.4,1.6,0.4,1
2,6.0,2.9,4.5,1.5,2
3,6.1,3.0,4.6,1.4,2
4,5.0,3.3,1.4,0.2,1


In [9]:
#### separating input(X) and label(Y)
# X: every column except the last one.
X = shuffled_data.iloc[:,:-1]
# Y: the last column only; the `-1:` slice (rather than `-1`) keeps Y a one-column
# DataFrame, which `split` relies on when it indexes Y with two-dimensional `.iloc`.
Y = shuffled_data.iloc[:,-1:]

In [10]:
### splitting X and Y in test and train
def split(X, Y, split_size = 0.6):
  """Split features and labels into a leading train part and a trailing test part.

  The first ``int(len(X) * split_size)`` rows form the training set and the
  remaining rows form the test set. No shuffling is done here, so shuffle
  beforehand if the row order is meaningful.

  Parameters
  ----------
  X : pandas.DataFrame
      Feature rows.
  Y : pandas.DataFrame
      Label rows, one per row of ``X`` (must have the same length as ``X``).
  split_size : float, default 0.6
      Fraction of rows assigned to the training set; must lie in [0, 1].

  Returns
  -------
  tuple
      ``(X_tr, Y_tr, X_ts, Y_ts)``. The training frames keep the index of the
      input; the test frames get a fresh 0..k-1 index.

  Raises
  ------
  ValueError
      If ``X`` and ``Y`` differ in length, or ``split_size`` is outside [0, 1].
  """
  if len(X) != len(Y):
    raise ValueError(
      "X and Y must have the same number of rows, got {} and {}".format(len(X), len(Y))
    )
  # A negative fraction would silently slice from the end and a fraction above 1
  # would silently leave the test set empty, so reject both (this also rejects NaN).
  if not 0 <= split_size <= 1:
    raise ValueError("split_size must be between 0 and 1, got {}".format(split_size))

  train_size = int(len(X) * split_size)
  X_tr = X.iloc[:train_size, :]
  Y_tr = Y.iloc[:train_size, :]
  X_ts = X.iloc[train_size:, :].reset_index(drop = True)
  Y_ts = Y.iloc[train_size:, :].reset_index(drop = True)

  return X_tr, Y_tr, X_ts, Y_ts

In [19]:
X_train, Y_train, X_test, Y_test = split(X, Y, split_size = 0.6)
# `likelihood` reads the labels as `Y[0]`, so relabel the single label column to 0.
# The `inplace` argument of set_axis was deprecated in pandas 1.5 and removed in 2.0;
# reassigning the returned frame works on current pandas and keeps the cell idempotent.
Y_train = Y_train.set_axis([0], axis = 1)

In [14]:
### likelihood ratio function
def _gaussian_log_density(x, samples):
  """Log-density at `x` of the multivariate normal fitted to the rows of `samples`."""
  mean = np.asarray(samples.mean(axis = 0), dtype = float)
  cov = np.atleast_2d(np.asarray(samples.cov(), dtype = float))
  diff = np.asarray(x, dtype = float).ravel() - mean

  # slogdet avoids the overflow/underflow that det() suffers for badly scaled matrices.
  sign, logdet = np.linalg.slogdet(cov)
  if not sign > 0:                                      #also catches NaN (e.g. a single-sample class)
    raise np.linalg.LinAlgError("class covariance matrix is not positive definite")

  # Solve cov * z = diff instead of forming the explicit inverse.
  mahalanobis = np.dot(diff, np.linalg.solve(cov, diff))
  return -0.5*(cov.shape[0]*np.log(2*np.pi) + logdet + mahalanobis)


def likelihood(x, X, Y):
  """Classify one sample as label 1 or 2 with a Gaussian likelihood-ratio (Bayes) rule.

  A multivariate normal (sample mean, sample covariance) is fitted to the rows of
  `X` belonging to each label, and `x` is assigned to the label with the larger
  prior-weighted density. The comparison is done on log-densities: in the linear
  domain both densities underflow to 0 for samples far from both classes, and the
  resulting `0 >= 0` tie would always pick label 1 regardless of the data.

  The class statistics are re-estimated on every call.

  Parameters
  ----------
  x : array-like
      Feature values of the sample to classify, in the column order of `X`.
  X : pandas.DataFrame
      Training features.
  Y : pandas.DataFrame
      Training labels in column `0`, with values 1 and 2, row-aligned with `X`.

  Returns
  -------
  int
      1 if label 1 is at least as probable as label 2, otherwise 2.

  Raises
  ------
  ValueError
      If either label has no training rows.
  numpy.linalg.LinAlgError
      If a class covariance matrix is singular or otherwise not positive definite.
  """
  is_class1 = Y[0] == 1
  is_class2 = Y[0] == 2
  n1 = int(is_class1.sum())                             #number of 1's
  n2 = int(is_class2.sum())                             #number of 2's
  if n1 == 0 or n2 == 0:
    raise ValueError("Y must contain at least one sample of each label (1 and 2)")

  log_py1 = np.log(n1/len(Y))                           #log probability(Y==1)
  log_py2 = np.log(n2/len(Y))                           #log probability(Y==2)

  score1 = _gaussian_log_density(x, X[is_class1]) + log_py1
  score2 = _gaussian_log_density(x, X[is_class2]) + log_py2

  # Same rule as l1*py1 >= l2*py2, evaluated in the log domain.
  if score1 >= score2:
    pred = 1
  else:
    pred = 2

  return pred

In [22]:
def predict(X_test):
  """Predict a label for every row of `X_test`.

  Each row is passed to `likelihood` together with the notebook-level
  `X_train` / `Y_train`, so those must be defined before this is called.

  Parameters
  ----------
  X_test : pandas.DataFrame
      Rows to classify.

  Returns
  -------
  pandas.DataFrame
      The predicted labels, one per input row, in input order.
  """
  row_labels = [
    likelihood(X_test.iloc[row, :], X_train, Y_train)
    for row in range(len(X_test))
  ]
  return pd.DataFrame(row_labels)

In [24]:
# Classify every held-out row; `predict` returns the labels wrapped in a DataFrame.
Y_predicted = predict(X_test)

In [29]:
# Put the true and predicted labels side by side, aligned on the test-set index.
# `assign` returns a new frame, so Y_test itself is left untouched.
df_to_compare = Y_test.assign(Y_predicted = Y_predicted.iloc[:, 0])
df_to_compare.columns = ['Y_test', 'Y_predicted']
df_to_compare.head(10)

Unnamed: 0,Y_test,Y_predicted
0,2,2
1,1,1
2,1,1
3,1,1
4,1,1
5,2,2
6,1,1
7,2,2
8,2,2
9,2,2


In [27]:
# Cross-tabulate true labels (rows) against predicted labels (columns).
confusion_matrix = pd.crosstab(
  df_to_compare['Y_test'],
  df_to_compare['Y_predicted'],
  rownames = ['Actual'],
  colnames = ['Predicted'],
)
confusion_matrix

Predicted,1,2
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1
1,19,0
2,0,21


In [33]:
# Work on the raw 2x2 array of counts: row/column 0 is label 1, row/column 1 is label 2.
cm_counts = confusion_matrix.to_numpy()
Accuracy = np.trace(cm_counts)/cm_counts.sum()            # correct predictions / all predictions
Sensitivity = cm_counts[0, 0]/cm_counts[0, :].sum()       # share of actual label-1 rows predicted as 1
Specificity = cm_counts[1, 1]/cm_counts[1, :].sum()       # share of actual label-2 rows predicted as 2

In [37]:
# Report the metrics as percentages. The minimum field width of 13 on the first line
# left-pads the number; the recorded output shows the three values lined up.
print("accuracy is: {0:13f}%".format(Accuracy*100))
print("sensitivity is: {0:5f}%".format(Sensitivity*100))
print("specificity is: {0:5f}%".format(Specificity*100))

accuracy is:    100.000000%
sensitivity is: 100.000000%
specificity is: 100.000000%
