In [1]:
from __future__ import print_function, division
from future.utils import iteritems
from builtins import range, input
# Note: you may need to update your version of future
# sudo pip install -U future

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime
from scipy.stats import norm
from scipy.stats import multivariate_normal as mvn
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix as cm



In [2]:
def get_data(path):
    """Load a header-less Excel sheet and split it into features and labels.

    Every column except the last is treated as a feature and standardized
    column by column (zero mean, unit standard deviation). The last column
    holds the class labels, which are shifted down by one, so 1-based labels
    in the sheet become 0-based.

    Parameters
    ----------
    path : str or path-like
        Location of the workbook; passed straight to ``pd.read_excel``.

    Returns
    -------
    X : np.ndarray
        Standardized float features, one row per sheet row and one column
        per feature column.
    Y : np.ndarray
        The sheet's last column minus 1.
    """
    df = pd.read_excel(path, header=None)
    data = df.values

    features = data[:, :-1].astype(float)
    # Standardize per feature (axis=0). A single global mean/std would leave
    # columns measured on different scales un-normalized relative to each other.
    mu = features.mean(axis=0)
    sigma = features.std(axis=0)
    # A constant column has zero spread; dividing by 1 keeps it at 0, not NaN.
    sigma[sigma == 0] = 1.0
    X = (features - mu) / sigma

    Y = data[:, -1] - 1
    return X, Y


In [3]:
# NOTE(review): machine-specific absolute path; point this at your own copy
# of the workbook before running the notebook elsewhere.
path = r"C:\Users\gprak\Downloads\BITS Courses\CS F464 Machine Learning I\data\data3.xlsx"
X, Y = get_data(path=path)

## Hold-out Cross-Validation

In [4]:
# Keep 40% of the rows aside for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.4, random_state=1234)

## Likelihood Ratio Test

In [5]:
class Bayes(object):
    """Gaussian Bayes classifier with one full-covariance Gaussian per class.

    ``fit`` estimates a mean vector, a covariance matrix and a prior for each
    distinct label. ``predict`` returns, for every row, the label whose
    Gaussian log-likelihood plus log-prior is largest.
    """

    def fit(self, X, Y, smoothing=1e-2):
        """Estimate per-class Gaussian parameters and class priors.

        Parameters
        ----------
        X : array-like, shape (N, D)
            Training features, one row per sample.
        Y : array-like, shape (N,)
            Class label of each row of ``X``.
        smoothing : float, default 1e-2
            Value added to the diagonal of every class covariance.

        Side effects
        ------------
        Sets ``self.gaussians`` (label -> {'mean', 'cov'}) and
        ``self.priors`` (label -> fraction of training rows with that label).
        """
        X = np.asarray(X)
        Y = np.asarray(Y)
        _, D = X.shape
        self.gaussians, self.priors = dict(), dict()
        for c in np.unique(Y):
            current_x = X[Y == c]
            if len(current_x) > 1:
                # np.cov collapses to a 0-d value when D == 1; keep it a matrix.
                cov = np.atleast_2d(np.cov(current_x.T))
            else:
                # A single sample has no sample covariance (np.cov would give
                # NaN); fall back to the smoothing term alone.
                cov = np.zeros((D, D))
            self.gaussians[c] = {
                'mean': current_x.mean(axis=0),
                'cov': cov + np.eye(D) * smoothing,
            }
            self.priors[c] = float(len(current_x)) / len(Y)

    def score(self, X, Y):
        """Return the fraction of rows of ``X`` whose prediction equals ``Y``."""
        P = self.predict(X)
        return np.mean(P == Y)

    def predict(self, X):
        """Return the most probable class label for every row of ``X``.

        The result holds the original labels seen in ``fit`` (with the labels'
        dtype), not column positions, so labels need not be 0..K-1. For labels
        that are exactly 0..K-1 the values equal the column indices.
        """
        X = np.asarray(X)
        N, _ = X.shape
        # Fixed, sorted label order: column i of the score table <-> labels[i].
        labels = sorted(self.gaussians)
        scores = np.zeros((N, len(labels)))
        for column, c in enumerate(labels):
            g = self.gaussians[c]
            scores[:, column] = (
                mvn.logpdf(X, mean=g['mean'], cov=g['cov'])
                + np.log(self.priors[c])
            )
        # Map winning column positions back to the labels they stand for.
        return np.asarray(labels)[np.argmax(scores, axis=1)]

In [6]:

# Fit the classifier on the training split, using the default smoothing.
model = Bayes()
model.fit(X=X_train, Y=y_train)


In [7]:
# Fraction of training rows whose predicted label matches the true label.
print(f"Train accuracy: {model.score(X_train,y_train)}")


Train accuracy: 1.0


In [8]:
# Predicted labels for the held-out rows; reused by the metrics cells.
Y_test_pred = model.predict(X=X_test)

In [9]:
# Fraction of held-out rows whose predicted label matches the true label
# (score() runs predict() again internally).
print(f"Test accuracy: {model.score(X_test,y_test)}")

Test accuracy: 1.0


## Compute metrics: Accuracy, Sensitivity, and Specificity

In [10]:
def get_metrics(y_true, y_pred):
    """Compute per-class accuracy, sensitivity and specificity.

    Each class is scored one-vs-rest from the confusion matrix of ``y_true``
    against ``y_pred``.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_true``.

    Returns
    -------
    dict
        ``"ACC"`` (accuracy), ``"TPR"`` (sensitivity / recall) and ``"TNR"``
        (specificity). Each value is an array with one entry per class, in
        the class order of the confusion matrix. A ratio whose denominator is
        zero comes out as NaN.
    """
    matrix = cm(y_true, y_pred)

    # With sklearn's layout (rows = true class, columns = predicted class):
    # diagonal = correct hits; column sum minus diagonal = rows wrongly
    # assigned to the class; row sum minus diagonal = rows of the class missed.
    TP = np.diag(matrix)
    FP = matrix.sum(axis=0) - TP
    FN = matrix.sum(axis=1) - TP
    TN = matrix.sum() - (FP + FN + TP)

    return {
        # Overall accuracy
        "ACC": (TP + TN) / (TP + FP + FN + TN),
        # Sensitivity, hit rate, recall, or true positive rate
        "TPR": TP / (TP + FN),
        # Specificity or true negative rate
        "TNR": TN / (TN + FP),
    }

In [11]:
# Per-class metrics on the held-out split.
metrics = get_metrics(y_true=y_test, y_pred=Y_test_pred)

In [12]:
# Show the metric arrays (one entry per class) as the cell output.
metrics

{'ACC': array([1., 1.]), 'TPR': array([1., 1.]), 'TNR': array([1., 1.])}