In [1]:
import numpy as np

import scipy.stats
from scipy import stats

from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt

In [2]:
def onehotencoding(a):
    """One-hot encode a 1-D collection of non-negative integer class labels.

    Parameters
    ----------
    a : array-like of int
        Class labels. Values are used directly as column indices, so they
        must be non-negative integers.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(a.size, a.max() + 1)`` where row ``i`` has a
        single 1 in column ``a[i]``. An empty input yields a ``(0, 0)`` array.
    """
    # Encode the argument itself; the previous version silently replaced it
    # with the global ``iris.target`` and therefore ignored its input.
    a = np.asarray(a)
    if a.size == 0:
        # ``max()`` of an empty array raises, so short-circuit explicitly.
        return np.zeros((0, 0))
    b = np.zeros((a.size, a.max() + 1))
    b[np.arange(a.size), a] = 1
    return b
def calculate_prior(y):
    """Return the empirical frequency of labels 0 and 1 in ``y``.

    The result is a length-2 array ``[count(y == 0), count(y == 1)] / y.size``.
    """
    class_counts = np.array([np.sum(y == cls, axis=0) for cls in (0, 1)])
    return class_counts / y.size
def calculate_mean(x, y):
    """Compute the per-feature mean of ``x`` separately for labels 0 and 1.

    Returns an array of shape ``(2, x.shape[1])``; row ``k`` holds the column
    means of the rows of ``x`` whose label in ``y`` equals ``k``.
    """
    n_features = x.shape[1]
    class_means = np.zeros((2, n_features))
    for cls in (0, 1):
        class_means[cls] = x[y == cls].mean(axis=0)
    return class_means
def calculate_var(x, y):
    """Compute the per-feature variance of ``x`` separately for labels 0 and 1.

    Returns an array of shape ``(2, x.shape[1])``; row ``k`` holds the column
    variances (NumPy's default ``ddof=0``) of the rows of ``x`` whose label in
    ``y`` equals ``k``.
    """
    n_features = x.shape[1]
    class_vars = np.zeros((2, n_features))
    for cls in (0, 1):
        class_vars[cls] = x[y == cls].var(axis=0)
    return class_vars
def calculate_posterior(mean, var, x, class_prior=None):
    """Posterior class probabilities under a two-class Gaussian naive Bayes model.

    Parameters
    ----------
    mean : array-like, shape (2, n_features)
        Per-class feature means (row ``k`` belongs to class ``k``).
    var : array-like, shape (2, n_features)
        Per-class feature variances.
    x : array-like, shape (n_samples, n_features)
        Samples to score.
    class_prior : array-like of length 2, optional
        Prior probability of class 0 and class 1. When omitted, the
        module-level ``prior`` variable is used, which keeps existing
        three-argument calls working.

    Returns
    -------
    numpy.ndarray, shape (n_samples, 2)
        Column ``k`` is the posterior probability of class ``k``; each row
        sums to 1.
    """
    if class_prior is None:
        # Backward compatibility with callers that rely on the global prior.
        class_prior = prior

    # Work in log space: a product of many small densities underflows to 0,
    # which would turn the normalisation below into 0/0.
    # Each class uses its OWN prior (the earlier code applied prior[0] to both).
    log_joint = np.stack(
        [
            stats.norm.logpdf(x, loc=mean[k], scale=np.sqrt(var[k])).sum(axis=1)
            + np.log(class_prior[k])
            for k in (0, 1)
        ],
        axis=1,
    )

    # Subtract the row-wise maximum before exponentiating so the largest term
    # becomes exp(0) = 1; the shift cancels in the normalisation.
    log_joint = log_joint - log_joint.max(axis=1, keepdims=True)
    joint = np.exp(log_joint)
    return joint / joint.sum(axis=1, keepdims=True)
def validation(y_true, y_pred):
    """Print the confusion matrix and basic binary-classification metrics.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels (0 or 1).
    y_pred : array-like
        Predicted labels (0 or 1).

    Prints the 2x2 confusion matrix followed by accuracy, recall, precision
    and F1 score. Any metric whose denominator is zero is reported as 0.0.
    Returns ``None``.
    """
    # Pin the label set so the matrix is always 2x2, even when only one class
    # occurs in the inputs; otherwise the unpacking below would fail.
    cnfx = confusion_matrix(y_true, y_pred, labels=[0, 1])
    [[tn, fp], [fn, tp]] = cnfx

    total = tn + fp + fn + tp
    actual_positive = fn + tp      # row sum for class 1
    predicted_positive = fp + tp   # column sum for class 1

    accuracy = (tn + tp) / total if total else 0.0
    recall = tp / actual_positive if actual_positive else 0.0
    precision = tp / predicted_positive if predicted_positive else 0.0

    def fscore(p, r, b):
        """F-beta score of precision ``p`` and recall ``r`` with weight ``b``."""
        denominator = b**2 * p + r
        return ((1 + b**2) * (p * r)) / denominator if denominator else 0.0

    f1 = fscore(precision, recall, 1)

    print(cnfx)
    print("accuracy", accuracy)
    print("recall", recall)
    # The printed label keeps its historical spelling so output stays unchanged.
    print("precession", precision)
    print("f1 score", f1)

In [3]:
# Load the Iris dataset and pull out the pieces used by the rest of the notebook.
iris = datasets.load_iris()
X, cols, label = iris.data, iris.feature_names, iris.target_names

# Binary target: first column of the one-hot matrix built from the class labels.
Y = onehotencoding(iris.target)[:, 0]

In [4]:
# Fraction of the samples assigned to the training split.
TRAIN_TEST_RATIO = 0.8
M = X.shape[0]
# Derive the training size from the constant above instead of repeating the
# literal, so changing TRAIN_TEST_RATIO actually changes the split.
M_TRAIN = int(np.ceil(TRAIN_TEST_RATIO * M))

In [5]:
# Seeded generator so Restart & Run All reproduces the same train/test split.
SPLIT_SEED = 42
split_rng = np.random.default_rng(SPLIT_SEED)

indices = split_rng.permutation(X.shape[0])
train_idx, test_idx = indices[:M_TRAIN], indices[M_TRAIN:]

# Fit the scaler on the training rows only, so no statistics from the test
# rows leak into preprocessing; then apply it to every row.
scaler = MinMaxScaler()
scaler = scaler.fit(X[train_idx, :])
X = scaler.transform(X)

X_train, X_test = X[train_idx, :], X[test_idx, :]
Y_train, Y_test = Y[train_idx], Y[test_idx]

# Lower-case aliases used by the modelling cells.
x_train, x_test = X_train, X_test
y_train, y_test = Y_train, Y_test

In [6]:
# Estimate the model parameters from the training split.
# `prior` must be assigned before calculate_posterior runs, because that
# function reads the module-level `prior`.
prior = calculate_prior(y_train)
mean, var = calculate_mean(x_train, y_train), calculate_var(x_train, y_train)

# Posterior class probabilities for the training and test samples.
train_post_prob, test_post_prob = (
    calculate_posterior(mean, var, features) for features in (x_train, x_test)
)

In [8]:
# Training-set metrics: predict the class with the larger posterior.
validation(y_true=y_train, y_pred=np.argmax(train_post_prob, axis=1))

[[81  0]
 [ 0 39]]
accuracy 1.0
recall 1.0
precession 1.0
f1 score 1.0


In [9]:
# Held-out metrics: predict the class with the larger posterior.
validation(y_true=y_test, y_pred=np.argmax(test_post_prob, axis=1))

[[19  0]
 [ 0 11]]
accuracy 1.0
recall 1.0
precession 1.0
f1 score 1.0
