In [None]:
import numpy as np

In [None]:
def fit(X, y):
    """Estimate per-class Gaussian parameters and class priors.

    Arguments:
        X: samples, 2-D np.ndarray of shape (n_samples, n_features)
        y: labels, np.ndarray with one entry per row of X
    Return:
        mean:  (n_classes, n_features) per-class feature means
        var:   (n_classes, n_features) per-class feature variances (np.var default, ddof=0)
        prior: (n_classes,) fraction of the samples that belong to each class

    Row i of every returned array belongs to the i-th value of np.unique(y).
    The three arrays are also printed before being returned.
    """
    n_samples, n_features = X.shape
    labels = np.unique(y)
    stats_shape = (len(labels), n_features)

    mean = np.zeros(stats_shape, dtype=np.float64)
    var = np.zeros(stats_shape, dtype=np.float64)
    prior = np.zeros(len(labels), dtype=np.float64)

    for row, label in enumerate(labels):
        members = X[y == label]  # all samples carrying this label
        mean[row] = members.mean(axis=0)
        var[row] = members.var(axis=0)
        prior[row] = len(members) / float(n_samples)

    for caption, values in (('mean: ', mean), ('var: ', var), ('prior: ', prior)):
        print(caption, values)
    return mean, var, prior

def calc_pdf(X, m, v):
    """Evaluate a multivariate Gaussian density at a single point.

    Arguments:
        X: the point, 1-D np.ndarray of length d
        m: mean vector, 1-D np.ndarray of length d
        v: covariance matrix, (d, d) np.ndarray
    Return:
        pdf: the density N(X; m, v)
    Raises:
        np.linalg.LinAlgError: if v is singular
    """
    diff = X - m
    # Normalising constant (2*pi)^(d/2) * |v|^(1/2). The density is exp(.) DIVIDED
    # by it; the previous code multiplied by z/2, which is not a density.
    z = np.power(2 * np.pi, X.shape[0] / 2) * np.sqrt(np.linalg.det(v))
    # solve(v, diff) yields v^-1 @ diff without forming the explicit inverse.
    mahalanobis_sq = diff.T @ np.linalg.solve(v, diff)
    pdf = np.exp(-0.5 * mahalanobis_sq) / z
    return pdf

def calc_log_likelyhood_prior_prod(X, mean, var, prior):
    """Log of (class-conditional likelihood * prior) for every sample and class.

    Each class is modelled as a Gaussian with a diagonal covariance, i.e. the
    features are treated as independent given the class.

    Arguments:
        X:     samples, (n_samples, n_features) np.ndarray
        mean:  per-class feature means, (n_classes, n_features)
        var:   per-class feature variances, (n_classes, n_features)
        prior: class priors, (n_classes,)
    Return:
        llpp: (n_samples, n_classes) np.ndarray; llpp[i, k] = log p(X[i] | class k) + log prior[k],
              where k is the row index into mean / var / prior
    Raises:
        np.linalg.LinAlgError: if any variance is not strictly positive
            (the corresponding diagonal covariance would be singular)
    """
    X = np.asarray(X, dtype=np.float64)
    mean = np.asarray(mean, dtype=np.float64)
    var = np.asarray(var, dtype=np.float64)
    prior = np.asarray(prior, dtype=np.float64)

    if np.any(var <= 0):
        raise np.linalg.LinAlgError("every per-class feature variance must be > 0")

    # The number of classes comes from the fitted parameters instead of a
    # hard-coded list, so the score matrix always has one column per class.
    # Work in the log domain throughout: log(exp(-big)) would underflow to -inf.
    log_norm = -0.5 * np.sum(np.log(2.0 * np.pi * var), axis=1)        # (n_classes,)
    diff = X[:, np.newaxis, :] - mean[np.newaxis, :, :]                # (n_samples, n_classes, n_features)
    mahalanobis_sq = np.sum(diff ** 2 / var[np.newaxis, :, :], axis=2)  # (n_samples, n_classes)

    llpp = log_norm - 0.5 * mahalanobis_sq + np.log(prior)
    return llpp

def predict(X,m,v,p):
  """Pick the highest-scoring class for every row of X.

  Arguments:
      X: samples to classify, one per row
      m, v, p: per-class means, variances and priors, forwarded unchanged to
               calc_log_likelyhood_prior_prod
  Return:
      for each sample, the column index of its largest score

  The full score matrix is printed before the indices are returned.
  """
  scores = calc_log_likelyhood_prior_prod(X, m, v, p)
  print('Predict: ', scores)
  winners = np.argmax(scores, axis=1)
  return winners


def naive_gaussian_predict(X_train, y_train, X_test):
    """
    Train a Gaussian naive Bayes classifier and predict labels for the test set.

    Arguments:
        X_train: train samples, (n_train, n_features) np.ndarray
        y_train: train labels,  (n_train, ) np.ndarray
        X_test:  test samples,  (n_test, n_features) np.ndarray
    Return
        y_pred: predicted labels, (n_test, ) np.ndarray holding values taken from y_train
    """
    mean, var, prior = fit(X_train, y_train)
    scores = calc_log_likelyhood_prior_prod(X_test, mean, var, prior)

    # argmax yields a column index, not a label. fit() orders its rows like
    # np.unique(y_train), so index through that array to get the actual label.
    # For labels 0..k-1 this is the identity mapping.
    labels = np.unique(y_train)
    return labels[np.argmax(scores, axis=1)]

In [None]:
# Small two-class toy problem: 25 training samples and 20 test samples with 2 features each.
# Labels in y_train are 0 / 1. Note the test matrix is bound to lower-case `x_test`.
X_train = np.array([[-1.43313789,  0.87886565], [ 1.73544375,  5.74074094], [-0.20466712, -0.17858128], [ 1.13599219,  5.89066163], [ 2.74485936,  3.86226574], [-0.62227035,  0.67791906], [ 0.18273191,  1.69422586], [-1.49552969, -0.16019098], [ 1.48276596,  7.56215945], [ 2.5756331,   4.79358451], [-0.02672632, -0.9002082 ], [-0.47535179,  0.9614788 ], [-0.62624673,  6.19767345], [-0.36105075,  4.71649291], [-1.04671485,  5.13251142], [ 0.36400416,  0.30274527], [-0.01760132,  4.93424899], [-1.24501648, -1.52927977], [ 0.74281699, -0.26205474], [-1.49643126,  4.29044987], [ 1.43098835,  4.8552861 ], [ 0.83629896,  5.33601689], [ 0.76640666,  2.56599276], [-0.55953029,  4.00154206], [-0.05876738, -0.34313126]])
y_train = np.array([0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0])
x_test = np.array([[-0.27459298, -0.33154335], [-1.90328844,  8.63813903], [-1.1212894,   5.11997934], [-0.32198076,  1.53187219], [-1.20681238,  5.03012668], [ 0.83820448,  3.74266307], [ 0.9278087,  -0.6647579 ], [ 0.65004053,  5.65506597], [ 1.76557838,  5.03833828], [ 0.55750544,  2.77951423], [ 0.72126917,  6.16397755], [ 1.9633985,   7.09960884], [ 0.7731207,  -1.67230658], [ 1.78351614,  2.52235728], [-1.45051856,  7.82657438], [ 0.27545036,  5.87035969], [ 0.04892322,  4.0835455 ], [-1.96195702,  8.20859446], [ 1.29811144, -3.45230805], [-1.17286752, -0.65626347]])
# Sanity-check the array shapes before fitting.
print(X_train.shape, y_train.shape, x_test.shape)

(25, 2) (25,) (20, 2)


In [None]:
# Fit the two-class toy data and keep the parameters in module-level names that
# the following cells reuse.
mean, var, prior = fit(X_train, y_train)
# NOTE(review): the fit() defined at the top of the notebook already prints these
# arrays, which is why the recorded output shows them twice.
print(mean, var, prior)

mean:  [[-0.29212561  0.30898176]
 [ 0.60264665  5.17797184]]
var:  [[0.56506611 1.16251618]
 [1.75888975 0.92329467]]
prior:  [0.48 0.52]
[[-0.29212561  0.30898176]
 [ 0.60264665  5.17797184]] [[0.56506611 1.16251618]
 [1.75888975 0.92329467]] [0.48 0.52]


In [None]:
# Classify the training samples themselves with the parameters fitted above.
predict(X_train,mean, var, prior)

In [None]:
# End-to-end run: fit on the training data and classify the 20 test samples.
naive_gaussian_predict(X_train, y_train, x_test)

mean:  [[-0.29212561  0.30898176]
 [ 0.60264665  5.17797184]]
var:  [[0.56506611 1.16251618]
 [1.75888975 0.92329467]]
prior:  [0.48 0.52]


array([0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0])

In [None]:
# Classify the test samples using the module-level mean / var / prior fitted earlier.
predict(x_test,mean,var,prior)

In [None]:
# Six 2-feature samples fitted under two different labelings of the same rows.
X = np.array([[0.25,3.14], [0.48,8.96],[0.75,9.0], [0.55, 6.5], [0.0, 10.0], [1.0, 4.5]])
# Labels here are 1 and 2 (not 0-based); fit() orders its rows like np.unique(y),
# so row 0 of each result belongs to label 1 and row 1 to label 2.
y1 = np.array([1, 2, 1, 2, 1, 2])
y2 = np.array([1, 1, 1, 2, 2, 2])
m1, v1, pr1 = fit(X, y1)
m2, v2, pr2 = fit(X, y2)

mean:  [[0.33333333 7.38      ]
 [0.67666667 6.65333333]]
var:  [[0.09722222 9.15546667]
 [0.05308889 3.32702222]]
prior:  [0.5 0.5]
mean:  [[0.49333333 7.03333333]
 [0.51666667 7.        ]]
var:  [[0.04175556 7.57928889]
 [0.16722222 5.16666667]]
prior:  [0.5 0.5]


In [None]:
import numpy as np
def fit(X, y):
    """Compute the Gaussian naive Bayes parameters of a labelled sample set.

    Arguments:
        X: samples, 2-D np.ndarray of shape (n_samples, n_features)
        y: labels, np.ndarray with one entry per row of X
    Return:
        mean:  (n_classes, n_features) feature means of each class
        var:   (n_classes, n_features) feature variances of each class (ddof=0)
        prior: (n_classes,) share of the samples falling into each class

    Classes are ordered as in np.unique(y).
    """
    n_samples, n_features = X.shape
    classes = np.unique(y)
    k = len(classes)

    mean, var = (np.zeros((k, n_features), dtype=np.float64) for _ in range(2))
    prior = np.zeros(k, dtype=np.float64)

    for row, label in enumerate(classes):
        in_class = (y == label)
        subset = X[in_class]
        mean[row, :] = np.mean(subset, axis=0)
        var[row, :] = np.var(subset, axis=0)
        prior[row] = subset.shape[0] / float(n_samples)

    return mean, var, prior

def calc_pdf(x, m, v):
    """Evaluate a multivariate Gaussian density at a single point.

    Arguments:
        x: the point, 1-D np.ndarray of length d
        m: mean vector, 1-D np.ndarray of length d
        v: covariance matrix, (d, d) np.ndarray
    Return:
        pdf: the density N(x; m, v)
    Raises:
        np.linalg.LinAlgError: if v is singular
    """
    centered = x - m
    # z = (2*pi)^(d/2) * |v|^(1/2) is the normaliser, so the exponential must be
    # divided by it (multiplying by z/2, as before, does not give a density).
    z = np.power(2 * np.pi, x.shape[0] / 2) * np.sqrt(np.linalg.det(v))
    # Solve v @ w = centered rather than inverting v explicitly.
    quad_form = centered.T @ np.linalg.solve(v, centered)
    pdf = np.exp(-0.5 * quad_form) / z
    return pdf

def calc_log_likelyhood_prior_prod(X, mean, var, prior):
    """Log of (class-conditional likelihood * prior) for every sample and class.

    Each class is modelled as a Gaussian with a diagonal covariance, i.e. the
    features are treated as independent given the class.

    Arguments:
        X:     samples, (n_samples, n_features) np.ndarray
        mean:  per-class feature means, (n_classes, n_features)
        var:   per-class feature variances, (n_classes, n_features)
        prior: class priors, (n_classes,)
    Return:
        (n_samples, n_classes) np.ndarray whose entry [i, k] is
        log p(X[i] | class k) + log prior[k]; k is the row index into mean / var / prior
    Raises:
        np.linalg.LinAlgError: if any variance is not strictly positive
            (the corresponding diagonal covariance would be singular)
    """
    X = np.asarray(X, dtype=np.float64)
    mean = np.asarray(mean, dtype=np.float64)
    var = np.asarray(var, dtype=np.float64)
    prior = np.asarray(prior, dtype=np.float64)

    if np.any(var <= 0):
        raise np.linalg.LinAlgError("every per-class feature variance must be > 0")

    # One column per fitted class (taken from `mean`), not a fixed list of four.
    # The density is evaluated directly in the log domain so that samples far
    # from a class mean get a large negative score instead of log(0) = -inf.
    log_norm = -0.5 * np.sum(np.log(2.0 * np.pi * var), axis=1)        # (n_classes,)
    diff = X[:, np.newaxis, :] - mean[np.newaxis, :, :]                # (n_samples, n_classes, n_features)
    mahalanobis_sq = np.sum(diff ** 2 / var[np.newaxis, :, :], axis=2)  # (n_samples, n_classes)

    log_like_pr_prod = log_norm - 0.5 * mahalanobis_sq + np.log(prior)
    return log_like_pr_prod

def predict(X, mean, var, prior):
    """Return, for every row of X, the index of the class with the highest score.

    Arguments:
        X: samples to classify, one per row
        mean, var, prior: per-class parameters, forwarded unchanged to
                          calc_log_likelyhood_prior_prod
    Return:
        np.ndarray with one column index (argmax over the score matrix) per sample
    """
    return np.argmax(calc_log_likelyhood_prior_prod(X, mean, var, prior), axis=1)

def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 when the denominator is zero."""
    return float(numerator) / float(denominator) if denominator else 0.0


def naive_gmodel_eval(X_train, y_train, X_test, y_test):
    """
    Perform multi-class classification and prepare a one-vs-rest classification report.

    Arguments:
        X_train: training samples, (n_train, n_features), np.ndarray
        y_train: training labels, (n_train, ), np.ndarray (e.g. labels 0, 1, 2 or 3)
        X_test:  test samples, (n_test, n_features), np.ndarray
        y_test:  test labels, (n_test, ), np.ndarray
    Return:
        metrics: dict of dicts
           key: label (one entry per distinct label in y_train)
           value: dict
               key: string, one of 'Precision', 'Recall', 'Accuracy',
                    'F1 score', 'Misclassification Rate'
               value: float

    A ratio whose denominator is zero (e.g. precision of a class that was never
    predicted) is reported as 0.0.
    """
    mean, var, prior = fit(X_train, y_train)

    # predict() returns row indices into the fitted parameters; fit() orders those
    # rows like np.unique(y_train), so map the indices back to real labels.
    labels = np.unique(y_train)
    y_predicted = labels[predict(X_test, mean, var, prior)]
    y_test = np.asarray(y_test)

    metrics = {}
    for label in labels:
        predicted_pos = (y_predicted == label)
        actual_pos = (y_test == label)

        TP = int(np.sum(predicted_pos & actual_pos))
        TN = int(np.sum(~predicted_pos & ~actual_pos))
        FP = int(np.sum(predicted_pos & ~actual_pos))
        FN = int(np.sum(~predicted_pos & actual_pos))

        P = _safe_ratio(TP, TP + FP)
        R = _safe_ratio(TP, TP + FN)               # recall divides by actual positives
        F1 = _safe_ratio(2 * P * R, P + R)         # harmonic mean of precision and recall
        Acc = _safe_ratio(TP + TN, TP + TN + FP + FN)  # correct decisions / all decisions
        msc = 1 - Acc

        metrics[label.item()] = {
            'Precision': float(P),
            'Recall': float(R),
            'Accuracy': float(Acc),
            'F1 score': float(F1),
            'Misclassification Rate': float(msc),
        }

    return metrics

In [None]:
# Four-class toy training set: 25 samples with 5 features each (X1) and their
# labels in {0, 1, 2, 3} (y1). Note that y1 rebinds the name used by an earlier cell.
X1 = np.array([[-2.17519084e+00,5.52661237e+00,-8.58259499e+00,1.40639543e+01,-1.13206816e+01],
 [-1.13579154e+01,-8.27733259e+00,-4.52817610e+00,4.57991108e+00,-4.89895633e+00],
 [6.15253774e+00,5.24821058e-02,-4.12148754e+00,1.64614647e+00,-1.70040298e+01],
 [4.23190261e+00,5.50783812e-01,-2.55700315e+00,-9.85471314e+00,-9.70144210e+00],
 [-1.49648717e+00,-2.45909673e+00,-8.42191554e+00,2.77454691e-01,-1.13624619e+01],
 [-3.98303225e+00,4.82365584e+00,-6.99574002e+00,-1.28342158e+01,-2.50947001e+00],
 [-5.37306232e-01,-7.68700983e-01,-1.71914389e+01,-6.26259607e+00,-1.59564728e+01],
 [-3.83881250e+00,2.55262871e+00,9.18872190e+00,-1.00609405e+01,-9.91918514e+00],
 [-6.02336489e+00,-2.30114649e+00,3.12187616e-01,-2.97400389e+00,1.77142211e+00],
 [-7.32631860e+00,-5.25004623e-02,-1.21590490e+00,5.19202506e+00,6.13814403e+00],
 [-1.03597363e+01,-6.64053523e+00,3.64921244e+00,8.02230139e+00,-7.45683923e+00],
 [-3.90628914e+00,9.83274647e+00,-1.40484127e+01,-8.80035617e-01,-2.78292104e-02],
 [5.32109089e+00,4.12959054e+00,9.88307249e-01,-6.41769757e+00,-1.09246116e+01],
 [-2.41208153e+00,8.98398621e+00,-1.21887138e+01,-3.02567044e+00,-5.09274203e+00],
 [1.43649923e-01,5.05194243e+00,-1.79638857e+00,-4.80560057e+00,-2.40392717e+00],
 [-1.29720001e+01,9.57847439e-01,-9.03090848e+00,2.27705139e+00,1.04651946e+00],
 [-9.07050048e+00,-3.76470091e+00,-7.72697224e+00,-6.45654588e+00,-1.62908244e-02],
 [4.85980618e+00,5.34314675e+00,-6.31793940e+00,8.23428993e+00,-7.59324881e-01],
 [-2.28542323e+00,5.27015749e-01,-1.50256986e+01,5.15571980e-01,-2.31854175e+00],
 [7.28453707e+00,1.33529098e-01,-6.72911049e+00,9.75479514e+00,-1.56431748e+01],
 [6.11552007e+00,1.56074660e+00,-1.53066963e+01,-3.29210965e+00,-9.88106520e+00],
 [3.22590362e+00,1.56084112e-01,6.23958884e+00,-1.27310500e+01,-5.47241930e+00],
 [-8.71238066e+00,1.66995994e+00,-1.34420605e+01,3.62610705e+00,-6.42427740e+00],
 [-3.93098851e+00,4.79008703e+00,2.10088632e+00,7.76077964e+00,-9.51627871e+00],
 [2.14007638e+00,-3.68504049e-02,-5.89122461e+00,2.99665298e+00,-1.36611944e+01]])
y1 = np.array([2, 1, 0, 3, 0, 0, 0, 3, 1, 1, 1, 0, 3, 0, 3, 1, 1, 2, 0, 2, 0, 3, 2, 2, 2])

# Second four-class toy set with the same layout as X1 / y1: 25 samples with
# 5 features each (X2) and labels in {0, 1, 2, 3} (y2). y2 rebinds the name used
# by an earlier cell.
X2 = np.array([[ 1.05460567e+00,7.20355350e+00, -4.84679966e+00,3.46745033e-01,-1.00804015e+01],
 [-7.62640336e+00, -3.22368607e+00, -1.38081365e+01, -7.84154033e+00,-3.14351635e+00],
 [ 4.66031479e+00, -8.45739195e+00,2.75547079e+00, -5.93437228e+00,-6.37435456e+00],
 [-3.25610317e+00, -1.39624700e+01,7.95310963e-01,2.82698720e+00,-3.33509818e-01],
 [ 1.75896762e+00, -2.96358726e+00, -9.87567768e+00, -2.71763212e+00,-5.83571173e-02],
 [ 5.73991530e+00, -8.01065678e+00,6.12496043e+00, -9.83832380e-01,-7.07120909e+00],
 [-9.13052873e+00,2.17299385e+00, -8.97582553e+00, -8.92185932e-01,-3.34045950e+00],
 [ 8.37584641e-01, -5.33739622e+00, -3.83296750e+00, -1.12252263e+00, 6.25477227e+00],
 [ 4.39369860e+00,1.03721797e+00,2.09535845e+00, -1.16488770e+01,-3.90472034e+00],
 [ 1.10729770e+00,1.08478509e+01, -1.15073215e+01, -3.21912629e+00,-3.33755059e+00],
 [ 6.21935732e+00,1.02015431e+01, -1.57767139e+01, -2.13545912e+00,-1.13958956e+01],
 [-3.64872565e+00, -8.70187917e+00, -1.00962383e+00,1.97272004e+00,-8.02577016e+00],
 [-1.73456357e+00,3.86532602e+00, -7.09152115e+00, -9.90660472e+00,-1.03340048e+01],
 [ 5.90109801e+00,1.98544100e+00, -1.40029410e+01,2.66137533e+00,-1.71117764e+01],
 [ 7.61331345e+00,9.69675426e+00, -4.07218355e+00, -9.51453398e-03,-8.27517946e+00],
 [ 3.64090429e+00,6.72858475e+00, -1.34165272e+01,7.30386049e+00,-1.60015824e+01],
 [ 4.46411526e+00, -3.48473373e+00, -1.20574085e+00,1.47227194e+00,-9.95155707e+00],
 [-1.34872325e+01, -3.63045817e+00, -4.64160376e+00, -1.31816304e+01,-6.70779904e+00],
 [ 2.28915682e+00, -3.45818750e+00, -7.23686788e+00, -9.40041413e+00, 1.81744424e+00],
 [-8.21283273e+00,1.90298199e+00, -1.57463264e+01, -1.82523565e+00,-1.01979194e+01],
 [-1.66037000e+00, -1.57561854e+01, -7.87984954e+00, -6.47975917e+00, 3.15222164e+00],
 [-1.42373528e+01, -7.11931148e+00, -7.28436495e+00,4.33463247e+00, 3.09794527e+00],
 [-1.25131152e+01,1.32903167e+00, -5.17327182e+00,8.16818455e+00, 3.68688685e+00],
 [-9.09330433e+00, -6.23185272e+00, -8.38202037e+00,7.61223030e-01, 5.76622087e+00],
 [-7.00761395e+00,1.22421636e+00, -8.22679633e+00,4.53920093e+00,-5.34880624e+00]])
y2 = np.array([2, 0, 3, 1, 2, 3, 0, 1, 3, 2, 0, 3, 0, 2, 2, 2, 3, 1, 1, 0, 1, 1, 0, 1, 2])

In [None]:
# Fit the four-class data. This rebinds the module-level mean / var / prior that
# were previously fitted on the two-class data.
mean, var, prior = fit(X1, y1)
print(mean, var, prior)

mean:  [[ -0.29407022   2.81910441 -11.66251292  -2.9819318   -8.01907659]
 [ -9.51830596  -3.34639471  -3.09009361   1.77345652  -0.56933346]
 [ -0.0890234    2.90441413  -6.47700728   7.73942984  -9.5541553 ]
 [  1.81674691   2.48820592   2.41264525  -8.77400036  -7.68431706]]
var:  [[14.90682466 18.3047822  18.78460281 19.57769881 36.70652506]
 [ 5.54002042 10.96821231 19.89472548 24.85596465 19.92826628]
 [29.5507125   5.70530346 21.15962076 13.95333433 24.0351927 ]
 [10.97674644  3.69268704 21.00056433  7.95655132 10.46758931]]
prior:  [0.32 0.24 0.24 0.2 ]
[[ -0.29407022   2.81910441 -11.66251292  -2.9819318   -8.01907659]
 [ -9.51830596  -3.34639471  -3.09009361   1.77345652  -0.56933346]
 [ -0.0890234    2.90441413  -6.47700728   7.73942984  -9.5541553 ]
 [  1.81674691   2.48820592   2.41264525  -8.77400036  -7.68431706]] [[14.90682466 18.3047822  18.78460281 19.57769881 36.70652506]
 [ 5.54002042 10.96821231 19.89472548 24.85596465 19.92826628]
 [29.5507125   5.70530346 21.159

In [None]:
# NOTE(review): the output saved with this cell is an IndexError. The first
# calc_log_likelyhood_prior_prod in this notebook allocates only 2 score columns
# while looping over 4 classes, which would raise exactly that -- presumably a
# stale definition was active in the kernel; confirm with Restart & Run All.
predict(X1,mean, var, prior)

IndexError: ignored

In [None]:
# Fit on the four-class set X1 / y1 and classify X2.
# NOTE(review): the saved output ends in an IndexError; naive_gaussian_predict
# relies on calc_log_likelyhood_prior_prod, so this presumably has the same cause
# as a score matrix with fewer columns than classes -- confirm on a fresh kernel.
naive_gaussian_predict(X1, y1, X2)

mean:  [[ -0.29407022   2.81910441 -11.66251292  -2.9819318   -8.01907659]
 [ -9.51830596  -3.34639471  -3.09009361   1.77345652  -0.56933346]
 [ -0.0890234    2.90441413  -6.47700728   7.73942984  -9.5541553 ]
 [  1.81674691   2.48820592   2.41264525  -8.77400036  -7.68431706]]
var:  [[14.90682466 18.3047822  18.78460281 19.57769881 36.70652506]
 [ 5.54002042 10.96821231 19.89472548 24.85596465 19.92826628]
 [29.5507125   5.70530346 21.15962076 13.95333433 24.0351927 ]
 [10.97674644  3.69268704 21.00056433  7.95655132 10.46758931]]
prior:  [0.32 0.24 0.24 0.2 ]


IndexError: ignored