In [None]:
import numpy as np

In [None]:
class GaussianNB(object):
  """Gaussian Naive Bayes classifier built on NumPy.

  Every feature is modelled as an independent normal distribution per class
  (i.e. a diagonal covariance matrix), and a sample is assigned to the class
  with the largest ``log p(x | class) + log p(class)``.

  Attributes set by ``fit``:
    classes: sorted unique labels found in ``y``.
    mean:    per-class feature means, shape (n_classes, n_features).
    var:     per-class feature variances (ddof=0), same shape as ``mean``.
    prior:   per-class relative frequencies, shape (n_classes,).

  Attribute set by ``calc_prod_likelyhood_prior`` (and therefore by
  ``predict`` / ``predict_probability``):
    prod_likelyhood_prior: joint log scores, shape (n_samples, n_classes).
  """

  # Lower bound applied to variances when evaluating densities. It only has an
  # effect for features that are (almost) constant within a class, where the
  # unmodified variance would cause a division by zero.
  _VAR_FLOOR = 1e-9

  def fit(self, X, y):
    """Estimate per-class means, variances and priors.

    Args:
      X: array-like of shape (n_samples, n_features).
      y: array-like of shape (n_samples,) holding the class label of each row.

    Returns:
      None. The learned parameters are stored on the instance and printed.
    """
    X = np.asarray(X, dtype=np.float64)
    y = np.asarray(y)
    n_samples, n_features = X.shape
    self.classes = np.unique(y)
    n_classes = len(self.classes)

    self.mean = np.zeros((n_classes, n_features), dtype=np.float64)
    self.var = np.zeros((n_classes, n_features), dtype=np.float64)
    self.prior = np.zeros(n_classes, dtype=np.float64)

    for idx, c in enumerate(self.classes):
      X_c = X[y == c]

      self.mean[idx, :] = X_c.mean(axis=0)
      self.var[idx, :] = X_c.var(axis=0)

      self.prior[idx] = X_c.shape[0] / float(n_samples)
    print('Mean: ', self.mean)
    print('Varience: ', self.var)
    print('Prior: ', self.prior)

  def _log_pdf(self, class_idx, X):
    """Log-density of class ``class_idx`` at X (one sample, or one per row)."""
    mean = self.mean[class_idx]
    var = np.maximum(self.var[class_idx], self._VAR_FLOOR)
    diff = np.asarray(X, dtype=np.float64) - mean
    # With a diagonal covariance the multivariate normal log-density is a sum
    # of independent 1-D terms, so no determinant or matrix inverse is needed.
    return -0.5 * np.sum(np.log(2.0 * np.pi * var) + diff ** 2 / var, axis=-1)

  def calc_pdf(self, class_idx, X):
    """Gaussian density of class ``class_idx`` evaluated at X.

    Args:
      class_idx: row index into ``self.mean`` / ``self.var`` (not the label).
      X: a single sample of shape (n_features,), or a 2-D array of samples.

    Returns:
      The density as a scalar for a single sample, or one value per row.
    """
    # The density is exp(-0.5 * d^2) / z with z = (2*pi)^(k/2) * |Sigma|^(1/2);
    # the normaliser divides, it does not multiply.
    return np.exp(self._log_pdf(class_idx, X))

  def calc_prod_likelyhood_prior(self, X):
    """Store ``log p(x | class) + log p(class)`` for every sample and class.

    The result is written to ``self.prod_likelyhood_prior`` with one row per
    sample and one column per entry of ``self.classes``. Columns are addressed
    by class position, so labels need not be the integers 0..n_classes-1.

    Args:
      X: array-like of shape (n_samples, n_features).
    """
    X = np.asarray(X, dtype=np.float64)
    n_classes = len(self.classes)
    scores = np.empty((X.shape[0], n_classes), dtype=np.float64)

    for idx in range(n_classes):
      scores[:, idx] = self._log_pdf(idx, X) + np.log(self.prior[idx])

    self.prod_likelyhood_prior = scores

  def predict(self, X):
    """Return the most probable class label for each row of X."""
    self.calc_prod_likelyhood_prior(X)
    best = np.argmax(self.prod_likelyhood_prior, axis=1)
    # Map column positions back to the labels seen during fit.
    return self.classes[best]

  def predict_probability(self, X):
    """Return posterior class probabilities, shape (n_samples, n_classes).

    Columns follow the order of ``self.classes``; every row sums to 1.
    """
    self.calc_prod_likelyhood_prior(X)
    q = self.prod_likelyhood_prior
    # Subtract the row maximum before exponentiating so that very negative
    # log scores do not all underflow to zero (which would give 0/0 = nan).
    weights = np.exp(q - q.max(axis=1, keepdims=True))
    return weights / weights.sum(axis=1, keepdims=True)

In [None]:
from sklearn.datasets import make_classification, make_blobs
from sklearn.model_selection import train_test_split

In [None]:
# Two-feature blob data with cluster centres at (5, 5) and (10, 10).
X, y = make_blobs(
    n_samples=100,
    n_features=2,
    centers=[[5, 5], [10, 10]],
    cluster_std=1.5,
    random_state=3,
)
# Keep 20% of the samples aside for evaluation; the seed makes the split repeatable.
X_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

In [None]:
# Fit the classifier on the training split of the two-blob data.
# fit() prints the estimated means, variances and priors.
gaussian_nb = GaussianNB()
gaussian_nb.fit(X_train, y_train)

In [None]:
# Literal two-feature data set: X1 holds 25 samples with labels y1 (0 or 1),
# x2 holds 20 further samples without labels.
X1 = np.array([[-1.43313789,  0.87886565], [ 1.73544375,  5.74074094], [-0.20466712, -0.17858128], [ 1.13599219,  5.89066163], [ 2.74485936,  3.86226574], [-0.62227035,  0.67791906], [ 0.18273191,  1.69422586], [-1.49552969, -0.16019098], [ 1.48276596,  7.56215945], [ 2.5756331,   4.79358451], [-0.02672632, -0.9002082 ], [-0.47535179,  0.9614788 ], [-0.62624673,  6.19767345], [-0.36105075,  4.71649291], [-1.04671485,  5.13251142], [ 0.36400416,  0.30274527], [-0.01760132,  4.93424899], [-1.24501648, -1.52927977], [ 0.74281699, -0.26205474], [-1.49643126,  4.29044987], [ 1.43098835,  4.8552861 ], [ 0.83629896,  5.33601689], [ 0.76640666,  2.56599276], [-0.55953029,  4.00154206], [-0.05876738, -0.34313126]])
y1 = np.array([0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0])
x2 = np.array([[-0.27459298, -0.33154335], [-1.90328844,  8.63813903], [-1.1212894,   5.11997934], [-0.32198076,  1.53187219], [-1.20681238,  5.03012668], [ 0.83820448,  3.74266307], [ 0.9278087,  -0.6647579 ], [ 0.65004053,  5.65506597], [ 1.76557838,  5.03833828], [ 0.55750544,  2.77951423], [ 0.72126917,  6.16397755], [ 1.9633985,   7.09960884], [ 0.7731207,  -1.67230658], [ 1.78351614,  2.52235728], [-1.45051856,  7.82657438], [ 0.27545036,  5.87035969], [ 0.04892322,  4.0835455 ], [-1.96195702,  8.20859446], [ 1.29811144, -3.45230805], [-1.17286752, -0.65626347]])

In [None]:
# Fit a second, independent classifier on the literal X1 / y1 data.
g_nb = GaussianNB()
g_nb.fit(X1, y1)

Mean:  [[-0.29212561  0.30898176]
 [ 0.60264665  5.17797184]]
Varience:  [[0.56506611 1.16251618]
 [1.75888975 0.92329467]]
Prior:  [0.48 0.52]


In [None]:
# Predict on the same samples g_nb was fitted on, for comparison against y1.
g_nb.predict(X1)

array([0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1,
       1, 1, 0])

In [None]:
# Predict on x2, whose samples were not part of the data passed to fit.
g_nb.predict(x2)

array([0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0])

In [None]:
# Normalised per-class scores for the fitted samples: one row per sample,
# one column per class, each row summing to 1.
g_nb.predict_probability(X1)

array([[9.99914144e-01, 8.58559738e-05],
       [8.14172001e-08, 9.99999919e-01],
       [9.99999718e-01, 2.81857772e-07],
       [2.08939498e-07, 9.99999791e-01],
       [6.91208138e-06, 9.99993088e-01],
       [9.99977576e-01, 2.24238298e-05],
       [9.93726100e-01, 6.27389962e-03],
       [9.99999617e-01, 3.83301539e-07],
       [1.45822800e-10, 1.00000000e+00],
       [2.32791087e-07, 9.99999767e-01],
       [9.99999994e-01, 6.21950623e-09],
       [9.99900260e-01, 9.97396032e-05],
       [4.78080139e-07, 9.99999522e-01],
       [2.00863691e-04, 9.99799136e-01],
       [3.46937019e-05, 9.99965306e-01],
       [9.99993693e-01, 6.30695196e-06],
       [6.38471763e-05, 9.99936153e-01],
       [1.00000000e+00, 1.62012192e-10],
       [9.99999449e-01, 5.51173783e-07],
       [9.53088055e-04, 9.99046912e-01],
       [7.51873149e-06, 9.99992481e-01],
       [3.72888176e-06, 9.99996271e-01],
       [4.96798922e-01, 5.03201078e-01],
       [4.83577492e-03, 9.95164225e-01],
       [9.999998

# PPA2

In [None]:
# Score gaussian_nb on the held-out split (x_test / y_test) of the two-blob data.
from sklearn.metrics import classification_report
print(classification_report(y_test, gaussian_nb.predict(x_test)))

              precision    recall  f1-score   support

           0       0.47      1.00      0.64         9
           1       1.00      0.09      0.17        11

    accuracy                           0.50        20
   macro avg       0.74      0.55      0.40        20
weighted avg       0.76      0.50      0.38        20



In [None]:
# Three-class variant of the blob data, centres at (5, 5), (10, 10) and (20, 20).
# Note: this rebinds X1 and y1, which previously held the literal arrays.
X1, y1 = make_blobs(
    n_samples=100,
    n_features=2,
    centers=[[5, 5], [10, 10], [20, 20]],
    cluster_std=1.5,
    random_state=5,
)
# Keep 20% of the samples aside for evaluation; the seed makes the split repeatable.
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1,
    y1,
    test_size=0.2,
    random_state=5,
)

In [None]:
# Fit a classifier on the training split of the three-class blob data.
# fit() prints the estimated means, variances and priors.
gnb_multiclass = GaussianNB()
gnb_multiclass.fit(X1_train, y1_train)

Mean:  [[ 4.99538779  5.48522533]
 [ 9.61165477  9.99829772]
 [20.48065    20.50524366]]
Varience:  [[1.44846347 1.50423486]
 [2.49993902 2.5165167 ]
 [2.37472197 1.09980241]]
Prior:  [0.375  0.3125 0.3125]


In [None]:
# Predict on the held-out split of the three-class data.
gnb_multiclass.predict(X1_test)

array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
# Per-class precision / recall / F1 of gnb_multiclass on the held-out split.
print(classification_report(y1_test, gnb_multiclass.predict(X1_test)))

              precision    recall  f1-score   support

           0       0.16      0.75      0.26         4
           1       0.00      0.00      0.00         8
           2       0.00      0.00      0.00         8

    accuracy                           0.15        20
   macro avg       0.05      0.25      0.09        20
weighted avg       0.03      0.15      0.05        20



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
