In [9]:
from sklearn.model_selection import train_test_split
from scipy.stats import norm
import pandas as pd
from sklearn.datasets import make_blobs
import numpy as np
from dataclasses import dataclass

In [17]:
X,y = make_blobs(n_samples=10000, centers=2, n_features=2,center_box=(0,1), random_state=1)

In [21]:
X1 = np.random.rand(1000,2)
X1

array([[0.86504831, 0.81703566],
       [0.12323882, 0.90598921],
       [0.42040398, 0.89045924],
       ...,
       [0.47544797, 0.28100544],
       [0.19281377, 0.51733814],
       [0.60980102, 0.70719006]])

In [3]:
X.shape

(10000, 2)

In [13]:
@dataclass
class NaiveBayes:
  X: np.ndarray
  y: np.ndarray

  def __post_init__(self):
    self.splitData()

  def splitData(self):
    self.X_train,self.X_test,self.y_train,self.y_test = train_test_split(self.X, self.y,
                                                                         test_size=0.3, random_state=24,
                                                                         shuffle=True)

  def likelihood(self,X):
    mean = np.mean(X)
    std = np.std(X)
    dist = norm(mean,std)
    return dist

  def posterior(self, x, prior, dist1, dist2):
    return prior*dist1.pdf(x[0])*dist2.pdf(x[1])

  def fit(self):
    X0_train = self.X_train[self.y_train==0]
    X1_train = self.X_train[self.y_train==1]
    # Column 0 --Class 0
    X00_train = X0_train[:,0]
    # Column 1 --Class 0
    X10_train = X0_train[:,1]
     # Column 0  --Class 1
    X01_train = X1_train[:,0]
    # Column 1 --Class 1
    X11_train = X1_train[:,1]

    # calculating priors
    self.prior_X0 = len(X0_train) / len(self.X_train)
    self.prior_X1 = len(X1_train) / len(self.X_train)

    # Likelihood
    self.dist_X00 = self.likelihood(X00_train)
    self.dist_X10 = self.likelihood(X10_train)
    self.dist_X01 = self.likelihood(X01_train)
    self.dist_X11 = self.likelihood(X11_train)

  def predict(self):
    for sample,target in zip(self.X_test, self.y_test):
      py0 = self.posterior(sample,self.prior_X0,self.dist_X00,self.dist_X10)
      py1 = self.posterior(sample,self.prior_X1,self.dist_X10,self.dist_X11)

      print('P(y=0|%s)=%2f'%(sample,np.round(py0,2)))
      print('P(y=1|%s)=%2f'%(sample,np.round(py1,2)))
      print('Target was class{} and model predicted class{}'.format(target,
                                                                    np.argmax([py0,py1])))



In [14]:
clf = NaiveBayes(X,y)
clf.fit()
clf.predict()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
P(y=1|[-3.00261859  4.31312879])=0.000000
Target was class0 and model predicted class0
P(y=0|[-1.72042057  3.65838399])=0.060000
P(y=1|[-1.72042057  3.65838399])=0.000000
Target was class0 and model predicted class0
P(y=0|[-2.24898958  4.84672437])=0.060000
P(y=1|[-2.24898958  4.84672437])=0.000000
Target was class0 and model predicted class0
P(y=0|[-1.9264902   5.11564704])=0.060000
P(y=1|[-1.9264902   5.11564704])=0.000000
Target was class0 and model predicted class0
P(y=0|[-10.51859601  -4.27128949])=0.000000
P(y=1|[-10.51859601  -4.27128949])=0.000000
Target was class1 and model predicted class0
P(y=0|[-10.96082023  -3.62612067])=0.000000
P(y=1|[-10.96082023  -3.62612067])=0.000000
Target was class1 and model predicted class0
P(y=0|[-11.58280035  -3.69130312])=0.000000
P(y=1|[-11.58280035  -3.69130312])=0.000000
Target was class1 and model predicted class0
P(y=0|[-1.05485459  3.4246517 ])=0.040000
P(y=1|[-1.05485459  

In [15]:
X

array([[-3.08389358,  5.70067218],
       [-8.80258525, -5.07389013],
       [-1.68452735,  5.22511143],
       ...,
       [-8.65168502, -5.58805662],
       [-1.41968841,  3.76555241],
       [-9.9077506 , -3.42556702]])