In [77]:
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics import accuracy_score

In [78]:
#Implementing Gaussian Naive Bayes Classifier

class CustomNaiveBayes():
  """Gaussian Naive Bayes classifier.

  Each feature is modelled, per class, as an independent normal
  distribution. Prediction picks the class with the largest log-posterior
  (log prior + sum of per-feature log-densities).

  Inputs may be NumPy arrays, pandas DataFrames/Series or nested lists;
  they are converted to NumPy arrays internally so rows are always
  iterated as samples.
  """

  def __init__(self, X, Y, var_smoothing=1e-9):
    """Record the problem dimensions and the set of class labels.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
    Y : array-like of shape (n_samples,)
    var_smoothing : float, default 1e-9
        Fraction of the largest feature variance that is added to every
        per-class variance in `train`, so that constant features do not
        cause a division by zero.
    """
    X = np.asarray(X, dtype=float)

    #Determining the class labels and total number of classes
    self.n, self.features = X.shape
    self.class_labels = np.unique(np.asarray(Y))
    self.no_classes = len(self.class_labels)
    self.var_smoothing = var_smoothing

  def train(self, X, Y):
    """Estimate the per-class mean, variance and prior from (X, Y).

    Raises
    ------
    ValueError
        If X is empty or X and Y have different numbers of samples.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y)
    if X.shape[0] == 0:
      raise ValueError("Cannot train on an empty dataset")
    if X.shape[0] != Y.shape[0]:
      raise ValueError(
          "X and Y must have the same number of samples "
          "(got %d and %d)" % (X.shape[0], Y.shape[0]))

    # Re-derive the dimensions from the data actually used for fitting so
    # the model stays consistent even if it differs from what was passed
    # to the constructor.
    self.n, self.features = X.shape
    self.class_labels = np.unique(Y)
    self.no_classes = len(self.class_labels)

    self.mean_c = np.zeros((self.no_classes, self.features))
    self.var_c = np.zeros((self.no_classes, self.features))
    self.prior_c = np.zeros(self.no_classes)

    #Calculating mean, variance and prior for each class
    for i, c in enumerate(self.class_labels):
      X_in_C = X[Y == c]
      self.mean_c[i, :] = X_in_C.mean(axis=0)
      # ndarray.var is the population variance (ddof=0).
      self.var_c[i, :] = X_in_C.var(axis=0)
      self.prior_c[i] = X_in_C.shape[0] / X.shape[0]

    # Variance smoothing: keeps every variance strictly positive so that
    # a feature that is constant within a class does not divide by zero.
    epsilon = self.var_smoothing * X.var(axis=0).max()
    if not epsilon > 0:
      # All features constant (or NaN): fall back to the raw constant.
      epsilon = self.var_smoothing
    self.var_c += epsilon

  def predict(self, X):
    """Return a list with the predicted class label of every row of X."""
    X = np.asarray(X, dtype=float)
    if X.size == 0:
      return []
    if X.ndim == 1:
      # A flat vector is a column of samples for a single-feature model,
      # otherwise it is one sample.
      X = X.reshape(-1, 1) if self.features == 1 else X.reshape(1, -1)

    #Calculating posterior probability for each class
    log_posteriors = self._joint_log_likelihood(X)

    #Determine the class with maximum posterior probability
    best = np.argmax(log_posteriors, axis=1)

    #Return a list of class labels
    return list(self.class_labels[best])

  def _joint_log_likelihood(self, X):
    """Return the (n_samples, n_classes) matrix of unnormalised log-posteriors.

    The Gaussian log-density is evaluated directly instead of taking
    log(pdf): exp() underflows to 0 for points far from the class mean,
    which would turn every class score into -inf.
    """
    diff = X[:, np.newaxis, :] - self.mean_c[np.newaxis, :, :]
    log_pdf = -0.5 * (np.log(2.0 * np.pi * self.var_c)
                      + diff ** 2 / self.var_c)
    return np.log(self.prior_c) + log_pdf.sum(axis=2)

  def calculate_pdf(self, class_no, x):
    """Return the per-feature Gaussian density of sample x under class `class_no`."""
    #Calculating the probability density function
    x = np.asarray(x, dtype=float)
    m = self.mean_c[class_no]
    v = self.var_c[class_no]
    pdf = (np.exp(-np.power((x - m), 2) / (2 * v))) / np.sqrt(2 * np.pi * v)
    return pdf

  

In [79]:
dataset = pd.read_csv('data.csv')
# Encode the diagnosis labels numerically: 'B' -> 0, 'M' -> 1.
dataset = dataset.replace({'B':0, 'M':1})

# Column 0 is the id and column 1 the diagnosis; everything after is a feature.
# The file also carries a trailing empty column ('Unnamed: 32', all NaN in the
# displayed frame). Left in place it turns every mean/std/variance into NaN and
# the classifier's posteriors with it, so drop columns that contain no data.
X = dataset.iloc[:,2:].dropna(axis=1, how='all')
Y = dataset.iloc[:,1]

# Standardise every feature to zero mean and unit (sample, ddof=1) std.
x_mean = X.mean()
x_std = X.std()
X_norm = (X - x_mean)/x_std

dataset

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,radius_se,texture_se,perimeter_se,area_se,smoothness_se,compactness_se,concavity_se,concave points_se,symmetry_se,fractal_dimension_se,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,1,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,1.0950,0.9053,8.589,153.40,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890,
1,842517,1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.01860,0.01340,0.01389,0.003532,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902,
2,84300903,1,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,0.7456,0.7869,4.585,94.03,0.006150,0.04006,0.03832,0.02058,0.02250,0.004571,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758,
3,84348301,1,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,0.4956,1.1560,3.445,27.23,0.009110,0.07458,0.05661,0.01867,0.05963,0.009208,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300,
4,84358402,1,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.011490,0.02461,0.05688,0.01885,0.01756,0.005115,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,926424,1,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,1.1760,1.2560,7.673,158.70,0.010300,0.02891,0.05198,0.02454,0.01114,0.004239,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115,
565,926682,1,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,0.7655,2.4630,5.203,99.04,0.005769,0.02423,0.03950,0.01678,0.01898,0.002498,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637,
566,926954,1,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,0.4564,1.0750,3.425,48.55,0.005903,0.03731,0.04730,0.01557,0.01318,0.003892,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820,
567,927241,1,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,0.7260,1.5950,5.772,86.22,0.006522,0.06158,0.07117,0.01664,0.02324,0.006185,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400,


In [None]:
# Hold out a stratified test split so that accuracy is measured on samples the
# model has not seen. Arrays are passed as ndarrays so rows are iterated as
# samples inside the classifier.
# NOTE: X_norm was standardised with statistics of the full dataset, so the
# test rows have influenced the scaling; fit the scaler on the training split
# only if a strictly leak-free estimate is required.
features = np.asarray(X_norm, dtype=float)
labels = np.asarray(Y)
X_train, X_test, Y_train, Y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42, stratify=labels)

model = CustomNaiveBayes(X_train, Y_train)
model.train(X_train, Y_train)
predictions = model.predict(X_test)
y_pred = predictions  # keep the original variable name available

print("Naive Bayes classification accuracy", accuracy_score(Y_test, predictions))