In [1]:
import numpy as np

# The fit function: fits the training data by building the table of counts used to compute the probabilities

In [2]:
def fit(x_train, y_train):
    """Build the count table used by the categorical Naive Bayes classifier.

    Parameters
    ----------
    x_train : array-like, 2-D
        One row per sample, one column per (categorical) feature.
    y_train : array-like, 1-D
        Class label of every row of ``x_train``.

    Returns
    -------
    dict
        ``result["total_data"]`` is the number of training samples.
        For every class ``c`` found in ``y_train``:

        * ``result[c]["total_count"]`` is the number of samples of class ``c``;
        * ``result[c][j][v]`` is the number of samples of class ``c`` whose
          feature ``j`` (numbered from 1) has the value ``v``. Every value
          that feature ``j`` takes anywhere in ``x_train`` gets an entry,
          so a value absent from class ``c`` is stored with a count of 0.
    """
    # Accept lists as well as arrays: the boolean-mask indexing below
    # only works on numpy arrays.
    x_train = np.asarray(x_train)
    y_train = np.asarray(y_train)

    # Stored up front so the key exists even when there is no class at all.
    result = {"total_data": len(y_train)}

    # A set gives the unique class labels.
    class_values = set(y_train)
    if not class_values:
        return result

    # These do not depend on the class, so compute them once rather than
    # once per class.
    num_features = x_train.shape[1]
    values_per_feature = [set(x_train[:, col]) for col in range(num_features)]

    for current_class in class_values:
        # Boolean mask selecting the rows that belong to current_class.
        current_class_rows = (y_train == current_class)
        x_train_current = x_train[current_class_rows]

        class_counts = {"total_count": len(x_train_current)}

        # Features are keyed 1..num_features; column j-1 holds feature j.
        for j in range(1, num_features + 1):
            feature_column = x_train_current[:, j - 1]
            class_counts[j] = {
                current_value: (feature_column == current_value).sum()
                for current_value in values_per_feature[j - 1]
            }

        result[current_class] = class_counts

    return result

# probability function
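This returns the log of the (unnormalised) probability of a class for a single data point: the log prior of the class plus the Laplace-smoothed log likelihood of each feature value.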

In [12]:
def probability(dictionary, x, current_class):
    """Return the unnormalised log-probability of ``current_class`` for ``x``.

    The result is ``log P(class)`` plus, for every feature ``j``,
    ``log P(x_j | class)`` estimated from the counts with Laplace (+1)
    smoothing.

    Parameters
    ----------
    dictionary : dict
        Count table in the layout produced by ``fit``:
        ``dictionary["total_data"]``, ``dictionary[c]["total_count"]`` and
        ``dictionary[c][j][value]`` with features numbered from 1.
    x : sequence
        Feature values of one data point; ``x[j-1]`` is feature ``j``.
    current_class : hashable
        Class label to score; must be a key of ``dictionary``.

    Returns
    -------
    float
        The log-probability (up to an additive constant shared by all
        classes).
    """
    class_counts = dictionary[current_class]
    class_total = class_counts["total_count"]

    # Log prior of the class.
    output = np.log(class_total) - np.log(dictionary["total_data"])

    # Every key except "total_count" is a feature index.
    num_features = len(class_counts) - 1

    for j in range(1, num_features + 1):
        value_counts = class_counts[j]

        # A value never seen during training has no entry in the table;
        # treat it as a zero count instead of raising KeyError. The +1 is
        # the Laplace correction, which keeps the probability non-zero.
        smoothed_count = value_counts.get(x[j - 1], 0) + 1
        # Laplace denominator: class size plus the number of known values
        # of this feature.
        smoothed_total = class_total + len(value_counts)

        output += np.log(smoothed_count) - np.log(smoothed_total)
    return output

# predictSinglePoint function
This predicts the class of a single data point, i.e. the class with the highest log-probability.

In [4]:
def predictSinglePoint(dictionary, x):
    """Return the class whose log-probability for ``x`` is the highest.

    Every key of ``dictionary`` other than "total_data" is treated as a
    class and scored with ``probability``. When several classes share the
    top score, the one encountered first wins. If ``dictionary`` holds no
    class at all, -1 is returned.
    """
    winner = -1
    winner_score = None  # None means "no class scored yet"

    for label in dictionary:
        # "total_data" is bookkeeping, not a class.
        if label == "total_data":
            continue

        score = probability(dictionary, x, label)

        # The first class is always accepted; afterwards only a strictly
        # better score replaces the current winner.
        if winner_score is None or score > winner_score:
            winner, winner_score = label, score

    return winner

# Predict function

In [5]:
def predict(dictionary, x_test):
    """Predict a class for every data point in ``x_test``.

    Each element of ``x_test`` is passed to ``predictSinglePoint`` together
    with ``dictionary``; the predictions are returned as a list in the same
    order as the input.
    """
    return [predictSinglePoint(dictionary, point) for point in x_test]

# makeLabelled function for Continuous valued features

In [6]:
def makeLabelled(column):
    """Replace the continuous values of ``column`` with the labels 0-3.

    The column mean ``m`` defines three cut points: ``0.5*m``, ``m`` and
    ``1.5*m``. A value gets the index of the first cut point it is strictly
    below, or 3 when it is below none of them.

    ``column`` is modified in place and the same object is returned.
    """
    middle = column.mean()
    # Ascending cut points (for a positive mean); the position in the tuple
    # is the label.
    cut_points = (middle * 0.5, middle, middle * 1.5)

    for idx in range(len(column)):
        value = column[idx]
        column[idx] = next(
            (label for label, limit in enumerate(cut_points) if value < limit),
            3,
        )
    return column

# Implementing on Iris dataset

In [7]:
from sklearn import datasets
# Load the Iris dataset that ships with scikit-learn.
iris = datasets.load_iris()
# X is the feature data and y the class labels. Note that X refers to the
# same object as iris.data (no copy is made here).
X = iris.data
y = iris.target

In [8]:
# Replace every continuous-valued column with categorical labels (0, 1, 2, 3).
# makeLabelled works in place, so each column is copied first: the labels are
# then always derived from the raw measurements in iris.data, and re-running
# this cell yields the same X instead of discretising already-discretised
# values.
X = np.column_stack(
    [makeLabelled(raw_column.copy()) for raw_column in iris.data.T]
)

In [9]:
from sklearn import model_selection
# Hold out 25% of the samples for testing; the fixed random_state makes the
# split reproducible.
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size = 0.25, random_state = 0)

In [10]:
# Build the per-class count table from the training split.
dictionary = fit(x_train, y_train)

In [13]:
# Predict a class for every row of the test split with the hand-written classifier.
y_pred = predict(dictionary, x_test)

In [14]:
from sklearn.metrics import classification_report, confusion_matrix
# Per-class precision/recall/F1 followed by the confusion matrix for the
# hand-written classifier's predictions on the test split.
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.94      1.00      0.97        16
           2       1.00      0.89      0.94         9

    accuracy                           0.97        38
   macro avg       0.98      0.96      0.97        38
weighted avg       0.98      0.97      0.97        38

[[13  0  0]
 [ 0 16  0]
 [ 0  1  8]]


# Inbuilt Gaussian Naive Bayes Classifier on Iris dataset

In [15]:
from sklearn.naive_bayes import GaussianNB
# scikit-learn's Gaussian Naive Bayes with its default settings.
clf = GaussianNB()

In [16]:
# NOTE: x_train/x_test hold the discretised labels (0-3) produced earlier,
# not the raw continuous measurements, so GaussianNB is fitted on the
# labelled data here.
clf.fit(x_train, y_train)
# y_pred is overwritten: from here on it holds the GaussianNB predictions.
y_pred = clf.predict(x_test)

In [17]:
# Same metrics as above, now for the GaussianNB predictions.
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.85      0.92        13
           1       0.76      1.00      0.86        16
           2       1.00      0.67      0.80         9

    accuracy                           0.87        38
   macro avg       0.92      0.84      0.86        38
weighted avg       0.90      0.87      0.87        38

[[11  2  0]
 [ 0 16  0]
 [ 0  3  6]]


# Inbuilt Multinomial Naive Bayes Classifier on Iris dataset
This works really badly on the Iris dataset, as the precision shows: none of the class 1 samples is classified correctly.

Multinomial NB works well for text classification

In [18]:
from sklearn.naive_bayes import MultinomialNB
# scikit-learn's Multinomial Naive Bayes with its default settings, trained
# and evaluated on the same (discretised) train/test split as above.
clf = MultinomialNB()
clf.fit(x_train, y_train)
# y_pred is overwritten again: it now holds the MultinomialNB predictions.
y_pred = clf.predict(x_test)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.85      0.92        13
           1       0.00      0.00      0.00        16
           2       0.36      1.00      0.53         9

    accuracy                           0.53        38
   macro avg       0.45      0.62      0.48        38
weighted avg       0.43      0.53      0.44        38

[[11  2  0]
 [ 0  0 16]
 [ 0  0  9]]
