<br>
   Description: Implement a Naive Bayes classifier from scratch as a machine learning technique for an email spam filter.<br>
   Author: Krutarth Trivedi,  MS Robotics'23, WPI | ktrivedi@wpi.edu<br>
   OS: ubuntu 20.04 LTS<br>
   Software/Tools/Language: Python, Jupyter Notebook<br>

# Dataset: dbworld_subjects_stemmed.csv

In [769]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.naive_bayes import MultinomialNB

In [770]:
def dataset_read(path):
    """Load a dataset CSV and split it into a feature table and a label column.

    The ``id`` column is dropped. Of the remaining columns, every one except
    the last is returned as a feature and the last one is returned as the
    label.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file; passed straight to ``pandas.read_csv``.

    Returns
    -------
    X : pandas.DataFrame
        All remaining columns except the last one.
    Y : pandas.Series
        The last column.

    Raises
    ------
    KeyError
        If the file has no ``id`` column (raised by ``DataFrame.drop``).
    """
    data_frame = pd.read_csv(path).drop(['id'], axis=1)
    X = data_frame.iloc[:, :-1]
    Y = data_frame.iloc[:, -1]
    return X, Y

In [771]:
def data_splitting(X, Y, train_size, random_state=None):
    """Randomly split features and labels into training and test subsets.

    Parameters
    ----------
    X : array-like
        Feature table.
    Y : array-like
        Labels, one per row of ``X``.
    train_size : float or int
        Forwarded to ``train_test_split`` as ``train_size``.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``train_test_split``. Leave as ``None`` (the default)
        for a different split on every call, or pass an int to make the
        split reproducible.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)``.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        X, Y, train_size=train_size, random_state=random_state
    )
    return x_train, x_test, y_train, y_test

In [772]:
def sklearn_NB_train_test_model(x_train, y_train, x_test):
    """Fit scikit-learn's ``MultinomialNB`` and predict labels for ``x_test``.

    A fresh classifier with default settings is trained on
    ``(x_train, y_train)``; its predictions for ``x_test`` are returned.
    """
    # ``fit`` returns the estimator itself, so the calls can be chained.
    return MultinomialNB().fit(x_train, y_train).predict(x_test)

In [773]:
class naive_bayes_algorithm:
    """Two-class Naive Bayes classifier over present/absent features.

    Throughout the class, the label value that compares equal to ``True``
    (i.e. 1) is tracked under the name "mail" / ``M`` and every other label
    under "spam" / ``S``.

    Typical use::

        clf = naive_bayes_algorithm(x_train, y_train)
        m, s = clf.histogram_of_elements()
        m, s = clf.laplacian_smoothing(m, s, alpha)
        clf.fit(m, s)
        y_pred = clf.prediction(x_test)
    """

    def __init__(self, x_train, y_train):
        """Store the training features (DataFrame) and labels (Series)."""
        self.x_train = x_train
        self.y_train = y_train

    def histogram_of_elements(self):
        """Count, per feature column, the training rows in which it is set.

        Returns
        -------
        (list of int, list of int)
            One entry per column of ``x_train``. The first list counts rows
            where the feature equals ``True`` and the label equals ``True``;
            the second counts rows where the feature equals ``True`` and the
            label does not.
        """
        # ``== True`` is an element-wise comparison here, not a truthiness test.
        present = np.asarray(self.x_train.to_numpy() == True, dtype=bool)  # noqa: E712
        is_mail = np.asarray(self.y_train.to_numpy() == True, dtype=bool)  # noqa: E712

        counts_mail = present[is_mail].sum(axis=0)
        counts_spam = present[~is_mail].sum(axis=0)
        return counts_mail.tolist(), counts_spam.tolist()

    def laplacian_smoothing(self, m, s, alpha):
        """Return add-``alpha`` (Laplace) smoothed copies of both count lists.

        ``alpha`` is added to every count of both classes unconditionally, so
        the two classes are always smoothed the same way. The input lists are
        left untouched. ``alpha`` is also remembered as ``self.alpha``.

        Parameters
        ----------
        m, s : sequence of numbers
            Per-feature counts for the "mail" and "spam" class.
        alpha : number
            Smoothing constant added to every count.

        Returns
        -------
        (list, list)
            The smoothed "mail" and "spam" counts.
        """
        self.alpha = alpha
        smoothed_m = [count + alpha for count in m]
        smoothed_s = [count + alpha for count in s]
        return smoothed_m, smoothed_s

    def find_probability_feature(self):
        """Turn the stored per-feature counts into per-class probabilities.

        Each count is divided by the sum of all counts of its class. The
        results are stored in ``probability_list_mail`` and
        ``probability_list_spam``.

        Raises
        ------
        ZeroDivisionError
            If a class has counts but they sum to zero.
        """
        total_mail = sum(self.Array_mail)
        total_spam = sum(self.Array_spam)

        self.probability_list_mail = [count / total_mail for count in self.Array_mail]
        self.probability_list_spam = [count / total_spam for count in self.Array_spam]

    def find_probability_class(self):
        """Estimate the class priors from the training labels.

        The "mail" prior is the fraction of non-zero labels in ``y_train``;
        the "spam" prior is the remaining fraction. They are stored in
        ``prior_probability_Mail`` and ``prior_probability_Spam``.
        """
        n_mail = np.count_nonzero(self.y_train)
        n_total = self.y_train.size

        self.prior_probability_Mail = n_mail / n_total
        self.prior_probability_Spam = (n_total - n_mail) / n_total

    def fit(self, Array_mail, Array_spam):
        """Store the per-feature counts and derive likelihoods and priors.

        Parameters
        ----------
        Array_mail, Array_spam : sequence of numbers
            Per-feature counts for the two classes (normally the smoothed
            output of ``laplacian_smoothing``).
        """
        self.Array_mail = Array_mail
        self.Array_spam = Array_spam
        self.find_probability_feature()
        self.find_probability_class()

    def prediction(self, x_test, y_test=None):
        """Predict a label for every row of ``x_test``.

        For each row, the log10 prior of a class plus the log10 probabilities
        of the features set in that row give the class score. A row with no
        feature set is scored by the priors alone, so exactly one prediction
        is produced per row.

        Parameters
        ----------
        x_test : pandas.DataFrame
            Rows to classify; columns must be in the training column order.
        y_test : optional
            Ignored. Accepted only so existing ``prediction(x_test, y_test)``
            calls keep working.

        Returns
        -------
        list of int
            ``1`` where the "mail" score is strictly higher, otherwise ``0``.
        """
        # Take each logarithm once instead of once per test row.
        log_mail = np.log10(np.asarray(self.probability_list_mail, dtype=float))
        log_spam = np.log10(np.asarray(self.probability_list_spam, dtype=float))
        log_prior_mail = np.log10(self.prior_probability_Mail)
        log_prior_spam = np.log10(self.prior_probability_Spam)

        present = np.asarray(x_test.to_numpy() == True, dtype=bool)  # noqa: E712

        predict = []
        for row in present:
            active = np.flatnonzero(row)
            # Built-in sum() adds left to right starting from the prior.
            score_mail = sum(log_mail[active], log_prior_mail)
            score_spam = sum(log_spam[active], log_prior_spam)
            predict.append(1 if score_mail > score_spam else 0)

        return predict

In [774]:
def f_measure(y_test, y_pred):
    """Return the F1 score of ``y_pred`` against the true labels ``y_test``.

    Thin wrapper around ``sklearn.metrics.f1_score`` with default settings.
    """
    return f1_score(y_test, y_pred)


Main Loop starts from here...

In [775]:
# Load dbworld_subjects_stemmed.csv: X gets the feature columns, Y the last column.
X, Y = dataset_read('dbworld_subjects_stemmed.csv')

In [776]:
X

Unnamed: 0,10th,13th,1st,2nd,31st,3rd,5th,6th,abstract,academ,...,usa,vacanc,valencia,vehicular,video,web,wireless,workflow,workshop,zurich
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
60,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
61,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
62,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [777]:
Y

0     0
1     0
2     0
3     0
4     0
     ..
59    1
60    0
61    0
62    0
63    0
Name: CLASS, Length: 64, dtype: int64

In [778]:
# Random split with train_size = 0.8; no seed is passed, so the split differs between runs.
x_train, x_test, y_train, y_test = data_splitting(X, Y, 0.8)

In [779]:
x_train

Unnamed: 0,10th,13th,1st,2nd,31st,3rd,5th,6th,abstract,academ,...,usa,vacanc,valencia,vehicular,video,web,wireless,workflow,workshop,zurich
41,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
38,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
35,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
34,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
16,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
45,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
32,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [780]:
y_train

41    1
38    1
35    1
34    1
16    0
45    1
4     0
21    0
32    1
29    1
15    0
12    0
8     0
17    0
36    1
24    0
62    0
44    1
37    1
54    0
55    0
49    1
18    0
59    1
39    1
11    0
56    1
61    0
31    1
9     0
50    0
28    1
7     0
33    1
42    1
25    1
20    0
47    1
30    1
26    1
13    0
53    0
63    0
60    0
43    1
6     0
19    0
14    0
52    0
40    1
22    0
Name: CLASS, dtype: int64

In [781]:
# The from-scratch classifier only stores the training data at construction time.
classifier = naive_bayes_algorithm(x_train, y_train)

In [782]:
# Per-column counts of training rows with the feature set, split by label (label 1 -> M, otherwise -> S).
List_n_column_element_M, List_n_column_element_S = classifier.histogram_of_elements()

In [783]:
List_n_column_element_M

[1,
 2,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 3,
 1,
 2,
 1,
 1,
 2,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 9,
 0,
 0,
 0,
 12,
 0,
 0,
 0,
 0,
 1,
 2,
 1,
 2,
 0,
 4,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 2,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 2,
 0,
 1,
 1,
 0,
 2,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 6,
 0,
 3,
 0,
 0,
 0,
 0,
 0,
 2,
 0,
 2,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 3,
 1,
 0,
 3,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 5,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 4,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 3,
 1,
 0,
 0,
 1,
 0,
 2,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 3,
 2,
 0,
 2,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 2,
 1,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 2,
 1,
 0,
 3,
 0]

In [784]:
List_n_column_element_S

[0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 2,
 0,
 0,
 0,
 0,
 1,
 2,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 3,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 2,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 4,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 2,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 5,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 3,
 2,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 2,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 13,
 3,
 3,
 2,
 1,
 1,
 1,
 1,
 2,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 4,
 1,
 1,
 2,
 1,
 0,
 2,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 2,
 1,
 0,
 1,
 0,
 0,
 2,
 0,
 3,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 5,
 1,
 2,
 0,
 0,
 0,
 3,
 0,
 1,
 0,
 0]

In [785]:
# Apply Laplace (add-alpha) smoothing to the per-feature counts with alpha = 1.
alpha_value = 1
ls_List_n_column_element_M, ls_List_n_column_element_S = classifier.laplacian_smoothing(List_n_column_element_M, List_n_column_element_S, alpha_value)

In [786]:
ls_List_n_column_element_M

[2,
 3,
 2,
 1,
 2,
 2,
 1,
 2,
 2,
 1,
 1,
 4,
 2,
 3,
 2,
 2,
 3,
 1,
 2,
 2,
 2,
 1,
 1,
 1,
 1,
 2,
 1,
 2,
 1,
 1,
 1,
 10,
 1,
 1,
 1,
 13,
 1,
 1,
 1,
 1,
 2,
 3,
 2,
 3,
 1,
 5,
 1,
 2,
 1,
 1,
 1,
 2,
 2,
 1,
 1,
 3,
 1,
 1,
 2,
 1,
 1,
 1,
 2,
 2,
 1,
 1,
 1,
 2,
 1,
 1,
 3,
 1,
 2,
 2,
 1,
 3,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 3,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 2,
 1,
 2,
 1,
 1,
 1,
 1,
 2,
 1,
 2,
 1,
 1,
 2,
 2,
 1,
 7,
 1,
 4,
 1,
 1,
 1,
 1,
 1,
 3,
 1,
 3,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 2,
 4,
 2,
 1,
 4,
 2,
 2,
 2,
 1,
 2,
 2,
 2,
 6,
 2,
 2,
 1,
 2,
 2,
 1,
 2,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 5,
 1,
 2,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 4,
 2,
 1,
 1,
 2,
 1,
 3,
 1,
 1,
 1,
 1,
 3,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 1,
 4,
 3,
 1,
 3,
 1,
 1,
 1,
 2,
 2,
 2,
 1,
 1,
 1,
 1,
 2,
 2,
 1,
 1,
 2,
 2,
 3,
 2,
 1,
 1,
 1,
 1,
 3,
 1,
 1,
 3,
 2,
 1,
 4,
 1]

In [787]:
ls_List_n_column_element_S

[1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 1,
 1,
 1,
 2,
 1,
 1,
 3,
 1,
 1,
 1,
 1,
 2,
 3,
 2,
 1,
 2,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 4,
 2,
 2,
 2,
 2,
 1,
 1,
 1,
 3,
 1,
 1,
 1,
 1,
 2,
 2,
 1,
 1,
 5,
 2,
 1,
 1,
 1,
 2,
 1,
 1,
 2,
 2,
 1,
 1,
 2,
 3,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 6,
 2,
 1,
 2,
 2,
 1,
 2,
 1,
 1,
 2,
 2,
 1,
 2,
 2,
 1,
 1,
 1,
 2,
 2,
 2,
 1,
 1,
 2,
 1,
 4,
 3,
 1,
 1,
 2,
 1,
 1,
 2,
 1,
 1,
 2,
 2,
 3,
 2,
 1,
 1,
 2,
 2,
 1,
 2,
 2,
 2,
 2,
 1,
 1,
 2,
 1,
 2,
 2,
 1,
 1,
 1,
 2,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 2,
 1,
 1,
 2,
 1,
 1,
 2,
 2,
 2,
 2,
 1,
 1,
 1,
 2,
 2,
 1,
 2,
 14,
 4,
 4,
 3,
 2,
 2,
 2,
 2,
 3,
 1,
 1,
 1,
 1,
 2,
 2,
 1,
 2,
 5,
 2,
 2,
 3,
 2,
 1,
 3,
 2,
 2,
 2,
 2,
 1,
 1,
 1,
 2,
 1,
 1,
 3,
 2,
 1,
 2,
 1,
 1,
 3,
 1,
 4,
 2,
 2,
 1,
 1,
 2,
 2,
 1,
 1,
 1,
 1,
 1,
 6,
 2,
 3,
 1,
 1,
 1,
 4,
 1,
 2,
 1,
 1]

In [788]:
# Derive the per-feature probabilities and the class priors from the smoothed counts.
classifier.fit(ls_List_n_column_element_M, ls_List_n_column_element_S)

In [789]:
# Predict the held-out rows: 1 when the "mail" score is higher, otherwise 0.
y_pred_scratch_algorithm = classifier.prediction(x_test, y_test)

In [790]:
y_pred_scratch_algorithm

[0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0]

In [791]:
y_test

1     0
58    1
46    1
5     0
10    0
2     0
51    0
23    0
3     0
27    1
48    1
57    1
0     0
Name: CLASS, dtype: int64

In [792]:
# F1 score of the from-scratch predictions against the true test labels.
score_scratch_algorithm = f_measure(y_test, y_pred_scratch_algorithm)
score_scratch_algorithm

0.9090909090909091

In [793]:
# Baseline: scikit-learn's MultinomialNB trained and evaluated on the same split.
y_pred_sklearn = sklearn_NB_train_test_model(x_train, y_train, x_test)

In [794]:
y_pred_sklearn

array([0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0])

In [795]:
# F1 score of the scikit-learn baseline, for comparison with the from-scratch score.
score_sklearn = f_measure(y_test, y_pred_sklearn)
score_sklearn

0.9090909090909091

# Dataset: dbworld_bodies_stemmed.csv

This section reuses the functions and the classifier class defined above.

Main loop starts from here.....

In [796]:
# Load dbworld_bodies_stemmed.csv: X gets the feature columns, Y the last column.
X, Y = dataset_read('dbworld_bodies_stemmed.csv')

In [797]:
X

Unnamed: 0,000euro,05102011,10th,11th,12noon,12th,13th,14th,15th,16th,...,ziyang,znie,zurich,zürich,ètop,özsu,û37,û42,û46,û56
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
60,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
61,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
62,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [798]:
Y

0     0
1     0
2     0
3     0
4     0
     ..
59    1
60    0
61    0
62    0
63    0
Name: CLASS, Length: 64, dtype: int64

In [799]:
# Random split with train_size = 0.8; no seed is passed, so the split differs between runs.
x_train, x_test, y_train, y_test = data_splitting(X, Y, 0.8)

In [800]:
x_train

Unnamed: 0,000euro,05102011,10th,11th,12noon,12th,13th,14th,15th,16th,...,ziyang,znie,zurich,zürich,ètop,özsu,û37,û42,û46,û56
56,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
30,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
25,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
31,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
37,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
50,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
35,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
46,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
48,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [801]:
y_train

56    1
20    0
30    1
25    1
31    1
37    1
50    0
35    1
46    1
48    1
55    0
27    1
5     0
34    1
42    1
10    0
60    0
44    1
8     0
15    0
39    1
4     0
0     0
36    1
32    1
3     0
38    1
53    0
57    1
62    0
26    1
24    0
22    0
54    0
1     0
13    0
6     0
17    0
61    0
7     0
41    1
47    1
40    1
23    0
21    0
18    0
33    1
14    0
59    1
49    1
29    1
Name: CLASS, dtype: int64

In [802]:
# A new classifier instance for this dataset; it replaces the one from the previous section.
classifier = naive_bayes_algorithm(x_train, y_train)

In [803]:
# Per-column counts of training rows with the feature set, split by label (label 1 -> M, otherwise -> S).
List_n_column_element_M, List_n_column_element_S = classifier.histogram_of_elements()

In [804]:
List_n_column_element_M

[0,
 0,
 3,
 2,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 2,
 1,
 0,
 1,
 0,
 0,
 1,
 3,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 2,
 0,
 0,
 1,
 0,
 0,
 0,
 2,
 0,
 3,
 0,
 0,
 1,
 1,
 2,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 6,
 1,
 5,
 1,
 2,
 0,
 1,
 1,
 16,
 1,
 1,
 14,
 0,
 4,
 1,
 1,
 1,
 6,
 1,
 1,
 1,
 1,
 0,
 3,
 0,
 0,
 0,
 2,
 4,
 2,
 1,
 1,
 0,
 5,
 0,
 1,
 7,
 1,
 9,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 13,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 3,
 0,
 0,
 1,
 1,
 0,
 1,
 5,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 8,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 5,
 1,
 0,
 0,
 2,
 5,
 0,
 0,
 1,
 0,
 0,
 3,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 3,
 10,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 2,
 2,
 0,
 0,
 3,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 3,
 1,
 1,
 3,
 0,
 5,
 21,
 1,
 0,
 0,
 0,
 0,
 5,
 3,
 1,
 1,
 10,
 0,
 2,
 2,
 0,
 0,
 5,
 2,
 15,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 2,
 0,
 1,
 3,
 1,
 9,
 1,
 3,
 1,
 7,
 

In [805]:
List_n_column_element_S

[0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 2,
 0,
 0,
 0,
 2,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 2,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 7,
 1,
 1,
 1,
 0,
 12,
 0,
 1,
 1,
 0,
 0,
 4,
 0,
 0,
 3,
 0,
 0,
 5,
 1,
 5,
 0,
 0,
 0,
 0,
 0,
 2,
 2,
 1,
 2,
 1,
 3,
 9,
 0,
 1,
 0,
 1,
 7,
 1,
 0,
 4,
 2,
 8,
 0,
 0,
 2,
 1,
 1,
 1,
 0,
 1,
 4,
 2,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 3,
 0,
 0,
 2,
 3,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 4,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 7,
 0,
 0,
 2,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 2,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 3,
 0,
 0,
 10,
 4,
 1,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 0,
 0,
 0,
 1,
 2,
 3,
 2,
 2,
 0,
 0,
 2,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 17,
 24,
 0,
 0,
 1,
 3,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 5,
 3,
 20,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 3,
 0,
 1,
 0,
 2,
 0,

In [806]:
# Apply Laplace (add-alpha) smoothing to the per-feature counts with alpha = 1.
alpha_value = 1
ls_List_n_column_element_M, ls_List_n_column_element_S = classifier.laplacian_smoothing(List_n_column_element_M, List_n_column_element_S, alpha_value)

In [807]:
ls_List_n_column_element_M

[1,
 1,
 4,
 3,
 1,
 1,
 2,
 1,
 2,
 2,
 2,
 1,
 1,
 2,
 2,
 1,
 1,
 1,
 1,
 2,
 3,
 2,
 1,
 2,
 1,
 1,
 2,
 4,
 1,
 2,
 1,
 1,
 1,
 2,
 1,
 1,
 3,
 1,
 1,
 2,
 1,
 1,
 1,
 3,
 1,
 4,
 1,
 1,
 2,
 2,
 3,
 1,
 2,
 1,
 1,
 2,
 2,
 1,
 2,
 1,
 1,
 7,
 2,
 6,
 2,
 3,
 1,
 2,
 2,
 17,
 2,
 2,
 15,
 1,
 5,
 2,
 2,
 2,
 7,
 2,
 2,
 2,
 2,
 1,
 4,
 1,
 1,
 1,
 3,
 5,
 3,
 2,
 2,
 1,
 6,
 1,
 2,
 8,
 2,
 10,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 14,
 1,
 2,
 2,
 2,
 2,
 1,
 1,
 4,
 1,
 1,
 2,
 2,
 1,
 2,
 6,
 1,
 1,
 2,
 1,
 1,
 2,
 1,
 2,
 1,
 1,
 1,
 2,
 1,
 9,
 2,
 2,
 2,
 2,
 2,
 1,
 2,
 1,
 2,
 2,
 1,
 1,
 2,
 1,
 6,
 2,
 1,
 1,
 3,
 6,
 1,
 1,
 2,
 1,
 1,
 4,
 2,
 1,
 1,
 1,
 2,
 1,
 2,
 2,
 2,
 2,
 1,
 4,
 11,
 2,
 2,
 2,
 1,
 2,
 2,
 1,
 2,
 2,
 1,
 3,
 3,
 1,
 1,
 4,
 2,
 1,
 2,
 2,
 2,
 1,
 2,
 1,
 1,
 2,
 2,
 2,
 4,
 2,
 2,
 4,
 1,
 6,
 22,
 2,
 1,
 1,
 1,
 1,
 6,
 4,
 2,
 2,
 11,
 1,
 3,
 3,
 1,
 1,
 6,
 3,
 16,
 1,
 1,
 2,
 1,
 1,
 2,
 2,
 2,
 2,
 3,
 1,
 2,
 4,
 2,
 10,
 2,
 4,
 2,
 8,

In [808]:
ls_List_n_column_element_S

[1,
 1,
 2,
 2,
 1,
 2,
 2,
 2,
 3,
 1,
 1,
 1,
 3,
 1,
 1,
 2,
 2,
 2,
 2,
 1,
 1,
 2,
 1,
 1,
 1,
 2,
 1,
 3,
 2,
 1,
 2,
 2,
 2,
 1,
 2,
 2,
 2,
 2,
 2,
 1,
 2,
 2,
 2,
 1,
 2,
 1,
 2,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 8,
 2,
 2,
 2,
 1,
 13,
 1,
 2,
 2,
 1,
 1,
 5,
 1,
 1,
 4,
 1,
 1,
 6,
 2,
 6,
 1,
 1,
 1,
 1,
 1,
 3,
 3,
 2,
 3,
 2,
 4,
 10,
 1,
 2,
 1,
 2,
 8,
 2,
 1,
 5,
 3,
 9,
 1,
 1,
 3,
 2,
 2,
 2,
 1,
 2,
 5,
 3,
 2,
 1,
 1,
 1,
 2,
 2,
 2,
 4,
 1,
 1,
 3,
 4,
 2,
 1,
 2,
 2,
 1,
 2,
 1,
 1,
 2,
 1,
 2,
 2,
 1,
 2,
 1,
 5,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 2,
 1,
 1,
 2,
 2,
 1,
 1,
 8,
 1,
 1,
 3,
 1,
 2,
 1,
 1,
 2,
 2,
 2,
 3,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 4,
 1,
 1,
 11,
 5,
 2,
 1,
 1,
 1,
 1,
 3,
 1,
 1,
 1,
 1,
 1,
 2,
 3,
 4,
 3,
 3,
 1,
 1,
 3,
 2,
 1,
 2,
 2,
 1,
 2,
 2,
 2,
 1,
 1,
 1,
 1,
 18,
 25,
 1,
 1,
 2,
 4,
 2,
 2,
 1,
 1,
 2,
 2,
 2,
 1,
 1,
 2,
 2,
 6,
 4,
 21,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 4,
 1,
 2,
 1,
 3,
 1

In [809]:
# Derive the per-feature probabilities and the class priors from the smoothed counts.
classifier.fit(ls_List_n_column_element_M, ls_List_n_column_element_S)

In [810]:
# Predict the held-out rows: 1 when the "mail" score is higher, otherwise 0.
y_pred_scratch_algorithm = classifier.prediction(x_test, y_test)

In [811]:
y_pred_scratch_algorithm

[0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1]

In [812]:
y_test

2     0
11    0
16    0
12    0
19    0
28    1
51    0
45    1
9     0
58    1
52    0
43    1
63    0
Name: CLASS, dtype: int64

In [813]:
# F1 score of the from-scratch predictions against the true test labels.
score_scratch_algorithm = f_measure(y_test, y_pred_scratch_algorithm)
score_scratch_algorithm

0.888888888888889

In [814]:
# Baseline: scikit-learn's MultinomialNB trained and evaluated on the same split.
y_pred_sklearn = sklearn_NB_train_test_model(x_train, y_train, x_test)

In [815]:
y_pred_sklearn

array([0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1])

In [816]:
# F1 score of the scikit-learn baseline, for comparison with the from-scratch score.
score_sklearn = f_measure(y_test, y_pred_sklearn)
score_sklearn

0.888888888888889