# ML - Laboratory 2: Naive Bayes Classifiers
## Santiago Álvarez Sepúlveda
## e-mail: saalvarezse@unal.edu.co


In [4]:
import pandas as pd
import numpy as np
import nltk
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.naive_bayes import MultinomialNB, BernoulliNB

## Document's Data Preprocessing

In [5]:
df = pd.read_csv('./documentsChina.csv', usecols=[0,1], encoding='latin-1')
df.columns = ['Class','Document']
# label spam as 1, not spam as 0
df['Class'] = df['Class'].replace(["yes","no"],[1,0])
# Remove Upper case letters
df['Document'] = df.Document.map(lambda x: x.lower())
# Remove any puntuation
df['Document'] = df.Document.str.replace('[^\w\s]', '')  
data = df.values
print data

[[1 u' chinese beijing chinese']
 [1 u' chinese chinese shanghai']
 [1 u' chinese macao']
 [0 u' tokyo japan chinese']
 [1 u' taipei taiwan']
 [1 u' macao taiwan shanghai']
 [0 u' japan sapporo']
 [0 u' sapporo osaka taiwan']
 [1 u' chinese chinese chinese tokyo japan']
 [0 u' taiwan taiwan sapporo']]


## Ngram Bayesian Classifier

In [6]:
class ngrams_bayes():
    
    def __init__(self, data, n=2, split=0.75):
        
        # split into training and testing data
        self.train_data, self.test_data = train_test_split(data, train_size=split)
        # convert into n grams
        self.train_data = [[item[0], self.ngrams(n, item[1])] for item in self.train_data]
        self.test_data = [[item[0], self.ngrams(n, item[1])] for item in self.test_data]
        print 'Training %d-gram data:'%(n)
        print self.train_data
        print '\nTesting %d-gram data:'%(n)
        print self.test_data
        
        # count unique n grams in training data
        flattened = [gram for message in self.train_data for gram in message[1]]
        self.unique = len(set(flattened))
        print '\nVocabulary in training data'
        print set(flattened)
        print '\nUnique %d-grams in training data'%(n)
        print 'N = %d'%(self.unique)
        
        # init dicts
        self.trainPositive = {}
        self.trainNegative = {}
        # counters
        self.posGramCount = 0
        self.negGramCount = 0
        self.spamCount = 0
        self.negSpamCount = 0
        # priors
        self.pA = 0
        self.pNotA = 0
        
    def ngrams(self, n, text):
        text = text.split(' ')
        grams = []
        for i in range(len(text)-n+1):
            gram = ' '.join(text[i:i+n])
            grams.append(gram)
        return grams 
    
    def train(self):
        print '\nTraining Naive Bayesian Classifier'
        for item in self.train_data:
            label = item[0]
            grams = item[1]
            print '\n- Class (c): %d\n- grams:'%(label)
            print grams
            if label == 1:
                self.spamCount += 1
            else:
                self.negSpamCount += 1
            for gram in grams:
                if label == 1:
                    self.trainPositive[gram] = self.trainPositive.get(gram, 0) + 1
                    self.posGramCount += 1
                else:
                    self.trainNegative[gram] = self.trainNegative.get(gram, 0) + 1
                    self.negGramCount += 1
        print '\nPositive Training Vocabulary'
        print self.trainPositive
        print '\nNegative Training Vocabulary'
        print self.trainNegative
        self.pA = self.spamCount/float(len(self.train_data))
        self.pNotA = 1.0 - self.pA
        print '\nPrior(1) = %f'%(self.pA)
        print '\nPrior(0) = %f'%(self.pNotA)
        
    def classify(self, text, alpha=1.0):
        
        self.alpha = alpha
        isSpam = self.pA * self.conditionalText(text, 1)
        notSpam = self.pNotA * self.conditionalText(text, 0)
        print 'P(t|1) = %f'%(isSpam)
        print 'P(t|0) = %f'%(notSpam)
        if (isSpam > notSpam):
            return 1
        else:
            return 0
        
    def conditionalText(self, grams, label):
        result = 1.0
        for ngram in grams:
            result *= self.conditionalNgram(ngram, label)
        return result
    
    def conditionalNgram(self, ngram, label):
        alpha = self.alpha
        if label == 1:
            return ((self.trainPositive.get(ngram,0)+alpha) /
                    float(self.posGramCount+alpha*self.unique))
        else:
            return ((self.trainNegative.get(ngram,0)+alpha) /
                    float(self.negGramCount+alpha*self.unique))
            
    def evaluate_test_data(self):
        print '\nTesting Naive Bayesian Classifier'
        results = []
        for test in self.test_data:
            label = test[0]
            text = test[1]
            print '\n- Class (c): %d\n- grams:'%(label)
            print text
            ruling = self.classify(text)
            print '- Classified as (%d)\n-labeled as (%d)'%(ruling, label)
            if ruling == label:
                print 'correct!'
                results.append(1) 
            else:
                print 'wrong.'
                results.append(0) 
        print("Evaluated {} test cases. {:.2f}% Accuracy".format(len(results), 100.0*sum(results)/float(len(results))))
        return sum(results)/float(len(results))

In [7]:
unigram_bayes = ngrams_bayes(data,1)
unigram_bayes.train()
unigram_bayes.evaluate_test_data()

Training 1-gram data:
[[0, [u'', u'taiwan', u'taiwan', u'sapporo']], [1, [u'', u'chinese', u'beijing', u'chinese']], [0, [u'', u'japan', u'sapporo']], [1, [u'', u'chinese', u'chinese', u'chinese', u'tokyo', u'japan']], [1, [u'', u'macao', u'taiwan', u'shanghai']], [0, [u'', u'tokyo', u'japan', u'chinese']], [1, [u'', u'chinese', u'macao']]]

Testing 1-gram data:
[[1, [u'', u'taipei', u'taiwan']], [0, [u'', u'sapporo', u'osaka', u'taiwan']], [1, [u'', u'chinese', u'chinese', u'shanghai']]]

Vocabulary in training data
set([u'', u'beijing', u'chinese', u'tokyo', u'shanghai', u'japan', u'taiwan', u'macao', u'sapporo'])

Unique 1-grams in training data
N = 9

Training Naive Bayesian Classifier

- Class (c): 0
- grams:
[u'', u'taiwan', u'taiwan', u'sapporo']

- Class (c): 1
- grams:
[u'', u'chinese', u'beijing', u'chinese']

- Class (c): 0
- grams:
[u'', u'japan', u'sapporo']

- Class (c): 1
- grams:
[u'', u'chinese', u'chinese', u'chinese', u'tokyo', u'japan']

- Class (c): 1
- grams:
[u''

0.6666666666666666

In [8]:
bigram_sms= ngrams_bayes(data,2) 
bigram_sms.train()
bigram_sms.evaluate_test_data()


Training 2-gram data:
[[1, [u' macao', u'macao taiwan', u'taiwan shanghai']], [1, [u' chinese', u'chinese chinese', u'chinese shanghai']], [0, [u' tokyo', u'tokyo japan', u'japan chinese']], [1, [u' chinese', u'chinese macao']], [0, [u' sapporo', u'sapporo osaka', u'osaka taiwan']], [1, [u' chinese', u'chinese chinese', u'chinese chinese', u'chinese tokyo', u'tokyo japan']], [0, [u' japan', u'japan sapporo']]]

Testing 2-gram data:
[[1, [u' taipei', u'taipei taiwan']], [1, [u' chinese', u'chinese beijing', u'beijing chinese']], [0, [u' taiwan', u'taiwan taiwan', u'taiwan sapporo']]]

Vocabulary in training data
set([u'macao taiwan', u' tokyo', u'tokyo japan', u'taiwan shanghai', u' chinese', u'chinese chinese', u'chinese tokyo', u'sapporo osaka', u'japan chinese', u' sapporo', u'osaka taiwan', u'chinese macao', u'chinese shanghai', u' japan', u'japan sapporo', u' macao'])

Unique 2-grams in training data
N = 16

Training Naive Bayesian Classifier

- Class (c): 1
- grams:
[u' macao', u'

0.6666666666666666

In [9]:
trigram_sms = ngrams_bayes(data,3) 
trigram_sms.train()
trigram_sms.evaluate_test_data()

Training 3-gram data:
[[1, [u' chinese chinese', u'chinese chinese shanghai']], [1, [u' chinese beijing', u'chinese beijing chinese']], [0, [u' tokyo japan', u'tokyo japan chinese']], [0, [u' japan sapporo']], [1, [u' macao taiwan', u'macao taiwan shanghai']], [0, [u' sapporo osaka', u'sapporo osaka taiwan']], [0, [u' taiwan taiwan', u'taiwan taiwan sapporo']]]

Testing 3-gram data:
[[1, [u' chinese chinese', u'chinese chinese chinese', u'chinese chinese tokyo', u'chinese tokyo japan']], [1, [u' taipei taiwan']], [1, [u' chinese macao']]]

Vocabulary in training data
set([u' tokyo japan', u' chinese beijing', u'chinese chinese shanghai', u'tokyo japan chinese', u'sapporo osaka taiwan', u' macao taiwan', u'macao taiwan shanghai', u' chinese chinese', u'chinese beijing chinese', u' taiwan taiwan', u' japan sapporo', u'taiwan taiwan sapporo', u' sapporo osaka'])

Unique 3-grams in training data
N = 13

Training Naive Bayesian Classifier

- Class (c): 1
- grams:
[u' chinese chinese', u'chi

0.3333333333333333

Here we can see that our bayesian classifier performs well with unigrams, ok with bigrams, and is basically guessing randomly when it comes to trigrams. Increasing the size of your grams does not help this classifier classify this dataset. This is likely due to the data being made of up of short messages with highly specific and colloquial words. Nearly none of the trigrams will occur more than once in this dataset. I imagine that larger ngrams used in a baysian classifer would work well with something like product reviews which are longer than text messages and use less colloquial language. 

## Naive Bayesian Classifier with Scikit-Learn

In [10]:
#nltk.download()  

### Deeper Preprocessing of the Documents

In [11]:
# Tokenize the words in every message
df['Document'] = df['Document'].apply(nltk.word_tokenize)
print df

   Class                                   Document
0      1                [chinese, beijing, chinese]
1      1               [chinese, chinese, shanghai]
2      1                           [chinese, macao]
3      0                    [tokyo, japan, chinese]
4      1                           [taipei, taiwan]
5      1                  [macao, taiwan, shanghai]
6      0                           [japan, sapporo]
7      0                   [sapporo, osaka, taiwan]
8      1  [chinese, chinese, chinese, tokyo, japan]
9      0                  [taiwan, taiwan, sapporo]


In [12]:
# Use Porter Stemmer
stemmer = PorterStemmer()
df['Document'] = df['Document'].apply(lambda x: [stemmer.stem(y) for y in x])
print df

   Class                                Document
0      1                  [chines, beij, chines]
1      1              [chines, chines, shanghai]
2      1                         [chines, macao]
3      0                  [tokyo, japan, chines]
4      1                        [taipei, taiwan]
5      1               [macao, taiwan, shanghai]
6      0                        [japan, sapporo]
7      0                [sapporo, osaka, taiwan]
8      1  [chines, chines, chines, tokyo, japan]
9      0               [taiwan, taiwan, sapporo]


In [13]:
# This converts the list of words into space-separated strings
df['Document'] = df['Document'].apply(lambda x: ' '.join(x))
count_vect = CountVectorizer()  
counts = count_vect.fit_transform(df['Document'])
print counts
print df

  (0, 0)	1
  (0, 1)	2
  (1, 6)	1
  (1, 1)	2
  (2, 3)	1
  (2, 1)	1
  (3, 2)	1
  (3, 9)	1
  (3, 1)	1
  (4, 8)	1
  (4, 7)	1
  (5, 8)	1
  (5, 3)	1
  (5, 6)	1
  (6, 5)	1
  (6, 2)	1
  (7, 4)	1
  (7, 5)	1
  (7, 8)	1
  (8, 2)	1
  (8, 9)	1
  (8, 1)	3
  (9, 5)	1
  (9, 8)	2
   Class                          Document
0      1                chines beij chines
1      1            chines chines shanghai
2      1                      chines macao
3      0                tokyo japan chines
4      1                     taipei taiwan
5      1             macao taiwan shanghai
6      0                     japan sapporo
7      0              sapporo osaka taiwan
8      1  chines chines chines tokyo japan
9      0             taiwan taiwan sapporo


In [14]:
#
transformer = TfidfTransformer().fit(counts)
counts = transformer.transform(counts)  
print counts

  (0, 1)	0.764949063238371
  (0, 0)	0.6440907782686681
  (1, 1)	0.8131579832939391
  (1, 6)	0.5820430346678277
  (2, 1)	0.5726575030814076
  (2, 3)	0.8197947207469487
  (3, 1)	0.4653431056969696
  (3, 9)	0.6661675073383854
  (3, 2)	0.5828178498869621
  (4, 7)	0.8341378713086336
  (4, 8)	0.5515559913995145
  (5, 6)	0.61957540186839
  (5, 3)	0.61957540186839
  (5, 8)	0.48192597232276857
  (6, 2)	0.7071067811865475
  (6, 5)	0.7071067811865475
  (7, 8)	0.4686906257315891
  (7, 5)	0.5271685592911145
  (7, 4)	0.708817612257386
  (8, 1)	0.8445513958589379
  (8, 9)	0.4030092286105375
  (8, 2)	0.352585452631641
  (9, 8)	0.8716186013638371
  (9, 5)	0.4901846731146827


  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):


# Training and Evaluating the Multinomial Model

In [15]:
X_train, X_test, y_train, y_test = train_test_split(counts, df['Class'], test_size=0.25, random_state=69)
print 'X_train:'
print X_train
print '\nX_test:'
print X_test
print '\ny_train:'
print y_train
print '\ny_test:'
print y_test
model = MultinomialNB().fit(X_train, y_train)
predicted = model.predict(X_test)
print '\nAccuracy: %f'%(np.mean(predicted == y_test))
print '\nConfusion Matrix:'
print confusion_matrix(y_test, predicted)

X_train:
  (0, 8)	0.48192597232276857
  (0, 3)	0.61957540186839
  (0, 6)	0.61957540186839
  (1, 0)	0.6440907782686681
  (1, 1)	0.764949063238371
  (2, 5)	0.4901846731146827
  (2, 8)	0.8716186013638371
  (3, 6)	0.5820430346678277
  (3, 1)	0.8131579832939391
  (4, 8)	0.5515559913995145
  (4, 7)	0.8341378713086336
  (5, 4)	0.708817612257386
  (5, 5)	0.5271685592911145
  (5, 8)	0.4686906257315891
  (6, 5)	0.7071067811865475
  (6, 2)	0.7071067811865475

X_test:
  (0, 2)	0.5828178498869621
  (0, 9)	0.6661675073383854
  (0, 1)	0.4653431056969696
  (1, 2)	0.352585452631641
  (1, 9)	0.4030092286105375
  (1, 1)	0.8445513958589379
  (2, 3)	0.8197947207469487
  (2, 1)	0.5726575030814076

y_train:
5    1
0    1
9    0
1    1
4    1
7    0
6    0
Name: Class, dtype: int64

y_test:
3    0
8    1
2    1
Name: Class, dtype: int64

Accuracy: 0.666667

Confusion Matrix:
[[0 1]
 [0 2]]


Up to this point it has to be said that the results of the classifiers implemented by hand, using python basic tools, and using scikitlearn have thrown differnt results, mainly because of the information they are mannaging in each case. 

First of all, actually the classifier implemented by hand and using basic python tools work exactly the same, but they differ int he final result because in the python script, symbols like spaces (" ") are being considered as tokens, this changes the weights of every word in each document and in fact the probabilities of a word to belong to a document of a certain class, if this symbols were not taken in count, the results should be exactly the same.

On the other hand, the classifier of scikitlearn is giving some differences in the results at a probabilities level, not so much at the decission level, because it changes the information given to the classifier, and it has generated some tokens for each document, or it has coded the documents to handle their information in an easier way, this affects the numerical results of the classifier, but not actually the decisions it takes.

All of this methods are different aproaches to the Naive Bayesian Classifiers, which can throw different results depending on the amount of information it has to handle and how the information is represented.

## Training and Evaluating the Multivariate Bernoulli Model

In [16]:
X_train, X_test, y_train, y_test = train_test_split(counts, df['Class'], test_size=0.25, random_state=69)
print 'X_train:'
print X_train
print '\nX_test:'
print X_test
print '\ny_train:'
print y_train
print '\ny_test:'
print y_test
model = BernoulliNB().fit(X_train, y_train)
predicted = model.predict(X_test)
print '\nAccuracy: %f'%(np.mean(predicted == y_test))
print '\nConfusion Matrix:'
print confusion_matrix(y_test, predicted)

X_train:
  (0, 8)	0.48192597232276857
  (0, 3)	0.61957540186839
  (0, 6)	0.61957540186839
  (1, 0)	0.6440907782686681
  (1, 1)	0.764949063238371
  (2, 5)	0.4901846731146827
  (2, 8)	0.8716186013638371
  (3, 6)	0.5820430346678277
  (3, 1)	0.8131579832939391
  (4, 8)	0.5515559913995145
  (4, 7)	0.8341378713086336
  (5, 4)	0.708817612257386
  (5, 5)	0.5271685592911145
  (5, 8)	0.4686906257315891
  (6, 5)	0.7071067811865475
  (6, 2)	0.7071067811865475

X_test:
  (0, 2)	0.5828178498869621
  (0, 9)	0.6661675073383854
  (0, 1)	0.4653431056969696
  (1, 2)	0.352585452631641
  (1, 9)	0.4030092286105375
  (1, 1)	0.8445513958589379
  (2, 3)	0.8197947207469487
  (2, 1)	0.5726575030814076

y_train:
5    1
0    1
9    0
1    1
4    1
7    0
6    0
Name: Class, dtype: int64

y_test:
3    0
8    1
2    1
Name: Class, dtype: int64

Accuracy: 0.666667

Confusion Matrix:
[[0 1]
 [0 2]]


After running the Multinomial NB classifier and the Bernoulli NB classifier, we see that they don't differ much in the decissions they make and the accuracy they have to do the job correctly. They are different approaches inside, as they have different models to calculate the conditional probabilities and the escence of the classification (binary to counting) changes a bit the information handeled but not the way the process happens.

## Naive Bayesian Classification System for lines from Biggie Smalls and 2Pac

In [17]:
biggie_df = pd.read_csv('./biggie_lyrics.csv', usecols=[1], encoding='latin-1', header=None)
biggie_df.columns = ["lyrics"]
biggie_df["lyrics"] = biggie_df["lyrics"].str.replace('[^\w\s]','')
biggie_df["lyrics"] = biggie_df["lyrics"].str.lower()

In [18]:
biggie_df.tail()

Unnamed: 0,lyrics
11,relax and take notes while i take tokes of the...
12,good evenin ladies and gentlemen\nhows everybo...
13,who shot ya\nseperate the weak from the obsole...
14,when i die fuck it i wanna go to hell\ncause i...
15,when the lala hits ya lyrics just splits ya\nh...


In [19]:
pac_df = pd.read_csv('./2pac_lyrics.csv', usecols=[1], encoding='latin-1', header=None)
pac_df.columns = ["lyrics"]
pac_df["lyrics"] = pac_df["lyrics"].str.replace('[^\w\s]','')
pac_df["lyrics"] = pac_df["lyrics"].str.lower()

In [20]:
pac_df.head()

Unnamed: 0,lyrics
0,little something for my godson elijah\nand a l...
1,yo mo bee mayn drop that shit\nyou know what t...
2,rest in peace to my motherfucker biggy smallz\...
3,makaveli in this killuminati\nall through your...
4,its just me against the world\nnothin to lose\...


In [21]:
biggie_lyrics = biggie_df["lyrics"].values
biggie_lyrics = [ song.split('\n') for song in biggie_lyrics]
biggie_lyrics = [line for song in biggie_lyrics for line in song]
pac_lyrics = pac_df["lyrics"].values
pac_lyrics = [ song.split('\n') for song in pac_lyrics]
pac_lyrics = [line for song in pac_lyrics for line in song]

rap_lines = [] 

for line in biggie_lyrics:
    if len(line.split()) > 3:
        rap_lines.append(np.array([0,str(line)]))
        
for line in pac_lyrics:
    if len(line.split()) > 3:
        rap_lines.append(np.array([1,str(line)]))
        
rap_lines = np.array(rap_lines)
rap_lines = pd.DataFrame(rap_lines)
rap_lines.columns = ["label","line"]
rap_lines['label'] = rap_lines['label'].replace(['0','1'],[0,1])
rap_lines.head()

Unnamed: 0,label,line
0,0,fuck all you hoes
1,0,get a grip motherfucker
2,0,yeah this album is dedicated to all the teache...
3,0,id never amount to nothin to all the people th...
4,0,buildings that i was hustlin in front of that ...


In [22]:
# Tokenize the words in every message
rap_lines['line'] = rap_lines['line'].apply(nltk.word_tokenize)
# Use Porter Stemmer
stemmer = PorterStemmer()
rap_lines['line'] = rap_lines['line'].apply(lambda x: [stemmer.stem(y) for y in x])
# This converts the list of words into space-separated strings
rap_lines['line'] = rap_lines['line'].apply(lambda x: ' '.join(x))
count_vect = CountVectorizer()  
counts = count_vect.fit_transform(rap_lines['line'])
#
transformer = TfidfTransformer().fit(counts)
counts = transformer.transform(counts)  
print counts
print rap_lines

  (0, 849)	0.49087318817854825
  (0, 58)	0.43268940172940906
  (0, 2501)	0.31850678004835703
  (0, 1027)	0.6858402334521511
  (1, 877)	0.41830230655719114
  (1, 932)	0.7601505859197147
  (1, 1431)	0.49718635043095055
  (2, 58)	0.20803213246469884
  (2, 2493)	0.34411189665671293
  (2, 2215)	0.2250169996870463
  (2, 53)	0.4322487816477064
  (2, 1131)	0.2057966169419576
  (2, 561)	0.410044488257387
  (2, 2255)	0.1425118077471302
  (2, 2203)	0.1160197544126776
  (2, 2176)	0.4322487816477064
  (2, 2201)	0.2028127062967269
  (2, 2261)	0.3183733108460118
  (2, 1357)	0.16506204370082317
  (3, 58)	0.20116206656478056
  (3, 2255)	0.2756109781373328
  (3, 2203)	0.22437662185626947
  (3, 2201)	0.19611500704665685
  (3, 1089)	0.3383271897309154
  (3, 1467)	0.24968336690403506
  :	:
  (1967, 2203)	0.15468580785408723
  (1967, 1357)	0.22007248425203085
  (1967, 1515)	0.2704043589421255
  (1967, 1239)	0.34580186212426445
  (1967, 2102)	0.37858120072806645
  (1967, 1199)	0.5094034783786443
  (1967, 189

In [27]:
def train_MultiNB_Sharokhian(counts, rap_lines, test_size=0.25, random_state=69):
    X_train, X_test, y_train, y_test = train_test_split(counts, rap_lines['label'], test_size=0.25, random_state=69)
    model = MultinomialNB().fit(X_train, y_train)
    predicted = model.predict(X_test)
    accuracy = np.mean(predicted == y_test)
    print '\nAccuracy: %f'%(accuracy)
    print '\nConfusion Matrix:'
    print confusion_matrix(y_test, predicted)
    return accuracy

In [28]:
train_MultiNB_Sharokhian(counts, rap_lines)


Accuracy: 0.691684

Confusion Matrix:
[[ 91 136]
 [ 16 250]]


0.691683569979716

In [30]:
results = []
for _ in range(10):
    accuracy = train_MultiNB_Sharokhian(counts, rap_lines)
    results.append(accuracy)
print("Average Accuracy: {:.2f}".format(sum(results)/float(len(results))))


Accuracy: 0.691684

Confusion Matrix:
[[ 91 136]
 [ 16 250]]

Accuracy: 0.691684

Confusion Matrix:
[[ 91 136]
 [ 16 250]]

Accuracy: 0.691684

Confusion Matrix:
[[ 91 136]
 [ 16 250]]

Accuracy: 0.691684

Confusion Matrix:
[[ 91 136]
 [ 16 250]]

Accuracy: 0.691684

Confusion Matrix:
[[ 91 136]
 [ 16 250]]

Accuracy: 0.691684

Confusion Matrix:
[[ 91 136]
 [ 16 250]]

Accuracy: 0.691684

Confusion Matrix:
[[ 91 136]
 [ 16 250]]

Accuracy: 0.691684

Confusion Matrix:
[[ 91 136]
 [ 16 250]]

Accuracy: 0.691684

Confusion Matrix:
[[ 91 136]
 [ 16 250]]

Accuracy: 0.691684

Confusion Matrix:
[[ 91 136]
 [ 16 250]]
Average Accuracy: 0.69


After running this method of classification for the data of N. Lidel, we see that the average result is pretty much the same as the one that Lidel obtained after doing the classification with his own method. So that shows that the Bayesian Classifiers All have very similar behaviours if the information the mannage is equal, and may differ if the information is changed or understood in a different way, mainly this is the greatest sensitivity that this classifiers have.