# Boosting - Mohammed Adib Oumer

In [None]:
import os, os.path
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB,MultinomialNB
from sklearn.ensemble import AdaBoostClassifier

## Restructure images and prepare training and test(or validation) dataset

In [None]:
# When re-running the cell below with a different dimension while saving the resized images to a folder, run this command first.
# !rmdir /s /q xray

In [None]:
def process_images(dimension):
    """Load the tumor MRI images as flattened, grayscale, resized rows.

    Every image under ``tumor_mri/{test,train}/<class>`` with a ``.jpeg``,
    ``.jpg`` or ``.png`` extension is converted to grayscale, resized to
    ``(dimension[0], dimension[1])``, flattened and suffixed with its class
    label (glioma=0, meningioma=1, notumor=2, pituitary=3).

    A source directory is skipped when its matching ``mri/...`` output
    directory already exists.

    Parameters
    ----------
    dimension : sequence of two ints
        Target size passed to ``Image.resize``.

    Returns
    -------
    (train, test) : tuple of 2-D float arrays
        Each has ``prod(dimension) + 1`` columns: pixel values scaled by
        1/255 followed by the label. Rows are randomly permuted.
    """
    n_pixels = int(np.prod(dimension))

    in_directory = ["tumor_mri/test/glioma", "tumor_mri/test/meningioma",
                    "tumor_mri/test/notumor", "tumor_mri/test/pituitary",
                    "tumor_mri/train/glioma", "tumor_mri/train/meningioma",
                    "tumor_mri/train/notumor", "tumor_mri/train/pituitary"]

    out_directory = ["mri/test/glioma", "mri/test/meningioma",
                     "mri/test/notumor", "mri/test/pituitary",
                     "mri/train/glioma", "mri/train/meningioma",
                     "mri/train/notumor", "mri/train/pituitary"]

    valid_images = (".jpeg", ".jpg", ".png")
    # Any class folder not listed here falls back to label 3 (pituitary).
    label_of = {"glioma": 0., "meningioma": 1., "notumor": 2.}

    # Rows are collected in lists and stacked once at the end; stacking
    # one row at a time would copy the whole array for every image.
    train_rows, test_rows = [], []

    for src, dst in zip(in_directory, out_directory):
        if os.path.exists(dst):
            continue
        # os.makedirs(dst)  # enable together with img.save below
        group = src.split("/")[1:]
        label = label_of.get(group[-1], 3.)
        bucket = train_rows if group[0] == "train" else test_rows

        for f in os.listdir(src):
            ext = os.path.splitext(f)[1]
            if ext.lower() not in valid_images:
                continue
            # The context manager releases the file handle deterministically.
            with Image.open(os.path.join(src, f)) as raw:
                img = raw.convert("L")
            img = img.resize(size=(dimension[0], dimension[1]))  # resize

            # img.save(f"{dst}/{f}")  # optional - save resized image to folder
            row = np.empty(n_pixels + 1)
            row[:-1] = np.asarray(img).reshape(n_pixels)
            row[-1] = label
            bucket.append(row)

    def _assemble(rows):
        # Stack the collected rows and scale the pixel columns to [0, 1].
        if not rows:
            return np.zeros((0, n_pixels + 1))
        stacked = np.vstack(rows)
        stacked[:, :-1] /= 255
        return stacked

    train = _assemble(train_rows)
    test = _assemble(test_rows)
    # remove the below if you shuffle elsewhere
    train = np.random.permutation(train)
    test = np.random.permutation(test)
    return train, test


In [98]:
# By inspection of the files, the smallest width is 384 and the smallest
# height is 127, so a 64x64 target only ever downsamples.
side = 64  # will increase this later
dimension = np.array([side, side])
train, test = process_images(dimension)
(train.shape, test.shape)

((5712, 4097), (1311, 4097))

## Use Naive Bayes weak classifier with Adaptive Boosting (AdaBoost)

In [99]:
def bayespost(data,px,py):
    """Return the normalised class posterior of a Bernoulli naive Bayes model.

    ``data`` is one sample of per-feature values, ``px`` holds the
    per-feature, per-class probabilities (features along axis 0) and ``py``
    the class priors. The posterior is proportional to p(x|y) p(y).
    """
    column = data.reshape((len(data), 1))

    # Log-likelihood per class: sum over features of
    # x*log(p) + (1-x)*log(1-p), computed in log space.
    log_on = np.log(px)
    log_off = np.log(1 - px)
    log_likelihood = (log_on * column + log_off * (1 - column)).sum(0)

    scores = np.log(py) + log_likelihood
    # Subtract the largest score before exponentiating to avoid
    # overflow/underflow.
    scores = scores - np.max(scores)

    unnormalised = np.exp(scores)
    return unnormalised / np.sum(unnormalised)

def gaussNB(data,means,vars,n_classes,py):
    """Predict class indices with a Gaussian naive Bayes model.

    Parameters
    ----------
    data : 2-D array
        One sample per row; the last column is dropped (it holds the label
        in this notebook) and the remaining columns are the features.
    means, vars : 2-D arrays
        Per-feature mean and variance, one column per class.
    n_classes : int
        Number of classes (columns of ``means``/``vars`` that are used).
    py : 1-D array
        Class prior probabilities.

    Returns
    -------
    1-D integer array with the index of the highest-posterior class per row.
    """
    features = data[:, :-1]
    scores = np.empty((len(data), n_classes))
    # One class at a time keeps the temporary at (n_samples, n_features)
    # instead of materialising an (n_samples, n_features, n_classes) cube.
    for k in range(n_classes):
        var_k = vars[:, k]
        # log N(x; mu, var) = -0.5*log(2*pi*var) - 0.5*(x - mu)^2 / var
        log_norm = -0.5 * np.log(2 * np.pi * var_k)
        sq_dist = (features - means[:, k]) ** 2
        scores[:, k] = (log_norm - 0.5 * sq_dist / var_k).sum(1)
    return (scores + np.log(py)).argmax(1)

def Loss(y,yhat):
    """Return the misclassification rate: the fraction of entries where ``y`` and ``yhat`` differ."""
    mismatches = (y != yhat)
    n_wrong = mismatches.sum()
    return n_wrong / len(y)

In [100]:
# from sklearn.preprocessing import OneHotEncoder
# yyy = OneHotEncoder(sparse=False).fit_transform(test[:,-1].reshape(-1,1))

In [121]:
import random
class data_iterable(object):
    """Iterable that yields shuffled mini-batches of (features, labels).

    The sample order is shuffled once, at construction time; every pass
    over the object replays the same order. ``features`` and ``labels``
    must support indexing with a list of integer positions (for example
    NumPy arrays).
    """

    def __init__(self, features, labels, batch_size):
        self.features = features
        self.labels = labels
        self.batch_size = batch_size

        self.num_examples = len(features)
        self.indices = list(range(self.num_examples))
        # The examples are read at random, in no particular order
        random.shuffle(self.indices)

    def data_iterator(self):
        """Yield successive batches; the final batch may be smaller."""
        for start in range(0, self.num_examples, self.batch_size):
            # A plain list slice is enough to fancy-index the arrays and
            # needs no tensor library; slicing clamps at the end.
            batch_indices = self.indices[start:start + self.batch_size]
            yield self.features[batch_indices], self.labels[batch_indices]

    def __iter__(self):
        return self.data_iterator()

class LogRegression():
    """Multinomial (softmax) logistic regression fitted by batch gradient descent.

    The model is parameterised with a sign flip: class probabilities are
    ``softmax(-X @ W)``. ``fit`` prepends a bias column of ones to ``X``.
    ``predict`` returns the column index of the most probable class, i.e.
    the position of the class in the sorted unique training labels.
    """

    def onehotencode(self,y):
        """Return a float one-hot matrix with one column per sorted unique label."""
        classes = np.sort(np.unique(y))
        return (y.reshape((len(y),1)) == classes).astype(float)

    def softmax(self,Z,axis=1):
        """Return the softmax of ``Z`` along ``axis``.

        The maximum along ``axis`` is subtracted first so that large
        logits do not overflow ``exp``; the result is unchanged.
        """
        shifted = Z - np.max(Z, axis=axis, keepdims=True)
        Z_exp = np.exp(shifted)
        return Z_exp / np.sum(Z_exp, axis=axis, keepdims=True)

    def loss(self,X, y, W, mu=0.0): # y one hot encoded
        """Return the mean negative log-likelihood plus ``mu * ||W||^2``.

        This is the objective whose derivative ``gradient`` computes:
        with P = softmax(-X W), the loss is
        (1/N) * sum_i [ (X W)[i, y_i] + log sum_k exp(-(X W)[i, k]) ].
        """
        N = X.shape[0]
        Z = -X @ W
        # Stable log-sum-exp over classes.
        Z_max = np.max(Z, axis=1, keepdims=True)
        log_partition = Z_max[:, 0] + np.log(np.sum(np.exp(Z - Z_max), axis=1))
        # sum(Z * y) picks Z[i, y_i]; equivalent to -trace(X W y^T) without
        # forming an N x N matrix.
        nll = (np.sum(log_partition) - np.sum(Z * y)) / N
        return nll + mu * np.sum(W ** 2)

    def gradient(self,X, y, W, mu): # y one hot encoded
        """Return the gradient of ``loss`` with respect to ``W``."""
        Z = -X @ W
        P = self.softmax(Z)
        N = X.shape[0]
        gd = 1/N * (X.T @ (y - P)) + 2 * mu * W
        return gd

    def gradient_descent(self,X, y, max_iter, lr, mu):
        """Run ``max_iter`` full-batch steps from W = 0 and return the final W.

        Per-step history is kept in ``step_lst``, ``W_lst`` and ``loss_lst``.
        """
        y_onehot = self.onehotencode(y)
        W = np.zeros((X.shape[1], y_onehot.shape[1]))
        step = 0
        self.step_lst = list()
        self.loss_lst = list()
        self.W_lst = list()

        while step < max_iter:
            step += 1
            W -= lr * self.gradient(X, y_onehot, W, mu)
            self.step_lst.append(step)
            # W is updated in place, so a snapshot must be copied.
            self.W_lst.append(W.copy())
            self.loss_lst.append(self.loss(X, y_onehot, W, mu))
        return W

    def fit(self,X,y,max_iter=1000,lr=0.1,mu=0):
        """Fit the weights on ``X`` (samples by features) and labels ``y``.

        ``lr`` is the step size and ``mu`` the L2 penalty weight.
        """
        self.max_iter = max_iter
        self.lr = lr
        self.mu = mu
        self.X = np.hstack((np.ones((len(X),1)),X))
        self.y = y
        self.W = self.gradient_descent(self.X,self.y,self.max_iter,self.lr,self.mu)

    def predict(self,X):
        """Return the index of the most probable class for each row of ``X``."""
        X = np.hstack((np.ones((len(X),1)),X))
        Z = -X @ self.W
        P = self.softmax(Z)
        return P.argmax(1)
 

# batch_size = 256
# train_iter = data_iterable(train_images, train_labels, batch_size)
# test_iter = data_iterable(test_images, test_labels, batch_size)

In [123]:
# from sklearn.datasets import load_iris
# X = load_iris().data
# Y = load_iris().target
# model = LogRegression()
# model.fit(X, Y,max_iter=100,mu=0.01)
# Loss(model.predict(X),Y)

# Fit the hand-written softmax regression (500 steps, L2 weight 0.01) and
# report accuracy on both splits.
LR = LogRegression()
LR.fit(train[:,:-1],train[:,-1],500,mu=0.01)
pred = LR.predict(train[:,:-1])
err_rate = Loss(train[:,-1],pred)
print(f"Training Accuracy: {100*(1-err_rate):.{4}}%")
# Second evaluation is on the held-out split, so label it as such.
pred = LR.predict(test[:,:-1])
err_rate = Loss(test[:,-1],pred)
print(f"Testing Accuracy: {100*(1-err_rate):.{4}}%")

step 100
step 200
step 300
step 400
step 500
Training Accuracy: 65.53%
Training Accuracy: 61.1%


In [103]:
from sklearn.linear_model import LogisticRegression
# model = LogisticRegression(C=0.01,multi_class='multinomial')
# model.fit(X, Y)
# Loss(model.predict(X),Y)

# Same experiment with scikit-learn's multinomial logistic regression.
LR = LogisticRegression(C=0.01,multi_class='multinomial')
LR.fit(train[:,:-1],train[:,-1])
pred = LR.predict(train[:,:-1])
err_rate = Loss(train[:,-1],pred)
print(f"Training Accuracy: {100*(1-err_rate):.{4}}%")
# Second evaluation is on the held-out split, so label it as such.
pred = LR.predict(test[:,:-1])
err_rate = Loss(test[:,-1],pred)
print(f"Testing Accuracy: {100*(1-err_rate):.{4}}%")

Training Accuracy: 86.03%
Training Accuracy: 78.57%


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Plain Gaussian naive Bayes classifier (for comparison) - single learner, no boosting

In [104]:
# Fit one Gaussian naive Bayes model on the full training split.
X_train, y_train = train[:, :-1], train[:, -1]

# Boolean row masks, one per class label 0..3.
idx0, idx1, idx2, idx3 = (y_train == c for c in (0., 1., 2., 3.))
class_masks = (idx0, idx1, idx2, idx3)

# Class priors from the label frequencies.
ycount = np.array([mask.sum() for mask in class_masks], dtype=float)
py = ycount / ycount.sum()

# Per-pixel mean and variance, one column per class.
means = np.ones((np.prod(dimension), 4))
vars = np.ones((np.prod(dimension), 4))
for col, mask in enumerate(class_masks):
    members = X_train[mask, :]
    means[:, col] = members.mean(axis=0)
    vars[:, col] = members.var(axis=0)
# Variance floor relative to the largest variance, so no pixel has zero variance.
vars += 1e-9 * vars.max()

# Accuracy on the training split.
pred = gaussNB(train, means, vars, 4, py)
err_rate = Loss(train[:, -1], pred)
print(f"Training Accuracy: {100*(1-err_rate):.{4}}%")

# Accuracy on the test split.
pred = gaussNB(test, means, vars, 4, py)
err_rate = Loss(test[:, -1], pred)
print(f"Testing Accuracy: {100*(1-err_rate):.{4}}%")


Training Accuracy: 61.85%
Testing Accuracy: 57.51%


In [105]:
####Unused code
##train
# x0 = (np.log(1/np.sqrt(2*3.1415*vars[:,0]))\
#         -0.5*((train[:,:-1] - means[:,0])**2)/vars[:,0]).sum(1)
# x1 = (np.log(1/np.sqrt(2*3.1415*vars[:,1]))\
#         -0.5*((train[:,:-1] - means[:,1])**2)/vars[:,1]).sum(1)
# x2 = (np.log(1/np.sqrt(2*3.1415*vars[:,2]))\
#         -0.5*((train[:,:-1] - means[:,2])**2)/vars[:,2]).sum(1)
# x3 = (np.log(1/np.sqrt(2*3.1415*vars[:,3]))\
#         -0.5*((train[:,:-1] - means[:,3])**2)/vars[:,3]).sum(1)
# logpy = np.log(py)
# x = np.vstack((x0,np.vstack((x1,np.vstack((x2,x3)))))).T
# pred = (x+logpy).argmax(1)



## in fit function
# self.nb = GaussianNB()
# self.nb.fit(X_train, y_train)
# self.pred_label2 = self.nb.predict(X_train)
# print((self.pred_label==self.pred_label2).all())

# err2 = np.dot(err_idx, self.probs[:,-1])/self.probs[:,-1].sum() + 1e-12

# if ctr != 0:
    # self.pred_label = (sum((pred == np.array([np.arange(self.num_classes)]).T) * al \
    #         for pred, al in zip(self.pred_labels.T,self.alphas)) \
    #         + alpha*(self.pred_label == np.array([np.arange(self.num_classes)]).T)).argmax(0)

    # err_idx = (y_train != self.pred_label)
    # err = np.dot(err_idx, self.probs[:,-1])/self.probs[:,-1].sum() + 1e-10
    # assert err < (1 - 1 / self.num_classes)
    # alpha = np.log((1-err)/err) + np.log(self.num_classes-1)

## in predict function
# pred_label = self.nb.predict(data[:,:-1])

## final prediction method
# predictions = np.ones((len(data),len(self.alphas),self.num_classes))
# for k in range(self.num_classes):
#         predictions[:,:,k] = (pred_labels == k)
# final_pred2 = ((self.alphas.reshape((1,len(self.alphas),1))*predictions).sum(1)).argmax(1)


In [106]:
# Compare to scikit
nb = GaussianNB()
nb.fit(train[:,:-1], train[:,-1])
print("Train Naive Bayes accuracy: %.2f%%" %(100*nb.score(train[:,:-1], train[:,-1])))
print("Test Naive Bayes accuracy: %.2f%%" %(100*nb.score(test[:,:-1], test[:,-1])))


Train Naive Bayes accuracy: 61.85%
Test Naive Bayes accuracy: 57.51%


AdaBoost based on Naive Bayes

In [133]:
class AdaBoost:
    """Multi-class AdaBoost over weak learners trained on weighted resamples.

    Each round draws a bootstrap sample of the training rows according to
    the current sample weights, fits a weak learner on it, measures the
    weighted error on the full training set, and derives a learner weight
    ``alpha = log((1 - err) / err) + log(K - 1)`` (K = number of classes).
    Prediction is the alpha-weighted vote over all accepted learners.

    ``estimator`` selects the weak learner: ``"Gaussian"`` (hand-written
    Gaussian naive Bayes via ``gaussNB``), ``"Multinomial"``
    (``MultinomialNB``), ``"Logistic"`` (``LogRegression``); any other
    value uses a depth-2 ``DecisionTreeClassifier``.

    Data arrays hold one sample per row with the label in the last column;
    labels are expected to be 0 .. num_classes-1.
    """

    # initialization
    def __init__(self):
        self.num_classes = 4

    def fit(self,data,dimension,estimator,rand,max_iter):
        """Train up to ``max_iter`` weak learners.

        Parameters
        ----------
        data : 2-D array
            Training rows, features followed by the label column.
        dimension : sequence of ints
            Image dimensions; their product is the number of features.
        estimator : str
            Weak-learner selector (see class docstring).
        rand : int or None
            Seed for the resampling generator and the decision trees.
        max_iter : int
            Maximum number of boosting rounds.

        After fitting, ``errs`` holds the weighted error of every round
        that was run, ``alphas``/``pred_labels`` hold one entry/column per
        accepted learner, ``probs`` holds the sample weights used in each
        round (one column per round) and ``ctr`` the number of rounds run.
        Boosting stops early when a learner's weighted error reaches
        ``1 - 1/num_classes``.
        """
        n_samples = len(data)
        n_classes = self.num_classes
        labels = data[:, -1]

        self.max_iter = max_iter
        self.estimator = estimator
        self.gnbs,self.mnbs,self.dcts,self.lrs = list(),list(),list(),list()
        self.dimension = np.prod(dimension)
        self.py = np.ones((n_classes,self.max_iter))
        self.px = np.ones((self.dimension,n_classes,self.max_iter))
        self.means = np.ones((self.dimension,n_classes,self.max_iter))
        self.vars = np.ones((self.dimension,n_classes,self.max_iter))

        # initialize weights to 1/N uniformly
        self.prob = 1/n_samples*np.ones((n_samples,1))
        weights = self.prob[:, 0].copy()
        weight_history = [weights]
        errs, alphas, round_preds = [], [], []

        # One private generator, seeded once: reproducible for a given
        # ``rand`` without resetting NumPy's global random state or reusing
        # the identical random stream in every round.
        rng = np.random.RandomState(rand)
        rounds_run = 0

        # several learners
        for ctr in range(self.max_iter):
            rounds_run = ctr + 1

            # sample training rows with replacement, proportional to weight
            idx = rng.choice(n_samples, size=n_samples, replace=True, p=weights)
            X_train, y_train = data[idx,:-1], data[idx,-1]

            # class priors of the resample, with add-one smoothing
            class_masks = [y_train == k for k in range(n_classes)]
            ycount = np.array([mask.sum() for mask in class_masks], dtype=float)
            ycount += 1.
            self.py[:,ctr] = ycount / ycount.sum()

            if self.estimator == "Gaussian":
                var_smoothing = 5e-11
                # NOTE: a class absent from the resample yields NaN
                # statistics here (mean/var of an empty slice).
                for k, mask in enumerate(class_masks):
                    members = X_train[mask,:]
                    self.means[:,k,ctr] = members.mean(axis=0)
                    self.vars[:,k,ctr] = members.var(axis=0)+var_smoothing

                pred = gaussNB(data,self.means[:,:,ctr],self.vars[:,:,ctr],n_classes,self.py[:,ctr])
                # The Gaussian learner is fully described by means/vars/py,
                # so there is no estimator object to store in self.gnbs.

            elif self.estimator == "Multinomial":
                estim = MultinomialNB()
                estim.fit(X_train,y_train)
                pred = estim.predict(data[:,:-1])
                self.mnbs.append(estim)

            elif self.estimator == "Logistic":
                estim = LogRegression()
                estim.fit(X_train,y_train,500,0.1,0.01)
                pred = estim.predict(data[:,:-1])
                self.lrs.append(estim)

            else: # Decision Tree
                estim = DecisionTreeClassifier(max_depth=2,random_state=rand)
                estim.fit(X_train,y_train)
                pred = estim.predict(data[:,:-1])
                self.dcts.append(estim)

            self.pred_label = pred
            err_idx = (labels != pred)

            # weighted misclassification rate; the epsilon keeps alpha finite
            err = (err_idx*weights).sum() + 1e-12
            errs.append(err)
            # a learner no better than random guessing ends boosting
            if err >= 1 - 1 / n_classes:
                break

            # learner weight (SAMME form, with the log(K-1) multi-class term)
            alpha = np.log((1-err)/err) + np.log(n_classes-1)
            alphas.append(alpha)
            round_preds.append(np.asarray(pred).reshape(n_samples))

            # misclassified -> weight * e^(alpha)   (increased)
            # correct       -> weight * e^(-alpha)  (decreased)
            # NOTE(review): canonical SAMME only rescales the misclassified
            # samples by e^(alpha); after normalisation this two-sided
            # update equals a SAMME step with 2*alpha. Kept as written.
            prob = np.where(err_idx, weights*np.exp(alpha), weights*np.exp(-alpha))

            if prob.sum() <= 0:
                print("prob sum invalid")
                break
            prob /= prob.sum() # normalize weights
            if ctr+1 >= max_iter:
                break
            weight_history.append(prob) # store
            weights = prob

        self.ctr = rounds_run # save iterations
        self.errs = np.array(errs, dtype=float)
        self.alphas = np.array(alphas, dtype=float)
        self.probs = np.column_stack(weight_history)
        if round_preds:
            self.pred_labels = np.column_stack(round_preds).astype(float)
        else:
            self.pred_labels = np.empty((n_samples, 0))

    def predict(self,data):
        """Return the alpha-weighted majority class index for each row.

        ``data`` has the same layout as in ``fit`` (last column is dropped).
        If no learner was accepted during ``fit``, every vote is zero and
        class 0 is returned for all rows.
        """
        n_samples = len(data)
        class_ids = np.arange(self.num_classes).reshape(-1, 1)
        votes = np.zeros((self.num_classes, n_samples))

        for ctr, alpha in enumerate(self.alphas):
            if self.estimator == "Gaussian":
                pred_label = gaussNB(data,self.means[:,:,ctr],self.vars[:,:,ctr],self.num_classes,self.py[:,ctr])

            elif self.estimator == "Multinomial":
                pred_label = self.mnbs[ctr].predict(data[:,:-1])

            elif self.estimator == "Logistic":
                pred_label = self.lrs[ctr].predict(data[:,:-1])

            else:
                pred_label = self.dcts[ctr].predict(data[:,:-1])

            # each learner adds its alpha to the class it predicts
            pred_label = np.asarray(pred_label).reshape(n_samples)
            votes += (pred_label == class_ids) * alpha

        # final classifier: argmax of the performance-weighted vote
        return votes.argmax(0)


In [134]:
# Run the hand-written AdaBoost for several weak learners and round budgets,
# printing train/test accuracy for each combination.
rand = np.random.randint(0,100)  # one seed shared by every run below
M = [1,20,50,100]#,200,500]
estims = ["Gaussian","Multinomial","Decision Tree","Logistic"]
# The last entry ("Logistic") is excluded by the slice.
for estim in estims[:-1]:
    print("Estimator: ", estim)
    for m in M:
        AB = AdaBoost()
        AB.fit(train,dimension,estim,rand,max_iter=m)
        # AB.ctr is the number of boosting rounds actually run.
        print("Max iterations: %d\tElapsed iterations: %d"%(m,AB.ctr))
        
        # accuracy on the training split
        t_pred = AB.predict(train)
        err_rate = Loss(train[:,-1],t_pred)
        print(f"Training Accuracy: {100*(1-err_rate):.{4}}%")

        # accuracy on the test split
        test_pred = AB.predict(test)
        err_rate = Loss(test[:,-1],test_pred)
        print(f"Testing Accuracy: {100*(1-err_rate):.{4}}%\n")
    

Estimator:  Gaussian
Max iterations: 1	Elapsed iterations: 1
Training Accuracy: 62.43%
Testing Accuracy: 59.19%

Max iterations: 20	Elapsed iterations: 11
Training Accuracy: 71.04%
Testing Accuracy: 68.88%

Max iterations: 50	Elapsed iterations: 11
Training Accuracy: 71.04%
Testing Accuracy: 68.88%

Max iterations: 100	Elapsed iterations: 11
Training Accuracy: 71.04%
Testing Accuracy: 68.88%

Estimator:  Multinomial
Max iterations: 1	Elapsed iterations: 1
Training Accuracy: 53.82%
Testing Accuracy: 49.28%

Max iterations: 20	Elapsed iterations: 19
Training Accuracy: 54.73%
Testing Accuracy: 56.06%

Max iterations: 50	Elapsed iterations: 19
Training Accuracy: 54.73%
Testing Accuracy: 56.06%

Max iterations: 100	Elapsed iterations: 19
Training Accuracy: 54.73%
Testing Accuracy: 56.06%

Estimator:  Decision Tree
Max iterations: 1	Elapsed iterations: 1
Training Accuracy: 59.84%
Testing Accuracy: 55.15%

Max iterations: 20	Elapsed iterations: 20
Training Accuracy: 71.76%
Testing Accuracy: 6

## Compare with scikit AdaBoost Classifier

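By default, scikit-learn's AdaBoostClassifier uses a decision tree as its weak learner; below it is run with each of the base estimators used above for a like-for-like comparison.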

In [135]:
# scikit-learn base estimators, in the same order as the names in `estims`.
est = [GaussianNB(),MultinomialNB(),DecisionTreeClassifier(max_depth=2),LogisticRegression(C=0.01,multi_class='multinomial')]
for estim in estims[:-1]:
    # Position of this name in `estims` selects the matching base estimator.
    position = estims.index(estim)
    print("Estimator: ", estims[position])
    base_estimator = est[position]
    for m in M:
        # train
        clf = AdaBoostClassifier(base_estimator, n_estimators=m, random_state=rand, algorithm='SAMME')
        X_fit, y_fit = train[:, :-1], train[:, -1]
        clf.fit(X_fit, y_fit)
        t_acc = 100 * clf.score(X_fit, y_fit)

        # test
        X_eval, y_eval = test[:, :-1], test[:, -1]
        t_p = clf.predict(X_eval)
        te_acc = 100 * clf.score(X_eval, y_eval)
        print("M: ",m)
        print("Training Accuracy: %.2f%%, Testing Accuracy %.2f%%\n" % (t_acc, te_acc))

Estimator:  Gaussian
M:  1
Training Accuracy: 61.85%, Testing Accuracy 57.51%

M:  20
Training Accuracy: 67.21%, Testing Accuracy 64.91%

M:  50
Training Accuracy: 67.21%, Testing Accuracy 64.91%

M:  100
Training Accuracy: 67.21%, Testing Accuracy 64.91%

Estimator:  Multinomial
M:  1
Training Accuracy: 27.92%, Testing Accuracy 30.74%

M:  20
Training Accuracy: 27.92%, Testing Accuracy 30.74%

M:  50
Training Accuracy: 27.92%, Testing Accuracy 30.74%

M:  100
Training Accuracy: 27.92%, Testing Accuracy 30.74%

Estimator:  Decision Tree
M:  1
Training Accuracy: 59.77%, Testing Accuracy 54.69%

M:  20
Training Accuracy: 82.39%, Testing Accuracy 76.13%

M:  50
Training Accuracy: 85.00%, Testing Accuracy 78.11%

M:  100
Training Accuracy: 87.83%, Testing Accuracy 81.16%

