# NBC

## Data

In [1]:
import numpy as np
from numpy import linalg as LA
from sklearn import metrics
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

In [2]:
X,y = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)


In [3]:
def revamp(X):
    X_out = np.zeros(np.shape(X))
    for i in range(len(X)):
        for j in range(len(X[1])):
            value = X[i][j]
            if value < 5.0:
                X_out[i][j] = 0 #'DARK'
            elif value > 10.0:
                X_out[i][j] = 2 #'LIGHT'
            else:
                X_out[i][j] = 1 #'GRAY'
    return X_out
#Summerised into 0,1,2 aka DARK, LIGHT, GRAY
X_train_sum = revamp(X_train)
X_test_sum = revamp(X_test)

In [4]:
import MNIST
mnist = MNIST.MNISTData('MNIST_Light/*/*.png')
train_features, test_features, train_labels, test_labels = mnist.get_data()

## Classifier

In [5]:
class NBC:
    def __init__(self, nbr_classes, size, value_len):
        self.nbr_classes = nbr_classes 
        self.size = size
        self.value_len = value_len
        self.prob = np.zeros(nbr_classes)
        self.cond_prob = np.zeros((nbr_classes,size,value_len))
        
        
    def fit(self, X, y):
        for i in range(len(X)):
            self.prob[y[i]] += 1
            for j in range(len(X[1])):
                self.cond_prob[y[i]][j][int(X[i][j])] += 1
        for k in range(self.nbr_classes):
            self.cond_prob[k] = self.cond_prob[k]/self.prob[k]
        self.prob = self.prob/sum(self.prob)
        
    def prediction(self,X):
        y = np.ones((len(X),self.nbr_classes))
        pred = np.zeros(len(X))
        for i in range(len(X)):
            for j in range(self.nbr_classes):
                for k in range(len(X[1])):
                    y[i][j] *= self.cond_prob[j][k][int(X[i][k])]
                y[i][j] *= self.prob[j]
                if np.max(y[i]) == 0:
                    pred[i] = 10
                else:
                    pred[i] = np.argmax(y[i])
        return pred           
        

## Digits

In [6]:
nbc_dig = NBC(10,len(X_train[1]),17)
nbc_dig.fit(X_train,y_train)
y_pred = nbc_dig.prediction(X_test)

In [7]:
print("Classification report NBC digits:\n%s\n"
          % (metrics.classification_report(y_test, y_pred)))
print("Confusion matrix NBC digits:\n%s" % metrics.confusion_matrix(y_test, y_pred))


Classification report NBC digits:
              precision    recall  f1-score   support

         0.0       1.00      0.51      0.68        49
         1.0       0.94      0.51      0.66        63
         2.0       0.92      0.75      0.83        44
         3.0       0.95      0.52      0.67        67
         4.0       0.97      0.61      0.75        57
         5.0       1.00      0.75      0.86        52
         6.0       1.00      0.69      0.82        49
         7.0       0.83      0.56      0.67        52
         8.0       0.65      0.55      0.60        51
         9.0       0.81      0.52      0.63        56
        10.0       0.00      0.00      0.00         0

    accuracy                           0.59       540
   macro avg       0.82      0.54      0.65       540
weighted avg       0.91      0.59      0.71       540


Confusion matrix NBC digits:
[[25  0  0  0  0  0  0  0  1  0 23]
 [ 0 32  0  0  1  0  0  0  3  3 24]
 [ 0  0 33  2  0  0  0  0  2  0  7]
 [ 0  0  3 35  

  'recall', 'true', average, warn_for)


## Digits (summerised)

In [8]:
nbc_dig2 = NBC(10,len(X_train_sum[1]),17)
nbc_dig2.fit(X_train_sum,y_train)
y_pred = nbc_dig2.prediction(X_test_sum)

In [9]:
print("Classification report NBC digits (summerised):\n%s\n"
          % (metrics.classification_report(y_test, y_pred)))
print("Confusion matrix NBC digits (summerised):\n%s" % metrics.confusion_matrix(y_test, y_pred))


Classification report NBC digits (summerised):
              precision    recall  f1-score   support

         0.0       1.00      0.92      0.96        49
         1.0       0.80      0.81      0.80        63
         2.0       0.85      0.89      0.87        44
         3.0       0.95      0.88      0.91        67
         4.0       0.93      0.89      0.91        57
         5.0       0.88      0.96      0.92        52
         6.0       0.98      0.96      0.97        49
         7.0       0.94      0.92      0.93        52
         8.0       0.79      0.82      0.81        51
         9.0       0.87      0.84      0.85        56
        10.0       0.00      0.00      0.00         0

    accuracy                           0.89       540
   macro avg       0.82      0.81      0.81       540
weighted avg       0.90      0.89      0.89       540


Confusion matrix NBC digits (summerised):
[[45  0  0  0  0  3  0  0  0  0  1]
 [ 0 51  4  0  1  0  1  0  2  3  1]
 [ 0  1 39  0  0  0  0  0

## MNIST_light

In [10]:
nbc_mnist = NBC(10,len(train_features[1]),255)
nbc_mnist.fit(train_features,train_labels)
pred_labels = nbc_mnist.prediction(test_features)

In [11]:
print("Classification report NBC MNIST_light:\n%s\n"
          % (metrics.classification_report(test_labels, pred_labels)))
print("Confusion matrix NBC MNIST_light:\n%s" % metrics.confusion_matrix(test_labels, pred_labels))


Classification report NBC MNIST_light:
              precision    recall  f1-score   support

         0.0       0.92      0.73      0.81       164
         1.0       0.44      0.91      0.59       152
         2.0       0.72      0.55      0.62       155
         3.0       0.71      0.56      0.63       154
         4.0       0.61      0.64      0.62       143
         5.0       0.55      0.57      0.56       141
         6.0       0.84      0.61      0.70       143
         7.0       0.76      0.55      0.64       158
         8.0       0.52      0.46      0.49       132
         9.0       0.54      0.59      0.57       158
        10.0       0.00      0.00      0.00         0

    accuracy                           0.62      1500
   macro avg       0.60      0.56      0.57      1500
weighted avg       0.66      0.62      0.63      1500


Confusion matrix NBC MNIST_light:
[[119   5   7   2   2  19   5   0   2   3   0]
 [  0 138   0   0   0   4   1   0   9   0   0]
 [  0  24  85   8  