## Testing Naive Bayes implementation

In [1]:
from packages.metagenomics import naive_bayes
from scipy.sparse import csr_matrix
import numpy as np

#### Initializing the training and testing data

In [2]:
# Toy training set: six samples (rows) described by nine features (columns).
# training_classes[k] is the class label of row k, so every class (1, 2, 3)
# owns exactly two rows.
training_classes = np.array([2, 1, 3, 2, 1, 3])

training_rows = [
    [2, 2, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 1, 1, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 3, 3, 0],
    [2, 1, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 1, 1, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 3, 2, 0],
]
training_matrix = np.array(training_rows, dtype=float)

# CSR copy of the same data; report its type and shape as a sanity check.
sparse_training_mtx = csr_matrix(training_matrix)
print(type(sparse_training_mtx))
print(sparse_training_mtx.shape)

<class 'scipy.sparse.csr.csr_matrix'>
(6, 9)


In [3]:
# One unseen sample with the same nine features as the training data.
# test_class is presumably the label we expect the classifier to return for it.
test_class = 2
test_array = np.array([2, 1, 0, 0, 0, 0, 0, 0, 0], dtype=float)

# csr_matrix turns the 1-D vector into a single-row sparse matrix.
test_array_sparse = csr_matrix(test_array)

In [4]:
# Per-class probabilities computed from the training labels (the recorded
# output shows a dict keyed by class label).
class_probabilities = naive_bayes.taxid_probability(training_classes)

# One sub-matrix per class, holding only that class's training rows.
# NOTE(review): the dense training_matrix is passed here although
# sparse_training_mtx was built earlier and the helper's name mentions a
# sparse matrix -- confirm which input is intended.
split_training_mtx_list = naive_bayes.split_sparsemtx_by_taxid(training_matrix, training_classes)

# Inspect the results: the probabilities, how many sub-matrices came back,
# the sub-matrices themselves, and the container type of the first one.
print(class_probabilities)
n_class_matrices = len(split_training_mtx_list)
print(n_class_matrices)
print(split_training_mtx_list)
first_class_mtx = split_training_mtx_list[0]
print(type(first_class_mtx))

{1: 0.3333333333333333, 2: 0.3333333333333333, 3: 0.3333333333333333}
3
[array([[0., 0., 0., 1., 1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 1., 0., 0., 0., 0.]]), array([[2., 2., 0., 0., 0., 0., 0., 0., 0.],
       [2., 1., 0., 0., 0., 0., 0., 0., 0.]]), array([[0., 0., 0., 0., 0., 0., 3., 3., 0.],
       [0., 0., 0., 0., 0., 0., 3., 2., 0.]])]
<class 'numpy.ndarray'>


#### Testing our new data

In [5]:
# Score the test sample against every class and predict the best-scoring one.
# A class score is: class probability * sum(test sample * per-class mean of
# each feature column).
# NOTE(review): this is a sum of feature-wise products rather than a product
# of per-feature likelihoods -- confirm it matches the scoring implemented in
# packages.metagenomics.naive_bayes.
final_probabilities = {}
class_list = list(class_probabilities.keys())

# Sub-matrices are matched to class labels purely by position, so there must
# be exactly one per class. The ordering is assumed to follow the keys of
# class_probabilities (the recorded output above is consistent with that) --
# TODO confirm against split_sparsemtx_by_taxid.
assert len(class_list) == len(split_training_mtx_list), (
    'expected exactly one training sub-matrix per class'
)

# Dense single-row view of the sparse test sample.
test_to_array = test_array_sparse.toarray()
print(test_to_array)
print(type(test_to_array))

for taxid, class_mtx in zip(class_list, split_training_mtx_list):
    # Mean of every feature column over this class's training rows. Using
    # .mean() instead of an in-place `/=` on the column sums gives the same
    # values for float data and also works for integer-typed matrices.
    col_mean = class_mtx.mean(axis=0)
    print(col_mean)

    # Feature-wise product with the test sample, weighted by the class
    # probability, then collapsed to a single score.
    weighted = np.multiply(np.multiply(test_to_array, col_mean), class_probabilities[taxid])
    final_probabilities[taxid] = weighted.sum()

print(final_probabilities)

# The predicted class is the key with the highest score.
prediction = max(final_probabilities, key=final_probabilities.get)
print(f'The new data is classified as class: {prediction}')

# Turn the demonstration into an actual check against the expected label
# defined alongside the test sample.
assert prediction == test_class, (
    f'expected class {test_class}, got {prediction}'
)

[[2. 1. 0. 0. 0. 0. 0. 0. 0.]]
<class 'numpy.ndarray'>
[0. 0. 0. 1. 1. 0. 0. 0. 0.]
[2.  1.5 0.  0.  0.  0.  0.  0.  0. ]
[0.  0.  0.  0.  0.  0.  3.  2.5 0. ]
{1: 0.0, 2: 1.8333333333333333, 3: 0.0}
The new data is classified as class: 2
