In [27]:
import os

import h5py
from sklearn.metrics import classification_report
from sklearn.naive_bayes import GaussianNB, MultinomialNB, ComplementNB, BernoulliNB, CategoricalNB

In [5]:
# Notebook configuration.
# max_length is the per-sample sequence length used when reshaping the loaded
# matrices to (-1, max_length, 4) further down.
max_length = 400
# Name of the dataset sub-folder under ../data
# (presumably the sequence-length range 100-400 -- TODO confirm).
group = "100_400"
root_data_dir = f"../data/{group}"
# Both splits live side by side under the dataset root.
train_dir, test_dir = (
    os.path.join(root_data_dir, split) for split in ("train", "test")
)

In [22]:
def _find_fold_file(directory, fold):
    """Return the path of the first file in ``directory`` whose name contains ``_{fold}.mat``.

    Args:
        directory: Folder to search (not recursive).
        fold: Fold number embedded in the file name.

    Returns:
        Path of the first matching entry, in ``os.listdir`` order.

    Raises:
        FileNotFoundError: If no file name in ``directory`` contains the marker.
    """
    marker = f'_{fold}.mat'
    for name in os.listdir(directory):
        if marker in name:
            return os.path.join(directory, name)
    raise FileNotFoundError(f"no file containing {marker!r} in {directory!r}")


def _load_dataset(path, key):
    """Read dataset ``key`` from the HDF5 file at ``path`` and return it transposed.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the data has been copied into memory (``[:]`` materialises the dataset as
    an in-memory array, so the result stays valid after the file is closed).
    """
    with h5py.File(path, 'r') as mat_file:
        return mat_file[key][:].transpose()


# Fold to evaluate. The previous version looped over folds 1..5 but overwrote
# the arrays on every iteration, so only fold 5 ever reached the cells below;
# loading just that fold gives the same arrays without reading (and leaking
# open file handles for) the four discarded folds. Change this value to
# evaluate a different fold.
fold = 5

# Sequences are reshaped to (n_samples, max_length, 4): 4 values per position.
train_matrix = _load_dataset(
    _find_fold_file(os.path.join(train_dir, "sequences"), fold), 'P_train_ds'
).reshape(-1, max_length, 4)
train_label = _load_dataset(
    _find_fold_file(os.path.join(train_dir, "labels"), fold), 'T_train_ds'
)
test_matrix = _load_dataset(
    _find_fold_file(os.path.join(test_dir, "sequences"), fold), 'P_test'
).reshape(-1, max_length, 4)
test_label = _load_dataset(
    _find_fold_file(os.path.join(test_dir, "labels"), fold), 'T_test'
)

print(train_matrix.shape)
print(test_matrix.shape)

(160000, 400, 4)
(20000, 400, 4)


In [20]:
# Flatten every sample to one row so the estimators receive a 2-D
# (n_samples, n_features) array; the sample count (first axis) is kept and
# the remaining axes are merged.
train_matrix_flat = train_matrix.reshape(len(train_matrix), -1)
print(train_matrix_flat.shape)

test_matrix_flat = test_matrix.reshape(len(test_matrix), -1)
print(test_matrix_flat.shape)

(160000, 1600)
(20000, 1600)


In [23]:
# Sanity check: display the first training sample, one 4-wide row per
# sequence position.
train_matrix[0]

array([[0, 1, 0, 0],
       [0, 0, 1, 0],
       [0, 0, 0, 1],
       ...,
       [0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]], dtype=int8)

In [24]:
# Sanity check: the same first sample after flattening to a single 1-D
# feature vector.
train_matrix_flat[0]

array([0, 1, 0, ..., 0, 0, 0], dtype=int8)

In [15]:
# Gaussian naive Bayes: fit on the flattened training features (labels
# flattened to 1-D), then report per-class metrics on the test split.
nb_classifier = GaussianNB().fit(X=train_matrix_flat, y=train_label.ravel())
test_predictions = nb_classifier.predict(X=test_matrix_flat)
print(classification_report(y_true=test_label, y_pred=test_predictions))

              precision    recall  f1-score   support

         0.0       0.78      0.53      0.63     11403
         1.0       0.56      0.80      0.66      8597

    accuracy                           0.65     20000
   macro avg       0.67      0.67      0.65     20000
weighted avg       0.68      0.65      0.64     20000



In [30]:
# Multinomial naive Bayes: fit on the flattened training features (labels
# flattened to 1-D), then report per-class metrics on the test split.
nb_classifier = MultinomialNB().fit(X=train_matrix_flat, y=train_label.ravel())
test_predictions = nb_classifier.predict(X=test_matrix_flat)
print(classification_report(y_true=test_label, y_pred=test_predictions))

              precision    recall  f1-score   support

         0.0       0.76      0.59      0.66     11403
         1.0       0.58      0.76      0.66      8597

    accuracy                           0.66     20000
   macro avg       0.67      0.67      0.66     20000
weighted avg       0.68      0.66      0.66     20000



In [28]:
# Complement naive Bayes: fit on the flattened training features (labels
# flattened to 1-D), then report per-class metrics on the test split.
nb_classifier = ComplementNB().fit(X=train_matrix_flat, y=train_label.ravel())
test_predictions = nb_classifier.predict(X=test_matrix_flat)
print(classification_report(y_true=test_label, y_pred=test_predictions))

              precision    recall  f1-score   support

         0.0       0.76      0.59      0.66     11403
         1.0       0.58      0.76      0.66      8597

    accuracy                           0.66     20000
   macro avg       0.67      0.67      0.66     20000
weighted avg       0.68      0.66      0.66     20000



In [29]:
# Bernoulli naive Bayes: fit on the flattened training features (labels
# flattened to 1-D), then report per-class metrics on the test split.
nb_classifier = BernoulliNB().fit(X=train_matrix_flat, y=train_label.ravel())
test_predictions = nb_classifier.predict(X=test_matrix_flat)
print(classification_report(y_true=test_label, y_pred=test_predictions))

              precision    recall  f1-score   support

         0.0       0.76      0.59      0.66     11403
         1.0       0.58      0.75      0.65      8597

    accuracy                           0.66     20000
   macro avg       0.67      0.67      0.66     20000
weighted avg       0.68      0.66      0.66     20000



In [31]:
# Categorical naive Bayes: fit on the flattened training features (labels
# flattened to 1-D), then report per-class metrics on the test split.
nb_classifier = CategoricalNB().fit(X=train_matrix_flat, y=train_label.ravel())
test_predictions = nb_classifier.predict(X=test_matrix_flat)
print(classification_report(y_true=test_label, y_pred=test_predictions))

              precision    recall  f1-score   support

         0.0       0.76      0.59      0.66     11403
         1.0       0.58      0.75      0.65      8597

    accuracy                           0.66     20000
   macro avg       0.67      0.67      0.66     20000
weighted avg       0.68      0.66      0.66     20000

