In [9]:
import os

import h5py
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [2]:
# Experiment configuration: sequence length used when reshaping the inputs,
# and the name of the dataset group folder under ../../data.
max_length = 400
group = "100_400"

# Every path below is derived from the group folder.
root_data_dir = f"../../data/{group}"
train_dir, test_dir = (
    os.path.join(root_data_dir, split_name) for split_name in ("train", "test")
)

In [3]:
def _read_fold_array(split_dir, subdir, fold, dataset):
    """Read one dataset from the fold's .mat (HDF5) file and return it transposed.

    Parameters
    ----------
    split_dir : str
        Directory of the split (``train_dir`` or ``test_dir``).
    subdir : str
        Sub-folder inside ``split_dir`` ("sequences" or "labels").
    fold : int
        Fold number; the first file whose name contains ``_{fold}.mat`` is used.
    dataset : str
        Name of the HDF5 dataset to read from the file.

    Returns
    -------
    numpy.ndarray
        The full dataset, read into memory and transposed.

    Raises
    ------
    FileNotFoundError
        If no file for the requested fold exists in the folder.
    """
    folder = os.path.join(split_dir, subdir)
    matches = [f for f in os.listdir(folder) if f'_{fold}.mat' in f]
    if not matches:
        raise FileNotFoundError(f"no '_{fold}.mat' file found in {folder}")
    # The context manager closes the HDF5 handle as soon as the data is copied
    # into memory by `[:]`; the previous code left every file open.
    with h5py.File(os.path.join(folder, matches[0]), 'r') as mat_file:
        return mat_file[dataset][:].transpose()


# The previous version looped over folds 1..5 but overwrote the same variables
# on every pass, so only the last fold was ever kept. Load that fold directly
# instead of reading four folds that are immediately discarded.
# NOTE(review): if all folds were meant to be combined, that never happened
# before either -- confirm the intended fold here.
FOLD = 5

train_matrix = _read_fold_array(train_dir, "sequences", FOLD, 'P_train_ds')
train_label = _read_fold_array(train_dir, "labels", FOLD, 'T_train_ds')
test_matrix = _read_fold_array(test_dir, "sequences", FOLD, 'P_test')
test_label = _read_fold_array(test_dir, "labels", FOLD, 'T_test')

# Reshape each sample to (max_length, 4).
# presumably 4 is the per-position encoding width -- TODO confirm
train_matrix = train_matrix.reshape(-1, max_length, 4)
test_matrix = test_matrix.reshape(-1, max_length, 4)

print(train_matrix.shape)
print(test_matrix.shape)

(160000, 400, 4)
(20000, 400, 4)


In [29]:
# Collapse each (max_length, 4) sample into a single flat feature row.
train_matrix_flat = train_matrix.reshape(len(train_matrix), -1)
test_matrix_flat = test_matrix.reshape(len(test_matrix), -1)

# Keep 30% of the training rows (test_size=0.70 is the discarded share);
# the fixed random_state makes the subsample reproducible.
sample_train_matrix, _, sample_train_label, _ = train_test_split(
    train_matrix_flat,
    train_label,
    random_state=42,
    test_size=0.70,
)

In [30]:
# Show the flattened training shape next to the subsample's shape, one per line.
print(train_matrix_flat.shape, sample_train_matrix.shape, sep="\n")

(160000, 1600)
(48000, 1600)


In [31]:
# Fit a support vector classifier (scikit-learn default hyper-parameters) on
# the training subsample. `train_matrix_flat` / `test_matrix_flat` were already
# built in the sub-sampling cell above, so they are not recomputed here.
clf = svm.SVC()
clf.fit(sample_train_matrix, sample_train_label.ravel())

# Evaluate on the full test set. The labels are flattened to 1-D, exactly as
# was done for `fit`, so that y_true and y_pred have the same shape.
test_predictions = clf.predict(test_matrix_flat)
print(classification_report(test_label.ravel(), test_predictions))

              precision    recall  f1-score   support

         0.0       0.75      0.49      0.59     11403
         1.0       0.53      0.78      0.63      8597

    accuracy                           0.61     20000
   macro avg       0.64      0.63      0.61     20000
weighted avg       0.66      0.61      0.61     20000



In [32]:
# This cell used to be a verbatim copy of the previous one: it re-flattened the
# matrices and retrained an identical SVC on the same subsample. SVC training is
# deterministic when probability estimates are off (the default), so the refit
# reproduced the same model and the same report at considerable cost.
# Re-use the predictions computed by the previous cell instead.
print(classification_report(test_label.ravel(), test_predictions))

              precision    recall  f1-score   support

         0.0       0.75      0.49      0.59     11403
         1.0       0.53      0.78      0.63      8597

    accuracy                           0.61     20000
   macro avg       0.64      0.63      0.61     20000
weighted avg       0.66      0.61      0.61     20000

