In [27]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import patsy
from sklearn.utils import resample


from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import ConfusionMatrixDisplay

In [28]:
train = pd.read_csv("../data/arrhythmia/mitbih_train.csv", header = None)
test = pd.read_csv("../data/arrhythmia/mitbih_test.csv", header = None)

# Last column reserved for classes
X_train = train.iloc[:, :-1].to_numpy()
y_train = train.iloc[:, -1].to_numpy()

X_test = test.iloc[:, :-1].to_numpy()
y_test = test.iloc[:, -1].to_numpy().astype(int)

In [29]:
df_1 = train[y_train == 1]
df_2 = train[y_train == 2]
df_3 = train[y_train == 3]
df_4 = train[y_train == 4]
df_0 = (train[y_train == 0]).sample(n = 15000, random_state = 42)

df_1 = resample(df_1, replace = True, n_samples = 15000, random_state = 123)
df_2 = resample(df_2, replace = True, n_samples = 15000, random_state = 124)
df_3 = resample(df_3, replace = True, n_samples = 15000, random_state = 125)
df_4 = resample(df_4, replace = True, n_samples = 15000, random_state = 126)

train = pd.concat([df_0, df_1, df_2, df_3, df_4])

X_train = train.iloc[:, :-1].to_numpy()
y_train = train.iloc[:, -1].to_numpy()


In [41]:
class_names = [
    "Normal Beat",
    "Supraventricular Premature Beat", 
    "Premature Ventricular Contraction",
    "Fusion of Ventricular and Normal Beat",
    "Unclassifiable Beat"
    ]

In [42]:
# MinMax Scale:
X_train_minmax = MinMaxScaler().fit_transform(X_train)
X_test_minmax = MinMaxScaler().fit_transform(X_test)

# Standard Scale:
X_train_sscale = StandardScaler().fit_transform(X_train)
X_test_sscale = StandardScaler().fit_transform(X_test)

In [70]:
x = np.linspace(0,1, X_train.shape[1])
M = 4 # Order
K = 11 # Knots
spb = K + 2 # spline basis
k = np.linspace(0, 1, spb) # knots
k = k[1:(len(k) - 1)] # Excluding end points
nbasis = K + M - 2 + 2 # Due to exclusion of end points, add + 2 to calculate nbasis

B = patsy.bs(x,  knots = k, degree = M - 1, include_intercept = True)

In [71]:
Bcoef_train = np.zeros((X_train.shape[0], nbasis))

for i in range(X_train.shape[0]):
    Bcoef_train[i, :] = np.linalg.inv(B.T@B) @ B.T @ X_train[i, :].T
    
Bcoef_test = np.zeros((X_test.shape[0], nbasis))

for i in range(X_test.shape[0]):
    Bcoef_test[i, :] = np.linalg.inv(B.T@B) @ B.T @ X_test[i, :].T

In [72]:
# MinMax Scale:
Bcoef_train_minmax = MinMaxScaler().fit_transform(Bcoef_train)
Bcoef_test_minmax = MinMaxScaler().fit_transform(Bcoef_test)

# Standard Scale:
Bcoef_train_sscale = StandardScaler().fit_transform(Bcoef_train)
Bcoef_test_sscale = StandardScaler().fit_transform(Bcoef_test)

### k-Nearest Neighbors:

In [73]:
lr = LogisticRegression(multi_class = "multinomial", max_iter = 1000)
lr.fit(X_train_minmax, y_train)
print(classification_report(y_test, lr.predict(X_test_minmax), target_names = class_names))

                                       precision    recall  f1-score   support

                          Normal Beat       0.97      0.64      0.77     18118
      Supraventricular Premature Beat       0.14      0.67      0.23       556
    Premature Ventricular Contraction       0.29      0.73      0.42      1448
Fusion of Ventricular and Normal Beat       0.08      0.87      0.15       162
                  Unclassifiable Beat       0.75      0.91      0.82      1608

                             accuracy                           0.67     21892
                            macro avg       0.45      0.76      0.48     21892
                         weighted avg       0.88      0.67      0.73     21892



In [74]:
lr = LogisticRegression(multi_class = "multinomial", max_iter = 1000)
lr.fit(Bcoef_train_minmax, y_train)
print(classification_report(y_test, lr.predict(Bcoef_test_minmax), target_names = class_names))

                                       precision    recall  f1-score   support

                          Normal Beat       0.95      0.83      0.88     18118
      Supraventricular Premature Beat       0.16      0.49      0.24       556
    Premature Ventricular Contraction       0.36      0.48      0.41      1448
Fusion of Ventricular and Normal Beat       0.14      0.64      0.23       162
                  Unclassifiable Beat       0.81      0.81      0.81      1608

                             accuracy                           0.80     21892
                            macro avg       0.48      0.65      0.52     21892
                         weighted avg       0.87      0.80      0.83     21892



In [75]:
knn = KNeighborsClassifier(n_neighbors = 2)
knn.fit(X_train, y_train)
print(classification_report(y_test, knn.predict(X_test), target_names = class_names))

                                       precision    recall  f1-score   support

                          Normal Beat       0.99      0.97      0.98     18118
      Supraventricular Premature Beat       0.58      0.79      0.67       556
    Premature Ventricular Contraction       0.87      0.94      0.90      1448
Fusion of Ventricular and Normal Beat       0.64      0.80      0.71       162
                  Unclassifiable Beat       0.97      0.97      0.97      1608

                             accuracy                           0.96     21892
                            macro avg       0.81      0.89      0.85     21892
                         weighted avg       0.97      0.96      0.96     21892



In [76]:
knn = KNeighborsClassifier(n_neighbors = 2)
knn.fit(Bcoef_train, y_train)
print(classification_report(y_test, knn.predict(Bcoef_test), target_names = class_names))

                                       precision    recall  f1-score   support

                          Normal Beat       0.99      0.97      0.98     18118
      Supraventricular Premature Beat       0.57      0.79      0.66       556
    Premature Ventricular Contraction       0.87      0.93      0.90      1448
Fusion of Ventricular and Normal Beat       0.52      0.75      0.61       162
                  Unclassifiable Beat       0.95      0.97      0.96      1608

                             accuracy                           0.96     21892
                            macro avg       0.78      0.88      0.82     21892
                         weighted avg       0.96      0.96      0.96     21892



In [78]:
from sklearn import svm

In [79]:
svm_linear = svm.SVC(kernel = "linear")
svm_linear.fit(X_train, y_train)
print(classification_report(y_test, svm_linear.predict(X_test), target_names = class_names))

                                       precision    recall  f1-score   support

                          Normal Beat       0.97      0.74      0.84     18118
      Supraventricular Premature Beat       0.28      0.65      0.39       556
    Premature Ventricular Contraction       0.32      0.79      0.46      1448
Fusion of Ventricular and Normal Beat       0.10      0.89      0.18       162
                  Unclassifiable Beat       0.78      0.91      0.84      1608

                             accuracy                           0.75     21892
                            macro avg       0.49      0.80      0.54     21892
                         weighted avg       0.89      0.75      0.80     21892

