In [3]:
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torch.functional as F

import numpy as np
import pywt
import csv
import os
import pandas as pd
import math
from collections import Counter
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# data preprocessing
from sklearn.preprocessing import StandardScaler
# data splitting
from sklearn.model_selection import train_test_split

# data modeling
from sklearn.metrics import confusion_matrix,accuracy_score,roc_curve,classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

#visualisation
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns



In [9]:
# DATA loading
# Every file in the npy directory is loaded, cropped to the first 5000 entries
# of its last axis, and stacked into a single tensor.
npy_filepath = "/home/ngsci/datasets/silent-cchs-ecg/npy"

# os.listdir returns entries in arbitrary order; sort them so the stacking
# order (and therefore the feature-column layout after the reshape below)
# is identical on every run and on every machine.
dir_list = sorted(os.listdir(npy_filepath))

npy_arrays = []
for each in dir_list:
    file = os.path.join(npy_filepath, each)
    tempf = np.load(file)[:, :, 0:5000]  # keep only the first 5000 entries of the last axis
    npy_arrays.append(tempf)

stacked = np.stack(npy_arrays, axis=0)   # new leading axis: one slot per file
stacked = torch.from_numpy(stacked)
stacked = stacked.permute(1, 0, 2, 3)    # swap the file axis with the first per-file axis
# Flatten to 2-D: 3750 rows of 12 * 5000 features
# (original note: logistic regression accepts 2-D input).
# NOTE(review): presumably 3750 = patients and 12 = ECG leads -- confirm against the dataset docs.
stacked = stacked.reshape(3750, 12 * 5000)

# Labels: the second column of the outcomes CSV, cast to float.
rwma_frame = pd.read_csv("/home/ngsci/datasets/silent-cchs-ecg/csv/rwma-outcomes.csv")
rwma_frame = rwma_frame.astype(float)
rwma = torch.tensor(rwma_frame.iloc[:, 1].to_numpy())


In [11]:
#from sklearn.svm import SVC

class ekgData():
    """Train / validation / test split of the ECG features plus SVM helpers.

    The data is split 80/20 into (train+val)/test, then the 80% part is split
    75/25 into train/val, i.e. 60% / 20% / 20% of all samples overall.

    Parameters
    ----------
    x : torch.Tensor or numpy.ndarray, optional
        Feature data with one entry per sample along the first axis; it is
        flattened to ``(n_samples, -1)``. Defaults to the notebook-level
        ``stacked`` tensor, so the historical ``ekgData()`` call still works.
    y : torch.Tensor or numpy.ndarray, optional
        One label per sample. Defaults to the notebook-level ``rwma`` tensor.
    random_state : int, optional
        Seed forwarded to both ``train_test_split`` calls (default 42).

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not contain the same number of samples.
    """

    def __init__(self, x=None, y=None, random_state=42):
        # Fall back to the notebook globals for backward compatibility.
        if x is None:
            x = stacked
        if y is None:
            y = rwma

        self.y = y
        self.n_persons = len(self.y)

        # Derive the row count from the labels instead of hard-coding 3750,
        # but refuse to silently merge rows when the counts disagree.
        if x.shape[0] != self.n_persons:
            raise ValueError(
                f"x has {x.shape[0]} samples but y has {self.n_persons} labels"
            )
        self.x = x.reshape(self.n_persons, -1)

        # Hold out 20% of all samples as the test set ...
        x_rest, self.x_test, y_rest, self.y_test = train_test_split(
            self.x, self.y, test_size=0.2, random_state=random_state
        )
        # ... then 25% of the remaining 80% (= 20% overall) as the validation set.
        self.x_train, self.x_val, self.y_train, self.y_val = train_test_split(
            x_rest, y_rest, test_size=0.25, random_state=random_state
        )

    @staticmethod
    def _accuracy(y_pred, y_true):
        """Return the fraction of positions where ``y_pred`` equals ``y_true``.

        Both inputs are converted to NumPy first, so comparing an estimator's
        ndarray predictions with torch label tensors is well defined.
        """
        def as_numpy(values):
            if isinstance(values, torch.Tensor):
                return values.detach().cpu().numpy()
            return np.asarray(values)

        return float(np.mean(as_numpy(y_pred) == as_numpy(y_true)))

    def get_train_data(self):
        """Return the training split as an ``(x, y)`` tuple."""
        return self.x_train, self.y_train

    def get_val_data(self):
        """Return the validation split as an ``(x, y)`` tuple."""
        return self.x_val, self.y_val

    def get_test_data(self):
        """Return the test split as an ``(x, y)`` tuple."""
        return self.x_test, self.y_test

    def __len__(self):
        """Return the total number of samples (before splitting)."""
        return self.n_persons

    def train_svm(self, kernel='rbf', C=1.0, gamma='scale'):
        """Fit an ``SVC`` on the training split and print validation accuracy.

        Parameters are forwarded unchanged to ``SVC``.

        Returns
        -------
        The fitted ``SVC`` classifier.
        """
        clf = SVC(kernel=kernel, C=C, gamma=gamma)
        clf.fit(self.x_train, self.y_train)

        val_accuracy = self._accuracy(clf.predict(self.x_val), self.y_val)
        print(f"Validation accuracy: {val_accuracy:.2f}")

        return clf

    def test_svm(self, clf):
        """Print and return the accuracy of ``clf`` on the test split.

        Parameters
        ----------
        clf : object with a ``predict(x)`` method, e.g. the result of ``train_svm``.

        Returns
        -------
        float
            Fraction of test samples predicted correctly.
        """
        test_accuracy = self._accuracy(clf.predict(self.x_test), self.y_test)
        print(f"Test accuracy: {test_accuracy:.2f}")

        return test_accuracy
        

# Create DataLoader
#batch_size = 32 # choose a batch size that works for your specific model and hardware
#dataloader = DataLoader(ekgData, batch_size=batch_size, shuffle=True)

In [None]:
# Build the dataset wrapper once; the split happens inside its constructor.
data = ekgData()

# Unpack the three (features, labels) pairs in a single statement.
(x_train, y_train), (x_val, y_val), (x_test, y_test) = (
    data.get_train_data(),
    data.get_val_data(),
    data.get_test_data(),
)

In [13]:
def svm_training_loop(train_loader, model, criterion, optimizer, num_epochs=None):
    """Train ``model`` with mini-batch gradient steps and report the loss per epoch.

    Parameters
    ----------
    train_loader : iterable of ``(features, labels)`` batches.
        It is iterated once per epoch, so it must be re-iterable
        (e.g. a ``DataLoader`` or a list of batches).
    model : callable module with a ``train()`` method; called as ``model(features)``.
    criterion : callable ``criterion(pred, labels)`` returning a scalar loss tensor.
    optimizer : optimizer exposing ``zero_grad()`` and ``step()``.
    num_epochs : int, optional
        Number of passes over ``train_loader``. When omitted, a notebook-level
        ``num_epochs`` variable is used, which is what this function relied on
        implicitly before the parameter existed.

    Returns
    -------
    list of float
        The loss of the last batch of every epoch (the same value that is printed).

    Raises
    ------
    ValueError
        If no epoch count is available, or if ``train_loader`` yields no batches.
    """
    if num_epochs is None:
        # Backward compatibility with callers that defined a global `num_epochs`.
        num_epochs = globals().get("num_epochs")
        if num_epochs is None:
            raise ValueError(
                "num_epochs was not passed and no global `num_epochs` is defined"
            )

    history = []
    for t in range(num_epochs):
        # Set the model to training mode
        model.train()

        loss = None
        # Train the model for one epoch
        for features, labels in train_loader:
            optimizer.zero_grad()
            pred = model(features)
            loss = criterion(pred, labels)
            loss.backward()
            optimizer.step()

        if loss is None:
            # Without this guard the print below would fail on an unbound `loss`.
            raise ValueError("train_loader yielded no batches")

        # Print the epoch number and the loss value (of the final batch)
        print(f"Epoch {t+1}: Loss = {loss.item():.4f}")
        history.append(loss.item())

    return history


In [17]:
# Linear-kernel SVM baseline: fit on the training split, evaluate on the test split.
# `SVC` is already imported in the imports cell at the top of the notebook.
# The previous `sklearn.svm.SVC(...)` spelling raised NameError on a fresh
# kernel because the bare name `sklearn` is never imported here.

# create SVM model object
svm = SVC(kernel='linear', C=1, random_state=42)

# train the model
svm.fit(x_train, y_train)

# make predictions
svm_predict = svm.predict(x_test)

# calculate confusion matrix
svm_conf_matrix = confusion_matrix(y_test, svm_predict)

# calculate accuracy score
svm_acc_score = accuracy_score(y_test, svm_predict)

# print confusion matrix
print("Confusion Matrix:")
print(svm_conf_matrix)
print("\n")

# print accuracy score
print("Accuracy of SVM:", svm_acc_score*100, '\n')

# print classification report (per-class precision / recall / f1)
print(classification_report(y_test, svm_predict))


Confusion Matrix:
[[626  48]
 [ 66  10]]


Accuracy of SVM: 84.8 

              precision    recall  f1-score   support

         0.0       0.90      0.93      0.92       674
         1.0       0.17      0.13      0.15        76

    accuracy                           0.85       750
   macro avg       0.54      0.53      0.53       750
weighted avg       0.83      0.85      0.84       750

