In [0]:
# Mount Google Drive at /content/drive; later cells read the dataset from,
# and write models/results to, paths under '/content/drive/My Drive/'.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Check whether TensorFlow can see a GPU: the cell displays the value returned
# by tf.test.gpu_device_name() (the recorded run showed '/device:GPU:0').

import tensorflow as tf
tf.test.gpu_device_name()

'/device:GPU:0'

In [0]:
# List the devices TensorFlow reports for this runtime (the recorded run
# showed CPU, XLA_CPU, XLA_GPU and one Tesla K80 GPU).

from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 8067333583253992370, name: "/device:XLA_CPU:0"
 device_type: "XLA_CPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 11916475986345691337
 physical_device_desc: "device: XLA_CPU device", name: "/device:XLA_GPU:0"
 device_type: "XLA_GPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 2613860281567420954
 physical_device_desc: "device: XLA_GPU device", name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 11150726272
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 6109914333638398363
 physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7"]

In [0]:
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import KFold
import joblib
from sklearn.metrics import accuracy_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report
import datetime

In [0]:
def seq_encoding(path, outfile):
    """Load every labelled-sample file in ``path`` into one shuffled matrix.

    Each non-blank line of each regular file directly inside ``path`` is
    split on spaces (runs of spaces are collapsed). The first token is kept
    as the feature value and the second token is parsed as an integer label;
    any further tokens are ignored. The rows are stacked into a two-column
    array, shuffled in place with ``np.random.shuffle`` and written to
    ``outfile`` with ``np.savetxt``.

    Args:
        path: Directory containing the input text files. Sub-directories
            inside it are skipped.
        outfile: Destination handed to ``np.savetxt`` for the shuffled matrix.

    Returns:
        The shuffled two-column array (feature column, label column).

    Raises:
        ValueError: If a non-blank line has fewer than two tokens, or if a
            label token cannot be parsed as an integer.
    """
    dataList = []
    labelList = []

    for file in os.listdir(path):
        file_path = os.path.join(path, file)
        # Test the full path: a bare entry name would be resolved against the
        # current working directory rather than against ``path``.
        if os.path.isdir(file_path):
            continue
        # ``with`` closes every file that is opened, not only the last one.
        with open(file_path, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                # Separate the sequence and the label; drop the empty tokens
                # produced by consecutive spaces.
                tokens = [tok for tok in line.strip().split(' ') if tok]
                if not tokens:
                    continue  # blank line: nothing to record
                if len(tokens) < 2:
                    raise ValueError(
                        "%s, line %d: expected '<value> <label>', got %r"
                        % (file_path, line_no, line))
                dataList.append(tokens[0])
                labelList.append(int(tokens[1]))

    dataMat = np.array(dataList)      # X: feature column
    labelMat = np.asarray(labelList)  # y: integer label column
    # Combine the feature column and the label column into one matrix so that
    # shuffling keeps every feature paired with its label.
    window_Mat = np.column_stack((dataMat, labelMat))
    np.random.shuffle(window_Mat)

    np.savetxt(outfile, window_Mat, fmt='%s')
    return window_Mat

In [0]:
def splitDataSetbyKFold(window_Mat,split_size,outdir):
    """Partition ``window_Mat`` into K folds and dump each fold to disk.

    For fold number ``k`` (counted from 1) the training rows are written to
    ``<outdir>/train_<k>.txt`` and the held-out rows to
    ``<outdir>/test_<k>.txt``, tab-delimited. ``outdir`` is created when it
    does not exist yet.

    Args:
        window_Mat: Row-indexable matrix whose rows are the samples.
        split_size: Number of folds handed to ``KFold(n_splits=...)``.
        outdir: Directory receiving the per-fold text files.

    Returns:
        ``(train_all, test_all)``: two lists with one entry per fold, each
        entry being a list of rows (every row itself a list).
    """
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    train_all, test_all = [], []
    splitter = KFold(n_splits=split_size)

    for fold_no, (train_index, test_index) in enumerate(splitter.split(window_Mat), start=1):
        # Training part of this fold: collect the rows, write them, keep them.
        train_rows = [list(window_Mat[row_idx]) for row_idx in train_index]
        train_file = outdir + "/train_" + str(fold_no) + '.txt'
        np.savetxt(train_file, np.array(train_rows), fmt="%s", delimiter='\t')
        train_all.append(train_rows)

        # Held-out part of this fold, handled the same way.
        test_rows = [list(window_Mat[row_idx]) for row_idx in test_index]
        test_file = outdir + "/test_" + str(fold_no) + '.txt'
        np.savetxt(test_file, np.array(test_rows), fmt="%s", delimiter='\t')
        test_all.append(test_rows)

    return train_all, test_all

In [0]:
def performance(labelArr, predictArr):
    """Compute sensitivity and specificity for binary 0/1 labels.

    Only pairs whose actual and predicted values are both 0 or 1 are counted;
    any other label value (for example the classes of a multi-class problem)
    contributes to none of the four confusion-matrix cells.

    The counts are printed, one per line, in the order TP, FN, TN, FP.

    Args:
        labelArr: Sequence of actual labels.
        predictArr: Sequence of predicted labels, same length as ``labelArr``.

    Returns:
        ``(SN, SP)`` where ``SN = TP / (TP + FN)`` and ``SP = TN / (TN + FP)``.
        Each metric is ``0.0`` when its own denominator is zero, i.e. when
        there are no actual positives (SN) or no actual negatives (SP).

    Raises:
        ValueError: If the two sequences differ in length.
    """
    if len(labelArr) != len(predictArr):
        raise ValueError(
            "labelArr and predictArr must have the same length (%d != %d)"
            % (len(labelArr), len(predictArr)))

    TP = 0.0; TN = 0.0; FP = 0.0; FN = 0.0
    for actual, predicted in zip(labelArr, predictArr):
        if actual == 1 and predicted == 1:
            TP += 1.0
        elif actual == 1 and predicted == 0:
            FN += 1.0
        elif actual == 0 and predicted == 1:
            FP += 1.0
        elif actual == 0 and predicted == 0:
            TN += 1.0
    print(TP)
    print(FN)
    print(TN)
    print(FP)

    # Guard each ratio by its own denominator. A perfect prediction
    # (FP + FN == 0) must score 1.0, and a missing class must only zero the
    # metric that actually depends on it.
    positives = TP + FN  # P = TP + FN
    negatives = TN + FP  # N = TN + FP
    SN = TP / positives if positives else 0.0  # Sensitivity = TP / P
    SP = TN / negatives if negatives else 0.0  # Specificity = TN / N

    return SN, SP

In [0]:
def classifier(clf,clfname,train_X, train_y, test_X, test_y,i):#X:feature matrix, y:label matrix
    """Fit ``clf`` on one fold, evaluate it on the held-out part, save the model.

    Side effects:
        * prints progress messages and a scikit-learn classification report;
        * writes ``proba.data``, ``test_y.data`` and ``predict.data`` into the
          current working directory;
        * changes the working directory to the hard-coded ``ML_Model`` folder
          and dumps the fitted estimator there as ``train_<clfname><i>.model``.

    Args:
        clf: Estimator exposing ``fit``, ``predict`` and ``predict_proba``.
        clfname: Short classifier name used in the model file name.
        train_X, train_y: Training feature matrix and label vector.
        test_X, test_y: Held-out feature matrix and label vector.
        i: Fold index, appended to the model file name.

    Returns:
        ``(ACC, SN, SP, MCC, AUC)``. ``AUC`` is currently always ``0``.
    """
    # train with train set
    print(" training begin...")
    clf = clf.fit(train_X,train_y)
    print(" training end.")
    #==========================================================================
    # test with validation set
    print(" test begin.")
    # Predict once and reuse the result everywhere below; the report and the
    # metrics previously triggered additional full prediction passes.
    predict_ = clf.predict(test_X) #return type is float64
    proba = clf.predict_proba(test_X)[:,1] #return type is float64

    # Report
    sk_report = classification_report(
        digits=6,
        y_true=test_y,
        y_pred=predict_)
    print(sk_report)

    print(" test end.")

    #==========================================================================
    # Model Evaluation
    ACC = accuracy_score(test_y, predict_)
    SN, SP = performance(test_y, predict_)
    MCC = matthews_corrcoef(test_y, predict_)
    # AUC is a placeholder: the roc_auc_score(test_y, proba) call is disabled.
    AUC = 0

    #==========================================================================
    #save output
    np.savetxt("proba.data",proba,fmt="%f",delimiter="\t")
    np.savetxt("test_y.data",test_y,fmt="%f",delimiter="\t")
    np.savetxt("predict.data",predict_,fmt="%f",delimiter="\t")
    print("Wrote results to output.data...EOF...")
    # ==========================================================================
    # save Model
    # NOTE: this changes the process-wide working directory for the caller too.
    os.chdir("/content/drive/My Drive/FYP_ECG_ID/coding/ML_Model")

    joblib.dump(clf,'train_'+clfname+str(i)+'.model')
    return ACC,SN,SP,MCC,AUC

In [0]:
def mean_fun(onelist):
    """Return the arithmetic mean of the values in ``onelist`` as a float.

    Mainly used to average the per-fold ACC, SP and SN values when
    evaluating the model.
    """
    n_items = len(onelist)
    total = 0
    for value in onelist:
        total = total + value
    return float(total / n_items)

In [0]:
def crossValidation(clf, clfname, curdir, train_all, test_all):
    """Run ``classifier`` on every fold and log the averaged metrics.

    For fold ``i`` the rows of ``train_all[i]`` / ``test_all[i]`` are split
    into features (every column but the last, converted to ``float``) and a
    label (last column, converted to ``int``). The fold is then evaluated
    from inside ``<curdir>/<clfname>/<clfname>_00<i>``, which is created when
    missing.

    After the last fold the mean of each metric is appended to
    ``<curdir>/log.out`` together with a timestamp and the ``str`` of ``clf``.
    The working directory is ``curdir`` when the function returns.

    Args:
        clf: Estimator passed through to ``classifier``.
        clfname: Short classifier name used for folder names.
        curdir: Base directory for the per-fold folders and the log file.
        train_all: One list of rows per fold (training part).
        test_all: One list of rows per fold (held-out part).

    Returns:
        ``(ACC_mean, SN_mean, SP_mean, MCC_mean, AUC_mean)``.
    """
    os.chdir(curdir)
    # Remember the absolute location: classifier() changes the working
    # directory, and a relative ``curdir`` could not be re-entered afterwards.
    cur_path = os.getcwd()
    ACCs = [];SNs = [];SPs = [];MCCs = [];AUCs = []

    for i in range(len(train_all)):
        print('----- Round ', i, ' -----' )
        print('Start Time: ', datetime.datetime.now())

        # Return to the base directory before resolving relative fold paths.
        os.chdir(cur_path)

        # Divide each row into its feature columns and its trailing label.
        train_X = [[float(value) for value in row[:-1]] for row in train_all[i]]
        train_y = [int(row[-1]) for row in train_all[i]]
        test_X = [[float(value) for value in row[:-1]] for row in test_all[i]]
        test_y = [int(row[-1]) for row in test_all[i]]
        # ======================================================================
        # classifier start here
        if not os.path.exists(clfname):
            os.mkdir(clfname)
        out_path = clfname + "/" + clfname + "_00" + str(i)  # the folder that save result of each fold
        if not os.path.exists(out_path):
            os.mkdir(out_path)
        os.chdir(out_path)
        ACC, SN, SP, MCC, AUC = classifier(clf, clfname, train_X, train_y, test_X, test_y,i)
        ACCs.append(ACC)
        SNs.append(SN)
        SPs.append(SP)
        MCCs.append(MCC)
        AUCs.append(AUC)

        print('End Time: ', datetime.datetime.now())
        print('---------------')
        print('')

    # ======================================================================
    ACC_mean = mean_fun(ACCs)
    SN_mean = mean_fun(SNs)
    SP_mean = mean_fun(SPs)
    MCC_mean = mean_fun(MCCs)
    AUC_mean = mean_fun(AUCs)
    # ==========================================================================
    # output experiment result
    # Go back to the base directory so the log is not written into whatever
    # directory classifier() switched to last.
    os.chdir(cur_path)
    # Append with plain file I/O instead of shell `echo`, so the estimator's
    # repr never has to survive shell quoting.
    with open("log.out", "a") as log_file:
        log_file.write(str(datetime.datetime.now()) + " " + str(clf) + "\n")
        log_file.write("ACC_mean=" + str(ACC_mean) + "\n")
        log_file.write("SN_mean=" + str(SN_mean) + "\n")
        log_file.write("SP_mean=" + str(SP_mean) + "\n")
        log_file.write("MCC_mean=" + str(MCC_mean) + "\n")
        log_file.write("AUC_mean=" + str(AUC_mean) + "\n")

    return ACC_mean, SN_mean, SP_mean, MCC_mean, AUC_mean

In [0]:
if __name__ == '__main__':
    # End-to-end run: load and shuffle the labelled dataset, split it into
    # folds, then cross-validate an RBF-kernel SVM on those folds.
    path = '/content/drive/My Drive/FYP_ECG_ID/coding/LABELED_DATASET/'
    outfile = 'seq_encoded.txt'
    outdir = 'KFold'
    n_folds = 10
    random_seed = 42

    # Seed NumPy's global generator so the shuffle in seq_encoding (and with
    # it the contents of every fold) is the same on every run.
    np.random.seed(random_seed)

    # encode the original dataset
    window_Mat = seq_encoding(path,outfile)

    # split the feature matrix into N fold
    train_all, test_all = splitDataSetbyKFold(window_Mat, n_folds, outdir)

    print("Generate dataset end and cross validation start")

    clf = svm.SVC(C=1, kernel='rbf', gamma=0.2, probability=True)
    curdir = '/content/drive/My Drive/FYP_ECG_ID/coding'
    clfname = 'SVM'

    crossValidation(clf, clfname, curdir, train_all, test_all)


Generate dataset end and cross validation start
----- Round  0  -----
Start Time:  2020-05-21 16:32:51.437781
 training begin...
 training end.
 test begin.
              precision    recall  f1-score   support

           1   0.524452  0.525714  0.525083      3325
           2   0.790276  0.676919  0.729219      3674
           3   0.632395  0.713195  0.670369      4062

    accuracy                       0.644788     11061
   macro avg   0.649041  0.638610  0.641557     11061
weighted avg   0.652388  0.644788  0.646242     11061

 test end.
1748.0
0.0
0.0
0.0
Wrote results to output.data...EOF...
End Time:  2020-05-21 17:09:09.290636
---------------

----- Round  1  -----
Start Time:  2020-05-21 17:09:09.293146
 training begin...
 training end.
 test begin.
              precision    recall  f1-score   support

           1   0.526155  0.514816  0.520424      3341
           2   0.801849  0.694422  0.744279      3747
           3   0.618870  0.708281  0.660563      3973

    accuracy