In [1]:
import numpy as np
import matplotlib.pyplot as plt
# plt.rcParams['figure.figsize'] = [14, 6]
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from biosppy.signals import ecg
#
from hrv.classical import time_domain
from hrv.classical import frequency_domain
from hrv.classical import non_linear
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import KFold
from joblib import dump, load


In [2]:
data1 = np.load("1.npy")
data2 = np.load("2.npy")
data3 = np.load("3.npy")
data4 = np.load("4.npy")
data5 = np.load("5.npy")
print(data1.shape)
print(data2.shape)
print(data3.shape)
print(data4.shape)
print(data5.shape)

(833, 10801)
(1253, 10801)
(902, 10801)
(935, 10801)
(1105, 10801)


In [6]:
def hrvAnalysis(NN):
    diffNN = np.absolute(np.diff(NN))
    L = len(NN)    
    ANN = np.mean(NN)
    SDNN = np.std(NN)
    SDSD = np.std(diffNN)    
    #NN50 = len(np.where(np.diff(NN) > 0.05)[0])    
    NN50 = diffNN[diffNN > 0.05].size
    pNN50 = NN50/(L - 1)    
    rMSSD = np.sqrt(1/(L-1) * sum(diffNN ** 2))        
    rangeNum = np.ptp(NN)
    
    #non linear: pointcare & entropy
    #pointcare
    sd1, sd2 = poincare(NN)
    #entropy
    prob_NN = NN / NN.sum()
    shannon = -np.sum(prob_NN*np.log2(prob_NN))
    
    
    timeDomainFeats = {'ANN': ANN, 'SDNN': SDNN,
                       'SDSD': SDSD,
                       'pNN50': pNN50, 'rMSSD':rMSSD, 'sd1': sd1, 'sd2': sd2, 'shannon': shannon}
                       
    return timeDomainFeats

def frequencyDomain(NN):
    results = frequency_domain(
        rri=NN,
        fs=4.0,
        method='welch',
        interp_method='cubic',
        detrend='linear'
    )
    return results['lf_hf']

def poincare(rri):
    diff_rri = np.absolute(np.diff(rri))
    sd1 = np.sqrt(np.std(diff_rri, ddof=1) ** 2 * 0.5)
    sd2 = np.sqrt(2 * np.std(rri, ddof=1) ** 2 - 0.5 * np.std(diff_rri,
                                                              ddof=1) ** 2)
    return sd1, sd2

def cal_r_peaks(signal, sampling_rate):
    """
    """
    rpeaks, = ecg.hamilton_segmenter(signal=signal,
                                     sampling_rate=sampling_rate)
    rpeaks, = ecg.correct_rpeaks(signal=signal,
                                 rpeaks=rpeaks,
                                 sampling_rate=sampling_rate,
                                 tol=0.05)
    templates, rpeaks = ecg.extract_heartbeats(signal=signal,
                                               rpeaks=rpeaks,
                                               sampling_rate=sampling_rate,
                                               before=0.2,
                                               after=0.4)
    return rpeaks

def create_features(raw_data, fs):
    """
    """
    data  = raw_data[:, :-1]
    label = raw_data[:, -1]

    result = []
    for x in data:
        rpeaks = cal_r_peaks(x, fs)
        tmp = np.diff(rpeaks)
        rri = tmp / fs

        time = hrvAnalysis(rri)
        result.append(list(time.values()))
        
    result = np.array(result)
    return result, label

def train(training_data, testing_data, training_label, testing_label, hidden_layer, activation):
    
    #scaled data
    scaler = StandardScaler()
    scaler.fit(training_data)
    scaled_training_features = scaler.transform(training_data)
    scaler.fit(testing_data)
    scaled_testing_features = scaler.transform(testing_data)
    
    #model
    model = MLPClassifier(solver='lbfgs', alpha=5e-3, activation = activation, hidden_layer_sizes=(hidden_layer), learning_rate_init=0.001, max_iter=500)
    model.fit(X=scaled_training_features, y=training_label)
    prediction = model.predict(scaled_testing_features)
    accuracy = accuracy_score(prediction, testing_label)
    loss = model.loss_
    iteration = model.n_iter_
    return accuracy, loss, iteration

def kfold(data1, data2, data3, data4, data5, label1, label2, label3, label4, label5, hidden_layer, activation):
    #fold 1
    training1 = np.concatenate((data2, data3, data4, data5), 0)
    training_label1 = np.concatenate((label2, label3, label4, label5), 0)
    accuracy1, loss1, iteration1 = train(training1, data1, training_label1, label1, hidden_layer, activation)
    #print("accuracy:", accuracy1, "loss", loss1, "iteration", iteration1)
    #end

    #fold 2
    training2 = np.concatenate((data1, data3, data4, data5), 0)
    training_label2 = np.concatenate((label1, label3, label4, label5), 0)
    accuracy2, loss2, iteration2 = train(training2, data2, training_label2, label2, hidden_layer, activation)
    #print("accuracy:", accuracy1, "loss", loss1, "iteration", iteration1)
    #end

    #fold 3
    training3 = np.concatenate((data2, data1, data4, data5), 0)
    training_label3 = np.concatenate((label2, label1, label4, label5), 0)
    accuracy3, loss3, iteration3 = train(training3, data3, training_label3, label3, hidden_layer, activation)
    #print("accuracy:", accuracy1, "loss", loss1, "iteration", iteration1)
    #end

    #fold 4
    training4 = np.concatenate((data2, data3, data1, data5), 0)
    training_label4 = np.concatenate((label2, label3, label1, label5), 0)
    accuracy4, loss4, iteration4 = train(training4, data4, training_label4, label4, hidden_layer, activation)
    #print("accuracy:", accuracy1, "loss", loss1, "iteration", iteration1)
    #end

    #fold 5
    training5 = np.concatenate((data2, data3, data4, data1), 0)
    training_label5 = np.concatenate((label2, label3, label4, label1), 0)
    accuracy5, loss5, iteration5 = train(training5, data5, training_label5, label5, hidden_layer, activation)
    #print("accuracy:", accuracy1, "loss", loss1, "iteration", iteration1)
    #end
    loss_average = (loss1 + loss2 + loss3 + loss4 + loss5)/5
    return loss_average

def modelSelection(list_activation, list_hidden, data1, data2, data3, data4, data5):
    
    #create features
    training1, label1 = create_features(data1, 360)
    training2, label2 = create_features(data2, 360)
    training3, label3 = create_features(data3, 360)
    training4, label4 = create_features(data4, 360)
    training5, label5 = create_features(data5, 360)
    
    g_loss = 1000
    best_hidden = list_hidden[0]
    i = 0
    for activation in list_activation:
        for hidden_layer in list_hidden:
            loss = kfold(training1, training2, training3, training4, training5, label1, label2, label3, label4, label5, hidden_layer, activation)
            i = i+1
            if(loss < g_loss):
                print("ith: ", i, "loss: ", loss, "hidden_layer", hidden_layer, "activation: ", activation)
                g_loss = loss
                best_hidden = hidden_layer
    return g_loss, best_hidden, activation
                

In [7]:
#init the potential parameter - hidden layer
B = np.linspace(15, 25, 11, dtype=int)
C = np.append(B, B.T, axis = 0)
D = C.reshape((2, 11))
A2 = D.T
B = B.reshape((11,1))
A3 = np.append(A2, B, axis = 1)
A4 = np.append(A3, B, axis = 1)
A5 = np.append(A4, B, axis = 1)


#init activation
list_activation = ['logistic', 'tanh', 'relu']


In [None]:
#model selection
g_loss1, best_hidden1, best_activation = modelSelection(list_activation, A2, data1, data2, data3, data4, data5)
print("g_loss1: ", g_loss1, " best_hiddent1: ", best_hidden1, "best_activation:", best_activation)

#g_loss2, best_hidden2 = modelSelection(list_activation, A3, data1, data2, data3, data4, data5)
#print("g_loss2: ", g_loss2, " best_hiddent2: ", best_hidden2)

#g_loss3, best_hidden3 = modelSelection(list_activation, A4, data1, data2, data3, data4, data5)
#print("g_loss3: ", g_loss3, " best_hiddent3: ", best_hidden3)

#g_loss4, best_hidden4 = modelSelection(list_activation, A5, data1, data2, data3, data4, data5)  
#print("g_loss4: ", g_loss4, " best_hiddent4: ", best_hidden4)

ith:  1 loss:  0.04871504605588685 hidden_layer [15 15] activation:  logistic
ith:  2 loss:  0.048240096439974886 hidden_layer [16 16] activation:  logistic
ith:  3 loss:  0.045766704834315594 hidden_layer [17 17] activation:  logistic
ith:  4 loss:  0.03709638331428359 hidden_layer [18 18] activation:  logistic
ith:  7 loss:  0.033246897589144175 hidden_layer [21 21] activation:  logistic
ith:  11 loss:  0.029485152623826717 hidden_layer [25 25] activation:  logistic
ith:  12 loss:  0.024272125967532414 hidden_layer [15 15] activation:  tanh
ith:  13 loss:  0.02031941751647597 hidden_layer [16 16] activation:  tanh
ith:  15 loss:  0.019272809534109363 hidden_layer [18 18] activation:  tanh
ith:  16 loss:  0.01570302269065001 hidden_layer [19 19] activation:  tanh
ith:  18 loss:  0.014671381092650282 hidden_layer [21 21] activation:  tanh
ith:  19 loss:  0.013519182818932449 hidden_layer [22 22] activation:  tanh
ith:  20 loss:  0.012254955751607673 hidden_layer [23 23] activation:  ta

In [11]:
g_loss2, best_hidden2 = modelSelection(list_activation, A3, data1, data2, data3, data4, data5)
print("g_loss2: ", g_loss2, " best_hiddent2: ", best_hidden2)

ith:  1 loss:  0.04930500560829211 hidden_layer [15 15 15] activation:  logistic
ith:  4 loss:  0.04261962184499117 hidden_layer [18 18 18] activation:  logistic
ith:  5 loss:  0.039864879706394254 hidden_layer [19 19 19] activation:  logistic
ith:  6 loss:  0.03749530158222221 hidden_layer [20 20 20] activation:  logistic
ith:  12 loss:  0.01776539880717056 hidden_layer [15 15 15] activation:  tanh
ith:  14 loss:  0.01710972832423601 hidden_layer [17 17 17] activation:  tanh
ith:  15 loss:  0.01416314431989417 hidden_layer [18 18 18] activation:  tanh
ith:  16 loss:  0.011156843394806139 hidden_layer [19 19 19] activation:  tanh
ith:  19 loss:  0.00956591233400747 hidden_layer [22 22 22] activation:  tanh
ith:  20 loss:  0.00869223563406736 hidden_layer [23 23 23] activation:  tanh


KeyboardInterrupt: 