In [None]:
import numpy as np 
import pandas as pd 
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.utils import class_weight
from sklearn.utils import shuffle
from keras.utils.np_utils import to_categorical
import tensorflow as tf

In [None]:
# Load Data
# header=None: the first CSV row is read as data, so pandas labels the
# columns with integers (column 187 is used as the class label below).
mit_train_data, mit_test_data = (
    pd.read_csv(csv_path, header=None)
    for csv_path in ('/kaggle/input/heartbeat/mitbih_train.csv',
                     '/kaggle/input/heartbeat/mitbih_test.csv')
)

# Visualize data

In [None]:
# (number of training rows, number of columns) of the training table
mit_train_data.shape

Classes: {'N': 0, 'S': 1, 'V': 2, 'F': 3, 'Q': 4}

In [None]:
# Split the training table into one frame per class.
# Column 187 = class labels (0..4 -> N, S, V, F, Q)
trainN, trainS, trainV, trainF, trainQ = (
    mit_train_data[mit_train_data[187] == class_id] for class_id in range(5)
)

In [None]:
# Split the test table into one frame per class.
# Column 187 = class labels (0..4 -> N, S, V, F, Q)
testN, testS, testV, testF, testQ = (
    mit_test_data[mit_test_data[187] == class_id] for class_id in range(5)
)

In [None]:
# Number of training rows in each class
n_trainN, n_trainS, n_trainV, n_trainF, n_trainQ = map(
    len, (trainN, trainS, trainV, trainF, trainQ)
)

In [None]:
# Number of test rows in each class
n_testN, n_testS, n_testV, n_testF, n_testQ = map(
    len, (testN, testS, testV, testF, testQ)
)

Plot the data set (recreate Figure 4). For each class we plot the first 50 beat signals together with their averaged beat signal.

In [None]:
# Put data in proper form for plotting: drop the label column (187) and
# keep only the signal values as a NumPy array, one row per beat.
trainN_plot, trainS_plot, trainV_plot, trainF_plot, trainQ_plot = (
    class_frame.drop(columns=[187]).values
    for class_frame in (trainN, trainS, trainV, trainF, trainQ)
)

In [None]:
# Point-wise mean of the first 50 heartbeats in each class
trainN_avg, trainS_avg, trainV_avg, trainF_avg, trainQ_avg = (
    beats[:50].mean(axis=0)
    for beats in (trainN_plot, trainS_plot, trainV_plot, trainF_plot, trainQ_plot)
)

In [None]:
# Recreate Figure 4: one panel per class showing the first 50 beats
# (translucent, coloured) with their point-wise mean drawn on top in black.
plt.figure(figsize=(10,10))
figure4_panels = (
    ('N', trainN_plot, trainN_avg, 'tab:red'),
    ('S', trainS_plot, trainS_avg, 'tab:blue'),
    ('V', trainV_plot, trainV_avg, 'tab:green'),
    ('F', trainF_plot, trainF_avg, 'tab:purple'),
    ('Q', trainQ_plot, trainQ_avg, 'tab:orange'),
)
for panel_no, (class_name, beats, mean_beat, colour) in enumerate(figure4_panels, start=1):
    plt.subplot(3,2,panel_no)
    # Ticks and grid are switched off for every panel
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    for beat_idx in range(50):
        plt.plot(beats[beat_idx],colour,alpha=0.1)
    plt.plot(mean_beat,'k',linewidth=2)
    plt.title(class_name)
    plt.xlabel('Time (ms)')
    plt.ylabel('Amplitude (mV)')
plt.show()

# Prepare data for training and testing

In [None]:
def shuffle(train):
    """Split a labelled table into features/labels and permute the rows.

    Column 187 of ``train`` is taken as the class label; all remaining
    columns are the features. Rows are reordered with a permutation drawn
    from ``np.random.RandomState(seed=42)``, so the order is reproducible.

    Returns:
        tuple: ``(X, y, y_cat)`` - the permuted feature array, the permuted
        labels cast to ``int``, and ``to_categorical(y)``.

    NOTE(review): this definition rebinds the name ``shuffle`` that the
    import cell pulls in from ``sklearn.utils``.
    """
    features = np.asarray(train.drop(columns=[187]))
    targets = np.asarray(train[187])
    row_ids = np.arange(int(features.shape[0]))
    order = np.random.RandomState(seed=42).permutation(row_ids)  # fixed seed -> same order every run
    targets_int = targets[order].astype(int)
    return features[order], targets_int, to_categorical(targets_int)

In [None]:
# Shuffle both splits and append a trailing channel axis: (n_samples, 187, 1).
# The sample count is taken from the array itself (it was hard-coded as 87554
# for the training split), matching how the test split is reshaped.
X_train, labels, y_train = shuffle(mit_train_data)
X_train_new = X_train.reshape(X_train.shape[0],187,1)

X_test, test_labels, y_test = shuffle(mit_test_data)
X_test_new = X_test.reshape(X_test.shape[0],187,1)

# Define focal loss function

In [None]:
""" Define focal loss function as per 
    https://www.dlology.com/blog/
    multi-class-classification-with-focal-loss-for-imbalanced-datasets/""" 
def focal_loss_fnc(gamma,alpha):
    """Build a multi-class focal-loss function with fixed hyper-parameters.

    Arguments:
        gamma -- focusing exponent; converted with ``float()``
        alpha -- scaling factor; converted with ``float()``

    Neither argument has a default value.

    Returns:
        The closure ``focal_loss_fixed(y_true, y_pred)``. The models below are
        loaded with ``custom_objects={'focal_loss_fixed': ...}``, so the inner
        function keeps that name.
    """
    gamma = float(gamma)
    alpha = float(alpha)

    def focal_loss_fixed(y_true, y_pred):
        """Focal loss for multi-class classification.

        FL(p_t) = -alpha * (1 - p_t)^gamma * ln(p_t)
        (Focal Loss for Dense Object Detection, https://arxiv.org/abs/1708.02002)

        Arguments:
            y_true {tensor} -- ground truth labels, shape of [batch_size, num_cls]
            y_pred {tensor} -- model's output, shape of [batch_size, num_cls];
                expected to be probabilities (after softmax)

        Returns:
            [tensor] -- scalar loss: the per-sample maximum over classes,
            averaged over the batch.
        """
        epsilon = 1.e-9  # keeps log() away from zero
        targets = tf.convert_to_tensor(y_true, tf.float32)
        probs = tf.convert_to_tensor(y_pred, tf.float32) + epsilon

        cross_entropy = targets * -tf.math.log(probs)
        modulator = targets * tf.pow(1. - probs, gamma)
        per_class = alpha * (modulator * cross_entropy)
        # Multiplying by y_true zeroes every column where y_true is 0
        per_sample = tf.reduce_max(per_class, axis=1)
        return tf.reduce_mean(per_sample)
    return focal_loss_fixed

# Load trained models 

In [None]:
from keras.models import load_model

In [None]:
# Model 1 (the "without focal loss" model): loaded with no custom objects
filename = "../input/models/Models/model_wout_focal_loss"
model_wout_fl = load_model(filename,
                           custom_objects=None,
                           compile=True)
# Training history saved as CSV; its 'loss' and 'accuracy' columns are plotted below
history_wout_fl = pd.read_csv("../input/models/Models/history_wout_focal_loss.csv")

In [None]:
# Model 2 (the focal-loss model): the loss closure is passed under the key
# 'focal_loss_fixed', the name of the inner function returned by focal_loss_fnc
filename = "../input/models/Models/model_focal_loss"
model_fl = load_model(filename,
                           custom_objects={'focal_loss_fixed': focal_loss_fnc(gamma=2,alpha=0.25)},
                           compile=True)
# Training history saved as CSV; its 'loss' and 'accuracy' columns are plotted below
history_fl = pd.read_csv("../input/models/Models/history_focal_loss.csv")

# Evaluate models performance

**Predictions**

In [None]:
# Model outputs for the shuffled test set (one column per class) ...
pred_p_wout_fl = model_wout_fl.predict(X_test_new)
pred_p_fl = model_fl.predict(X_test_new)
# ... and the arg-max class per sample. Note: y_test_* here are PREDICTED labels.
y_test_wout_fl = pred_p_wout_fl.argmax(axis=1)
y_test_fl = pred_p_fl.argmax(axis=1)

**Loss and Accuracy**

In [None]:
def test_loss_acc(model, X_test, y_test):
    score = model.evaluate(X_test, y_test, verbose=0)
    test_loss = score[0]
    test_acc = score[1]
    print(f'Test loss: {score[0]} / Test accuracy: {score[1]}')
    return test_loss, test_acc

In [None]:
# Test loss/accuracy of Model 1 (without focal loss) against the one-hot test labels
loss_wout_fl, acc_wout_fl = test_loss_acc(model_wout_fl,X_test_new,y_test)

In [None]:
# Test loss/accuracy of Model 2 (with focal loss) against the one-hot test labels
loss_fl, acc_fl = test_loss_acc(model_fl,X_test_new,y_test)

Plot curves (recreate figure 5)

In [None]:
# Per-epoch training loss / accuracy of both models as NumPy arrays
train_loss_wout_fl, train_acc_wout_fl = (
    history_wout_fl[metric].to_numpy() for metric in ('loss', 'accuracy')
)
train_loss_fl, train_acc_fl = (
    history_fl[metric].to_numpy() for metric in ('loss', 'accuracy')
)

In [None]:
# Recreate Figure 5: training loss (left) and accuracy (right) per epoch.
# Blue = Model 1 (without focal loss), red = Model 2 (with focal loss); the
# red curves were previously labelled "Model 1" as well.
# The epoch axis follows the history length instead of a hard-coded 100.
# NOTE(review): the numbers in the legend labels are hard-coded; presumably
# the final-epoch values - confirm against the history CSVs.
epochs = np.arange(1, len(train_loss_wout_fl) + 1)
plt.figure(figsize=(12,4))

plt.subplot(1,2,1)
plt.plot(epochs,train_loss_wout_fl,'b',linewidth=2,label='Model 1 training loss (0.06951)')
plt.plot(epochs,train_loss_fl,'r',linewidth=2,label='Model 2 training loss (0.00612)')
plt.title('Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1,2,2)
plt.plot(epochs,train_acc_wout_fl,'b',linewidth=2,label='Model 1 training accuracy (0.98314)')
plt.plot(epochs,train_acc_fl,'r',linewidth=2,label='Model 2 training accuracy (0.98360)')
plt.title('Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.savefig('training_loss_acc.png')
plt.show()


Note: drop in loss/increase in accuracy at 80 epochs is due to change in learning rate (0.001 to 0.0001)

**Other Metrics (Precision, Recall, F1-score)**

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Per-class precision/recall/F1 of Model 1; test_labels are the shuffled
# integer labels, in the same order as the predictions
print("Classification report: Model 1 (without focal loss)")
print(classification_report(test_labels, y_test_wout_fl,digits=4))

In [None]:
# Per-class precision/recall/F1 of Model 2; test_labels are the shuffled
# integer labels, in the same order as the predictions
print("Classification report: Model 2 (with focal loss)")
print(classification_report(test_labels, y_test_fl,digits=4))

* Paper has a range of values (must have trained multiple times - might want to train a second model to see if things stay relatively the same or change drastically)
* In the paper, most values increase or stay relatively consistent when focal loss is added
* Here, without focal loss, class 1 precision was much higher than expected, while recall was much lower than expected (few false positives, but also few true positives, i.e. many false negatives)
* With focal loss added, precision significantly decreased while recall significantly increased i.e. focal loss identified more true positives but also more false positives
* To help visualize this, a row and a column were added to confusion matrix

**AUC**

* Can use the one vs all classification needed to create Fig 8a

In [None]:
from sklearn.metrics import roc_auc_score

In [None]:
# Multi-class AUC from the per-class model outputs: one-vs-rest ('ovr'),
# with average='weighted'
auc_wout_fl = roc_auc_score(test_labels,pred_p_wout_fl,average='weighted',multi_class='ovr')
auc_fl = roc_auc_score(test_labels,pred_p_fl,average='weighted',multi_class='ovr')
print('AUC Model 1 (without fl): %.4f' % auc_wout_fl)
print('AUC Model 2 (with fl): %.4f' % auc_fl)

**Confusion matrix**

In [None]:
# Confusion matrix of Model 1: true labels are passed first, predictions second
cm_wout_fl = confusion_matrix(test_labels, y_test_wout_fl)
print("Confusion matrix: Model 1 (without focal loss)")
print(cm_wout_fl)

In [None]:
# Confusion matrix of Model 2: true labels are passed first, predictions second
cm_fl = confusion_matrix(test_labels, y_test_fl)
print("Confusion matrix: Model 2 (with focal loss)")
print(cm_fl)

**Predict on unshuffled data (Figure 8b)**

In [None]:
# Test set in its original (unshuffled) file order
y_test_ns = np.asarray(mit_test_data[187])            # labels (column 187)
y_test_ns_cat = to_categorical(y_test_ns)             # one-hot labels
X_test_ns = np.asarray(mit_test_data.drop(columns=[187]))
X_test_ns_new = X_test_ns.reshape(X_test_ns.shape[0],187,1)  # add channel axis

In [None]:
# Focal-loss model on the unshuffled test set: model outputs, then arg-max class
pred_p_fl_ns = model_fl.predict(X_test_ns_new)
y_test_fl_ns = pred_p_fl_ns.argmax(axis=1)

In [None]:
# Find misclassified points
# diff is non-zero exactly where the predicted class differs from the true class
diff = y_test_fl_ns - y_test_ns
mask = diff != 0
# Predicted class for the misclassified samples, NaN for correct ones
# (same length as the test set). Previously `miss` was a plain copy of the
# predictions: the masked assignment wrote the same values back, so nothing
# was actually isolated.
miss = np.where(mask, y_test_fl_ns, np.nan)

In [None]:
# Figure 8b: true vs predicted class for every (unshuffled) test sample,
# with misclassified samples over-plotted in black.
plt.figure(figsize=(7,6))
sample_no = np.arange(1, len(y_test_ns) + 1)   # 1-based sample index shared by all three series
wrong = y_test_ns != y_test_fl_ns
plt.scatter(sample_no,y_test_ns,s=70,c='b',alpha=0.5,label="True")
plt.scatter(sample_no,y_test_fl_ns,c='r',alpha=0.5,label="Predicted")
# One vectorised call on the shared x-axis. The previous per-sample loop drew
# these markers at x=i while the other series used x=i+1.
plt.scatter(sample_no[wrong],y_test_fl_ns[wrong],s=5,c='k',label="Misclassified")
plt.legend(loc='upper center',facecolor='white',framealpha=1)
plt.xlabel('Samples')
plt.yticks([0,1,2,3,4])
plt.ylabel('Arrhythmia Classes')
plt.savefig("test_classification.png")

Adding FP and FN to Confusion Matrix

In [None]:
def false_pos(index,cm):
    """False positives of class ``index``: column sum minus the diagonal entry.

    ``cm`` is indexed as a 2-D NumPy array (``cm[:, index]``).
    """
    return cm[:,index].sum() - cm[index,index]

In [None]:
def false_neg(index,cm):
    """False negatives of class ``index``: row sum minus the diagonal entry.

    ``cm`` is indexed as a 2-D NumPy array (``cm[index, :]``).
    """
    return cm[index,:].sum() - cm[index,index]

In [None]:
# False pos = sum of the column - diag (row at bottom)
def false_pos_vec(cm):
    """Return the false-positive count of every class as a float array.

    Entry ``i`` is the sum of column ``i`` of ``cm`` minus ``cm[i, i]``.
    The number of classes is taken from ``cm`` itself (it used to be
    hard-coded to 5), so the 2x2 PTB confusion matrix works as well.

    Arguments:
        cm -- square confusion matrix (anything ``np.asarray`` accepts)

    Returns:
        1-D float array with one entry per class.
    """
    cm = np.asarray(cm)
    # float dtype kept for compatibility with the previous np.empty-based result
    return (cm.sum(axis=0) - np.diagonal(cm)).astype(float)

In [None]:
# False neg = sum of the row - diag (column at end)
def false_neg_vec(cm):
    """Return the false-negative count of every class as a float array.

    Entry ``i`` is the sum of row ``i`` of ``cm`` minus ``cm[i, i]``.
    The number of classes is taken from ``cm`` itself (it used to be
    hard-coded to 5), so the 2x2 PTB confusion matrix works as well.

    Arguments:
        cm -- square confusion matrix (anything ``np.asarray`` accepts)

    Returns:
        1-D float array with one entry per class.
    """
    cm = np.asarray(cm)
    # float dtype kept for compatibility with the previous np.empty-based result
    return (cm.sum(axis=1) - np.diagonal(cm)).astype(float)

In [None]:
def create_new_cm(cm):
    """Wrap a confusion matrix in a DataFrame extended with FN and FP counts.

    An ``'FN'`` column (from ``false_neg_vec``) is added on the right and an
    ``'FP'`` row (from ``false_pos_vec``) is appended at the bottom. The
    ``'FP'`` row has no value in the ``'FN'`` column, so that cell is NaN
    after the concat.
    """
    with_fn = pd.DataFrame(data=cm).assign(FN=false_neg_vec(cm))
    fp_row = pd.DataFrame([false_pos_vec(cm)],index=["FP"])
    return pd.concat([with_fn, fp_row])

In [None]:
# Model 1 confusion matrix extended with an 'FN' column and an 'FP' row
cm_wout_fl_df = create_new_cm(cm_wout_fl)
cm_wout_fl_df

In [None]:
# Model 2 confusion matrix extended with an 'FN' column and an 'FP' row
cm_fl_df = create_new_cm(cm_fl)
cm_fl_df

* Class 1: Number of TP (diagonal) increased, but so did the number of FP (why recall increased but precision decreased)

# ROC Curves (one vs rest classification)

In [None]:
from sklearn import metrics

In [None]:
# New binary label function
def bin_labels(index,labels):
    """Turn multi-class labels into one-vs-rest labels for class ``index``.

    Arguments:
        index  -- the class treated as positive
        labels -- array-like of class labels; not modified

    Returns:
        New array with the dtype of ``labels``: 1 where ``labels == index``,
        0 everywhere else.

    The comparison is done directly rather than through a temporary
    sentinel value (previously 10), which mislabelled any input that
    already contained a label equal to 10.
    """
    labels = np.asarray(labels)
    return (labels == index).astype(labels.dtype)

In [None]:
def pre_process(index,data,shuffle=True):
    """Build a one-vs-rest data set for class ``index`` from a labelled table.

    Column 187 of ``data`` holds the class label; it is binarised with
    ``bin_labels(index, ...)``. When ``shuffle`` is true the rows are
    permuted with ``np.random.RandomState(seed=42)``; otherwise the original
    order is kept.

    Returns:
        tuple: ``(X, y, y_cat)`` - features, binary labels cast to ``int``,
        and ``to_categorical(y)``.

    NOTE(review): the ``shuffle`` parameter hides the notebook-level
    ``shuffle`` function inside this body.
    """
    features = np.asarray(data.drop(columns=[187]))
    binary = bin_labels(index, np.asarray(data[187]))
    if shuffle:
        row_ids = np.arange(int(features.shape[0]))
        order = np.random.RandomState(seed=42).permutation(row_ids)  # fixed seed -> reproducible order
        features, binary = features[order], binary[order]
    binary = binary.astype(int)
    return features, binary, to_categorical(binary)

using multi class probabilities

In [None]:
# One-vs-rest versions of the (shuffled) test labels, one per class
bin_labels0, bin_labels1, bin_labels2, bin_labels3, bin_labels4 = (
    bin_labels(class_id, test_labels) for class_id in range(5)
)

In [None]:
# ROC curve of each class: one-vs-rest labels against that class's output
# column of the focal-loss model
(
    (fpr0, tpr0, threshold0),
    (fpr1, tpr1, threshold1),
    (fpr2, tpr2, threshold2),
    (fpr3, tpr3, threshold3),
    (fpr4, tpr4, threshold4),
) = (
    metrics.roc_curve(one_vs_rest, pred_p_fl[:,class_id])
    for class_id, one_vs_rest in enumerate(
        (bin_labels0, bin_labels1, bin_labels2, bin_labels3, bin_labels4)
    )
)

In [None]:
# AUC of each class from the same one-vs-rest labels and output columns
auc0, auc1, auc2, auc3, auc4 = (
    roc_auc_score(one_vs_rest, pred_p_fl[:,class_id])
    for class_id, one_vs_rest in enumerate(
        (bin_labels0, bin_labels1, bin_labels2, bin_labels3, bin_labels4)
    )
)

In [None]:
# One-vs-rest ROC curves of the focal-loss model, all classes in one figure.
# Axis labels added so the figure can be read on its own.
plt.figure(figsize=(7,6))
plt.plot(fpr0, tpr0,'m',label='ROC curve of class 0',linewidth=2)
plt.plot(fpr1, tpr1,'c',label='ROC curve of class 1',linewidth=2)
plt.plot(fpr2, tpr2,'g',label='ROC curve of class 2',linewidth=2)
plt.plot(fpr3, tpr3,'b',label='ROC curve of class 3',linewidth=2)
plt.plot(fpr4, tpr4,'k',label='ROC curve of class 4',linewidth=2)
plt.plot([0,1],[0,1],'r',label='Random Guess',linestyle='--',linewidth=2)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()

**0 vs rest**

In [None]:
# Class-0-vs-rest model; loaded with the focal-loss closure under the key
# 'focal_loss_fixed'
filename = "../input/model-0vr/model_0vR"
model_0vR = load_model(filename,
                           custom_objects={'focal_loss_fixed': focal_loss_fnc(gamma=2,alpha=0.25)},
                           compile=True)
# Training history CSV stored under the model path
history_0vR = pd.read_csv("../input/model-0vr/model_0vR/history_0vR.csv")

In [None]:
# Unshuffled test set with binary labels (class 0 -> 1, every other class -> 0),
# plus the trailing channel axis
X_test0, test_labels0, y_test0 = pre_process(0,mit_test_data,shuffle=False)
X_test_new0 = X_test0.reshape(len(X_test0),187,1)

In [None]:
# Model outputs of the 0-vs-rest model and the arg-max binary label
pred_p_0vR = model_0vR.predict(X_test_new0)
y_test_0vR = pred_p_0vR.argmax(axis=1)

In [None]:
# Sanity check: display the binary true labels (1 = class 0, 0 = rest)
test_labels0

In [None]:
# Predicted binary labels of the 0-vs-rest model, for comparison with test_labels0
y_test_0vR

In [None]:
# ROC curve on 0vR model
# Output column 1 is used as the score for the positive label
# (presumably the "class 0" side of the binarised labels - confirm)
fpr0_new, tpr0_new, threshold0_new = metrics.roc_curve(test_labels0,pred_p_0vR[:,1])

In [None]:
# AUC of 0vR model (same labels and score column as its ROC curve)
auc0_new = roc_auc_score(test_labels0,pred_p_0vR[:,1])

In [None]:
# AUC difference: dedicated 0-vs-rest model minus column 0 of the 5-class model
# (positive means the dedicated model scores higher)
comp_auc0 = auc0_new - auc0
comp_auc0

In [None]:
# Plot both ROC curves: dedicated 0-vs-rest model vs. column 0 of the
# 5-class model. The legend call and axis labels were missing, so the
# label= texts were never shown.
plt.plot(fpr0_new, tpr0_new,label='ROC curve of class 0')
plt.plot(fpr0, tpr0,label='ROC curve of class 0 (method 2)')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()

**1 vs rest**

In [None]:
# Class-1-vs-rest model; loaded with the focal-loss closure under the key
# 'focal_loss_fixed'
filename = "../input/model-1vr/model_1vR"
model_1vR = load_model(filename,
                           custom_objects={'focal_loss_fixed': focal_loss_fnc(gamma=2,alpha=0.25)},
                           compile=True)
# Training history CSV stored under the model path
history_1vR = pd.read_csv("../input/model-1vr/model_1vR/history_1vR.csv")

In [None]:
# Unshuffled test set with binary labels (class 1 -> 1, every other class -> 0),
# plus the trailing channel axis
X_test1, test_labels1, y_test1 = pre_process(1,mit_test_data,shuffle=False)
X_test_new1 = X_test1.reshape(len(X_test1),187,1)

In [None]:
# Model outputs of the 1-vs-rest model and the arg-max binary label
pred_p_1vR = model_1vR.predict(X_test_new1)
y_test_1vR = pred_p_1vR.argmax(axis=1)

In [None]:
# Sanity check: display the binary true labels (1 = class 1, 0 = rest)
test_labels1

In [None]:
# Predicted binary labels of the 1-vs-rest model, for comparison with test_labels1
y_test_1vR

In [None]:
# ROC curve on 1vR model (output column 1 is used as the positive-label score)
fpr1_new, tpr1_new, threshold1_new = metrics.roc_curve(test_labels1,pred_p_1vR[:,1])

In [None]:
# AUC of 1vR model (same labels and score column as its ROC curve)
auc1_new = roc_auc_score(test_labels1,pred_p_1vR[:,1])
auc1_new

In [None]:
# AUC difference: dedicated 1-vs-rest model minus column 1 of the 5-class model
# (positive means the dedicated model scores higher)
comp_auc1 = auc1_new - auc1
comp_auc1

In [None]:
# Plot both ROC curves: dedicated 1-vs-rest model vs. column 1 of the
# 5-class model. The legend call and axis labels were missing, so the
# label= texts were never shown.
plt.plot(fpr1_new, tpr1_new,label='ROC curve of class 1')
plt.plot(fpr1, tpr1,label='ROC curve of class 1 (method 2)')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()

In [None]:
# Let's estimate our ROC curves using their technique

In [None]:
def true_pos(index,cm):
    """True positives of class ``index``: the diagonal entry ``cm[index, index]``."""
    return cm[index,index]

In [None]:
def true_neg(index,cm):
    """True negatives of class ``index`` in a one-vs-rest reading of ``cm``.

    A sample is a true negative for class ``index`` when neither its true
    label (row) nor its predicted label (column) is ``index``, i.e. every
    cell outside row ``index`` and column ``index``:

        TN = total - row_sum - col_sum + cm[index, index]

    (the diagonal cell is added back because it is removed with both the
    row and the column).

    The previous version summed only the *other diagonal* cells. That equals
    this definition for a 2x2 matrix but, with more classes, leaves out the
    off-diagonal cells among the other classes, lowering the specificity
    derived from it.

    Arguments:
        index -- class treated as positive
        cm    -- square confusion matrix (anything ``np.asarray`` accepts)
    """
    cm = np.asarray(cm)
    TN = cm.sum() - cm[index,:].sum() - cm[:,index].sum() + cm[index,index]
    return TN

In [None]:
def per_class_R(index,cm):
    """Recall (true-positive rate) of class ``index``: TP / (TP + FN).

    TP comes from ``true_pos`` and FN from ``false_neg``.
    """
    hits = true_pos(index,cm)
    misses = false_neg(index,cm)
    return hits/(hits + misses)

In [None]:
def per_class_SPE(index,cm):
    """Specificity of class ``index``: TN / (TN + FP).

    TN comes from ``true_neg`` and FP from ``false_pos``.
    """
    false_alarms = false_pos(index,cm)
    rejections = true_neg(index,cm)
    return rejections/(rejections + false_alarms)

In [None]:
# Single operating point per class from the focal-loss confusion matrix:
# TPR = recall, FPR = 1 - specificity
TPR0, TPR1, TPR2, TPR3, TPR4 = (
    per_class_R(class_id,cm_fl) for class_id in range(5)
)
FPR0, FPR1, FPR2, FPR3, FPR4 = (
    1-per_class_SPE(class_id,cm_fl) for class_id in range(5)
)

In [None]:
# Unweighted (macro) average of the five per-class rates
avg_FPR = 0.2*sum((FPR0, FPR1, FPR2, FPR3, FPR4))
avg_TPR = 0.2*sum((TPR0, TPR1, TPR2, TPR3, TPR4))

In [None]:
# ROC curves estimated from a single operating point per class: each curve
# is the polyline (0,0) -> (FPR, TPR) -> (1,1).
# Axis labels added so the figure can be read on its own.
plt.plot([0,avg_FPR,1],[0,avg_TPR,1],linestyle='--',linewidth=2,label='macro-average ROC')
plt.plot([0,FPR0,1],[0,TPR0,1],'m',label='ROC of class 0',linewidth=2)
plt.plot([0,FPR1,1],[0,TPR1,1],'c',label='ROC of class 1',linewidth=2)
plt.plot([0,FPR2,1],[0,TPR2,1],'g',label='ROC of class 2',linewidth=2)
plt.plot([0,FPR3,1],[0,TPR3,1],'b',label='ROC of class 3',linewidth=2)
plt.plot([0,FPR4,1],[0,TPR4,1],'k',label='ROC of class 4',linewidth=2)
plt.plot([0,1],[0,1],'r',linestyle='--',label='Random Guess',linewidth=2)
plt.xlim([0,1])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()

In [None]:
# One panel per class: full ROC curve (solid) against the single-point
# estimate (dashed, labelled 'Paper').
plt.figure(figsize=(8,10))

roc_panels = (
    ('N(0)', 'tab:red', fpr0, tpr0, FPR0, TPR0),
    ('S(1)', 'tab:blue', fpr1, tpr1, FPR1, TPR1),
    ('V(2)', 'tab:green', fpr2, tpr2, FPR2, TPR2),
    ('F(3)', 'tab:purple', fpr3, tpr3, FPR3, TPR3),
    ('Q(4)', 'tab:orange', fpr4, tpr4, FPR4, TPR4),
)
for panel_no, (panel_title, colour, fpr_curve, tpr_curve, fpr_point, tpr_point) in enumerate(roc_panels, start=1):
    plt.subplot(3,2,panel_no)
    plt.plot(fpr_curve, tpr_curve,colour,linewidth=2)
    plt.plot([0,fpr_point,1],[0,tpr_point,1],colour,linewidth=2,linestyle='--',label='Paper')
    plt.title(panel_title)
    plt.legend()
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')

plt.tight_layout()
plt.savefig("ROC_curves.png")

# PTBD Model/Data

In [None]:
# Load Data
# No header=None here: the first CSV row becomes the column names, which is
# why the label column is addressed as the string '187' below.
PTBD_train_data, PTBD_test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('/kaggle/input/ptbd-data/PTBD_train.csv',
                     '/kaggle/input/ptbd-data/PTBD_test.csv')
)

In [None]:
def pre_process(train):
    """Split a PTB table into features, labels and one-hot labels.

    The column named ``'187'`` (a string) holds the label; every other
    column is a feature. Rows are not shuffled.

    Returns:
        tuple: ``(X, y, y_cat)`` with ``y_cat = to_categorical(y)``.

    NOTE(review): this redefines ``pre_process``; the earlier
    ``pre_process(index, data, shuffle=True)`` is no longer reachable once
    this cell has run.
    """
    label_column = '187'
    targets = np.asarray(train[label_column])
    features = np.asarray(train.drop(columns=[label_column]))
    return features, targets, to_categorical(targets)

In [None]:
# Features / labels / one-hot labels for both PTB splits ...
X_train_ptbd, labels_ptbd, y_train_ptbd = pre_process(PTBD_train_data)
X_test_ptbd, test_labels_ptbd, y_test_ptbd = pre_process(PTBD_test_data)

# ... and the trailing channel axis: (n_samples, 187, 1)
X_train_new_ptbd = X_train_ptbd.reshape(len(X_train_ptbd),187,1)
X_test_new_ptbd = X_test_ptbd.reshape(len(X_test_ptbd),187,1)

In [None]:
# Display the PTB training labels
labels_ptbd

In [None]:
# Distinct PTB training labels and how often each occurs (class balance)
np.unique(labels_ptbd, return_counts=True)

In [None]:
# Load Model
# PTB model; loaded with the focal-loss closure under the key 'focal_loss_fixed'
filename = "../input/model-ptbd/model_PTBD"
model_ptbd = load_model(filename,
                           custom_objects={'focal_loss_fixed': focal_loss_fnc(gamma=2,alpha=0.25)},
                           compile=True)
# Training history CSV stored under the model path
history_ptbd = pd.read_csv("../input/model-ptbd/model_PTBD/history_PTBD.csv")

In [None]:
# Predict
# NOTE(review): y_test_ptbd held the one-hot test labels returned by
# pre_process; from here on it holds the PREDICTED class per sample.
pred_p_ptbd = model_ptbd.predict(X_test_new_ptbd)
y_test_ptbd = pred_p_ptbd.argmax(axis=1)

In [None]:
# Precision/recall/F1 of the PTB focal-loss model (true labels first, predictions second)
print("Classification report: PTBD Data-set")
print(classification_report(test_labels_ptbd, y_test_ptbd,digits=4))

In [None]:
# AUC of the PTB focal-loss model, using output column 1 as the score
auc_ptbd = roc_auc_score(test_labels_ptbd,pred_p_ptbd[:,1])
auc_ptbd

In [None]:
# Full ROC curve of the PTB focal-loss model (same labels and score column as the AUC)
fpr_ptbd, tpr_ptbd,threshold_ptbd = metrics.roc_curve(test_labels_ptbd,pred_p_ptbd[:,1])

In [None]:
# Confusion matrix of the PTB focal-loss model (true labels first, predictions second)
cm_ptbd = confusion_matrix(test_labels_ptbd, y_test_ptbd)
print("Confusion matrix: PTB Data-set")
print(cm_ptbd)

In [None]:
# Single operating point for label 1: TPR = recall, FPR = 1 - specificity
TPR_ptbd = per_class_R(1,cm_ptbd)
FPR_ptbd = 1-per_class_SPE(1,cm_ptbd)

In [None]:
# Full ROC curve (solid) against the single-operating-point estimate (dashed)
plt.plot(fpr_ptbd,tpr_ptbd)
plt.plot([0,FPR_ptbd,1],[0,TPR_ptbd,1],'--')
plt.title('PTB Data-set')
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')

PTB Model without focal loss

In [None]:
# Load Model
# PTB model from the "without focal loss" path: loaded with no custom objects
filename = "../input/model-ptbd-wout-fl/model_ptbd_wout_focal_loss"
model_ptbd_wout_fl = load_model(filename,
                           custom_objects=None,
                           compile=True)

In [None]:
# Predict
# Model outputs on the PTB test set, then the arg-max class per sample
# (y_test_ptbd_wout_fl holds predicted labels)
pred_p_ptbd_wout_fl = model_ptbd_wout_fl.predict(X_test_new_ptbd)
y_test_ptbd_wout_fl = pred_p_ptbd_wout_fl.argmax(axis=1)

In [None]:
# Precision/recall/F1 of the PTB model WITHOUT focal loss.
# NOTE(review): the printed heading is identical to the focal-loss report above.
print("Classification report: PTBD Data-set")
print(classification_report(test_labels_ptbd, y_test_ptbd_wout_fl,digits=4))

In [None]:
# AUC of the PTB model without focal loss, using output column 1 as the score
auc_ptbd_wout_fl = metrics.roc_auc_score(test_labels_ptbd,pred_p_ptbd_wout_fl[:,1])
auc_ptbd_wout_fl

In [None]:
# Full ROC curve of the PTB model without focal loss (same labels and score column as the AUC)
fpr_ptbd_wout_fl, tpr_ptbd_wout_fl,threshold_ptbd_wout_fl = metrics.roc_curve(test_labels_ptbd,pred_p_ptbd_wout_fl[:,1])

In [None]:
# Confusion matrix of the PTB model WITHOUT focal loss (true labels first, predictions second).
# NOTE(review): the printed heading is identical to the focal-loss one above.
cm_ptbd_wout_fl = confusion_matrix(test_labels_ptbd, y_test_ptbd_wout_fl)
print("Confusion matrix: PTB Data-set")
print(cm_ptbd_wout_fl)

In [None]:
# ROC curve of the PTB model without focal loss
plt.plot(fpr_ptbd_wout_fl,tpr_ptbd_wout_fl)
plt.title('PTB Data-set')
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')