In [None]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Configure GPU memory behaviour for the TF1-compat session used by this notebook.
config = ConfigProto()
# Upper bound on the share of GPU memory this process may claim (80%).
config.gpu_options.per_process_gpu_memory_fraction = 0.8
# Allocate GPU memory on demand instead of reserving it all up front.
config.gpu_options.allow_growth = True
# Open the session with the options above; kept in `session` so it stays alive.
session = InteractiveSession(config=config)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from sklearn.utils import class_weight
from sklearn.metrics import classification_report

In [None]:
def create_pd(train_path, test_path, n_columns=188):
    """Load the train and test CSV files and stack them into one DataFrame.

    The files are read with ``header=None``: they are expected to contain
    only data rows (the columns are relabelled ``0 .. n_columns - 1`` anyway),
    so letting pandas treat the first line as a header would silently drop
    one sample from each file.

    Parameters
    ----------
    train_path, test_path : str or path-like
        Locations of the two CSV files.
    n_columns : int, default 188
        Expected number of columns in each file (187 signal points followed
        by the label column in this notebook).

    Returns
    -------
    pandas.DataFrame
        Rows of both files, columns labelled with integers
        ``0 .. n_columns - 1``, sorted by their original per-file row index.

    Raises
    ------
    ValueError
        If either file does not have exactly ``n_columns`` columns.
    """
    frames = []
    for path in (train_path, test_path):
        frame = pd.read_csv(path, header=None)
        if frame.shape[1] != n_columns:
            raise ValueError(
                f"{path}: expected {n_columns} columns, found {frame.shape[1]}"
            )
        # Explicit integer labels so later code can address the label column as 187.
        frame.columns = list(range(n_columns))
        frames.append(frame)
    return pd.concat(frames, axis=0, join='inner').sort_index()
    

In [None]:
# Locations of the MIT-BIH train and test CSV files, relative to the working directory.
mit_train_path="../input/heartbeat/mitbih_train.csv"
mit_test_path="../input/heartbeat/mitbih_test.csv"

In [None]:
# Load both CSV files into a single DataFrame and preview its first rows.
mit= create_pd(mit_train_path,mit_test_path)
mit.head()

In [None]:
def create_k_folds_column(df, n_splits=5, label_col=187, random_state=None):
    """Return a shuffled copy of ``df`` with a stratified ``'kfold'`` column.

    The input frame is left untouched; all work happens on the shuffled copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Samples, with the class label stored in ``label_col``.
    n_splits : int, default 5
        Number of stratified folds.
    label_col : hashable, default 187
        Column holding the class label used for stratification.
    random_state : int or None, default None
        Seed for the row shuffle; pass an int for a reproducible assignment.

    Returns
    -------
    pandas.DataFrame
        Shuffled rows with a fresh 0..n-1 index and a ``'kfold'`` column whose
        value is the fold in which the row serves as held-out data.
    """
    shuffled = df.sample(frac=1, random_state=random_state).reset_index(drop=True)
    shuffled['kfold'] = -1
    labels = shuffled.loc[:, label_col].values
    splitter = StratifiedKFold(n_splits=n_splits)
    # split() yields (train_indices, held_out_indices); only the held-out
    # positions are tagged with the current fold number.
    for fold, (_, held_out_idx) in enumerate(splitter.split(X=shuffled, y=labels)):
        shuffled.loc[held_out_idx, 'kfold'] = fold
    return shuffled
    

In [None]:
# Shuffle the rows and tag each one with its stratified fold id in the 'kfold' column.
mit= create_k_folds_column(mit)

The MIT-BIH dataset consists of 109446 beats, labeled with 5 different classes:

'N': 0, 'S': 1, 'V': 2, 'F': 3, 'Q': 4

N : Non-ectopic beats (normal beats)

S : Supraventricular ectopic beats 

V : Ventricular ectopic beats

F : Fusion Beats 

Q : Unknown Beats

The PTBDB dataset consists of 14552 beats, labeled with two different classes:

'0' for normal beat
'1' for abnormal beat (Myocardial infarction)

All the beats are recorded with 187 points. The shorter beats are padded with zeros to reach 187.

In [None]:
# Preview again: after the fold assignment the frame also carries the 'kfold' column.
mit.head()

In [None]:
# Number of beats per class (column 187 is the label column used for training).
mit.loc[:,187].astype('int').value_counts()

Defining the model

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Activation, Flatten, Convolution1D, Dropout,MaxPooling1D,GlobalAveragePooling1D
from tensorflow.keras import Model, layers,Sequential,regularizers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler

In [None]:
def make_model(X_train):
    """Build and compile the 1D-CNN beat classifier.

    Architecture: four Conv1D layers (with one max-pool after the second),
    global average pooling, then three ReLU dense layers with dropout and a
    5-way softmax output. Compiled with Adam, categorical cross-entropy, and
    the metrics accuracy and AUC (in that order).

    Parameters
    ----------
    X_train
        Not used; the input shape is fixed to ``(187, 1)``.

    Returns
    -------
    A compiled ``Sequential`` model.
    """
    drop_rate = 0.3

    # Convolutional feature extractor.
    feature_layers = [
        Convolution1D(32, 5, activation='relu', input_shape=(187, 1)),
        Convolution1D(64, 5, activation='relu'),
        MaxPooling1D(3),
        Convolution1D(128, 3, activation='relu'),
        Convolution1D(256, 3, activation='relu'),
        GlobalAveragePooling1D(),
        Dropout(drop_rate),
        Flatten(),
    ]

    # Fully connected head: each hidden layer is followed by dropout.
    head_layers = []
    for units in (1024, 256, 32):
        head_layers.append(Dense(units, activation='relu'))
        head_layers.append(Dropout(drop_rate))
    head_layers.append(Dense(5, activation='softmax'))

    model = Sequential(feature_layers + head_layers)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')],
    )
    return model



In [None]:
def training_data(train, valid, num_classes=None):
    """Convert the train/validation frames into model-ready inputs and labels.

    Parameters
    ----------
    train, valid : pandas.DataFrame
        Frames whose first 187 columns are the signal and whose column at
        position 187 is the integer-valued class label.
    num_classes : int or None, default None
        Width of the one-hot labels. When None it is derived once from the
        largest label found in *either* split, so both label arrays always
        have the same number of columns even if one split lacks the highest
        class.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_valid, y_valid)`` where the X values have a
        trailing channel axis added (shape ``(n, 187, 1)``) and the y values
        are one-hot encoded.
    """
    n_features = 187

    X_train = train.iloc[:, :n_features].values
    y_train = train.iloc[:, n_features].values
    X_valid = valid.iloc[:, :n_features].values
    y_valid = valid.iloc[:, n_features].values

    # Conv1D expects (samples, steps, channels): add the single channel axis.
    X_train = tf.expand_dims(X_train, axis=2)
    X_valid = tf.expand_dims(X_valid, axis=2)

    if num_classes is None:
        num_classes = int(max(y_train.max(), y_valid.max())) + 1
    y_train = to_categorical(y_train, num_classes=num_classes)
    y_valid = to_categorical(y_valid, num_classes=num_classes)
    return X_train, y_train, X_valid, y_valid

**Fitting the model**

In [None]:
# Training hyper-parameters shared by every fold; Epochs is an upper bound
# because early stopping may end training sooner.
Epochs=100
Batch_size=64
# Stop once val_loss has not improved for 3 epochs (restoring the best weights), and
# multiply the learning rate by 0.1 after 2 stagnant epochs, never going below 1e-5.
my_callbacks = [EarlyStopping(patience=3,monitor='val_loss', mode='min',restore_best_weights=True),
               ReduceLROnPlateau(monitor='val_loss', factor=0.1,patience=2, min_lr=0.00001, mode='auto')]
# Filled by run_train with one formatted accuracy string per fold.
dict_acc={}
# Not referenced by any code visible in this part of the notebook.
dict_acc2={}

In [None]:
def run_train(fold):
    """Train, save and evaluate one model with ``fold`` as the held-out fold.

    Rows of the global ``mit`` frame whose ``kfold`` differs from ``fold`` are
    used for fitting (10% of them set aside by ``validation_split`` for the
    callbacks); rows with ``kfold == fold`` are used only for the final
    evaluation. The trained model is written to ``model{fold}.h5`` and the
    formatted accuracy is stored in the global ``dict_acc`` under the key
    ``str(fold)``.

    Parameters
    ----------
    fold : int
        Fold number to hold out.
    """
    train = mit[mit["kfold"] != fold].reset_index(drop=True)
    valid = mit[mit["kfold"] == fold].reset_index(drop=True)
    X_train, y_train, X_valid, y_valid = training_data(train, valid)

    model = make_model(X_train)
    model.fit(
        X_train,
        y_train,
        validation_split=0.1,
        batch_size=Batch_size,
        epochs=Epochs,
        callbacks=my_callbacks,
    )
    model.save(f'model{fold}.h5')

    # evaluate() returns [loss, accuracy, auc], matching the compile() metrics order.
    results = model.evaluate(X_valid, y_valid)
    print("Test Accuracy: {:.2f}%".format(results[1] * 100))
    print("     Test AUC: {:.4f}".format(results[2]))
    # Key by the function's own argument rather than a loop variable from the caller.
    dict_acc[f"{fold}"] = "Test Accuracy: {:.2f}%".format(results[1] * 100)
    
    

In [None]:
# Train and evaluate one model per fold, printing a banner before and a
# separator after each run.
for i in range(5):
    print(f"{i}-fold trained")
    run_train(i)
    print("_______________________________")
    print("_______________________________")

In [None]:
# Show the accuracy string recorded for each fold.
print(dict_acc)

The model trained on fold 1 has the best performance.

In [None]:
# Reload the model that run_train saved for fold 1 ('model1.h5').
mit_model=tf.keras.models.load_model('model1.h5')

In [None]:
# Print the layer-by-layer architecture of the reloaded model.
mit_model.summary()