In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# Any results you write to the current directory are saved as output.

# Author: Deepak Khatri
# Based on ECG Classification notebook from Gregoire DC.
# Some terms are taken from harmanbhutani's fork of Gregoire DC's notebook.

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from keras.utils.np_utils import to_categorical
from sklearn.utils import class_weight
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the train and test splits; the CSV files carry no header row, so the
# columns end up labelled by integer position
train_df, test_df = [
    pd.read_csv(csv_path, header = None)
    for csv_path in ("/kaggle/input/heartbeat/mitbih_train.csv",
                     "/kaggle/input/heartbeat/mitbih_test.csv")
]

In [None]:
# Number of training rows per class; column 187 is the column later used as the
# classification target
train_df[187].value_counts()

In [None]:
# Share of each class in the training split, drawn as a donut chart
per_class = train_df[187].value_counts()
# Labels and colours are looked up by class id instead of by position:
# value_counts() orders the slices by frequency, so a positional label list is
# only right for one particular class distribution.
# Class ids follow the dataset's published encoding (N=0, S=1, V=2, F=3, Q=4).
beat_labels = {0: 'normal beat', 1: 'Supraventricular ectopic beats', 2: 'Ventricular ectopic beats', 3: 'Fusion Beats', 4: 'unknown Beats'}
beat_colors = {0: 'tab:blue', 1: 'tab:olive', 2: 'tab:purple', 3: 'tab:green', 4: 'tab:orange'}
class_ids = [int(class_id) for class_id in per_class.index]
plt.figure(figsize=(20,10))
my_circle=plt.Circle( (0,0), 0.7, color='white')
plt.pie(per_class, labels=[beat_labels[class_id] for class_id in class_ids], colors=[beat_colors[class_id] for class_id in class_ids],autopct='%1.1f%%')
# A white disc over the centre turns the pie into a donut
p=plt.gcf()
p.gca().add_artist(my_circle)
plt.show()

## Resampling to balance the dataset

In [None]:
from sklearn.utils import resample
# Split the minority classes (1-4) into their own frames
df_1, df_2, df_3, df_4 = (
    train_df[train_df[187] == label] for label in (1, 2, 3, 4)
)
# Class 0 is down-sampled to 20000 rows (drawn without replacement)
df_0 = train_df[train_df[187] == 0].sample(n=20000, random_state=42)

# Every minority class is up-sampled with replacement to the same 20000 rows,
# each with its own fixed seed
df_1_upsample, df_2_upsample, df_3_upsample, df_4_upsample = (
    resample(minority, replace=True, n_samples=20000, random_state=seed)
    for minority, seed in zip((df_1, df_2, df_3, df_4), (123, 124, 125, 126))
)

# NOTE(review): train_df is rebound to the balanced frame, so re-running this
# cell resamples from already-balanced data rather than from the raw CSV
train_df = pd.concat(
    [df_0, df_1_upsample, df_2_upsample, df_3_upsample, df_4_upsample]
)

In [None]:
# Share of each class after resampling, drawn as a donut chart
per_class = train_df[187].value_counts()
# Labels and colours are looked up by class id instead of by position: once
# every class has the same count, the order value_counts() returns no longer
# matches a hard-coded label order and slices would get the wrong names.
# Class ids follow the dataset's published encoding (N=0, S=1, V=2, F=3, Q=4).
beat_labels = {0: 'normal beat', 1: 'Supraventricular ectopic beats', 2: 'Ventricular ectopic beats', 3: 'Fusion Beats', 4: 'unknown Beats'}
beat_colors = {0: 'tab:blue', 1: 'tab:olive', 2: 'tab:purple', 3: 'tab:green', 4: 'tab:orange'}
class_ids = [int(class_id) for class_id in per_class.index]
plt.figure(figsize=(20,10))
my_circle=plt.Circle( (0,0), 0.7, color='white')
plt.pie(per_class, labels=[beat_labels[class_id] for class_id in class_ids], colors=[beat_colors[class_id] for class_id in class_ids],autopct='%1.1f%%')
# A white disc over the centre turns the pie into a donut
p=plt.gcf()
p.gca().add_artist(my_circle)
plt.show()

## Classes

In [None]:
# Draw one example beat per class. The seed keeps the picked beats (and every
# figure built from them) stable across reruns, in line with the seeded
# resampling; the lambda argument no longer shadows the global train_df
classes=train_df.groupby(187,group_keys=False).apply(lambda class_rows : class_rows.sample(1, random_state=42))

In [None]:
# Display the sampled rows: one beat per class, label in column 187
classes

In [None]:
# Plot the sampled beat of every class in a 2x3 grid.
# Titles are derived from each row's own label (column 187) instead of from its
# position, so a panel can never be captioned with another class's name.
# Class ids follow the dataset's published encoding (N=0, S=1, V=2, F=3, Q=4).
beat_titles = {
    0: 'normal beat',
    1: 'Supraventricular ectopic beat',
    2: 'Ventricular ectopic beat',
    3: 'Fusion Beat',
    4: 'unknown Beat',
}
plt.figure(figsize=(16,8))
for panel, (_, beat) in enumerate(classes.iterrows(), start=1):
    plt.subplot(2, 3, panel)
    # first 186 columns are plotted as the signal
    plt.plot(beat.iloc[:186])
    plt.title(beat_titles[int(beat[187])])
# show plot
plt.show()

## Signal heatmap

In [None]:
def plot_hist(class_number,size,min_):
    """Draw a 2-D histogram (sample position vs. amplitude) for one class.

    Reads the global ``train_df``, keeps the rows whose column 187 equals
    ``class_number``, crops them to columns ``min_`` .. ``size - 1`` and bins
    every (column position, value) pair into an 80x80 ``hist2d`` on the
    current axes, then calls ``plt.show()``.

    Parameters
    ----------
    class_number : label value to select in column 187.
    size : exclusive upper column bound of the crop.
    min_ : inclusive lower column bound of the crop.
    """
    # get the class data
    img=train_df.loc[train_df[187]==class_number].values
    # crop it
    img=img[:,min_:size]
    # row-major flatten: one long stream, row after row
    img_flatten=img.flatten()

    # x coordinate of every flattened value: the column range repeated once per
    # row. np.tile builds it in one allocation instead of re-concatenating a
    # growing array for every row.
    final1=np.tile(np.arange(min_,size), img.shape[0])
    print(len(final1))
    print(len(img_flatten))
    plt.hist2d(final1,img_flatten, bins=(80,80),cmap=plt.cm.jet)
    plt.show()

In [None]:
# Normal beat (class 0): the sampled trace on the left, the class-wide
# position/amplitude histogram on the right
fig = plt.figure(figsize=(12,4))
trace_ax = fig.add_subplot(1, 2, 1)
trace_ax.plot(classes.iloc[0,:186])
trace_ax.set_title('Normal beat')
# the newly added axes become the current axes, which is where plot_hist draws
fig.add_subplot(1, 2, 2)
plot_hist(0,70,5)

In [None]:
# Supraventricular ectopic beat: row 1 of `classes` and plot_hist(1, ...) both
# select class 1, which is 'S' in the dataset's N=0, S=1, V=2, F=3, Q=4
# encoding (the panel was previously captioned 'unknown Beat')
plt.figure(figsize=(12,4))
plt.subplot(1, 2, 1)
plt.plot(classes.iloc[1,:186])
plt.title('Supraventricular ectopic beat')
plt.subplot(1, 2, 2)
plot_hist(1,50,5)

In [None]:
# Ventricular ectopic beat (class 2): the sampled trace on the left, the
# class-wide position/amplitude histogram on the right
fig = plt.figure(figsize=(12,4))
trace_ax = fig.add_subplot(1, 2, 1)
trace_ax.plot(classes.iloc[2,:186])
trace_ax.set_title('Ventricular ectopic beat')
# the newly added axes become the current axes, which is where plot_hist draws
fig.add_subplot(1, 2, 2)
plot_hist(2,60,30)

In [None]:
# Fusion beat: row 3 of `classes` and plot_hist(3, ...) both select class 3,
# which is 'F' in the dataset's N=0, S=1, V=2, F=3, Q=4 encoding (the panel
# was previously captioned 'Supraventricular ectopic beat')
plt.figure(figsize=(12,4))
plt.subplot(1, 2, 1)
plt.plot(classes.iloc[3,:186])
plt.title('Fusion Beat')
plt.subplot(1, 2, 2)
plot_hist(3,60,25)

In [None]:
# Unknown beat: row 4 of `classes` and plot_hist(4, ...) both select class 4,
# which is 'Q' in the dataset's N=0, S=1, V=2, F=3, Q=4 encoding (the panel
# was previously captioned 'Fusion Beat')
plt.figure(figsize=(12,4))
plt.subplot(1, 2, 1)
plt.plot(classes.iloc[4,:186])
plt.title('unknown Beat')
plt.subplot(1, 2, 2)
plot_hist(4,50,18)

## Preprocessing

In [None]:
def add_gaussian_noise(signal, mean=0.0, std=0.05):
    """Return ``signal`` with independent Gaussian noise added to every sample.

    The noise array is sized from ``signal`` itself, so the function works for
    any signal length rather than only for 186-sample beats.

    Parameters
    ----------
    signal : array-like (e.g. NumPy array or pandas Series) of samples.
    mean : mean of the noise distribution (default 0.0).
    std : standard deviation of the noise distribution (default 0.05).

    Returns
    -------
    ``signal + noise``; the input is not modified.
    """
    # draws from NumPy's global random state, as before
    noise=np.random.normal(mean,std,np.shape(signal))
    return (signal+noise)

In [None]:
# Compare one clean beat (top panel) with its noise-augmented copy (bottom panel)
tempo=classes.iloc[0,:186]
bruiter=add_gaussian_noise(tempo)

for panel, trace in enumerate((tempo, bruiter), start=1):
    plt.subplot(2,1,panel)
    plt.plot(trace)

plt.show()

In [None]:
# Class labels are read from column 187 of both splits
target_train, target_test = train_df[187], test_df[187]
# One-hot versions of the same labels, used as the network's training targets
y_train, y_test = (to_categorical(labels) for labels in (target_train, target_test))

In [None]:
# Take the first 186 columns of each split as a plain array and append a
# trailing length-1 axis, giving (samples, 186, 1).
# NOTE(review): the slice stops before column 186 as well as before the label
# column 187 - confirm that leaving column 186 out of the features is intended.
# (Per-row Gaussian-noise augmentation of X_train was left disabled here.)
X_train = np.expand_dims(train_df.iloc[:, :186].values, axis=-1)
X_test = np.expand_dims(test_df.iloc[:, :186].values, axis=-1)

In [None]:
# Network
def network(X_train,y_train,X_test,y_test):
    """Build, train and return a 1-D CNN classifier plus its fit history.

    Architecture: three Conv1D(64) -> BatchNormalization -> MaxPool1D stages
    (kernel sizes 6, 3, 3; pool sizes 3, 2, 2; stride 2, 'same' padding),
    then Flatten, Dense(64), Dense(32) and a 5-way softmax output. Compiled
    with Adam and categorical cross-entropy.

    ``X_test``/``y_test`` are passed to ``fit`` as validation data, and the
    checkpoint with the lowest ``val_loss`` is written to ``best_model.h5``
    and reloaded before returning.

    NOTE(review): training runs for 5 epochs while EarlyStopping uses
    patience=8, so early stopping does not appear able to trigger here.

    Returns
    -------
    (model, history) : the trained model and the object returned by ``fit``.
    """
    im_shape=(X_train.shape[1],1)
    inputs_cnn=Input(shape=(im_shape), name='inputs_cnn')

    # (conv kernel size, pool size) of the three convolutional stages
    features = inputs_cnn
    for kernel_width, pool_width in ((6, 3), (3, 2), (3, 2)):
        features = Convolution1D(64, kernel_width, activation='relu', input_shape=im_shape)(features)
        features = BatchNormalization()(features)
        features = MaxPool1D(pool_size=pool_width, strides=2, padding="same")(features)

    # dense head
    hidden = Flatten()(features)
    for units in (64, 32):
        hidden = Dense(units, activation='relu')(hidden)
    main_output = Dense(5, activation='softmax', name='main_output')(hidden)

    model = Model(inputs=inputs_cnn, outputs=main_output)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    stop_early = EarlyStopping(monitor='val_loss', patience=8)
    keep_best = ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True)

    history = model.fit(
        X_train, y_train,
        epochs=5,
        callbacks=[stop_early, keep_best],
        batch_size=32,
        validation_data=(X_test, y_test),
    )
    # continue with the best checkpoint rather than the last epoch's weights
    model.load_weights('best_model.h5')
    return model, history

In [None]:
def evaluate_model(history,X_test,y_test,model):
    """Report test accuracy, plot the training curves and return the confusion matrix.

    Parameters
    ----------
    history : object returned by ``model.fit``; its ``history`` dict must hold
        the keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
    X_test : inputs passed to ``model.evaluate`` / ``model.predict``.
    y_test : one-hot encoded targets matching ``X_test``.
    model : trained model whose second ``evaluate`` score is the accuracy.

    Returns
    -------
    Confusion matrix of true vs. predicted class indices. It was previously
    computed and then discarded; callers that ignore the return value are
    unaffected.
    """
    scores = model.evaluate((X_test),y_test, verbose=0)
    print("Accuracy: %.2f%%" % (scores[1]*100))

    # accuracy curves
    fig1, ax_acc = plt.subplots()
    ax_acc.plot(history.history['accuracy'])
    ax_acc.plot(history.history['val_accuracy'])
    ax_acc.set_xlabel('Epoch')
    ax_acc.set_ylabel('Accuracy')
    ax_acc.set_title('Model - Accuracy')
    ax_acc.legend(['Training', 'Validation'], loc='lower right')
    plt.show()

    # loss curves; the legend has to be created after the lines exist,
    # otherwise it has nothing to attach the labels to
    fig2, ax_loss = plt.subplots()
    ax_loss.plot(history.history['loss'])
    ax_loss.plot(history.history['val_loss'])
    ax_loss.set_xlabel('Epoch')
    ax_loss.set_ylabel('Loss')
    ax_loss.set_title('Model- Loss')
    ax_loss.legend(['Training', 'Validation'], loc='upper right')
    plt.show()

    # one-hot targets and predicted probabilities -> class indices
    y_true=np.argmax(y_test,axis=1)
    prediction_proba=model.predict(X_test)
    prediction=np.argmax(prediction_proba,axis=1)
    cnf_matrix = confusion_matrix(y_true, prediction)
    return cnf_matrix

In [None]:
from keras.layers import Dense, Convolution1D, MaxPool1D, Flatten, Dropout
from keras.layers import Input
from keras.models import Model
# BatchNormalization is imported from keras.layers, its public location; the
# keras.layers.normalization module path is not importable on newer Keras
from keras.layers import BatchNormalization
import keras
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Train the CNN.
# NOTE(review): the test split is handed in as validation data, so checkpoint
# selection on val_loss is made on the same data later used for evaluation.
model,history=network(X_train,y_train,X_test,y_test)

In [None]:
# Print the CNN's test accuracy and plot its training curves, then keep the
# softmax outputs predicted for the test split
evaluate_model(history,X_test,y_test,model)
y_pred=model.predict(X_test)

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn import svm, datasets
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.svm import SVC
from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
from sklearn.metrics import confusion_matrix
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

In [None]:

# Fit the label encoder once, on the training labels, and reuse that single
# mapping for the test labels. Re-fitting on the test split could assign
# different integers to the same class if the two label sets ever differed,
# and would leave `encoder` describing the test split instead of the data the
# models were trained on.
encoder = preprocessing.LabelEncoder()
Y_train = encoder.fit_transform(target_train)
Y_test = encoder.transform(target_test)

In [None]:
# Collapse each 3-D sample array to 2-D (one flat feature row per sample) for
# the scikit-learn estimators
train_dataset = X_train.reshape(X_train.shape[0], -1)
test_dataset = X_test.reshape(X_test.shape[0], -1)


In [None]:
# Per-model score accumulators (accuracy, precision, recall, F1) and the
# matching model names; every model appends one entry to each list
accu, prec, recc, f1, models = [], [], [], [], []

In [None]:
# Alias of the encoded test labels, used by the metric cells
ytest = Y_test

<h1>Decision Tree</h1>


In [None]:

# Fit a depth-limited decision tree on the flattened beats and predict the test
# split. random_state is pinned (as for the logistic regression) so the tree
# and every score derived from it are reproducible across runs.
dtree_model = DecisionTreeClassifier(max_depth = 100, random_state=0).fit(train_dataset, Y_train)
dt_pred = dtree_model.predict(test_dataset)

# Test-set accuracy, shown as the cell output
accuracy_score(Y_test, dt_pred)

In [None]:
# Score the decision tree once (macro-averaged precision/recall/F1), then
# print and record the same numbers
dt_scores = {
    'Accuracy Score : ': accuracy_score(ytest, dt_pred),
    'Precision Score : ': precision_score(ytest,dt_pred,average='macro'),
    'Recall Score : ': recall_score(ytest,dt_pred,average='macro'),
    'F1 Score : ': f1_score(ytest,dt_pred,average='macro'),
}
for caption, value in dt_scores.items():
    print(caption + str(value))

# Confusion matrix of the decision-tree predictions
print('Confusion Matrix : \n' + str(confusion_matrix(ytest,dt_pred)))

# Append to the shared comparison lists, in accuracy/precision/recall/F1 order
for bucket, value in zip((accu, prec, recc, f1), dt_scores.values()):
    bucket.append(value)
models.append("Decision Tree" )

<h1>Logistic Regression</h1>

In [None]:
# Fit a logistic-regression baseline (fixed seed, otherwise default settings)
# on the flattened beats and predict the test split
clf = LogisticRegression(random_state=0).fit(train_dataset, Y_train)
lr_pred = clf.predict(test_dataset)



In [None]:
# Score the logistic regression once (macro-averaged precision/recall/F1),
# then print and record the same numbers
lr_scores = {
    'Accuracy Score : ': accuracy_score(ytest, lr_pred),
    'Precision Score : ': precision_score(ytest,lr_pred,average='macro'),
    'Recall Score : ': recall_score(ytest,lr_pred,average='macro'),
    'F1 Score : ': f1_score(ytest,lr_pred,average='macro'),
}
for caption, value in lr_scores.items():
    print(caption + str(value))

# Confusion matrix of the logistic-regression predictions
print('Confusion Matrix : \n' + str(confusion_matrix(ytest,lr_pred)))

# Append to the shared comparison lists, in accuracy/precision/recall/F1 order
for bucket, value in zip((accu, prec, recc, f1), lr_scores.values()):
    bucket.append(value)
models.append("Logistic Regression" )

# Gaussian Naive Bayes

In [None]:
# Fit Gaussian naive Bayes on the flattened beats and predict the test split
gnb = GaussianNB()
nb_pred = gnb.fit(train_dataset, Y_train).predict(test_dataset)


# Test-set accuracy, shown as the cell output
accuracy_score(ytest,nb_pred)

In [None]:
# Test-set metrics of the Gaussian naive Bayes model (macro-averaged)
nb_report = (
    ('Accuracy Score : ', accuracy_score(ytest, nb_pred)),
    ('Precision Score : ', precision_score(ytest,nb_pred,average='macro')),
    ('Recall Score : ', recall_score(ytest,nb_pred,average='macro')),
    ('F1 Score : ', f1_score(ytest,nb_pred,average='macro')),
)
for caption, value in nb_report:
    print(caption + str(value))

# Confusion matrix of the naive Bayes predictions
print('Confusion Matrix : \n' + str(confusion_matrix(ytest,nb_pred)))

In [None]:
# Record the naive Bayes test scores (macro-averaged) for the model comparison
accu.append(accuracy_score(ytest, nb_pred))
for bucket, metric in zip((prec, recc, f1), (precision_score, recall_score, f1_score)):
    bucket.append(metric(ytest, nb_pred, average='macro'))
models.append("Gaussian naive_bayes" )

# Random forest

In [None]:
# Fit a random forest on the flattened beats and predict the test split.
# random_state is pinned (as for the logistic regression) so the forest and
# every score derived from it are reproducible across runs.
RF = RandomForestClassifier(random_state=0).fit(train_dataset, Y_train)
rf_pred = RF.predict(test_dataset)

In [None]:
# Test-set metrics of the random forest (macro-averaged)
rf_report = (
    ('Accuracy Score : ', accuracy_score(ytest, rf_pred)),
    ('Precision Score : ', precision_score(ytest,rf_pred,average='macro')),
    ('Recall Score : ', recall_score(ytest,rf_pred,average='macro')),
    ('F1 Score : ', f1_score(ytest,rf_pred,average='macro')),
)
for caption, value in rf_report:
    print(caption + str(value))

# Confusion matrix of the random-forest predictions
print('Confusion Matrix : \n' + str(confusion_matrix(ytest,rf_pred)))

In [None]:
# Record the random-forest test scores (macro-averaged) for the model comparison
accu.append(accuracy_score(ytest, rf_pred))
for bucket, metric in zip((prec, recc, f1), (precision_score, recall_score, f1_score)):
    bucket.append(metric(ytest, rf_pred, average='macro'))
models.append("Random forrest" )

In [None]:
# One line per model recorded so far, across the four metrics
x = ['accuracy','precision','recall','f1']
plt.figure(figsize=(10,10))
plt.title('score')
# each row is (model name, accuracy, precision, recall, f1)
for model_name, *model_scores in zip(models, accu, prec, recc, f1):
    plt.plot(x, model_scores, label = model_name)
plt.legend()
# written to the notebook's working directory
plt.savefig('curves.png')
plt.show()

# SVM

In [None]:
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [None]:
# Feature selection -> standardisation -> SVC, evaluated as one estimator.
# NOTE(review): the selection step is named 'anova' but scores features with
# chi2; the name is kept because the percentile is set via `anova__percentile`.
# This also rebinds `clf`, which earlier held the logistic-regression model.
clf = Pipeline([('anova', SelectPercentile(chi2)),
                ('scaler', StandardScaler()),
                ('svc', SVC(gamma="auto"))])

In [None]:
# Cross-validate the pipeline for a range of feature-selection percentiles,
# keeping the mean and spread of the fold scores for each setting
percentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100)
score_means, score_stds = [], []

for percentile in percentiles:
    # progress marker: one cross-validation run per percentile
    print(percentile)
    clf.set_params(anova__percentile=percentile)
    this_scores = cross_val_score(clf, train_dataset, Y_train)
    score_means.append(this_scores.mean())
    score_stds.append(this_scores.std())

In [None]:
# Mean cross-validation score per percentile, with the standard deviation
# across folds as error bars
plt.errorbar(percentiles, score_means, np.array(score_stds))
plt.title(
    'Performance of the SVM-Anova varying the percentile of features selected')
# ticks every 10 percentage points from 0 to 100
plt.xticks(np.linspace(0, 100, 11, endpoint=True))
plt.xlabel('Percentile')
plt.ylabel('Accuracy Score')
plt.axis('tight')
plt.show()


<h1>SVM</h1>


In [None]:
# Peek at the first two flattened training rows (the trailing [:] is a no-op slice)
train_dataset[:2][:]

In [None]:
# Two sub-grids: RBF kernels over gamma and C, linear kernels over C only
params_grid = [
    {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]},
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
]
# 5-fold grid search; the kernel given to the base SVC is replaced by each grid
# entry, and max_iter=1000 caps the solver iterations of every fit
svm_model = GridSearchCV(
    SVC(kernel='linear', max_iter=1000, verbose=True),
    params_grid,
    cv=5,
)
svm_model.fit(train_dataset, Y_train)

In [None]:
# Best score found by the grid search (GridSearchCV's best_score_)
print('Best score for training data:', svm_model.best_score_,"\n") 

# Hyper-parameters of the best estimator found by the grid search
print('Best C:',svm_model.best_estimator_.C,"\n") 
print('Best Kernel:',svm_model.best_estimator_.kernel,"\n")
print('Best Gamma:',svm_model.best_estimator_.gamma,"\n")

# Predict the test split with the best estimator and map the encoded
# predictions back to the original label values
final_model = svm_model.best_estimator_
svm_pred = final_model.predict(test_dataset)
Y_pred_label = list(encoder.inverse_transform(svm_pred))

In [None]:
# Y_pred_label holds decoded (original) label values, so it is compared with
# the original test labels rather than with the encoded Y_test
accuracy_score(target_test, Y_pred_label)

In [None]:
# Linear-kernel SVM on the flattened beats; max_iter caps the solver iterations
svm_lin = SVC(
    kernel='linear',
    C=10,
    max_iter=10000,
    verbose=True,
).fit(train_dataset, Y_train)

In [None]:
# Predict the test split with the linear-kernel SVM
svm_pred1 = svm_lin.predict(test_dataset)

In [None]:
# Test-set metrics of the linear-kernel SVM.
# NOTE(review): precision/recall/F1 use average='weighted' here, whereas the
# tree, logistic-regression, naive-Bayes and random-forest cells use 'macro'.
print('Accuracy Score : ' + str(accuracy_score(ytest, svm_pred1)))
print('Precision Score : ' + str(precision_score(ytest,svm_pred1,average='weighted')))
print('Recall Score : ' + str(recall_score(ytest,svm_pred1,average='weighted')))
print('F1 Score : ' + str(f1_score(ytest,svm_pred1,average='weighted')))

# Confusion matrix of the linear SVM's own predictions (svm_pred1); the cell
# previously referenced svm_pred2, which belongs to the polynomial SVM and is
# not defined yet at this point
print('Confusion Matrix : \n' + str(confusion_matrix(ytest,svm_pred1)))

In [None]:
# Record the linear SVM's test scores for the model comparison.
# NOTE(review): precision/recall/F1 use average='weighted' here, whereas the
# tree, logistic-regression, naive-Bayes and random-forest cells use 'macro'.
accu.append(accuracy_score(ytest, svm_pred1))
for bucket, metric in zip((prec, recc, f1), (precision_score, recall_score, f1_score)):
    bucket.append(metric(ytest, svm_pred1, average='weighted'))
models.append("linear SVM" )

In [None]:
# Polynomial-kernel SVM on the flattened beats; max_iter caps the solver iterations
svm_pol = SVC(
    kernel='poly',
    C=10,
    max_iter=10000,
    verbose=True,
).fit(train_dataset, Y_train)

In [None]:
# Predict the test split with the polynomial-kernel SVM
svm_pred2 = svm_pol.predict(test_dataset)

In [None]:
# Test-set metrics of the polynomial-kernel SVM.
# NOTE(review): precision/recall/F1 use average='weighted' here, whereas the
# tree, logistic-regression, naive-Bayes and random-forest cells use 'macro'.
svm_pol_report = (
    ('Accuracy Score : ', accuracy_score(ytest, svm_pred2)),
    ('Precision Score : ', precision_score(ytest,svm_pred2,average='weighted')),
    ('Recall Score : ', recall_score(ytest,svm_pred2,average='weighted')),
    ('F1 Score : ', f1_score(ytest,svm_pred2,average='weighted')),
)
for caption, value in svm_pol_report:
    print(caption + str(value))

# Confusion matrix of the polynomial SVM predictions
print('Confusion Matrix : \n' + str(confusion_matrix(ytest,svm_pred2)))

In [None]:
# Record the polynomial SVM's test scores for the model comparison.
# NOTE(review): precision/recall/F1 use average='weighted' here, whereas the
# tree, logistic-regression, naive-Bayes and random-forest cells use 'macro'.
accu.append(accuracy_score(ytest, svm_pred2))
for bucket, metric in zip((prec, recc, f1), (precision_score, recall_score, f1_score)):
    bucket.append(metric(ytest, svm_pred2, average='weighted'))
models.append("polynomial SVM" )

In [None]:
# RBF-kernel SVM on the flattened beats; max_iter caps the solver iterations
svm_rbf = SVC(
    kernel='rbf',
    C=10,
    max_iter=10000,
    verbose=True,
).fit(train_dataset, Y_train)

In [None]:
# Predict the test split with the RBF-kernel SVM
svm_pred3 = svm_rbf.predict(test_dataset)

In [None]:
# Test-set metrics of the RBF-kernel SVM.
# NOTE(review): precision/recall/F1 use average='weighted' here, whereas the
# tree, logistic-regression, naive-Bayes and random-forest cells use 'macro'.
svm_rbf_report = (
    ('Accuracy Score : ', accuracy_score(ytest, svm_pred3)),
    ('Precision Score : ', precision_score(ytest,svm_pred3,average='weighted')),
    ('Recall Score : ', recall_score(ytest,svm_pred3,average='weighted')),
    ('F1 Score : ', f1_score(ytest,svm_pred3,average='weighted')),
)
for caption, value in svm_rbf_report:
    print(caption + str(value))

# Confusion matrix of the RBF SVM predictions
print('Confusion Matrix : \n' + str(confusion_matrix(ytest,svm_pred3)))

In [None]:
# Record the RBF SVM's test scores for the model comparison.
# NOTE(review): precision/recall/F1 use average='weighted' here, whereas the
# tree, logistic-regression, naive-Bayes and random-forest cells use 'macro'.
accu.append(accuracy_score(ytest, svm_pred3))
for bucket, metric in zip((prec, recc, f1), (precision_score, recall_score, f1_score)):
    bucket.append(metric(ytest, svm_pred3, average='weighted'))
models.append("rbf SVM" )

In [None]:
# Sigmoid-kernel SVM on the flattened beats; max_iter caps the solver iterations
svm_sig = SVC(
    kernel='sigmoid',
    C=10,
    max_iter=10000,
    verbose=True,
).fit(train_dataset, Y_train)

In [None]:
# Predict the test split with the sigmoid-kernel SVM
svm_pred4 = svm_sig.predict(test_dataset)

In [None]:
# Test-set metrics of the sigmoid-kernel SVM.
# NOTE(review): precision/recall/F1 use average='weighted' here, whereas the
# tree, logistic-regression, naive-Bayes and random-forest cells use 'macro'.
svm_sig_report = (
    ('Accuracy Score : ', accuracy_score(ytest, svm_pred4)),
    ('Precision Score : ', precision_score(ytest,svm_pred4,average='weighted')),
    ('Recall Score : ', recall_score(ytest,svm_pred4,average='weighted')),
    ('F1 Score : ', f1_score(ytest,svm_pred4,average='weighted')),
)
for caption, value in svm_sig_report:
    print(caption + str(value))

# Confusion matrix of the sigmoid SVM predictions
print('Confusion Matrix : \n' + str(confusion_matrix(ytest,svm_pred4)))

In [None]:
# Record the sigmoid SVM's test scores for the model comparison.
# NOTE(review): precision/recall/F1 use average='weighted' here, whereas the
# tree, logistic-regression, naive-Bayes and random-forest cells use 'macro'.
accu.append(accuracy_score(ytest, svm_pred4))
for bucket, metric in zip((prec, recc, f1), (precision_score, recall_score, f1_score)):
    bucket.append(metric(ytest, svm_pred4, average='weighted'))
models.append("sigmoid SVM" )

In [None]:
# Compare every recorded model across the four metrics
x = ['accuracy','precision','recall','f1']
plt.figure(figsize=(10,10))
plt.title('score')
# each row is (model name, accuracy, precision, recall, f1)
score_rows = list(zip(models, accu, prec, recc, f1))
# the first four recorded models are drawn with the default line style
for model_name, *model_scores in score_rows[:4]:
    plt.plot(x, model_scores, label = model_name)
# entries five to eight (the SVM variants) are drawn dashed
for model_name, *model_scores in score_rows[4:8]:
    plt.plot(x, model_scores, label = model_name,linestyle='dashed')
plt.legend()
# NOTE(review): same file name as the earlier comparison plot, which it overwrites
plt.savefig('curves.png')
plt.show()

In [None]:
# Collect the recorded scores into one table, one row per model
scores = pd.DataFrame({'model':models,'accuracy':accu,'precision':prec,'recall':recc,'f1 score' :f1})

In [None]:
# Write the score table to scores.csv in the working directory
scores.to_csv('scores.csv')

In [None]:
# Display the score table as the cell output
scores