In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns
import time
from sklearn.decomposition import PCA
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import resample
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Activation

In [None]:
# Load the MIT-BIH Arrhythmia train and test CSV files.
# header=None: the files are read without a header row, so the columns are
# labelled with integers starting at 0.
# pd.read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrap
# is needed.
train_data = pd.read_csv("mitbih_train.csv", header=None)
test_data = pd.read_csv("mitbih_test.csv", header=None)

In [None]:
# Preview the first five rows of the training set

train_data.head(n=5)

In [None]:
# Preview the first five rows of the test set

test_data.head(n=5)

In [None]:
# Describe the data frame: per-column summary statistics of the training set
train_data.describe()

In [None]:
# (number of rows, number of columns) of the training frame
train_data.shape

In [None]:
# Cast the target column (187) to integers; all other columns keep their dtype
train_data = train_data.astype({187: int})

In [None]:
# Preview the first five rows again to check the integer target column
train_data.head(n=5)

In [None]:
# Define features and target.
# Column 187 holds the class label; every other column is used as a feature.
X = train_data.drop(columns=187)

# Keep the target as a 1-D Series of integer class labels. The scikit-learn
# classifiers fitted further down expect 1-D labels, so the target is NOT
# one-hot encoded here (one-hot encoding with to_categorical is only needed
# for a Keras model trained with categorical cross-entropy).
y = train_data[187]

In [None]:
# Shape of the feature matrix X: (number of rows, number of feature columns)
X.shape

In [None]:
# Shape of the target y; its first entry should equal the number of rows in X
y.shape

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Standardise the features: the scaler is fitted on the training split only
# and the same fitted transformation is then applied to the test split.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Experiments: fit three baseline classifiers on the scaled features and
# record each one's accuracy on the held-out split.
#
# NOTE: the PCA sweep that used to wrap this cell was commented out, and the
# surrounding loop ran exactly once (it ended in an unconditional `break`)
# while still printing a component count. The loop and that misleading print
# are removed. If PCA is re-enabled, sweep over the number of feature columns
# (not the number of rows) and project into new variables instead of
# overwriting X_train / X_test on every iteration.

# scikit-learn classifiers expect 1-D label arrays; flattening also accepts a
# single-column frame without triggering a conversion warning.
y_train_labels = np.ravel(y_train)
y_test_labels = np.ravel(y_test)

# Estimators keep their own names so they stay available after the cell runs.
nb = BernoulliNB()
knn = KNeighborsClassifier(n_neighbors=3)
dt = DecisionTreeClassifier(max_depth=5, random_state=42)  # seeded for reproducibility

# (banner text, name used in the printed messages, estimator)
classifiers = [
    ('BernoulliNB', 'BernoulliNB', nb),
    ('KNN', 'KNN', knn),
    ('DECISION TREE', 'Decision Tree', dt),
]

# One accuracy per classifier, in the same order as `classifiers`
test_scores_models = []

for banner, label, clf in classifiers:
    print(f"------------------------{banner}------------------------\n")

    # Time fitting plus a single scoring pass
    start = time.perf_counter()
    clf.fit(X_train, y_train_labels)
    # Score once and reuse the value: scoring (KNN in particular) is expensive
    accuracy = clf.score(X_test, y_test_labels)
    end = time.perf_counter()

    print(f'Accuracy of {label} is', accuracy)
    test_scores_models.append(accuracy)

    print(f'{label} took {end-start} seconds to run\n')


In [None]:
# Bar plot of the test accuracy recorded for each classifier

models = ['BernoulliNB', 'KNN', 'DECISION TREE']

# Draw on the current axes through the explicit Axes interface
ax = plt.gca()
ax.bar(models, test_scores_models)
ax.set_xlabel('Classifier Models')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracies of different classifier models')

plt.show()

In [None]:
# ================================================================================================================

In [None]:
# Count plot of the class labels (column 187). Most samples are normal
# heartbeats, i.e. the data set is imbalanced.

sns.catplot(data=train_data, x=187, kind='count')

In [None]:
# One sub-frame per class label (0-4), selected on the label column 187
class_0, class_1, class_2, class_3, class_4 = (
    train_data[train_data[187] == label] for label in range(5)
)

# Re-assemble the training frame with classes 1-4 first and class 0 last
train_data = pd.concat([class_1, class_2, class_3, class_4, class_0])

In [None]:
sns.set_style('whitegrid')
plt.figure(figsize = (20,8))

# Plot columns 0-186 of the first row. The line carries a label (built from
# the row's value in column 187) so that plt.legend() has an entry to show;
# without any labelled artist the legend is empty and matplotlib warns.
plt.plot(
    train_data.iloc[0, 0:187],
    color = 'green',
    label = f'Row 0 (class {int(train_data.iloc[0, 187])})',
)
# NOTE(review): the x values are the column indices 0-186, not milliseconds —
# confirm the intended unit for this axis label.
plt.xlabel("Time (in ms)")
plt.ylabel("Heart Beat Amplitude")
plt.legend()
plt.show()

In [None]:
# Balancing the data

# Upsampling: draw rows from each of classes 1-4 with replacement until every
# class has 20000 rows. The fixed random_state keeps the draw reproducible.

class_1_upsample, class_2_upsample, class_3_upsample, class_4_upsample = (
    resample(minority, replace=True, n_samples=20000, random_state=123)
    for minority in (class_1, class_2, class_3, class_4)
)

In [None]:
# Downsampling: reduce class 0 to 20000 rows, the same size the other classes
# are resampled to.
#
# Sample WITHOUT replacement whenever class 0 has at least 20000 rows, so the
# result contains only distinct rows. Sampling with replacement would
# duplicate some rows while throwing away others. Replacement is used only as
# a fallback when class 0 is too small, because resample raises a ValueError
# if asked for more rows than exist with replace=False.

class_0_downsample = resample(
    class_0,
    n_samples = 20000,
    replace = len(class_0) < 20000,
    random_state = 123,
)

In [None]:
# Stack the five resampled class frames into one balanced training frame
# (classes 1-4 first, class 0 last)

train_data_1 = pd.concat(
    [
        class_1_upsample,
        class_2_upsample,
        class_3_upsample,
        class_4_upsample,
        class_0_downsample,
    ]
)

In [None]:
# # Plotting the types of hearbeats

# plt.figure(figsize= (5,5))
# my_circle = plt.Circle((0,0), 0.7, color = 'white') 
# plt.pie(train_data_1[187].value_counts(), labels=['Normal Beats','Unknown Beats','Ventricular ectopic beats','Supraventricular ectopic beats',
#                                                 'Fusion Beats'], autopct = '%0.0f%%', colors = ['red','orange','blue','magenta','cyan'])
# p = plt.gcf()
# p.gca().add_artist(my_circle)
# plt.show()

In [None]:
# # Apply Keras Models

# Y=train_data_1.values[:, -1].astype(int)
# y=to_categorical(Y)
# X_train_1, X_valid, y_train_1, y_valid = train_test_split(train_data_1.drop([187], axis=1), y, test_size = 0.3)

In [None]:
# model = Sequential()
# model.add(Dense(50, activation='relu', input_shape=(187,)))
# model.add(Dense(50, activation='relu'))
# model.add(Dense(5, activation='softmax'))

# model.compile(optimizer='adam',
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])

# history=model.fit(X_train_1, y_train_1, epochs=10,validation_data=(X_valid, y_valid))

# print("Evaluation: ")
# loss, acc = model.evaluate(X_valid, y_valid)
# print('categorical_crossentropy loss :', loss)
# print('accuracy:', acc)