In [32]:
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier

import os
print(os.listdir("../input"))

['ptbdb_abnormal.csv', 'ptbdb_normal.csv', 'mitbih_test.csv', 'mitbih_train.csv']


In [33]:
abnormal = pd.read_csv("../input/ptbdb_abnormal.csv", header = None) 
normal = pd.read_csv("../input/ptbdb_normal.csv", header = None)

abnormal = abnormal.drop([187], axis=1)
normal = normal.drop([187], axis=1)

y_abnormal = np.ones((abnormal.shape[0]))
y_abnormal = pd.DataFrame(y_abnormal)

y_normal = np.zeros((normal.shape[0]))
y_normal = pd.DataFrame(y_normal)

X = pd.concat([abnormal, normal], sort=True)
y = pd.concat([y_abnormal, y_normal] ,sort=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [34]:
print(abnormal.dtypes, normal.dtypes)

0      float64
1      float64
2      float64
3      float64
4      float64
5      float64
6      float64
7      float64
8      float64
9      float64
10     float64
11     float64
12     float64
13     float64
14     float64
15     float64
16     float64
17     float64
18     float64
19     float64
20     float64
21     float64
22     float64
23     float64
24     float64
25     float64
26     float64
27     float64
28     float64
29     float64
        ...   
157    float64
158    float64
159    float64
160    float64
161    float64
162    float64
163    float64
164    float64
165    float64
166    float64
167    float64
168    float64
169    float64
170    float64
171    float64
172    float64
173    float64
174    float64
175    float64
176    float64
177    float64
178    float64
179    float64
180    float64
181    float64
182    float64
183    float64
184    float64
185    float64
186    float64
Length: 187, dtype: object 0      float64
1      float64
2      float64
3      float6

In [35]:
abnormal.shape

(10506, 187)

In [36]:
normal.shape

(4046, 187)

In [37]:
np.any(X_train.isna().sum())

False

In [38]:
np.any(X_test.isna().sum())

False

In [39]:
seed=123

classifiers = [
    LogisticRegression(class_weight='balanced', random_state=seed),
    KNeighborsClassifier(3, n_jobs=-1),
    SVC(gamma='auto', class_weight='balanced', random_state=seed),
    RandomForestClassifier(random_state=seed, n_estimators = 500),
    MLPClassifier(alpha=1, max_iter=1000),
    XGBClassifier(random_state=seed, n_jobs=-1)
]

names = ["Logistic", "Nearest Neighbors", "RBF SVM", "Random Forest", "Neural Net", "XGB"]

In [40]:
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score
from sklearn.utils.validation import column_or_1d

for name, clf in zip(names, classifiers):
        
    y_train = column_or_1d(y_train, warn=True)
    clf.fit(X_train, y_train)
    print(f"{name}: {round(accuracy_score(y_test, clf.predict(X_test)),3)}")

  


Logistic: 0.776
Nearest Neighbors: 0.928
RBF SVM: 0.752
Random Forest: 0.969
Neural Net: 0.873
XGB: 0.925


In [66]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(random_state=seed, n_estimators = 500)
clf.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=None,
            oob_score=False, random_state=123, verbose=0, warm_start=False)

In [67]:
y_pred = clf.predict(X_test)

In [68]:
y_pred = np.reshape(y_pred, (y_pred.shape[0],1))

y_pred.shape

(5821, 1)

In [69]:
y_pred = clf.predict(X_test)

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Precision:", precision)
print("Recall:", recall)
print("Accuracy:", accuracy)
print("f1:", f1)

Precision: 0.9707792207792207
Recall: 0.9874970511913187
Accuracy: 0.9692492698848995
f1: 0.979066775815694
