In [0]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.decomposition import PCA
from sklearn.preprocessing import Normalizer, StandardScaler
from sklearn.metrics import accuracy_score
from keras.models import Sequential
from keras.layers import Dense
import keras
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

# Import and organize data

In [0]:
# Both files have no header row; label the 188 columns 1..188.
column_labels = list(range(1, 189))
normal = pd.read_csv("ptbdb_normal.csv", names=column_labels)
abnormal = pd.read_csv("ptbdb_abnormal.csv", names=column_labels)

In [0]:
# Stack both recordings sets into one frame. `ignore_index=True` gives every row
# a unique label; without it the row labels of `normal` and `abnormal` overlap,
# which makes any label-based lookup (`.loc`, alignment) ambiguous.
df = pd.concat([normal, abnormal], ignore_index=True)

In [0]:
from sklearn.model_selection import StratifiedShuffleSplit

# One stratified 80/20 shuffle split, stratified on the column at position 187.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
# n_splits=1, so the generator yields exactly one (train, test) index pair.
train_index, test_index = next(split.split(df, df.iloc[:, 187]))
X_train = df.iloc[train_index]
X_test = df.iloc[test_index]

In [0]:
# The column named 188 (position 187) is the target; the remaining columns are
# the model inputs. Both pieces are derived from `df` and the split indices
# rather than by dropping the column in place, so this cell can be re-run
# safely: the in-place version failed on a second run because the column was
# already gone, and mutating a frame sliced out of `df` can trigger pandas'
# SettingWithCopyWarning.
y_train = df.iloc[train_index, 187]
X_train = df.iloc[train_index].drop(columns=188)

y_test = df.iloc[test_index, 187]
X_test = df.iloc[test_index].drop(columns=188)

# Function that returns a DNN model

In [0]:
def build_model(n_hidden=1, n_neurons=100, learning_rate=0.001, input_shape=[30]):
  """Build and compile a fully connected binary classifier.

  Args:
    n_hidden: number of hidden Dense layers to stack.
    n_neurons: number of units in each hidden layer.
    learning_rate: learning rate handed to the SGD optimizer.
    input_shape: shape of one input sample, passed to the InputLayer.
      The default list is never mutated here.

  Returns:
    A compiled Sequential model with a single sigmoid output unit, trained
    with binary cross-entropy and reporting accuracy.
  """
  model = Sequential()
  model.add(keras.layers.InputLayer(input_shape=input_shape))
  for _ in range(n_hidden):
    # SELU activations are paired with the LeCun-normal initializer.
    model.add(Dense(n_neurons, activation="selu", kernel_initializer="lecun_normal"))
  model.add(Dense(1, activation="sigmoid"))

  # `learning_rate` is the supported argument name; the older `lr` alias is
  # deprecated and rejected by recent Keras releases.
  optimizer = keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9)
  model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])
  return model

# Finding a value for n_components for PCA

In [0]:
# Standardisation does not depend on the number of PCA components, so it is
# fitted once up front instead of being refitted for every candidate.
scaler = StandardScaler()
scaled_X = scaler.fit_transform(X_train)

n_components = [20, 30, 50]
for n in n_components:
  print(n)
  pca = PCA(n_components=n)
  pca_X = pca.fit_transform(scaled_X)
  # The network's input width must match the number of retained components.
  model = build_model(n_hidden=3, n_neurons=100, learning_rate=0.001, input_shape=[n])
  # NOTE(review): only training loss/accuracy is logged here; no validation
  # data is passed, so the comparison between candidates is on training data.
  model.fit(pca_X, y_train, epochs=10, batch_size=None)


# Transforming data to train models


In [0]:
from sklearn.pipeline import Pipeline

# Standardise the features, then keep 15 principal components.
preprocessing_steps = [
  ("s", StandardScaler()),
  ("pca", PCA(n_components=15)),
]
pipeline = Pipeline(preprocessing_steps)

# Fit on the training split only; the test split is transformed with the
# statistics learned from training data.
X_train_scaled = pipeline.fit_transform(X_train)
X_test_scaled = pipeline.transform(X_test)

# DNN

In [0]:
# Five hidden layers of 300 units; the input width of 15 matches the number of
# PCA components produced by the preprocessing pipeline.
final_model = build_model(
  n_hidden=5,
  n_neurons=300,
  learning_rate=0.001,
  input_shape=[15],
)
final_model.fit(X_train_scaled, y_train, epochs=500)

In [79]:
# Loss and accuracy of the trained network on the test split.
final_model.evaluate(x=X_test_scaled, y=y_test)



[0.2554168826225366, 0.967021644115448]

# Random Forest

In [0]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so the grid search (and the reported accuracy) can be
# reproduced; 42 is the same seed already used for the train/test split.
rfc = RandomForestClassifier(random_state=42)
params = {
  "n_estimators": [100, 200],
  "max_depth": [3, 5, 7, 9, 11],
  "criterion": ["gini", "entropy"],
}
# Exhaustive search over the grid above with 3-fold cross-validation.
rfc_search = GridSearchCV(rfc, params, cv=3)
rfc_search.fit(X_train_scaled, y_train)

In [59]:
# Test-split accuracy of the best random forest found by the grid search.
rfc_test_pred = rfc_search.best_estimator_.predict(X_test_scaled)
accuracy_score(y_test, rfc_test_pred)

0.8938509103400893

# XGBoost

In [0]:
from xgboost import XGBClassifier

xgb = XGBClassifier()
# Search grid (the stray duplicated `params = params = ...` assignment is gone).
params = {"eta": [0.001], "max_depth": [9, 11, 13], "gamma": [0.01, 0.1]}
# Exhaustive search over the grid above with 3-fold cross-validation.
xgb_search = GridSearchCV(xgb, params, cv=3)
xgb_search.fit(X_train_scaled, y_train)

In [88]:
# Test-split accuracy of the best XGBoost model found by the grid search.
xgb_test_pred = xgb_search.best_estimator_.predict(X_test_scaled)
accuracy_score(y_test, xgb_test_pred)

0.9416008244589488

In [86]:
# Hyper-parameter combination selected by the XGBoost grid search.
xgb_search.best_params_

{'eta': 0.001, 'gamma': 0.1, 'max_depth': 11}

# Summary (test-set accuracy, %)
### DNN - 96.70
### RandomForest - 89.38
### XGBoost - 94.16