In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn import metrics
import os

from time import time
from experiment import *

# Load data ** Chose your path before get stated **

In [2]:
path = os.getcwd()
rootDir, _ = path.split('ECG-Arritmia-Paper1')
path = 'ECG-Arritmia-Paper1/Database_extracted/Dataset_with_features'
filename = 'MIT-BIH__DS1_5classes__HOS.csv'
dataset = pd.read_csv(os.path.join(rootDir, path, filename));

In [3]:
dataset.head()

Unnamed: 0,Var1,Var2,Var3,Var4,Var5
0,2.466699,8.076415,0.130942,0.365797,0
1,2.38825,7.647216,0.118878,0.351855,0
2,2.270058,7.048029,0.147898,0.403758,0
3,2.235512,6.83819,0.157496,0.395403,0
4,2.269395,6.948087,0.165563,0.403978,0


# Data preprocessing

In this step we are going standardize our dataset.

In [4]:
from sklearn.preprocessing import StandardScaler

std_scaler = StandardScaler()

Separete features from labels.

In [5]:
data_std = std_scaler.fit_transform(dataset.values[:,:-1])
data_label = dataset.values[:,-1]

# Classifiers specifications

In [6]:
# Non-linear models:
from sklearn.neural_network import MLPClassifier

# Kernel-based models
from sklearn.svm import SVC

##### Model pre-loadings:

# MLP:
mlp_clf = MLPClassifier(solver='adam', learning_rate='adaptive', 
                        max_iter=1300, learning_rate_init=5e-04, tol=1e-4)

# SVM
svm_rbf_clf = SVC(kernel='rbf')

## Hyperameter tunning by randomized search:

Classifiers definitions:

In [7]:
classifiers = {'MLP': mlp_clf}

Define param range for searching:

In [8]:
param_dist_dict = {
                    'MLP': {"hidden_layer_sizes": list(np.arange(2,1001))},
                  }

In [None]:
from sklearn.model_selection import RandomizedSearchCV

random_search = dict((k,[]) for k in classifiers.keys())

for clf in param_dist_dict.keys():
    start = time()
    random_search[clf] = RandomizedSearchCV(classifiers[clf], param_dist_dict[clf], cv=8, n_iter=20, verbose=5, n_jobs=100, scoring='accuracy')
    random_search[clf].fit(data_std, data_label)
    print('Elapsed time:')
    print(time() - start)

Fitting 8 folds for each of 20 candidates, totalling 160 fits
[CV] hidden_layer_sizes=238 ..........................................
[CV] hidden_layer_sizes=238 ..........................................
[CV] hidden_layer_sizes=238 ..........................................
[CV] hidden_layer_sizes=238 ..........................................
[CV] hidden_layer_sizes=238 ..........................................
[CV] hidden_layer_sizes=238 ..........................................
[CV] hidden_layer_sizes=238 ..........................................
[CV] hidden_layer_sizes=238 ..........................................
[CV] hidden_layer_sizes=838 ..........................................
[CV] hidden_layer_sizes=838 ..........................................
[CV] hidden_layer_sizes=838 ..........................................
[CV] hidden_layer_sizes=838 ..........................................
[CV] hidden_layer_sizes=838 ..........................................
[CV] hidden_lay

[Parallel(n_jobs=100)]: Done  27 out of 160 | elapsed: 136.9min remaining: 674.4min


[CV] hidden_layer_sizes=993 ..........................................
[CV]  hidden_layer_sizes=647, score=0.8107938500156887, total=138.5min
[CV] hidden_layer_sizes=993 ..........................................
[CV]  hidden_layer_sizes=563, score=0.9182616881079385, total=140.1min
[CV] hidden_layer_sizes=151 ..........................................
[CV]  hidden_layer_sizes=375, score=0.8927954795165595, total=145.0min
[CV] hidden_layer_sizes=151 ..........................................
[CV]  hidden_layer_sizes=375, score=0.8640929064657878, total=145.8min
[CV] hidden_layer_sizes=151 ..........................................
[CV]  hidden_layer_sizes=647, score=0.9329775545440276, total=145.9min
[CV] hidden_layer_sizes=151 ..........................................
[CV]  hidden_layer_sizes=415, score=0.7469784963114111, total=147.2min
[CV] hidden_layer_sizes=151 ..........................................
[CV]  hidden_layer_sizes=563, score=0.9348815314608505, total=146.7min
[CV] h

[Parallel(n_jobs=100)]: Done  60 out of 160 | elapsed: 176.0min remaining: 293.4min


[CV] hidden_layer_sizes=676 ..........................................
[CV] . hidden_layer_sizes=375, score=0.934096971598933, total=175.9min
[CV]  hidden_layer_sizes=598, score=0.9185754628176969, total=175.8min
[CV]  hidden_layer_sizes=647, score=0.8441130298273155, total=176.4min
[CV]  hidden_layer_sizes=647, score=0.9328416758198651, total=178.7min
[CV]  hidden_layer_sizes=987, score=0.9439736346516008, total=180.6min
[CV]  hidden_layer_sizes=606, score=0.7443101553916183, total=181.3min
[CV]  hidden_layer_sizes=987, score=0.9174772513335425, total=182.2min
[CV]  hidden_layer_sizes=598, score=0.9301522523936587, total=110.4min
[CV]  hidden_layer_sizes=774, score=0.8475667189952905, total=182.8min
[CV]  hidden_layer_sizes=606, score=0.8174827369742624, total=183.9min
[CV]  hidden_layer_sizes=690, score=0.7455658452362266, total=184.4min
[CV]  hidden_layer_sizes=606, score=0.9317432920131806, total=185.4min
[CV]  hidden_layer_sizes=647, score=0.8455743879472694, total=187.1min
[CV]  

[Parallel(n_jobs=100)]: Done  93 out of 160 | elapsed: 207.9min remaining: 149.8min


[CV]  hidden_layer_sizes=606, score=0.7896140571069972, total=207.6min
[CV] . hidden_layer_sizes=151, score=0.9177910260433009, total=62.6min
[CV]  hidden_layer_sizes=987, score=0.9273269502432899, total=210.2min
[CV]  hidden_layer_sizes=360, score=0.7842798870411045, total=129.2min
[CV] . hidden_layer_sizes=151, score=0.8145591465327895, total=73.2min
[CV]  hidden_layer_sizes=606, score=0.8942081305917439, total=214.9min
[CV]  hidden_layer_sizes=838, score=0.8220338983050848, total=216.4min
[CV] . hidden_layer_sizes=838, score=0.746664573850259, total=217.3min
[CV]  hidden_layer_sizes=360, score=0.9332914770051797, total=116.1min
[CV] . hidden_layer_sizes=781, score=0.922642397614938, total=105.3min
[CV]  hidden_layer_sizes=598, score=0.7449380003139224, total=162.0min
[CV] .. hidden_layer_sizes=151, score=0.864406779661017, total=72.5min
[CV]  hidden_layer_sizes=360, score=0.7494898760006279, total=119.6min
[CV]  hidden_layer_sizes=360, score=0.9461707470182047, total=126.8min
[CV] .

[Parallel(n_jobs=100)]: Done 126 out of 160 | elapsed: 239.9min remaining: 64.7min


[CV]  hidden_layer_sizes=993, score=0.9174772513335425, total=121.7min
[CV]  hidden_layer_sizes=993, score=0.8581293157564344, total=117.5min
[CV]  hidden_layer_sizes=781, score=0.7465076126196829, total=124.0min
[CV]  hidden_layer_sizes=781, score=0.9438166980539862, total=129.4min
[CV]  hidden_layer_sizes=781, score=0.9348610893109403, total=127.2min
[CV] . hidden_layer_sizes=703, score=0.9303310842617292, total=94.2min
[CV]  hidden_layer_sizes=781, score=0.9177910260433009, total=136.7min
[CV] . hidden_layer_sizes=676, score=0.9450721908349027, total=76.2min
[CV] . hidden_layer_sizes=993, score=0.944601381042059, total=126.3min
[CV]  hidden_layer_sizes=993, score=0.7947913398180106, total=131.0min
[CV]  hidden_layer_sizes=993, score=0.8532182103610675, total=110.5min
[CV] . hidden_layer_sizes=909, score=0.8981641299231131, total=89.4min
[CV] . hidden_layer_sizes=703, score=0.7479202636948674, total=95.8min
[CV] . hidden_layer_sizes=909, score=0.9176341386884217, total=91.3min
[CV]  

[Parallel(n_jobs=100)]: Done 160 out of 160 | elapsed: 257.8min finished


# Save all models

In [None]:
save_models(random_search)

In [None]:
os.system('shutdown')

In [None]:
u = random_search['MLP']

In [None]:
u.cv_results_