In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn import metrics
import os

from time import time
from experiment import *

# Load data ** Chose your path before get stated **

In [2]:
path = os.getcwd()
rootDir, _ = path.split('ECG-Arritmia-Paper1')
path = 'ECG-Arritmia-Paper1/Database_extracted/Dataset_with_features'
filename = 'MIT-BIH__DS1_5classes__Goertzel.csv'
dataset = pd.read_csv(os.path.join(rootDir, path, filename));

In [3]:
dataset.head()

Unnamed: 0,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8,Var9,Var10,...,Var24,Var25,Var26,Var27,Var28,Var29,Var30,Var31,Var32,Var33
0,0.151058,0.128256,0.099898,0.083728,0.068214,0.046109,0.026629,0.017688,0.011045,0.009385,...,0.000768,0.001601,0.000325,0.001421,0.001017,0.001651,0.00106,0.000862,0.000547,0
1,0.147046,0.124114,0.094705,0.077863,0.05956,0.042234,0.02596,0.013447,0.011329,0.006718,...,0.000888,0.000654,0.000932,0.001443,0.0008,0.000732,0.000521,0.000157,0.000781,0
2,0.168356,0.139827,0.106008,0.084504,0.063497,0.040942,0.019961,0.012706,0.006084,0.010158,...,0.001229,0.001054,0.00099,0.000836,0.001372,0.000819,0.000618,0.00144,0.001406,0
3,0.175502,0.144138,0.109498,0.088609,0.062034,0.037186,0.022075,0.012085,0.00809,0.008089,...,0.000907,0.000763,0.001116,0.001679,0.000606,0.001112,0.000735,0.001248,0.000391,0
4,0.17721,0.147767,0.114982,0.092439,0.064739,0.038087,0.020698,0.013635,0.007597,0.00884,...,0.001015,0.000333,0.000903,0.001165,0.000636,0.001016,0.000668,0.001149,0.000859,0


# Data preprocessing

In this step we are going standardize our dataset.

In [4]:
from sklearn.preprocessing import StandardScaler

std_scaler = StandardScaler()

Separete features from labels.

In [5]:
data_std = std_scaler.fit_transform(dataset.values[:,:-1])
data_label = dataset.values[:,-1]

# Classifiers specifications

In [6]:
# Non-linear models:
from sklearn.neural_network import MLPClassifier

# Kernel-based models
from sklearn.svm import SVC

##### Model pre-loadings:

# MLP:
mlp_clf = MLPClassifier(solver='adam', learning_rate='adaptive', 
                        max_iter=1300, learning_rate_init=5e-04, tol=1e-4)

# SVM
svm_rbf_clf = SVC(kernel='rbf')

## Hyperameter tunning by randomized search:

Classifiers definitions:

In [7]:
classifiers = {'MLP': mlp_clf, 'SVM-RBF': svm_rbf_clf}

Define param range for searching:

In [8]:
param_dist_dict = {'MLP': {"hidden_layer_sizes": list(np.arange(2,1001))},
                   'SVM-RBF': {'gamma': np.logspace(-9, 3, 100), 'C': np.logspace(-2, 3, 100)},
                  }

In [9]:
from sklearn.model_selection import RandomizedSearchCV

random_search = dict((k,[]) for k in classifiers.keys())

for clf in param_dist_dict.keys():
    start = time()
    random_search[clf] = RandomizedSearchCV(classifiers[clf], param_dist_dict[clf], cv=8, n_iter=20, verbose=5, n_jobs=10, scoring='accuracy')
    random_search[clf].fit(data_std, data_label)
    print('Elapsed time:')
    print(time() - start)
    save_models(random_search)

Fitting 8 folds for each of 20 candidates, totalling 160 fits
[CV] hidden_layer_sizes=814 ..........................................
[CV] hidden_layer_sizes=814 ..........................................
[CV] hidden_layer_sizes=814 ..........................................
[CV] hidden_layer_sizes=814 ..........................................
[CV] hidden_layer_sizes=814 ..........................................
[CV] hidden_layer_sizes=814 ..........................................
[CV] hidden_layer_sizes=814 ..........................................
[CV] hidden_layer_sizes=814 ..........................................
[CV] hidden_layer_sizes=839 ..........................................
[CV] hidden_layer_sizes=839 ..........................................
[CV] . hidden_layer_sizes=814, score=0.5736032642812304, total=11.4min
[CV] hidden_layer_sizes=839 ..........................................
[CV] . hidden_layer_sizes=814, score=0.9016007532956686, total=12.6min
[CV] hidden_lay

[Parallel(n_jobs=10)]: Done  52 tasks      | elapsed: 69.4min


[CV] . hidden_layer_sizes=753, score=0.8733322869251295, total=14.8min
[CV] hidden_layer_sizes=291 ..........................................
[CV] ... hidden_layer_sizes=291, score=0.90429871352369, total=10.6min
[CV] hidden_layer_sizes=291 ..........................................
[CV] . hidden_layer_sizes=291, score=0.8349286050525655, total= 5.5min
[CV] hidden_layer_sizes=168 ..........................................
[CV] . hidden_layer_sizes=753, score=0.7887964851718187, total=21.0min
[CV] hidden_layer_sizes=168 ..........................................
[CV] . hidden_layer_sizes=753, score=0.6131512868801005, total=18.8min
[CV] hidden_layer_sizes=168 ..........................................
[CV] . hidden_layer_sizes=291, score=0.6523854362837413, total= 6.8min
[CV] hidden_layer_sizes=168 ..........................................
[CV] . hidden_layer_sizes=753, score=0.8367346938775511, total=19.2min
[CV] hidden_layer_sizes=168 ..........................................
[CV] .

[Parallel(n_jobs=10)]: Done 160 out of 160 | elapsed: 152.1min finished


Elapsed time:
9160.247833251953
Fitting 8 folds for each of 20 candidates, totalling 160 fits
[CV] gamma=572.236765935022, C=54.62277217684343 .....................
[CV] gamma=572.236765935022, C=54.62277217684343 .....................
[CV] gamma=572.236765935022, C=54.62277217684343 .....................
[CV] gamma=572.236765935022, C=54.62277217684343 .....................
[CV] gamma=572.236765935022, C=54.62277217684343 .....................
[CV] gamma=572.236765935022, C=54.62277217684343 .....................
[CV] gamma=572.236765935022, C=54.62277217684343 .....................
[CV] gamma=572.236765935022, C=54.62277217684343 .....................
[CV] gamma=0.0008697490026177834, C=21.544346900318846 ...............
[CV] gamma=0.0008697490026177834, C=21.544346900318846 ...............
[CV]  gamma=0.0008697490026177834, C=21.544346900318846, score=0.9171634766237842, total= 2.5min
[CV] gamma=0.0008697490026177834, C=21.544346900318846 ...............
[CV]  gamma=0.00086974900261

[Parallel(n_jobs=10)]: Done  52 tasks      | elapsed: 223.5min


[CV]  gamma=26.56087782946684, C=0.739072203352578, score=0.898932831136221, total=111.4min
[CV] gamma=8.697490026177835, C=0.20565123083486514 ..................
[CV]  gamma=26.56087782946684, C=0.739072203352578, score=0.899215070643642, total=111.4min
[CV] gamma=8.697490026177835, C=0.20565123083486514 ..................
[CV]  gamma=26.56087782946684, C=0.739072203352578, score=0.8987917778126471, total=112.6min
[CV] gamma=0.024770763559917138, C=247.70763559917089 ................
[CV]  gamma=1.1497569953977356e-07, C=8.497534359086439, score=0.899215070643642, total= 2.3min
[CV] gamma=0.024770763559917138, C=247.70763559917089 ................
[CV]  gamma=0.024770763559917138, C=247.70763559917089, score=0.8870411044869784, total= 5.1min
[CV] gamma=0.024770763559917138, C=247.70763559917089 ................
[CV]  gamma=0.024770763559917138, C=247.70763559917089, score=0.8992783181675557, total= 6.2min
[CV] gamma=0.024770763559917138, C=247.70763559917089 ................
[CV]  gam

[Parallel(n_jobs=10)]: Done 160 out of 160 | elapsed: 470.0min finished


Elapsed time:
28252.139038801193


# Save all models

In [13]:
save_models(random_search)

In [11]:
u = random_search['MLP']

In [12]:
u.cv_results_

{'mean_fit_time': array([ 996.50846601, 1129.70609179,  686.43643105,  566.65319648,
         266.24237081,  512.86023742, 1052.46671048,  530.31929207,
         426.49500123,  416.01849753,  516.18515515,  280.4009096 ,
         287.70891654,  311.19698486,  297.43169677,  431.10895357,
         316.14906281,  789.12657499,  740.18262243,  534.44796318]),
 'std_fit_time': array([235.84723232, 290.41982988, 174.8957417 , 166.82668778,
         22.15129089, 114.35444008, 192.78052214, 159.44902345,
         81.74531669,  85.22851095, 127.16714697,  36.36126603,
         45.55079634,  46.3922745 ,  48.46398174, 104.622224  ,
         42.85127342, 147.92279848, 160.94280165, 164.89475471]),
 'mean_score_time': array([0.93927497, 0.91620666, 0.5469231 , 0.40877795, 0.04859912,
        0.18612647, 0.88455939, 0.27593037, 0.15650189, 0.21786708,
        0.35163492, 0.0603047 , 0.09926745, 0.08782601, 0.05959332,
        0.19062978, 0.10808393, 0.48432004, 0.47525197, 0.3131347 ]),
 'std_scor