In [1]:
import my_functions as my_func
import pyod
import random 
import networkx as nx
import numpy as np
import scipy.io
import time
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.ensemble import IsolationForest
from pyod.models import loda
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold
from pyod.models import auto_encoder

### Hyperparameter tuning in Autoencoder

##### Mulcross dataset


In [2]:
mulcross=pd.read_csv(r"C:\Users\Manolis\thesis\thesis\Datasets\Mulcross1.csv")
label=mulcross.Target
mulcross=mulcross.iloc[:,0:4]

In [3]:
from sklearn.model_selection import GridSearchCV
start=time.time()
model = pyod.models.auto_encoder.AutoEncoder()
# define search space
space = dict([('hidden_neurons',[[64,32,16,1,16,32,64],[32,16,1,16,32],[4,1,4]]),
             ('epochs',[20,50,100]),
             ('hidden_activation',['relu','tanh']),
             ('batch_size',[32,64,128])]
)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# define search
search = GridSearchCV(model, space, scoring='accuracy',n_jobs=-1,cv=cv)
result = search.fit(mulcross,label)
print(time.time()-start)
result.best_params_



Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 4)                 20        
_________________________________________________________________
dropout (Dropout)            (None, 4)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 4)                 20        
_________________________________________________________________
dropout_1 (Dropout)          (None, 4)                 0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                320       
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 32)                2

{'batch_size': 32,
 'epochs': 100,
 'hidden_activation': 'relu',
 'hidden_neurons': [64, 32, 16, 1, 16, 32, 64]}

##### Vertebral dataset

In [4]:
matfile = scipy.io.loadmat(r"C:\Users\Manolis\thesis\thesis\Datasets\vertebral.mat")
tuples=matfile["X"]
label=matfile["y"]
Vertebral=pd.DataFrame(columns=['pelvic incidence', 'pelvic tilt', 'lumbar lordosis angle', 'sacral slope', 'pelvic radius',' grade of spondylolisthesis'])
for i in range(len(tuples)):
    Vertebral.loc[len(Vertebral)] = tuples[i]
Vertebral_label=Vertebral.copy()
Vertebral_label['label']=label 

In [5]:
from sklearn.model_selection import GridSearchCV
start=time.time()
model = pyod.models.auto_encoder.AutoEncoder()
# define search space
space = dict([('hidden_neurons',[[64,32,16,1,16,32,64],[32,16,1,16,32],[4,1,4]]),
             ('epochs',[20,50,100]),
             ('hidden_activation',['relu','tanh']),
             ('batch_size',[32,64,128])]
)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# define search
search = GridSearchCV(model, space, scoring='accuracy',n_jobs=-1,cv=cv)
result_2 = search.fit(Vertebral,label)
print(time.time()-start)
result_2.best_params_



Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             (None, 6)                 42        
_________________________________________________________________
dropout_9 (Dropout)          (None, 6)                 0         
_________________________________________________________________
dense_11 (Dense)             (None, 6)                 42        
_________________________________________________________________
dropout_10 (Dropout)         (None, 6)                 0         
_________________________________________________________________
dense_12 (Dense)             (None, 32)                224       
_________________________________________________________________
dropout_11 (Dropout)         (None, 32)                0         
_________________________________________________________________
dense_13 (Dense)             (None, 16)               

{'batch_size': 64,
 'epochs': 20,
 'hidden_activation': 'relu',
 'hidden_neurons': [32, 16, 1, 16, 32]}

### Hyperparameter tuning in Isolation Forest

##### Mulcross dataset


In [6]:
mulcross=pd.read_csv(r"C:\Users\Manolis\thesis\thesis\Datasets\Mulcross1.csv")
label=mulcross.Target
mulcross=mulcross.iloc[:,0:4]

In [7]:
from sklearn.model_selection import GridSearchCV
start=time.time()
model = IsolationForest()
# define search space
space = dict([('n_estimators',[50,100,200,400])])
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=10, random_state=1)
# define search
search = GridSearchCV(model, space, scoring='accuracy',n_jobs=-1,cv=cv)
result_3 = search.fit(mulcross,label)
print(time.time()-start)
result_3.best_params_

247.76028037071228


{'n_estimators': 50}

##### Vertebral dataset

In [8]:
matfile = scipy.io.loadmat(r"C:\Users\Manolis\thesis\thesis\Datasets\vertebral.mat")
tuples=matfile["X"]
label=matfile["y"]
Vertebral=pd.DataFrame(columns=['pelvic incidence', 'pelvic tilt', 'lumbar lordosis angle', 'sacral slope', 'pelvic radius',' grade of spondylolisthesis'])
for i in range(len(tuples)):
    Vertebral.loc[len(Vertebral)] = tuples[i]
Vertebral_label=Vertebral.copy()
Vertebral_label['label']=label 

In [9]:
from sklearn.model_selection import GridSearchCV
start=time.time()
model = IsolationForest()
# define search space
space = dict([('n_estimators',[50,100,200,400])])
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=10, random_state=1)
# define search
search = GridSearchCV(model, space, scoring='accuracy',n_jobs=-1,cv=cv,verbose=2)
result_4 = search.fit(Vertebral,label)
print(time.time()-start)
result_4.best_params_

Fitting 100 folds for each of 4 candidates, totalling 400 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done 180 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:    5.7s finished


6.148597717285156


{'n_estimators': 400}

In [11]:
result.best_params_, result_2.best_params_, result_3.best_params_, result_4.best_params_

({'batch_size': 32,
  'epochs': 100,
  'hidden_activation': 'relu',
  'hidden_neurons': [64, 32, 16, 1, 16, 32, 64]},
 {'batch_size': 64,
  'epochs': 20,
  'hidden_activation': 'relu',
  'hidden_neurons': [32, 16, 1, 16, 32]},
 {'n_estimators': 50},
 {'n_estimators': 400})