In [1]:
import os
from glob import glob
from time import time

import pandas as pd
from scipy.io import loadmat
from tqdm import tqdm

from modules.crossvalidation import HyperparameterTune
from modules.evaluation import EvaluationFramework
from modules.metrics import metrics
%config Completer.use_jedi = False

  warn("Tensorflow not installed; ParametricUMAP will be unavailable")


In [2]:
from pyod.utils.utility import standardizer
from pyod.models.iforest import IForest
from pyod.models.lof import LOF
from pyod.models.knn import KNN
from pyod.models.pca import PCA
from pyod.models.ocsvm import OCSVM

## Dataset

In [3]:
def dataset_name_from_path(filepath):
    """Return the dataset name for a file path: the base name without its extension.

    The previous implementation split on a literal backslash, which only works
    for Windows-style paths and raises IndexError for 'datasets/foo.mat'.
    ``os.path`` uses the separator rules of the platform the notebook runs on.

    Parameters
    ----------
    filepath : str
        Path as returned by ``glob``, e.g. 'datasets/cardio.mat'.

    Returns
    -------
    str
        File name with directory and final extension removed, e.g. 'cardio'.
    """
    return os.path.splitext(os.path.basename(filepath))[0]


# Sorted so every run visits the datasets in the same order; glob itself
# returns entries in arbitrary, filesystem-dependent order.
dataset_fps = sorted(glob('datasets/*'))
dataset_names = [dataset_name_from_path(fp) for fp in dataset_fps]

## Parameter Grid

In [4]:
# Candidate hyperparameter values, one grid per detector.
# Each grid is unpacked as keyword arguments into HyperparameterTune by the
# tuning loop; how the candidates are combined is decided there.

# Grid paired with IForest in `methods`.
# NOTE(review): max_features mixes the int 1 with the floats 0.5 / 1.0 --
# presumably "one feature" vs. a fraction of the features; confirm intent.
params_iforest = dict(
    n_estimators=[10, 100],
    max_features=[1, 0.5, 1.0],
    bootstrap=[True, False],
)

# Grid paired with LOF in `methods`.
params_lof = dict(
    n_neighbors=[5, 20],
    algorithm=['kd_tree', 'brute'],
)

# Grid paired with KNN in `methods`.
params_knn = dict(
    method=['largest', 'mean', 'median'],
    n_neighbors=[2, 5, 10],
)

# Grid paired with PCA in `methods`.
params_pca = dict(
    n_components=[None, 1, 2, 5],
    whiten=[True, False],
    weighted=[True, False],
)

# Grid paired with OCSVM in `methods`.
params_osvm = dict(
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    nu=[0.2, 0.5],
    shrinking=[True, False],
)

In [5]:
# Registry consumed by the tuning loop. Each entry is a
# (label, detector class, parameter grid) triple; the label is also used
# as the stem of the results CSV written for that detector.
methods = [
    ('iForest', IForest, params_iforest),
    ('lof',     LOF,     params_lof),
    ('KNN',     KNN,     params_knn),
    ('PCA',     PCA,     params_pca),
    ('OSVM',    OCSVM,   params_osvm),
]

## Parameter Tuning

In [None]:
# Tune every detector in `methods` on every dataset and write one CSV of the
# selected hyperparameters per detector to ./results/<label>.csv.
start = time()

# Create the output directory up front so a missing folder cannot discard the
# results of a long tuning run when to_csv is finally called.
os.makedirs('./results', exist_ok=True)

for method_nam, method_func, param in tqdm(methods):
    # One dict per dataset. The frame is built once after the inner loop:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    records = []
    # zip() has no length, so pass total explicitly to get a real progress bar.
    for dataset_nam, dataset_fp in tqdm(zip(dataset_names, dataset_fps),
                                        total=len(dataset_fps)):
        mat = loadmat(dataset_fp)
        X_raw = mat['X']
        y = mat['y'].ravel()
        # Normalise
        X = standardizer(X_raw)
        # Hyperparameter Tuning
        h = HyperparameterTune(method_func, **param)
        # tuned_param is unpacked into the record below, so it must be a mapping.
        tuned_param = h.evaluate(X, y)
        records.append({'Dataset': dataset_nam, **tuned_param})

        # TODO: integrate with EvaluationFramework, e.g.
        #     model = method_func(**tuned_param)
        #     eva = EvaluationFramework(model)

    # dtype=object keeps the values exactly as returned (e.g. ints stay ints
    # next to None), matching what row-wise appending to an empty frame gave.
    param_df = pd.DataFrame(records, dtype=object)
    # 'Dataset' and the grid keys come first (and exist even with no records);
    # any additional keys returned by the tuner follow in order of appearance.
    leading_cols = ['Dataset'] + list(param.keys())
    extra_cols = [col for col in param_df.columns if col not in leading_cols]
    param_df = param_df.reindex(columns=leading_cols + extra_cols)

    # Save Results
    param_df.to_csv(f'./results/{method_nam}.csv', index=False)

finish = time() - start
print(f'------ {finish/60} minutes --------')

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
0it [00:00, ?it/s][A
1it [00:10, 10.93s/it][A
2it [00:22, 11.48s/it][A
3it [00:32, 10.48s/it][A
4it [00:41, 10.17s/it][A
5it [00:53, 10.67s/it][A
6it [01:02, 10.12s/it][A
7it [01:48, 21.75s/it][A
8it [02:19, 24.88s/it][A
9it [02:39, 23.44s/it][A
10it [02:59, 22.30s/it][A
11it [04:30, 43.39s/it][A
12it [04:44, 34.24s/it][A
13it [04:53, 26.73s/it][A
14it [05:04, 22.01s/it][A
15it [05:14, 20.99s/it][A
 20%|████████████████▌                                                                  | 1/5 [05:14<20:59, 314.92s/it]
0it [00:00, ?it/s][A
1it [00:00,  1.18it/s][A
2it [00:02,  1.29s/it][A
3it [00:02,  1.32it/s][A
4it [00:02,  1.88it/s][A
5it [00:04,  1.09it/s][A
7it [01:10, 17.11s/it][A
8it [01:29, 17.80s/it][A
9it [01:42, 16.55s/it][A
10it [01:55, 15.51s/it][A
11it [09:50, 147.40s/it][A
12it [09:53, 105.52s/it][A
13it [09:53, 74.56s/it] [A
1