In [None]:
import tensorflow as tf
from benchmark_models import *
from oneclass-selfattention import *
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix

<h3>Loading and Processing Data</h3>

In [None]:
# Read the feature matrix X and the label vector Y, stored together in data.npy
with open('data.npy', 'rb') as data_file:
    X, Y = np.load(data_file)

# Standardise with the global mean / standard deviation taken over every entry of X
# NOTE(review): the modelling cells visible in this notebook read X, not std_X --
# confirm which of the two is meant to be fed to the models.
global_mean = X.mean()
global_std = X.std()
std_X = (X - global_mean) / global_std

<h3> Modeling </h3>

#### Set up parameters

In [None]:
#rate of anomalies
ano_rate = 0.02
#fractions held out for the test and validation splits
test_rate = 0.2
valid_rate = 0.2

#Auto encoder architectures for finetuning
#Every entry is one architecture: a list of two-number layer specs
#(presumably [filters, kernel size] -- confirm against CNN1D_AE).
#NOTE(review): the second row duplicates the first (it originally carried a stray
#extra '[' that left the list unbalanced and broke the whole cell). Rows 4 and 6
#each contribute TWO separate entries (what look like encoder and decoder halves);
#confirm whether each pair was meant to be a single symmetric architecture.
auto_encoder_architectures = [
    [[16,64], [8,32], [8,32],[16,64]],
    [[16,64], [8,32], [8,32],[16,64]],
    [[32,128], [16,64], [16,64],[32,128]],
    [[16,64], [8,32], [4,32]], [[4,32],[8,32],[16,64]],
    [[32,128], [16,64], [8,32], [8,32],[16,64],[32,128]],
    [[32,128], [16,64], [8,32], [4,32]], [[4,32],[8,32],[16,64],[32,128]],
]

#Deep One-Class architectures for finetuning
#(same layer-spec format; shared by DOCC and both One-Class Self-Attention versions)
one_class_architectures = [
    [[32,64],[16,32],[8,32]],
    [[64,128],[32,64],[16,32]],
    [[64,128],[64,128],[32,64],[16,32]],
    [[64,128],[32,64],[32,64],[16,32]],
    [[64,128],[32,64],[16,32],[8,32]]
]

#### Training and testing

In [None]:
%%capture alloutput

OCSVMF1 = []
IFF1 = []
AEF1 = []
AESVMF1 = []
DOCCF1 = []
OCSAv1F1 = []
OCSAv2F1 = []

for seed in [111111,222222,333333,444444,555555,666666,777777,888888,999999,121212]:
    np.random.seed(seed)
    
    #split train/set
    trvX, testX, trvY, testY = train_test_split(X, Y, test_rate)
    trainX, validX, trainY, validY = train_test_split(trvX, trvY, valid_rate)

   
    #################
    #################
    ########One Class Support Vector Machine
    #grid search for gamma
    param_grid = {
        'gamma' : [1e-3, 1e-2, 1e-1, 1, 10, 100],
        'nu' : [0.01, 0.05 ,0.1, 0.15]
    }
    grid_search = GridSearchCV(OneClassSVM(kernel='rbf'), param_grid, cv=10, scoring='f1_score')
    grid_search.fit(trvX, trvY)
    testY_pred = grid_search.best_estimator_.predict(testX)
    OCSVMF1.append(f1_score(testY, testY_pred, pos_label=-1))
    
    
    #################
    #################
    #########Isolation forest
    #grid search for n_estimators
    param_grid = {'n_estimators' : [25, 50, 100, 150, 200]}
    grid_search = GridSearchCV(IsolationForest(contamination=ano_rate), param_grid, cv=10, scoring='f1_score')
    grid_search.fit(trvX, trvY)
    testY_pred = grid_search.best_estimator_.predict(testX)
    IFF1.append(f1_score(testY, testY_pred, pos_label=-1))
    
    
    #################
    #################
    ###########Auto Encoder
    models = []
    f1s_list = []
    for architecture in auto_encoder_architectures:
        cnn = CNN1D_AE(X.shape[1], architecture)
        cnn.train(1000,1.0,trainX)
        valY_pred = cnn.predict(validX)
        f1s_list.append(f1_score(valY, valY_pred, pos_label=-1))
    best_ae = models[f1s_list.index(max(f1s_list))]
    testY_pred = best_ae.predict(testX,ano_rate)
    AEF1.append(f1_score(testY, predY, pos_label=-1))
    
    #generate encoded data from auto encoder
    trvX_Enc = cnn.encode(trvX)
    trvX_Enc = trvX_Enc.reshape(trvX_Enc.shape[0], trvX_Enc.shape[1]*trvX_Enc.shape[2])
    testX_Enc = cnn.encode(testX)
    testX_Enc = testX_Enc.reshape(testX_Enc.shape[0], testX_Enc.shape[1]*testX_Enc.shape[2])
    
    #turn off all auto-encoder models
    for model in models:
        model.close()
    
    
    #################
    #################    
    ###########Auto Encoder --- One Class SVM
    #grid search for gamma
    param_grid = {
        'gamma' : [1e-3, 1e-2, 1e-1, 1, 10, 100],
        'nu' : [0.01, 0.05 ,0.1, 0.15]
    }
    grid_search = GridSearchCV(OneClassSVM(kernel='rbf'), param_grid, cv=10, scoring='f1_score')
    grid_search.fit(trvX_Enc, trvY)
    testY_pred = grid_search.best_estimator_.predict(testX_Enc)
    AESVMF1.append(f1_score(testY, testY_pred, pos_label=-1))
    
    
    

    
    #################
    #################
    #######Deep One-Class Classifier --- finetune externally
    #finetune architectures
    models = []
    f1s_list = []
    for architecture in one_class_architectures:
        cnn = DOCC(X.shape[1], architecture, 64)
        cnn.initialize(trainX)
        cnn.train(50,1e-4,trainX, min_epochs=10, min_improvement=0.01)
        valY_pred = cnn.predict(validX)
        f1s_list.append(f1_score(valY, valY_pred, pos_label=-1))
    docc = models[f1s_list.index(max(f1s_list))]
    predY = docc.predict(testX,ano_rate)
    DOCCF1.append(f1_score(testY, predY, pos_label=-1))
    
    #turn off all auto-encoder models
    for model in models:
        model.close()
    
    
    #################
    #################
    #######One-Class Self-Attention --- v1
    models = []
    f1s_list = []
    for architecture in one_class_architectures:
        cnn = OneClassSelfAttention(X.shape[1], architecture, 64)
        cnn.initialize(trainX)
        cnn.train(50,1e-4,trainX, min_epochs=10, min_improvement=0.01)
        valY_pred = cnn.predict(validX)
        f1s_list.append(f1_score(valY, valY_pred, pos_label=-1))
    ocsav1 = models[f1s_list.index(max(f1s_list))]
    predY = ocsav1.predict(testX,ano_rate)
    OCSAv1F1.append(f1_score(testY, predY, pos_label=-1))
    
    #turn off all auto-encoder models
    for model in models:
        model.close()
        
    
    #################
    #################
    #######One-Class Self-Attention --- v2
    models = []
    f1s_list = []
    for architecture in one_class_architectures:
        cnn = OneClassSelfAttention(X.shape[1], architecture, 64, ver=2)
        cnn.train(50,1e-4,trainX, min_epochs=10, min_improvement=0.01)
        valY_pred = cnn.predict(validX)
        f1s_list.append(f1_score(valY, valY_pred, pos_label=-1))
    ocsav2 = models[f1s_list.index(max(f1s_list))]
    predY = ocsav2.predict(testX,ano_rate)
    OCSAv2F1.append(f1_score(testY, predY, pos_label=-1))
    
    #turn off all auto-encoder models
    for model in models:
        model.close()

In [None]:
# Mean and standard deviation of the per-seed test F1, one line per method, in the order:
# OC-SVM, Isolation Forest, Auto-Encoder, Auto-Encoder + OC-SVM, DOCC, OCSA v1, OCSA v2.
# (The self-attention lists are named OCSAv1F1 / OCSAv2F1 where they are filled.)
for f1 in [OCSVMF1, IFF1, AEF1, AESVMF1, DOCCF1, OCSAv1F1, OCSAv2F1]:
    print(np.mean(f1), np.std(f1))