### One Class SVM - credit card example

In [None]:
# Import necessary libraries 
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split 
from sklearn.svm import OneClassSVM 
from sklearn.metrics import accuracy_score 
from sklearn.preprocessing import StandardScaler

def plot_OCSVM(i):
    """Scatter-plot two adjacent feature columns with the OC-SVM outliers on top.

    Reads the notebook-level frames ``data_50k_df`` (every sample, drawn in
    red) and ``svm_anomalies`` (the samples flagged by the model, drawn in
    green over the red points), so both must exist before this is called.

    Args:
        i: Positional index of the column used for the x-axis; column
            ``i + 1`` is used for the y-axis.
    """
    x_col, y_col = i, i + 1
    # Drawn in this order so the flagged points end up on top.
    layers = ((data_50k_df, 'red'), (svm_anomalies, 'green'))
    for frame, colour in layers:
        plt.scatter(frame.iloc[:, x_col], frame.iloc[:, y_col], c=colour, s=40, edgecolor="k")
    plt.title(f"OC-SVM Outlier detection between Feature Pair: V{x_col} and V{y_col}")
    plt.xlabel(f"V{x_col}")
    plt.ylabel(f"V{y_col}")





In [None]:
# Load the first 50,000 rows of the credit-card fraud dataset
# (https://www.kaggle.com/mlg-ulb/creditcardfraud).
credit_data = pd.read_csv('../creditcard_data/creditcard.csv', nrows=50000)

# Standardise every column except the 'Class' label.
is_feature_column = credit_data.columns != 'Class'
feature_scaler = StandardScaler()
standardized_data_without_class = feature_scaler.fit_transform(credit_data.loc[:, is_feature_column])

# Keep (at most) the first 50,000 standardised rows and wrap them in a
# DataFrame with default integer column labels.
data_50k_new = standardized_data_without_class[:50000]
data_50k_df = pd.DataFrame(data_50k_new)

# Separate the target variable from the raw (unscaled) features
y = credit_data['Class']
X = credit_data.drop(columns=['Class'])


In [None]:
# Display the raw (unscaled) feature frame.
X

In [None]:
# One-class SVM with an RBF kernel, trained on the standardised features.
# NOTE(review): `degree` is a polynomial-kernel setting; it is kept here only
# so the constructor call matches the original configuration.
clf_svm = OneClassSVM(kernel="rbf", degree=3, gamma=0.1, nu=0.01)

# Fit, then label every training row (-1 / 1; converted to 1 / 0 in a later cell).
clf_svm.fit(data_50k_df)
y_predict = clf_svm.predict(data_50k_df)


In [None]:
# Display the raw model output (one label per row of data_50k_df).
y_predict

In [None]:
# Re-code the model output so that -1 becomes 1 and 1 becomes 0, which lets it
# be compared against the dataset's 'Class' column.
label_map = {-1: 1, 1: 0}
svm_predict = pd.Series(y_predict).map(label_map)

# Rows of the standardised frame that the model flagged.
flagged = svm_predict == 1
svm_anomalies = data_50k_df[flagged]

# Calculate accuracy
accuracy = accuracy_score(y, svm_predict)
print("Accuracy in separating Outlier:", accuracy)


In [None]:
# plot_OCSVM(1) # change the integer value to visualize different pairs of features 
# plot_OCSVM(2) 
plot_OCSVM(3) 

# Novelty Detection using Unsupervised Methods

In [None]:
import numpy as np
import os
from libraries.utils import *
import pandas as pd
from collections import defaultdict


## Load Data

In [None]:
############ configuration ################
############################################

### which recorded experiment to load
CODE = 'theft_protection'            ### application (code)
THREAD = 'single'                    ### single, multi
VER = 3                              ### format of data collection
BEHAVIOUR_NORMAL = 'normal'          ### sub-folder holding the normal traces
BEHAVIOUR_FAULTY = 'faulty_data'     ### sub-folder holding the faulty traces

base_dir = '../trace_data' ### can be replaced with 'csv', 'exe_plot', 'histogram'

### both behaviours live side by side under the same application/thread/version folder
_variant_dir = f'{base_dir}/{CODE}/{THREAD}_thread/version_{VER}'
normalbase_path = f'{_variant_dir}/{BEHAVIOUR_NORMAL}'
faultybase_path = f'{_variant_dir}/{BEHAVIOUR_FAULTY}'

for _base_path in (normalbase_path, faultybase_path):
    print(_base_path)


In [None]:

######### training traces #################
### os.listdir returns entries in arbitrary order and includes hidden files, so
### apply the same clean-up as for the faulty-data lists below: drop macOS
### '.DS_Store' entries and sort, which keeps train_data_path[0] a real trace
### and makes the order reproducible.
train_base_path = os.path.join(normalbase_path, 'train_data')
train_data_path = sorted(
    os.path.join(train_base_path, x)
    for x in os.listdir(train_base_path)
    if '.DS_Store' not in x
)
print(train_data_path)


######### get paths #######################
paths_log, paths_traces, varlist_path, paths_label = get_paths(faultybase_path)

### remove .DS_Store from all lists and sort them, so that entries at the same
### position of paths_traces and paths_label can be paired by index later on
paths_log = sorted(x for x in paths_log if '.DS_Store' not in x)
paths_traces = sorted(x for x in paths_traces if '.DS_Store' not in x)
varlist_path = sorted(x for x in varlist_path if '.DS_Store' not in x)
paths_label = sorted(x for x in paths_label if '.DS_Store' not in x)

print(paths_log)
print(paths_traces)
print(varlist_path)
print(paths_label)

### the faulty traces and their labels form the test set
test_data_path = paths_traces
test_label_path = paths_label


In [None]:
############# check varlist is consistent ############
############# only for version 3 ######################

# NOTE(review): `from_number` is only assigned when VER == 3 and the result of
# the check below is not equal to False; the variable-list cell that follows
# reads `from_number` unconditionally.
if VER == 3:
    # is_consistent is not defined in this file (presumably it comes from
    # libraries.utils - confirm); a result equal to False is treated as
    # "not consistent".
    to_number = is_consistent(varlist_path)

    if to_number != False:
        # assumes mapint2var inverts the mapping (number -> variable) - TODO confirm
        from_number = mapint2var(to_number)


In [None]:
############ Get variable list ######################
### values of from_number, ordered by ascending key
sorted_keys = sorted(from_number.keys())
var_list = [from_number[number] for number in sorted_keys]
# print(var_list)

In [None]:
### inspect the first training trace and load it with the reader matching its extension
first_train_path = train_data_path[0]
print(first_train_path)
print(first_train_path.find('.npy'))
print(first_train_path.find('.json') )

if '.npy' in first_train_path:
    sample_data = load_sample(first_train_path)
    print('.npy')
elif '.json' in first_train_path:
    sample_data = read_traces(first_train_path)
    print('.json')

## Generate Data for Novelty Detection Methods

In [None]:
instances = []
for sample_path in train_data_path:
    ### load the trace with the reader matching its extension
    if '.npy' in sample_path:
        sample_data = load_sample(sample_path)
    elif '.json' in sample_path:
        sample_data = read_traces(sample_path)
    else:
        ### neither reader applies: skip the file, otherwise the sample_data left
        ### over from the previous iteration would be windowed a second time
        print(f'skipping unsupported file: {sample_path}')
        continue
    print(sample_path)

    ### generate instances from the event trace with window size of 100 and sliding interval of 1
    print(sample_data)
    instances.extend(generate_instances(sample_data, 100, 1))

    # break

### save as numpy array (once, after every trace has been processed); the file
### name is the one np.load reads in the "Extract Features for LOF" section
# np.save(f'../data-novelty/subtraces_{CODE}_ver{VER}_W100.npy', instances, allow_pickle=True)

In [None]:
# Number of sub-trace instances generated from the training traces.
len(instances)

## Extract Features for LOF

In [None]:
#### load the saved training sub-traces
### NOTE(review): allow_pickle=True unpickles arbitrary objects - only load files produced by this project
subtraces_file = f'../data-novelty/subtraces_{CODE}_ver{VER}_W100.npy'
data = np.load(subtraces_file, allow_pickle=True)

### Feature extraction 1

In [None]:
############# Approach 1 ################
########## first feature extraction method ##########
### Two features are derived from every pair of consecutive events:
###   feat1 = difference of the events' first fields
###   feat2 = difference of the events' second fields, divided by 1000
### A sub-trace of N events therefore gives N-1 (feat1, feat2) pairs (kept in
### features_all) and a flat vector of 2*(N-1) values (kept in features_all_1d).
features_all = []
features_all_1d = []
for inst in data:
    features_2d = [
        (nxt[0] - cur[0], (nxt[1] - cur[1]) / 1000)
        for cur, nxt in zip(inst[:-1], inst[1:])
    ]
    ### flatten in order: feat1, feat2, feat1, feat2, ...
    features_1d = [value for pair in features_2d for value in pair]

    assert len(features_2d) == len(inst) - 1
    features_all.append(features_2d)
    features_all_1d.append(features_1d)

    # break
assert len(features_all) == len(data)

### flat features as a DataFrame (one row per sub-trace); saving is disabled below
# df = pd.DataFrame(np.array(features_all))
df_1d = pd.DataFrame(np.array(features_all_1d))

# df.to_csv(f'../data-novelty/train_{CODE}_ver{VER}_W100_features_2d.csv', index=False)
# df_1d.to_csv(f'../data-novelty/train_{CODE}_ver{VER}_W100_features_1d.csv', index=False)

### Feature Extraction 2

In [None]:
# Placeholder for a second feature-extraction method: the design notes are in
# the string inside the inner loop, but no feature is computed yet.
# NOTE(review): nothing is appended to features_2d / features_1d, so the
# per-instance assert below fails for every instance with more than one event,
# and running this cell to completion would overwrite df_1d with empty rows.
features_all = []
features_all_1d = []
for inst in data:
    # print(len(inst))
    # print(inst)
    features_2d = []
    features_1d = []
    for e1, e2 in zip(inst[:-1], inst[1:]):
        # print(e1, e2)

        ########## second feature extraction method ##########
        '''
        use execution intervals of each variable as a feature
        1. get average execution interval for each variable, and take interval as 0 for variables that are not present in sub-trace -> number of features = total numbe of variables
        2 take average of execution intervals of all variables in the sub-trace -> number of features = 1
        3. run mutiple SVMs, one for each variable. Take the list of execution interval as features -> detect outliers in the execution interval and track it back to the 
        '''
    
    # Same sanity check as in approach 1: one feature pair per consecutive event pair.
    assert(len(features_2d)==len(inst)-1)
    features_all.append(features_2d)
    features_all_1d.append(features_1d)

    # break
assert(len(features_all)==len(data))

### save features as csv file for training
# df = pd.DataFrame(np.array(features_all))
df_1d = pd.DataFrame(np.array(features_all_1d))

# df.to_csv(f'../data-novelty/train_{CODE}_ver{VER}_W100_features_2d.csv', index=False)
# df_1d.to_csv(f'../data-novelty/train_{CODE}_ver{VER}_W100_features_1d.csv', index=False)

In [None]:
# Display the per-pair features collected in features_all as an array.
np.array(features_all)

In [None]:
# Display the flattened feature frame built by the most recently run extraction cell.
df_1d

## Generate Test Instances

In [None]:
#### load test data and labels
### traces and label files are paired by position, so the two lists must have
### the same length; a mismatch would otherwise pair the wrong files or
### silently ignore the surplus ones
if len(test_data_path) != len(test_label_path):
    raise ValueError(
        f'{len(test_data_path)} test traces but {len(test_label_path)} label files; '
        'cannot pair them by position'
    )

test_data = {}
test_labels = {}
for i, (test_path, label_path) in enumerate(zip(test_data_path, test_label_path)):
    print(test_path)
    test_data[i] = read_traces(test_path)
    test_labels[i] = read_json(label_path)


In [None]:
##### cut instances based on labels

SUBTRACE_LEN = 100   ### number of events in every cut sub-trace


def _cut_window(trace, anomaly_start, offset, width=SUBTRACE_LEN):
    """Cut ``width`` events from ``trace`` with the anomaly ``offset`` events in.

    The window start is ``anomaly_start - offset``, moved back inside the trace
    when the window would overrun either end, so the result holds exactly
    ``width`` events whenever the trace has at least that many.

    Returns:
        (sub_trace, actual_offset): the slice and the index of ``anomaly_start``
        inside it; this differs from ``offset`` only when the window was moved.
    """
    last_start = max(len(trace) - width, 0)
    start = int(np.clip(anomaly_start - offset, 0, last_start))
    return trace[start:start + width], anomaly_start - start


all_mid = list()
all_fh = list()
all_lh = list()
all_labels = list()
for i in test_data.keys():
    print(i)
    data = test_data[i]
    ### the label file holds {'labels': {<key>: [label, ...]}}; only the first key is used
    labels = test_labels[i]['labels']
    label_key = list(labels.keys())[0]
    labels = labels[label_key]

    anomaly_traces_mid = []
    anomaly_traces_fh = []
    anomaly_traces_lh = []
    anomaly_traces_str = []   ### not filled yet
    anomaly_traces_end = []   ### not filled yet
    fh_plot_ind = []   ### anomaly start index inside each first-half sub-trace
    lh_plot_ind = []   ### anomaly start index inside each last-half sub-trace
    for label in labels:
        ind1, ind2, ts1, ts2, clas = label

        #### trace with anomaly in the center: it starts 50 events into the window
        anomaly_trace_mid, mid_ind = _cut_window(data, ind1, 50)
        anomaly_traces_mid.append(anomaly_trace_mid)

        ##### trace with anomaly in the first half: random start position in [10, 40)
        fh_ind = np.random.randint(10, 40)
        anomaly_trace_fh, fh_ind = _cut_window(data, ind1, fh_ind)
        fh_plot_ind.append(fh_ind)
        anomaly_traces_fh.append(anomaly_trace_fh)

        ##### trace with anomaly in the last half: random start position in [60, 85)
        lh_ind = np.random.randint(60, 85)
        anomaly_trace_lh, lh_ind = _cut_window(data, ind1, lh_ind)
        lh_plot_ind.append(lh_ind)
        anomaly_traces_lh.append(anomaly_trace_lh)

        ### store the label with the real anomaly positions, to plot anomalies in the subtraces
        all_labels += [(label, mid_ind, fh_ind, lh_ind, label_key)]

        ### every window must be complete; this only fails for traces shorter than SUBTRACE_LEN
        assert len(anomaly_trace_mid) == SUBTRACE_LEN
        assert len(anomaly_trace_fh) == SUBTRACE_LEN
        assert len(anomaly_trace_lh) == SUBTRACE_LEN

    assert len(anomaly_traces_mid) == len(labels)
    print(len(anomaly_traces_mid), len(labels))
    assert len(anomaly_traces_fh) == len(labels)
    print(len(anomaly_traces_fh), len(labels))
    assert len(anomaly_traces_lh) == len(labels)
    print(len(anomaly_traces_lh), len(labels))

    #### store all the anomaly traces for further processing
    all_mid.extend(anomaly_traces_mid)
    all_fh.extend(anomaly_traces_fh)
    all_lh.extend(anomaly_traces_lh)

    # break

### Generate features of Anomaly Instances (generate test data in csv format)

In [None]:
##### Extract features and Save Test data as csv file

########## first feature extraction method ##########
### Two features are derived from every pair of consecutive events:
###   feat1 = difference of the events' first fields
###   feat2 = difference of the events' second fields, divided by 1000
### A sub-trace of 100 events therefore gives 99 pairs, i.e. 198 flat values.
subtrace_type = ('mid', 'fh', 'lh')
subtraces_by_kind = {'mid': all_mid, 'fh': all_fh, 'lh': all_lh}

### the loop variable is called `kind` so the builtin `type` is not shadowed
### for the rest of the notebook
for kind in subtrace_type:
    data = subtraces_by_kind[kind]

    print(kind)
    features_all = []
    features_all_1d = []
    for _label, inst in zip(all_labels, data):
        assert len(inst) == 100

        features_2d = [
            (nxt[0] - cur[0], (nxt[1] - cur[1]) / 1000)
            for cur, nxt in zip(inst[:-1], inst[1:])
        ]
        ### flatten in order: feat1, feat2, feat1, feat2, ...
        features_1d = [value for pair in features_2d for value in pair]

        assert len(features_2d) == len(inst) - 1
        features_all.append(features_2d)
        features_all_1d.append(features_1d)

        # break
    assert len(features_all) == len(data)

    ### flat features as a DataFrame (one row per sub-trace); saving is disabled
    ### below, so df_1d only keeps the last kind after the loop
    # df = pd.DataFrame(features_all)
    df_1d = pd.DataFrame(np.array(features_all_1d))

    # df.to_csv(f'../data-novelty/test_{CODE}_ver{VER}_W100_features_2d_{kind}.csv', index=False)
    # df_1d.to_csv(f'../data-novelty/test_{CODE}_ver{VER}_W100_features_1d_{kind}.csv', index=False)

## Plot Anomaly Trace with Anomalies (verification)

### mid

In [None]:
#### Plot the anomaly traces with the label (mid)
with_time = False      ### True: second event field on the x-axis, False: event index
is_xticks = True       ### True: one x tick, labelled (index, second field), every 10 events
plot_single = 0 ### give index number of the trace


for i in range(plot_single, len(anomaly_traces_mid)):
    anomaly_trace = anomaly_traces_mid[i]
    anomaly_label = labels[i]
    x_data = [event[1] for event in anomaly_trace]
    y_data = [event[0] for event in anomaly_trace]

    #### plot the trace using plotly
    fig = go.Figure()
    trace_style = dict(mode='lines+markers', name='event-trace', marker=dict(size=10, color='midnightblue'))
    if with_time:
        fig.add_trace(go.Scatter(x=x_data, y=y_data, **trace_style))
    else:
        fig.add_trace(go.Scatter(y=y_data, **trace_style))

    ### adapt labels for the subtrace which starts from index 0
    start_ind = 50   ### the mid window is cut to start 50 events before the anomaly (unless it was clipped at the trace start)
    end_ind = start_ind + anomaly_label[1]-anomaly_label[0]  ### same span as the labelled anomaly
    (start_ts, end_ts) = (anomaly_label[2], anomaly_label[3])
    ground_truths_class = anomaly_label[4]

    ### anomaly boundaries in the units of the x-axis
    (start, end) = (start_ts, end_ts) if with_time else (start_ind, end_ind)

    # Add dotted vertical lines where the anomaly starts and ends
    for x in [start, end]:
        fig.add_shape(
            type="line",
            xref="x",
            yref="paper",
            x0=x,
            y0=0,
            x1=x,
            y1=1,
            line=dict(color='red', width=4, dash="dot"),
        )

    ### generate x ticks with timestamp and index num
    if is_xticks:
        tick_positions = range(0, len(x_data), 10)
        x_ticks = [(k, x_data[k]) for k in tick_positions]
        ### ticks sit on the plotted x values: timestamps with time, indices without
        x_tickvals = [x_data[k] for k in tick_positions] if with_time else list(tick_positions)
    else:
        x_ticks = None
        x_tickvals = None

    ### axis title fonts are set through title.font; the separate `titlefont`
    ### attribute is deprecated in plotly
    fig.update_layout(
        title_text="Event Trace with Time" if with_time else "Event Trace without Time",
        xaxis=dict(
            title=dict(
                text="Timestamp" if with_time else "Number of events",
                font=dict(size=FONTSIZE),
            ),
            rangeslider=dict(visible=True),
            type='linear',
            tickvals=x_tickvals,
            ticktext=x_ticks,
            tickfont=dict(size=FONTSIZE),
            color='black',
        ),
        yaxis=dict(
            title=dict(text="Variables", font=dict(size=FONTSIZE)),
            tickvals=list(range(len(var_list))),
            # ticktext= var_list,
            tickfont=dict(size=FONTSIZE),
            color='black',
        ),
        autosize=True,
        width=PLOTWIDTH,
        height=PLOTHEIGHT,
        plot_bgcolor='rgba(0,0,0,0)',
    )

    ### same frame and grid styling on both axes
    for update_axes in (fig.update_xaxes, fig.update_yaxes):
        update_axes(
            mirror=True,
            ticks='outside',
            showline=True,
            linecolor='black',
            gridcolor='lightgrey',
        )

    # style all the traces
    fig.update_traces(
        #hoverinfo="name+x+text",
        line={"width": 0.5},
        marker={"size": 8},
        mode="lines+markers",
        showlegend=True,
    )

    fig.show()

    ### only the trace selected with plot_single is drawn
    break

### fh

In [None]:
#### Plot the anomaly traces with the label (fh)
with_time = False      ### True: second event field on the x-axis, False: event index
is_xticks = True       ### True: one x tick, labelled (index, second field), every 10 events
plot_single = 4 ### give index number of the trace


for i in range(plot_single, len(anomaly_traces_fh)):
    anomaly_trace = anomaly_traces_fh[i]
    anomaly_label = labels[i]
    x_data = [event[1] for event in anomaly_trace]
    y_data = [event[0] for event in anomaly_trace]

    #### plot the trace using plotly
    fig = go.Figure()
    trace_style = dict(mode='lines+markers', name='event-trace', marker=dict(size=10, color='midnightblue'))
    if with_time:
        fig.add_trace(go.Scatter(x=x_data, y=y_data, **trace_style))
    else:
        fig.add_trace(go.Scatter(y=y_data, **trace_style))

    ### adapt labels for the subtrace which starts from index 0
    start_ind = fh_plot_ind[i]   ### offset recorded for this sub-trace when the first-half windows were cut
    end_ind = start_ind + anomaly_label[1]-anomaly_label[0]  ### same span as the labelled anomaly
    (start_ts, end_ts) = (anomaly_label[2], anomaly_label[3])
    ground_truths_class = anomaly_label[4]

    ### anomaly boundaries in the units of the x-axis
    (start, end) = (start_ts, end_ts) if with_time else (start_ind, end_ind)

    # Add dotted vertical lines where the anomaly starts and ends
    for x in [start, end]:
        fig.add_shape(
            type="line",
            xref="x",
            yref="paper",
            x0=x,
            y0=0,
            x1=x,
            y1=1,
            line=dict(color='red', width=4, dash="dot"),
        )

    ### generate x ticks with timestamp and index num
    if is_xticks:
        tick_positions = range(0, len(x_data), 10)
        x_ticks = [(k, x_data[k]) for k in tick_positions]
        ### ticks sit on the plotted x values: timestamps with time, indices without
        x_tickvals = [x_data[k] for k in tick_positions] if with_time else list(tick_positions)
    else:
        x_ticks = None
        x_tickvals = None

    ### axis title fonts are set through title.font; the separate `titlefont`
    ### attribute is deprecated in plotly
    fig.update_layout(
        title_text="Event Trace with Time" if with_time else "Event Trace without Time",
        xaxis=dict(
            title=dict(
                text="Timestamp" if with_time else "Number of events",
                font=dict(size=FONTSIZE),
            ),
            rangeslider=dict(visible=True),
            type='linear',
            tickvals=x_tickvals,
            ticktext=x_ticks,
            tickfont=dict(size=FONTSIZE),
            color='black',
        ),
        yaxis=dict(
            title=dict(text="Variables", font=dict(size=FONTSIZE)),
            tickvals=list(range(len(var_list))),
            # ticktext= var_list,
            tickfont=dict(size=FONTSIZE),
            color='black',
        ),
        autosize=True,
        width=PLOTWIDTH,
        height=PLOTHEIGHT,
        plot_bgcolor='rgba(0,0,0,0)',
    )

    ### same frame and grid styling on both axes
    for update_axes in (fig.update_xaxes, fig.update_yaxes):
        update_axes(
            mirror=True,
            ticks='outside',
            showline=True,
            linecolor='black',
            gridcolor='lightgrey',
        )

    # style all the traces
    fig.update_traces(
        #hoverinfo="name+x+text",
        line={"width": 0.5},
        marker={"size": 8},
        mode="lines+markers",
        showlegend=True,
    )

    fig.show()

    ### only the trace selected with plot_single is drawn
    break

### lh

In [None]:
#### Plot the anomaly traces with the label (lh)
with_time = False      ### True: second event field on the x-axis, False: event index
is_xticks = True       ### True: one x tick, labelled (index, second field), every 10 events
plot_single = 0 ### give index number of the trace


for i in range(plot_single, len(anomaly_traces_lh)):
    anomaly_trace = anomaly_traces_lh[i]
    anomaly_label = labels[i]
    x_data = [event[1] for event in anomaly_trace]
    y_data = [event[0] for event in anomaly_trace]

    #### plot the trace using plotly
    fig = go.Figure()
    trace_style = dict(mode='lines+markers', name='event-trace', marker=dict(size=10, color='midnightblue'))
    if with_time:
        fig.add_trace(go.Scatter(x=x_data, y=y_data, **trace_style))
    else:
        fig.add_trace(go.Scatter(y=y_data, **trace_style))

    ### adapt labels for the subtrace which starts from index 0
    start_ind = lh_plot_ind[i]   ### offset recorded for this sub-trace when the last-half windows were cut
    end_ind = start_ind + anomaly_label[1]-anomaly_label[0]  ### same span as the labelled anomaly
    (start_ts, end_ts) = (anomaly_label[2], anomaly_label[3])
    ground_truths_class = anomaly_label[4]

    ### anomaly boundaries in the units of the x-axis
    (start, end) = (start_ts, end_ts) if with_time else (start_ind, end_ind)

    # Add dotted vertical lines where the anomaly starts and ends
    for x in [start, end]:
        fig.add_shape(
            type="line",
            xref="x",
            yref="paper",
            x0=x,
            y0=0,
            x1=x,
            y1=1,
            line=dict(color='red', width=4, dash="dot"),
        )

    ### generate x ticks with timestamp and index num
    if is_xticks:
        tick_positions = range(0, len(x_data), 10)
        x_ticks = [(k, x_data[k]) for k in tick_positions]
        ### ticks sit on the plotted x values: timestamps with time, indices without
        x_tickvals = [x_data[k] for k in tick_positions] if with_time else list(tick_positions)
    else:
        x_ticks = None
        x_tickvals = None

    ### axis title fonts are set through title.font; the separate `titlefont`
    ### attribute is deprecated in plotly
    fig.update_layout(
        title_text="Event Trace with Time" if with_time else "Event Trace without Time",
        xaxis=dict(
            title=dict(
                text="Timestamp" if with_time else "Number of events",
                font=dict(size=FONTSIZE),
            ),
            rangeslider=dict(visible=True),
            type='linear',
            tickvals=x_tickvals,
            ticktext=x_ticks,
            tickfont=dict(size=FONTSIZE),
            color='black',
        ),
        yaxis=dict(
            title=dict(text="Variables", font=dict(size=FONTSIZE)),
            tickvals=list(range(len(var_list))),
            # ticktext= var_list,
            tickfont=dict(size=FONTSIZE),
            color='black',
        ),
        autosize=True,
        width=PLOTWIDTH,
        height=PLOTHEIGHT,
        plot_bgcolor='rgba(0,0,0,0)',
    )

    ### same frame and grid styling on both axes
    for update_axes in (fig.update_xaxes, fig.update_yaxes):
        update_axes(
            mirror=True,
            ticks='outside',
            showline=True,
            linecolor='black',
            gridcolor='lightgrey',
        )

    # style all the traces
    fig.update_traces(
        #hoverinfo="name+x+text",
        line={"width": 0.5},
        marker={"size": 8},
        mode="lines+markers",
        showlegend=True,
    )

    fig.show()

    ### only the trace selected with plot_single is drawn
    break

## LOF 

In [None]:
'''
TODO:

- generate anomalous instances using label data - Done
- take instances of len 100 with anomaly at different positions in the window. Five positions: partial at start, first half, middle, second half, partial at end

1. load the data
2. train LOF for novelty detection using 2d features
3. test on the anomalous data
'''

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.svm import OneClassSVM 
from sklearn.pipeline import make_pipeline


######### Train data ##########
train_subtrace_data = pd.read_csv(f'../data-novelty/train_{CODE}_ver{VER}_W100_features_1d.csv')
### keep the fitted scaler: the test set below must be scaled with the same
### mean / variance as the data the models are trained on
scaler = StandardScaler().fit(train_subtrace_data)
train_standardized_data = scaler.transform(train_subtrace_data)
# train_standardized_data = train_subtrace_data
np.random.shuffle(train_standardized_data)     ### shuffle the rows in place

### first 80 % of the shuffled rows for training, the rest for validation
split_at = round(len(train_standardized_data)*0.8)
x_train = train_standardized_data[:split_at]
x_val = train_standardized_data[split_at:]

######### Test data ##########
test_subtrace_data = pd.read_csv(f'../data-novelty/test_{CODE}_ver{VER}_W100_features_1d_mid.csv')
### reuse the training scaler; fitting a new one on the (anomalous) test data
### would put it on a different scale from the training data
test_standardized_data = scaler.transform(test_subtrace_data)

x_test = test_standardized_data

In [None]:
##### Train models ######
### Three detectors are fitted on the same training split.
### (when novelty is set to True). Label is 1 for an inlier and -1 for an outlier
### according to the LOF score and the contamination parameter.
lof = LocalOutlierFactor(n_neighbors=35, novelty=True).fit(x_train)

clf_svm = OneClassSVM(nu=0.1, gamma=0.3, degree=5, kernel="poly").fit(x_train)

iso = IsolationForest(random_state=42)
iso.fit(x_train)

In [None]:
###### Test Model ######
y_pred = lof.predict(x_test)
print(y_pred)
y_pred = lof.predict(x_val)
print(y_pred)


In [None]:
### one-class SVM labels for the test sub-traces first, then for the validation split
for features in (x_test, x_val):
    y_pred = clf_svm.predict(features)
    print(y_pred)

In [None]:
### isolation forest labels for the test sub-traces first, then for the validation split
for features in (x_test, x_val):
    y_pred = iso.predict(features)
    print(y_pred)
