In [22]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import LabelBinarizer
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.linear_model import SGDOneClassSVM
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.decomposition import PCA
from sklearn.kernel_approximation import Nystroem
from sklearn import metrics
from sklearn.svm import OneClassSVM
import time
import copy

In [23]:
import re
import pandas as pd
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer, HashingVectorizer
import os,sys
import tqdm
import pickle
import time
from concurrent.futures import ThreadPoolExecutor  
import concurrent.futures

In [24]:
def reshape_matrix(matrix_list):
    new_list = [np.array(i).reshape(-1) for i in matrix_list]
    return new_list

In [25]:
def padding_onehot(onehot_list, padding):
    new_list = []
    for onehot in onehot_list:
        if len(onehot) > padding:
            onehot = np.array(onehot[0:padding])
            new_list.append(onehot)
        else:
            onehot =np.pad(onehot, [(0, padding-len(onehot)), (0, 0)], mode='constant', constant_values=0)
            new_list.append(onehot)
    new_list = reshape_matrix(new_list)
    return new_list
    

In [26]:
def padding_dictencoding(dictencoding_list, padding):
    new_list = []
    for onehot in dictencoding_list:
        if len(onehot) > padding:
            onehot = np.array(onehot[0:padding])
            new_list.append(onehot)
        else:
            onehot =np.pad(onehot, [(0, padding-len(onehot))], mode='constant', constant_values=0)
            new_list.append(onehot)
    return new_list

In [27]:
def train_models(feature, normal):
    X = normal[feature].tolist()
    y = normal['maltype'].tolist()
    # mlb = LabelBinarizer()
    
    y = np.zeros(len(X))
    # h = .02  # step size in the mesh
    outliers_fraction = 0.15
    nu = 0.05
    clfs = []
    results = []
    preds = []
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=.3, random_state=42)
    y_val_2 = [1 if i==0 else -1 for i in y_val]

    X_tv = []
    pca = PCA(n_components=100)
    if feature =='system calls dependency graph':
        X_train = reshape_matrix(X_train)
        X_val = reshape_matrix(X_val)    
        X_tv=[(X_train,X_val)]
        features = [feature]
    elif feature =='one hot encoding':
        X_train_55000 = padding_onehot(X_train, 55000)
        X_val_55000 = padding_onehot(X_val, 55000)
        X_train_pca = pca.fit_transform(X_train_55000)
        X_val_pca = pca.transform(X_val_55000)
        X_tv = [(X_train_pca,X_val_pca)]
        features = [feature]
    elif feature =='dict index encoding':
        X_train_55000 = padding_dictencoding(X_train, 55000)
        X_val_55000 = padding_dictencoding(X_val, 55000)
        X_train_pca = pca.fit_transform(X_train_55000)
        X_val_pca = pca.transform(X_val_55000)
        X_train_10000 = padding_dictencoding(X_train, 10000)
        X_val_10000 = padding_dictencoding(X_val, 10000)
        X_tv = [(X_train_pca,X_val_pca),(X_train_10000,X_val_10000)]
        features = [feature+'-pca', feature]
    elif 'system calls hashing' in feature or '1gram'in feature:
        X_tv=[(X_train,X_val)]
        features = [feature]
    else:
        X_train_pca = pca.fit_transform(X_train)
        X_val_pca = pca.transform(X_val)
        X_tv = [(X_train_pca,X_val_pca),(X_train,X_val)]
        features = [feature+'-pca', feature]

    for i,(X_train,X_val)  in enumerate(X_tv):
        feature = features[i]
        result = []
        pred = dict()
        classifiers = {
                "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
                "One-Class SVM": OneClassSVM(nu=outliers_fraction, kernel="rbf",gamma=0.1),
                "SGD One-Class SVM": SGDOneClassSVM(nu=nu, shuffle=True, fit_intercept=True, random_state=42, tol=1e-4),
                "Isolation Forest": IsolationForest(contamination=outliers_fraction,random_state=42),
            }
        for name in classifiers:
            clf = classifiers[name]
            t1 =time.time()
            res = dict()
            clf.fit(X_train)
            t2 =time.time()
            y_pred = clf.predict(X_val)
            score = metrics.accuracy_score(y_val_2,y_pred)

            pred['valid_'+  feature + '_' + name] = y_pred
            t = t2 -t1
            res['Model'] ='valid_' + feature + '_' + name
            res['Accuracy'] = score
            res['Training time'] = t
            result.append(res)
            print('Model: {}, accuracy score: {}, training time is: {} seconds'.format(res['Model'], score, t))
        results.append(result)
        preds.append(pred)
        clfs.append(classifiers)
    pca = pca
    return clfs, results, preds, pca

In [28]:
def test_models(encoded_trace_df, malware, feature, clfs, pca):
    dfs = encoded_trace_df[encoded_trace_df.maltype==malware]
    X_test = dfs[feature].tolist()
    X_tv = [] 
    if feature =='system calls dependency graph':
        X_test = reshape_matrix(X_test)    
        X_tv=[X_test]
        features = [feature]
    elif feature =='one hot encoding':
        X_test_55000 = padding_onehot(X_test, 55000)
        X_test_pca = pca.transform(X_test_55000)
        X_tv = [X_test_pca]
        features = [feature]
    elif feature =='dict index encoding':
        X_test_55000 = padding_dictencoding(X_test, 55000)       
        X_test_pca = pca.transform(X_test_55000)
        X_test_10000 = padding_dictencoding(X_test, 10000)       
        X_tv = [X_test_pca, X_test_10000]
        features = [feature+'-pca', feature]
    elif 'system calls hashing' in feature or '1gram'in feature:
        X_tv=[X_test]
        features = [feature]
    else:

        X_test_pca = pca.transform(X_test)
        X_tv = [X_test_pca, X_test]
        features = [feature+'-pca', feature]

    results = []
    preds = []
    y_test = np.ones(len(X_test))
    y_test_2 = [1 if i==0 else -1 for i in y_test]

    for i,classifiers in enumerate(clfs):
        feature = features[i]
        X_test = X_tv[i]
        result = []
        pred = dict()    
        for name in classifiers:
            res = dict()
            clf = classifiers[name]
            t1 =time.time()
            if 'anomaly detection' in name:
                y_pred = clf.predict(X_test)
                t2 =time.time()
                score = metrics.accuracy_score(y_test_2,y_pred)
            else:
                y_pred = clf.predict(X_test)
                t2 =time.time()
                score = metrics.accuracy_score(y_test,y_pred)
            t = t2 -t1
            pred[malware +'_' + feature + '_' + name] = y_pred
            res['Model'] = malware +'_' + feature + '_' + name
            res['Accuracy'] = score
            res['Testing time'] = t
            result.append(res)
            print('Model: {}, accuracy score: {}, testing time is: {} seconds'.format( res['Model'], score, t))
        results.append(result)
        preds.append(pred)
    return results, preds       

In [29]:
rootPath ='D:/git/IoT_Sensors_Security_Analysis/data/perf/'
encodePath = rootPath+'encoded/t1/'

In [30]:
import sys
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

In [31]:
def run(device, tw):
    resultsPath = 'D:/git/IoT_Sensors_Security_Analysis/results/{}/tw_{}_turn_1/'.format(device, tw)
    encoded_trace_df = pd.read_pickle(encodePath+'encoded_bow{}_{}.pkl'.format(device, tw))
    normal = encoded_trace_df[encoded_trace_df.maltype=='normal']
    resultsdict = dict()
    predsdict = dict()
    classifiersdict = dict()
    pcas = dict()
    for feature in features:
        #train stage
        clfs, results, preds, pca = train_models(feature, normal)
        
        resultsdict[feature+'_validation'] = results
        predsdict[feature+'_validation'] = preds
        classifiersdict[feature] = clfs
        pcas[feature] = pca
        # testing stage
        for malware in malwares:
            results, preds = test_models(encoded_trace_df, malware, feature, clfs, pca)
            resultsdict[malware +'_' + feature] = results
            predsdict[malware +'_' + feature] = preds
            
    loc=open(resultsPath+'classifiers.pk','wb')
    pickle.dump(classifiersdict,loc)
    loc=open(resultsPath+'results.pk','wb')
    pickle.dump(resultsdict,loc)
    loc=open(resultsPath+'preds.pk','wb')
    pickle.dump(predsdict,loc)   
    loc=open(resultsPath+'pcas.pk','wb')
    pickle.dump(pcas,loc)   
    rd = []
    for rs in resultsdict:
        for r in resultsdict[rs]:
            for s in r:
                rd.append(s)

    rd = pd.DataFrame(rd)
    md = [i.split('_') for i in rd['Model']]
    md = pd.DataFrame(md)
    md.columns  = ['Dataset','Features','Ngram','Model']
    nrd=pd.DataFrame([md['Dataset'],md['Features'],md['Ngram'], md['Model'], rd['Accuracy']]).transpose()
    nrd.to_csv(resultsPath+'results.csv',index=None)

In [32]:
features = [#'one hot encoding', 'dict index encoding',
            'system calls dependency graph', 'system calls frequency_1gram',
            'system calls tfidf_1gram', 'system calls hashing_1gram',
            'system calls frequency_2gram', 'system calls tfidf_2gram',
            'system calls hashing_2gram', 'system calls frequency_3gram',
            'system calls tfidf_3gram', 'system calls hashing_3gram',
            'system calls frequency_4gram', 'system calls tfidf_4gram',
            'system calls hashing_4gram', 'system calls frequency_5gram',
            'system calls tfidf_5gram', 'system calls hashing_5gram'
            ]
malwares=["delay", "disorder", "freeze", "hop", "mimic", "noise", "repeat", "spoof"]

In [33]:
def train_models_sequence(normal,ft, tw):
    ls = int(tw/10)
    X = normal[ft].tolist()
    y = np.ones(len(X))

    nu = 0.05
    outliers_fraction = 0.15
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=.3, random_state=42)

    if 'one hot' in ft:
        X_train = reshape_onehot(X_train, ls)
        X_val = reshape_onehot(X_val, ls)
    elif 'dict' in ft:
        X_train = reshape_dictindex(X_train, ls)
        X_val = reshape_dictindex(X_val, ls)

    X_train_pca = {}
    X_val_pca = {}

    pcas = {}
    onehot_clfs = {}
    pred = {}
    results = []
    classifiers = {
                    # "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
                    # "One-Class SVM": OneClassSVM(nu=outliers_fraction, kernel="rbf",gamma=0.1),
                    "SGD One-Class SVM": SGDOneClassSVM(nu=nu, shuffle=True, fit_intercept=True, random_state=42, tol=1e-4),
                    "Isolation Forest": IsolationForest(contamination=outliers_fraction,random_state=42),
                }

    for i in range(0,ls):
        pcas[i] = PCA(n_components=10)
        X_train_pca[i] = pcas[i].fit_transform(X_train[i])
        X_val_pca[i] = pcas[i].transform(X_val[i])

    for name in classifiers:
        t1  = time.time()
        clfs ={}
        clfs_pca = {}
        y_tr_pre = {}
        y_val_pre = {}
        y_tr_pca_pre = {}
        y_val_pca_pre = {}
        for i in range(0,ls):
            clfs[i] = copy.deepcopy(classifiers[name])
            clfs[i].fit(X_train[i])
            y_tr_pre[i] = clfs[i].predict(X_train[i])
            y_val_pre[i] = clfs[i].predict(X_val[i])
        y_tr_pre = pd.DataFrame(y_tr_pre)
        clfs[ls] = classifiers[name]
        clfs[ls].fit(y_tr_pre)
        t2  = time.time()

        y_val_pre = pd.DataFrame(y_val_pre)
        y_pred = clfs[ls].predict(y_val_pre)

        score = metrics.accuracy_score(y_val,y_pred)
        feature = ft
        pred['valid_'+  feature + '_' + name] = y_pred
        t = t2 -t1
        res = {}
        res['Model'] ='valid_' + feature + '_' + name
        res['Accuracy'] = score
        res['Training time'] = t
        results.append(res)
        onehot_clfs[feature + '_' + name] = clfs
        print('Model: {}, accuracy score: {}, training time is: {} seconds'.format(res['Model'], score, t))



        t1  = time.time()
        for i in range(0,ls):
            clfs_pca[i] = copy.deepcopy(classifiers[name])
            clfs_pca[i].fit(X_train_pca[i])
            y_tr_pca_pre[i] = clfs_pca[i].predict(X_train_pca[i])
            y_val_pca_pre[i] = clfs_pca[i].predict(X_val_pca[i])
        y_tr_pca_pre = pd.DataFrame(y_tr_pca_pre)
        clfs_pca[ls] = classifiers[name]
        clfs_pca[ls].fit(y_tr_pca_pre)
        t2  = time.time()

        y_val_pca_pre = pd.DataFrame(y_val_pca_pre)
        y_pred_pca = clfs[ls].predict(y_val_pca_pre)
        
        score = metrics.accuracy_score(y_val,y_pred_pca)

        feature =  ft + ' PCA'
        pred['valid_'+  feature + '_' + name] = y_pred
        t = t2 -t1
        res = {}
        res['Model'] ='valid_' + feature + '_' + name
        res['Accuracy'] = score
        res['Training time'] = t
        results.append(res)
        onehot_clfs[feature + '_' + name] = clfs_pca
        print('Model: {}, accuracy score: {}, training time is: {} seconds'.format(res['Model'], score, t))
    return onehot_clfs, pred, results, pcas

In [34]:
devices = ['pi3', 'pi4_2G', 'pi4_4G']
tws = [50, 60, 70]
for device in devices:
    for tw in tws:
        run(device, tw)

FileNotFoundError: [Errno 2] No such file or directory: 'D:/git/IoT_Sensors_Security_Analysis/data/perf/encoded/t1/encoded_bowpi3_50.pkl'

In [None]:
encoded_trace_df = pd.read_pickle(encodePath+'encoded_bow{}_{}.pkl'.format('pi4_2G', '50'))
one_hot = encoded_trace_df['one hot encoding']
lens = pd.DataFrame([len(i) for i in one_hot])
lens.describe()

Unnamed: 0,0
count,3321.0
mean,151603.051491
std,27801.67902
min,14.0
25%,148941.0
50%,163632.0
75%,166675.0
max,173725.0


In [None]:
encoded_trace_df = pd.read_pickle(encodePath+'encoded_bow{}_{}.pkl'.format('pi4_2G', '60'))
one_hot = encoded_trace_df['one hot encoding']
lens = pd.DataFrame([len(i) for i in one_hot])
lens.describe()

Unnamed: 0,0
count,2766.0
mean,181448.506146
std,32359.95989
min,4600.0
25%,177720.5
50%,195877.0
75%,199687.0
max,207901.0


In [None]:
encoded_trace_df = pd.read_pickle(encodePath+'encoded_bow{}_{}.pkl'.format('pi4_2G', '70'))
one_hot = encoded_trace_df['one hot encoding']
lens = pd.DataFrame([len(i) for i in one_hot])
lens.describe()

Unnamed: 0,0
count,2386.0
mean,210659.577117
std,38037.244949
min,12060.0
25%,203808.0
50%,227928.5
75%,232368.75
max,242128.0


In [None]:
def padding(onehot, dw):
    padding = dw*3000
    if len(onehot) > padding:
        onehot = np.array(onehot[0:padding])
    else:
        onehot =np.pad(onehot, [(0, padding-len(onehot)), (0, 0)], mode='constant', constant_values=0)
    return onehot

In [None]:
def reshape_onehot(onehot_list, dw):
    reshaped = {}
    for i in range(0, dw):
        reshaped[i] = []
    for onehot in onehot_list:
        onehot = padding(onehot, dw)
        for i in range(0, dw):
            start = 3000*i
            end = 3000*(i+1)
            reshaped[i].append(np.array(onehot[start:end]).reshape(-1))
    return reshaped

In [None]:
def reshape_dictindex(dictindex, dw):
    reshaped = {}
    for i in range(0, dw):
        reshaped[i] = []
    padding = dw*3000
    for di in dictindex:
        if len(di) > padding:
            di = np.array(di[0:padding])
        else:
            di =np.pad(di, [(0, padding-len(di))], mode='constant', constant_values=0)
        for i in range(0, dw):
            start = 3000*i
            end = 3000*(i+1)
            reshaped[i].append(np.array(di[start:end]).reshape(-1))
    return reshaped

In [None]:
def train_models_sequence(normal,ft, tw):
    X = normal[ft].tolist()
    y = np.ones(len(X))

    nu = 0.05
    outliers_fraction = 0.15
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=.3, random_state=42)

    if 'one hot' in ft:
        X_train = reshape_onehot(X_train, tw)
        X_val = reshape_onehot(X_val, tw)
    elif 'dict' in ft:
        X_train = reshape_dictindex(X_train, tw)
        X_val = reshape_dictindex(X_val, tw)

    X_train_pca = {}
    X_val_pca = {}

    pcas = {}
    onehot_clfs = {}
    pred = {}
    results = []
    classifiers = {
                    # "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
                    "One-Class SVM": OneClassSVM(nu=outliers_fraction, kernel="rbf",gamma=0.1),
                    "SGD One-Class SVM": SGDOneClassSVM(nu=nu, shuffle=True, fit_intercept=True, random_state=42, tol=1e-4),
                    "Isolation Forest": IsolationForest(contamination=outliers_fraction,random_state=42),
                }

    for i in range(0, tw):
        pcas[i] = PCA(n_components=100)
        X_train_pca[i] = pcas[i].fit_transform(X_train[i])
        X_val_pca[i] = pcas[i].transform(X_val[i])

    for name in classifiers:
        t1  = time.time()
        clfs ={}
        clfs_pca = {}
        y_tr_pre = {}
        y_val_pre = {}
        y_tr_pca_pre = {}
        y_val_pca_pre = {}
        par = tqdm.tqdm(total=tw,ncols=80)
        for i in range(0, tw):
            par.update(1)
            clfs[i] = copy.deepcopy(classifiers[name])
            clfs[i].fit(X_train[i])
            y_tr_pre[i] = clfs[i].predict(X_train[i])
            y_val_pre[i] = clfs[i].predict(X_val[i])
        par.close()
        y_tr_pre = pd.DataFrame(y_tr_pre)
        clfs[tw] = classifiers[name]
        clfs[tw].fit(y_tr_pre)
        t2  = time.time()

        y_val_pre = pd.DataFrame(y_val_pre)
        y_pred = clfs[tw].predict(y_val_pre)

        score = metrics.accuracy_score(y_val,y_pred)
        feature = ft
        pred['valid_'+  feature + '_' + name] = y_pred
        t = t2 -t1
        res = {}
        res['Model'] ='valid_' + feature + '_' + name
        res['Accuracy'] = score
        res['Training time'] = t
        results.append(res)
        onehot_clfs[feature + '_' + name] = clfs
        print('Model: {}, accuracy score: {}, training time is: {} seconds'.format(res['Model'], score, t))
        
        t1  = time.time()
        for i in range(0,tw):
            clfs_pca[i] = copy.deepcopy(classifiers[name])
            clfs_pca[i].fit(X_train_pca[i])
            y_tr_pca_pre[i] = clfs_pca[i].predict(X_train_pca[i])
            y_val_pca_pre[i] = clfs_pca[i].predict(X_val_pca[i])
        y_tr_pca_pre = pd.DataFrame(y_tr_pca_pre)
        clfs_pca[tw] = classifiers[name]
        clfs_pca[tw].fit(y_tr_pca_pre)
        t2  = time.time()

        y_val_pca_pre = pd.DataFrame(y_val_pca_pre)
        y_pred_pca = clfs[tw].predict(y_val_pca_pre)
        
        score = metrics.accuracy_score(y_val,y_pred_pca)

        feature =  ft + ' PCA'
        pred['valid_'+  feature + '_' + name] = y_pred
        t = t2 -t1
        res = {}
        res['Model'] ='valid_' + feature + '_' + name
        res['Accuracy'] = score
        res['Training time'] = t
        results.append(res)
        onehot_clfs[feature + '_' + name] = clfs_pca
        print('Model: {}, accuracy score: {}, training time is: {} seconds'.format(res['Model'], score, t))
    return onehot_clfs, pred, results, pcas

In [None]:
def test__sequence(encoded_trace_df, malware, ft, tw, onehot_clfs, pcas):
    feature = ft

    dfs = encoded_trace_df[encoded_trace_df.maltype==malware]
    X_test = dfs[feature].tolist()
    
    y_test = np.ones(len(X_test))
    y_test_2 = [1 if i==0 else -1 for i in y_test]

    if 'one hot' in ft:
        X_test = reshape_onehot(X_test, tw)
    elif 'dict' in ft:
        X_test = reshape_dictindex(X_test, tw)

    X_test_pca = {}
    y_test_pre = {}
    y_test_pca_pre = {}

    pred_test = {}
    results_test = []
    classifiers = [
        #  "Robust covariance",
                 "One-Class SVM",
                "SGD One-Class SVM",
                "Isolation Forest"]

    for i in range(0,tw):
        X_test_pca[i] = pcas[i].transform(X_test[i])

    for name in classifiers:
        y_test_pre = {}
        y_test_pca_pre = {}
        t1  = time.time()
        
        feature = ft
        modelName = feature + '_' + name
        clfs = onehot_clfs[modelName]
        for i in range(0,tw):
            y_test_pre[i] = clfs[i].predict(X_test[i])
        y_test_pre = pd.DataFrame(y_test_pre)
        y_test_pre = clfs[tw].predict(y_test_pre)
        t2  = time.time()

        score = metrics.accuracy_score(y_test_2, y_test_pre)

        pred_test[malware + '_'+  feature + '_' + name] = y_test_pre
        t = t2 -t1
        res = {}
        res['Model'] =malware + '_'+  feature + '_' + name
        res['Accuracy'] = score
        res['test time'] = t
        results_test.append(res)
        print('Model: {}, accuracy score: {}, test time is: {} seconds'.format(res['Model'], score, t))

        feature = ft + ' PCA'
        modelName = feature + '_' + name
        clfs_pca = onehot_clfs[modelName]
        t1  = time.time()
        for i in range(0,tw):
            y_test_pca_pre[i] = clfs_pca[i].predict(X_test_pca[i])
            
        y_test_pca_pre = pd.DataFrame(y_test_pca_pre)
        y_test_pca_pre = clfs[tw].predict(y_test_pca_pre)
        t2  = time.time()  
        
        score = metrics.accuracy_score(y_test_2,y_test_pca_pre)
        
        pred_test[malware + '_'+  feature + '_' + name] = y_test_pre
        t = t2 -t1
        res = {}
        res['Model'] =malware + '_'+  feature + '_' + name
        res['Accuracy'] = score
        res['test time'] = t
        results_test.append(res)
        print('Model: {}, accuracy score: {}, test time is: {} seconds'.format(res['Model'], score, t))
    return results_test, pred_test

In [None]:
def run__sequence(device, tw, features):
    resultsPath = 'D:/git/IoT_Sensors_Security_Analysis/results/{}/tw_{}_turn_1/'.format(device, tw)
    encoded_trace_df = pd.read_pickle(encodePath+'encoded_bow{}_{}.pkl'.format(device, tw))
    normal = encoded_trace_df[encoded_trace_df.maltype=='normal']
    resultsdict = dict()
    predsdict = dict()
    classifiersdict = dict()
    pcas = dict()
    for ft in features:
        #train stage
        onehot_clfs, pred, results, pca = train_models_sequence(normal,ft, tw)        
        resultsdict[ft+'_validation'] = results
        predsdict[ft+'_validation'] = pred
        classifiersdict[ft] = onehot_clfs
        pcas[ft] = pca
        # testing stage
        for malware in malwares:
            results_test, pred_test = test__sequence(encoded_trace_df, malware, ft, tw, onehot_clfs, pca)
            resultsdict[malware +'_' + ft] = results_test
            predsdict[malware +'_' + ft] = pred_test
            
    loc=open(resultsPath+'sequence_classifiers.pk','wb')
    pickle.dump(classifiersdict,loc)
    loc=open(resultsPath+'sequence_results.pk','wb')
    pickle.dump(resultsdict,loc)
    loc=open(resultsPath+'sequence_preds.pk','wb')
    pickle.dump(predsdict,loc)   
    loc=open(resultsPath+'sequence_pcas.pk','wb')
    pickle.dump(pcas,loc)   
    rd = []
    for rs in resultsdict:
        for r in resultsdict[rs]:        
            rd.append(r)
    rd = pd.DataFrame(rd)
    md = [i.split('_') for i in rd['Model']]
    md = pd.DataFrame(md)
    md.columns  = ['Dataset','Features','Model']
    nrd=pd.DataFrame([md['Dataset'],md['Features'], md['Model'], rd['Accuracy']]).transpose()
    nrd.to_csv(resultsPath+'sequence_results.csv',index=None)
    return resultsdict

In [None]:
sft = ['one hot encoding', 'dict index encoding']
malwares=["delay", "disorder", "freeze", "hop", "mimic", "noise", "repeat", "spoof"]

In [None]:
devices = ['pi3', 'pi4_2G', 'pi4_4G']
tws = [50, 60, 70]
for device in devices:
    for tw in tws:
        run__sequence(device, tw, sft)

100%|███████████████████████████████████████████| 50/50 [15:30<00:00, 18.60s/it]


Model: valid_one hot encoding_One-Class SVM, accuracy score: 0.33613445378151263, training time is: 930.1099662780762 seconds
Model: valid_one hot encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.6041457653045654 seconds


100%|███████████████████████████████████████████| 50/50 [00:11<00:00,  4.24it/s]


Model: valid_one hot encoding_SGD One-Class SVM, accuracy score: 1.0, training time is: 11.802725553512573 seconds
Model: valid_one hot encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.050011634826660156 seconds


100%|███████████████████████████████████████████| 50/50 [17:09<00:00, 20.59s/it]


Model: valid_one hot encoding_Isolation Forest, accuracy score: 0.2605042016806723, training time is: 1029.7405016422272 seconds
Model: valid_one hot encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 12.249267101287842 seconds
Model: delay_one hot encoding_One-Class SVM, accuracy score: 1.0, test time is: 732.1697452068329 seconds
Model: delay_one hot encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.4122741222381592 seconds
Model: delay_one hot encoding_SGD One-Class SVM, accuracy score: 0.019021739130434784, test time is: 3.636646032333374 seconds
Model: delay_one hot encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.011001825332641602 seconds
Model: delay_one hot encoding_Isolation Forest, accuracy score: 0.8369565217391305, test time is: 402.1652228832245 seconds
Model: delay_one hot encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 2.051269769668579 seconds
Model: disorder_one hot encoding_One-Class SVM, accurac

100%|███████████████████████████████████████████| 50/50 [00:15<00:00,  3.19it/s]


Model: valid_dict index encoding_One-Class SVM, accuracy score: 0.3277310924369748, training time is: 15.654566049575806 seconds
Model: valid_dict index encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.6126148700714111 seconds


100%|███████████████████████████████████████████| 50/50 [00:00<00:00, 97.44it/s]


Model: valid_dict index encoding_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.5171165466308594 seconds
Model: valid_dict index encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.057500362396240234 seconds


100%|███████████████████████████████████████████| 50/50 [00:52<00:00,  1.05s/it]


Model: valid_dict index encoding_Isolation Forest, accuracy score: 0.42016806722689076, training time is: 52.55604791641235 seconds
Model: valid_dict index encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 11.705713510513306 seconds
Model: delay_dict index encoding_One-Class SVM, accuracy score: 1.0, test time is: 12.100911855697632 seconds
Model: delay_dict index encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.3980991840362549 seconds
Model: delay_dict index encoding_SGD One-Class SVM, accuracy score: 0.016304347826086956, test time is: 0.16003656387329102 seconds
Model: delay_dict index encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.01500391960144043 seconds
Model: delay_dict index encoding_Isolation Forest, accuracy score: 0.6711956521739131, test time is: 18.852429151535034 seconds
Model: delay_dict index encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 2.548710823059082 seconds
Model: disorder_dict index e

100%|███████████████████████████████████████████| 60/60 [24:58<00:00, 24.97s/it]


Model: valid_one hot encoding_One-Class SVM, accuracy score: 0.05102040816326531, training time is: 1498.3683142662048 seconds
Model: valid_one hot encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.5258820056915283 seconds


100%|███████████████████████████████████████████| 60/60 [00:10<00:00,  5.58it/s]


Model: valid_one hot encoding_SGD One-Class SVM, accuracy score: 1.0, training time is: 10.754657983779907 seconds
Model: valid_one hot encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.07101607322692871 seconds


100%|███████████████████████████████████████████| 60/60 [16:00<00:00, 16.01s/it]


Model: valid_one hot encoding_Isolation Forest, accuracy score: 0.1326530612244898, training time is: 960.5988204479218 seconds
Model: valid_one hot encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 16.96573519706726 seconds
Model: delay_one hot encoding_One-Class SVM, accuracy score: 1.0, test time is: 1039.8508896827698 seconds
Model: delay_one hot encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.336076021194458 seconds
Model: delay_one hot encoding_SGD One-Class SVM, accuracy score: 0.016611295681063124, test time is: 3.3492603302001953 seconds
Model: delay_one hot encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.012000799179077148 seconds
Model: delay_one hot encoding_Isolation Forest, accuracy score: 0.8538205980066446, test time is: 369.29027104377747 seconds
Model: delay_one hot encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 4.569441556930542 seconds
Model: disorder_one hot encoding_One-Class SVM, accurac

100%|███████████████████████████████████████████| 60/60 [00:19<00:00,  3.04it/s]


Model: valid_dict index encoding_One-Class SVM, accuracy score: 0.05102040816326531, training time is: 19.749342918395996 seconds
Model: valid_dict index encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.5031144618988037 seconds


100%|██████████████████████████████████████████| 60/60 [00:00<00:00, 116.48it/s]


Model: valid_dict index encoding_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.5201189517974854 seconds
Model: valid_dict index encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.062012672424316406 seconds


100%|███████████████████████████████████████████| 60/60 [00:52<00:00,  1.15it/s]


Model: valid_dict index encoding_Isolation Forest, accuracy score: 0.22448979591836735, training time is: 52.179861545562744 seconds
Model: valid_dict index encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 15.593549013137817 seconds
Model: delay_dict index encoding_One-Class SVM, accuracy score: 1.0, test time is: 17.48481798171997 seconds
Model: delay_dict index encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.3320767879486084 seconds
Model: delay_dict index encoding_SGD One-Class SVM, accuracy score: 0.016611295681063124, test time is: 0.1580350399017334 seconds
Model: delay_dict index encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.012001752853393555 seconds
Model: delay_dict index encoding_Isolation Forest, accuracy score: 0.7840531561461794, test time is: 17.679930686950684 seconds
Model: delay_dict index encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 3.146115779876709 seconds
Model: disorder_dict index e

100%|███████████████████████████████████████████| 70/70 [27:02<00:00, 23.17s/it]


Model: valid_one hot encoding_One-Class SVM, accuracy score: 0.0, training time is: 1622.0319757461548 seconds
Model: valid_one hot encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.5061142444610596 seconds


100%|███████████████████████████████████████████| 70/70 [00:10<00:00,  6.42it/s]


Model: valid_one hot encoding_SGD One-Class SVM, accuracy score: 1.0, training time is: 10.903904914855957 seconds
Model: valid_one hot encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.08002161979675293 seconds


100%|███████████████████████████████████████████| 70/70 [16:40<00:00, 14.30s/it]


Model: valid_one hot encoding_Isolation Forest, accuracy score: 0.11494252873563218, training time is: 1001.0630276203156 seconds
Model: valid_one hot encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 28.668366193771362 seconds
Model: delay_one hot encoding_One-Class SVM, accuracy score: 1.0, test time is: 1057.6049089431763 seconds
Model: delay_one hot encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.29965758323669434 seconds
Model: delay_one hot encoding_SGD One-Class SVM, accuracy score: 0.0, test time is: 3.456219434738159 seconds
Model: delay_one hot encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.015003681182861328 seconds
Model: delay_one hot encoding_Isolation Forest, accuracy score: 0.9288389513108615, test time is: 386.1279492378235 seconds
Model: delay_one hot encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 6.73864221572876 seconds
Model: disorder_one hot encoding_One-Class SVM, accuracy score: 1.0, t

100%|███████████████████████████████████████████| 70/70 [00:19<00:00,  3.60it/s]


Model: valid_dict index encoding_One-Class SVM, accuracy score: 0.0, training time is: 19.427948236465454 seconds
Model: valid_dict index encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.46210432052612305 seconds


100%|██████████████████████████████████████████| 70/70 [00:00<00:00, 107.50it/s]


Model: valid_dict index encoding_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.6551480293273926 seconds
Model: valid_dict index encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.06501507759094238 seconds


100%|███████████████████████████████████████████| 70/70 [01:07<00:00,  1.03it/s]


Model: valid_dict index encoding_Isolation Forest, accuracy score: 0.16091954022988506, training time is: 70.9730134010315 seconds
Model: valid_dict index encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 16.218544006347656 seconds
Model: delay_dict index encoding_One-Class SVM, accuracy score: 1.0, test time is: 19.561357975006104 seconds
Model: delay_dict index encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.2940700054168701 seconds
Model: delay_dict index encoding_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.1690378189086914 seconds
Model: delay_dict index encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.013002634048461914 seconds
Model: delay_dict index encoding_Isolation Forest, accuracy score: 0.9138576779026217, test time is: 19.256797790527344 seconds
Model: delay_dict index encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 3.271742105484009 seconds
Model: disorder_dict index encoding_One-Class 

100%|███████████████████████████████████████████| 50/50 [49:01<00:00, 58.84s/it]


Model: valid_one hot encoding_One-Class SVM, accuracy score: 0.0, training time is: 2941.930142402649 seconds
Model: valid_one hot encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.7096648216247559 seconds


100%|███████████████████████████████████████████| 50/50 [00:14<00:00,  3.45it/s]


Model: valid_one hot encoding_SGD One-Class SVM, accuracy score: 0.984375, training time is: 14.509922504425049 seconds
Model: valid_one hot encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.06801509857177734 seconds


100%|███████████████████████████████████████████| 50/50 [20:15<00:00, 24.31s/it]


Model: valid_one hot encoding_Isolation Forest, accuracy score: 0.9140625, training time is: 1215.8646953105927 seconds
Model: valid_one hot encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 14.785335302352905 seconds
Model: delay_one hot encoding_One-Class SVM, accuracy score: 1.0, test time is: 593.8423898220062 seconds
Model: delay_one hot encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.16203713417053223 seconds
Model: delay_one hot encoding_SGD One-Class SVM, accuracy score: 0.1925925925925926, test time is: 1.3002941608428955 seconds
Model: delay_one hot encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.010002374649047852 seconds
Model: delay_one hot encoding_Isolation Forest, accuracy score: 0.05185185185185185, test time is: 150.45624208450317 seconds
Model: delay_one hot encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 2.167419672012329 seconds
Model: disorder_one hot encoding_One-Class SVM, accuracy score

100%|███████████████████████████████████████████| 50/50 [00:30<00:00,  1.66it/s]


Model: valid_dict index encoding_One-Class SVM, accuracy score: 0.0, training time is: 30.148183822631836 seconds
Model: valid_dict index encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 1.0712015628814697 seconds


100%|███████████████████████████████████████████| 50/50 [00:00<00:00, 82.63it/s]


Model: valid_dict index encoding_SGD One-Class SVM, accuracy score: 0.984375, training time is: 0.609137773513794 seconds
Model: valid_dict index encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.12402820587158203 seconds


100%|███████████████████████████████████████████| 50/50 [00:52<00:00,  1.06s/it]


Model: valid_dict index encoding_Isolation Forest, accuracy score: 0.8515625, training time is: 53.1169216632843 seconds
Model: valid_dict index encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 12.648869514465332 seconds
Model: delay_dict index encoding_One-Class SVM, accuracy score: 1.0, test time is: 8.327423334121704 seconds
Model: delay_dict index encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.23743796348571777 seconds
Model: delay_dict index encoding_SGD One-Class SVM, accuracy score: 0.1925925925925926, test time is: 0.05701327323913574 seconds
Model: delay_dict index encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.00800180435180664 seconds
Model: delay_dict index encoding_Isolation Forest, accuracy score: 0.1259259259259259, test time is: 7.412184238433838 seconds
Model: delay_dict index encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 1.7223916053771973 seconds
Model: disorder_dict index encoding_One-C

100%|███████████████████████████████████████████| 60/60 [48:42<00:00, 48.72s/it]


Model: valid_one hot encoding_One-Class SVM, accuracy score: 0.0, training time is: 2922.9441850185394 seconds
Model: valid_one hot encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.6351423263549805 seconds


100%|███████████████████████████████████████████| 60/60 [00:12<00:00,  4.62it/s]


Model: valid_one hot encoding_SGD One-Class SVM, accuracy score: 1.0, training time is: 12.995633125305176 seconds
Model: valid_one hot encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.09002041816711426 seconds


100%|███████████████████████████████████████████| 60/60 [19:08<00:00, 19.14s/it]


Model: valid_one hot encoding_Isolation Forest, accuracy score: 0.8113207547169812, training time is: 1148.44681930542 seconds
Model: valid_one hot encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 26.41165828704834 seconds
Model: delay_one hot encoding_One-Class SVM, accuracy score: 1.0, test time is: 521.2457544803619 seconds
Model: delay_one hot encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.1345348358154297 seconds
Model: delay_one hot encoding_SGD One-Class SVM, accuracy score: 0.0990990990990991, test time is: 1.1265928745269775 seconds
Model: delay_one hot encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.010002613067626953 seconds
Model: delay_one hot encoding_Isolation Forest, accuracy score: 0.10810810810810811, test time is: 140.56214213371277 seconds
Model: delay_one hot encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 3.0326881408691406 seconds
Model: disorder_one hot encoding_One-Class SVM, accuracy

100%|███████████████████████████████████████████| 60/60 [00:27<00:00,  2.20it/s]


Model: valid_dict index encoding_One-Class SVM, accuracy score: 0.0, training time is: 27.260190963745117 seconds
Model: valid_dict index encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.8862001895904541 seconds


100%|██████████████████████████████████████████| 60/60 [00:00<00:00, 100.99it/s]


Model: valid_dict index encoding_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.5981349945068359 seconds
Model: valid_dict index encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.13303017616271973 seconds


100%|███████████████████████████████████████████| 60/60 [00:58<00:00,  1.03it/s]


Model: valid_dict index encoding_Isolation Forest, accuracy score: 0.6698113207547169, training time is: 58.38577055931091 seconds
Model: valid_dict index encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 15.393921136856079 seconds
Model: delay_dict index encoding_One-Class SVM, accuracy score: 1.0, test time is: 7.962152481079102 seconds
Model: delay_dict index encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.1960446834564209 seconds
Model: delay_dict index encoding_SGD One-Class SVM, accuracy score: 0.0990990990990991, test time is: 0.07001686096191406 seconds
Model: delay_dict index encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.010001659393310547 seconds
Model: delay_dict index encoding_Isolation Forest, accuracy score: 0.1981981981981982, test time is: 8.019814729690552 seconds
Model: delay_dict index encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 1.7013914585113525 seconds
Model: disorder_dict index enco

100%|███████████████████████████████████████████| 70/70 [34:34<00:00, 29.63s/it]


Model: valid_one hot encoding_One-Class SVM, accuracy score: 0.0, training time is: 2074.412483215332 seconds
Model: valid_one hot encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.5740597248077393 seconds


100%|███████████████████████████████████████████| 70/70 [00:13<00:00,  5.20it/s]


Model: valid_one hot encoding_SGD One-Class SVM, accuracy score: 0.9782608695652174, training time is: 13.466591596603394 seconds
Model: valid_one hot encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.09302043914794922 seconds


100%|███████████████████████████████████████████| 70/70 [23:37<00:00, 20.25s/it]


Model: valid_one hot encoding_Isolation Forest, accuracy score: 0.9021739130434783, training time is: 1417.7328751087189 seconds
Model: valid_one hot encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 31.286850214004517 seconds
Model: delay_one hot encoding_One-Class SVM, accuracy score: 1.0, test time is: 463.8415811061859 seconds
Model: delay_one hot encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.12002682685852051 seconds
Model: delay_one hot encoding_SGD One-Class SVM, accuracy score: 0.15789473684210525, test time is: 1.418320894241333 seconds
Model: delay_one hot encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.012002229690551758 seconds
Model: delay_one hot encoding_Isolation Forest, accuracy score: 0.08421052631578947, test time is: 151.8804953098297 seconds
Model: delay_one hot encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 3.6193253993988037 seconds
Model: disorder_one hot encoding_One-Class SVM, accur

100%|███████████████████████████████████████████| 70/70 [00:22<00:00,  3.10it/s]


Model: valid_dict index encoding_One-Class SVM, accuracy score: 0.0, training time is: 22.56212830543518 seconds
Model: valid_dict index encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.7941796779632568 seconds


100%|██████████████████████████████████████████| 70/70 [00:00<00:00, 108.18it/s]


Model: valid_dict index encoding_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.6510584354400635 seconds
Model: valid_dict index encoding PCA_SGD One-Class SVM, accuracy score: 0.9456521739130435, training time is: 0.1470339298248291 seconds


100%|███████████████████████████████████████████| 70/70 [01:03<00:00,  1.10it/s]


Model: valid_dict index encoding_Isolation Forest, accuracy score: 0.8152173913043478, training time is: 66.74962401390076 seconds
Model: valid_dict index encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 15.418065786361694 seconds
Model: delay_dict index encoding_One-Class SVM, accuracy score: 1.0, test time is: 6.709965229034424 seconds
Model: delay_dict index encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.17403936386108398 seconds
Model: delay_dict index encoding_SGD One-Class SVM, accuracy score: 0.16842105263157894, test time is: 0.07301783561706543 seconds
Model: delay_dict index encoding PCA_SGD One-Class SVM, accuracy score: 0.042105263157894736, test time is: 0.012002706527709961 seconds
Model: delay_dict index encoding_Isolation Forest, accuracy score: 0.18947368421052632, test time is: 8.292393922805786 seconds
Model: delay_dict index encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 2.146221399307251 seconds
Model: disor

100%|███████████████████████████████████████████| 50/50 [36:48<00:00, 44.17s/it]


Model: valid_one hot encoding_One-Class SVM, accuracy score: 0.0, training time is: 2208.701686143875 seconds
Model: valid_one hot encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.5671379566192627 seconds


100%|███████████████████████████████████████████| 50/50 [00:13<00:00,  3.71it/s]


Model: valid_one hot encoding_SGD One-Class SVM, accuracy score: 0.9912280701754386, training time is: 13.47463345527649 seconds
Model: valid_one hot encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.07101655006408691 seconds


100%|███████████████████████████████████████████| 50/50 [22:14<00:00, 26.69s/it]


Model: valid_one hot encoding_Isolation Forest, accuracy score: 0.868421052631579, training time is: 1334.8290829658508 seconds
Model: valid_one hot encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 16.676772832870483 seconds
Model: delay_one hot encoding_One-Class SVM, accuracy score: 1.0, test time is: 1077.6815919876099 seconds
Model: delay_one hot encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.32807421684265137 seconds
Model: delay_one hot encoding_SGD One-Class SVM, accuracy score: 0.13636363636363635, test time is: 3.796830177307129 seconds
Model: delay_one hot encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.013005971908569336 seconds
Model: delay_one hot encoding_Isolation Forest, accuracy score: 0.09696969696969697, test time is: 454.3281342983246 seconds
Model: delay_one hot encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 4.183250188827515 seconds
Model: disorder_one hot encoding_One-Class SVM, accura

100%|███████████████████████████████████████████| 50/50 [00:33<00:00,  1.50it/s]


Model: valid_dict index encoding_One-Class SVM, accuracy score: 0.0, training time is: 33.328433990478516 seconds
Model: valid_dict index encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.8793351650238037 seconds


100%|███████████████████████████████████████████| 50/50 [00:00<00:00, 70.97it/s]


Model: valid_dict index encoding_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.7091350555419922 seconds
Model: valid_dict index encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.12202739715576172 seconds


100%|███████████████████████████████████████████| 50/50 [01:05<00:00,  1.32s/it]


Model: valid_dict index encoding_Isolation Forest, accuracy score: 0.7894736842105263, training time is: 65.95709466934204 seconds
Model: valid_dict index encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 12.395180463790894 seconds
Model: delay_dict index encoding_One-Class SVM, accuracy score: 1.0, test time is: 26.459611654281616 seconds
Model: delay_dict index encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.48911571502685547 seconds
Model: delay_dict index encoding_SGD One-Class SVM, accuracy score: 0.13636363636363635, test time is: 0.14203238487243652 seconds
Model: delay_dict index encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.011002063751220703 seconds
Model: delay_dict index encoding_Isolation Forest, accuracy score: 0.2636363636363636, test time is: 19.643742322921753 seconds
Model: delay_dict index encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 2.557591199874878 seconds
Model: disorder_dict index e

100%|███████████████████████████████████████████| 60/60 [39:03<00:00, 39.06s/it]


Model: valid_one hot encoding_One-Class SVM, accuracy score: 0.0, training time is: 2343.8506014347076 seconds
Model: valid_one hot encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.5431010723114014 seconds


100%|███████████████████████████████████████████| 60/60 [00:12<00:00,  4.70it/s]


Model: valid_one hot encoding_SGD One-Class SVM, accuracy score: 1.0, training time is: 12.785892248153687 seconds
Model: valid_one hot encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.0980222225189209 seconds


100%|███████████████████████████████████████████| 60/60 [19:15<00:00, 19.26s/it]


Model: valid_one hot encoding_Isolation Forest, accuracy score: 0.9157894736842105, training time is: 1156.1116366386414 seconds
Model: valid_one hot encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 28.21800398826599 seconds
Model: delay_one hot encoding_One-Class SVM, accuracy score: 1.0, test time is: 1897.8148810863495 seconds
Model: delay_one hot encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.2850644588470459 seconds
Model: delay_one hot encoding_SGD One-Class SVM, accuracy score: 0.05474452554744526, test time is: 3.355759859085083 seconds
Model: delay_one hot encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.013002157211303711 seconds
Model: delay_one hot encoding_Isolation Forest, accuracy score: 0.12773722627737227, test time is: 360.81233739852905 seconds
Model: delay_one hot encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 6.054916620254517 seconds
Model: disorder_one hot encoding_One-Class SVM, accura

100%|███████████████████████████████████████████| 60/60 [00:28<00:00,  2.12it/s]


Model: valid_dict index encoding_One-Class SVM, accuracy score: 0.0, training time is: 28.349464416503906 seconds
Model: valid_dict index encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.7361657619476318 seconds


100%|███████████████████████████████████████████| 60/60 [00:00<00:00, 98.82it/s]


Model: valid_dict index encoding_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.6111390590667725 seconds
Model: valid_dict index encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.1380312442779541 seconds


100%|███████████████████████████████████████████| 60/60 [00:57<00:00,  1.05it/s]


Model: valid_dict index encoding_Isolation Forest, accuracy score: 0.8631578947368421, training time is: 57.34249758720398 seconds
Model: valid_dict index encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 17.69368815422058 seconds
Model: delay_dict index encoding_One-Class SVM, accuracy score: 1.0, test time is: 23.284027338027954 seconds
Model: delay_dict index encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.4419679641723633 seconds
Model: delay_dict index encoding_SGD One-Class SVM, accuracy score: 0.05474452554744526, test time is: 0.17103910446166992 seconds
Model: delay_dict index encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.011453628540039062 seconds
Model: delay_dict index encoding_Isolation Forest, accuracy score: 0.19343065693430658, test time is: 19.101948738098145 seconds
Model: delay_dict index encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 3.100933074951172 seconds
Model: disorder_dict index en

100%|███████████████████████████████████████████| 70/70 [33:41<00:00, 28.88s/it]


Model: valid_one hot encoding_One-Class SVM, accuracy score: 0.0, training time is: 2021.444411277771 seconds
Model: valid_one hot encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.4971127510070801 seconds


100%|███████████████████████████████████████████| 70/70 [00:12<00:00,  5.68it/s]


Model: valid_one hot encoding_SGD One-Class SVM, accuracy score: 1.0, training time is: 12.338194847106934 seconds
Model: valid_one hot encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.09202027320861816 seconds


100%|███████████████████████████████████████████| 70/70 [20:20<00:00, 17.43s/it]


Model: valid_one hot encoding_Isolation Forest, accuracy score: 0.8048780487804879, training time is: 1220.2621188163757 seconds
Model: valid_one hot encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 30.382693767547607 seconds
Model: delay_one hot encoding_One-Class SVM, accuracy score: 1.0, test time is: 1209.7806043624878 seconds
Model: delay_one hot encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.25905799865722656 seconds
Model: delay_one hot encoding_SGD One-Class SVM, accuracy score: 0.04721030042918455, test time is: 3.5488171577453613 seconds
Model: delay_one hot encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.01500391960144043 seconds
Model: delay_one hot encoding_Isolation Forest, accuracy score: 0.15021459227467812, test time is: 403.167156457901 seconds
Model: delay_one hot encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 6.598665237426758 seconds
Model: disorder_one hot encoding_One-Class SVM, accura

100%|███████████████████████████████████████████| 70/70 [00:14<00:00,  4.68it/s]


Model: valid_dict index encoding_One-Class SVM, accuracy score: 0.0, training time is: 14.952856302261353 seconds
Model: valid_dict index encoding PCA_One-Class SVM, accuracy score: 0.0, training time is: 0.6741526126861572 seconds


100%|██████████████████████████████████████████| 70/70 [00:00<00:00, 114.27it/s]


Model: valid_dict index encoding_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.6165862083435059 seconds
Model: valid_dict index encoding PCA_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.14403128623962402 seconds


100%|███████████████████████████████████████████| 70/70 [01:16<00:00,  1.09s/it]


Model: valid_dict index encoding_Isolation Forest, accuracy score: 0.6829268292682927, training time is: 76.35523176193237 seconds
Model: valid_dict index encoding PCA_Isolation Forest, accuracy score: 1.0, training time is: 16.952894926071167 seconds
Model: delay_dict index encoding_One-Class SVM, accuracy score: 1.0, test time is: 11.45387315750122 seconds
Model: delay_dict index encoding PCA_One-Class SVM, accuracy score: 1.0, test time is: 0.38408684730529785 seconds
Model: delay_dict index encoding_SGD One-Class SVM, accuracy score: 0.04721030042918455, test time is: 0.16602802276611328 seconds
Model: delay_dict index encoding PCA_SGD One-Class SVM, accuracy score: 0.0, test time is: 0.014003753662109375 seconds
Model: delay_dict index encoding_Isolation Forest, accuracy score: 0.34334763948497854, test time is: 18.152623176574707 seconds
Model: delay_dict index encoding PCA_Isolation Forest, accuracy score: 0.0, test time is: 3.288759708404541 seconds
Model: disorder_dict index e

In [None]:
# device = 'pi3'
# tw = 50
# resultsPath = 'D:/git/IoT_Sensors_Security_Analysis/results/{}/tw_{}_turn_1/'.format(device, tw)
# loc=open(resultsPath+'results.pk','rb')
# results =  pickle.load(loc)

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.linear_model import SGDOneClassSVM
from sklearn.svm import OneClassSVM
from sklearn import metrics

import time
import re
import ast
import os,sys
import pickle


In [2]:

if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

In [53]:
def train_models(feature, normal):
    X = normal[feature].tolist()
    # if len(X[0]) > 100:
    #     X = [h[0:100] for h in X]
    # if 'frequency' not in feature:
    #     X = [[i*1000 for i in h] for h in X]

    y = [1 for i in range(0,len(X))]
    # h = .02  # step size in the mesh
    outliers_fraction = 0.05
    # nu = 0.01
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=.3, shuffle=False)
    y_val = [1 for i in range(0,len(X_val))]
    
    result = []
    pred = dict()
    classifiers = {}
    classifiers = {
            "Robust covariance": EllipticEnvelope(contamination=outliers_fraction , support_fraction=0.5),
            "One-Class SVM": OneClassSVM(cache_size=200, gamma='scale', kernel='rbf',nu=0.05,  shrinking=True, tol=0.001,verbose=False),
            "SGD One-Class SVM": SGDOneClassSVM(nu=outliers_fraction, shuffle=True, fit_intercept=True, random_state=42, tol=1e-4),
            "Isolation Forest": IsolationForest(contamination=outliers_fraction,random_state=42),
        }
    for name in classifiers:
        clf = classifiers[name]
        t1 =time.time()
        res = dict()
        print( feature + '_' + name)
        try:
            clf.fit(X_train)
            t2 =time.time()        
            y_pred = clf.predict(X_val)
            score = metrics.accuracy_score(y_val,y_pred)
        except:
            t2 =time.time()
            y_pred = []
            score = 0
        
        pred['valid_'+  feature + '_' + name] = y_pred
        t = t2 -t1
        res['Model'] ='valid_' + feature + '_' + name
        res['Accuracy'] = score
        res['Training time'] = t
        result.append(res)
        print('Model: {}, accuracy score: {}, training time is: {} seconds'.format(res['Model'], score, t))
    return classifiers, result, pred

def test_models(encoded_trace_df, malware, feature, classifiers):
    dfs = encoded_trace_df[encoded_trace_df.maltype==malware]
    X_test = dfs[feature].tolist()
    # if len(X_test[0]) > 100:        
    #     X_test = [h[0:100] for h in X_test]
    # if 'frequency' not in feature:
    #     X_test = [[i*1000 for i in h] for h in X_test]
    # y_test = np.ones(len(X_test))
    # y_test = [1 if i==0 else -1 for i in y_test]
    y_test = [-1 for i in range(0,len(X_test))]
    
    result = []
    pred = dict()    
    for name in classifiers:
        res = dict()
        clf = classifiers[name]
        t1 =time.time()
        try:
            y_pred = clf.predict(X_test)
            t2 =time.time()
            score = metrics.accuracy_score(y_test, y_pred)
        except:
            t2 =time.time()
            y_pred = []
            score = 0
        t = t2 -t1
        pred[malware +'_' + feature + '_' + name] = y_pred
        res['Model'] = malware +'_' + feature + '_' + name
        res['Accuracy'] = score
        res['Testing time'] = t
        result.append(res)
        # print('Model: {}, accuracy score: {}, testing time is: {} seconds'.format( res['Model'], score, t))
    return  result, pred

def run(device, tw):
    rootPath = 'd:/mt_data/1126_withrw/'
    resultsPath = 'd:/mt_data/1126_withrw/'
    dataPath = rootPath+'encoded/t1/' 
    resultsdict = dict()
    predsdict = dict()
    classifiersdict = dict()

    for feature in features:
        #train stage
        # read data from file
        tsv_name = dataPath+'encoded_bow{}_{}_{}.csv'.format(device, tw, feature)
        encoded_trace_df = pd.read_csv(tsv_name, sep='\t')
        ft = [ast.literal_eval(i) for i in encoded_trace_df[feature]]
        encoded_trace_df[feature] = ft

        normal = encoded_trace_df[encoded_trace_df.maltype=='normal']
        clfs, results, preds = train_models(feature, normal)
        
        resultsdict[feature+'_validation'] = results
        predsdict[feature+'_validation'] = preds
        classifiersdict[feature] = clfs
 
        # testing stage
        for malware in malwares:
            results, preds = test_models(encoded_trace_df, malware, feature, clfs)
            resultsdict[malware +'_' + feature] = results
            predsdict[malware +'_' + feature] = preds
            
    loc=open(resultsPath+'classifiers_{}_{}.pk'.format(device, tw),'wb')
    pickle.dump(classifiersdict,loc)
    loc=open(resultsPath+'results_{}_{}.pk'.format(device, tw),'wb')
    pickle.dump(resultsdict,loc)
    loc=open(resultsPath+'preds_{}_{}.pk'.format(device, tw),'wb')
    pickle.dump(predsdict,loc)   

    rd = []
    for rs in resultsdict:
        for r in resultsdict[rs]:
                rd.append(r)

    rd = pd.DataFrame(rd)
    md = [i.split('_') for i in rd['Model']]
    md = pd.DataFrame(md)
    md.columns  = ['Dataset','Features','Ngram','Model']
    nrd=pd.DataFrame([md['Dataset'],md['Features'],md['Ngram'], md['Model'], rd['Accuracy']]).transpose()
    nrd.to_csv(resultsPath+'results_{}_{}.csv'.format(device, tw),index=None)

In [33]:
from sklearn.decomposition import PCA

In [42]:
feature = 'system calls frequency_1gram'

In [43]:
normal = encoded_trace_df[encoded_trace_df.maltype=='normal'][feature].tolist() 
delay = encoded_trace_df[encoded_trace_df.maltype=='delay'][feature].tolist()
len(normal[0])

17

In [36]:
X_train, X_val = train_test_split(normal, test_size=.3) 
    
cl = OneClassSVM(cache_size=200, max_iter=-1, gamma='scale', kernel='rbf',nu=0.1,  shrinking=False, tol=0.001,verbose=False)
cl.fit(X_train)
pr = cl.predict(X_val)
tr = np.ones(len(X_val))

print("acc for normal is {}".format(metrics.accuracy_score(pr,tr)))

pr = cl.predict(delay)
tr = np.ones(len(delay))
print("acc for delay is {}".format(1 - metrics.accuracy_score(pr,tr)))

acc for normal is 0.9166666666666666
acc for delay is 0.07499999999999996


In [48]:
m = max([max(i) for i in (normal+ delay)])

In [49]:
normal = [[j/m for j in i] for i in normal]
delay = [[j/m for j in i] for i in delay]

In [50]:
X_train, X_val = train_test_split(normal, test_size=.1, random_state=42) 
    
cl = OneClassSVM(cache_size=200, max_iter=-1, gamma='scale', kernel='rbf',nu=0.1,  shrinking=False, tol=0.001,verbose=False)
cl.fit(X_train)
pr = cl.predict(X_val)
tr = np.ones(len(X_val))

print("acc for normal is {}".format(metrics.accuracy_score(pr,tr)))

pr = cl.predict(delay)
tr = np.ones(len(delay))
print("acc for delay is {}".format(1 - metrics.accuracy_score(pr,tr)))

acc for normal is 0.9166666666666666
acc for delay is 0.07499999999999996


In [37]:
pca = PCA(n_components=10)
pca.fit(normal+ delay)
normal = pca.transform(normal)
delay = pca.transform(delay)

In [38]:
X_train, X_val = train_test_split(normal, test_size=.1, random_state=42) 
    
cl = OneClassSVM(cache_size=200, max_iter=-1, gamma='scale', kernel='rbf',nu=0.1,  shrinking=False, tol=0.001,verbose=False)
cl.fit(X_train)
pr = cl.predict(X_val)
tr = np.ones(len(X_val))

print("acc for normal is {}".format(metrics.accuracy_score(pr,tr)))

pr = cl.predict(delay)
tr = np.ones(len(delay))
print("acc for delay is {}".format(1 - metrics.accuracy_score(pr,tr)))

acc for normal is 0.9444444444444444
acc for delay is 0.7027777777777777


In [9]:
rootPath = 'd:/mt_data/1126_withrw/'
resultsPath = 'd:/mt_data/1126_withrw/'
dataPath = rootPath+'encoded/t1/' 
resultsdict = dict()
predsdict = dict()
classifiersdict = dict()
pca = PCA(n_components=2)
for feature in fts:
    #train stage
    # read data from file
    tsv_name = dataPath+'encoded_bow{}_{}_{}.csv'.format(device, tw, feature)
    encoded_trace_df = pd.read_csv(tsv_name, sep='\t')
    ft = [ast.literal_eval(i) for i in encoded_trace_df[feature]]
    encoded_trace_df[feature] = ft
    normal = []
    
    normal = encoded_trace_df[encoded_trace_df.maltype=='normal'][feature].tolist() 
    delay = encoded_trace_df[encoded_trace_df.maltype=='delay'][feature].tolist()
    pca.fit(normal+ delay)
    normal = pca.transform(normal)
    delay = pca.transform(delay)
    # if 'frequency' in feature:
    # normal = [h[0:100] for h in normal]
    X_train, X_val = train_test_split(normal, test_size=.1, random_state=42) 
    
    cl = OneClassSVM(cache_size=200, max_iter=-1, gamma='scale', kernel='rbf',nu=0.1,  shrinking=False, tol=0.001,verbose=False)
    cl.fit(X_train)
    
    # if 'frequency' in feature:
    # delay = [h[0:100] for h in delay]
    pr = cl.predict(delay)
    tr = np.ones(len(delay))
    print(1 - metrics.accuracy_score(pr,tr))
    # print(cl.decision_function(delay[0:9]))
    pr = cl.predict(X_val)
    tr = np.ones(len(X_val))
    print(metrics.accuracy_score(pr,tr))

0.7055555555555555
0.9444444444444444


In [61]:
if __name__ == "__main__":
    features = [#'one hot encoding', 'dict index encoding',
                # 'system calls dependency graph', 
                'system calls frequency_1gram',
                'system calls tfidf_1gram', 
                'system calls hashing_1gram',
                'system calls frequency_2gram', 
                'system calls tfidf_2gram',
                # 'system calls hashing_2gram',
                'system calls frequency_3gram',
                'system calls tfidf_3gram',
                # 'system calls hashing_3gram',
                # 'system calls frequency_4gram', 
                # 'system calls tfidf_4gram',
                # 'system calls hashing_4gram', 
                # 'system calls frequency_5gram',
                # 'system calls tfidf_5gram', 
                # 'system calls hashing_5gram',
                'system calls frequency_2gram-pcas', 
                'system calls tfidf_2gram-pcas',
                'system calls frequency_3gram-pcas',
                'system calls tfidf_3gram-pcas', 
                'system calls frequency_4gram-pcas', 
                'system calls tfidf_4gram-pcas',
                'system calls frequency_5gram-pcas',
                'system calls tfidf_5gram-pcas',
                'system calls frequency_1gram-scaled', 
                'system calls tfidf_1gram-scaled',                
                'system calls frequency_2gram-scaled', 
                'system calls tfidf_2gram-scaled',
                'system calls frequency_3gram-scaled',
                'system calls tfidf_3gram-scaled', 
                # 'system calls frequency_4gram-scaled', 
                # 'system calls tfidf_4gram-scaled',
                # 'system calls frequency_5gram-scaled',
                # 'system calls tfidf_5gram-scaled'
                ]

    malwares=["delay", "disorder", "freeze", "hop", "mimic", "noise", "repeat", "spoof"]
    devices =  ['pi3', 'pi4_2G', 'pi4_4G']
    tw =  60
    for device in devices:
        run(device, tw)

system calls frequency_1gram_Robust covariance
Model: valid_system calls frequency_1gram_Robust covariance, accuracy score: 0.9629629629629629, training time is: 0.021004676818847656 seconds
system calls frequency_1gram_One-Class SVM
Model: valid_system calls frequency_1gram_One-Class SVM, accuracy score: 0.9444444444444444, training time is: 0.0010006427764892578 seconds
system calls frequency_1gram_SGD One-Class SVM
Model: valid_system calls frequency_1gram_SGD One-Class SVM, accuracy score: 1.0, training time is: 0.00099945068359375 seconds
system calls frequency_1gram_Isolation Forest
Model: valid_system calls frequency_1gram_Isolation Forest, accuracy score: 0.9722222222222222, training time is: 0.11532163619995117 seconds
system calls tfidf_1gram_Robust covariance
Model: valid_system calls tfidf_1gram_Robust covariance, accuracy score: 0.9537037037037037, training time is: 0.020005464553833008 seconds
system calls tfidf_1gram_One-Class SVM
Model: valid_system calls tfidf_1gram_On

In [71]:
resultsPath = 'f:/temp/' 

In [92]:
loc = open(resultsPath+'preds_pi4_4G_60.pk','rb')
preds = pickle.load(loc)

In [93]:
preds

{'system calls frequency_1gram_validation': {'valid_system calls frequency_1gram_Robust covariance': array([-1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1, -1,  1,  1, -1,  1,  1,
          1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1,
          1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
          1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,
          1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1, -1,  1,  1,  1,  1,  1,
          1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
          1,  1,  1,  1, -1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,
          1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
          1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1, -1,
          1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1,  1,
          1,  1,  1,  1,  1,  1,  1, -1,  1,  1, -1,  1,  1,  1,  1,  1,  1,
          1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  

In [64]:
import psutil as ps

In [75]:
(ps.Process(os.getpid()).memory_full_info().rss)/(pow(1024,3))

0.2259674072265625

In [78]:
rd = []
for rs in res:
    for r in res[rs]:
            rd.append(r)

In [79]:
rd

[{'Model': 'valid_system calls frequency_1gram_Robust covariance',
  'Accuracy': 0.8966666666666666,
  'Training time': 0.03200721740722656},
 {'Model': 'valid_system calls frequency_1gram_One-Class SVM',
  'Accuracy': 0.9033333333333333,
  'Training time': 0.00099945068359375},
 {'Model': 'valid_system calls frequency_1gram_SGD One-Class SVM',
  'Accuracy': 1.0,
  'Training time': 0.0010006427764892578},
 {'Model': 'valid_system calls frequency_1gram_Isolation Forest',
  'Accuracy': 0.8833333333333333,
  'Training time': 0.10802388191223145},
 {'Model': 'delay_system calls frequency_1gram_Robust covariance',
  'Accuracy': 0.1,
  'Testing time': 0.0010008811950683594},
 {'Model': 'delay_system calls frequency_1gram_One-Class SVM',
  'Accuracy': 0.13333333333333333,
  'Testing time': 0.00099945068359375},
 {'Model': 'delay_system calls frequency_1gram_SGD One-Class SVM',
  'Accuracy': 0.0,
  'Testing time': 0.0010006427764892578},
 {'Model': 'delay_system calls frequency_1gram_Isolation

In [80]:
rd = pd.DataFrame(rd)
    

In [89]:
rd

Unnamed: 0,Model,Accuracy,Training time,Testing time
0,valid_system calls frequency_1gram_Robust cova...,0.896667,0.032007,
1,valid_system calls frequency_1gram_One-Class SVM,0.903333,0.000999,
2,valid_system calls frequency_1gram_SGD One-Cla...,1.000000,0.001001,
3,valid_system calls frequency_1gram_Isolation F...,0.883333,0.108024,
4,delay_system calls frequency_1gram_Robust cova...,0.100000,,0.001001
...,...,...,...,...
607,repeat_system calls tfidf_5gram-pcas_Isolation...,0.156667,,0.031007
608,spoof_system calls tfidf_5gram-pcas_Robust cov...,1.000000,,0.003001
609,spoof_system calls tfidf_5gram-pcas_One-Class SVM,1.000000,,0.003000
610,spoof_system calls tfidf_5gram-pcas_SGD One-Cl...,1.000000,,0.001000


In [90]:
md = [i.split('_') for i in rd['Model']]
md = pd.DataFrame(md)
md.columns  = ['Dataset','Features','Ngram','Model']


In [91]:
md

Unnamed: 0,Dataset,Features,Ngram,Model
0,valid,system calls frequency,1gram,Robust covariance
1,valid,system calls frequency,1gram,One-Class SVM
2,valid,system calls frequency,1gram,SGD One-Class SVM
3,valid,system calls frequency,1gram,Isolation Forest
4,delay,system calls frequency,1gram,Robust covariance
...,...,...,...,...
607,repeat,system calls tfidf,5gram-pcas,Isolation Forest
608,spoof,system calls tfidf,5gram-pcas,Robust covariance
609,spoof,system calls tfidf,5gram-pcas,One-Class SVM
610,spoof,system calls tfidf,5gram-pcas,SGD One-Class SVM


In [2]:
@profile(precision=4,stream=open('memory_profiler.log','w+'))
def test1():
    c=0
    for item in range(0, 100000):
        c+=1

In [4]:
if __name__=='__main__':
    test1()

ERROR: Could not find file C:\Users\luke-\AppData\Local\Temp/ipykernel_10396/1408896442.py
