# Import packages

In [1]:
# import tensorflow as tf
# from tensorflow import keras
# from tensorflow.keras.utils import to_categorical
# import tensorflow_datasets as tfds

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cv2
import os
import random
from tqdm import tqdm
from sklearn.metrics import confusion_matrix, accuracy_score, precision_recall_fscore_support, \
                            balanced_accuracy_score, roc_auc_score, classification_report

In [2]:
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this presumably also hides NumPy's divide-by-zero RuntimeWarning
# that binEval can trigger when a class is absent — confirm that is intended.
warnings.filterwarnings('ignore')

# Utility functions

In [3]:
def uncompressArray(file_dir):
  """Interactively load every array stored in a NumPy archive.

  The archive keys are printed first and the user is asked to confirm.
  Only an answer of exactly 'y' triggers loading.

  Args:
    file_dir: path of the file handed to ``np.load``.

  Returns:
    A list with a copy of each stored array, in key order, or an empty
    list when the user declines.
  """
  arrays = []
  with open(file_dir, 'rb') as handle:
    archive = np.load(handle)
    names = list(archive.keys())
    print("First, check the data!")
    print(f"Keys: {names}")
    confirmed = input("Please enter 'y' if you want to proceed: ") == 'y'
    if not confirmed:
      print("data is not loaded!")
      return arrays

    print("\nloading data !")
    for name in names:
      # Copy so the array stays usable after the file handle is closed.
      arrays.append(archive[name].copy())
      print(f"load: {name}")
  return arrays

def reshape_data(X):
  """Append a trailing axis of length 1 to a 3-D array.

  Args:
    X: array whose first three dimensions are kept as they are.

  Returns:
    ``X`` reshaped to ``(X.shape[0], X.shape[1], X.shape[2], 1)``.
  """
  dims = X.shape
  target_shape = (dims[0], dims[1], dims[2], 1)
  return X.reshape(target_shape)

In [40]:
# def resultsTypes(y_pred, y_true, ood_class=37):
#   no_classes = ood_class+1
#   labels=list(range(no_classes))
#   cm = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=labels)

#   TP, FP, FN, TN = list(), list(), list(), list()
#   for i in range(no_classes):
#     # sum of the respective column, minus the diagonal element
#     FP.append(sum(cm[:,i]) - cm[i,i])

#     # sum of the respective row, minus the diagonal element
#     FN.append(sum(cm[i,:]) - cm[i,i])

#     # all the samples that have been correctly identified as not being a specific class
#     temp = np.delete(cm, i, 0)   # delete ith row
#     temp = np.delete(temp, i, 1)  # delete ith column
#     TN.append(sum(sum(temp)))
  
#   # diagonal elements
#   TP = list(np.diag(cm))

#   return TP, FP, TN, FN

def roundScore(score):
  """Express a fractional score as a percentage rounded to two decimals."""
  percentage = score * 100
  return round(percentage, 2)

def binEval(bin_preds, bin_gt):
  """Compute sensitivity and specificity for 0/1-encoded predictions.

  Args:
    bin_preds: predicted labels, 1 = positive and 0 = negative.
    bin_gt: ground-truth labels with the same encoding.

  Returns:
    ``(sensitivity, specifity)`` as percentages produced by ``roundScore``:
    TP / (TP + FN) and TN / (FP + TN) respectively.
  """
  predicted = np.array(bin_preds)
  actual = np.array(bin_gt)

  predicted_pos, predicted_neg = predicted == 1, predicted == 0
  actual_pos, actual_neg = actual == 1, actual == 0

  # The four confusion-matrix cells, counted with boolean masks.
  TP = (predicted_pos & actual_pos).sum()
  TN = (predicted_neg & actual_neg).sum()
  FP = (predicted_pos & actual_neg).sum()
  FN = (predicted_neg & actual_pos).sum()

  sensitivity = roundScore(TP / (TP + FN))
  specifity = roundScore(TN / (FP + TN))
  return sensitivity, specifity

def classReport(y_pred, y_true, binary=False, ood_class=37):
  """Collect the summary scores of one prediction run as a flat list.

  Args:
    y_pred: predicted labels.
    y_true: ground-truth labels.
    binary: when True, sensitivity and specificity are computed with
      ``binEval``; otherwise both are NaN.
    ood_class: accepted for signature compatibility; not used here.

  Returns:
    A 10-element list of ``roundScore`` percentages:
    [accuracy, balanced accuracy,
     precision macro, precision weighted,
     recall macro, recall weighted,
     f1 macro, f1 weighted,
     sensitivity, specificity].
  """
  report = classification_report(y_true, y_pred, output_dict=True)

  scores = [
    roundScore(report['accuracy']),
    roundScore(balanced_accuracy_score(y_true=y_true, y_pred=y_pred)),
  ]

  # For each metric: the macro average first, then the weighted average.
  for metric in ('precision', 'recall', 'f1-score'):
    for average in ('macro avg', 'weighted avg'):
      scores.append(roundScore(report[average][metric]))

  if binary:
    sensitivity, specifity = binEval(y_pred, y_true)
  else:
    sensitivity, specifity = np.nan, np.nan
  scores.extend([sensitivity, specifity])

  return scores

def binResults(y_pred, y_true, ood_class=37):
  """Collapse multi-class labels into in-distribution (1) vs. OOD (0).

  Args:
    y_pred: predicted class labels (NumPy array or any sequence).
    y_true: ground-truth class labels (NumPy array or any sequence).
    ood_class: label that marks an out-of-distribution sample.

  Returns:
    ``(bin_preds, bin_gt)``: two new lists holding 1 where the label
    differs from ``ood_class`` and 0 where it equals it.
  """
  # Coerce first: comparing a plain list with `!=` yields a single bool,
  # which would make np.where return a 0-d array that cannot become a list.
  y_pred = np.asarray(y_pred)
  y_true = np.asarray(y_true)

  bin_preds = list(np.where(y_pred != ood_class, 1, 0))
  bin_gt = list(np.where(y_true != ood_class, 1, 0))

  return bin_preds, bin_gt

def evaluate(y_pred, y_true, binary=False, ood_class=37):
  """Score one set of predictions, either multi-class or binarised.

  Args:
    y_pred: predicted class labels.
    y_true: ground-truth class labels.
    binary: when True, labels are first reduced to in-distribution vs. OOD
      with ``binResults`` and the report includes sensitivity/specificity.
    ood_class: label that marks an out-of-distribution sample.

  Returns:
    The list produced by ``classReport`` for the (possibly binarised) labels.
  """
  if binary:
    # Forward ood_class: it used to be dropped here, so any non-default
    # value was silently replaced by the helper's default of 37.
    y_pred, y_true = binResults(y_pred, y_true, ood_class=ood_class)

  return classReport(y_pred=y_pred, y_true=y_true, binary=binary, ood_class=ood_class)

In [5]:
# row = all_results.loc[(all_results['exp']=='thr') & (all_results['ds']=='mnist') & (all_results['ood_ratio']=='50') & (all_results['thr']=='0.99')]
# g = row['k_1_gt'].tolist()[0].copy()
# p = row['k_1_preds'].tolist()[0].copy()
# b_p, b_g = binResults(p, g)
# [np.count_nonzero(b_p), np.count_nonzero(b_g)]

In [6]:
# row = all_results.loc[(all_results['exp']=='thr') & (all_results['ds']=='mnist') & (all_results['ood_ratio']=='75') & (all_results['thr']=='0.99')]
# report = classification_report(row['k_1_gt'].tolist()[0].copy(), row['k_1_preds'].tolist()[0].copy())
# print(report)
# report = classification_report(row['k_1_gt'].tolist()[0].copy(), row['k_1_preds'].tolist()[0].copy(), output_dict=True)
# print(report['macro avg']['precision'])

In [7]:
# report.keys()

In [8]:
# report['0'].keys()

In [9]:
def writeToExcel(data_list, columns, algorithms, title='',
                 out_dir='/content/drive/MyDrive/PhD/Szeged22_paper/Atca_Cyber_long_paper/predictions'):
  """Write the result rows to one Excel workbook, one sheet per algorithm.

  Args:
    data_list: row data passed to ``pd.DataFrame``.
    columns: column names for the frame; must include 'exp'.
    algorithms: values of the 'exp' column; each one gets a sheet named
      after it, holding the rows where ``df['exp']`` equals that value.
    title: optional suffix appended to the file name as ``_<title>``.
    out_dir: directory that receives the workbook. The default is the
      original notebook location; pass another directory to write elsewhere.

  Returns:
    None. The workbook is written to
    ``<out_dir>/all_results_extended<_title>.xlsx``.
  """
  if title: title = '_' + title
  excel_file = f'{out_dir}/all_results_extended{title}.xlsx'

  df = pd.DataFrame(data_list, columns=columns)

  with pd.ExcelWriter(excel_file) as writer:
    for algo in algorithms:
      algo_df = df.loc[df['exp'] == algo]
      algo_df.to_excel(writer, sheet_name=algo, index=False)

  return None

def pickleDF(data_list, algo, columns, title='',
             out_dir='/content/drive/MyDrive/PhD/Szeged22_paper/Atca_Cyber_long_paper/data'):
  """Pickle the result rows as a DataFrame.

  Args:
    data_list: row data passed to ``pd.DataFrame``.
    algo: tag embedded in the file name.
    columns: column names for the frame.
    title: optional suffix appended to the file name as ``_<title>``.
    out_dir: directory that receives the pickle. The default is the
      original notebook location; pass another directory to write elsewhere.

  Returns:
    None. The frame is written to
    ``<out_dir>/all_results_df_<algo><_title>.pkl``.
  """
  if title: title = '_' + title
  data_file = f'{out_dir}/all_results_df_{algo}{title}.pkl'

  df = pd.DataFrame(data_list, columns=columns)
  df.to_pickle(data_file)

  return None

# Predictions & Splits

## Load results

In [10]:
# Load the stored table of per-experiment predictions and ground-truth labels.
# NOTE: pd.read_pickle unpickles arbitrary objects — only load files you trust.
data_file = '/content/drive/MyDrive/PhD/Szeged22_paper/Atca_Cyber_long_paper/data/all_results_df_wLabels.pkl'
all_results = pd.read_pickle(data_file)
# A bare expression at the end of a cell displays the frame.
all_results

Unnamed: 0,index,exp,ds,ood_ratio,thr,acc,P,R,F1,bin_acc,sensitivity,specifity,k_1_preds,k_1_gt
0,0,thr,mnist,0,0.0,94.47,94.47,94.47,94.47,100.00,100.00,,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,1,thr,mnist,5,0.0,89.76,89.76,89.76,89.76,95.04,100.00,0.0,"[16, 16, 6, 16, 16, 4, 22, 16, 16, 16, 6, 16, ...","[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3..."
2,2,thr,mnist,10,0.0,85.02,85.02,85.02,85.02,90.02,100.00,0.0,"[16, 16, 6, 16, 16, 4, 22, 16, 16, 16, 6, 16, ...","[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3..."
3,3,thr,mnist,15,0.0,80.23,80.23,80.23,80.23,85.01,100.00,0.0,"[16, 16, 6, 16, 16, 4, 22, 16, 16, 16, 6, 16, ...","[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3..."
4,4,thr,mnist,20,0.0,75.49,75.49,75.49,75.49,80.00,100.00,0.0,"[16, 16, 6, 16, 16, 4, 22, 16, 16, 16, 6, 16, ...","[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39746,13457,dpmDiscOr,rnd,85,,93.26,93.26,93.26,93.26,93.33,55.06,100.0,"[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3...","[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3..."
39747,13458,dpmDiscOr,rnd,90,,95.39,95.39,95.39,95.39,95.44,53.85,100.0,"[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3...","[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3..."
39748,13459,dpmDiscOr,rnd,95,,97.80,97.80,97.80,97.80,97.81,54.75,100.0,"[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3...","[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3..."
39749,13460,dpmDiscOr,rnd,100,,100.00,100.00,100.00,100.00,100.00,,100.0,"[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3...","[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3..."


In [11]:
# Remove the score columns stored with the table, keeping the experiment keys
# and the raw predictions / ground truth.
drop_columns = [
  'acc', 'P', 'R', 'F1',
  'bin_acc', 'sensitivity', 'specifity',
]
all_results.drop(columns=drop_columns, inplace=True)
all_results

Unnamed: 0,index,exp,ds,ood_ratio,thr,k_1_preds,k_1_gt
0,0,thr,mnist,0,0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,1,thr,mnist,5,0.0,"[16, 16, 6, 16, 16, 4, 22, 16, 16, 16, 6, 16, ...","[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3..."
2,2,thr,mnist,10,0.0,"[16, 16, 6, 16, 16, 4, 22, 16, 16, 16, 6, 16, ...","[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3..."
3,3,thr,mnist,15,0.0,"[16, 16, 6, 16, 16, 4, 22, 16, 16, 16, 6, 16, ...","[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3..."
4,4,thr,mnist,20,0.0,"[16, 16, 6, 16, 16, 4, 22, 16, 16, 16, 6, 16, ...","[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3..."
...,...,...,...,...,...,...,...
39746,13457,dpmDiscOr,rnd,85,,"[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3...","[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3..."
39747,13458,dpmDiscOr,rnd,90,,"[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3...","[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3..."
39748,13459,dpmDiscOr,rnd,95,,"[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3...","[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3..."
39749,13460,dpmDiscOr,rnd,100,,"[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3...","[37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 3..."


# Algorithm tests

## Define variables

In [12]:
# Class names: ten digits, 26 upper-case letters, then the eleven lower-case
# letters a, b, d, e, f, g, h, n, q, r, t (47 entries in total).
LABELS = list(
  '0123456789'
  'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  'abdefghnqrt'
)

In [13]:
# List the non-digit labels (everything after the first ten entries of LABELS)
# together with their zero-based position in that sub-list.
print("Clean training data classes and labels")
for idx, label in enumerate(LABELS[10:]):
  print(f'{idx}: {label}')

Clean training data classes and labels
0: A
1: B
2: C
3: D
4: E
5: F
6: G
7: H
8: I
9: J
10: K
11: L
12: M
13: N
14: O
15: P
16: Q
17: R
18: S
19: T
20: U
21: V
22: W
23: X
24: Y
25: Z
26: a
27: b
28: d
29: e
30: f
31: g
32: h
33: n
34: q
35: r
36: t


In [14]:
# Experiment grid. Ratios and thresholds are kept as strings because they are
# compared against the string-built keys used in the lookup cells below.
algorithms = [
  'thr', 'disc', 'thrDiscAnd', 'thrDiscOr',
  'dpm', 'dpmDiscAnd', 'dpmDiscOr',
]
datasets = ['mnist', 'bmnist', 'kmnist', 'fmnist', 'num', 'rnd', 'clean']

# '0', '5', ..., '100'
ratios = list(map(str, range(0, 101, 5)))

# '0.0', '0.01', ..., '0.99' followed by three extra high thresholds.
thrs = [str(step / 100) for step in range(100)] + ['0.995', '0.999', '1']

scores_list = [
  'acc', 'acc_balanced',
  'P_macro', 'P_weighted',
  'R_macro', 'R_weighted',
  'F1_macro', 'F1_weighted',
]

# Label that marks an out-of-distribution sample.
ood_class = 37

In [48]:
# Accumulators filled by the evaluation cells; each is an independent list.
results = []
k1_results = []
bin_results = []

# Column layout of one `results` row: the four experiment keys, the eight
# multi-class scores, the same eight scores for the binarised labels, and
# finally sensitivity and specificity.
columns = [
  'exp', 'ds', 'ood_ratio', 'thr',
  'acc', 'acc_balanced',
  'P_macro', 'P_weighted',
  'R_macro', 'R_weighted',
  'F1_macro', 'F1_weighted',
  'acc_bin', 'acc_balanced_bin',
  'P_macro_bin', 'P_weighted_bin',
  'R_macro_bin', 'R_weighted_bin',
  'F1_macro_bin', 'F1_weighted_bin',
  'sensitivity', 'specifity',
]

In [16]:
# all_k_1_predictions = dict()
# for ds in datasets:
#   all_k_1_predictions[ds] = dict()
#   for algo in algorithms:
#     all_k_1_predictions[ds][algo] = dict()
#     if 'thr' in algo:
#       for thr in thrs:
#         all_k_1_predictions[ds][algo][thr] = dict()

In [17]:
# all_k_1_predictions.keys(), all_k_1_predictions['mnist'].keys(), all_k_1_predictions['mnist']['disc'].keys(), all_k_1_predictions['mnist']['thr'].keys()

In [18]:
# all_k_1_gt = dict()
# for ds in datasets:
#   all_k_1_gt[ds] = dict()
#   for algo in algorithms:
#     all_k_1_gt[ds][algo] = dict()
#     if 'thr' in algo:
#       for thr in thrs:
#         all_k_1_gt[ds][algo][thr] = dict()

In [19]:
# all_k_1_gt.keys(), all_k_1_gt['mnist'].keys(), all_k_1_gt['mnist']['disc'].keys(), all_k_1_gt['mnist']['thr'].keys()

## Threshold

In [49]:
# Score the 'thr' experiment for every (threshold, dataset, OOD ratio)
# combination and append one row of multi-class + binary metrics to `results`.
algo_exp = 'thr'
for thr in tqdm(thrs):
  for ds in datasets:
    for ratio in ratios:
      # 'clean' is only evaluated at ratio '0': leave its ratio loop as soon
      # as the first non-zero ratio comes up.
      if (ds == 'clean') and (int(ratio) > 0):
        break

      # Named `row_mask` rather than `filter` so the builtin filter() is not
      # shadowed for the rest of the session.
      row_mask = ((all_results['exp'] == algo_exp) & (all_results['ds'] == ds)
                  & (all_results['ood_ratio'] == ratio) & (all_results['thr'] == thr))
      row = all_results.loc[row_mask]
      # Predictions / ground truth of the first matching row, copied.
      y_pred = row['k_1_preds'].tolist()[0].copy()
      y_true = row['k_1_gt'].tolist()[0].copy()

      # Multi-class scores; the two trailing slots are NaN in this mode.
      (acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted,
       F1_macro, F1_weighted, _, _) = evaluate(y_pred=y_pred, y_true=y_true, binary=False, ood_class=ood_class)

      # Scores on the binarised labels, including sensitivity / specificity.
      (b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted,
       b_F1_macro, b_F1_weighted, sensitivity, specifity) = evaluate(y_pred=y_pred, y_true=y_true, binary=True, ood_class=ood_class)

      results.append([algo_exp, ds, ratio, thr,
                      acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted,
                      b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted,
                      sensitivity, specifity])

100%|██████████| 103/103 [12:57<00:00,  7.55s/it]


In [50]:
# writeToExcel(k1_results, columns, sort_columns=datasets, algorithms=algorithms, binary='')
# writeToExcel(bin_results, columns, sort_columns=datasets, algorithms=algorithms, binary='binary')

In [51]:
# pickleDF(k1_results, algo_exp, columns, binary='')
# pickleDF(bin_results, algo_exp, columns, binary='binary')

## Discriminator

In [52]:
# Score the 'disc' experiment for every (dataset, OOD ratio) combination and
# append one row of multi-class + binary metrics to `results`.
algo_exp = 'disc'
# This experiment has no threshold; the 'thr' column is filled with NaN.
# Set once here instead of on every inner-loop iteration.
thr = np.nan
for ds in tqdm(datasets):
  for ratio in ratios:
    # 'clean' is only evaluated at ratio '0': leave its ratio loop as soon
    # as the first non-zero ratio comes up.
    if (ds == 'clean') and (int(ratio) > 0):
      break

    # Named `row_mask` rather than `filter` so the builtin filter() is not
    # shadowed for the rest of the session. No threshold condition here.
    row_mask = ((all_results['exp'] == algo_exp) & (all_results['ds'] == ds)
                & (all_results['ood_ratio'] == ratio))
    row = all_results.loc[row_mask]
    # Predictions / ground truth of the first matching row, copied.
    y_pred = row['k_1_preds'].tolist()[0].copy()
    y_true = row['k_1_gt'].tolist()[0].copy()

    # Multi-class scores; the two trailing slots are NaN in this mode.
    (acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted,
     F1_macro, F1_weighted, _, _) = evaluate(y_pred=y_pred, y_true=y_true, binary=False, ood_class=ood_class)

    # Scores on the binarised labels, including sensitivity / specificity.
    (b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted,
     b_F1_macro, b_F1_weighted, sensitivity, specifity) = evaluate(y_pred=y_pred, y_true=y_true, binary=True, ood_class=ood_class)

    results.append([algo_exp, ds, ratio, thr,
                    acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted,
                    b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted,
                    sensitivity, specifity])

100%|██████████| 7/7 [00:07<00:00,  1.02s/it]


In [53]:
# writeToExcel(k1_results, columns, algorithms=algorithms, binary='')
# writeToExcel(bin_results, columns, algorithms=algorithms, binary='binary')

In [54]:
# pickleDF(k1_results, algo_exp, columns, binary='')
# pickleDF(bin_results, algo_exp, columns, binary='binary')

## ThrDisc AND

In [55]:
# Score the 'thrDiscAnd' experiment for every (threshold, dataset, OOD ratio)
# combination and append one row of multi-class + binary metrics to `results`.
algo_exp = 'thrDiscAnd'
for thr in tqdm(thrs):
  for ds in datasets:
    for ratio in ratios:
      # 'clean' is only evaluated at ratio '0': leave its ratio loop as soon
      # as the first non-zero ratio comes up.
      if (ds == 'clean') and (int(ratio) > 0):
        break

      # Named `row_mask` rather than `filter` so the builtin filter() is not
      # shadowed for the rest of the session.
      row_mask = ((all_results['exp'] == algo_exp) & (all_results['ds'] == ds)
                  & (all_results['ood_ratio'] == ratio) & (all_results['thr'] == thr))
      row = all_results.loc[row_mask]
      # Predictions / ground truth of the first matching row, copied.
      y_pred = row['k_1_preds'].tolist()[0].copy()
      y_true = row['k_1_gt'].tolist()[0].copy()

      # Multi-class scores; the two trailing slots are NaN in this mode.
      (acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted,
       F1_macro, F1_weighted, _, _) = evaluate(y_pred=y_pred, y_true=y_true, binary=False, ood_class=ood_class)

      # Scores on the binarised labels, including sensitivity / specificity.
      (b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted,
       b_F1_macro, b_F1_weighted, sensitivity, specifity) = evaluate(y_pred=y_pred, y_true=y_true, binary=True, ood_class=ood_class)

      results.append([algo_exp, ds, ratio, thr,
                      acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted,
                      b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted,
                      sensitivity, specifity])

100%|██████████| 103/103 [12:36<00:00,  7.35s/it]


In [56]:
# writeToExcel(k1_results, columns, algorithms=algorithms, binary='')
# writeToExcel(bin_results, columns, algorithms=algorithms, binary='binary')

In [57]:
# pickleDF(k1_results, algo_exp, columns, binary='')
# pickleDF(bin_results, algo_exp, columns, binary='binary')

## ThrDisc OR

In [58]:
# Score the 'thrDiscOr' experiment for every (threshold, dataset, OOD ratio)
# combination and append one row of multi-class + binary metrics to `results`.
algo_exp = 'thrDiscOr'
for thr in tqdm(thrs):
  for ds in datasets:
    for ratio in ratios:
      # 'clean' is only evaluated at ratio '0': leave its ratio loop as soon
      # as the first non-zero ratio comes up.
      if (ds == 'clean') and (int(ratio) > 0):
        break

      # Named `row_mask` rather than `filter` so the builtin filter() is not
      # shadowed for the rest of the session.
      row_mask = ((all_results['exp'] == algo_exp) & (all_results['ds'] == ds)
                  & (all_results['ood_ratio'] == ratio) & (all_results['thr'] == thr))
      row = all_results.loc[row_mask]
      # Predictions / ground truth of the first matching row, copied.
      y_pred = row['k_1_preds'].tolist()[0].copy()
      y_true = row['k_1_gt'].tolist()[0].copy()

      # Multi-class scores; the two trailing slots are NaN in this mode.
      (acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted,
       F1_macro, F1_weighted, _, _) = evaluate(y_pred=y_pred, y_true=y_true, binary=False, ood_class=ood_class)

      # Scores on the binarised labels, including sensitivity / specificity.
      (b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted,
       b_F1_macro, b_F1_weighted, sensitivity, specifity) = evaluate(y_pred=y_pred, y_true=y_true, binary=True, ood_class=ood_class)

      results.append([algo_exp, ds, ratio, thr,
                      acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted,
                      b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted,
                      sensitivity, specifity])

100%|██████████| 103/103 [12:33<00:00,  7.32s/it]


In [59]:
# writeToExcel(k1_results, columns, algorithms=algorithms, binary='')
# writeToExcel(bin_results, columns, algorithms=algorithms, binary='binary')

In [60]:
# pickleDF(k1_results, algo_exp, columns, binary='')
# pickleDF(bin_results, algo_exp, columns, binary='binary')

## DPM

In [61]:
# Score the 'dpm' experiment for every (dataset, OOD ratio) combination and
# append one row of multi-class + binary metrics to `results`.
algo_exp = 'dpm'
# This experiment has no threshold; the 'thr' column is filled with NaN.
# Set once here instead of on every inner-loop iteration.
thr = np.nan
for ds in tqdm(datasets):
  for ratio in ratios:
    # 'clean' is only evaluated at ratio '0': leave its ratio loop as soon
    # as the first non-zero ratio comes up.
    if (ds == 'clean') and (int(ratio) > 0):
      break

    # Named `row_mask` rather than `filter` so the builtin filter() is not
    # shadowed for the rest of the session. No threshold condition here.
    row_mask = ((all_results['exp'] == algo_exp) & (all_results['ds'] == ds)
                & (all_results['ood_ratio'] == ratio))
    row = all_results.loc[row_mask]
    # Predictions / ground truth of the first matching row, copied.
    y_pred = row['k_1_preds'].tolist()[0].copy()
    y_true = row['k_1_gt'].tolist()[0].copy()

    # Multi-class scores; the two trailing slots are NaN in this mode.
    (acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted,
     F1_macro, F1_weighted, _, _) = evaluate(y_pred=y_pred, y_true=y_true, binary=False, ood_class=ood_class)

    # Scores on the binarised labels, including sensitivity / specificity.
    (b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted,
     b_F1_macro, b_F1_weighted, sensitivity, specifity) = evaluate(y_pred=y_pred, y_true=y_true, binary=True, ood_class=ood_class)

    results.append([algo_exp, ds, ratio, thr,
                    acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted,
                    b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted,
                    sensitivity, specifity])

100%|██████████| 7/7 [00:07<00:00,  1.03s/it]


In [62]:
# writeToExcel(k1_results, columns, algorithms=algorithms, binary='')
# writeToExcel(bin_results, columns, algorithms=algorithms, binary='binary')

In [63]:
# pickleDF(k1_results, algo_exp, columns, binary='')
# pickleDF(bin_results, algo_exp, columns, binary='binary')

## dpmDisc AND

In [64]:
# Score the 'dpmDiscAnd' experiment for every (dataset, OOD ratio) combination
# and append one row of multi-class + binary metrics to `results`.
algo_exp = 'dpmDiscAnd'
# This experiment has no threshold; the 'thr' column is filled with NaN.
# Set once here instead of on every inner-loop iteration.
thr = np.nan
for ds in tqdm(datasets):
  for ratio in ratios:
    # 'clean' is only evaluated at ratio '0': leave its ratio loop as soon
    # as the first non-zero ratio comes up.
    if (ds == 'clean') and (int(ratio) > 0):
      break

    # Named `row_mask` rather than `filter` so the builtin filter() is not
    # shadowed for the rest of the session. No threshold condition here.
    row_mask = ((all_results['exp'] == algo_exp) & (all_results['ds'] == ds)
                & (all_results['ood_ratio'] == ratio))
    row = all_results.loc[row_mask]
    # Predictions / ground truth of the first matching row, copied.
    y_pred = row['k_1_preds'].tolist()[0].copy()
    y_true = row['k_1_gt'].tolist()[0].copy()

    # Multi-class scores; the two trailing slots are NaN in this mode.
    (acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted,
     F1_macro, F1_weighted, _, _) = evaluate(y_pred=y_pred, y_true=y_true, binary=False, ood_class=ood_class)

    # Scores on the binarised labels, including sensitivity / specificity.
    (b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted,
     b_F1_macro, b_F1_weighted, sensitivity, specifity) = evaluate(y_pred=y_pred, y_true=y_true, binary=True, ood_class=ood_class)

    results.append([algo_exp, ds, ratio, thr,
                    acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted,
                    b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted,
                    sensitivity, specifity])

100%|██████████| 7/7 [00:07<00:00,  1.08s/it]


In [65]:
# writeToExcel(k1_results, columns, algorithms=algorithms, binary='')
# writeToExcel(bin_results, columns, algorithms=algorithms, binary='binary')

In [66]:
# pickleDF(k1_results, algo_exp, columns, binary='')
# pickleDF(bin_results, algo_exp, columns, binary='binary')

## dpmDisc OR

In [67]:
# Score the 'dpmDiscOr' experiment for every (dataset, OOD ratio) combination
# and append one row of multi-class + binary metrics to `results`.
algo_exp = 'dpmDiscOr'
# This experiment has no threshold; the 'thr' column is filled with NaN.
# Set once here instead of on every inner-loop iteration.
thr = np.nan
for ds in tqdm(datasets):
  for ratio in ratios:
    # 'clean' is only evaluated at ratio '0': leave its ratio loop as soon
    # as the first non-zero ratio comes up.
    if (ds == 'clean') and (int(ratio) > 0):
      break

    # Named `row_mask` rather than `filter` so the builtin filter() is not
    # shadowed for the rest of the session. No threshold condition here.
    row_mask = ((all_results['exp'] == algo_exp) & (all_results['ds'] == ds)
                & (all_results['ood_ratio'] == ratio))
    row = all_results.loc[row_mask]
    # Predictions / ground truth of the first matching row, copied.
    y_pred = row['k_1_preds'].tolist()[0].copy()
    y_true = row['k_1_gt'].tolist()[0].copy()

    # Multi-class scores; the two trailing slots are NaN in this mode.
    (acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted,
     F1_macro, F1_weighted, _, _) = evaluate(y_pred=y_pred, y_true=y_true, binary=False, ood_class=ood_class)

    # Scores on the binarised labels, including sensitivity / specificity.
    (b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted,
     b_F1_macro, b_F1_weighted, sensitivity, specifity) = evaluate(y_pred=y_pred, y_true=y_true, binary=True, ood_class=ood_class)

    results.append([algo_exp, ds, ratio, thr,
                    acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted,
                    b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted,
                    sensitivity, specifity])

100%|██████████| 7/7 [00:07<00:00,  1.03s/it]


In [68]:
# writeToExcel(k1_results, columns, algorithms=algorithms, binary='')
# writeToExcel(bin_results, columns, algorithms=algorithms, binary='binary')

In [69]:
# pickleDF(k1_results, algo_exp, columns, binary='')
# pickleDF(bin_results, algo_exp, columns, binary='binary')

# Parse data

In [None]:
# pickleDF(k1_results, algo_exp, columns, binary='')
# pickleDF(bin_results, algo_exp, columns, binary='binary')

In [71]:
# Persist the combined `results` table.
# NOTE(review): the file name is built from `algo_exp`, which still holds
# 'dpmDiscOr' from the last evaluation cell, so this writes
# all_results_df_dpmDiscOr_all.pkl — not the all_results_df_all.pkl that the
# next cell reads. Confirm which name is intended.
pickleDF(results, algo_exp, columns, title='all')

In [74]:
# Read the combined metrics table back from disk and display it.
# NOTE: pd.read_pickle unpickles arbitrary objects — only load files you trust.
data_file = '/content/drive/MyDrive/PhD/Szeged22_paper/Atca_Cyber_long_paper/data/all_results_df_all.pkl'
all_ = pd.read_pickle(data_file)
all_

Unnamed: 0,exp,ds,ood_ratio,thr,acc,acc_balanced,P_macro,P_weighted,R_macro,R_weighted,...,acc_bin,acc_balanced_bin,P_macro_bin,P_weighted_bin,R_macro_bin,R_weighted_bin,F1_macro_bin,F1_weighted_bin,sensitivity,specifity
0,thr,mnist,0,0.0,94.47,92.60,94.79,94.65,92.60,94.47,...,100.00,100.00,100.00,100.00,100.00,100.00,100.00,100.00,100.00,
1,thr,mnist,5,0.0,89.76,90.17,86.82,87.75,90.17,89.76,...,95.04,50.00,47.52,90.33,50.00,95.04,48.73,92.62,100.00,0.0
2,thr,mnist,10,0.0,85.02,90.14,84.26,81.84,90.14,85.02,...,90.02,50.00,45.01,81.04,50.00,90.02,47.37,85.29,100.00,0.0
3,thr,mnist,15,0.0,80.23,90.06,82.44,76.23,90.06,80.23,...,85.01,50.00,42.51,72.27,50.00,85.01,45.95,78.13,100.00,0.0
4,thr,mnist,20,0.0,75.49,90.04,81.00,70.93,90.04,75.49,...,80.00,50.00,40.00,64.01,50.00,80.00,44.45,71.12,100.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39746,dpmDiscOr,rnd,85,,93.26,54.00,94.01,93.33,54.00,93.26,...,93.33,77.53,96.37,93.81,77.53,93.33,83.62,92.49,55.06,100.0
39747,dpmDiscOr,rnd,90,,95.39,52.86,90.94,95.16,52.86,95.39,...,95.44,76.93,97.59,95.66,76.93,95.44,83.77,94.82,53.85,100.0
39748,dpmDiscOr,rnd,95,,97.80,54.21,91.61,97.61,54.21,97.80,...,97.81,77.38,98.87,97.86,77.38,97.81,84.81,97.50,54.75,100.0
39749,dpmDiscOr,rnd,100,,100.00,100.00,100.00,100.00,100.00,100.00,...,100.00,100.00,100.00,100.00,100.00,100.00,100.00,100.00,,100.0
