# Import packages

In [1]:
# import tensorflow as tf
# from tensorflow import keras
# from tensorflow.keras.utils import to_categorical
# import tensorflow_datasets as tfds

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cv2
import os
import random
from tqdm import tqdm
from sklearn.metrics import confusion_matrix, accuracy_score, precision_recall_fscore_support, \
                            balanced_accuracy_score, roc_auc_score, classification_report

In [2]:
import warnings
warnings.filterwarnings('ignore')

# Utility functions

In [3]:
def uncompressArray(file_dir):
  """Interactively load every array stored in a NumPy archive.

  The keys found in the archive are printed first and the user is asked to
  confirm on stdin. Only the exact answer 'y' triggers loading.

  Args:
    file_dir: path of the file handed to ``np.load`` (opened in binary mode).

  Returns:
    A list with a copy of each stored array, in key order, or an empty list
    when the user does not confirm.
  """
  with open(file_dir, 'rb') as handle:
    archive = np.load(handle)
    names = list(archive.keys())
    print("First, check the data!")
    print(f"Keys: {names}")
    answer = input("Please enter 'y' if you want to proceed: ")

    # Guard clause: anything other than 'y' means nothing is loaded.
    if answer != 'y':
      print("data is not loaded!")
      return []

    print("\nloading data !")
    arrays = []
    for name in names:
      # Copy so the returned arrays do not depend on the archive object.
      arrays.append(archive[name].copy())
      print(f"load: {name}")
  return arrays

def reshape_data(X):
  """Append a trailing axis of length 1 to a 3-D array.

  Args:
    X: array whose first three dimensions are read from ``X.shape``.

  Returns:
    ``X`` reshaped to ``(X.shape[0], X.shape[1], X.shape[2], 1)``.
  """
  dim0, dim1, dim2 = X.shape[0], X.shape[1], X.shape[2]
  return X.reshape(dim0, dim1, dim2, 1)

In [4]:
def roundScore(score):
  """Return ``score`` (a fraction) as a percentage rounded to two decimals."""
  return round(score*100, 2)

def binEval(bin_preds, bin_gt):
  """Compute sensitivity and specificity (in percent) for 0/1 labels.

  Args:
    bin_preds: sequence of predicted labels; 1 is the positive class, 0 the negative.
    bin_gt: sequence of ground-truth labels with the same encoding.

  Returns:
    ``(sensitivity, specifity)`` as percentages rounded to two decimals.
    A score is ``np.nan`` when the ground truth contains no sample of the
    corresponding class (no positives -> sensitivity, no negatives -> specificity).
    The spelling 'specifity' is kept because the result columns use it.
  """
  bin_preds = np.asarray(bin_preds)
  bin_gt = np.asarray(bin_gt)

  TP = np.sum((bin_preds == 1) & (bin_gt == 1))
  TN = np.sum((bin_preds == 0) & (bin_gt == 0))
  FP = np.sum((bin_preds == 1) & (bin_gt == 0))
  FN = np.sum((bin_preds == 0) & (bin_gt == 1))

  positives = TP + FN
  negatives = TN + FP

  # Return NaN explicitly for an absent class instead of relying on a NumPy
  # 0/0 division, which only works as a side effect and emits a RuntimeWarning.
  sensitivity = roundScore(TP / positives) if positives else np.nan
  specifity = roundScore(TN / negatives) if negatives else np.nan

  return sensitivity, specifity

def classReport(y_pred, y_true, binary=False, ood_class=28):
  """Collect the summary scores for one set of predictions.

  Args:
    y_pred: predicted labels.
    y_true: ground-truth labels.
    binary: when True, sensitivity and specificity are computed with
      ``binEval`` (labels are then expected to be 0/1).
    ood_class: accepted for interface compatibility; not used here.

  Returns:
    A list of ten values, each passed through ``roundScore`` except where noted:
    accuracy, balanced accuracy, macro precision, weighted precision,
    macro recall, weighted recall, macro F1, weighted F1, sensitivity,
    specificity. The last two are ``np.nan`` unless ``binary`` is True.
  """
  summary = classification_report(y_true, y_pred, output_dict=True)

  scores = [
      roundScore(summary['accuracy']),
      roundScore(balanced_accuracy_score(y_true=y_true, y_pred=y_pred)),
  ]

  # Metric-major order: macro first, then weighted, for each metric.
  for metric in ('precision', 'recall', 'f1-score'):
    for average in ('macro avg', 'weighted avg'):
      scores.append(roundScore(summary[average][metric]))

  if binary:
    sensitivity, specifity = binEval(y_pred, y_true)
  else:
    sensitivity, specifity = np.nan, np.nan

  scores.extend([sensitivity, specifity])
  return scores

def binResults(y_pred, y_true, ood_class=28):
  """Collapse multi-class labels into a binary "not ood_class" indicator.

  Args:
    y_pred: predicted labels (list, tuple or array).
    y_true: ground-truth labels (list, tuple or array).
    ood_class: label that is mapped to 0; every other label is mapped to 1.

  Returns:
    ``(bin_preds, bin_gt)``: two lists of 0/1 values, element-wise aligned
    with the inputs.
  """
  # Coerce to arrays first: comparing a plain Python list with a scalar
  # yields a single bool, which would make np.where return a 0-d array.
  pred_labels = np.asarray(y_pred)
  true_labels = np.asarray(y_true)

  bin_preds = list(np.where(pred_labels != ood_class, 1, 0))
  bin_gt = list(np.where(true_labels != ood_class, 1, 0))

  return bin_preds, bin_gt

def evaluate(y_pred, y_true, binary=False, ood_class=28):
  """Score predictions either on the original labels or on a binary view.

  Args:
    y_pred: predicted labels.
    y_true: ground-truth labels.
    binary: when True, both label sequences are first reduced with
      ``binResults`` (``ood_class`` -> 0, anything else -> 1).
    ood_class: label treated as the out-of-distribution class.

  Returns:
    The list of scores produced by ``classReport``.
  """
  if binary:
    # Forward ood_class; previously the default (28) was always used here,
    # silently ignoring the caller's value.
    y_pred, y_true = binResults(y_pred, y_true, ood_class=ood_class)

  return classReport(y_pred=y_pred, y_true=y_true, binary=binary, ood_class=ood_class)

In [5]:
def writeToExcel(data_list, columns, algorithms, title=''):
  """Write the result rows to an Excel workbook, one sheet per algorithm.

  Args:
    data_list: rows used to build the DataFrame.
    columns: column names for the DataFrame; must include 'exp'.
    algorithms: values of the 'exp' column; each gets its own sheet holding
      the rows whose 'exp' equals that value.
    title: optional tag inserted (prefixed with '_') into the file name.

  Returns:
    None. The workbook is written to a fixed Google Drive location.
  """
  suffix = '_' + title if title else title
  excel_file = f'/content/drive/MyDrive/PhD/Szeged22_paper/Atca_Cyber_long_paper/predictions/all_results_extended{suffix}_arabic.xlsx'

  frame = pd.DataFrame(data_list, columns=columns)

  with pd.ExcelWriter(excel_file) as writer:
    for algo in algorithms:
      per_algo = frame[frame['exp'] == algo]
      per_algo.to_excel(writer, sheet_name=algo, index=False)

def pickleDF(data_list, algo, columns, title=''):
  """Pickle the result rows as a DataFrame.

  Args:
    data_list: rows used to build the DataFrame.
    algo: tag placed in the file name right after 'all_results_df_'.
    columns: column names for the DataFrame.
    title: optional second tag, inserted after ``algo`` with a '_' prefix.

  Returns:
    None. The file 'all_results_df_{algo}{_title}_arabic.pkl' is written to a
    fixed Google Drive location.
  """
  suffix = '_' + title if title else title
  data_file = f'/content/drive/MyDrive/PhD/Szeged22_paper/Atca_Cyber_long_paper/data/all_results_df_{algo}{suffix}_arabic.pkl'

  pd.DataFrame(data_list, columns=columns).to_pickle(data_file)

# Predictions & Splits

## Load results

In [6]:
# Load the stored results table. As the rendered output shows, each row holds one
# experiment configuration (exp, ds, ood_ratio, thr) together with previously computed
# scores and the raw predictions / ground-truth labels (k_1_preds, k_1_gt).
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
data_file = '/content/drive/MyDrive/PhD/Szeged22_paper/Atca_Cyber_long_paper/data/all_results_df_wLabels_arabic.pkl'
all_results = pd.read_pickle(data_file)
# Bare expression: displays the DataFrame as the cell output.
all_results

Unnamed: 0,index,exp,ds,ood_ratio,thr,acc,P,R,F1,bin_acc,sensitivity,specifity,k_1_preds,k_1_gt
0,0,thr,mnist,0,0.0,81.01,81.01,81.01,81.01,100.00,100.00,,"[16, 6, 18, 24, 16, 25, 1, 11, 8, 10, 19, 13, ...","[16.0, 6.0, 18.0, 24.0, 16.0, 25.0, 1.0, 11.0,..."
1,1,thr,mnist,5,0.0,77.50,77.50,77.50,77.50,95.04,100.00,0.0,"[6, 6, 4, 4, 6, 6, 17, 6, 17, 6, 6, 6, 17, 6, ...","[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2..."
2,2,thr,mnist,10,0.0,73.82,73.82,73.82,73.82,90.02,100.00,0.0,"[6, 6, 4, 4, 6, 6, 17, 6, 17, 6, 6, 6, 17, 6, ...","[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2..."
3,3,thr,mnist,15,0.0,70.34,70.34,70.34,70.34,85.01,100.00,0.0,"[6, 6, 4, 4, 6, 6, 17, 6, 17, 6, 6, 6, 17, 6, ...","[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2..."
4,4,thr,mnist,20,0.0,66.80,66.80,66.80,66.80,80.00,100.00,0.0,"[6, 6, 4, 4, 6, 6, 17, 6, 17, 6, 6, 6, 17, 6, ...","[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39746,13457,dpmDiscOr,rnd,85,,89.91,89.91,89.91,89.91,89.94,32.25,100.0,"[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2...","[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2..."
39747,13458,dpmDiscOr,rnd,90,,93.24,93.24,93.24,93.24,93.27,31.85,100.0,"[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2...","[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2..."
39748,13459,dpmDiscOr,rnd,95,,96.64,96.64,96.64,96.64,96.65,30.99,100.0,"[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2...","[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2..."
39749,13460,dpmDiscOr,rnd,100,,100.00,100.00,100.00,100.00,100.00,,100.0,"[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2...","[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2..."


In [7]:
# Remove the previously computed score columns so that only the configuration
# columns and the raw predictions / labels remain; the scores are recomputed below.
drop_columns = ['acc', 'P', 'R', 'F1', 'bin_acc', 'sensitivity', 'specifity']
# In-place: all_results itself is modified, so this cell cannot be re-run without reloading.
all_results.drop(columns=drop_columns, inplace=True)
# Bare expression: displays the reduced DataFrame as the cell output.
all_results

Unnamed: 0,index,exp,ds,ood_ratio,thr,k_1_preds,k_1_gt
0,0,thr,mnist,0,0.0,"[16, 6, 18, 24, 16, 25, 1, 11, 8, 10, 19, 13, ...","[16.0, 6.0, 18.0, 24.0, 16.0, 25.0, 1.0, 11.0,..."
1,1,thr,mnist,5,0.0,"[6, 6, 4, 4, 6, 6, 17, 6, 17, 6, 6, 6, 17, 6, ...","[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2..."
2,2,thr,mnist,10,0.0,"[6, 6, 4, 4, 6, 6, 17, 6, 17, 6, 6, 6, 17, 6, ...","[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2..."
3,3,thr,mnist,15,0.0,"[6, 6, 4, 4, 6, 6, 17, 6, 17, 6, 6, 6, 17, 6, ...","[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2..."
4,4,thr,mnist,20,0.0,"[6, 6, 4, 4, 6, 6, 17, 6, 17, 6, 6, 6, 17, 6, ...","[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2..."
...,...,...,...,...,...,...,...
39746,13457,dpmDiscOr,rnd,85,,"[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2...","[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2..."
39747,13458,dpmDiscOr,rnd,90,,"[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2...","[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2..."
39748,13459,dpmDiscOr,rnd,95,,"[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2...","[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2..."
39749,13460,dpmDiscOr,rnd,100,,"[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2...","[28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 2..."


# Algorithm tests

## Define variables

In [None]:
# The 28 letters, in the order that defines their integer class index (0-27).
letters_list = ['أ','ب','ت','ث','ج','ح','خ','د','ذ','ر','ز','س','ش','ص','ض','ط','ظ','ع','غ','ف','ق','ك','ل','م','ن','ه','و','ي']

# Print the letter -> index mapping, one "letter : index" pair per line.
for idx, letter in enumerate(letters_list):
  print(letter, ':', idx)

أ : 0
ب : 1
ت : 2
ث : 3
ج : 4
ح : 5
خ : 6
د : 7
ذ : 8
ر : 9
ز : 10
س : 11
ش : 12
ص : 13
ض : 14
ط : 15
ظ : 16
ع : 17
غ : 18
ف : 19
ق : 20
ك : 21
ل : 22
م : 23
ن : 24
ه : 25
و : 26
ي : 27


In [None]:
# Experiment identifiers, matching the values of the 'exp' column.
algorithms = [
    'thr', 'disc', 'thrDiscAnd', 'thrDiscOr',
    'dpm', 'dpmDiscAnd', 'dpmDiscOr',
]  # DPM # ROC # AUC
# Dataset identifiers, matching the values of the 'ds' column.
datasets = ['mnist', 'bmnist', 'kmnist', 'fmnist', 'arabic', 'rnd', 'clean']
# OOD ratios 0, 5, ..., 100, kept as strings because they are compared
# against the 'ood_ratio' column as strings.
ratios = list(map(str, range(0, 101, 5)))
# Thresholds 0.0, 0.01, ..., 0.99 plus three extra values near 1, as strings.
thrs = [str(step/100) for step in range(100)] + ['0.995', '0.999', '1']
# Names of the multi-class score columns.
scores_list = [
    'acc', 'acc_balanced',
    'P_macro', 'P_weighted',
    'R_macro', 'R_weighted',
    'F1_macro', 'F1_weighted',
]  # , 'auc_macro', 'auc_weighted']
# Label used for the out-of-distribution class.
ood_class = 28

In [None]:
# Accumulators for the rows produced by the experiment loops below.
results, k1_results, bin_results = [], [], []

# Column layout of one `results` row: configuration, multi-class scores,
# the same scores on the binary labels ('_bin' suffix), then sensitivity/specificity.
_multiclass_scores = ['acc', 'acc_balanced', 'P_macro', 'P_weighted',
                      'R_macro', 'R_weighted', 'F1_macro', 'F1_weighted']
columns = (
    ['exp', 'ds', 'ood_ratio', 'thr']
    + _multiclass_scores
    + [f'{score}_bin' for score in _multiclass_scores]
    + ['sensitivity', 'specifity']
)  # , 'auc_macro', 'auc_weighted']

## Threshold

In [None]:
algo_exp = 'thr'
# Select this algorithm's rows once: the 'exp' condition never changes inside the
# loops, so re-evaluating it over the whole table on every iteration is wasted work.
algo_rows = all_results.loc[all_results['exp'] == algo_exp]
for thr in tqdm(thrs):
  for ds in datasets:
    for ratio in ratios:
      # For 'clean' only ratio '0' is evaluated; ratios are ascending, so the
      # first positive ratio ends the inner loop.
      if (ds == 'clean') and (int(ratio) > 0): break

      # Named `mask` (not `filter`) so the built-in filter() is not shadowed.
      mask = (algo_rows['ds'] == ds) & (algo_rows['ood_ratio'] == ratio) & (algo_rows['thr'] == thr)
      row = algo_rows.loc[mask]
      if row.empty:
        raise IndexError(f"no stored predictions for exp={algo_exp!r}, ds={ds!r}, ood_ratio={ratio!r}, thr={thr!r}")
      y_pred = row['k_1_preds'].iloc[0].copy()
      y_true = row['k_1_gt'].iloc[0].copy()

      # Scores on the original labels; the last two entries are NaN in this mode.
      (acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted, _, _) = evaluate(y_pred=y_pred, y_true=y_true, binary=False, ood_class=ood_class)

      # Scores on the binary view (ood_class vs. every other label).
      (b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted, sensitivity, specifity) = evaluate(y_pred=y_pred, y_true=y_true, binary=True, ood_class=ood_class)

      # One row per configuration, in the order given by `columns`.
      results.append([algo_exp, ds, ratio, thr,
                      acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted,
                      b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted,
                      sensitivity, specifity])

100%|██████████| 103/103 [13:30<00:00,  7.87s/it]


In [None]:
# writeToExcel(k1_results, columns, sort_columns=datasets, algorithms=algorithms, binary='')
# writeToExcel(bin_results, columns, sort_columns=datasets, algorithms=algorithms, binary='binary')

In [None]:
# pickleDF(k1_results, algo_exp, columns, binary='')
# pickleDF(bin_results, algo_exp, columns, binary='binary')

## Discriminator

In [None]:
algo_exp = 'disc'
# This algorithm has no threshold, so the 'thr' column of its result rows is NaN.
thr = np.nan
# Select this algorithm's rows once: the 'exp' condition never changes inside the
# loops, so re-evaluating it over the whole table on every iteration is wasted work.
algo_rows = all_results.loc[all_results['exp'] == algo_exp]
for ds in tqdm(datasets):
  for ratio in ratios:
    # For 'clean' only ratio '0' is evaluated; ratios are ascending, so the
    # first positive ratio ends the inner loop.
    if (ds == 'clean') and (int(ratio) > 0): break

    # Named `mask` (not `filter`) so the built-in filter() is not shadowed.
    mask = (algo_rows['ds'] == ds) & (algo_rows['ood_ratio'] == ratio)
    row = algo_rows.loc[mask]
    if row.empty:
      raise IndexError(f"no stored predictions for exp={algo_exp!r}, ds={ds!r}, ood_ratio={ratio!r}")
    y_pred = row['k_1_preds'].iloc[0].copy()
    y_true = row['k_1_gt'].iloc[0].copy()

    # Scores on the original labels; the last two entries are NaN in this mode.
    (acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted, _, _) = evaluate(y_pred=y_pred, y_true=y_true, binary=False, ood_class=ood_class)

    # Scores on the binary view (ood_class vs. every other label).
    (b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted, sensitivity, specifity) = evaluate(y_pred=y_pred, y_true=y_true, binary=True, ood_class=ood_class)

    # One row per configuration, in the order given by `columns`.
    results.append([algo_exp, ds, ratio, thr,
                    acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted,
                    b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted,
                    sensitivity, specifity])

100%|██████████| 7/7 [00:07<00:00,  1.03s/it]


In [None]:
# writeToExcel(k1_results, columns, algorithms=algorithms, binary='')
# writeToExcel(bin_results, columns, algorithms=algorithms, binary='binary')

In [None]:
# pickleDF(k1_results, algo_exp, columns, binary='')
# pickleDF(bin_results, algo_exp, columns, binary='binary')

## ThrDisc AND

In [None]:
algo_exp = 'thrDiscAnd'
# Select this algorithm's rows once: the 'exp' condition never changes inside the
# loops, so re-evaluating it over the whole table on every iteration is wasted work.
algo_rows = all_results.loc[all_results['exp'] == algo_exp]
for thr in tqdm(thrs):
  for ds in datasets:
    for ratio in ratios:
      # For 'clean' only ratio '0' is evaluated; ratios are ascending, so the
      # first positive ratio ends the inner loop.
      if (ds == 'clean') and (int(ratio) > 0): break

      # Named `mask` (not `filter`) so the built-in filter() is not shadowed.
      mask = (algo_rows['ds'] == ds) & (algo_rows['ood_ratio'] == ratio) & (algo_rows['thr'] == thr)
      row = algo_rows.loc[mask]
      if row.empty:
        raise IndexError(f"no stored predictions for exp={algo_exp!r}, ds={ds!r}, ood_ratio={ratio!r}, thr={thr!r}")
      y_pred = row['k_1_preds'].iloc[0].copy()
      y_true = row['k_1_gt'].iloc[0].copy()

      # Scores on the original labels; the last two entries are NaN in this mode.
      (acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted, _, _) = evaluate(y_pred=y_pred, y_true=y_true, binary=False, ood_class=ood_class)

      # Scores on the binary view (ood_class vs. every other label).
      (b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted, sensitivity, specifity) = evaluate(y_pred=y_pred, y_true=y_true, binary=True, ood_class=ood_class)

      # One row per configuration, in the order given by `columns`.
      results.append([algo_exp, ds, ratio, thr,
                      acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted,
                      b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted,
                      sensitivity, specifity])

100%|██████████| 103/103 [13:22<00:00,  7.79s/it]


In [None]:
# writeToExcel(k1_results, columns, algorithms=algorithms, binary='')
# writeToExcel(bin_results, columns, algorithms=algorithms, binary='binary')

In [None]:
# pickleDF(k1_results, algo_exp, columns, binary='')
# pickleDF(bin_results, algo_exp, columns, binary='binary')

## ThrDisc OR

In [None]:
algo_exp = 'thrDiscOr'
# Select this algorithm's rows once: the 'exp' condition never changes inside the
# loops, so re-evaluating it over the whole table on every iteration is wasted work.
algo_rows = all_results.loc[all_results['exp'] == algo_exp]
for thr in tqdm(thrs):
  for ds in datasets:
    for ratio in ratios:
      # For 'clean' only ratio '0' is evaluated; ratios are ascending, so the
      # first positive ratio ends the inner loop.
      if (ds == 'clean') and (int(ratio) > 0): break

      # Named `mask` (not `filter`) so the built-in filter() is not shadowed.
      mask = (algo_rows['ds'] == ds) & (algo_rows['ood_ratio'] == ratio) & (algo_rows['thr'] == thr)
      row = algo_rows.loc[mask]
      if row.empty:
        raise IndexError(f"no stored predictions for exp={algo_exp!r}, ds={ds!r}, ood_ratio={ratio!r}, thr={thr!r}")
      y_pred = row['k_1_preds'].iloc[0].copy()
      y_true = row['k_1_gt'].iloc[0].copy()

      # Scores on the original labels; the last two entries are NaN in this mode.
      (acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted, _, _) = evaluate(y_pred=y_pred, y_true=y_true, binary=False, ood_class=ood_class)

      # Scores on the binary view (ood_class vs. every other label).
      (b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted, sensitivity, specifity) = evaluate(y_pred=y_pred, y_true=y_true, binary=True, ood_class=ood_class)

      # One row per configuration, in the order given by `columns`.
      results.append([algo_exp, ds, ratio, thr,
                      acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted,
                      b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted,
                      sensitivity, specifity])

100%|██████████| 103/103 [13:06<00:00,  7.64s/it]


In [None]:
# writeToExcel(k1_results, columns, algorithms=algorithms, binary='')
# writeToExcel(bin_results, columns, algorithms=algorithms, binary='binary')

In [None]:
# pickleDF(k1_results, algo_exp, columns, binary='')
# pickleDF(bin_results, algo_exp, columns, binary='binary')

## DPM

In [None]:
algo_exp = 'dpm'
# This algorithm has no threshold, so the 'thr' column of its result rows is NaN.
thr = np.nan
# Select this algorithm's rows once: the 'exp' condition never changes inside the
# loops, so re-evaluating it over the whole table on every iteration is wasted work.
algo_rows = all_results.loc[all_results['exp'] == algo_exp]
for ds in tqdm(datasets):
  for ratio in ratios:
    # For 'clean' only ratio '0' is evaluated; ratios are ascending, so the
    # first positive ratio ends the inner loop.
    if (ds == 'clean') and (int(ratio) > 0): break

    # Named `mask` (not `filter`) so the built-in filter() is not shadowed.
    mask = (algo_rows['ds'] == ds) & (algo_rows['ood_ratio'] == ratio)
    row = algo_rows.loc[mask]
    if row.empty:
      raise IndexError(f"no stored predictions for exp={algo_exp!r}, ds={ds!r}, ood_ratio={ratio!r}")
    y_pred = row['k_1_preds'].iloc[0].copy()
    y_true = row['k_1_gt'].iloc[0].copy()

    # Scores on the original labels; the last two entries are NaN in this mode.
    (acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted, _, _) = evaluate(y_pred=y_pred, y_true=y_true, binary=False, ood_class=ood_class)

    # Scores on the binary view (ood_class vs. every other label).
    (b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted, sensitivity, specifity) = evaluate(y_pred=y_pred, y_true=y_true, binary=True, ood_class=ood_class)

    # One row per configuration, in the order given by `columns`.
    results.append([algo_exp, ds, ratio, thr,
                    acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted,
                    b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted,
                    sensitivity, specifity])

100%|██████████| 7/7 [00:07<00:00,  1.02s/it]


In [None]:
# writeToExcel(k1_results, columns, algorithms=algorithms, binary='')
# writeToExcel(bin_results, columns, algorithms=algorithms, binary='binary')

In [None]:
# pickleDF(k1_results, algo_exp, columns, binary='')
# pickleDF(bin_results, algo_exp, columns, binary='binary')

## dpmDisc AND

In [None]:
algo_exp = 'dpmDiscAnd'
# This algorithm has no threshold, so the 'thr' column of its result rows is NaN.
thr = np.nan
# Select this algorithm's rows once: the 'exp' condition never changes inside the
# loops, so re-evaluating it over the whole table on every iteration is wasted work.
algo_rows = all_results.loc[all_results['exp'] == algo_exp]
for ds in tqdm(datasets):
  for ratio in ratios:
    # For 'clean' only ratio '0' is evaluated; ratios are ascending, so the
    # first positive ratio ends the inner loop.
    if (ds == 'clean') and (int(ratio) > 0): break

    # Named `mask` (not `filter`) so the built-in filter() is not shadowed.
    mask = (algo_rows['ds'] == ds) & (algo_rows['ood_ratio'] == ratio)
    row = algo_rows.loc[mask]
    if row.empty:
      raise IndexError(f"no stored predictions for exp={algo_exp!r}, ds={ds!r}, ood_ratio={ratio!r}")
    y_pred = row['k_1_preds'].iloc[0].copy()
    y_true = row['k_1_gt'].iloc[0].copy()

    # Scores on the original labels; the last two entries are NaN in this mode.
    (acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted, _, _) = evaluate(y_pred=y_pred, y_true=y_true, binary=False, ood_class=ood_class)

    # Scores on the binary view (ood_class vs. every other label).
    (b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted, sensitivity, specifity) = evaluate(y_pred=y_pred, y_true=y_true, binary=True, ood_class=ood_class)

    # One row per configuration, in the order given by `columns`.
    results.append([algo_exp, ds, ratio, thr,
                    acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted,
                    b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted,
                    sensitivity, specifity])

100%|██████████| 7/7 [00:07<00:00,  1.04s/it]


In [None]:
# writeToExcel(k1_results, columns, algorithms=algorithms, binary='')
# writeToExcel(bin_results, columns, algorithms=algorithms, binary='binary')

In [None]:
# pickleDF(k1_results, algo_exp, columns, binary='')
# pickleDF(bin_results, algo_exp, columns, binary='binary')

## dpmDisc OR

In [None]:
algo_exp = 'dpmDiscOr'
# This algorithm has no threshold, so the 'thr' column of its result rows is NaN.
thr = np.nan
# Select this algorithm's rows once: the 'exp' condition never changes inside the
# loops, so re-evaluating it over the whole table on every iteration is wasted work.
algo_rows = all_results.loc[all_results['exp'] == algo_exp]
for ds in tqdm(datasets):
  for ratio in ratios:
    # For 'clean' only ratio '0' is evaluated; ratios are ascending, so the
    # first positive ratio ends the inner loop.
    if (ds == 'clean') and (int(ratio) > 0): break

    # Named `mask` (not `filter`) so the built-in filter() is not shadowed.
    mask = (algo_rows['ds'] == ds) & (algo_rows['ood_ratio'] == ratio)
    row = algo_rows.loc[mask]
    if row.empty:
      raise IndexError(f"no stored predictions for exp={algo_exp!r}, ds={ds!r}, ood_ratio={ratio!r}")
    y_pred = row['k_1_preds'].iloc[0].copy()
    y_true = row['k_1_gt'].iloc[0].copy()

    # Scores on the original labels; the last two entries are NaN in this mode.
    (acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted, _, _) = evaluate(y_pred=y_pred, y_true=y_true, binary=False, ood_class=ood_class)

    # Scores on the binary view (ood_class vs. every other label).
    (b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted, sensitivity, specifity) = evaluate(y_pred=y_pred, y_true=y_true, binary=True, ood_class=ood_class)

    # One row per configuration, in the order given by `columns`.
    results.append([algo_exp, ds, ratio, thr,
                    acc, acc_balanced, P_macro, P_weighted, R_macro, R_weighted, F1_macro, F1_weighted,
                    b_acc, b_acc_balanced, b_P_macro, b_P_weighted, b_R_macro, b_R_weighted, b_F1_macro, b_F1_weighted,
                    sensitivity, specifity])

100%|██████████| 7/7 [00:06<00:00,  1.00it/s]


In [None]:
# writeToExcel(k1_results, columns, algorithms=algorithms, binary='')
# writeToExcel(bin_results, columns, algorithms=algorithms, binary='binary')

In [None]:
# pickleDF(k1_results, algo_exp, columns, binary='')
# pickleDF(bin_results, algo_exp, columns, binary='binary')

# Parse data

In [None]:
# pickleDF(k1_results, algo_exp, columns, binary='')
# pickleDF(bin_results, algo_exp, columns, binary='binary')

In [None]:
# Save the combined table under the name the next cell reads back
# ('all_results_df_all_arabic.pkl'). pickleDF names the file
# 'all_results_df_{algo}{_title}_arabic.pkl', so passing the leftover algo_exp
# ('dpmDiscOr') with title='all' would instead write
# 'all_results_df_dpmDiscOr_all_arabic.pkl'.
pickleDF(results, 'all', columns)

In [None]:
# Read the combined results table back from disk to check what was stored.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
data_file = '/content/drive/MyDrive/PhD/Szeged22_paper/Atca_Cyber_long_paper/data/all_results_df_all_arabic.pkl'
all_ = pd.read_pickle(data_file)
# Bare expression: displays the DataFrame as the cell output.
all_

Unnamed: 0,exp,ds,ood_ratio,thr,acc,acc_balanced,P_macro,P_weighted,R_macro,R_weighted,...,acc_bin,acc_balanced_bin,P_macro_bin,P_weighted_bin,R_macro_bin,R_weighted_bin,F1_macro_bin,F1_weighted_bin,sensitivity,specifity
0,thr,mnist,0,0.0,81.01,81.01,82.04,82.04,81.01,81.01,...,100.00,100.00,100.00,100.00,100.00,100.00,100.00,100.00,100.00,
1,thr,mnist,5,0.0,77.50,78.73,77.09,75.90,78.73,77.50,...,95.04,50.00,47.52,90.33,50.00,95.04,48.73,92.62,100.00,0.0
2,thr,mnist,10,0.0,73.82,79.17,75.97,70.84,79.17,73.82,...,90.02,50.00,45.01,81.04,50.00,90.02,47.37,85.29,100.00,0.0
3,thr,mnist,15,0.0,70.34,79.87,75.58,66.52,79.87,70.34,...,85.01,50.00,42.51,72.27,50.00,85.01,45.95,78.13,100.00,0.0
4,thr,mnist,20,0.0,66.80,80.62,75.43,62.49,80.62,66.80,...,80.00,50.00,40.00,64.01,50.00,80.00,44.45,71.12,100.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39746,dpmDiscOr,rnd,85,,89.91,34.42,94.88,90.26,34.42,89.91,...,89.94,66.13,94.72,91.00,66.13,89.94,71.60,87.65,32.25,100.0
39747,dpmDiscOr,rnd,90,,93.24,33.85,94.77,93.24,33.85,93.24,...,93.27,65.92,96.53,93.74,65.92,93.27,72.35,91.65,31.85,100.0
39748,dpmDiscOr,rnd,95,,96.64,33.29,92.60,96.40,33.29,96.64,...,96.65,65.50,98.30,96.77,65.50,96.65,72.80,95.80,30.99,100.0
39749,dpmDiscOr,rnd,100,,100.00,100.00,100.00,100.00,100.00,100.00,...,100.00,100.00,100.00,100.00,100.00,100.00,100.00,100.00,,100.0
