# Import packages

In [1]:
# import tensorflow as tf
# from tensorflow import keras
# from tensorflow.keras.utils import to_categorical
# import tensorflow_datasets as tfds

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cv2
import os
import random
import bisect

# Utility functions

In [2]:
def uncompressArray(file_dir, confirm=True):
  '''
  Load every array stored in an .npz archive, in the archive's key order.

  The archive keys are always printed first. With confirm=True (the default)
  the user is then asked to type 'y'; any other answer skips loading. Pass
  confirm=False to load without prompting, so the notebook can be re-run
  unattended (Restart & Run All).

  Args:
    file_dir: path to the .npz file (a file path, despite the name).
    confirm: ask for interactive confirmation before loading.

  Returns:
    A list with one numpy array per key, in key order, or an empty list if
    the user declined to load.
  '''
  uncompressed_data = []
  # Using the NpzFile as a context manager guarantees the archive is closed.
  with np.load(file_dir) as loaded_file:
    ks = list(loaded_file.keys())
    print("First, check the data!")
    print(f"Keys: {ks}")
    # Only prompt when confirmation is requested; otherwise proceed directly.
    proceed = (not confirm) or input("Please enter 'y' if you want to proceed: ") == 'y'
    if proceed:
      print("\nloading data !")
      for k in ks:
        # Indexing the archive already materialises a fresh array,
        # so no extra copy is needed.
        uncompressed_data.append(loaded_file[k])
        print(f"load: {k}")
    else:
      print("data is not loaded!")
  return uncompressed_data

def reshape_data(X):
  '''
  Reshape X to (X.shape[0], X.shape[1], X.shape[2], 1), i.e. add a trailing
  singleton axis to a 3-D array.
  '''
  n_items, n_rows, n_cols = X.shape[0], X.shape[1], X.shape[2]
  return X.reshape(n_items, n_rows, n_cols, 1)

# Data preprocessing

## Import data

### Clean data

In [3]:
# Archive holding the clean dataset splits.
# NOTE(review): the path looks like a Colab-mounted Google Drive -- confirm it is mounted before running.
data_file = '/content/drive/MyDrive/PhD/Szeged22_paper/Atca_Cyber_long_paper/data/arabic_data.npz'

# uncompressArray returns the arrays in the archive's key order, so this
# unpacking relies on the keys being stored as X_train, X_test, Y_train, Y_test.
X_clean_train, X_clean_test, Y_clean_train, Y_clean_test = uncompressArray(data_file)

First, check the data!
Keys: ['X_train_arab', 'X_test_arab', 'Y_train_arab', 'Y_test_arab']
Please enter 'y' if you want to proceed: y

loading data !
load: X_train_arab
load: X_test_arab
load: Y_train_arab
load: Y_test_arab


In [4]:
# Sanity check: display the shapes of the four loaded splits.
X_clean_train.shape, X_clean_test.shape, Y_clean_train.shape, Y_clean_test.shape

((40320, 28, 28, 1), (10080, 28, 28, 1), (40320,), (10080,))

In [5]:
# X_clean_train = X_clean_train/255.
# X_clean_test = X_clean_test/255.

# Generate CDF and rCDF

## Import model

In [6]:
# class_name = 'alexNet_classifier'
# class_path = f'/content/drive/MyDrive/PhD/Szeged22_paper/Atca_Cyber_long_paper/saved_models/{class_name}.h5'
# classifier = tf.keras.models.load_model(class_path)

## DPM implementation

In [7]:
def DistFun(val_decisions, gt, n_classes, reverse=False):
  '''
  Build one empirical distribution table per class from classifier scores.

  For class c the samples are column c of `val_decisions`, restricted to the
  rows whose label equals c (reverse=False) or differs from c (reverse=True).
  The samples are sorted ascending (descending when reverse=True); the k-th
  sorted value (1-based) is paired with k/(n+2), and a final sentinel row
  (+inf, or -inf when reverse=True) is paired with (n+1)/(n+2), where n is
  the number of samples selected for that class.

  Args:
    val_decisions: array-like, num images x num classes.
    gt: array-like of integer labels, num images (a num images x 1 column
      is accepted as well; it is flattened).
    n_classes: number of classes (columns of `val_decisions`) to process.
    reverse: select the "other class" samples and sort descending.

  Returns:
    If every class yields the same number of samples, a float array of shape
    (n_classes, n+1, 2). Otherwise a 1-D object array of length n_classes
    whose elements are the per-class (n_c+1, 2) tables.
  '''
  val_decisions = np.asarray(val_decisions)
  # Flatten so that a column vector of labels still yields a 1-D row mask.
  gt = np.asarray(gt).ravel()
  sentinel = float('-inf') if reverse else float('inf')

  tables = []
  for class_idx in range(n_classes):
    mask = (gt != class_idx) if reverse else (gt == class_idx)
    samples = np.sort(val_decisions[mask, class_idx])
    if reverse:
      samples = samples[::-1]
    n = len(samples)

    table = np.zeros((n + 1, 2))
    table[:n, 0] = samples
    table[:n, 1] = np.arange(1, n + 1) / (n + 2)
    # Last row is the sentinel closing the table.
    table[n] = (sentinel, (n + 1) / (n + 2))
    tables.append(table)

  # Equal-sized tables stack into a regular array, as before.
  if len({t.shape for t in tables}) <= 1:
    return np.array(tables)

  # Unbalanced classes give tables of different lengths; np.array() on such a
  # ragged list raises on recent NumPy, so build the object array explicitly.
  ragged = np.empty(len(tables), dtype=object)
  for i, t in enumerate(tables):
    ragged[i] = t
  return ragged

def CDFs(decisions, gt, n_classes):
  '''
  Run DistFun twice on the same inputs and return both results as a pair:
  first with reverse=False, then with reverse=True.
  '''
  forward, backward = (
      DistFun(decisions, gt, n_classes, reverse=flag) for flag in (False, True)
  )
  return forward, backward

In [8]:
# Predictions are loaded from a saved archive instead of being recomputed
# with the classifier (the original call is kept below for reference).
# clean_class_Preds = classifier.predict(X_clean_test)
data_file = '/content/drive/MyDrive/PhD/Szeged22_paper/Atca_Cyber_long_paper/data/arabic_alex_preds.npz'

# uncompressArray returns a list with one array per archive key, so
# clean_class_Preds is a list here, not a bare array.
clean_class_Preds = uncompressArray(data_file)

First, check the data!
Keys: ['arab_preds']
Please enter 'y' if you want to proceed: y

loading data !
load: arab_preds


In [9]:
# Sanity check: display the shape of the test labels before using them as ground truth.
Y_clean_test.shape

(10080,)

In [10]:
# clean_class_Preds is the list returned by uncompressArray; [0] picks its
# first array. 28 is passed as n_classes.
cdf, rcdf = CDFs(clean_class_Preds[0], Y_clean_test, 28)

In [11]:
# Re-wrap both results as numpy arrays. DistFun already ends with np.array(...),
# so this conversion looks redundant -- NOTE(review): confirm before removing.
cdf, rcdf = np.array(cdf), np.array(rcdf)

In [12]:
# Rebind data_file to the output archive and store both tables in it,
# compressed, under the keys 'cdf' and 'rcdf'.
data_file = '/content/drive/MyDrive/PhD/Szeged22_paper/Atca_Cyber_long_paper/data/CDFs_arabic.npz'
np.savez_compressed(data_file, cdf=cdf, rcdf=rcdf)

In [13]:
# data_file = '/content/drive/MyDrive/PhD/Szeged22_paper/Atca_Cyber_long_paper/data/CDFs.npz'

# cdf, rcdf = uncompressArray(data_file)

In [14]:
# Read the archive at data_file back into a plain dict {key: array}.
# The with-block closes the archive once every array has been read.
# allow_pickle=True is only needed when the stored arrays have object dtype;
# unpickling can execute arbitrary code, so use it only on files you produced.
with np.load(data_file, allow_pickle=True) as npz_file:
  D = {key: npz_file[key] for key in npz_file.files}

In [15]:
# Display the keys of the reloaded dict.
D.keys()

dict_keys(['cdf', 'rcdf'])