In [3]:
# MO-GAAL Class
"""Multiple-Objective Generative Adversarial Active Learning.
Part of the codes are adapted from
https://github.com/leibinghe/GAAL-based-outlier-detection
"""
# Author: Winston Li <jk_zhengli@hotmail.com>
# License: BSD 2 clause

# Code slightly updated to run on Keras 3 by Markus Haug

from __future__ import division
from __future__ import print_function

from collections import defaultdict

import numpy as np
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted

from pyod.models.base import BaseDetector
from pyod.models.gaal_base import create_discriminator, create_generator

from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD


class MO_GAAL(BaseDetector):
    """Multi-Objective Generative Adversarial Active Learning.

    MO_GAAL directly generates informative potential outliers to assist the
    classifier in describing a boundary that can separate outliers from normal
    data effectively. Moreover, to prevent the generator from falling into the
    mode collapsing problem, the network structure of SO-GAAL is expanded from
    a single generator (SO-GAAL) to multiple generators with different
    objectives (MO-GAAL) to generate a reasonable reference distribution for
    the whole dataset.
    Read more in the :cite:`liu2019generative`.

    Parameters
    ----------
    contamination : float in (0., 0.5), optional (default=0.1)
        The amount of contamination of the data set, i.e.
        the proportion of outliers in the data set. Used when fitting to
        define the threshold on the decision function.

    k : int, optional (default=10)
        The number of sub generators.

    stop_epochs : int, optional (default=20)
        The number of epochs of training. The number of total epochs equals to three times of stop_epochs.

    lr_d : float, optional (default=0.01)
        The learn rate of the discriminator.

    lr_g : float, optional (default=0.0001)
        The learn rate of the generator.


    momentum : float, optional (default=0.9)
        The momentum parameter for SGD.

    Attributes
    ----------
    decision_scores_ : numpy array of shape (n_samples,)
        The outlier scores of the training data.
        The higher, the more abnormal. Outliers tend to have higher
        scores. This value is available once the detector is fitted.

    threshold_ : float
        The threshold is based on ``contamination``. It is the
        ``n_samples * contamination`` most abnormal samples in
        ``decision_scores_``. The threshold is calculated for generating
        binary outlier labels.

    labels_ : int, either 0 or 1
        The binary labels of the training data. 0 stands for inliers
        and 1 for outliers/anomalies. It is generated by applying
        ``threshold_`` on ``decision_scores_``.
    """

    def __init__(self, k=10, stop_epochs=20, lr_d=0.01, lr_g=0.0001, momentum=0.9, contamination=0.1):
        super(MO_GAAL, self).__init__(contamination=contamination)
        self.k = k
        self.stop_epochs = stop_epochs
        self.lr_d = lr_d
        self.lr_g = lr_g
        self.momentum = momentum

    def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """

        X = check_array(X)
        self._set_n_classes(y)
        self.train_history = defaultdict(list)
        names = locals()
        epochs = self.stop_epochs * 3
        stop = 0
        latent_size = X.shape 1 
        data_size = X.shape 0 
        # Create discriminator
        self.discriminator = create_discriminator(latent_size, data_size)
        self.discriminator.compile(
            optimizer=SGD(learning_rate=self.lr_d, momentum=self.momentum), loss='binary_crossentropy')

        # Create k combine models
        for i in range(self.k):
            names'sub_generator' + str(i)  = create_generator(latent_size)
            latent = Input(shape=(latent_size,))
            names 'fake' + str(i)  = names 'sub_generator' + str(i) (latent)
            self.discriminator.trainable = False
            names 'fake' + str(i)  = self.discriminator(names 'fake' + str(i) )
            names 'combine_model' + str(i)  = Model(latent,
                                                    names 'fake' + str(i) )
            names 'combine_model' + str(i) .compile(
                optimizer=SGD(learning_rate=self.lr_g,
                              momentum=self.momentum),
                loss='binary_crossentropy')

        # Start iteration
        for epoch in range(epochs):
            print('Epoch {} of {}'.format(epoch + 1, epochs))
            batch_size = min(500, data_size)
            num_batches = int(data_size / batch_size)

            for index in range(num_batches):
                print('\nTesting for epoch {} index {}:'.format(epoch + 1,
                                                                index + 1))

                # Generate noise
                noise_size = batch_size
                noise = np.random.uniform(0, 1, (int(noise_size), latent_size))

                # Get training data
                data_batch = X index * batch_size: (index + 1) * batch_size 

                # Generate potential outliers
                block = ((1 + self.k) * self.k) // 2
                for i in range(self.k):
                    if i != (self.k - 1):
                        noise_start = int(
                            (((self.k + (self.k - i + 1)) * i) / 2) * (
                                    noise_size // block))
                        noise_end = int(
                            (((self.k + (self.k - i)) * (i + 1)) / 2) * (
                                    noise_size // block))
                        names 'noise' + str(i)  = noise noise_start:noise_end 
                        names 'generated_data' + str(i)  = names 
                            'sub_generator' + str(i) .predict(
                            names 'noise' + str(i) , verbose=0)
                    else:
                        noise_start = int(
                            (((self.k + (self.k - i + 1)) * i) / 2) * (
                                    noise_size // block))
                        names 'noise' + str(i)  = noise noise_start:noise_size 
                        names 'generated_data' + str(i)  = names 
                            'sub_generator' + str(i) .predict(
                            names 'noise' + str(i) , verbose=0)

                # Concatenate real data to generated data
                for i in range(self.k):
                    if i == 0:
                        x = np.concatenate(
                            (data_batch, names 'generated_data' + str(i) ))
                    else:
                        x = np.concatenate(
                            (x, names 'generated_data' + str(i) ))
                y = np.array( 1  * batch_size +  0  * int(noise_size))

                # Train discriminator
                discriminator_loss = self.discriminator.train_on_batch(x, y)
                self.train_history 'discriminator_loss' .append(
                    discriminator_loss)

                # Get the target value of sub-generator
                pred_scores = self.discriminator.predict(X).ravel()

                for i in range(self.k):
                    names 'T' + str(i)  = np.percentile(pred_scores,
                                                        i / self.k * 100)
                    names 'trick' + str(i)  = np.array(
                         float(names 'T' + str(i) )  * noise_size)

                # Train generator
                noise = np.random.uniform(0, 1, (int(noise_size), latent_size))
                if stop == 0:
                    for i in range(self.k):
                        names 'sub_generator' + str(i) + '_loss'  = \
                            names 'combine_model' + str(i) .train_on_batch(
                                noise, names 'trick' + str(i) )
                        self.train_history 
                            'sub_generator{}_loss'.format(i) .append(
                            names 'sub_generator' + str(i) + '_loss' )
                else:
                    for i in range(self.k):
                        names 'sub_generator' + str(i) + '_loss'  = names 
                            'combine_model' + str(i) .evaluate(noise, names 
                            'trick' + str(i) )
                        self.train_history 
                            'sub_generator{}_loss'.format(i) .append(
                            names 'sub_generator' + str(i) + '_loss' )

                generator_loss = 0
                for i in range(self.k):
                    # Access the last loss value which is the most recent one
                    generator_loss += names 'sub_generator' + str(i) + '_loss'  -1 

                generator_loss = generator_loss / self.k
                self.train_history 'generator_loss' .append(generator_loss)

                # Stop training generator
                if epoch + 1 > self.stop_epochs:
                    stop = 1

        # Detection result
        self.decision_scores_ = self.discriminator.predict(X).ravel()
        self._process_decision_scores()
        return self

    def decision_function(self, X):
        """Predict raw anomaly score of X using the fitted detector.

        The anomaly score of an input sample is computed based on different
        detector algorithms. For consistency, outliers are assigned with
        larger anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        check_is_fitted(self,  'discriminator' )
        X = check_array(X)
        pred_scores = self.discriminator.predict(X).ravel()
        return pred_scores




In [4]:
import pickle

# imoprt data science libraries
import pandas as pd
from pandas import DataFrame as df
import matplotlib.pyplot as plt
import numpy as np

# Import ML libraries
import keras
import model_utils as mutils
from model_utils.evaluation import get_metrics, evaluate_model, table
from sklearn.preprocessing import StandardScaler
import pyod
import time
from joblib import dump, load

In [5]:
for current_k_fold in  3 :
  print("current fold: ", current_k_fold)

  # set seed
  SEED=current_k_fold**3
  np.random.seed(SEED)


  # deserialize pre-processed data
  path_to_pickle = f'../data/creditcard/cc13_preprocessed_k{current_k_fold}.pkl'

  with open(path_to_pickle, "rb") as f:
      data = pickle.load(f)
      X_train = data "X_train" .to_numpy()
      y_train = data "y_train" .to_numpy()

      X_val = data "X_val" .to_numpy()
      y_val = data "y_val" .to_numpy()

      X_test = data "X_test" .to_numpy()
      y_test = data "y_test" .to_numpy()

      col_names = data "col_names" 

  print("Data loaded successfully")

  # Reshape
  y_train = y_train.reshape(-1, 1)
  y_test = y_test.reshape(-1, 1)
  y_val = y_val.reshape(-1, 1)

  # Set Weight
  res_value_counts = df(y_train).value_counts()
  weight_for_0 = 1.0 / res_value_counts 0 
  weight_for_1 = 1.0 / res_value_counts 1 

  scaler = StandardScaler()
  scaler.fit(X_train)

  X_train = scaler.transform(X_train)
  X_val = scaler.transform(X_val)
  X_test = scaler.transform(X_test)

  # run for current fold
  contamination = len(y_train y_train == 1 ) / len(y_train) # proportion of frauds in the training dataset
  n_sub_generators = 5
  lr_discriminator = 0.01
  lr_generator = 0.0001
  epochs = 1

  mo_gaal = MO_GAAL(
      k=n_sub_generators,
      stop_epochs=epochs,
      contamination=contamination,
      lr_d=lr_discriminator,
      lr_g=lr_generator,
  )

  # train in supervised manner
  start = time.time()
  clf = mo_gaal.fit(X_train, y_train) # 67 min for 1 epoch and n_sub_generators = 5
  elapsed = time.time() - start

  # evaluate
  scores = mo_gaal.predict_proba(X_test)
  scores_normal = df(mo_gaal.predict_proba(df(X_train) y_train == 0 ) :,1 )
  scores_anomal = df(mo_gaal.predict_proba(df(X_train) y_train == 1 ) :, 0 )

  # let's find the best threshold
  best_metric = 0.
  best_th = 0.

  for threshold in np.arange(0., 1.0, 0.001):
      current_metric = get_metrics(y_test, scores :, 1 , op=">", threshold=threshold) "AUCPRC" 
      if current_metric > best_metric:
          best_metric = current_metric
          best_th = threshold

  print("Best Metric Score:", best_metric)
  print("Best Threshold: ", best_th)

  metrics = get_metrics(y_test, scores :, 1 , threshold=best_th)

  print("metrics for fold: ", current_k_fold)
  print(metrics)

  # save model
  dump(mo_gaal, f'./saved_models/MOGAAL/mo_gaal_CC_k{current_k_fold}.joblib')

  elapsed = time.strftime("%H:%M:%S", time.gmtime(elapsed))

[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 267us/step

Testing for epoch 1 index 196:
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 254us/step

Testing for epoch 1 index 197:
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 244us/step

Testing for epoch 1 index 198:
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 252us/step

Testing for epoch 1 index 199:
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 277us/step

Testing for epoch 1 index 200:
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 268us/step

Testing for epoch 1 index 201:
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 275us/step

Testing for epoch 1 index 202:
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 276us/step

Testing for epoch 1 index 203:
[1m7094/7094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 269us/step

Testing for epoch 1 index 204:


# Evaluate

In [18]:
fold1 =  {'tn': 28291.0, 'fp': 34.0, 'fn': 39.0, 'tp': 9.0, 'precision': 0.2093, 'recall': 0.1875, 'AUCPRC': 0.062, 'F1': 0.1978, 'ROCAUC': 0.7896, 'MCC': 0.1968, 'ACC': 0.9974, 'GMEAN': 0.4328} 

fold2 =  {'tn': 28219.0, 'fp': 106.0, 'fn': 46.0, 'tp': 2.0, 'precision': 0.0185, 'recall': 0.0417, 'AUCPRC': 0.0018, 'F1': 0.0256, 'ROCAUC': 0.204, 'MCC': 0.0253, 'ACC': 0.9946, 'GMEAN': 0.2037} 

fold3 =  {'tn': 28237.0, 'fp': 88.0, 'fn': 41.0, 'tp': 7.0, 'precision': 0.0737, 'recall': 0.1458, 'AUCPRC': 0.0174, 'F1': 0.0979, 'ROCAUC': 0.7625, 'MCC': 0.1015, 'ACC': 0.9955, 'GMEAN': 0.3813} 

fold4 =  {'tn': 28298.0, 'fp': 28.0, 'fn': 46.0, 'tp': 1.0, 'precision': 0.0345, 'recall': 0.0213, 'AUCPRC': 0.0021, 'F1': 0.0263, 'ROCAUC': 0.2507, 'MCC': 0.0258, 'ACC': 0.9974, 'GMEAN': 0.1458} 

fold5 = {'tn': 28278.0, 'fp': 47.0, 'fn': 46.0, 'tp': 2.0, 'precision': 0.0408, 'recall': 0.0417, 'AUCPRC': 0.0235, 'F1': 0.0412, 'ROCAUC': 0.2612, 'MCC': 0.0396, 'ACC': 0.9967, 'GMEAN': 0.204}

fold6 =  {'tn': 28270.0, 'fp': 56.0, 'fn': 37.0, 'tp': 10.0, 'precision': 0.1515, 'recall': 0.2128, 'AUCPRC': 0.0593, 'F1': 0.177, 'ROCAUC': 0.7943, 'MCC': 0.1779, 'ACC': 0.9967, 'GMEAN': 0.4608} 

fold7 =  {'tn': 28297.0, 'fp': 28.0, 'fn': 19.0, 'tp': 29.0, 'precision': 0.5088, 'recall': 0.6042, 'AUCPRC': 0.5197, 'F1': 0.5524, 'ROCAUC': 0.9596, 'MCC': 0.5536, 'ACC': 0.9983, 'GMEAN': 0.7769} 

fold8 =  {'tn': 28297.0, 'fp': 28.0, 'fn': 19.0, 'tp': 29.0, 'precision': 0.5088, 'recall': 0.6042, 'AUCPRC': 0.5197, 'F1': 0.5524, 'ROCAUC': 0.9596, 'MCC': 0.5536, 'ACC': 0.9983, 'GMEAN': 0.7769} 

fold9 =  {'tn': 28305.0, 'fp': 20.0, 'fn': 38.0, 'tp': 10.0, 'precision': 0.3333, 'recall': 0.2083, 'AUCPRC': 0.08, 'F1': 0.2564, 'ROCAUC': 0.7131, 'MCC': 0.2625, 'ACC': 0.998, 'GMEAN': 0.4563} 

fold10 =  {'tn': 28312.0, 'fp': 14.0, 'fn': 19.0, 'tp': 28.0, 'precision': 0.6667, 'recall': 0.5957, 'AUCPRC': 0.5711, 'F1': 0.6292, 'ROCAUC': 0.9932, 'MCC': 0.6296, 'ACC': 0.9988, 'GMEAN': 0.7717} 

In [10]:
from model_utils.evaluation import table

In [63]:
import pandas as pd
from pandas import DataFrame as df

kfold_results = df(columns=['tn', 'fp', 'fn', 'tp', 'precision', 'recall', 'AUCPRC', 'F1', 'ROCAUC', 'MCC', 'ACC', 'GMEAN'])

for fold in [fold1, fold2, fold3, fold4, fold5, fold6, fold7, fold8, fold9, fold10]:
	kfold_results = pd.concat([kfold_results, df([fold])], ignore_index=True)

kfold_results = kfold_results.drop(['tn', 'fp', 'fn', 'tp', 'ROCAUC', 'ACC'] , axis=1)
kfold_results.agg(lambda x: f'{x.mean():.4f} ± {x.std():.4f}')

  kfold_results = pd.concat([kfold_results, df([fold])], ignore_index=True)


precision    0.2546 ± 0.2358
recall       0.2663 ± 0.2415
AUCPRC       0.1857 ± 0.2441
F1           0.2556 ± 0.2361
MCC          0.2566 ± 0.2365
GMEAN        0.4610 ± 0.2438
dtype: object

In [66]:
kfold_results.describe().round(4)

Unnamed: 0,precision,recall,AUCPRC,F1,MCC,GMEAN
count,10.0,10.0,10.0,10.0,10.0,10.0
mean,0.2546,0.2663,0.1857,0.2556,0.2566,0.461
std,0.2358,0.2415,0.2441,0.2361,0.2365,0.2438
min,0.0185,0.0213,0.0018,0.0256,0.0253,0.1458
25%,0.049,0.0677,0.0189,0.0554,0.0551,0.2483
50%,0.1804,0.1979,0.0606,0.1874,0.1874,0.4446
75%,0.4649,0.5,0.4098,0.4784,0.4808,0.694
max,0.6667,0.6042,0.5711,0.6292,0.6296,0.7769
