In [1]:
import tensorflow as tf

from functools import partial
import itertools
import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.layers import Input, Dense, Flatten, Dropout, Embedding, multiply, LeakyReLU, ReLU, Softmax
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K

from tensorflow.python.framework.ops import disable_eager_execution, enable_eager_execution
disable_eager_execution()
# enable_eager_execution()

from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()

from tensorflow.keras.utils import to_categorical

import warnings
warnings.simplefilter("ignore")

2021-12-24 11:40:42.677099: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory
2021-12-24 11:40:42.677144: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


ImportError: cannot import name 'np_config' from 'tensorflow.python.ops.numpy_ops' (/home/osboxes/.local/share/virtualenvs/EC-GAN_NIDS-LDpVHeKH/lib/python3.8/site-packages/tensorflow/python/ops/numpy_ops/__init__.py)

In [2]:
tf.__version__

'2.3.0'

In [2]:
try:
    gpus = tf.config.list_physical_devices('GPU')
    tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print("Physical GPUs:", len(gpus))
    print("Logical GPUs:", len(logical_gpus))

except RuntimeError as e:
    # Visible devices must be set before GPUs have been initialized
    print(e)

Physical GPUs: 1
Logical GPUs: 1


### Read data

In [3]:
x_train = np.load("data/preserve50/x_train.npy")
y_train = np.load("data/preserve50/y_train.npy")
x_test = np.load("data/preserve50/x_test.npy")
y_test = np.load("data/preserve50/y_test.npy")

### Model Definition

In [4]:
class RandomWeightedAverage(tf.keras.layers.Layer):
    """Provides a (random) weighted average between real and generated image samples"""
    
    def __init__(self, batch_size):
        super().__init__()
        self.batch_size = batch_size
    
    def call(self, inputs, **kwargs):
        alpha = tf.random.uniform((self.batch_size, 1))
        return (alpha * inputs[0]) + ((1 - alpha) * inputs[1])
    
    def compute_output_shape(self, input_shape):
        return input_shape[0]

    
class ECGAN():
    def __init__(self, 
                 x_train, 
                 y_train, 
                 num_classes: int, 
                 latent_dim: int, 
                 batch_size: int,
                 n_critic: int,
                 conf_thresh: float,
                 adv_weight: float):
        """Implement EC-GAN with an WCGAN-GP and MLP.        
        
        Attributes
        ---------
        x_train : numpy.ndarray
            Real data without labels used for training.
            (Created with sklearn.model_selection.train_test_split
        
        y_train : numpy.ndarray
            Real data labels.
            
        num_classes : int
            Number of data classes. Number of unique elements in y_train.
            
        data_dim : int
            Data dimension. Number of columns in x_train.
            
        latent_dim : int
            Dimension of random noise vector (z), used for training
            the generator.
            
        batch_size : int
            Size of training batch in each epoch.
        
        n_critic : int
            Number of times the critic (discriminator) will be trained
            in each epoch.
            
        conf_thresh : float
            Confidence threshold. EC-GAN parameter which decides how good
            the generated sample needs to be, for it to be fed to the 
            classifier.
        
        adv_weight : float
            Adverserial weight. EC-GAN parameter which represents the 
            importance fake data has on classifier training.
            Value has been taken from the original paper.
        
        """
        
        self.x_train = x_train.copy()
        self.y_train = y_train.copy()
        
        # Store labels as one-hot vectors.
        self.y_train_onehot = to_categorical(y_train)
        
        self.num_classes = num_classes
        self.data_dim = x_train.shape[1]
        
        self.latent_dim = latent_dim
        self.batch_size = batch_size
        
        # WCGAN-GP parameters. 
        self.n_critic = n_critic
        
        # EC-GAN parameters.
        self.conf_thresh = conf_thresh
        self.adv_weight = adv_weight
        
        # Log training progress.
        self.losslog = []
        self.class_acc_log = []
        self.class_loss_log = []

        # Adam optimizer for WCGAN-GP, suggested by original paper.
        optimizer = Adam(learning_rate=0.0005, beta_1=0.05, beta_2=0.9)

        # Categorical crossentropy loss function for the classifier.
        self.cce_loss = tf.keras.losses.CategoricalCrossentropy()

        # Build the generator, critic and classifier
        self.generator = self.build_generator()
        self.critic = self.build_critic()
        self.classifier = self.build_classifier()

        
        #-------------------------------
        # Construct Computational Graph
        #       for the Critic
        #-------------------------------

        # Freeze generator's layers while training critic.
        self.generator.trainable = False

        # Data input (real sample).
        real_data = Input(shape=self.data_dim, name="Real_data")
        # Noise input (z).
        noise = Input(shape=(self.latent_dim,), name="Noise")
        # Label input.
        label = Input(shape=(1,), name="Label")
        
        # Generate data based of noise (fake sample)
        fake_data = self.generator([noise, label])
        
        # Critic (discriminator) determines validity of the real and fake images.
        fake = self.critic([fake_data, label])
        valid = self.critic([real_data, label])
        
        # Construct weighted average between real and fake images.
        interpolated_data = RandomWeightedAverage(self.batch_size)([real_data, fake_data])
        
        # Determine validity of weighted sample.
        validity_interpolated = self.critic([interpolated_data, label])
        
        
        # Use Python partial to provide loss function with additional
        # 'averaged_samples' argument.
        partial_gp_loss = partial(self.gradient_penalty_loss,
                          averaged_samples=interpolated_data)
        # Keras requires function names.
        partial_gp_loss.__name__ = 'gradient_penalty' 
        
        self.critic_model = Model(
            inputs=[real_data, label, noise],
            outputs=[valid, fake, validity_interpolated]
        )
        
        self.critic_model.compile(loss=[self.wasserstein_loss,
                                        self.wasserstein_loss,
                                        partial_gp_loss],
                                  optimizer=optimizer,
                                  loss_weights=[1, 1, 10])
 
        #-------------------------------
        # Construct Computational Graph
        #         for Generator
        #-------------------------------

        # For the generator we freeze other's layers.
        self.critic.trainable = False
        self.generator.trainable = True

        # Sampled noise for input to generator.
        noise = Input(shape=(self.latent_dim,), name="Noise")
        
        # Add label to input.
        label = Input(shape=(1,), name="Label")
        
        # Generate data based of noise.
        fake_data = self.generator([noise, label])

        # Discriminator determines validity.
        valid = self.critic([fake_data, label])

        # Defines generator model.
        self.generator_model = Model([noise, label], valid)
        
        self.generator_model.compile(loss=self.wasserstein_loss, 
                                     optimizer=optimizer)

        
        
        #-------------------------------
        # Construct Computational Graph
        #   for the Classifier (real)
        #-------------------------------
        
        # Real data classifier training
        
        real_data = Input(shape=self.data_dim, name="Real_data")
        
        real_predictions = self.classifier(real_data)
        
        self.real_classifier_model = Model(real_data, real_predictions)
        
        self.real_classifier_model.compile(loss="categorical_crossentropy",
                                           optimizer="adamax",
                                           metrics=["accuracy"])
        
        #-------------------------------
        # Construct Computational Graph
        #   for the Classifier (fake)
        #-------------------------------
        
        # Fake data classifier training
        
        noise = Input(shape=(self.latent_dim,), name="Noise")
        fake_labels = Input(shape=(1,), name="Label")
        
        real_data = Input(shape=self.data_dim, name="Real_data")
        
        fake_data = self.generator([noise, fake_labels])
        
        fake_predictions = self.classifier(fake_data)
        
        self.fake_classifier_model = Model([noise, fake_labels], fake_predictions)
        
        self.fake_classifier_model.compile(loss=self.ecgan_loss, 
                                           optimizer="adamax",
                                           metrics=["accuracy"])

        
        
    def ecgan_loss(self, y_true, y_pred):
        """Calculate loss for fake data predictions."""
        
        max_values = tf.math.reduce_max(y_pred, axis=1)
        
        max_index = tf.where(tf.math.greater(max_values, self.conf_thresh))
        
        loss = self.adv_weight * self.cce_loss(y_true[max_index], y_pred[max_index])
        
        return loss
    

    def gradient_penalty_loss(self, y_true, y_pred, averaged_samples):
        """
        Computes gradient penalty based on prediction and weighted real / fake samples
        """
        gradients = K.gradients(y_pred, averaged_samples)[0]
        # compute the euclidean norm by squaring ...
        gradients_sqr = K.square(gradients)
        #   ... summing over the rows ...
        gradients_sqr_sum = K.sum(gradients_sqr,
                                  axis=np.arange(1, len(gradients_sqr.shape)))
        #   ... and sqrt
        gradient_l2_norm = K.sqrt(gradients_sqr_sum)
        # compute lambda * (1 - ||grad||)^2 still for each single sample
        gradient_penalty = K.square(1 - gradient_l2_norm)
        # return the mean as loss over all the batch samples
        return K.mean(gradient_penalty)


    def wasserstein_loss(self, y_true, y_pred):
        return K.mean(y_true * y_pred)

    def build_generator(self):

        model = Sequential(name="Generator")
        
        # First hidden layer.
        model.add(Dense(256, input_dim=self.latent_dim))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.3))
        
        # Second hidden layer.
        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.3))
        
        # Third hidden layer.
        model.add(Dense(1024))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.3))
        
        # Output layer.
        model.add(Dense(self.data_dim, activation="tanh"))
        
        model.summary()
        
        # Noise and label input layers.
        noise = Input(shape=(self.latent_dim,), name="Noise")
        label = Input(shape=(1,), dtype="int32", name="Label")
        
        # Embed labels into onehot encoded vectors.
        label_embedding = Flatten(name="Flatten")(Embedding(self.num_classes, self.latent_dim, name="Embedding")(label))
        
        # Multiply noise and embedded labels to be used as model input.
        model_input = multiply([noise, label_embedding], name="Multiply")
        
        generated_data = model(model_input)

        return Model(inputs=[noise, label], 
                     outputs=generated_data, 
                     name="Generator")

    def build_critic(self):

        model = Sequential(name="Critic")

        # First hidden layer.
        model.add(Dense(1024, input_dim=self.data_dim))
        model.add(LeakyReLU(alpha=0.2))
        
        # Second hidden layer.        
        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))
        
        # Third hidden layer.
        model.add(Dense(256))
        model.add(LeakyReLU(alpha=0.2))

        # Output layer with linear activation.
        model.add(Dense(1))

        model.summary()
        
        # Artificial data input.
        generated_sample = Input(shape=self.data_dim, name="Generated_data")
        # Label input.
        label = Input(shape=(1,), dtype="int32", name="Label") 
        
        # Embedd label as onehot vector.
        label_embedding = Flatten(name="Flatten")(Embedding(self.num_classes, self.data_dim, name="Embedding")(label))
        
        # Multiply fake data sample with label embedding to get critic input.
        model_input = multiply([generated_sample, label_embedding], name="Multiply")
        
        validity = model(model_input)

        return Model(inputs=[generated_sample, label], 
                     outputs=validity, 
                     name="Critic")
    
    def build_classifier(self):
        
        model = Sequential(name="Classifier")
        
        # First hidden layer.
        model.add(Dense(128, input_dim=self.data_dim))
        model.add(ReLU())
        model.add(Dropout(0.3))
        
        # Second hidden layer.
        model.add(Dense(256))
        model.add(ReLU())
        model.add(Dropout(0.3))
        
        model.add(Dense(128))
        model.add(ReLU())
        model.add(Dropout(0.3))
        
        # Output layer.
        model.add(Dense(self.num_classes))
        model.add(Softmax())
        
        model.summary()
        
        # Data input.
        data = Input(shape=self.data_dim, name="Data")

        # CLassifier outout is class predictions vector.
        predictions = model(data)
        
        return Model(inputs=data,
                     outputs=predictions,
                     name="Classifier")
        
        
    def train(self, epochs):
        
        self.epochs = epochs

        # Adversarial ground truths.
        valid = -(np.ones((self.batch_size, 1)))
        fake =  np.ones((self.batch_size, 1))
        dummy = np.zeros((self.batch_size, 1))

        # Number of batches.
        self.n_batches = math.floor(self.x_train.shape[0] / self.batch_size)

        overhead = self.x_train.shape[0] % self.batch_size
         
        for epoch in range(epochs):
            
            # Reset training set.
            self.x_train = x_train.copy()
            self.y_train = y_train.copy()

            # Select random overhead rows that do not fit into batches.
            rand_overhead_idx = np.random.choice(range(self.x_train.shape[0]), overhead, replace=False)

            # Remove random overhead rows.
            self.x_train = np.delete(self.x_train, rand_overhead_idx, axis=0)
            self.y_train = np.delete(self.y_train, rand_overhead_idx, axis=0)


            # Split training data into batches.
            x_batches = np.split(self.x_train, self.n_batches)
            y_batches = np.split(self.y_train, self.n_batches)
            
            for x_batch, y_batch, i in zip(x_batches, y_batches, range(self.n_batches)):   
                
                if epoch < 5:
                    
                    for _ in range(self.n_critic):

                        # ---------------------
                        #  Train Critic
                        # ---------------------

                        # Generate random noise.
                        noise = np.random.normal(0, 1, (self.batch_size, self.latent_dim))

                        # Train the critic.
                        d_loss = self.critic_model.train_on_batch(
                            [x_batch, y_batch, noise],                                      
                            [valid, fake, dummy])


                    # ---------------------
                    #  Train Generator
                    # ---------------------

                    # Generate sample of artificial labels.
                    generated_labels = np.random.randint(1, self.num_classes, self.batch_size).reshape(-1, 1)

                    # Train generator.
                    g_loss = self.generator_model.train_on_batch([noise, generated_labels], valid)


                    # ---------------------
                    #  Train Classifier
                    # ---------------------

                    # One-hot encode real labels.
                    y_batch = to_categorical(y_batch, self.num_classes)

                    # One-hot encode generated labels.
                    generated_labels_onehot = to_categorical(generated_labels, self.num_classes)

                    real_loss = self.real_classifier_model.train_on_batch(x_batch, y_batch)

                    fake_loss = self.fake_classifier_model.train_on_batch([noise, generated_labels], generated_labels_onehot)

                    # Classifier loss as presented in EC-GAN paper.
                    c_loss = (real_loss[0] + fake_loss[0]) / (1 + self.adv_weight)

                    avg_acc = np.mean([real_loss[1], fake_loss[1]])
                
                else:
                    
                    # ---------------------
                    #  Train Classifier
                    # ---------------------
                    
                    # Generate random noise.
                    noise = np.random.normal(0, 1, (self.batch_size, self.latent_dim))

                    # Generate sample of artificial labels.
                    generated_labels = np.random.randint(1, self.num_classes, self.batch_size).reshape(-1, 1)

                    # One-hot encode real labels.
                    y_batch = to_categorical(y_batch, self.num_classes)

                    # One-hot encode generated labels.
                    generated_labels_onehot = to_categorical(generated_labels, self.num_classes)

                    real_loss = self.real_classifier_model.train_on_batch(x_batch, y_batch)

                    fake_loss = self.fake_classifier_model.train_on_batch([noise, generated_labels], generated_labels_onehot)

                    # Classifier loss as presented in EC-GAN paper.
                    c_loss = (real_loss[0] + fake_loss[0]) / (1 + self.adv_weight)

                    avg_acc = np.mean([real_loss[1], fake_loss[1]])


                # ---------------------
                #  Logging
                # ---------------------

                self.losslog.append([d_loss[0], g_loss, c_loss])
                self.class_loss_log.append([real_loss[0], fake_loss[0], c_loss])
                self.class_acc_log.append([real_loss[1], fake_loss[1], avg_acc])

                # Plot progress.
                DLOSS = "%.4f" % d_loss[0]
                GLOSS = "%.4f" % g_loss
                CLOSS = "%.4f" % c_loss
                RLOSS = "%.4f" % real_loss[0]
                FLOSS = "%.4f" % fake_loss[0]
                CACC  = "%.4f" % real_loss[1]
                
                if i % 100 == 0:
                    print (f"{epoch} - {i}/{self.n_batches} \t [D loss: {DLOSS}] [G loss: {GLOSS}] [R loss: {RLOSS} | F loss: {FLOSS} | C loss: {CLOSS} - C acc: {CACC}]")


In [None]:
gan = ECGAN(x_train,
            y_train,
            num_classes=15,
            latent_dim=32,
            batch_size=128,
            n_critic=5,
            conf_thresh=.2,
            adv_weight=.1
            )

gan.train(epochs=30)