In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
import nslkdd
import nslkdd41
import nslkdd41batch
import unsw
import classifier

# UNSW dataset and model hyper-parameters.
dt = unsw.read_data_sets()
mb_size = 32  # mini-batch size used for each training step
z_dim = 14    # width of the latent code z
X_dim = dt.train.features.shape[1]  # second-axis size of the training feature matrix
y_dim = dt.train.labels.shape[1]    # second-axis size of the training label matrix
h_dim = 100   # hidden-layer width used by the encoder, decoder and discriminator
# NOTE: a former `c = 0` assignment was removed here. `c` is rebound to the
# label placeholder before it is ever read, so the integer value was dead and
# only obscured what `c` means in the rest of the notebook.
lr = 1e-3     # learning rate; equal to tf.train.AdamOptimizer's default (0.001)


def xavier_init(size):
    """Draw initial layer weights from a zero-mean normal distribution.

    The standard deviation is 1 / sqrt(fan_in / 2), where fan_in is the
    first entry of `size`.

    Args:
        size: shape of the weight tensor to create; size[0] is the fan-in.

    Returns:
        The tensor produced by tf.random_normal with shape `size`.
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)


""" Q(z|X) """
# Graph inputs. The first dimension is None, so any batch size can be fed.
X = tf.placeholder(tf.float32, shape=[None, X_dim])  # batch of feature rows
c = tf.placeholder(tf.float32, shape=[None, y_dim])  # batch of label rows (conditioning input)
z = tf.placeholder(tf.float32, shape=[None, z_dim])  # batch of latent codes fed from outside the graph

# Encoder layer 1: X_dim -> h_dim.
Q_W1 = tf.Variable(xavier_init([X_dim , h_dim]))
Q_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Encoder layer 2: h_dim -> z_dim.
Q_W2 = tf.Variable(xavier_init([h_dim, z_dim]))
Q_b2 = tf.Variable(tf.zeros(shape=[z_dim]))

# All encoder variables, grouped so they can be handed to an optimizer as var_list.
theta_Q = [Q_W1, Q_W2, Q_b1, Q_b2]


def Q(X):
    """Encoder: map a batch of inputs to latent codes.

    Applies one ReLU hidden layer (Q_W1, Q_b1) followed by a linear output
    layer (Q_W2, Q_b2). The label placeholder is not concatenated to the
    input, so the encoder is unconditioned.

    Args:
        X: tensor whose second dimension is X_dim.

    Returns:
        Tensor whose second dimension is z_dim (no output activation).
    """
    hidden = tf.nn.relu(tf.matmul(X, Q_W1) + Q_b1)
    latent = tf.matmul(hidden, Q_W2) + Q_b2
    return latent


""" P(X|z) """
# Decoder layer 1: (y_dim + z_dim) -> h_dim. The input width matches the
# concatenation of a latent code and a label row performed inside P.
P_W1 = tf.Variable(xavier_init([y_dim + z_dim, h_dim]))
P_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Decoder layer 2: h_dim -> X_dim.
P_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
P_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# All decoder variables, grouped so they can be handed to an optimizer as var_list.
theta_P = [P_W1, P_W2, P_b1, P_b2]


def P(z, c):
    """Decoder: reconstruct inputs from a latent code and a label row.

    Args:
        z: tensor whose second dimension is z_dim.
        c: tensor whose second dimension is y_dim.

    Returns:
        (prob, logits): the sigmoid of the output layer and the raw output
        layer values, both with X_dim columns.
    """
    conditioned = tf.concat(axis=1, values=[z, c])
    hidden = tf.nn.relu(tf.matmul(conditioned, P_W1) + P_b1)
    pre_activation = tf.matmul(hidden, P_W2) + P_b2
    return tf.nn.sigmoid(pre_activation), pre_activation


""" D(z) """
# Discriminator layer 1: (z_dim + y_dim) -> h_dim. The discriminator also
# receives the label row, hence the extra y_dim input columns.
D_W1 = tf.Variable(xavier_init([z_dim + y_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Discriminator layer 2: h_dim -> 1 (a single score per row).
D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

# All discriminator variables, grouped so they can be handed to an optimizer as var_list.
theta_D = [D_W1, D_W2, D_b1, D_b2]


def D(z, c):
    """Discriminator: score a (latent code, label) pair.

    Args:
        z: tensor whose second dimension is z_dim.
        c: tensor whose second dimension is y_dim.

    Returns:
        Tensor with one column holding the sigmoid of the output layer,
        i.e. a value in (0, 1) per row.
    """
    joint = tf.concat(axis=1, values=[z, c])
    hidden = tf.nn.relu(tf.matmul(joint, D_W1) + D_b1)
    score = tf.matmul(hidden, D_W2) + D_b2
    return tf.nn.sigmoid(score)


""" Training """
# Small constant added inside every log() below. D ends in a sigmoid, which
# saturates to exactly 0.0 or 1.0 in float32; without the offset the losses
# become -inf/NaN and training silently diverges.
LOG_EPS = 1e-8

# Encode the real batch and decode it again (reconstruction path).
z_sample = Q(X)
_, logits = P(z_sample, c)

# Sample from random z (generation path used after training).
X_samples, _ = P(z, c)

# E[log P(X|z)]: element-wise sigmoid cross-entropy between the decoder
# logits and the input, averaged over all elements.
recon_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=X))

# Adversarial loss to approx. Q(z|X): codes fed through `z` count as "real",
# codes produced by the encoder count as "fake".
D_real = D(z, c)
D_fake = D(z_sample, c)

D_loss = -tf.reduce_mean(tf.log(D_real + LOG_EPS) + tf.log(1. - D_fake + LOG_EPS))
G_loss = -tf.reduce_mean(tf.log(D_fake + LOG_EPS))

# One optimizer per objective, each restricted to the variables it should
# update. `lr` is passed explicitly so the declared hyper-parameter is
# actually honoured (its value matches the Adam default).
AE_solver = tf.train.AdamOptimizer(learning_rate=lr).minimize(recon_loss, var_list=theta_P + theta_Q)
D_solver = tf.train.AdamOptimizer(learning_rate=lr).minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer(learning_rate=lr).minimize(G_loss, var_list=theta_Q)

sess = tf.Session()
sess.run(tf.global_variables_initializer())



In [None]:
for it in range(100000):
    # Labelled mini-batch from the training split, plus standard-normal
    # latent noise with the same batch size.
    X_mb, y_mb = dt.train.next_batch(mb_size)
    z_mb = np.random.randn(mb_size, z_dim)

    data_feed = {X: X_mb, c: y_mb}
    data_and_noise_feed = {X: X_mb, z: z_mb, c: y_mb}

    # Three updates per iteration: reconstruction, discriminator, encoder.
    _, recon_loss_curr = sess.run([AE_solver, recon_loss], feed_dict=data_feed)
    _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict=data_and_noise_feed)
    _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict=data_feed)

    # Report the three losses once every 1000 iterations.
    if it % 1000 != 0:
        continue
    progress = 'Iter: {}; D_loss: {:.4}; G_loss: {:.4}; Recon_loss: {:.4}'
    print(progress.format(it, D_loss_curr, G_loss_curr, recon_loss_curr))

       

In [None]:
# Short aliases for the original (un-augmented) train and test splits.
X_train, Y_train = dt.train.features, dt.train.labels
X_test, Y_test = dt.test.features, dt.test.labels

In [None]:
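# Disabled baseline run: the "NONE" label appears to mean that no generated
# samples are added to the training set (TODO confirm before re-enabling).
#print ("-------------------------NONE----------------------")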


#print ("SVM")
#sv_gan, svm_fpr_gan, svm_tpr_gan, svm_roc_gan = svm_svc(X, Y, X_test,Y_test)
#print ("Decision tree")
#dt_gan, dt_fpr_gan, dt_tpr_gan, dt_roc_gan = classifier.decisiontree(X_train, Y_train, X_test,Y_test)
#print ("Random forest")
#rf_gan, rf_fpr_gan, rf_tpr_gan, rf_roc_gan = classifier.randomforest(X_train, Y_train, X_test,Y_test)

In [None]:
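# Disabled variant of the UNSW sample-generation cell that uses larger
# per-class sample counts; kept for reference only.
#n_samples = 35000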
#y = np.zeros(shape=[n_samples, y_dim])
#idx = 0
#y[:, idx] = 1.
#X_gen0 = sess.run(X_samples, feed_dict={z: np.random.randn(n_samples, z_dim), c: y})
#Y_gen0 = y

#n_samples = 35000
#y = np.zeros(shape=[n_samples, y_dim])
#idx = 1
#y[:, idx] = 1.
#X_gen1 = sess.run(X_samples, feed_dict={z: np.random.randn(n_samples, z_dim), c: y})
#Y_gen1 = y

#n_samples = 33000
#y = np.zeros(shape=[n_samples, y_dim])
#idx = 2
#y[:, idx] = 1.
#X_gen2 = sess.run(X_samples, feed_dict={z: np.random.randn(n_samples, z_dim), c: y})
#Y_gen2 = y

#n_samples = 4000
#y = np.zeros(shape=[n_samples, y_dim])
#idx = 3
#y[:, idx] = 1.
#X_gen3 = sess.run(X_samples, feed_dict={z: np.random.randn(n_samples, z_dim), c: y})
#Y_gen3 = y

#n_samples = 21000
#y = np.zeros(shape=[n_samples, y_dim])
#idx = 4
#y[:, idx] = 1.
#X_gen4 = sess.run(X_samples, feed_dict={z: np.random.randn(n_samples, z_dim), c: y})
#Y_gen4 = y

#n_samples = 27000
#y = np.zeros(shape=[n_samples, y_dim])
#idx = 7
#y[:, idx] = 1.
#X_gen5 = sess.run(X_samples, feed_dict={z: np.random.randn(n_samples, z_dim), c: y})
#Y_gen5 = y

#n_samples = 36000
#y = np.zeros(shape=[n_samples, y_dim])
#idx = 8
#y[:, idx] = 1.
#X_gen6 = sess.run(X_samples, feed_dict={z: np.random.randn(n_samples, z_dim), c: y})
#Y_gen6 = y

#n_samples = 37000
#y = np.zeros(shape=[n_samples, y_dim])
#idx = 9
#y[:, idx] = 1.
#X_gen7 = sess.run(X_samples, feed_dict={z: np.random.randn(n_samples, z_dim), c: y})
#Y_gen7 = y

#n_samples = 19000
#y = np.zeros(shape=[n_samples, y_dim])
#idx = 5
#y[:, idx] = 1.
#X_gen8 = sess.run(X_samples, feed_dict={z: np.random.randn(n_samples, z_dim), c: y})
#Y_gen8 = y

#n_samples = 20000
#y = np.zeros(shape=[n_samples, y_dim])
#idx = 3
#y[:, idx] = 1.
#X_gen9 = sess.run(X_samples, feed_dict={z: np.random.randn(n_samples, z_dim), c: y})
#Y_gen9 = y





In [None]:
#unsw
def generate_class_samples(class_idx, n_samples):
    """Generate `n_samples` synthetic rows conditioned on one label column.

    Builds a one-hot label matrix with column `class_idx` set to 1, draws
    standard-normal latent codes and runs the decoder output `X_samples`.

    Args:
        class_idx: index of the label column to set to 1.
        n_samples: number of rows to generate.

    Returns:
        (features, labels): the decoder output for the sampled codes and the
        one-hot label matrix of shape [n_samples, y_dim] that was fed in.
    """
    labels = np.zeros(shape=[n_samples, y_dim])
    labels[:, class_idx] = 1.
    noise = np.random.randn(n_samples, z_dim)
    features = sess.run(X_samples, feed_dict={z: noise, c: labels})
    return features, labels


# One call per generated group, in the original order so the NumPy random
# stream is consumed identically.
# NOTE(review): label column 3 is sampled twice (4000 + 2000 rows) and
# column 6 is never sampled - confirm this is intentional.
X_gen0, Y_gen0 = generate_class_samples(0, 3500)
X_gen1, Y_gen1 = generate_class_samples(1, 3500)
X_gen2, Y_gen2 = generate_class_samples(2, 3300)
X_gen3, Y_gen3 = generate_class_samples(3, 4000)
X_gen4, Y_gen4 = generate_class_samples(4, 2100)
X_gen5, Y_gen5 = generate_class_samples(7, 2700)
X_gen6, Y_gen6 = generate_class_samples(8, 3600)
X_gen7, Y_gen7 = generate_class_samples(9, 3700)
X_gen8, Y_gen8 = generate_class_samples(5, 1900)
X_gen9, Y_gen9 = generate_class_samples(3, 2000)

In [None]:
import ll_parzen
from collections import Counter

# Combine generated samples and original samples.
# The augmented arrays are deliberately NOT named `X` / `Y`: `X` is the
# TensorFlow input placeholder, and rebinding it to a NumPy array makes every
# later `feed_dict={X: ...}` (e.g. re-running the training cell) fail.
n_train = len(X_train)
#nslkdd
#X_aug = np.concatenate ((X_train, X_gen0, X_gen1, X_gen2, X_gen3), axis = 0)
#Y_aug = np.concatenate ((Y_train, Y_gen0, Y_gen1, Y_gen2, Y_gen3), axis = 0)

#unsw
X_aug = np.concatenate((X_train, X_gen0, X_gen1, X_gen2, X_gen3, X_gen4,
                        X_gen5, X_gen6, X_gen7, X_gen8, X_gen9), axis=0)
Y_aug = np.concatenate((Y_train, Y_gen0, Y_gen1, Y_gen2, Y_gen3, Y_gen4,
                        Y_gen5, Y_gen6, Y_gen7, Y_gen8, Y_gen9), axis=0)

# Everything after the first n_train rows is generated data.
X_gen = X_aug[n_train:]
ll_parzen.ll(X_gen, X_test, 0.01, 32)

# Per-class row counts of the augmented set (labels are one-hot rows, so
# argmax recovers the class index).
Y_count = np.argmax(Y_aug, axis=1)
print('Resampled dataset shape {}'.format(Counter(Y_count)))

# Shuffle features and labels with the same permutation so rows stay aligned.
shuffle_idx = np.random.permutation(X_aug.shape[0])
X_aug = X_aug[shuffle_idx]
Y_aug = Y_aug[shuffle_idx]

print ("-------------------------CAAE----------------------")


print ("SVM")
sv_gan, svm_fpr_gan, svm_tpr_gan, svm_roc_gan = classifier.svm(X_aug, Y_aug, X_test, Y_test)
print ("Decision tree")
dt_gan, dt_fpr_gan, dt_tpr_gan, dt_roc_gan = classifier.decisiontree(X_aug, Y_aug, X_test, Y_test)
print ("Random forest")
rf_gan, rf_fpr_gan, rf_tpr_gan, rf_roc_gan = classifier.randomforest(X_aug, Y_aug, X_test, Y_test)