In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
#import nslkdd
#import nslkdd41
#import unsw
import classifier
import cloud

# Settings used for the cloud dataset: hidden layer size (h_dim) 25, bottleneck/latent size (z_dim) 10


# Load the dataset object; its .train / .test splits expose .features and .labels.
dt = cloud.read_data_sets()
mb_size = 100  # mini-batch size used by the training loop

# Input and condition widths are taken from the data (second axis of each array).
X_dim = dt.train.features.shape[1]
y_dim = dt.train.labels.shape[1]
h_dim = 25  # hidden layer width shared by encoder, decoder and discriminator
z_dim = 10  # latent (bottleneck) width
# The former `c = 0` was dropped: `c` is unconditionally rebound to the
# condition placeholder before any read, so the integer was dead and misleading.
lr = 1e-3  # learning rate


def xavier_init(size):
    """Return a random-normal tensor of shape `size` for weight initialisation.

    The standard deviation is 1 / sqrt(size[0] / 2), i.e. it depends only on
    the first (input) dimension of the requested shape.

    Args:
        size: shape of the tensor to create; size[0] is the input dimension.

    Returns:
        A `tf.random_normal` tensor with the shape and stddev described above.
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)


""" Q(z|X) """
# Graph inputs (first axis is the batch and is left unspecified):
#   X: feature batch of width X_dim
#   c: condition batch of width y_dim (the training loop feeds the label batch here)
#   z: latent code batch of width z_dim (the training loop feeds Gaussian noise here)
X = tf.placeholder(tf.float32, shape=[None, X_dim])
c = tf.placeholder(tf.float32, shape=[None, y_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Encoder layer 1: concatenated [X, c] (X_dim + y_dim) -> h_dim
Q_W1 = tf.Variable(xavier_init([X_dim + y_dim, h_dim]))
Q_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Encoder layer 2: h_dim -> z_dim
Q_W2 = tf.Variable(xavier_init([h_dim, z_dim]))
Q_b2 = tf.Variable(tf.zeros(shape=[z_dim]))

# Encoder parameters, passed as var_list to the optimizers that update the encoder.
theta_Q = [Q_W1, Q_W2, Q_b1, Q_b2]


def Q(X, c):
    """Encoder: map a feature batch and its condition to a latent code.

    Args:
        X: feature tensor, concatenated with `c` along axis 1.
        c: condition tensor with the same batch size as `X`.

    Returns:
        The latent tensor produced by a ReLU hidden layer followed by a
        linear output layer (no activation on the output).
    """
    encoder_in = tf.concat(values=[X, c], axis=1)
    hidden = tf.nn.relu(tf.matmul(encoder_in, Q_W1) + Q_b1)
    return tf.matmul(hidden, Q_W2) + Q_b2


""" P(X|z) """
# Decoder layer 1: concatenated [z, c] (y_dim + z_dim inputs) -> h_dim
P_W1 = tf.Variable(xavier_init([y_dim + z_dim, h_dim]))
P_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Decoder layer 2: h_dim -> X_dim (one logit per input feature)
P_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
P_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Decoder parameters, passed as var_list to the reconstruction optimizer.
theta_P = [P_W1, P_W2, P_b1, P_b2]


def P(z, c):
    """Decoder: reconstruct features from a latent code and a condition.

    Args:
        z: latent tensor, concatenated with `c` along axis 1.
        c: condition tensor with the same batch size as `z`.

    Returns:
        A `(prob, logits)` pair: `logits` is the linear output of the second
        layer and `prob` is its element-wise sigmoid.
    """
    decoder_in = tf.concat(values=[z, c], axis=1)
    hidden = tf.nn.relu(tf.matmul(decoder_in, P_W1) + P_b1)
    out_logits = tf.matmul(hidden, P_W2) + P_b2
    return tf.nn.sigmoid(out_logits), out_logits


""" D(z) """
# Discriminator layer 1: concatenated [z, c] (z_dim + y_dim) -> h_dim
D_W1 = tf.Variable(xavier_init([z_dim + y_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Discriminator layer 2: h_dim -> a single logit per example
D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

# Discriminator parameters, passed as var_list to the discriminator optimizer.
theta_D = [D_W1, D_W2, D_b1, D_b2]


def D(z, c):
    """Discriminator: score a (latent code, condition) pair.

    Args:
        z: latent tensor, concatenated with `c` along axis 1.
        c: condition tensor with the same batch size as `z`.

    Returns:
        The sigmoid of the single output logit, one value per example.
    """
    disc_in = tf.concat(values=[z, c], axis=1)
    hidden = tf.nn.relu(tf.matmul(disc_in, D_W1) + D_b1)
    score_logit = tf.matmul(hidden, D_W2) + D_b2
    return tf.nn.sigmoid(score_logit)


""" Training """
# Denoising input: add Gaussian noise to X, then clip back to [0, 1].
# NOTE(review): the clip assumes features are scaled to [0, 1] - confirm against the loader.
noise_factor = 0.25
X_noise = X + noise_factor * tf.random_normal(tf.shape(X))
X_noise = tf.clip_by_value(X_noise, 0., 1.)

# Encode the noisy input, then decode it; only the logits are needed for the loss.
z_sample = Q(X_noise, c)
_, logits = P(z_sample, c)

# Decoder output for an externally supplied z (used to generate samples).
X_samples, _ = P(z, c)

# Reconstruction loss: sigmoid cross-entropy between decoder logits and the clean X.
recon_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=X))

# Adversarial loss: D scores fed z as "real" and encoder output as "fake".
D_real = D(z, c)
D_fake = D(z_sample, c)

# D returns sigmoid probabilities, which saturate to exactly 0 or 1 in float32;
# the small epsilon keeps the logarithms finite instead of producing -inf/NaN.
eps = 1e-8
D_loss = -tf.reduce_mean(tf.log(D_real + eps) + tf.log(1. - D_fake + eps))
G_loss = -tf.reduce_mean(tf.log(D_fake + eps))

# Use the configured learning rate explicitly (lr was previously defined but unused).
AE_solver = tf.train.AdamOptimizer(learning_rate=lr).minimize(recon_loss, var_list=theta_P + theta_Q)
D_solver = tf.train.AdamOptimizer(learning_rate=lr).minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer(learning_rate=lr).minimize(G_loss, var_list=theta_Q)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [None]:
# Short aliases for the train/test features and labels held by the dataset object.
X_train, Y_train = dt.train.features, dt.train.labels
X_test, Y_test = dt.test.features, dt.test.labels

In [None]:
print ("-------------------------original data----------------------")

# Baseline: fit each classifier on the original (un-augmented) training data.
# Results are kept under *_orig names because the final evaluation cell
# reassigns svm_roc_gan and dt_roc_gan, which would otherwise discard them.
print ("SVM")
svm_roc_orig = classifier.svm(X_train, Y_train, X_test, Y_test)
print ("Decision tree")
dt_roc_orig = classifier.decisiontree(X_train, Y_train, X_test, Y_test)
print ("Random forest")
rf_roc_orig = classifier.randomforest(X_train, Y_train, X_test, Y_test)

# Legacy names kept so any existing cell that reads them still works.
svm_roc_gan, dt_roc_gan, rf_roc_gan = svm_roc_orig, dt_roc_orig, rf_roc_orig

In [None]:
for it in range(10000):
    # One mini-batch of data plus a standard-normal latent batch of the same size.
    X_mb, y_mb = dt.train.next_batch(mb_size)
    z_mb = np.random.randn(mb_size, z_dim)

    data_feed = {X: X_mb, c: y_mb}
    disc_feed = {X: X_mb, z: z_mb, c: y_mb}

    # Three separate updates per iteration: reconstruction, discriminator,
    # then the encoder acting as generator.
    _, recon_loss_curr = sess.run([AE_solver, recon_loss], feed_dict=data_feed)
    _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict=disc_feed)
    _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict=data_feed)

    # Report the losses every 1000 iterations only.
    if it % 1000 != 0:
        continue
    print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}; Recon_loss: {:.4}'
          .format(it, D_loss_curr, G_loss_curr, recon_loss_curr))

In [None]:
# cloud data: decode n_samples random latent codes, all conditioned on class column idx
n_samples = 1000
idx = 1

# One-hot condition matrix with column idx set for every row.
y = np.zeros((n_samples, y_dim))
y[:, idx] = 1.

gen_feed = {z: np.random.randn(n_samples, z_dim), c: y}
X_gen0 = sess.run(X_samples, feed_dict=gen_feed)
Y_gen0 = y

In [None]:
def define_knn(X, Y, X_gen, Y_gen, k, min_c):
    """Append generated samples that lie in a mixed-class neighbourhood.

    A k-nearest-neighbour index is fitted on the original and generated
    samples together. A generated sample is kept only when, among its k
    nearest neighbours (the sample itself included), more than one belongs
    to class `min_c` and more than one belongs to another class. Kept
    samples are appended to `X`, each with a one-hot label for `min_c`.

    Fixes relative to the previous version:
      * neighbour classes are obtained with argmax over the one-hot rows;
        comparing the one-hot matrix element-wise with `min_c` counted
        matrix entries instead of neighbours, so for two-class one-hot
        labels the filter always passed;
      * the appended label width follows `Y` instead of being fixed at 2;
      * per-sample debug printing was removed;
      * only the generated rows are queried and the output is built with a
        single concatenate instead of one per kept sample.

    Args:
        X: original feature matrix, shape (n, d).
        Y: one-hot labels for `X`, shape (n, n_classes).
        X_gen: generated feature matrix, shape (m, d).
        Y_gen: one-hot labels for `X_gen`, used only for the neighbour
            class counts.
        k: number of neighbours to inspect per generated sample.
        min_c: class index counted as "minority" and assigned to every
            kept generated sample.

    Returns:
        (X_out, Y_out): `X` and `Y` with the kept generated samples and
        their labels appended, in the original order of `X_gen`.
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    X_gen = np.asarray(X_gen)
    Y_gen = np.asarray(Y_gen)

    # Nothing to filter: return the originals rather than querying with no rows.
    if len(X_gen) == 0:
        return X, Y

    # Imported here so the function does not depend on which cell ran first.
    from sklearn.neighbors import NearestNeighbors

    X_all = np.concatenate((X, X_gen), axis=0)
    Y_all = np.concatenate((Y, Y_gen), axis=0)
    class_ids = np.argmax(Y_all, axis=1)

    nn = NearestNeighbors(n_neighbors=k, algorithm="ball_tree").fit(X_all)
    # Passing X_gen explicitly keeps each sample in its own neighbour list,
    # exactly as when all of X_all was queried.
    _, indices = nn.kneighbors(X_gen)

    neighbour_classes = class_ids[indices]              # shape (m, k)
    nn_minor = np.sum(neighbour_classes == min_c, axis=1)
    nn_major = neighbour_classes.shape[1] - nn_minor
    keep = (nn_minor > 1) & (nn_major > 1)

    y_keep = np.zeros((int(keep.sum()), Y.shape[1]))
    y_keep[:, min_c] = 1

    X_out = np.concatenate((X, X_gen[keep]), axis=0)
    Y_out = np.concatenate((Y, y_keep), axis=0)
    return X_out, Y_out

In [None]:
from sklearn.neighbors import NearestNeighbors

# Neighbourhood size and the class index passed to define_knn.
k = 50
min_c = 0

# NOTE(review): X_gen0 was generated with condition column idx = 1, while
# min_c = 0 here - confirm which class index is the intended one.
# NOTE(review): this rebinds X, which earlier named the input placeholder, so
# re-running the training cell after this one no longer feeds the placeholder.
X, Y = define_knn(X_train, Y_train, X_gen0, Y_gen0, k, min_c)

In [None]:
import ll_parzen
from collections import Counter

# Combine generated and original samples: define_knn appended the accepted
# generated rows after the original training rows, so they start at index l.
l = len (X_train)
X = np.asarray(X)
Y = np.asarray(Y)

X_gen = X[l:]
# NOTE(review): the meaning of the 0.01 and 32 arguments is defined in ll_parzen - confirm there.
ll_parzen.ll(X_gen, X_test, 0.01, 32)

# Class histogram of the augmented training set (labels are argmax-decoded).
Y_count = Y.argmax(axis=1)
print('Resampled dataset shape {}'.format(Counter(Y_count)))

# Shuffle features and labels with one shared index permutation.
s = np.arange(X.shape[0])
np.random.shuffle(s)
X, Y = X[s], Y[s]

print ("-------------------------CdaAE----------------------")

print ("SVM")
svm_roc_gan = classifier.svm(X, Y, X_test, Y_test)
print ("Decision tree")
dt_roc_gan = classifier.decisiontree(X, Y, X_test, Y_test)
print ("Random forest")
# NOTE(review): stored as roc_gan, unlike the svm_/dt_ prefixed names above.
roc_gan = classifier.randomforest(X, Y, X_test, Y_test)