In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.contrib.layers as layers
from sklearn.metrics import roc_auc_score

import generate
import plot
import models

from IPython.core.interactiveshell import InteractiveShell
# Ask IPython to display the value of every top-level expression statement in
# a cell rather than only the last one. Statements whose value should not be
# echoed are therefore written as assignments (e.g. `_ = ax.scatter(...)`).
InteractiveShell.ast_node_interactivity = "all"



# Real data

In [None]:
# Smoke-test the plotting helpers on synthetic classifier output:
# the first `half` samples are labelled 1, the remaining `half` are labelled 0,
# and the scores are uniform random numbers drawn independently of the labels.
half = 500
labels = np.repeat([1, 0], half)
logits = np.random.uniform(size=labels.size)

# Left panel: two independent triangular1D samples; right panel: ROC of the fake scores.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))
plot.data1D(ax[0], generate.triangular1D(100), generate.triangular1D(100))
plot.roc_curve(ax[1], labels, logits)
plt.show()

# Noise distribution

In [None]:
# Dimensionality of the noise vector fed to the generator.
NOISE_DIM = 3

# Draw 300 noise vectors and look at their first two components.
noise = generate.input_noise(300, NOISE_DIM)

fig, ax = plt.subplots(figsize=(5, 5))
# Results are bound to `_` so the notebook does not echo the returned artists.
_ = ax.scatter(x=noise[:, 0], y=noise[:, 1], alpha=0.7)
_ = ax.set_xlabel(xlabel='noise dimension 1')
_ = ax.set_ylabel(ylabel='noise dimension 2')
plt.show()

# Make the graphs and run the computation

In [None]:
# Run counter: the graph-building cell increments it on every execution and
# uses it as a suffix in the generator / adversary names.
COUNTER = 0

In [None]:
# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
BATCH_SIZE = 1024   # samples per source; the adversary sees 2 * BATCH_SIZE (real + fake)
N_PRE_STEPS = 100   # adversary-only pretraining steps
N_STEPS = 300       # adversarial training steps
N_DRAW = 10         # number of evenly spaced progress plots per phase
COUNTER = COUNTER + 1  # fresh name suffix, so that new graphs are created each time

# ---------------------------------------------------------------------------
# Inputs
# ---------------------------------------------------------------------------
T_input_noise = tf.placeholder(
    dtype=tf.float32,
    shape=(BATCH_SIZE, NOISE_DIM),
    name='InputNoise',
)
T_real_data = tf.placeholder(
    dtype=tf.float32,
    shape=(BATCH_SIZE, 1),
    name='DataBatch',
)

# ---------------------------------------------------------------------------
# Generator: noise -> fake data
# ---------------------------------------------------------------------------
generator_name = 'MyGenerator1D_{}'.format(COUNTER)
T_fake_data, vars_G = models.Generator1D(T_input_noise, name=generator_name)
TB_fake_data = tf.summary.histogram('fake_data', T_fake_data)

# ---------------------------------------------------------------------------
# Adversary input: real batch stacked on top of the fake batch.
# Labels follow the same order: 1 for the real rows, 0 for the fake rows,
# encoded one-hot with two classes.
# ---------------------------------------------------------------------------
T_comb_data = tf.concat(values=[T_real_data, T_fake_data], axis=0)
real_then_fake = [1] * BATCH_SIZE + [0] * BATCH_SIZE
T_comb_labels = tf.one_hot(
    indices=tf.constant(real_then_fake, dtype=tf.int32),
    depth=2,
)

# ---------------------------------------------------------------------------
# Adversary (discriminator)
# ---------------------------------------------------------------------------
adversary_name = 'MyAdversary_{}'.format(COUNTER)
T_logits, vars_A = models.Adversary(T_comb_data, name=adversary_name)
TB_logits = tf.summary.histogram('TB_logits', T_logits)
TB_labels = tf.summary.histogram('TB_labels', T_comb_labels)

# ---------------------------------------------------------------------------
# Objectives
# ---------------------------------------------------------------------------
# Adversary: mean softmax cross-entropy over the combined batch,
# optimised with respect to the adversary's variables only.
per_example_xent = tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=T_comb_labels,
    logits=T_logits,
)
T_loss_A = tf.math.reduce_mean(per_example_xent)
adam_A = tf.train.AdamOptimizer()
T_opt_A = adam_A.minimize(loss=T_loss_A, var_list=vars_A)
TB_loss_A = tf.summary.scalar('TB_Loss_A', T_loss_A)

# Generator: the negated adversary loss, optimised with respect to the
# generator's variables only (i.e. it tries to increase the adversary's loss).
T_loss_G = -T_loss_A
adam_G = tf.train.AdamOptimizer()
T_opt_G = adam_G.minimize(loss=T_loss_G, var_list=vars_G)

# ---------------------------------------------------------------------------
# TensorBoard: one merged summary op, written under the 'tensorboard' directory
# ---------------------------------------------------------------------------
tb_summaries = [TB_fake_data, TB_logits, TB_labels, TB_loss_A]
merged = tf.summary.merge(tb_summaries)
writer = tf.summary.FileWriter(logdir='tensorboard')

# ---------------------------------------------------------------------------
# Session: created last so the initialiser covers the optimiser slots too
# ---------------------------------------------------------------------------
sess = tf.Session()
sess.run(tf.global_variables_initializer())

def get_feed_dict():
    """Sample a fresh batch and return it as a placeholder -> value mapping.

    The real batch is drawn first (``generate.triangular1D``), then the
    generator input noise (``generate.input_noise``); both have
    ``BATCH_SIZE`` samples.
    """
    batch = {}
    batch[T_real_data] = generate.triangular1D(BATCH_SIZE)
    batch[T_input_noise] = generate.input_noise(BATCH_SIZE, NOISE_DIM)
    return batch

# Training schedule
#   1. pretraining: N_PRE_STEPS updates of the adversary (discriminator) only;
#   2. training: N_STEPS rounds, each made of N_ADVERSARY_UPDATES adversary
#      updates on fresh batches followed by one generator update.
# After every step the AUROC and the adversary loss are appended to `aurocs`
# and `losses_A`, so both lists cover the pretraining steps followed by the
# training steps.

# Number of adversary updates per generator update in the training phase.
N_ADVERSARY_UPDATES = 10

# Tensors evaluated after each step for performance monitoring.
to_run = [T_fake_data, T_comb_labels, T_logits, T_loss_A]


def _plot_steps(n_steps):
    """Return the set of step indices at which a progress figure is drawn.

    These are N_DRAW evenly spaced steps starting at step 0, plus the last step.
    """
    stride = int(n_steps / N_DRAW)
    return {n * stride for n in range(N_DRAW)} | {n_steps - 1}


def _show_progress(message, fake_data, labels, logits):
    """Print `message`, then plot real vs. fake data next to the ROC curve.

    `labels` and `logits` are the two-column arrays fetched from the graph;
    column 1 (the "real" class) is what gets passed to the ROC plot.
    """
    print(message)
    fig, ax = plt.subplots(1, 2, figsize=(10, 5))
    plot.data1D(ax[0], generate.triangular1D(BATCH_SIZE), fake_data.ravel())
    plot.roc_curve(ax[1], labels[:, 1], logits[:, 1])
    plt.show()
    # Release the figure explicitly: the two phases together draw more than
    # 20 figures, which would otherwise all stay open on non-inline backends.
    plt.close(fig)


aurocs = []
losses_A = []

# first run the pretraining steps (just the discriminator)
pre_plot_steps = _plot_steps(N_PRE_STEPS)
for i in range(N_PRE_STEPS):

    # prepare the inputs
    feed_dict = get_feed_dict()

    # run the computation (discriminator, performance monitoring)
    _ = sess.run(T_opt_A, feed_dict=feed_dict)
    fake_data, labels, logits, loss_A = sess.run(to_run, feed_dict=feed_dict)

    # Score the "real" column only, i.e. the same quantities the ROC plot
    # uses. Passing the full two-column arrays would make roc_auc_score treat
    # the problem as multilabel and average over both columns.
    aurocs.append(roc_auc_score(labels[:, 1], logits[:, 1]))
    losses_A.append(loss_A)

    # plot only N_DRAW times (plus the final step)
    do_plot = i in pre_plot_steps
    if do_plot:
        _show_progress('Pretraining step {}/{}'.format(i, N_PRE_STEPS),
                       fake_data, labels, logits)


# now run the training steps, and plot the fake and real data
train_plot_steps = _plot_steps(N_STEPS)
for i in range(N_STEPS):

    # prepare the inputs
    feed_dict = get_feed_dict()

    # run the computation (discriminator, generator, performance monitoring);
    # every adversary update gets its own freshly sampled batch
    for _ in range(N_ADVERSARY_UPDATES):
        _ = sess.run(T_opt_A, feed_dict=get_feed_dict())
    _ = sess.run(T_opt_G, feed_dict=feed_dict)
    fake_data, labels, logits, loss_A = sess.run(to_run, feed_dict=feed_dict)

    # run summary separately
    summary = sess.run(merged, feed_dict=feed_dict)

    # store tensorboard
    writer.add_summary(summary, i)

    # same single-column AUROC as in the pretraining phase
    aurocs.append(roc_auc_score(labels[:, 1], logits[:, 1]))
    losses_A.append(loss_A)

    # plot only N_DRAW times (plus the final step)
    do_plot = i in train_plot_steps
    if do_plot:
        _show_progress('Training step {}/{}'.format(i, N_STEPS),
                       fake_data, labels, logits)

print('Done')
    

In [None]:
# Plot the per-step AUROC and adversary loss recorded during the run.
# `aurocs` and `losses_A` hold one entry per step, pretraining steps first.
nplots = 2
fig, ax = plt.subplots(1, nplots, sharex=True, figsize=(nplots*6, 5))

# The notebook displays the value of every top-level expression, so the
# artists returned by the plotting calls are bound to `_` to keep the cell
# output limited to the figure itself.
_ = ax[0].plot(range(len(aurocs)), aurocs, c='r')
_ = ax[0].set_xlabel('Training steps')
_ = ax[0].set_ylabel('AUROC')
_ = ax[1].plot(range(len(losses_A)), losses_A, c='g')
_ = ax[1].set_xlabel('Training steps')
_ = ax[1].set_ylabel('Adversary loss')
plt.show()