In [1]:
#encoding=utf-8
import sys
import pandas as pd
import numpy as np
import codecs
import tensorflow as tf

import argparse
import numpy as np
from scipy.stats import norm
import tensorflow as tf
import matplotlib.pyplot as plt
from matplotlib import animation, rc
import seaborn as sns
from IPython.display import HTML

In [2]:
data = pd.read_csv("./reprocess.txt", sep=" ",header=None)

(5543, 10067)

In [6]:
data_t = data.transpose()
samples, feature_nums = data_t.shape
feature_nums

5543

In [12]:
train_data = data_t.iloc[0:5000]
complete_data = data_t.iloc[5000:5543,:]

In [13]:
complete_data.shape

(543, 5543)

In [5]:
seed = 42
np.random.seed(seed)
tf.set_random_seed(seed)

In [8]:
class DataDistribution(object):
    def __init__(self):
        self.mu = -1
        self.sigma = 1

    def sample(self, N):
        samples = np.random.normal(self.mu, self.sigma, N)
        samples.sort()
        return samples      

In [9]:
class GeneratorDistribution(object):
    def __init__(self, range):
        self.range = range

    def sample(self, N):
        return np.linspace(-self.range, self.range, N) + \
            np.random.random(N) * 0.01

In [12]:

def mlp(input, h_dim):
    init_const = tf.constant_initializer(0.0)
    init_norm = tf.random_normal_initializer()
    w0 = tf.get_variable('w0', [input.get_shape()[1], h_dim], initializer=init_norm)
    b0 = tf.get_variable('b0', [h_dim], initializer=init_const)
    w1 = tf.get_variable('w1', [h_dim, h_dim], initializer=init_norm)
    b1 = tf.get_variable('b1', [h_dim], initializer=init_const)
    h0 = tf.tanh(tf.matmul(input, w0) + b0)
    h1 = tf.tanh(tf.matmul(h0, w1) + b1)
    return h1, [w0, b0, w1, b1]

def generator(input, h_dim, feature_nums):
    transform, params = mlp(input, h_dim)
    init_const = tf.constant_initializer(0.0)
    init_norm = tf.random_normal_initializer()
    w = tf.get_variable('g_w', [h_dim, feature_nums], initializer=init_norm)
    b = tf.get_variable('g_b', [feature_nums], initializer=init_const)
    h = tf.matmul(transform, w) + b
    return h, params + [w, b]

def discriminator(input, h_dim, feature_nums):
    transform, params = mlp(input, h_dim)
    init_const = tf.constant_initializer(0.0)
    init_norm = tf.random_normal_initializer()
    w = tf.get_variable('d_w', [h_dim, feature_nums], initializer=init_norm)
    b = tf.get_variable('d_b', [feature_nums], initializer=init_const)
    h = tf.sigmoid(tf.matmul(transform, w) + b)
    return h, params + [w, b]

In [16]:
def optimizer(loss, var_list, num_epochs):
    initial_learning_rate = 0.01
    decay = 0.95
    num_decay_steps = num_epochs // 4
    batch = tf.Variable(0)
    learning_rate = tf.train.exponential_decay(
        initial_learning_rate,
        batch,
        num_decay_steps,
        decay,
        staircase=True
    )
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss,
        global_step=batch,
        var_list=var_list
    )
    return optimizer

In [24]:
class DataSet:
    
    def __init__(self, data, batch_size, shuffle=True, random_sample_mu = 0, random_sample_sigma=1):
        self.data = data
        self.samples, self.feature_nums = data.shape
        self.cnt = 0
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.random_sample_mu = random_sample_mu
        self.random_sample_sigma = random_sample_sigma
        
    def next(self, generate_random=True):
        feature_nums, batch_size = self.feature_nums, self.batch_size
        if self.cnt + batch_size >= self.samples:
            if self.shuffle:
                np.random.shuffle(data)
            self.cnt = 0
        be, en = self.cnt, min(self.samples, self.cnt + batch_size)
#         yield data[be, en]
        batch_data = self.data[be : en]
        random_samles = np.random.normal(self.random_sample_mu, self.random_sample_sigma, (batch_size, feature_nums))
        self.cnt = (self.cnt + batch_size) % self.samples
        return batch_data, random_samles

dataset = DataSet(data_t, 128)

In [12]:
anim_frames = []

def plot_distributions(GAN, session, loss_d, loss_g):
    num_points = 100000
    num_bins = 100
    xs = np.linspace(-GAN.gen.range, GAN.gen.range, num_points)
    bins = np.linspace(-GAN.gen.range, GAN.gen.range, num_bins)

    # p(data)
    d_sample = GAN.data.sample(num_points)

    # decision boundary
    ds = np.zeros((num_points, 1))  # decision surface
    for i in range(num_points // GAN.batch_size):
        ds[GAN.batch_size * i:GAN.batch_size * (i + 1)] = session.run(GAN.D1, {
            GAN.x: np.reshape(xs[GAN.batch_size * i:GAN.batch_size * (i + 1)], (GAN.batch_size, 1))
        })

    # p(generator)
    zs = np.linspace(-GAN.gen.range, GAN.gen.range, num_points)
    gs = np.zeros((num_points, 1))  # generator function
    for i in range(num_points // GAN.batch_size):
        gs[GAN.batch_size * i:GAN.batch_size * (i + 1)] = session.run(GAN.G, {
            GAN.z: np.reshape(
                zs[GAN.batch_size * i:GAN.batch_size * (i + 1)],
                (GAN.batch_size, 1)
            )
        })
           
    anim_frames.append((d_sample, ds, gs, loss_d, loss_g))

In [25]:
class GAN(object):
    def __init__(self, dataset, steps, mlp_hidden_size=2000):
        self.dataset = dataset
        self.steps = steps
        self.log_every = 10
        self.batch_size = dataset.batch_size
        self.mlp_hidden_size = mlp_hidden_size
        self.feature_nums = dataset.feature_nums
        self._create_model()

    def _create_model(self):
        

        # This defines the generator network - it takes samples from a noise
        # distribution as input, and passes them through an MLP.
        with tf.variable_scope('G'):
            self.z = tf.placeholder(tf.float32, shape=(self.batch_size, self.feature_nums))
            self.G, theta_g = generator(self.z, self.mlp_hidden_size, self.feature_nums)

        # The discriminator tries to tell the difference between samples from the
        # true data distribution (self.x) and the generated samples (self.z).
        #
        # Here we create two copies of the discriminator network (that share parameters),
        # as you cannot use the same network with different inputs in TensorFlow.
        with tf.variable_scope('D') as scope:
            self.x = tf.placeholder(tf.float32, shape=(self.batch_size, self.feature_nums))
            self.D1, self.theta_d1 = discriminator(self.x, self.mlp_hidden_size, self.feature_nums)
            scope.reuse_variables()
            self.D2, self.theta_d2 = discriminator(self.G, self.mlp_hidden_size, self.feature_nums)

        # Define the loss for discriminator and generator networks (see the original
        # paper for details), and create optimizers for both
        self.loss_d = tf.reduce_mean(-tf.log(self.D1) - tf.log(1 - self.D2))
        self.loss_g = tf.reduce_mean(-tf.log(self.D2))

        self.opt_d = optimizer(self.loss_d, self.theta_d2, self.steps)
        self.opt_g = optimizer(self.loss_g, theta_g, self.steps)
    
    def train(self):
        with tf.Session() as session:
            tf.global_variables_initializer().run()
            
            for step in range(self.steps):
                
                batch_data, random_data = self.dataset.next()

                loss_d, _ = session.run([self.loss_d, self.opt_d], {
                    self.x: batch_data,
                    self.z: random_data
                })

                # update generator
                loss_g, _ = session.run([self.loss_g, self.opt_g], {
                    self.z: random_data
                })

                if step % self.log_every == 0:
                    print('{}: {}\t{}'.format(step, loss_d, loss_g))

In [26]:
steps = 1000

tf.reset_default_graph()
model = GAN(dataset, steps)
model.train()

0: inf	nan
10: nan	nan
20: nan	nan


KeyboardInterrupt: 

In [15]:
f, ax = plt.subplots(figsize=(6,4))
f.suptitle('1D Generative Adversarial Network', fontsize=15)
plt.ylabel('Probability')
ax.set_xlim(-6, 6)
ax.set_ylim(0, 1.4)
line_d, = ax.plot([], [], label='p_d')
line_ds, = ax.plot([], [], label='decision boundary')
line_g, = ax.plot([], [], label='
                  p_g')
frame_text = ax.text(0.02, 0.95,'',horizontalalignment='left',verticalalignment='top', transform=ax.transAxes)
ax.legend()

<matplotlib.legend.Legend at 0x1baaff94518>

In [18]:
plt.rcParams['animation.ffmpeg_path'] = r'D:\ffmpeg-20170904-6cadbb1-win64-static\bin\ffmpeg.exe'

def init():
    line_d.set_data([],[])
    line_ds.set_data([],[])
    line_g.set_data([],[])
    frame_text.set_text('Start')
    return (line_d,) + (line_ds,) + (line_g,) + (frame_text,)

def animate(i):
    bins = np.linspace(-5, 5, 50)
    x = np.linspace(-5, 5, 100000)
    
    histd, _ = np.histogram(anim_frames[i][0], bins=bins, normed=True)
    line_d.set_data(bins[1:], histd)

    ds = anim_frames[i][1]
    line_ds.set_data(x, ds)
    
    histg, _ = np.histogram(anim_frames[i][2], bins=bins, normed=True)
    line_g.set_data(bins[1:], histg)
    
    frame_text.set_text('Timestep = %.1d/%.1d' % (i, len(anim_frames)))
    
    return (line_d,) + (line_ds,) + (line_g,) + (frame_text,)

anim = animation.FuncAnimation(f, animate, init_func=init,
                               frames=len(anim_frames), blit=True)

HTML(anim.to_html5_video())

KeyError: 'ffmpeg'

In [None]:
f_loss, ax_loss = plt.subplots(figsize=(6,3))
f_loss.suptitle('Training Loss', fontsize=15)
plt.xlabel('Epochs')
plt.ylabel('Loss')
ax_loss.set_xlim(-100, num_epochs)
ax_loss.set_ylim(0.5, 2)
line_loss_d, = ax_loss.plot([], [], label='loss_d')
line_loss_g, = ax_loss.plot([], [], label='loss_g')
ax_loss.legend()

In [None]:
def init_loss():
    line_loss_d.set_data([],[])
    line_loss_g.set_data([],[])
    frame_text.set_text('Start')
    return (line_loss_d,) + (line_loss_g,)

def animate_loss(i):
    xs=[]
    ys_d=[]
    ys_g=[]    
    for point in range(0, i):
        xs.append(point*5)
        ys_d.append(anim_frames[point][3])
        ys_g.append(anim_frames[point][4])
        
    line_loss_d.set_data(xs, ys_d)
    line_loss_g.set_data(xs, ys_g)
    
    return (line_loss_d,) + (line_loss_g,)

anim_loss = animation.FuncAnimation(f_loss, animate_loss, init_func=init_loss,
                               frames=len(anim_frames), blit=True)

HTML(anim_loss.to_html5_video())