In [8]:
# Create a TensorFlow session whose GPU memory allocation grows on demand
# (allow_growth) and hand that session to the Keras backend.
import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
from keras import backend as K
# Register the session created above with Keras.
K.set_session(session)

# NOTE(review): later cells use names they do not import themselves (e.g. np,
# Model, Input, Dense, LSTM, Lambda, KCompetitive, objectives, to_categorical,
# parse_texts_bpe); presumably they arrive through these star imports --
# confirm, and prefer explicit imports so the origin of each name is visible.
from Utils import *
from Models import *

In [6]:
# Load the SentencePiece BPE model into `sp` and the matching word2vec-format
# binary vectors into `bpe`.
from gensim.models import KeyedVectors
import sentencepiece as spm
sp = spm.SentencePieceProcessor()
# NOTE(review): both paths are absolute and machine-specific; consider deriving
# them from a configurable data directory.
sp.Load('/work/data/bpe/en.wiki.bpe.op50000.model')
bpe = KeyedVectors.load_word2vec_format("/work/data/bpe/en.wiki.bpe.op50000.d200.w2v.bin", binary=True)

In [39]:
# Build the adversarial autoencoder. Positional arguments follow
# AAE.__init__(nb_words, max_len, emb, dim): nb_words=100, max_len=10,
# emb=the Keras embedding layer produced from the loaded BPE vectors, and
# dim=[5, 6] (dim[0] is the hidden width, dim[1] the latent size).
# NOTE(review): AAE is defined in a cell that appears later in this notebook,
# so that cell must be executed first. The `True` passed to
# get_keras_embedding is presumably gensim's train_embeddings flag -- confirm.
run = AAE(100,10, bpe.get_keras_embedding(True), [5,6])

In [40]:
# Smoke test: fit the adversarial model for one pass on random data.
# x: 10 sequences of 10 integer ids drawn from [0, 10).
x = np.random.randint(10, size=(10,10))
# y: random integers with the decoder's output shape (10, max_len=10,
# nb_words=100). These are not one-hot rows; they only match the shape.
y = np.random.randint(10, size=(10,10,100))
valid = np.ones(10)
fake = np.zeros(10)
# Six targets = the three outputs (xpred, yfake, yreal) once per player, in the
# same order AAE.batch_generator yields them: the first player gets
# (y, valid, fake), the second gets (y, fake, valid).
run.model.fit(x, [y, valid, fake, y, fake, valid])

Epoch 1/1


<keras.callbacks.History at 0x7fd3c7196630>

In [38]:
from keras_adversarial.legacy import l1l2
from keras_adversarial import AdversarialModel, fix_names, n_choice
from keras_adversarial import AdversarialOptimizerSimultaneous, normal_latent_sampling
from keras.layers import LeakyReLU, Activation
import os

class AAE(object):
    """Adversarial autoencoder (AAE) over fixed-length sequences of token ids.

    Three Keras models are built and wired together:

    * ``encoder``: token ids -> sampled latent vector ``z``;
    * ``decoder``: ``z`` -> per-position softmax over ``nb_words``;
    * ``discriminator``: latent vector -> single sigmoid score.

    They are combined into a keras_adversarial ``AdversarialModel`` stored in
    ``self.model``. It has two players: the encoder+decoder weights (player
    name "decoder") and the discriminator weights (player name
    "discriminator").
    """

    def __init__(self, nb_words, max_len, emb, dim, comp_topk=None, ctype=None, epsilon_std=1.0, save_model='best_model'):
        """Build the encoder, decoder, discriminator and the compiled adversarial model.

        Args:
            nb_words: Size of the decoder's softmax output at each position;
                also the number of classes used for one-hot targets in
                ``batch_generator``.
            max_len: Sequence length (encoder input length and number of
                decoder time steps).
            emb: Layer applied to the encoder input tensor as the first
                encoder layer (an embedding layer in this notebook).
            dim: Indexable with two entries; ``dim[0]`` is the hidden width and
                ``dim[1]`` the latent size.
            comp_topk: When not ``None``, ``z_mean`` is passed through
                ``KCompetitive(comp_topk, ctype)``.
            ctype: Second argument forwarded to ``KCompetitive``.
            epsilon_std: Standard deviation of the noise drawn in ``sampling``.
            save_model: Stored on the instance; not read anywhere in this class.
        """
        self.dim = dim
        self.comp_topk = comp_topk
        self.ctype = ctype
        self.epsilon_std = epsilon_std
        self.save_model = save_model

        self.nb_words = nb_words
        self.max_len = max_len
        self.emb = emb

        self.decoder = self.create_decoder()
        self.encoder = self.create_encoder()
        # Plain reconstruction model (x -> x'). Kept on the instance so it stays
        # reachable instead of being built and thrown away.
        self.autoencoder = Model(self.encoder.inputs, self.decoder(self.encoder(self.encoder.inputs)))
        self.discriminator = self.create_discriminator()
        discriminator = self.discriminator

        # Assemble the AAE graph: reconstruction plus discriminator scores for
        # the encoded latent (yfake) and for a sampled latent (yreal).
        x = self.encoder.inputs[0]
        z = self.encoder(x)
        xpred = self.decoder(z)
        zreal = normal_latent_sampling((self.dim[1],))(x)
        yreal = discriminator(zreal)
        yfake = discriminator(z)
        aae = Model(x, fix_names([xpred, yfake, yreal], ["xpred", "yfake", "yreal"]))

        generative_params = self.decoder.trainable_weights + self.encoder.trainable_weights

        self.model = AdversarialModel(
            base_model=aae,
            player_params=[generative_params, discriminator.trainable_weights],
            player_names=["decoder", "discriminator"])
        # NOTE(review): "xpred" is a softmax output trained with mean squared
        # error; left unchanged here, but confirm this is the intended loss.
        self.model.adversarial_compile(
            adversarial_optimizer=AdversarialOptimizerSimultaneous(),
            player_optimizers=[Adam(1e-4, decay=1e-4), Adam(1e-3, decay=1e-4)],
            loss={"yfake": "binary_crossentropy", "yreal": "binary_crossentropy",
                  "xpred": "mean_squared_error"},
            player_compile_kwargs=[{"loss_weights": {"yfake": 1e-2, "yreal": 1e-2, "xpred": 1}}] * 2)

    def create_discriminator(self):
        """Return a model mapping a ``dim[1]``-sized latent vector to one sigmoid score."""
        z = Input((self.dim[1],))
        h = z
        h = Dense(self.dim[0], name="discriminator_h1")(h)
        h = LeakyReLU(0.2)(h)
        h = Dense(self.dim[1], name="discriminator_h2")(h)
        h = LeakyReLU(0.2)(h)
        y = Dense(1, name="discriminator_y", activation="sigmoid")(h)
        return Model(z, y)

    def create_decoder(self):
        """Return the decoder model: latent vector -> ``max_len`` softmax rows of size ``nb_words``."""
        z = Input(shape=(self.dim[1],))

        act = 'tanh'
        decoder_h = Dense(self.dim[0], kernel_initializer='glorot_normal', activation=act)
        decoder_mean = Dense(self.nb_words, activation='softmax')

        h_decoded = decoder_h(z)
        # Repeat the projected latent once per output position before the recurrent layer.
        h_decoded = RepeatVector(self.max_len)(h_decoded)
        h_decoded = Bidirectional(LSTM(self.dim[0], return_sequences=True, name='dec_lstm_1'))(h_decoded)
        x_decoded_mean = TimeDistributed(decoder_mean, name='decoded_mean')(h_decoded)

        return Model(z, x_decoded_mean, name="decoder")

    def create_encoder(self):
        """Return the encoder model: ``max_len`` token ids -> sampled latent vector.

        Side effect: sets ``self.z_mean`` and ``self.z_log_var`` (read by
        ``vae_loss``) to the tensors that parameterize the sampled latent.
        """
        act = 'tanh'
        x = Input(shape=(self.max_len,))
        embed_layer = self.emb
        bilstm = Bidirectional(LSTM(self.dim[0], name='lstm_1'))
        hidden_layer1 = Dense(self.dim[0], kernel_initializer='glorot_normal', activation=act)

        h1 = embed_layer(x)
        h1 = bilstm(h1)
        h1 = hidden_layer1(h1)

        self.z_mean = Dense(self.dim[1], kernel_initializer='glorot_normal')(h1)
        self.z_log_var = Dense(self.dim[1], kernel_initializer='glorot_normal')(h1)

        # Identity test instead of `!= None`, which would go through __ne__.
        if self.comp_topk is not None:
            self.z_mean = KCompetitive(self.comp_topk, self.ctype)(self.z_mean)

        z = Lambda(self.sampling, output_shape=(self.dim[1],))([self.z_mean, self.z_log_var])

        return Model(x, z, name="encoder")

    def vae_loss(self, x, x_decoded_mean):
        """Return reconstruction cross-entropy (scaled by ``max_len``) plus a KL term.

        The KL term is computed from ``self.z_mean`` / ``self.z_log_var`` as set
        by ``create_encoder``. This loss is not referenced by the
        ``adversarial_compile`` call in ``__init__``.
        NOTE(review): ``objectives`` is not imported in this cell; presumably it
        comes from a star import elsewhere -- confirm.
        """
        # xent_loss =  self.max_len * K.sum(K.binary_crossentropy(x_decoded_mean, x), axis=-1)
        x = K.flatten(x)
        x_decoded_mean = K.flatten(x_decoded_mean)
        xent_loss = self.max_len * objectives.binary_crossentropy(x, x_decoded_mean)
        kl_loss = - 0.5 * K.sum(1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var), axis=-1)

        return xent_loss + kl_loss

    def sampling(self, args):
        """Draw ``z = z_mean + exp(z_log_var / 2) * epsilon``.

        ``epsilon`` is normal noise with mean 0 and stddev ``epsilon_std``.

        Args:
            args: Pair ``(z_mean, z_log_var)`` of tensors.
        """
        z_mean, z_log_var = args
        epsilon = K.random_normal(shape=(K.shape(z_mean)[0], self.dim[1]), mean=0.,
                                  stddev=self.epsilon_std)

        return z_mean + K.exp(z_log_var / 2) * epsilon

    def initModel(self, sp, bpe_dict):
        """Store the tokenizer (``sp``) and BPE dictionary used by ``batch_generator``."""
        self.sp = sp
        self.bpe_dict = bpe_dict

    def batch_generator(self, reader, train_data, batch_size):
        """Yield ``(x, targets)`` batches forever, one per chunk of ``reader``.

        Args:
            reader: Iterable of chunks; each chunk exposes a ``q`` column with
                ``tolist()``. NOTE(review): if this is a one-shot iterator, the
                outer ``while True`` will spin without yielding once it is
                exhausted -- pass something re-iterable.
            train_data: Unused; kept for interface compatibility.
            batch_size: Nominal chunk size. Kept for interface compatibility;
                the actual row count of each chunk is taken from the data so a
                short final chunk no longer breaks the reshape.

        Yields:
            ``x`` (output of ``parse_texts_bpe``) and the six-element target
            list ``[x_one_hot, valid, fake, x_one_hot, fake, valid]``.
        """
        while True:
            for df in reader:
                x = parse_texts_bpe(df.q.tolist(), self.sp, self.bpe_dict, self.max_len, True)
                x_one_hot = to_categorical(x, self.nb_words)
                # -1 lets numpy infer the number of rows, so chunks smaller than
                # batch_size (typically the last one) are handled too.
                x_one_hot = x_one_hot.reshape(-1, self.max_len, self.nb_words)
                n_rows = x_one_hot.shape[0]
                valid = np.ones(n_rows)
                fake = np.zeros(n_rows)
                yield x, [x_one_hot, valid, fake, x_one_hot, fake, valid]

In [18]:
import matplotlib as mpl

# This line allows mpl to run with no DISPLAY defined
mpl.use('Agg')

from keras.layers import Dense, Reshape, Flatten, Input, Lambda, merge
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras_adversarial.legacy import l1l2
import keras.backend as K
import pandas as pd
import numpy as np
from keras_adversarial.image_grid_callback import ImageGridCallback

from keras_adversarial import AdversarialModel, fix_names, n_choice
from keras_adversarial import AdversarialOptimizerSimultaneous, normal_latent_sampling
# from mnist_utils import mnist_data
from keras.layers import LeakyReLU, Activation
import os


def model_generator(latent_dim, input_shape, hidden_dim=512, reg=lambda: l1l2(1e-7, 0)):
    """Build the generator (z -> x) as a Sequential model named "generator".

    Args:
        latent_dim: Size of the latent input vector.
        input_shape: Shape of one generated sample; the last Dense layer has
            ``np.prod(input_shape)`` units and its output is reshaped to this.
        hidden_dim: Width of the two hidden Dense layers.
        reg: Zero-argument factory called once per Dense layer to create that
            layer's weight regularizer.

    Returns:
        The Sequential model.
    """
    # `kernel_regularizer` is the Keras 2 keyword (the rest of this notebook
    # already uses Keras 2 names such as `kernel_initializer`); the Keras 1
    # spelling `W_regularizer` is only accepted through a legacy shim.
    return Sequential([
        Dense(hidden_dim, name="generator_h1", input_dim=latent_dim, kernel_regularizer=reg()),
        LeakyReLU(0.2),
        Dense(hidden_dim, name="generator_h2", kernel_regularizer=reg()),
        LeakyReLU(0.2),
        Dense(np.prod(input_shape), name="generator_x_flat", kernel_regularizer=reg()),
        Activation('sigmoid'),
        Reshape(input_shape, name="generator_x")],
        name="generator")


def model_encoder(latent_dim, input_shape, hidden_dim=512, reg=lambda: l1l2(1e-7, 0)):
    """Build the encoder (x -> z) as a functional model named "encoder".

    The input is flattened and passed through two hidden Dense layers. Two
    heads of size ``latent_dim`` follow: ``mu`` and ``log_sigma_sq``. ``z`` is
    sampled as ``mu + noise * exp(log_sigma_sq / 2)``.

    Args:
        latent_dim: Size of the latent vector.
        input_shape: Shape of one input sample.
        hidden_dim: Width of the two hidden Dense layers.
        reg: Zero-argument factory called once per Dense layer to create that
            layer's weight regularizer.

    Returns:
        The Keras ``Model`` mapping ``x`` to the sampled ``z``.
    """
    def sample_z(p):
        # p is [mu, log_sigma_sq]; noise has the same shape as mu.
        return p[0] + K.random_normal(K.shape(p[0])) * K.exp(p[1] / 2)

    x = Input(input_shape, name="x")
    h = Flatten()(x)
    # `kernel_regularizer` is the Keras 2 keyword for the former `W_regularizer`.
    h = Dense(hidden_dim, name="encoder_h1", kernel_regularizer=reg())(h)
    h = LeakyReLU(0.2)(h)
    h = Dense(hidden_dim, name="encoder_h2", kernel_regularizer=reg())(h)
    h = LeakyReLU(0.2)(h)
    mu = Dense(latent_dim, name="encoder_mu", kernel_regularizer=reg())(h)
    log_sigma_sq = Dense(latent_dim, name="encoder_log_sigma_sq", kernel_regularizer=reg())(h)
    # A Lambda layer replaces the legacy functional `merge(..., mode=callable)`;
    # the output shape is that of the first input (mu), as before.
    z = Lambda(sample_z, output_shape=lambda p: p[0])([mu, log_sigma_sq])
    return Model(x, z, name="encoder")


def model_discriminator(latent_dim, output_dim=1, hidden_dim=512,
                        reg=lambda: l1l2(1e-7, 1e-7)):
    """Build the discriminator (z -> y) as a functional model.

    Args:
        latent_dim: Size of the latent input vector.
        output_dim: Number of sigmoid output units.
        hidden_dim: Width of the two hidden Dense layers.
        reg: Zero-argument factory called once per Dense layer to create that
            layer's weight regularizer.

    Returns:
        The Keras ``Model`` mapping ``z`` to the sigmoid output ``y``.
    """
    z = Input((latent_dim,))
    h = z
    # `kernel_regularizer` is the Keras 2 keyword for the former `W_regularizer`.
    h = Dense(hidden_dim, name="discriminator_h1", kernel_regularizer=reg())(h)
    h = LeakyReLU(0.2)(h)
    h = Dense(hidden_dim, name="discriminator_h2", kernel_regularizer=reg())(h)
    h = LeakyReLU(0.2)(h)
    y = Dense(output_dim, name="discriminator_y", activation="sigmoid", kernel_regularizer=reg())(h)
    return Model(z, y)


def example_aae(path, adversarial_optimizer):
    """Build and compile the example adversarial autoencoder and return it.

    The example uses 28x28 inputs and a 100-dimensional latent space. The
    training, callback and saving steps are commented out below, so the
    function only builds the networks, prints their summaries and compiles
    the adversarial model.

    Args:
        path: Output directory. Only referenced by the commented-out
            callback/saving code, so it is currently unused.
        adversarial_optimizer: Passed to ``AdversarialModel.adversarial_compile``.

    Returns:
        The compiled ``AdversarialModel``. It was previously built and then
        discarded, leaving callers nothing to train or inspect.
    """
    # z \in R^100
    latent_dim = 100
    # x \in R^{28x28}
    input_shape = (28, 28)

    # generator (z -> x)
    generator = model_generator(latent_dim, input_shape)
    # encoder (x ->z)
    encoder = model_encoder(latent_dim, input_shape)
    # autoencoder (x -> x')
    autoencoder = Model(encoder.inputs, generator(encoder(encoder.inputs)))
    # discriminator (z -> y)
    discriminator = model_discriminator(latent_dim)

    # assemble AAE
    x = encoder.inputs[0]
    z = encoder(x)
    xpred = generator(z)
    zreal = normal_latent_sampling((latent_dim,))(x)
    yreal = discriminator(zreal)
    yfake = discriminator(z)
    aae = Model(x, fix_names([xpred, yfake, yreal], ["xpred", "yfake", "yreal"]))

    # print summary of models
    generator.summary()
    encoder.summary()
    discriminator.summary()
    autoencoder.summary()

    # build adversarial model
    generative_params = generator.trainable_weights + encoder.trainable_weights
    model = AdversarialModel(base_model=aae,
                             player_params=[generative_params, discriminator.trainable_weights],
                             player_names=["generator", "discriminator"])
    model.adversarial_compile(adversarial_optimizer=adversarial_optimizer,
                              player_optimizers=[Adam(1e-4, decay=1e-4), Adam(1e-3, decay=1e-4)],
                              loss={"yfake": "binary_crossentropy", "yreal": "binary_crossentropy",
                                    "xpred": "mean_squared_error"},
                              player_compile_kwargs=[{"loss_weights": {"yfake": 1e-2, "yreal": 1e-2, "xpred": 1}}] * 2)

#     # load mnist data
#     xtrain, xtest = mnist_data()

#     # callback for image grid of generated samples
#     def generator_sampler():
#         zsamples = np.random.normal(size=(10 * 10, latent_dim))
#         return generator.predict(zsamples).reshape((10, 10, 28, 28))

#     generator_cb = ImageGridCallback(os.path.join(path, "generated-epoch-{:03d}.png"), generator_sampler)

#     # callback for image grid of autoencoded samples
#     def autoencoder_sampler():
#         xsamples = n_choice(xtest, 10)
#         xrep = np.repeat(xsamples, 9, axis=0)
#         xgen = autoencoder.predict(xrep).reshape((10, 9, 28, 28))
#         xsamples = xsamples.reshape((10, 1, 28, 28))
#         samples = np.concatenate((xsamples, xgen), axis=1)
#         return samples

#     autoencoder_cb = ImageGridCallback(os.path.join(path, "autoencoded-epoch-{:03d}.png"), autoencoder_sampler)

#     # train network
#     # generator, discriminator; pred, yfake, yreal
#     n = xtrain.shape[0]
#     y = [xtrain, np.ones((n, 1)), np.zeros((n, 1)), xtrain, np.zeros((n, 1)), np.ones((n, 1))]
#     ntest = xtest.shape[0]
#     ytest = [xtest, np.ones((ntest, 1)), np.zeros((ntest, 1)), xtest, np.zeros((ntest, 1)), np.ones((ntest, 1))]
#     history = model.fit(x=xtrain, y=y, validation_data=(xtest, ytest), callbacks=[generator_cb, autoencoder_cb],
#                         nb_epoch=100, batch_size=32)

#     # save history
#     df = pd.DataFrame(history.history)
#     df.to_csv(os.path.join(path, "history.csv"))

#     # save model
#     encoder.save(os.path.join(path, "encoder.h5"))
#     generator.save(os.path.join(path, "generator.h5"))
#     discriminator.save(os.path.join(path, "discriminator.h5"))

    # Hand the compiled model back so the caller can train or inspect it.
    return model


def main():
    """Run the AAE example with the simultaneous adversarial optimizer.

    The output directory passed to ``example_aae`` is "output/aae".
    """
    output_dir = "output/aae"
    optimizer = AdversarialOptimizerSimultaneous()
    example_aae(output_dir, optimizer)


# Entry-point guard: run the example when this code is executed as the main
# module. NOTE(review): inside a notebook kernel __name__ is normally
# "__main__", so executing this cell calls main() -- confirm that is intended.
if __name__ == "__main__":
    main()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
generator_h1 (Dense)         (None, 512)               51712     
_________________________________________________________________
leaky_re_lu_17 (LeakyReLU)   (None, 512)               0         
_________________________________________________________________
generator_h2 (Dense)         (None, 512)               262656    
_________________________________________________________________
leaky_re_lu_18 (LeakyReLU)   (None, 512)               0         
_________________________________________________________________
generator_x_flat (Dense)     (None, 784)               402192    
_________________________________________________________________
activation_2 (Activation)    (None, 784)               0         
_________________________________________________________________
generator_x (Reshape)        (None, 28, 28)            0         
Total para

In [2]:
# Scratch check: the float literal 1e2 (displayed below as 100.0).
1e2

100.0