In [1]:
from Models import *
from Utils import *
from FastModels import *
import warnings
from livelossplot import PlotLossesKeras
# warnings.filterwarnings('ignore')

from gensim.models import KeyedVectors
import sentencepiece as spm

max_len = 15
enablePadding = True

# Load the SentencePiece tokenizer and the matching pretrained BPE vectors.
sp = spm.SentencePieceProcessor()
sp.Load('/work/data/bpe/en.wiki.bpe.op50000.model')
bpe = KeyedVectors.load_word2vec_format(
    "/work/data/bpe/en.wiki.bpe.op50000.d200.w2v.bin",
    binary=True,
)

# Vocabulary layout: index 0 is the empty string (used for padding), followed by
# the pretrained pieces, with '<sos>' and '<eos>' appended at the end.
bpe.index2word = [''] + bpe.index2word + ['<sos>', '<eos>']
nb_words = len(bpe.index2word)

# word -> index lookup over the extended vocabulary
bpe_dict = {token: position for position, token in enumerate(bpe.index2word)}

# Embedding matrix aligned with the extended vocabulary: one zero row in front
# for the empty string and two zero rows at the back for '<sos>' / '<eos>'.
embedding_matrix = np.vstack([
    np.zeros((1, bpe.vector_size)),
    bpe.vectors,
    np.zeros((2, bpe.vector_size)),
])

embedding_layer = Embedding(
    input_dim=nb_words,
    output_dim=embedding_matrix.shape[-1],
    weights=[embedding_matrix],
    input_length=max_len,
    trainable=True,
)

Using TensorFlow backend.


In [310]:
class AAE():
    """Adversarial autoencoder over fixed-length token-id sequences.

    A GRU encoder turns a sequence into a sampled latent code, a GRU decoder
    (fed a second token-id input and initialised from the latent code)
    predicts a distribution over the vocabulary at every step, and a small
    dense discriminator scores the latent code.

    Attributes created by ``build``:
        ae: model ``[encoder_inputs, decoder_inputs] -> rec_outputs``.
        gs_encoder: model ``encoder_inputs -> sampled latent code``.
        encoder: model ``encoder_inputs -> final encoder GRU state``.
        discriminator: model ``latent code -> single score``.
        model: combined model with outputs ``[reconstruction, score]``.
    """

    def __init__(self, nb_words, max_len, embedding_matrix, dim, optimizer=None, mode=1, enableWasserstein=False, enableBOW=False, enableS2S=False):
        """Store the configuration and build/compile all sub-models.

        Args:
            nb_words: vocabulary size (embedding input dim and output classes).
            max_len: sequence length of both encoder and decoder inputs.
            embedding_matrix: initial weights for both embedding layers; its
                last axis gives the embedding width.
            dim: two-element sequence ``(hidden_dim, latent_dim)``.
            optimizer: optimizer used for both the discriminator and the
                combined model. ``None`` (default) creates a fresh ``Adam()``.
            mode: stored on the instance; not otherwise used in this class.
            enableWasserstein: switches the decoder output and discriminator
                output activations to ``"linear"`` and the name to ``"wae"``.
            enableBOW: stored on the instance; not otherwise used in this class.
            enableS2S: stored on the instance; not otherwise used in this class.
        """
        self.dim = dim
        self.nb_words = nb_words
        self.max_len = max_len
        self.embedding_matrix = embedding_matrix
        # A default written as ``optimizer=Adam()`` would be evaluated once, when
        # the class is defined, and then shared by every instance. Create the
        # default lazily so each instance gets its own optimizer.
        self.optimizer = optimizer if optimizer is not None else Adam()
        self.enableWasserstein = enableWasserstein
        self.enableS2S = enableS2S
        # Honour the caller's value (this used to be hard-coded to 1).
        self.mode = mode
        self.enableBOW = enableBOW

        self.hidden_dim = self.dim[0]
        self.latent_dim = self.dim[1]

        self.build()

    def build(self):
        """Create the sub-models and compile the discriminator and the combined model."""
        self.ae, self.gs_encoder, self.encoder = self.build_ae()

        self.discriminator = self.build_gs_discriminator()

        inputs = self.ae.inputs

        rec_pred = self.ae(inputs)
        # The discriminator only sees the latent code of the encoder input.
        aae_penalty = self.discriminator(self.gs_encoder(inputs[0]))

        # Compile the stand-alone discriminator first, then flip ``trainable``
        # before compiling the combined model; the intent is that fitting
        # ``self.model`` leaves the discriminator weights untouched.
        self.discriminator.compile(optimizer=self.optimizer, loss="binary_crossentropy", metrics=['accuracy'])
        self.model = Model(inputs, [rec_pred, aae_penalty])
        self.discriminator.trainable = False
        # NOTE(review): ``wasserstein_loss`` is never selected here, even when
        # ``enableWasserstein`` is True -- confirm whether that is intended.
        self.model.compile(optimizer=self.optimizer, loss=["sparse_categorical_crossentropy", "binary_crossentropy"], loss_weights=[1, 1e-3])

    def wasserstein_loss(self, y_true, y_pred):
        """Return the mean of ``y_true * y_pred``.

        NOTE(review): not referenced by ``build`` in the visible code.
        """
        return K.mean(y_true * y_pred)

    def build_ae(self):
        """Build the encoder/decoder graph.

        Returns:
            A 3-tuple of models sharing the same layers:
            ``([encoder_inputs, decoder_inputs] -> rec_outputs,
            encoder_inputs -> sampled latent code,
            encoder_inputs -> final encoder GRU state)``.
        """
        encoder_inputs = Input(shape=(self.max_len,))
        self.encoder_embedding = Embedding(self.nb_words,
                                        self.embedding_matrix.shape[-1],
                                        weights=[self.embedding_matrix],
                                        input_length=self.max_len,
                                        name="enc_embedding",
                                        mask_zero=True,
                                        trainable=True)

        # Despite the attribute name, the recurrent layers are GRUs.
        self.encoder_lstm = GRU(self.hidden_dim, return_state=True, name="enc_gru")

        x = self.encoder_embedding(encoder_inputs)
        _, self.state = self.encoder_lstm(x)

        # Two linear heads on the final state; ``sampling`` treats the second
        # one as a log-variance.
        self.mean = Dense(self.latent_dim)
        self.var = Dense(self.latent_dim)

        state_mean = self.mean(self.state)
        state_var = self.var(self.state)

        state_z = Lambda(self.sampling, name="kl")([state_mean, state_var])

        decoder_inputs = Input(shape=(self.max_len,), name="dec_input")

        self.latent2hidden = Dense(self.hidden_dim)
        self.decoder_lstm = GRU(self.hidden_dim, return_sequences=True)
        self.decoder_dense = Dense(self.nb_words, activation='softmax' if not self.enableWasserstein else "linear", name="rec")
        self.decoder_embedding = Embedding(self.nb_words,
                                        self.embedding_matrix.shape[-1],
                                        weights=[self.embedding_matrix],
                                        input_length=self.max_len,
                                        name="dec_embedding",
                                        mask_zero=True,
                                        trainable=True)

        x = self.decoder_embedding(decoder_inputs)
        # The latent code is projected back to ``hidden_dim`` and used as the
        # decoder's initial state.
        decoder_outputs = self.decoder_lstm(x, initial_state=self.latent2hidden(state_z))
        rec_outputs = self.decoder_dense(decoder_outputs)

        return Model([encoder_inputs, decoder_inputs], rec_outputs), Model(encoder_inputs, state_z), Model(encoder_inputs, self.state)

    def build_gs_discriminator(self):
        """Build the latent-code discriminator: three stacked Dense layers ending in one unit.

        The first two layers have no activation; the last one is sigmoid, or
        linear when ``enableWasserstein`` is set.
        """
        inputs = Input((self.latent_dim,), name="gs_dis_input")

        dense1 = Dense(self.hidden_dim, name="dis_dense1")
        dense2 = Dense(self.latent_dim, name="dis_dense2")
        dense3 = Dense(1, activation="sigmoid" if not self.enableWasserstein else "linear")

        outputs = dense3(dense2(dense1(inputs)))

        return Model(inputs, outputs)

    def sampling(self, args):
        """Draw ``z_mean + exp(z_log_var / 2) * epsilon`` with standard-normal ``epsilon``.

        Args:
            args: pair ``(z_mean, z_log_var)`` of 2-D tensors of equal shape.
        """
        z_mean, z_log_var = args
        epsilon = K.random_normal(shape=(K.shape(z_mean)[0], K.shape(z_mean)[1]), mean=0.,
                                  stddev=1)
        return z_mean + K.exp(z_log_var / 2) * epsilon

    def name(self):
        """Return ``"wae"`` when ``enableWasserstein`` is set, otherwise ``"aae"``."""
        # The previous expression ``"aae" % self.mode`` had no format placeholder
        # and therefore raised TypeError whenever this branch was taken.
        return "aae" if not self.enableWasserstein else "wae"




In [314]:
# K.clear_session()
# Tiny configuration: dim=[hidden_dim, latent_dim] = [2, 1].
# NOTE(review): the output recorded for this cell is a TypeError
# ('NoneType' + 'int'); later cells use `run`, so it presumably came from
# another execution -- confirm on a fresh kernel.
run = AAE(
    nb_words=nb_words,
    max_len=max_len,
    embedding_matrix=embedding_matrix,
    dim=[2, 1],
)


TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'

In [327]:
%%time
x = np.random.randint(nb_words, size=(256, 15))
x_ = np.zeros((256, nb_words))
y = np.ones((256, 2))
y[:, 0] = 1
for i, j in enumerate(x):
    x_[i][j] = 1
    
x_ = np.expand_dims(x, axis=-1)


CPU times: user 20 ms, sys: 0 ns, total: 20 ms
Wall time: 16.7 ms


In [331]:
# Snapshot of every weight tensor of the combined model, each flattened to a
# list by all_weights.
# NOTE(review): the execution counts show this cell (In [331]) was run after the
# fit (In [328]) and after `new` was captured (In [329]), so despite its name
# this is not a "before training" snapshot -- re-run in order to confirm.
old = all_weights(run.model)

In [328]:
# One pass over the random batch: the same ids feed both the encoder and the
# decoder input; targets are the ids (reconstruction) and `z` (discriminator).
# NOTE(review): `z` is assigned in a cell that appears further down the notebook.
run.model.fit(x=[x, x], y=[x_, z], batch_size=64, verbose=0)

<keras.callbacks.History at 0x7f67d450deb8>

In [323]:
# Smoke-test a single discriminator update on random data.
# The discriminator is built with Input((latent_dim,)), so the batch width must
# follow the model rather than a hard-coded 100.
# NOTE(review): the inputs are random integers in [0, nb_words), not draws from
# a latent prior -- presumably fine for a smoke test; confirm.
yy = np.random.randint(nb_words, size=(256, run.latent_dim))
z = np.random.randint(2, size=(256))
run.discriminator.train_on_batch(yy, z)

[7.6545115, 0.5234375]

In [329]:
# Raw weight arrays of the discriminator only.
# NOTE(review): `old` is produced by all_weights(run.model), i.e. flattened
# lists taken from a different model, so the two snapshots are not directly
# comparable -- confirm which model was meant.
new = run.discriminator.get_weights()

In [333]:
# Compare the two snapshots tensor by tensor. A bare `old == new` on lists that
# hold numpy arrays either raises "truth value of an array is ambiguous" or
# yields a meaningless result; flattening both sides makes the check work
# whether a snapshot holds arrays or already-flattened lists.
# NOTE(review): as captured in the cells above, `old` and `new` come from
# different models, so this is expected to be False.
len(old) == len(new) and all(
    np.array_equal(np.ravel(a), np.ravel(b)) for a, b in zip(old, new)
)

False

In [309]:
import numpy as np
from keras.models import Sequential
from keras.engine.topology import Input, Container
from keras.engine.training import Model
from keras.layers.core import Dense

def all_weights(m):
    """Return the weights of ``m`` as a list of flat lists, one per weight tensor.

    ``m`` must provide ``get_weights()`` returning objects that support
    ``reshape`` (e.g. numpy arrays).
    """
    flattened = []
    for tensor in m.get_weights():
        flattened.append(list(tensor.reshape(-1)))
    return flattened

def random_fit(m):
    """Fit ``m`` once, silently, on a freshly drawn random batch.

    Inputs and targets are each a (5, 2) array of uniform samples from the
    global numpy random state (inputs are drawn first).
    """
    features = np.random.random((5, 2))
    targets = np.random.random((5, 2))
    m.fit(features, targets, verbose=False)

np.random.seed(100)

# Discriminator model
x = in_x = Input((2, ))
x = Dense(1)(x)
x = Dense(2)(x)
model_D = Model(in_x, x)

# Compile D
model_D.compile(optimizer="sgd", loss="mse")

# Generator model
x = in_x = Input((2, ))
x = Dense(1)(x)
x = Dense(2)(x)
model_G = Model(in_x, x)

# Adversarial model
model_A = Sequential()
model_A.add(model_G)
model_A.add(model_D)

# Compile A
model_D.trainable = False # set D in A "trainable=False"
model_A.compile(optimizer="sgd", loss="mse")


def print_weights(title):
    """Print ``title`` followed by the flattened weights of G, D and A."""
    print(title)
    print("G: %s" % all_weights(model_G))
    print("D: %s" % all_weights(model_D))
    print("A : %s" % all_weights(model_A))


# Watch which weights are updated by model.fit
print_weights("Initial Weights")

random_fit(model_D)

# A bare `print` is a no-op expression in Python 3; call it so the intended
# blank separator line is actually emitted.
print()
print_weights("after training D --- D and D in A changed")

random_fit(model_A)

print()
print_weights("after training A --- D didn't changed!")

random_fit(model_D)

print()
print_weights("after training D")

random_fit(model_A)

print()
print_weights("after training A")


Initial Weights
G: [[-1.3648417, -1.1097677], [0.0], [0.7505046, -0.022380948], [0.0, 0.0]]
D: [[-0.75828314, -1.1783319], [0.0], [1.1859695, 0.23665011], [0.0, 0.0]]
A : [[-1.3648417, -1.1097677], [0.0], [0.7505046, -0.022380948], [0.0, 0.0], [-0.75828314, -1.1783319], [0.0], [1.1859695, 0.23665011], [0.0, 0.0]]
after training D --- D and D in A changed
G: [[-1.3648417, -1.1097677], [0.0], [0.7505046, -0.022380948], [0.0, 0.0]]
D: [[-0.7526134, -1.1682525], [0.017999575], [1.1735275, 0.23064853], [0.013865177, 0.0065746866]]
A : [[-1.3648417, -1.1097677], [0.0], [0.7505046, -0.022380948], [0.0, 0.0], [-0.7526134, -1.1682525], [0.017999575], [1.1735275, 0.23064853], [0.013865177, 0.0065746866]]
after training A --- D didn't changed!
G: [[-1.3615896, -1.1104604], [0.0024672896], [0.7453306, -0.030412478], [0.0034840428, 0.0054081436]]
D: [[-0.7526134, -1.1682525], [0.017999575], [1.1735275, 0.23064853], [0.013865177, 0.0065746866]]
A : [[-1.3615896, -1.1104604], [0.0024672896], [0.74533