In [1]:


import tensorflow as tf
import numpy as np
from data_gen import *
import time
import preprocess as pre
import argparse



In [0]:


# parser = argparse.ArgumentParser()
# parser.add_argument("--batch_size", type=int,  default=128)
# parser.add_argument("--char_embed_size", type=int,  default=32)
# parser.add_argument("--feat_embed_size", type=int, default=32)
# parser.add_argument("--hidden_size", type=int, default=265)
# parser.add_argument("--epochs", type=int, default=50)
# args = parser.parse_args()



In [2]:


char2int, feat2val, max_r, max_w = pre.process(['russian'])
# print(feat2val)
data = pre.convert(char2int, feat2val, max_r, max_w, langs=['russian'], for_cnn=True)
clean_data = pre.convert(char2int, feat2val, max_r, max_w, langs=['russian'], train_set=False, for_cnn=True)
gen_data = pre.convert(char2int, feat2val, max_r, max_w, langs=['russian'], train_set=False, for_cnn=True)
int2char = {val: key for val, key in enumerate(char2int)}



In [3]:


batch_size = 128
max_root = max_r + 2
max_word = max_w + 2
n_feature = data[1].shape[1]
hidden_size = 256
feat_embed_size = 32
char_embed_size = 32
EPOCHS = 10
n_batches = len(data[0]) // batch_size
print("Total Data: {0} Total Batches {1}".format(len(data[0]), n_batches))



Total Data: 12599 Total Batches 98


In [4]:


class Encoder(tf.keras.Model):
    def __init__(self, enc_units, feat_units, batch_size):
        super(Encoder, self).__init__()
        self.batch_size = batch_size
        self.enc_units = enc_units
        self.cnn = tf.keras.layers.Conv2D(32, (3, 3), padding="same", activation="relu")
        self.pool = tf.keras.layers.MaxPool2D(2, 2)
        self.flat = tf.keras.layers.Flatten()

        self.fc1 = tf.keras.layers.Dense(feat_units, activation="relu", name="feature_output")
        self.fc2 = tf.keras.layers.Dense(enc_units, activation="relu", name="state_out")
        
    def call(self, w, f):
        x = self.cnn(w)
        x = self.pool(x)
        x = self.flat(x)
        feat = self.fc1(f)
        state = tf.concat([x, feat], axis=1)
        state = self.fc2(state)
        return state, feat



In [5]:


class Decoder(tf.keras.Model):
    def __init__(self, dec_units, output_size, batch_size):
        super(Decoder, self).__init__()
        self.batch_size = batch_size
        self.dec_units = dec_units
        self.gru = tf.keras.layers.GRU(self.dec_units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform', name="decoder_gru")
        self.fc = tf.keras.layers.Dense(output_size, activation="softmax")


    def call(self, x, feat, hidden):
        # enc_output shape == (batch_size, max_length, hidden_size)
        # x = tf.concat([context_vector, x, feat], axis=-1)
        x = tf.expand_dims(x, 1)
        output, state = self.gru(x, initial_state=hidden)
        output = tf.reshape(output, (-1, output.shape[2]))

        x = self.fc(output)
        return x, state#, attention_weights



In [6]:


def predict(encoder, decoder, inputs, n_steps):
    # encode
    root, feat = inputs[0], inputs[1]
    state, feat = encoder(inputs[0], inputs[1])
    
    start_word = '<'
    start_mat = pre.word_to_matrix(start_word, char2int, 1, ' ')
    
    target_seq = np.zeros((root.shape[0], len(char2int)), dtype=np.float32) + np.array(start_mat, dtype=np.float32)
    outputs = list()
    for t in range(n_steps):
        # predict next char
        target_seq, state = decoder(target_seq, feat, state)
        
        outputs.append(target_seq)
    return np.stack(outputs)



In [7]:


decoder = Decoder(hidden_size, len(char2int), batch_size)
encoder = Encoder(hidden_size, feat_embed_size, batch_size)

# x = np.random.randn(10, 15, 28,1)
# f = np.random.randn(10, 32)
# h = tf.cast(np.random.randn(10, 256), tf.float64)
# x, f = encoder(x, f)
# decoder(x, f, None)


In [8]:


optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.CategoricalCrossentropy()

def loss_function(real, pred):
    loss_ = loss_object(real, pred)

    return tf.reduce_mean(loss_)



In [9]:


@tf.function
def train_step(root, feature, dec_input, target):
    loss = 0
    
    with tf.GradientTape() as tape:
        enc_hidden, feat = encoder(root, feature)
        # print(enc_hidden.shape)
        dec_hidden = enc_hidden

        for t in range(target.shape[1]):
            predictions, dec_hidden = decoder(dec_input[:, t], feat, dec_hidden)
            loss += loss_function(target[:, t], predictions)

        batch_loss = (loss / int(target.shape[1]))
        variables = encoder.trainable_variables + decoder.trainable_variables

        gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    return batch_loss



In [10]:


def test_model(test_data, log=False):
    test_n_batches, test_batch_size =  int(test_data[0].shape[0] / batch_size), batch_size  
    print(test_n_batches * test_batch_size)
    test_gen = pre.gen(data, batch_size)
    # shows sample examples and calculates accuracy
    test_batches = len(test_data[0]) // batch_size
    total, correct = 0, 0
    in_word = 0
    sims = []
    for b in range(test_batches - 1):
        # get data from test data generator
        [root, feat, dec_in], y = next(test_gen)
        root = np.expand_dims(root, axis=3)
        pred = predict(encoder, decoder, [root, feat], max_word)
        for k in range(pred.shape[1]):
            indexes = pred[:, k]#.argmax(axis=1)
            r = ''.join(pre.matrix_to_word(root[k], int2char)).strip()[1:-1]
            w = ''.join(pre.matrix_to_word(dec_in[k], int2char)).strip()[1:-1]
            t = ''.join(pre.matrix_to_word(indexes, int2char)).strip()[:-1]
            if w == t:
                correct += 1
            else:
                if log:
                    print(r, w, t)


        total += batch_size
        return float(correct)/float(total)*100.0



In [11]:


gen = pre.gen(data, batch_size)



In [12]:


for epoch in range(10):
    start = time.time()

    # enc_hidden = encoder.initialize_hidden_state()
    total_loss = 0

    for step in range(n_batches):
        [root, feat, dec_in], y = next(gen)
        root = np.expand_dims(root, axis=3)
        batch_loss = train_step(root, feat, dec_in, y)
        total_loss += batch_loss

#         if step % (n_batches // 1) == 0:
#             print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
#                                                      step,
#                                                      batch_loss.numpy()))
    clean_accuracy = test_model(clean_data)
    gen_accuracy = test_model(gen_data)
    print('Epoch {} Loss {:.4f} Gen Accuracy {:.4f} Clean Accuracy {:.4f}'.format(epoch + 1,total_loss / n_batches, gen_accuracy, clean_accuracy))
    
test_model(clean_data, log=True)


1792
1792
Epoch 1 Loss 1.4215 Gen Accuracy 0.0000 Clean Accuracy 0.0000
1792
1792
Epoch 2 Loss 0.8685 Gen Accuracy 0.0000 Clean Accuracy 0.0000
1792
1792
Epoch 3 Loss 0.5733 Gen Accuracy 0.7812 Clean Accuracy 0.0000
1792
1792
Epoch 4 Loss 0.4079 Gen Accuracy 1.5625 Clean Accuracy 2.3438
1792
1792
Epoch 5 Loss 0.3168 Gen Accuracy 10.1562 Clean Accuracy 6.2500
1792
1792
Epoch 6 Loss 0.2596 Gen Accuracy 8.5938 Clean Accuracy 7.8125
1792
1792
Epoch 7 Loss 0.2223 Gen Accuracy 20.3125 Clean Accuracy 16.4062
1792
1792
Epoch 8 Loss 0.1883 Gen Accuracy 18.7500 Clean Accuracy 25.0000
1792
1792
Epoch 9 Loss 0.1652 Gen Accuracy 28.1250 Clean Accuracy 25.0000
1792
1792
Epoch 10 Loss 0.1427 Gen Accuracy 38.2812 Clean Accuracy 34.3750
1792
удавы удавом удавам
цыганского цыганский цыганские
уводом уводах уводые
противоречиям противоречии противоречие
правопорядки правопорядок правопородки
христадельфианское христадельфианском христадальфиньномом
снизили снизят снизит>
приморскую приморском примосском>

38.28125