# Morphological Encoder-Decoder (MED)
Kann and Schütze, 2016

In [1]:
import keras
from keras.layers import Bidirectional, Concatenate, Dot, Input, GRU, Embedding
from keras.layers import RepeatVector, Dense, Activation
from keras.layers import MaxoutDense
from keras.models import Model, Sequential()
from keras.optimizers import Adadelta
from keras import backend as K
import numpy as np
from utils import *

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Load data

In [2]:
dataset = load_dataset("data/german-task2-train")
print("Dataset size: " + str(dataset.shape[0]))
print("\n" + str(dataset[0]))

Dataset size: 12800

[list(['pos=ADJ', 'case=ACC', 'comp=CMPR', 'gen=FEM', 'num=SG'])
 'aerodynamischere'
 list(['pos=ADJ', 'case=ACC', 'comp=SPRL', 'gen=NEUT', 'num=SG'])
 'aerodynamischstes']


In [4]:
X, Y, Xoh, Yoh, input_vocab, output_vocab, Tx, Ty = preprocess_data(dataset)
print("X shape: " + str(X.shape))
print("Y shape: " + str(Y.shape))
print("Xoh shape: " + str(Xoh.shape))
print("Yoh shape: " + str(Yoh.shape))

print("\n" + str(X[0,:]))
print(Xoh[0,:])
print("\nInput vocabulary:\n" + str(input_vocab))
print("\nOutput vocabulary:\n" + str(output_vocab))
print("\nTx: {}, Ty: {}".format(Tx, Ty))


X shape: (25600, 43)
Y shape: (25600, 31)
Xoh shape: (25600, 43, 82)
Yoh shape: (25600, 31, 32)

[52 34 38 41 48 77 59 64 68 73  2  6 19 16  5 26 15  2 14 10 20  4  9  6
 19  6  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]

Input vocabulary:
['<pad>', '<unk>', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'ß', 'ä', 'ö', 'ü', 'IN=alt=LGSPEC1', 'IN=aspect=PFV', 'IN=case=ACC', 'IN=case=DAT', 'IN=case=GEN', 'IN=case=NOM', 'IN=comp=CMPR', 'IN=comp=SPRL', 'IN=finite=NFIN', 'IN=gen=FEM', 'IN=gen=MASC', 'IN=gen=NEUT', 'IN=mood=IND', 'IN=mood={OPT/SBJV}', 'IN=mood={SBJV/COND}', 'IN=num=PL', 'IN=num=SG', 'IN=per=1', 'IN=per=2', 'IN=per=3', 'IN=pos=ADJ', 'IN=pos=N', 'IN=pos=V', 'IN=tense=PRS', 'IN=tense=PST', 'OUT=alt=LGSPEC1', 'OUT=aspect=PFV', 'OUT=case

## Initialization

In [5]:
in_vocab_size = len(input_vocab)
out_vocab_size = len(output_vocab)

repeator = RepeatVector(Tx)
concatenator = Concatenate(axis=-1)
densor1 = Dense(100, activation = "tanh")
densor2 = Dense(1, activation = "relu")
activator = Activation("softmax", name="attention_weights")
dotor = Dot(axes = 1)

## Attention model
Bahdanau et. al., 2014

In [6]:
def attention_step(a, s_prev):
    s_prev = repeator(s_prev)
    concat = concatenator([a, s_prev])
    e = densor1(concat)
    e = densor2(e)
    alphas = activator(e)
    context = dotor([alphas, a])
    
    return context

## Encoder-Decoder model

In [14]:
enc_size = 100
dec_size = 100
embed_size = 300

def encoder_decoder(Tx, Ty, enc_size, dec_size, in_vocab_size, out_vocab_size):
    model = Sequential()
    model.add(Embedding(in_vocab_size, embed_size, input_length=Tx))
    model.add(Bidirectional(GRU(enc_size, return_sequences=True, kernel_initializer="glorot_normal"), name="encoder"))
    """
    X = Input(shape=(Tx, in_vocab_size)) #Input(shape=(Tx,))
    s0 = Input(shape=(dec_size,), name='s0')
    
    encoder = Bidirectional(GRU(enc_size, return_sequences=True, kernel_initializer="glorot_normal"), name="encoder")
    decoder = GRU(dec_size, return_state=True, kernel_initializer="orthogonal", name="decoder")
    out_layer = Dense(out_vocab_size, activation="softmax", kernel_initializer="glorot_normal") #MaxoutDense(out_vocab_size)
    #embeddings = Embedding(in_vocab_size, embed_size, input_length=Tx)(X)
    
    s = s0
    outputs = []
    a = encoder(X) #encoder(embeddings)
    for t in range(Ty):  
        context = attention_step(a, s)
        s, _ = decoder(context, initial_state=s)
        out = out_layer(s)
        outputs.append(out)
    
    model = Model(inputs=[X, s0], outputs=outputs, name="MED")
    
    return model
    """

In [15]:
model = encoder_decoder(Tx, Ty, enc_size, dec_size, in_vocab_size, out_vocab_size)

In [16]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 43, 82)       0                                            
__________________________________________________________________________________________________
s0 (InputLayer)                 (None, 100)          0                                            
__________________________________________________________________________________________________
encoder (Bidirectional)         (None, 43, 200)      109800      input_3[0][0]                    
__________________________________________________________________________________________________
repeat_vector_1 (RepeatVector)  (None, 43, 100)      0           s0[0][0]                         
                                                                 decoder[0][0]                    
          

In [17]:
out = model.compile(optimizer=keras.optimizers.Adadelta(), metrics=['accuracy'], loss='categorical_crossentropy')

s0 = np.zeros((X.shape[0], dec_size))
outputs = list(Yoh.swapaxes(0, 1))

In [18]:
model.fit([Xoh, s0], outputs, epochs=20, batch_size=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20


Epoch 20/20


<keras.callbacks.History at 0x7f90f487b828>

In [20]:
model.save("models/model.h5")

  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node.arguments) + '. They will not be included '
  str(node

In [41]:
print("Train set predictions:")
for i in range(0, 100):
    prediction = model.predict([Xoh[i].reshape(1,Xoh[0].shape[0], Xoh[0].shape[1]), s0])
    prediction = np.argmax(prediction, axis = -1)
    output = list(filter(lambda x: x != "<pad>", [output_vocab[int(i)] for i in prediction]))
    print("true: " + str(dataset[i, 3]) + ", predicted: " + "".join(output))

Train set predictions:
true: aerodynamischstes, predicted: aerodynymischssess
true: aktivstes, predicted: akkiivstes
true: ambitionierter, predicted: ambittonierteer
true: aufnahmefähigerer, predicted: aufeuhmhhähhigerer
true: aufrechtste, predicted: aufrrecteste
true: ausdrucksloserer, predicted: ausdruccslosseeer
true: ausgegrautste, predicted: ausgegrauteette
true: automatischsten, predicted: automatischsten
true: autoritärerer, predicted: autoritirtere
true: bedauernswerteste, predicted: bedauuresswerteste
true: bedingungslosester, predicted: bedindungglossstee
true: beknackte, predicted: beknacktee
true: beschönigender, predicted: beschööngender
true: bestreitbarere, predicted: bestreitbarere
true: brechreizerregenderes, predicted: beechrrrzzzeeeeenderes
true: dehnbarstes, predicted: dehnbarsste
true: dekorativeres, predicted: dekorativeres
true: demokratischere, predicted: demokratischere
true: dominanter, predicted: dominanter
true: dubiosesten, predicted: dubiosesstnn
true: dur