# Morphological Encoder-Decoder (MED)
Kann and Schütze, 2016

In [2]:
import keras
from keras.layers import Bidirectional, Concatenate, Dot, Input, GRU, Embedding
from keras.layers import RepeatVector, Dense, Activation
from keras.layers import MaxoutDense
from keras.models import Model
from keras.optimizers import Adadelta
from keras import backend as K
import numpy as np
from utils import *

## Load data

In [3]:
# Read the German task-2 training split (load_dataset presumably comes from
# the star-import of the local `utils` module).
dataset, char_vocab, tag_vocab = load_dataset("data/german-task2-train")

# Report the number of examples, both vocabularies and the first example.
print("Dataset size:", dataset.shape[0])
print("Character vocabulary:", char_vocab, sep="\n")
print("\nTag vocabulary:", tag_vocab, sep="\n")
print("", dataset[0], sep="\n")

Dataset size: 12800
Character vocabulary:
['<pad>', '<unk>', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'ß', 'ä', 'ö', 'ü']

Tag vocabulary:
['<unk>', 'alt=LGSPEC1', 'aspect=PFV', 'case=ACC', 'case=DAT', 'case=GEN', 'case=NOM', 'comp=CMPR', 'comp=SPRL', 'finite=NFIN', 'gen=FEM', 'gen=MASC', 'gen=NEUT', 'mood=IND', 'mood={OPT/SBJV}', 'mood={SBJV/COND}', 'num=PL', 'num=SG', 'per=1', 'per=2', 'per=3', 'pos=ADJ', 'pos=N', 'pos=V', 'tense=PRS', 'tense=PST']

['pos=ADJ,case=ACC,comp=CMPR,gen=FEM,num=SG' 'aerodynamischere'
 'pos=ADJ,case=ACC,comp=SPRL,gen=NEUT,num=SG' 'aerodynamischstes']


## Initialization

In [4]:
Tx = 50  # number of encoder time steps (Input length and RepeatVector count)
Ty = 30  # number of decoder steps unrolled by encoder_decoder


def softmax_over_time(x):
    """Softmax along axis 1 (the time axis) of a (batch, time, 1) tensor.

    The built-in "softmax" activation normalises the last axis. The attention
    energies produced by `densor2` have a last axis of size 1, so the built-in
    would turn every weight into exactly 1.0. Normalising over axis 1 makes
    the weights of one example sum to 1 across the input positions.
    """
    # Subtract the per-example maximum before exponentiating for numerical stability.
    shifted = K.exp(x - K.max(x, axis=1, keepdims=True))
    return shifted / K.sum(shifted, axis=1, keepdims=True)


# initialize layers as global variables so every decoder step shares their weights
repeator = RepeatVector(Tx)
concatenator = Concatenate(axis=-1)
densor1 = Dense(100, activation="tanh")
densor2 = Dense(1, activation="relu")
# Attention weights are normalised over the time axis (axis=1), not the last axis.
activator = Activation(softmax_over_time, name="attention_weights")
dotor = Dot(axes=1)

## Attention model
Bahdanau et al., 2014

In [5]:
def attention_step(a, s_prev):
    """Compute one attention context vector from the encoder outputs.

    Uses the module-level shared layers `repeator`, `concatenator`,
    `densor1`, `densor2`, `activator` and `dotor`.

    Arguments:
        a -- encoder output sequence (one vector per input time step)
        s_prev -- previous decoder state (one vector per example)

    Returns:
        context -- result of `dotor` applied to the attention weights and `a`
    """
    # Copy the decoder state once per input position so it can be paired
    # with every encoder output.
    s_tiled = repeator(s_prev)
    joined = concatenator([a, s_tiled])

    # Two-layer scoring network followed by the shared normalisation layer.
    energies = densor2(densor1(joined))
    weights = activator(energies)

    # Combine the encoder outputs using the attention weights.
    return dotor([weights, a])

## Encoder-Decoder model

In [6]:
enc_size = 100    # units per direction of the bidirectional encoder GRU
dec_size = 100    # units of the decoder GRU
embed_size = 300  # default width of the input embeddings

def encoder_decoder(Tx, Ty, enc_size, dec_size, vocab_size, embed_dim=None):
    """Build the attention-based encoder-decoder model.

    Arguments:
        Tx -- length of the (integer-encoded) input sequence. The shared
              `repeator` layer used by `attention_step` is built from the
              module-level `Tx`, so this argument must match it.
        Ty -- number of decoder steps; the model has one output per step
        enc_size -- units per direction of the bidirectional encoder GRU
        dec_size -- units of the decoder GRU (also the width of `s0`)
        vocab_size -- size of the embedding table and of each output layer
        embed_dim -- embedding width; defaults to the module-level
                     `embed_size` when left as None

    Returns:
        model -- Keras Model taking `[X, s0]` and returning a list of `Ty`
                 tensors, each a probability distribution over `vocab_size`
    """
    if embed_dim is None:
        embed_dim = embed_size

    X = Input(shape=(Tx,))
    # s0 seeds the decoder GRU, so it must have the decoder's width
    # (it was previously sized with enc_size, which only worked when
    # enc_size == dec_size).
    s0 = Input(shape=(dec_size,), name='s0')

    encoder = Bidirectional(GRU(enc_size, return_sequences=True), name="encoder")
    decoder = GRU(dec_size, return_state=True, name="decoder")
    maxout = MaxoutDense(vocab_size)
    # MaxoutDense emits unnormalised scores; categorical cross-entropy expects
    # probabilities, so a shared softmax is applied to every step's output.
    to_probs = Activation("softmax")
    embeddings = Embedding(vocab_size, embed_dim, input_length=Tx)(X)

    s = s0
    outputs = []
    a = encoder(embeddings)
    for t in range(Ty):
        # Attend over the encoder outputs using the current decoder state,
        # then advance the decoder by one step from that state.
        context = attention_step(a, s)
        s, _ = decoder(context, initial_state=s)
        outputs.append(to_probs(maxout(s)))

    model = Model(inputs=[X, s0], outputs=outputs, name="MED")

    return model

In [7]:
# Instantiate the MED network with the sizes configured above.
# NOTE(review): vocab_size is hard-coded to 100 although the printed character
# vocabulary has 32 entries — confirm which symbols (characters only, or
# characters plus tags) the encoded inputs contain before tightening this.
model = encoder_decoder(
    Tx=Tx,
    Ty=Ty,
    enc_size=enc_size,
    dec_size=dec_size,
    vocab_size=100,
)



In [8]:
# Print the layer-by-layer summary (output shapes, parameter counts and
# connections) of the model built above.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 50)           0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 50, 300)      30000       input_1[0][0]                    
__________________________________________________________________________________________________
s0 (InputLayer)                 (None, 100)          0                                            
__________________________________________________________________________________________________
encoder (Bidirectional)         (None, 50, 200)      240600      embedding_1[0][0]                
__________________________________________________________________________________________________
repeat_vec

In [9]:
# Configure training: Adadelta with its default settings, categorical
# cross-entropy as the loss, and accuracy as the reported metric.
# NOTE(review): compile() is not documented to return a value, so `out` is
# presumably None — confirm before relying on it.
out = model.compile(
    loss='categorical_crossentropy',
    optimizer=Adadelta(),
    metrics=['accuracy'],
)