# 10.3.1 Ein einfaches Encoder-Decoder-Modell

### 01 - Daten laden

In [2]:
from os.path import join
import numpy as np
import pandas as pd

# Data directory relative to the notebook. Built with join() so the path
# separator is correct on every OS (the raw '..\Data' literal only resolves
# on Windows, where join('..', 'Data') yields the very same string).
path = join('..', 'Data')

### 1) Load the misspelling data
df_train = pd.read_csv(join(path, 'train_spelling.csv'))
df_test = pd.read_csv(join(path, 'test_spelling.csv'))
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat stacks the rows the same way and keeps the original indices.
df = pd.concat([df_train, df_test])
# Sanity check: no rows were lost or duplicated by the concatenation.
assert len(df) == len(df_train) + len(df_test)

# Model input: the misspelled words; target: the correctly spelled words.
X, y = df['misspelling'].values.tolist(), df['word'].values.tolist()
X[:5], y[:5]

(['jeddes', 'verlchivdene', 'zwnächst', 'gang', 'aßso'],
 ['jedes', 'verschiedene', 'zunächst', 'ging', 'also'])

### 02 - Zeitversetzte Eingabe produzieren (für das Teacher-Forcing-Verfahren)

In [3]:
def insert_start_sign(words: list, start='\t'):
    """Return a new list in which every word is prefixed with ``start``.

    Args:
        words: Iterable of strings; it is not modified.
        start: Marker prepended to each word (a tab by default).

    Returns:
        A new list with ``start + word`` for every word, in input order.
    """
    return [start + word for word in words]

# X1: encoder input, the misspelled words (same list object as X).
# X2: decoder input, the target words with the start sign prepended, which
#     shifts them one step relative to the targets in y.
X1, X2 = X, insert_start_sign(y)
X1[:5], X2[:5], y[:5]

(['jeddes', 'verlchivdene', 'zwnächst', 'gang', 'aßso'],
 ['\tjedes', '\tverschiedene', '\tzunächst', '\tging', '\talso'],
 ['jedes', 'verschiedene', 'zunächst', 'ging', 'also'])

### 03 - One-Hot-Kodierung der Sequenzen

In [4]:
#### 3) One-Hot-Encodierung
def max_char_length(words: list):
    """Return the length of the longest word in ``words``.

    Args:
        words: Iterable of sized items (strings here).

    Returns:
        The largest ``len(word)`` found, or 0 when ``words`` is empty.
    """
    return max((len(word) for word in words), default=0)

def words_to_char_matrix(words: list,
                         char_dic: dict,
                         max_len: int,
                         end_sign='\n'):
    """One-hot encode words character by character.

    Args:
        words: List of strings to encode.
        char_dic: Maps each character to its column index.
        max_len: Number of character positions per word.
        end_sign: Character whose column is set on every position after
            the end of a word (padding).

    Returns:
        An int32 array of shape ``(len(words), max_len, len(char_dic))``.

    Raises:
        KeyError: If a character (or a needed ``end_sign``) is missing
            from ``char_dic``.
        IndexError: If a word is longer than ``max_len``.
    """
    one_hot = np.zeros(shape=(len(words), max_len, len(char_dic)),
                       dtype='int32')
    for row, word in enumerate(words):
        for pos, char in enumerate(word):
            one_hot[row, pos, char_dic[char]] = 1
        # Only look up the end sign when there is something to pad.
        if len(word) < max_len:
            one_hot[row, len(word):max_len, char_dic[end_sign]] = 1
    return one_hot

def char_matrix_to_words( matrix: np.array,
                          char_dic: dict):
    """Decode a one-hot character matrix back into a list of characters.

    Args:
        matrix: Sequence of rows; each row is decoded via the position of
            its largest value (``np.argmax``).
        char_dic: Maps each character to its column index; it is inverted
            here to map indices back to characters.

    Returns:
        A list with one character per row of ``matrix``.
    """
    idx_to_char = {idx: char for char, idx in char_dic.items()}
    return [idx_to_char[np.argmax(row)] for row in matrix]

#### Funktionen aufrufen

In [5]:
# Character vocabulary: '.', a-z, the German special letters, plus the
# end sign '\n' and the start sign '\t'; each maps to its column index.
char_dic = {char: i for i, char
            in enumerate('.abcdefghijklmnopqrstuvwxyzäöüß\n\t')}
len_X1, len_X2, len_y = (max_char_length(seq) for seq in (X1, X2, y))
len_X1, len_X2, len_y


# The targets are encoded with the decoder-input length (len_X2) so that
# X2_ and y_ have the same number of time steps.
X1_ = words_to_char_matrix(X1, char_dic, len_X1)
X2_ = words_to_char_matrix(X2, char_dic, len_X2)
y_ = words_to_char_matrix(y, char_dic, len_X2)
X1_.shape, X2_.shape, y_.shape

((15921, 15, 33), (15921, 15, 33), (15921, 15, 33))

#### Test (Rückübersetzung)

In [6]:
# Round-trip check: decode the first encoded sample of each matrix back
# into a list of characters. Only the last expression of a cell is
# displayed, so the cell output is the decoding of y_[0].
char_matrix_to_words(X1_[0], char_dic)
char_matrix_to_words(X2_[0], char_dic)
char_matrix_to_words(y_[0], char_dic)

['j',
 'e',
 'd',
 'e',
 's',
 '\n',
 '\n',
 '\n',
 '\n',
 '\n',
 '\n',
 '\n',
 '\n',
 '\n',
 '\n']

### 04 - Sequence-to-Sequence Trainingsmodell aufsetzen

In [17]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (GRU, Input, 
                                     Dense, TimeDistributed)

# Size of the GRU hidden state, shared by encoder and decoder so that the
# encoder result can be used as the decoder's initial state.
units= 100
# Encoder: reads a one-hot character sequence of arbitrary length (None)
# and outputs a single vector of size `units` (no return_sequences).
encoder_in = Input(shape=(None, len(char_dic)), name='encoder_in')
encoder_state = GRU( units=units, 
                     name='encoder_gru')(encoder_in)

# Decoder: receives the start-sign-shifted target sequence and starts from
# the encoder vector; it returns the output of every time step plus its
# final state.
decoder_in = Input(shape=(None, len(char_dic)), name='decoder_in')
decoder_gru = GRU( units=units, return_sequences=True, return_state=True, 
           name='decoder_gru')
# gru_state is not used by the training model below.
gru_out, gru_state = decoder_gru(decoder_in, initial_state=encoder_state)
# Softmax over the character vocabulary, applied to every time step.
dense = Dense(units=len(char_dic), activation='softmax')
decoder_out = TimeDistributed(dense, name='time_distributed')(gru_out)

# Training model: (encoder input, decoder input) -> per-step character
# distribution. The layer names are looked up again by the inference cell.
model = Model([encoder_in, decoder_in], decoder_out )
model.compile( loss='categorical_crossentropy', optimizer='adam', 
               metrics=['accuracy'])
model.summary()

Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_in (InputLayer)         [(None, None, 33)]   0                                            
__________________________________________________________________________________________________
decoder_in (InputLayer)         [(None, None, 33)]   0                                            
__________________________________________________________________________________________________
encoder_gru (GRU)               (None, 100)          40500       encoder_in[0][0]                 
__________________________________________________________________________________________________
decoder_gru (GRU)               [(None, None, 100),  40500       decoder_in[0][0]                 
                                                                 encoder_gru[0][0]     

### 05 - Training des Modells

In [18]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop training once val_loss has not improved for 3 epochs and roll the
# model back to the weights of the best epoch.
stopping = EarlyStopping( monitor='val_loss', 
                            patience=3,
                            restore_best_weights=True)
# Write the model to disk whenever val_loss reaches a new best value.
checkpoint = ModelCheckpoint( filepath='SeqToSeq_spelling.h5',
                              monitor='val_loss',
                              save_best_only=True)
# Teacher forcing: the decoder input X2_ is the target shifted by the start
# sign, and the model learns to predict y_.
# NOTE(review): validation_split presumably holds out the last 20 % of the
# samples without shuffling; since df is train rows followed by test rows,
# the validation set would then consist mostly of test rows - confirm.
history = model.fit( [X1_, X2_], y_, 
                     epochs=50, 
                     batch_size=32,
                     validation_split=.2,
                     callbacks=[stopping, checkpoint])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50


#### Angelerntes Modell laden

In [19]:
from tensorflow.keras.models import load_model
# Reload the checkpoint written by ModelCheckpoint during training; this
# rebinds `model`, replacing the in-memory model from the training cell.
model = load_model( 'SeqToSeq_spelling.h5')
model.summary()

Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_in (InputLayer)         [(None, None, 33)]   0                                            
__________________________________________________________________________________________________
decoder_in (InputLayer)         [(None, None, 33)]   0                                            
__________________________________________________________________________________________________
encoder_gru (GRU)               (None, 100)          40500       encoder_in[0][0]                 
__________________________________________________________________________________________________
decoder_gru (GRU)               [(None, None, 100),  40500       decoder_in[0][0]                 
                                                                 encoder_gru[0][0]     

### 06 - Inferenzmodell aufsetzen

In [21]:
### 1) Assemble the encoder model
# Reuse the trained encoder layers: character sequence in, encoder GRU
# output (used as the decoder's initial state) out.
encoder_in = model.get_layer('encoder_in').input
encoder_state = model.get_layer('encoder_gru').output

encoder_model = Model(encoder_in, encoder_state)

### 2) Assemble the decoder model
# Must match the `units` the GRU layers were built with.
units = 100
# At inference time the decoder state is fed in explicitly so the decoder
# can be called one step at a time.
decoder_state_input = Input(shape=(units,), name='decoder_state_in')
decoder_inputs = model.get_layer('decoder_in').input
decoder_gru = model.get_layer('decoder_gru')
decoder_time = model.get_layer('time_distributed')

# Re-apply the trained layers to the new state input; the weights are
# shared with the training model because the same layer objects are used.
decoder_outputs, decoder_state = decoder_gru(
    decoder_inputs, initial_state=decoder_state_input)
decoder_outputs = decoder_time(decoder_outputs)

# Decoder model: (decoder input, previous state) -> (character
# distribution per step, new state).
decoder_model = Model(
    [decoder_inputs, decoder_state_input],
    [decoder_outputs, decoder_state])

# summary() prints and returns None, hence the (None, None) cell result.
encoder_model.summary(), decoder_model.summary()

Model: "functional_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_in (InputLayer)      [(None, None, 33)]        0         
_________________________________________________________________
encoder_gru (GRU)            (None, 100)               40500     
Total params: 40,500
Trainable params: 40,500
Non-trainable params: 0
_________________________________________________________________
Model: "functional_11"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
decoder_in (InputLayer)         [(None, None, 33)]   0                                            
__________________________________________________________________________________________________
decoder_state_in (InputLayer)   [(None, 100)]        0                                       

(None, None)

#### Beispielschätzungen durchführen

In [22]:
import numpy as np
def predict_correct(misspelled, encoder, decoder, 
                    char_dict: dict, len_seq=15):
    """Greedily decode the corrected spelling of ``misspelled``.

    The word is one-hot encoded and passed through ``encoder`` once. The
    decoder is then called one step at a time: it is fed the previously
    predicted character (initially the start sign ``'\\t'``) together with
    the current state, and the most probable character is appended to the
    result until the end sign ``'\\n'`` is predicted or the step limit is
    reached.

    Args:
        misspelled: Word to correct; every character must be a key of
            ``char_dict``.
        encoder: Model whose ``predict`` maps the encoded word to the
            initial decoder state.
        decoder: Model whose ``predict`` maps ``[one-hot char, state]`` to
            ``(char probabilities, new state)``.
        char_dict: Maps each character to its one-hot column index; must
            contain ``'\\t'`` and ``'\\n'``.
        len_seq: Number of encoder time steps and maximum number of
            decoding steps. Words longer than this are encoded at their
            full length instead of raising an ``IndexError``.

    Returns:
        The decoded word as a string, without start or end sign.

    Raises:
        KeyError: If ``misspelled`` contains a character that is not in
            ``char_dict``.
    """
    # Never encode into fewer steps than the word has characters; for words
    # that fit into len_seq this equals len_seq, so nothing changes.
    n_steps = max(len_seq, len(misspelled))
    n_chars = len(char_dict)

    input_seq = words_to_char_matrix([misspelled], char_dict, n_steps)
    index_char_dict = {i: char for char, i in char_dict.items()}

    # verbose=0 avoids one progress bar per predict call on Keras versions
    # that log by default.
    state = encoder.predict(input_seq, verbose=0)

    # The first decoder input is the start sign; afterwards it is always
    # the character predicted in the previous step.
    char_index = char_dict['\t']
    decoded_chars = []
    for _ in range(n_steps):
        decoder_seq = np.zeros(shape=(1, 1, n_chars), dtype='int32')
        decoder_seq[0, 0, char_index] = 1

        output_char, state = decoder.predict([decoder_seq, state], verbose=0)

        char_index = np.argmax(output_char[0, 0])
        char = index_char_dict[char_index]
        if char == '\n':
            break
        decoded_chars.append(char)

    return ''.join(decoded_chars)

In [23]:
# Example: run the inference models on a single misspelled word.
misspelled = 'vielheiicht'
predict_correct(misspelled, encoder_model, decoder_model, char_dic)

'vielleicht'