# 7.3 Praxis des Anlernens eines konvolutionalen Netzes mit Textdaten (Rechtschreibkorrektur)

### 01 - Laden der Daten und des Objekts vom Typ SequenceEncoder 
Das Objekt wurde im Code zu Kapitel 6 (rekurrente Netze) angelernt und gespeichert.

In [3]:
import numpy as np
import pandas as pd
import joblib
from os.path import join
from sequence_encoder import SequenceEncoder

## Build the data directory from its components so that the separator matches
## the host OS; a hard-coded backslash path only resolves on Windows.
path = join('..', 'Data')

## Load the previously fitted SequenceEncoder object.
## NOTE(review): joblib.load unpickles the file, which can execute arbitrary
## code - only load 'seq_encoder.pkl' when it comes from a trusted source.
seq_encoder = joblib.load('seq_encoder.pkl')

## Load the training and test data
df_train = pd.read_csv(join(path, 'train_spelling.csv'))
df_test = pd.read_csv(join(path, 'test_spelling.csv'))
df_train.head()

Unnamed: 0,misspelling,word
0,elwern,eltern
1,gemeinsan,gemeinsam
2,beispielsweiße,beispielsweise
3,promramm,programm
4,yeinen,keinen


### 02 - Daten für Analyse vorbereiten
Hier werden die x-Daten mit Hilfe des SequenceEncoder-Objekts (Methode: gen_one_hot_data) in sequenzielle One-Hot-Arrays zerlegt. Die y-Daten werden als Integers codiert (Methode: word_to_int).

In [6]:
## Split each frame into the model input (misspelled word) and the target
## (correctly spelled word)
X_train, X_test = df_train['misspelling'], df_test['misspelling']
y_train, y_test = df_train['word'], df_test['word']

## Turn the inputs into sequential one-hot arrays (train first, then test)
X_train, X_test = (seq_encoder.gen_one_hot_data(words) for words in (X_train, X_test))

## Encode every target word as its integer class id (train first, then test)
y_train = np.array(list(map(seq_encoder.word_to_int, y_train.values)))
y_test = np.array(list(map(seq_encoder.word_to_int, y_test.values)))

## Show the resulting array shapes as a sanity check
tuple(arr.shape for arr in (X_train, X_test, y_train, y_test))

((12737, 15, 32), (3184, 15, 32), (12737,), (3184,))

### 03 - Aufbau des Modells

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, GRU, Dense, Conv1D, MaxPool1D, Flatten

## Hyperparameters of the network, collected in one place so they are easy to tune
N_FILTERS = 32     # number of filters of the 1D convolution
KERNEL_SIZE = 3    # width of the convolution window along the sequence axis
GRU_UNITS = 32     # units per direction of the bidirectional GRU
N_CLASSES = 500    # width of the softmax output layer
                   # NOTE(review): presumably the number of distinct target
                   # words known to seq_encoder - confirm against the encoder

## Take the per-sample input shape from the encoded training data instead of
## hard-coding it, so the model stays in sync with the encoder's output
input_shape = X_train.shape[1:]

## Convolution over the one-hot sequence -> bidirectional GRU -> softmax classifier
model = Sequential()
model.add(Conv1D(filters=N_FILTERS,
                 input_shape=input_shape,
                 kernel_size=KERNEL_SIZE,
                 activation='relu'))
model.add(Bidirectional(GRU(units=GRU_UNITS)))
model.add(Dense(units=N_CLASSES, activation='softmax'))
model.summary()

## The targets are plain integer class ids (not one-hot vectors), hence the
## sparse variant of the categorical cross-entropy
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 13, 32)            3104      
_________________________________________________________________
bidirectional (Bidirectional (None, 64)                12672     
_________________________________________________________________
dense (Dense)                (None, 500)               32500     
Total params: 48,276
Trainable params: 48,276
Non-trainable params: 0
_________________________________________________________________


### 04 - Anlernen des Modells

In [8]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

## End training once the validation loss has gone one epoch without improving,
## and put the best weights seen so far back into the model
stopping = EarlyStopping(
    monitor='val_loss',
    patience=1,
    restore_best_weights=True,
)

## Write the model to disk only when the validation loss improves
checkpoint = ModelCheckpoint(
    filepath='model_auto_correction_bid.h5',
    monitor='val_loss',
    save_best_only=True,
)

## NOTE(review): the test split doubles as validation data here, so early
## stopping and checkpoint selection are tuned on it; a score computed on
## X_test afterwards would be optimistically biased. Consider a separate
## validation split.
fit_options = dict(
    epochs=100,
    batch_size=32,
    callbacks=[stopping, checkpoint],
    validation_data=(X_test, y_test),
)
history = model.fit(X_train, y_train, **fit_options)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100


### 05 - Tests durchführen

In [9]:
## Encode a few hand-picked misspellings with the same encoder method that
## produced the training inputs
misspelled_words = ['alllein', 'frahe', 'beistiel']
test_example = seq_encoder.gen_one_hot_data(np.array(misspelled_words))

## For every example, take the class with the highest predicted probability
pred_word_prob = model.predict(test_example)
pred_word_idx = pred_word_prob.argmax(axis=1)

## Decode the class ids back into words and print one word per line
print(*(seq_encoder.int_to_word(idx) for idx in pred_word_idx), sep='\n')

allein
frage
beispiel
