# Finale Aufgabe für Praktikum Deep Learning <br>Textgenerierung mit RNN: Modelltraining

* **Name:** Fabian Schotte
* **Email:** fabian.schotte@rwu.de
* **Matrikelnummer:** 35604
* **Studiengang:** Angewandte Informatik

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
import os
import matplotlib.pylab as plt
from work import models
import time 

os.makedirs("models", exist_ok=True)

## Vorbereitung


### Laden der Trainingsdaten

In [126]:
df_train = pd.read_csv('work/kaggle_sentiment/tweet_sentiment_train.csv', encoding='utf-8', encoding_errors='replace')
df_test = pd.read_csv('work/kaggle_sentiment/tweet_sentiment_test.csv', encoding='utf-8', encoding_errors='replace')

kaggle_text_train = df_train['text'].str.cat(sep='\n')
kaggle_text_test = df_test['text'].str.cat(sep='\n')
# kaggle_text = kaggle_text_train + '\n' + kaggle_text_test
kaggle_text = kaggle_text_train
# kaggle_text = kaggle_text_test

print(kaggle_text[:500])

 I`d have responded, if I were going
 Sooo SAD I will miss you here in San Diego!!!
my boss is bullying me...
 what interview! leave me alone
 Sons of ****, why couldn`t they put them on the releases we already bought
http://www.dothebouncy.com/smf - some shameless plugging for the best Rangers forum on earth
2am feedings for the baby are fun when he is all smiles and coos
Soooo high
 Both of you
 Journey!? Wow... u just became cooler.  hehe... (is that possible!?)
 as much as i love to be hopef


In [127]:
vocab = sorted(set(kaggle_text))
print(vocab)
print(f"vocab size = {len(vocab)}")

['\t', '\n', ' ', '!', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', '\xa0', '´', '½', '¿', 'Â', 'ï']
vocab size = 102


## Preprocessing

In [128]:
# example_texts = ['hello world', 'hello world']
# chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')
# chars

In [129]:
ids_from_chars = keras.layers.StringLookup(vocabulary=list(vocab), mask_token=None)
# ids = ids_from_chars(chars)
# print(ids)

chars_from_ids = keras.layers.StringLookup(vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)
# chars = chars_from_ids(ids)
# print(chars)

# tf.strings.reduce_join(chars, axis=-1).numpy()

In [130]:
def text_from_ids(ids):
  return tf.strings.reduce_join(chars_from_ids(ids), axis=-1)

In [131]:
all_ids = ids_from_chars(tf.strings.unicode_split(kaggle_text, 'UTF-8'))
all_ids

<tf.Tensor: shape=(1905188,), dtype=int64, numpy=array([ 3, 43, 66, ..., 85, 11, 11])>

In [132]:
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

for ids in ids_dataset.take(10):
    print(chars_from_ids(ids).numpy().decode('utf-8'))

 
I
`
d
 
h
a
v
e
 


In [133]:
seq_length = 100

In [134]:
sequences = ids_dataset.batch(seq_length + 1, drop_remainder=True)

for seq in sequences.take(1):
  print(chars_from_ids(seq).numpy())
for seq in sequences.take(1):
  print(text_from_ids(seq).numpy())

[b' ' b'I' b'`' b'd' b' ' b'h' b'a' b'v' b'e' b' ' b'r' b'e' b's' b'p'
 b'o' b'n' b'd' b'e' b'd' b',' b' ' b'i' b'f' b' ' b'I' b' ' b'w' b'e'
 b'r' b'e' b' ' b'g' b'o' b'i' b'n' b'g' b'\n' b' ' b'S' b'o' b'o' b'o'
 b' ' b'S' b'A' b'D' b' ' b'I' b' ' b'w' b'i' b'l' b'l' b' ' b'm' b'i'
 b's' b's' b' ' b'y' b'o' b'u' b' ' b'h' b'e' b'r' b'e' b' ' b'i' b'n'
 b' ' b'S' b'a' b'n' b' ' b'D' b'i' b'e' b'g' b'o' b'!' b'!' b'!' b'\n'
 b'm' b'y' b' ' b'b' b'o' b's' b's' b' ' b'i' b's' b' ' b'b' b'u' b'l'
 b'l' b'y' b'i']
b' I`d have responded, if I were going\n Sooo SAD I will miss you here in San Diego!!!\nmy boss is bullyi'


In [135]:
def split_input_target(sequence):
    input_text = sequence[:-1]
    target_text = sequence[1:]
    return input_text, target_text

In [136]:
# split_input_target(list("Tensorflow"))

In [137]:
dataset = sequences.map(split_input_target)
len(dataset)

18863

In [138]:
for input_example, target_example in dataset.take(1):
    print("Input:", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())
    print("Input shape:", input_example.shape)
    print("Target shape:", target_example.shape)

Input: b' I`d have responded, if I were going\n Sooo SAD I will miss you here in San Diego!!!\nmy boss is bully'
Target: b'I`d have responded, if I were going\n Sooo SAD I will miss you here in San Diego!!!\nmy boss is bullyi'
Input shape: (100,)
Target shape: (100,)


In [139]:
BATCH_SIZE = 150
BUFFER_SIZE = 1000

dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)
dataset

<_BatchDataset element_spec=(TensorSpec(shape=(150, 100), dtype=tf.int64, name=None), TensorSpec(shape=(150, 100), dtype=tf.int64, name=None))>

## RNN-Modelle
### Variablen für Layers

In [140]:
vocab_size = len(ids_from_chars.get_vocabulary())
embedding_dim = 256
rnn_units = 2048

### 1. GRU-Modell

In [141]:
gru_model_1 = models.get_gru_model_1(vocab_size, embedding_dim, rnn_units)

#### Testen des Modells

In [142]:
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions_model_1 = gru_model_1(input_example_batch)
    print(example_batch_predictions_model_1.shape, "# (batch_size, sequence_length, vocab_size)")

(150, 100, 103) # (batch_size, sequence_length, vocab_size)


In [143]:
gru_model_1.summary()

In [144]:
sampled_indices_model_1 = tf.random.categorical(example_batch_predictions_model_1[0], num_samples=1)
sampled_indices_model_1 = tf.squeeze(sampled_indices_model_1, axis=-1).numpy()
sampled_indices_model_1

array([ 13,  26,  15,  28,  20,  95,  60,  17,  49,  84,  78,  67,  62,
        24,  26,  92,  58,  13,  59,  17,  34,   3,  99,  29,  38,  91,
        19,  71,  75,  42,  49,  86,   0,  38,  55,  28,  42,  36,  16,
        30,   6,  65,  85,  82,   4,  77,  57,   0,  41,  90,  10,  51,
        84,  47,  84,  78,  97,  45,  49, 101,  74,   2,  62,  11,  97,
        93,  48,  52,  46,  83,  98,  57, 102,  74,  53,  87,  32,  62,
        39, 101,  83,  62,  97,  10,  75,  70,  30,  18,  26,  97,  65,
        38,  56,  86,  14,  54,  14,  98,  72,   7])

In [145]:
print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices_model_1).numpy())

Input:
 b't sit down through the whole thing  mcfly did you see me and ma best mate we were in tutus\n2 days wi'

Next Char Predictions:
 b'+8-:2}Z/Orla\\68zX+Y/@ \xc2\xbd;Dy1eiHOt[UNK]DU:HB.<$_sp!kW[UNK]Gx(QrMrl\xc2\xa0KO\xc3\x82h\n\\)\xc2\xa0{NRLq\xc2\xb4W\xc3\xafhSu>\\E\xc3\x82q\\\xc2\xa0(id<08\xc2\xa0_DVt,T,\xc2\xb4f%'


### Training

#### Loss

In [146]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [147]:
example_batch_mean_loss_model_1 = loss(target_example_batch, example_batch_predictions_model_1)
print("Prediction shape: ", example_batch_predictions_model_1.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss_model_1)

Prediction shape:  (150, 100, 103)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.634127, shape=(), dtype=float32)


  output, from_logits = _get_logits(


In [148]:
tf.exp(example_batch_mean_loss_model_1).numpy()

np.float32(102.938034)

##### Optimizer

In [149]:
gru_model_1.compile(optimizer='adam', loss=loss, metrics=['accuracy'], run_eagerly=True)

##### Early Stopping

In [150]:
early_stopping_gru_model_1 = keras.callbacks.EarlyStopping(monitor="loss", min_delta=0.002, patience=2)

#### Konfiguration von Checkpoints

In [151]:
checkpoint_dir = './work/training_checkpoints/gru_model_1'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}.weights.h5")

checkpoint_callback_gru_model_1=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

#### Ausführen des Trainings

In [152]:
EPOCHS = 30

In [None]:
start = time.perf_counter()
gru_model_1_history = gru_model_1.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback_gru_model_1, early_stopping_gru_model_1])
end = time.perf_counter()
gru_model_1_training_time = end - start

Epoch 1/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 255ms/step - accuracy: 0.1883 - loss: 3.5488
Epoch 2/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 250ms/step - accuracy: 0.3953 - loss: 2.1911
Epoch 3/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 249ms/step - accuracy: 0.4892 - loss: 1.8226
Epoch 4/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 249ms/step - accuracy: 0.5288 - loss: 1.6601
Epoch 5/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 253ms/step - accuracy: 0.5521 - loss: 1.5631
Epoch 6/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 250ms/step - accuracy: 0.5712 - loss: 1.4829
Epoch 7/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 253ms/step - accuracy: 0.5886 - loss: 1.4118
Epoch 8/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 252ms/step - accuracy: 0.6081 - loss: 1.3346
Epoch 9/30
[1m1

#### Speichern

In [154]:
gru_model_1.save('models/gru_model_1.keras')

### 2. GRU-Modell

In [155]:
gru_model_2 = models.get_gru_model_2(vocab_size, embedding_dim, rnn_units)

#### Testen des Modells

In [156]:
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions_gru_model_2 = gru_model_2(input_example_batch)
    print(example_batch_predictions_gru_model_2.shape, "# (batch_size, sequence_length, vocab_size)")

(150, 100, 103) # (batch_size, sequence_length, vocab_size)


In [157]:
gru_model_2.summary()

In [158]:
sampled_indices_gru_model_2 = tf.random.categorical(example_batch_predictions_gru_model_2[0], num_samples=1)
sampled_indices_gru_model_2 = tf.squeeze(sampled_indices_gru_model_2, axis=-1).numpy()
sampled_indices_gru_model_2

array([ 45, 102,  59,  94,  42,  23,  77,  91,  76,  80,  40,  61,  52,
         8, 102,  90,  63,   3,  75,  57,  90, 102,  80,  23,   2,  79,
        94,  97,  74,  85,  68,  13,  40,  72,  96,  23,  65,   5,  61,
        65,  61,  59,  64,  47,  64,  57,  98,  64,  59,  92,  62,  87,
        20,  41,  52,  70,  76,  81,  47,  13,  76,   8,  14,  39,  33,
        20,  73,  11,   3,  60,  29,   0,  82,  85,  64,  87,  20,  34,
        51,  15,  10,  71,  83,  92,  20,  13,  87,  90,  38,  92,   5,
        60,  98,  40, 100,  79,  36,  90,  85,  77])

In [159]:
print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices_gru_model_2).numpy())

Input:
 b'your race photos to runners\nGoodmorning twitter, oh my gosh, i woke up soooo nice, lol ... oh hai th'

Next Char Predictions:
 b'K\xc3\xafY|H5kyjnF[R&\xc3\xafx] iWx\xc3\xafn5\nm|\xc2\xa0hsb+Ff~5_#[_[Y^M^W\xc2\xb4^Yz\\u2GRdjoM+j&,E?2g) Z;[UNK]ps^u2@Q-(eqz2+uxDz#Z\xc2\xb4F\xc2\xbfmBxsk'


### Training

#### Loss

In [160]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [161]:
example_batch_mean_loss_model_2 = loss(target_example_batch, example_batch_predictions_gru_model_2)
print("Prediction shape: ", example_batch_predictions_gru_model_2.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss_model_2)

Prediction shape:  (150, 100, 103)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.6353617, shape=(), dtype=float32)


In [162]:
tf.exp(example_batch_mean_loss_model_1).numpy()

np.float32(102.938034)

##### Optimizer

In [163]:
gru_model_2.compile(optimizer='adam', loss=loss, metrics=['accuracy'], run_eagerly=True)

##### Early Stopping

In [164]:
early_stopping_gru_model_2 = keras.callbacks.EarlyStopping(monitor="loss", min_delta=0.002, patience=2)

#### Konfiguration von Checkpoints

In [165]:
checkpoint_dir = './work/training_checkpoints/gru_model_2'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}.weights.h5")

checkpoint_callback_gru_model_2=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

#### Ausführen des Trainings

In [166]:
EPOCHS = 30

In [None]:
start = time.perf_counter()
gru_model_2_history = gru_model_2.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback_gru_model_2, early_stopping_gru_model_2])
end = time.perf_counter()
gru_model_2_training_time = end - start

Epoch 1/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 255ms/step - accuracy: 0.1830 - loss: 3.5651
Epoch 2/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 251ms/step - accuracy: 0.3903 - loss: 2.2093
Epoch 3/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 252ms/step - accuracy: 0.4868 - loss: 1.8306
Epoch 4/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 253ms/step - accuracy: 0.5273 - loss: 1.6671
Epoch 5/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 252ms/step - accuracy: 0.5501 - loss: 1.5720
Epoch 6/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 252ms/step - accuracy: 0.5679 - loss: 1.4959
Epoch 7/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 255ms/step - accuracy: 0.5845 - loss: 1.4266
Epoch 8/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 252ms/step - accuracy: 0.6024 - loss: 1.3566
Epoch 9/30
[1m1

#### Speichern

In [None]:
gru_model_2.save('models/gru_model_2.keras')

## Vergleich

In [None]:
loss1 = gru_model_1_history.history['loss']
loss2 = gru_model_2_history.history['loss']

accuracy1 = gru_model_1_history.history['accuracy']
accuracy2 = gru_model_2_history.history['accuracy']

perplexity1 = np.exp(loss1)
perplexity2 = np.exp(loss2)

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(12, 4)) 

axes[0].plot(loss1, label="GRU Model 1", linestyle='-', color='blue')
axes[0].plot(loss2, label="GRU Model 2", linestyle='-', color='red')
axes[0].set_xlabel("Epochs")
axes[0].set_ylabel("Loss")
axes[0].set_title("Comparison of Training Histories")
axes[0].legend()

axes[1].plot(accuracy1, label="GRU Model 1", linestyle='-', color='blue')
axes[1].plot(accuracy2, label="GRU Model 2", linestyle='-', color='red')
axes[1].set_xlabel("Epochs")
axes[1].set_ylabel("Accuracy")
axes[1].set_title("Accuracy Comparison")
axes[1].legend()

axes[2].plot(perplexity1, label="GRU Model 1", linestyle='-', color='blue')
axes[2].plot(perplexity2, label="GRU Model 2", linestyle='-', color='red')
axes[2].set_xlabel("Epochs")
axes[2].set_ylabel("Perplexity")
axes[2].set_title("Perplexity Comparison")
axes[2].legend()

plt.tight_layout()

plt.show()


## LSTM-Modell