In [1]:
import tensorflow as tf
from tensorflow import keras
import tensorflow_hub as hub
import tensorflow_datasets as tfds




  from .autonotebook import tqdm as notebook_tqdm


In [8]:
datasets, info = tfds.load("imdb_reviews", as_supervised=True, with_info=True)
train_size = info.splits["train"].num_examples

In [9]:
def preprocess(X_batch, y_batch):
    X_batch = tf.strings.substr(X_batch, 0, 300)
    X_batch = tf.strings.regex_replace(X_batch, b"<br\\s*/?>", b" ")
    X_batch = tf.strings.regex_replace(X_batch, b"[^a-zA-Z']", b" ")
    X_batch = tf.strings.split(X_batch)
    return X_batch.to_tensor(default_value=b"<pad>"), y_batch


In [10]:
from collections import Counter
vocabulary = Counter()
for X_batch, y_batch in datasets["train"].batch(32).map(preprocess):
    for review in X_batch:
        vocabulary.update(list(review.numpy()))

vocabulary.most_common()[:3]

[(b'<pad>', 214309), (b'the', 61137), (b'a', 38564)]

In [11]:
vocab_size = 10000
truncated_vocabulary = [
    word for word, count in vocabulary.most_common()[:vocab_size]]

In [12]:
words = tf.constant(truncated_vocabulary)
word_ids = tf.range(len(truncated_vocabulary), dtype=tf.int64)
vocab_init = tf.lookup.KeyValueTensorInitializer(words, word_ids)
num_oov_buckets = 1000
table = tf.lookup.StaticVocabularyTable(vocab_init, num_oov_buckets)

In [13]:
table.lookup(tf.constant([b"This movie was faaaaaantastic".split()]))

<tf.Tensor: shape=(1, 4), dtype=int64, numpy=array([[   22,    12,    11, 10053]], dtype=int64)>

In [14]:
def encode_words(X_batch, y_batch):
 return table.lookup(X_batch), y_batch
train_set = datasets["train"].batch(32).map(preprocess)
train_set = train_set.map(encode_words).prefetch(1)


In [15]:
embed_size = 128
model = keras.models.Sequential([
    keras.layers.Embedding(vocab_size + num_oov_buckets, embed_size,
    input_shape=[None]),
    keras.layers.GRU(128, return_sequences=True),
    keras.layers.GRU(128),
    keras.layers.Dense(1, activation="sigmoid")
])
model.compile(loss="binary_crossentropy", optimizer="adam",
    metrics=["accuracy"])
history = model.fit(train_set, epochs=5)

Epoch 1/5


  super().__init__(**kwargs)


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 103ms/step - accuracy: 0.5380 - loss: 0.6742
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 104ms/step - accuracy: 0.7936 - loss: 0.4485
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 104ms/step - accuracy: 0.8855 - loss: 0.2891
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 122ms/step - accuracy: 0.9370 - loss: 0.1795
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 126ms/step - accuracy: 0.9560 - loss: 0.1262


In [16]:
def preprocess_sentence(sentence):
    sentence = tf.strings.substr(sentence, 0, 300)
    sentence = tf.strings.regex_replace(sentence, b"<br\\s*/?>", b" ")
    sentence = tf.strings.regex_replace(sentence, b"[^a-zA-Z']", b" ")
    sentence = tf.strings.split(sentence)
    return sentence

In [24]:
example_sentence = "This movie was fantastic"

# Preprocessing des Beispielsatzes
def preprocess_sentence(sentence):
    sentence = tf.strings.substr(sentence, 0, 300)
    sentence = tf.strings.regex_replace(sentence, b"<br\\s*/?>", b" ")
    sentence = tf.strings.regex_replace(sentence, b"[^a-zA-Z']", b" ")
    sentence = tf.strings.split(sentence)
    return sentence.to_tensor(default_value=b"<pad>")

# Encodieren des Beispielsatzes
def encode_sentence(sentence):
    preprocessed_sentence = preprocess_sentence(sentence)
    encoded_sentence = table.lookup(preprocessed_sentence)
    return encoded_sentence

# Vorhersage für den Beispielsatz
def predict_sentence(sentence):
    encoded_sentence = encode_sentence(tf.constant([sentence]))
    prediction = model.predict(encoded_sentence)
    return prediction

# Vorhersage für den Beispielhafter Satz
prediction = predict_sentence(example_sentence)

# Ergebnis anzeigen
def ergebnis(input):
    if input>0.5:
        output = 'positive'
    else:
        output = 'negative'
    return output

print(f"Vorhersage für den Satz '{example_sentence}': " + ergebnis(prediction)) 
#print(f"Vorhersage für den Satz '{example_sentence}': {prediction[0][0]:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step


TypeError: can only concatenate str (not "NoneType") to str