In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.text import one_hot

# Simple Data

In [2]:
# Toy corpus: five praising comments followed by five critical ones.
data = [
    # positive feedback
    "Well done!", "Good work", "Great effort", "nice work", "Excellent!",
    # negative feedback
    "Weak", "Poor effort!", "not good", "poor work", "Could have done better.",
]

# Class labels aligned with `data`: 1 = positive, 0 = negative.
labels = np.array([1] * 5 + [0] * 5)

# Preprocess Input Data

In [4]:
# Word-level tokenizer: lowercases the text, strips the listed punctuation,
# splits on spaces, and keeps every word (no num_words cap, no OOV token).
tokenizer = Tokenizer(
    num_words=None,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    lower=True,
    split=" ",
    char_level=False,
    oov_token=None,
    document_count=0
)
tokenizer.fit_on_texts(data)

# Mapping from each word seen in the training data to its integer id
word_index = tokenizer.word_index
print(f"word index: \n{word_index}")

# Encode each training sentence as a list of word ids
train_sequences = tokenizer.texts_to_sequences(data)
print(f"\nencoded train data: \n{train_sequences}")

# Longest encoded sentence; every sequence is padded to this length
maxlen = max(len(seq) for seq in train_sequences)
print(f"\nmax length: {maxlen}")

# Pad (and, if ever needed, truncate) at the end of each sequence
train_padded = pad_sequences(train_sequences, padding='post', truncating='post', maxlen=maxlen)
print(f"\npadded: \n{train_padded}")

# Number of rows in the embedding table used by the model below.
# Word ids start at 1 and padding uses 0, so the table needs at least
# len(word_index) + 1 rows; 50 leaves headroom for this toy corpus.
vocab_size = 50
assert len(word_index) < vocab_size, (
    f"vocab_size={vocab_size} is too small for {len(word_index)} distinct words"
)

word index: 
{'work': 1, 'done': 2, 'good': 3, 'effort': 4, 'poor': 5, 'well': 6, 'great': 7, 'nice': 8, 'excellent': 9, 'weak': 10, 'not': 11, 'could': 12, 'have': 13, 'better': 14}

encoded train data: 
[[6, 2], [3, 1], [7, 4], [8, 1], [9], [10], [5, 4], [11, 3], [5, 1], [12, 13, 2, 14]]

max length: 4

padded: 
[[ 6  2  0  0]
 [ 3  1  0  0]
 [ 7  4  0  0]
 [ 8  1  0  0]
 [ 9  0  0  0]
 [10  0  0  0]
 [ 5  4  0  0]
 [11  3  0  0]
 [ 5  1  0  0]
 [12 13  2 14]]


# Model

In [5]:
# Define the model: embed each word id into 8 dimensions, flatten the
# per-position embeddings into one vector, and classify with a sigmoid unit.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, 8, input_length=maxlen))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
# Compile for binary classification
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Summarize the model. summary() prints the table itself and returns None,
# so wrapping it in print() would emit a stray "None" line after the table.
model.summary()

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 4, 8)              400       
_________________________________________________________________
flatten (Flatten)            (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 433
Trainable params: 433
Non-trainable params: 0
_________________________________________________________________
None


# Fit Model

In [6]:
# Train for 10 epochs with one sample per batch, i.e. len(data) weight updates
# per epoch. `steps_per_epoch` is intended for dataset/generator inputs and is
# not supported with in-memory arrays, so the batch size sets the step count.
model.fit(train_padded, labels, epochs=10, batch_size=1, verbose=0)
# Evaluate on the training data itself (there is no held-out set), so this
# number measures how well the model fit the samples, not generalization.
loss, accuracy = model.evaluate(train_padded, labels, verbose=0)
print('Accuracy: %f' % (accuracy*100))

Accuracy: 100.000000
