In [1]:
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten


In [2]:
from tensorflow.keras.layers import Embedding

In [3]:
# Toy corpus of ten short feedback phrases. The order matters: each entry
# must line up index-for-index with the corresponding entry in `labels`.
docs = [
    # positive examples
    "Well done!",
    "Good work",
    "Great effort",
    "nice work",
    "Excellent!",
    # negative examples
    "Weak",
    "Poor effort!",
    "not good",
    "poor work",
    "Could have done better.",
]

In [4]:
# 1 = Positive Sentiment
# 0 = Negative Sentiment

In [5]:
# Sentiment targets aligned with `docs`: the first five phrases are
# positive (1) and the remaining five are negative (0).
labels = [1] * 5 + [0] * 5

In [6]:
# Turn each phrase into a list of integer word indices, one index per word.
# NOTE(review): one_hot is presumably hash-based, so two different words may
# share an index and the indices may vary between sessions -- confirm against
# the Keras documentation before relying on the exact values.
vocab_size = 50
encoded_docs = [one_hot(phrase, vocab_size) for phrase in docs]

In [7]:
# Show the integer-encoded documents (one list of word indices per phrase).
print(encoded_docs)

[[31, 26], [41, 37], [27, 44], [33, 37], [39], [12], [24, 44], [38, 41], [24, 37], [11, 46, 26, 29]]


In [8]:
# Bring every encoded document to the same length (4 indices) by appending
# zeros after the real word indices.
max_length = 4
padded_docs = pad_sequences(
    encoded_docs,
    padding='post',
    maxlen=max_length,
)

In [9]:
# Show the padded matrix: one row per document, max_length columns.
print(padded_docs)

[[31 26  0  0]
 [41 37  0  0]
 [27 44  0  0]
 [33 37  0  0]
 [39  0  0  0]
 [12  0  0  0]
 [24 44  0  0]
 [38 41  0  0]
 [24 37  0  0]
 [11 46 26 29]]


In [10]:
# Build the classifier: an 8-dimensional embedding for each word index,
# flattened across the max_length positions, feeding one sigmoid unit that
# outputs the probability of positive sentiment.
model = Sequential([
    Embedding(vocab_size, 8, input_length=max_length),
    Flatten(),
    Dense(1, activation='sigmoid'),
])

In [11]:
# Configure training: Adam optimizer, binary cross-entropy loss, and
# accuracy tracked under the metric name 'acc'.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

In [12]:
# summarize the model: print each layer with its output shape and
# parameter count
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 4, 8)              400       
_________________________________________________________________
flatten (Flatten)            (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 433
Trainable params: 433
Non-trainable params: 0
_________________________________________________________________


In [13]:
import numpy as np

In [14]:
# Ensure the training inputs are a NumPy array before fitting, then display them.
# NOTE(review): the printed result of pad_sequences already looks like an
# ndarray, so this conversion is probably redundant -- confirm.
padded_docs = np.array(padded_docs)
padded_docs

array([[31, 26,  0,  0],
       [41, 37,  0,  0],
       [27, 44,  0,  0],
       [33, 37,  0,  0],
       [39,  0,  0,  0],
       [12,  0,  0,  0],
       [24, 44,  0,  0],
       [38, 41,  0,  0],
       [24, 37,  0,  0],
       [11, 46, 26, 29]])

In [15]:
# Convert the label list to a NumPy array before it is passed to model.fit,
# then display it. Note that this rebinds `labels` from a Python list to an
# ndarray.
labels = np.array(labels)
labels

array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

In [16]:
# Train for 50 epochs on the ten padded documents; `history` keeps the
# metrics recorded during training.
history = model.fit(x=padded_docs, y=labels, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [17]:
# List which metrics were recorded during training.
history.history.keys()

dict_keys(['loss', 'acc'])

In [18]:
# Score the model on the same ten documents it was trained on, so the
# number printed here is training accuracy (as a percentage), not a
# held-out estimate.
loss, accuracy = model.evaluate(x=padded_docs, y=labels, verbose=0)
print('Accuracy: %f' % (accuracy * 100))

Accuracy: 89.999998


In [19]:
# Predict new document sentiment

In [20]:
# A single unseen phrase made of two words that each occur in a positive
# training example ('Great effort', 'nice work').
doc_test = ['Great nice']



In [21]:
# Encode the test phrase with the same vocabulary size that was used for the
# training documents.
encoded_doc_test = [one_hot(phrase, vocab_size) for phrase in doc_test]

In [22]:
# Inspect the word indices assigned to the test phrase.
encoded_doc_test

[[27, 33]]

In [23]:
# Pad the test phrase to the same fixed length as the training documents,
# with zeros appended after the word indices.
padded_doc_test = pad_sequences(
    encoded_doc_test,
    padding='post',
    maxlen=max_length,
)

In [24]:
# Inspect the padded test input that will be fed to the model.
padded_doc_test

array([[27, 33,  0,  0]])

In [25]:
# Predict the class label of the test phrase (1 = positive, 0 = negative).
# `Sequential.predict_classes` is deprecated and no longer available in later
# TensorFlow releases; for a single sigmoid output the recommended equivalent
# is to threshold the predicted probability at 0.5 and cast to int32.
yhat = (model.predict(padded_doc_test, verbose=0) > 0.5).astype("int32")



Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


In [26]:
# Display the predicted class label for the test phrase.
yhat

array([[1]])

In [27]:
# 1 means Positive Sentiment

In [None]:
# Alternatively, inspect the raw predicted probability instead of the thresholded class label

In [32]:
# Raw sigmoid output for the test phrase: the predicted probability of
# positive sentiment, before any thresholding.
yhat1 = model.predict(x=padded_doc_test)

In [33]:
# Display the predicted probability for the test phrase.
yhat1

array([[0.5377396]], dtype=float32)

In [None]:
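# The predicted probability 0.5377 is above the 0.5 decision threshold, hence Positive Sentiment (though only marginally, so the model is not very confident)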