In [1]:
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.embeddings import Embedding

     
# define documents
docs = ['I love this sandwich',
        'this is an amazing place!',
        'I feel very good about these beers.',
        'this is my best work.',
        'what an awesome view',
        'I do not like this restaurant',
        'I am tired of this stuff.',
        'I can\'t deal with this',
        'he is my sworn enemy!',
        'my boss is horrible.']

# define class labels
labels = [1,1,1,1,1,0,0,0,0,0]

# integer encode the documents
vocab_size = 25
encoded_docs = [one_hot(d, vocab_size) for d in docs]
print(encoded_docs)

# pad documents to a max length of 4 words
max_length = 6
padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='post')
print(padded_docs)

# define the model
model = Sequential()
model.add(Embedding(vocab_size, 10, input_length=max_length))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))

# compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

# summarize the model
print(model.summary())

# fit the model
model.fit(padded_docs, labels, epochs=50, verbose=0)

# evaluate the model
loss, accuracy = model.evaluate(padded_docs, labels, verbose=0)
print('Accuracy: %f' % (accuracy*100))

Using TensorFlow backend.


[[8, 12, 20, 23], [20, 16, 3, 12, 18], [8, 13, 24, 21, 12, 3, 12], [20, 16, 15, 3, 10], [10, 3, 5, 17], [8, 21, 1, 9, 20, 9], [8, 7, 23, 22, 20, 9], [8, 20, 5, 1, 20], [12, 16, 15, 20, 1], [15, 16, 16, 4]]
[[ 8 12 20 23  0  0]
 [20 16  3 12 18  0]
 [13 24 21 12  3 12]
 [20 16 15  3 10  0]
 [10  3  5 17  0  0]
 [ 8 21  1  9 20  9]
 [ 8  7 23 22 20  9]
 [ 8 20  5  1 20  0]
 [12 16 15 20  1  0]
 [15 16 16  4  0  0]]
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 6, 10)             250       
_________________________________________________________________
flatten_1 (Flatten)          (None, 60)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 61        
Total params: 311
Trainable params: 311
Non-trainable params: 0
_________________________________________________________

In [2]:
testset = [
       ('the beer was good.', 'pos'),
       ('I do not enjoy my job', 'neg'),
       ("I feel amazing!", 'pos'),
       ("I can't believe I'm doing this.", 'neg')
       ]

for item in testset:
    test =item[0]
    encoded = one_hot(test, vocab_size) 
    padded = pad_sequences([encoded], maxlen=max_length, padding='post')
    print (test, ' : ', model.predict_classes(padded)[0])

the beer was good.  :  [1]
I do not enjoy my job  :  [0]
I feel amazing!  :  [1]
I can't believe I'm doing this.  :  [0]
