In [12]:
import numpy as np

# Toy sentiment corpus: four short reviews used as the whole training set.
sentences = [
    "I loved the movie, it was fantastic!",
    "The food was terrible, I wouldn't recommend it.",
    "The book was amazing, couldn't put it down.",
    "It was terrible film",
]

# Targets aligned with `sentences` by position (1: Positive, 0: Negative).
labels = np.array([1, 0, 1, 0])

In [15]:
from tensorflow.keras.preprocessing.text import Tokenizer
# Default Tokenizer: no vocabulary size limit and no out-of-vocabulary token.
tokenizer = Tokenizer()

# Build the word -> integer id vocabulary from the training sentences.
tokenizer.fit_on_texts(sentences)

# Encode each sentence as a list of word ids; the bare expression on the last
# line makes the notebook display the encoded sequences.
sequences = tokenizer.texts_to_sequences(sentences)
sequences

[[4, 6, 3, 7, 1, 2, 8],
 [3, 9, 2, 5, 4, 10, 11, 1],
 [3, 12, 2, 13, 14, 15, 1, 16],
 [1, 2, 5, 17]]

In [16]:
# Keep a handle on the tokenizer's word -> id mapping for inspection.
# NOTE(review): the visible cells never read `word_index`; the model cell
# queries `tokenizer.word_index` directly.
word_index = tokenizer.word_index

In [19]:
# max([ len(seq) for seq in sequences ])

8

In [20]:
from keras.preprocessing.sequence import pad_sequences

# Pad every sequence to the length of the longest training sentence so the
# batch becomes a rectangular integer array.
max_length = max(map(len, sequences))

# With the default settings the zeros are added on the left of shorter
# sequences (see the printed array).
paded_sequences = pad_sequences(sequences, maxlen=max_length)

print(paded_sequences)


[[ 0  4  6  3  7  1  2  8]
 [ 3  9  2  5  4 10 11  1]
 [ 3 12  2 13 14 15  1 16]
 [ 0  0  0  0  1  2  5 17]]


In [22]:
from keras.models import Sequential
from keras.layers import Input, Embedding, GRU, Dense
from keras.utils import set_random_seed

# Seed the Python, NumPy and backend RNGs so weight initialisation (and hence
# the training results) is repeatable on Restart & Run All.
set_random_seed(42)

# +1 because the tokenizer's word ids start at 1; id 0 is the padding value.
vocab_size = len(tokenizer.word_index) + 1

embedding_dim = 32

# Sentiment classifier: token ids -> 32-d embeddings -> GRU(32) -> sigmoid.
# The sequence length is declared once on the Input layer; the deprecated
# `input_length` argument of Embedding is therefore not passed.
model = Sequential([
    Input(shape=(max_length,)),
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    GRU(32),
    Dense(1, activation='sigmoid'),
])



In [31]:
# Binary cross-entropy matches the single sigmoid output unit of the model.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# batch_size=1 gives one weight update per sentence (4 steps per epoch).
# NOTE: fit() continues from the model's current weights, so re-running this
# cell without rebuilding the model resumes training instead of starting over
# (the recorded log already starts at accuracy 1.0 in epoch 1).
model.fit(
    paded_sequences,
    labels,
    batch_size=1,
    epochs=20,
)

Epoch 1/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 1.0000 - loss: 0.0278
Epoch 2/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 1.0000 - loss: 0.0152 
Epoch 3/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 1.0000 - loss: 0.0142 
Epoch 4/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 1.0000 - loss: 0.0099
Epoch 5/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 1.0000 - loss: 0.0045
Epoch 6/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 1.0000 - loss: 0.0033 
Epoch 7/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 1.0000 - loss: 0.0045
Epoch 8/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 1.0000 - loss: 0.0027
Epoch 9/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

<keras.src.callbacks.history.History at 0x23d759f3f50>

In [32]:
# Returns [loss, accuracy] as configured in model.compile().
# These are the same arrays that were passed to model.fit(), so the numbers
# describe the fit to the training data, not performance on unseen text.
model.evaluate(paded_sequences,labels)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 341ms/step - accuracy: 1.0000 - loss: 6.8676e-04


[0.0006867620977573097, 1.0]

In [33]:
# Raw text to classify, kept under its own name so the strings are not
# overwritten by their token-id encoding below.
new_texts = ["The food was fantastic!",
             "The movie was terrible"]

# Encode with the tokenizer fitted on the training sentences. The tokenizer
# was created without an OOV token, so words outside its vocabulary would be
# dropped from the sequences.
new_sentences = tokenizer.texts_to_sequences(new_texts)

# Pad to the same length the model was built for.
new_sentences_pad = pad_sequences(new_sentences, maxlen=max_length)

In [34]:
# Display the token-id lists next to their zero-padded array form.
new_sentences,new_sentences_pad

([[3, 9, 2, 8], [3, 7, 2, 5]],
 array([[0, 0, 0, 0, 3, 9, 2, 8],
        [0, 0, 0, 0, 3, 7, 2, 5]]))

In [35]:
# Score the padded new sentences with the trained model; the final layer is a
# single sigmoid unit, so each row yields one value.
y_pred = model.predict(new_sentences_pad)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 205ms/step


In [36]:
# Display the sigmoid scores (labels: 1 = Positive, 0 = Negative).
# NOTE(review): in the recorded run both scores are ~0.001, i.e. both
# sentences score as Negative, including "The food was fantastic!".
# Presumably a consequence of the four-sentence training set; confirm.
y_pred

array([[0.00088157],
       [0.00104583]], dtype=float32)