In [1]:
import numpy as np

# Toy sentiment dataset kept as (text, label) pairs so each label stays next
# to the sentence it belongs to. Label 1 = positive, 0 = negative.
labelled_reviews = [
    ("I loved the movie, it was fantastic!", 1),
    ("The food was terrible, I wouldn't recommend it.", 0),
    ("The book was amazing, couldn't put it down.", 1),
    ("It was terrible film", 0),
]

# Split the pairs back into the two parallel containers used downstream.
sentences = [text for text, _label in labelled_reviews]
labels = np.array([label for _text, label in labelled_reviews])  # 1: Positive, 0: Negative

In [2]:
from keras.preprocessing.text import Tokenizer

# Word-level tokenizer with default settings.
tokenizer = Tokenizer()

# Build the vocabulary from the training sentences. As the word index printed
# further down shows, words are lower-cased and surrounding punctuation is
# stripped ("movie," -> 'movie', "I" -> 'i').
tokenizer.fit_on_texts(sentences)

# Replace every word by its integer index: one list of ints per sentence,
# each list as long as its sentence has words (not yet padded).
sequences = tokenizer.texts_to_sequences(sentences)
sequences

[[4, 6, 3, 7, 1, 2, 8],
 [3, 9, 2, 5, 4, 10, 11, 1],
 [3, 12, 2, 13, 14, 15, 1, 16],
 [1, 2, 5, 17]]

In [3]:
# Mapping word -> integer index learned by fit_on_texts. Indices start at 1,
# which leaves 0 free for use as the padding value.
word_index = tokenizer.word_index
word_index

{'it': 1,
 'was': 2,
 'the': 3,
 'i': 4,
 'terrible': 5,
 'loved': 6,
 'movie': 7,
 'fantastic': 8,
 'food': 9,
 "wouldn't": 10,
 'recommend': 11,
 'book': 12,
 'amazing': 13,
 "couldn't": 14,
 'put': 15,
 'down': 16,
 'film': 17}

In [4]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Length of the longest tokenised sentence; every sequence is padded to it.
max_length = max(map(len, sequences))

# With the default settings shorter sequences are filled with zeros at the
# front, giving a rectangular (num_sentences, max_length) integer array.
padded_sequences = pad_sequences(sequences, maxlen=max_length)

print(padded_sequences)

[[ 0  4  6  3  7  1  2  8]
 [ 3  9  2  5  4 10 11  1]
 [ 3 12  2 13 14 15  1 16]
 [ 0  0  0  0  1  2  5 17]]


In [5]:
# Number of time steps per padded sequence (the maxlen passed to pad_sequences).
max_length

8

In [6]:
from keras.models import Sequential
from keras.layers import Input, Embedding, GRU, Dense

# Tokenizer indices start at 1, so one extra slot is needed for index 0,
# which pad_sequences uses as the padding value.
vocab_size = len(tokenizer.word_index) + 1

# Size of the dense vector learned for each vocabulary entry.
embedding_dim = 32

model = Sequential()

# Each sample is a vector of max_length integer token ids.
model.add(Input(shape=(max_length,)))

# The sequence length is already fixed by the Input layer above, so the
# redundant `input_length` argument (deprecated in Keras 3) is not passed.
# mask_zero=True marks the padding id 0 as "no token", so the GRU skips the
# padded time steps instead of treating them as real words.
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, mask_zero=True))

# The GRU reads the embedded sequence and returns its final 32-dim state.
model.add(GRU(32))
# Single sigmoid unit: estimated probability of the positive class (label 1).
model.add(Dense(1, activation='sigmoid'))

In [7]:
# Binary classification set-up: cross-entropy on the sigmoid output, Adam
# optimiser with default settings, and accuracy reported alongside the loss.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [8]:
# Train for 10 passes over the padded sentences, one sample per batch.
model.fit(
    x=padded_sequences,
    y=labels,
    batch_size=1,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x165a2017850>

In [9]:
# evaluate() returns the loss followed by the metrics given to compile(),
# here just accuracy.
# NOTE: this scores the model on the same sentences it was trained on, so the
# number shows how well the training data was fitted, not how well the model
# generalises to unseen text.
loss, accuracy = model.evaluate(padded_sequences, labels)
print(f"Accuracy: {accuracy*100:.2f}%")

Accuracy: 100.00%


In [10]:
# Unseen sentences to classify with the trained model.
new_sentences = [
    "The food was fantastic!",
    "The movie was terrible",
]

# Reuse the tokenizer and max_length from training so the new inputs share
# the vocabulary indices and sequence length the model was trained on.
new_sequences = tokenizer.texts_to_sequences(new_sentences)
new_padded_sequences = pad_sequences(new_sequences, maxlen=max_length)

# One sigmoid output per sentence.
predictions = model.predict(new_padded_sequences)

# Threshold the sigmoid output at 0.5 to obtain a class label.
for sentence, prediction in zip(new_sentences, predictions):
    if prediction > 0.5:
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    print(f"Sentence: {sentence} - Sentiment: {sentiment}  ")

Sentence: The food was fantastic! - Sentiment: Positive  
Sentence: The movie was terrible - Sentiment: Negative  
