In [16]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Flatten
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [17]:
# corpus
sentences = [
    "I love this movie",
    "This film is terrible",
    "I really enjoyed this show",
    "The plot was boring",
    "Amazing storyline and great acting",
    "Worst movie I have ever seen"
]

# labels (1 = Positive, 0 = Negative)
labels = np.array([1, 0, 1, 0, 1, 0])


In [18]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)

# convert words to integers
word_index = tokenizer.word_index
vocab_size = len(word_index) + 1 
sequences = tokenizer.texts_to_sequences(sentences)

# compute CBoW (average word embeddings)
def cbow_representation(sequences, vocab_size):
    cbow_vectors = []
    for seq in sequences:
        if len(seq) > 0:
            cbow_vectors.append(np.mean(seq))  # average of word indices
        else:
            cbow_vectors.append(0)
    return np.array(cbow_vectors).reshape(-1, 1)  # reshape for model

X_cbow = cbow_representation(sequences, vocab_size)


In [19]:
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=8),  # word embeddings
    Flatten(),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')  # binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()


In [20]:
model.fit(X_cbow, labels, epochs=10, verbose=1)


Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.5000 - loss: 0.6984
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - accuracy: 0.5000 - loss: 0.6969
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step - accuracy: 0.5000 - loss: 0.6955
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.5000 - loss: 0.6940
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step - accuracy: 0.5000 - loss: 0.6925
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step - accuracy: 0.6667 - loss: 0.6912
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step - accuracy: 0.6667 - loss: 0.6899
Epoch 8/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step - accuracy: 0.6667 - loss: 0.6887
Epoch 9/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

<keras.src.callbacks.history.History at 0x778b72b2df30>

In [21]:
new_texts = ["The movie was great", "I did not like the film","this film is sooo good"]
new_sequences = tokenizer.texts_to_sequences(new_texts)
new_cbow = cbow_representation(new_sequences, vocab_size)

predictions = model.predict(new_cbow)
predicted_labels = ["Positive" if p > 0.5 else "Negative" for p in predictions]
print(predicted_labels)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
['Negative', 'Negative', 'Positive']
