In [23]:
import numpy as np
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import one_hot

In [24]:
# Toy corpus: ten short restaurant reviews, each stored next to its sentiment
# label (1 for the favourable reviews, 0 for the unfavourable ones) so a text
# and its label cannot drift out of alignment when the list is edited.
labelled_reviews = [
    ('nice food', 1),
    ('amazing restaurant', 1),
    ('too good', 1),
    ('just loved it!', 1),
    ('will go again', 1),
    ('horrible food', 0),
    ('never go there', 0),
    ('poor service', 0),
    ('poor quality', 0),
    ('needs improvement', 0),
]

# Split the pairs back into the two parallel containers used downstream.
reviews = [text for text, _ in labelled_reviews]
sentiment = np.array([label for _, label in labelled_reviews])

In [41]:
# Despite its name, `one_hot` does not build one-hot vectors: it hashes each
# word of the text to an integer word index below `n` and returns the list of
# indices. Different words may therefore share an index (hash collision).
# NOTE(review): the Keras docs describe the default hash as unstable across
# interpreter runs, so the exact indices may differ after a kernel restart.
one_hot("amazing restaurant", n=50)

[28, 17]

In [43]:
# Upper bound for the hashed word indices. It is also used later as the
# number of rows in the embedding matrix.
vocab_size = 50

# Encode every review as a list of integer word indices. Collisions are
# possible with a hash space this small (e.g. "too good" encodes to the same
# index twice in the output below).
encoded_reviews = [one_hot(review, n=vocab_size) for review in reviews]
encoded_reviews

[[18, 2],
 [28, 17],
 [44, 44],
 [15, 13, 2],
 [4, 17, 27],
 [24, 2],
 [20, 17, 26],
 [9, 35],
 [9, 33],
 [19, 36]]

In [44]:
# Show each review next to its integer encoding.
# Module-level loop variables stay in the notebook namespace after the loop,
# so they get descriptive names: calling them `x`/`y` would overwrite the
# label array `y` if this cell were re-run after `y = sentiment`.
for review_text, word_ids in zip(reviews, encoded_reviews):
    print(f"{review_text}: {word_ids}")

nice food: [18, 2]
amazing restaurant: [28, 17]
too good: [44, 44]
just loved it!: [15, 13, 2]
will go again: [4, 17, 27]
horrible food: [24, 2]
never go there: [20, 17, 26]
poor service: [9, 35]
poor quality: [9, 33]
needs improvement: [19, 36]


**Padding is needed because the reviews differ in length, while the model expects inputs of a fixed size. The longest review has 3 words, so every encoded review is padded with zeros to length 3.**

In [45]:
# Derive the target length from the data instead of hard-coding it, so that
# adding a longer review cannot cause silent truncation by `pad_sequences`.
# For the current corpus this evaluates to 3.
max_length = max(len(word_ids) for word_ids in encoded_reviews)

# padding='post' appends zeros after the word indices, giving a rectangular
# integer array with one row per review and `max_length` columns.
padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')
padded_reviews

array([[18,  2,  0],
       [28, 17,  0],
       [44, 44,  0],
       [15, 13,  2],
       [ 4, 17, 27],
       [24,  2,  0],
       [20, 17, 26],
       [ 9, 35,  0],
       [ 9, 33,  0],
       [19, 36,  0]], dtype=int32)

In [46]:
# Number of dimensions of the learned vector for each word index.
embedded_vector_size = 5

# Binary sentiment classifier:
#   Input     - a fixed-length sequence of `max_length` integer word indices.
#               Declared explicitly because the `input_length` argument of
#               `Embedding` is deprecated in Keras 3; an `Input` layer also
#               gives `Flatten`/`Dense` a known shape at construction time.
#   Embedding - looks up an `embedded_vector_size`-dimensional vector for each
#               of the `vocab_size` possible indices. The layer is named
#               "embedding" so its weights can be fetched by name afterwards.
#   Flatten   - concatenates the per-word vectors into one feature vector.
#   Dense     - single sigmoid unit producing a value between 0 and 1.
model = Sequential([
    Input(shape=(max_length,), dtype="int32"),
    Embedding(vocab_size, embedded_vector_size, name="embedding"),
    Flatten(),
    Dense(1, activation='sigmoid'),
])



In [47]:
# Conventional names for the training inputs (padded word-index sequences)
# and the targets (0/1 sentiment labels).
X, y = padded_reviews, sentiment

In [48]:
# Configure training: binary cross-entropy pairs with the single sigmoid
# output unit, and accuracy is tracked as a readable metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [49]:
# Train silently for 50 epochs on the full toy corpus. Keeping the returned
# History object (per-epoch loss/accuracy in `history.history`) makes the
# training curve inspectable and avoids leaving a memory-address repr as the
# cell output.
history = model.fit(X, y, epochs=50, verbose=0)

<keras.src.callbacks.history.History at 0x7c3a7e817990>

In [50]:
# Print the layer-by-layer overview of the model (layers, output shapes and
# parameter counts).
model.summary()

In [51]:
# Run the trained model on the training inputs. The result has one row per
# review, holding the output of the final sigmoid unit (a value in 0..1).
model.predict(X)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step


array([[0.5127551 ],
       [0.51225644],
       [0.5164165 ],
       [0.5717383 ],
       [0.5706238 ],
       [0.48266923],
       [0.4922055 ],
       [0.4336029 ],
       [0.43163663],
       [0.44208992]], dtype=float32)

In [52]:
# Evaluate on the same reviews the model was fitted on, so the figure below
# is training accuracy, not an estimate of how well the model generalises.
loss, accuracy = model.evaluate(X, y)

# Fixed one-decimal formatting keeps floating-point rounding noise out of the
# printed percentage.
print(f"Accuracy: {accuracy * 100:.1f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 241ms/step - accuracy: 1.0000 - loss: 0.6172
Accuracy: 100.0


In [53]:
# Fetch the trained embedding matrix by looking the layer up under the name
# it was given when the model was built. The first entry of `get_weights()`
# is the lookup table, with one row per word index.
embedding_layer = model.get_layer(name='embedding')
weights = embedding_layer.get_weights()[0]
weights

array([[-6.02774173e-02,  3.73034514e-02,  6.23819456e-02,
         3.98649536e-02, -4.95097935e-02],
       [-2.43814234e-02, -1.48032419e-02,  3.38371433e-02,
         2.37550400e-02,  3.24929878e-03],
       [-9.88618378e-03, -2.88310330e-02, -5.92683218e-02,
        -2.66816863e-03,  8.29743519e-02],
       [ 4.60431464e-02, -3.29525955e-02, -2.21547373e-02,
         2.87402906e-02, -7.59349018e-03],
       [-5.31673878e-02, -4.50693034e-02, -4.45739515e-02,
         5.57482615e-02,  5.62389195e-02],
       [ 1.02826245e-02,  3.11025120e-02, -4.52963710e-02,
        -2.61529218e-02, -7.53035396e-03],
       [ 1.90835930e-02, -3.53990309e-02, -1.94629915e-02,
        -6.68205321e-04, -1.32164136e-02],
       [ 1.46739967e-02,  3.35278399e-02, -3.34803089e-02,
        -3.92388105e-02, -3.65290791e-03],
       [-4.64635268e-02, -1.87263265e-02, -2.34540459e-02,
         1.31437071e-02, -2.35473160e-02],
       [ 5.87764606e-02,  1.00572377e-01,  1.12556532e-01,
        -1.19570844e-01

In [54]:
# Number of rows in the embedding matrix, i.e. one vector per possible word
# index.
weights.shape[0]

50