<h1 style="text-align:center;color:mediumvioletred;">Word Embedding</h1>

In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.utils import set_random_seed

In [2]:
# Toy sentiment corpus: five positive reviews followed by five negative ones.
positive_reviews = [
    'nice food',
    'amazing restaurant',
    'too good',
    'just loved it!',
    'will go again',
]
negative_reviews = [
    'horrible food',
    'never go there',
    'poor service',
    'poor quality',
    'needs improvement',
]
reviews = positive_reviews + negative_reviews

# Binary labels aligned with `reviews`: 1 = positive, 0 = negative.
sentiment = np.array([1] * len(positive_reviews) + [0] * len(negative_reviews))

In [3]:
 # Encode a sample phrase as a list of integer word indices, using a hashing space of 30.
 one_hot("amazing restaurant", n=30)

[20, 29]

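## One Hot Encoding

Despite its name, Keras' `one_hot` does not build one-hot vectors: it hashes each word to an integer index smaller than `vocab_size`, so different words can share an index (in the output recorded below, "good" and "just" both map to 27).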

In [4]:
# Size of the hashing space for word indices; the same value is later used as
# the number of rows in the embedding table.
vocab_size = 30

# Encode every review as a list of integer word indices (one per word).
# Distinct words may collide on the same index because the space is small.
# NOTE(review): one_hot appears to rely on Python's built-in hash, so the
# indices may differ between kernel sessions — confirm against the Keras docs.
encoded_reviews = list(map(lambda text: one_hot(text, vocab_size), reviews))
encoded_reviews

[[7, 11],
 [20, 29],
 [18, 27],
 [27, 12, 1],
 [4, 22, 3],
 [5, 11],
 [4, 22, 14],
 [23, 1],
 [23, 11],
 [3, 22]]

## Padding

In [5]:
# Pad to the length of the longest encoded review instead of a hard-coded 3,
# so adding a longer review later cannot be silently cut short by pad_sequences.
# For the current corpus this still evaluates to 3.
max_length = max(len(sequence) for sequence in encoded_reviews)

# padding='post' appends zeros after the word indices; index 0 is never
# produced for a real word in the encoded output, so it acts as the filler.
padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')
print(padded_reviews)

[[ 7 11  0]
 [20 29  0]
 [18 27  0]
 [27 12  1]
 [ 4 22  3]
 [ 5 11  0]
 [ 4 22 14]
 [23  1  0]
 [23 11  0]
 [ 3 22  0]]


## Model Training

In [23]:
# Number of dimensions learned for each word index.
embedded_vector_size = 4

# Seed Python, NumPy and the Keras backend before any layer weights are created
# so weight initialisation and training repeat on Restart & Run All.
# NOTE(review): the encoded inputs themselves come from one_hot, whose indices
# may still vary between kernel sessions — confirm if exact repeatability matters.
SEED = 42
set_random_seed(SEED)

# Declare the input with an explicit Input object rather than passing
# `input_shape=` to the first layer; the latter is the legacy form and
# triggers a warning in Keras 3.
model = Sequential([
    Input(shape=(max_length,)),
    # One trainable vector of size `embedded_vector_size` per word index.
    Embedding(vocab_size, embedded_vector_size, name="embedding"),
    # Concatenate the per-word vectors of a review into a single feature vector.
    Flatten(),
    # Single sigmoid unit: probability that the review is positive.
    Dense(1, activation='sigmoid'),
])

In [24]:
# Features are the padded index sequences; targets are the 0/1 sentiment labels.
X, y = padded_reviews, sentiment

In [25]:
# Binary classification set-up: cross-entropy loss, Adam optimiser, accuracy metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview of the compiled model.
model.summary()

In [26]:
# Train silently for 50 passes over the ten reviews; the returned History
# object is the cell's displayed value.
model.fit(x=X, y=y, epochs=50, verbose=0)

<keras.src.callbacks.history.History at 0x29e97fbebc0>

In [29]:
# Score the model on the very same data it was trained on, so the result
# reflects how well it fits these ten reviews, not how well it generalises.
loss, accuracy = model.evaluate(x=X, y=y)
accuracy

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 1.0000 - loss: 0.6265


1.0

In [31]:
# Fetch the trained embedding layer by the name it was given at construction
# and take its first weight array: one row per word index.
embedding_layer = model.get_layer(name='embedding')
weights = embedding_layer.get_weights()[0]
weights

array([[ 0.01587586, -0.03880154, -0.05306739, -0.01057586],
       [ 0.00541151,  0.03424314,  0.08409727,  0.07655231],
       [ 0.00412492,  0.0304434 , -0.04003568,  0.00931121],
       [-0.04466035,  0.07674143, -0.02593524,  0.03047501],
       [ 0.01600984,  0.0340883 ,  0.0486629 ,  0.03856809],
       [-0.06778751, -0.09183896, -0.0072297 ,  0.03641407],
       [-0.02843057, -0.04571807, -0.03580713, -0.04759192],
       [ 0.03186986,  0.09286188,  0.02169219, -0.00905535],
       [-0.03579915,  0.02691886,  0.00278994, -0.02901855],
       [ 0.02683768,  0.02550597,  0.01682508, -0.03234436],
       [ 0.01870188,  0.00747727,  0.01112487, -0.04299109],
       [ 0.0152657 ,  0.02909892, -0.06797381, -0.02065363],
       [ 0.07885258, -0.07058944,  0.00247379, -0.06637497],
       [-0.01395156,  0.02539039, -0.04223986, -0.01686073],
       [-0.08270715, -0.09313475, -0.02227282, -0.02037397],
       [-0.00705319, -0.00551475,  0.0400109 ,  0.03762256],
       [-0.03952348,  0.

In [32]:
# Number of rows in the embedding table (one per index in the hashing space).
weights.shape[0]

30

In [38]:
# Embedding vector for "nice". The row is looked up through the encoder rather
# than a hard-coded index, because one_hot's hash-based indices are not
# guaranteed to be the same in another kernel session.
weights[one_hot("nice", vocab_size)[0]]

array([ 0.03186986,  0.09286188,  0.02169219, -0.00905535], dtype=float32)

In [39]:
# Embedding vector for "food". The row is looked up through the encoder rather
# than a hard-coded index, because one_hot's hash-based indices are not
# guaranteed to be the same in another kernel session.
weights[one_hot("food", vocab_size)[0]]

array([ 0.0152657 ,  0.02909892, -0.06797381, -0.02065363], dtype=float32)

In [40]:
# Embedding vector for "good". The row is looked up through the encoder rather
# than a hard-coded index, because one_hot's hash-based indices are not
# guaranteed to be the same in another kernel session. Words that collide on
# the same index share this row.
weights[one_hot("good", vocab_size)[0]]

array([ 0.06803567, -0.03581112,  0.04765156, -0.09913166], dtype=float32)