In [1]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Flatten, Input

In [2]:
# Toy corpus for the embedding demo, kept as two groups so the labels below
# can be derived from the group sizes instead of being typed out by hand.
positive_reviews = [
    'nice food',
    'amazing restaurant',
    'too good',
    'just loved it!',
    'will go again',
]
negative_reviews = [
    'horrible food',
    'never go there',
    'poor service',
    'poor quality',
    'needs improvement',
]
reviews = positive_reviews + negative_reviews

# Labels aligned with `reviews`: 1 = positive, 0 = negative.
sentiment = np.array([1] * len(positive_reviews) + [0] * len(negative_reviews))

In [3]:
# Integer (index) encoding, not one-hot: the tokenizer assigns every distinct
# word in the corpus an integer id (see `tokenizer.word_index`).
# vocab_size is also reused later as the number of rows in the Embedding layer.
vocab_size = 30

tokenizer = Tokenizer(num_words=vocab_size)
# Build the word -> index vocabulary from the review texts.
tokenizer.fit_on_texts(reviews)

In [4]:
# Show the learned word -> integer id mapping.
tokenizer.word_index

{'food': 1,
 'go': 2,
 'poor': 3,
 'nice': 4,
 'amazing': 5,
 'restaurant': 6,
 'too': 7,
 'good': 8,
 'just': 9,
 'loved': 10,
 'it': 11,
 'will': 12,
 'again': 13,
 'horrible': 14,
 'never': 15,
 'there': 16,
 'service': 17,
 'quality': 18,
 'needs': 19,
 'improvement': 20}

In [5]:
# Replace each review by the list of integer ids of its words.
encoded_reviews = tokenizer.texts_to_sequences(reviews)
encoded_reviews

[[4, 1],
 [5, 6],
 [7, 8],
 [9, 10, 11],
 [12, 2, 13],
 [14, 1],
 [15, 2, 16],
 [3, 17],
 [3, 18],
 [19, 20]]

In [6]:
# Bring every encoded review to the same length so they stack into one array.
max_length = 3

padded_reviews = pad_sequences(
    encoded_reviews,
    maxlen=max_length,
    padding='post',  # zeros are appended after the word ids, not before them
)
padded_reviews

array([[ 4,  1,  0],
       [ 5,  6,  0],
       [ 7,  8,  0],
       [ 9, 10, 11],
       [12,  2, 13],
       [14,  1,  0],
       [15,  2, 16],
       [ 3, 17,  0],
       [ 3, 18,  0],
       [19, 20,  0]])

In [7]:
# Length of the vector learned for each vocabulary index.
embedded_vector_size = 4

# The input shape is declared with an explicit Input layer instead of
# Embedding(input_length=...): `input_length` is deprecated in Keras 3 (it only
# emits a warning and leaves the model unbuilt), whereas an Input layer works
# in both Keras 2 and Keras 3 and builds the model immediately.
model = Sequential()
model.add(Input(shape=(max_length,), dtype='int32'))  # one padded review of word ids
# The layer is named so its trained weight matrix can be fetched by name later.
model.add(Embedding(vocab_size, embedded_vector_size, name='embedding'))
model.add(Flatten())  # concatenate the per-word vectors of a review
model.add(Dense(1, activation='sigmoid'))  # single sigmoid unit for the 0/1 label



In [8]:
# Features are the padded id sequences; targets are the 0/1 sentiment labels.
X, y = padded_reviews, sentiment

In [17]:
# Binary classification setup: cross-entropy loss, accuracy reported per epoch.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [21]:
# Train on the full toy dataset for 50 epochs, printing per-epoch progress.
model.fit(
    x=X,
    y=y,
    epochs=50,
    verbose=1,
)

Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.5000 - loss: 0.6895
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - accuracy: 0.5000 - loss: 0.6881
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.5000 - loss: 0.6868
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - accuracy: 0.6000 - loss: 0.6855
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - accuracy: 0.6000 - loss: 0.6842
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - accuracy: 0.6000 - loss: 0.6829
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step - accuracy: 0.6000 - loss: 0.6815
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.6000 - loss: 0.6802
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

<keras.src.callbacks.history.History at 0x1c13931a120>

In [29]:
# Print the layer-by-layer overview of the model.
model.summary()

In [27]:
# Score the model on the same X/y it was fitted on; this measures fit to the
# training data, not performance on unseen reviews.
model.evaluate(X,y)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - accuracy: 1.0000 - loss: 0.6185


[0.6185365319252014, 1.0]

In [37]:
# Pull the trained embedding matrix out of the layer named 'embedding';
# it is the first entry of that layer's weight list.
embedding_layer = model.get_layer('embedding')
weights = embedding_layer.get_weights()[0]
weights

array([[ 0.00760365, -0.04077124,  0.01009945,  0.02795788],
       [ 0.0067455 , -0.00943165,  0.01764909, -0.03157214],
       [-0.04416599, -0.02904432,  0.0349124 , -0.0403167 ],
       [ 0.0253161 , -0.07208843,  0.04258391,  0.10750379],
       [-0.03983277,  0.09372105, -0.03066111, -0.03153707],
       [-0.08827002,  0.09051858, -0.08269653, -0.04763479],
       [-0.07594646, -0.08253967,  0.01388633, -0.06220239],
       [-0.0502503 ,  0.09427023, -0.01602718, -0.01753966],
       [-0.0943645 , -0.0563799 ,  0.00657706, -0.05412415],
       [-0.04741317,  0.07301871, -0.07914773, -0.00916681],
       [-0.01363947, -0.03059234,  0.08607233, -0.00591755],
       [-0.06143253,  0.00274314, -0.01167533,  0.08887132],
       [-0.05119812,  0.00800811, -0.09144018, -0.00499722],
       [-0.0918686 ,  0.07578178, -0.0587894 ,  0.09971076],
       [ 0.08069103, -0.0112391 ,  0.08223619,  0.01411799],
       [ 0.05946832, -0.05895283,  0.00424831,  0.0484579 ],
       [ 0.07119505, -0.

In [41]:
len(weights)
# One row per index accepted by the Embedding layer, i.e. vocab_size = 30 rows
# (more than the 20 words actually present in tokenizer.word_index).

30

In [43]:
# Look the row up through the tokenizer's vocabulary rather than a hand-copied
# index, so the cell stays correct if the corpus (and thus the ids) changes.
weights[tokenizer.word_index['nice']]  # embedding vector learned for 'nice'

array([-0.03983277,  0.09372105, -0.03066111, -0.03153707], dtype=float32)

In [45]:
# Look the row up through the tokenizer's vocabulary rather than a hand-copied
# index, so the cell stays correct if the corpus (and thus the ids) changes.
weights[tokenizer.word_index['amazing']]  # embedding vector learned for 'amazing'

array([-0.08827002,  0.09051858, -0.08269653, -0.04763479], dtype=float32)