In [1]:
import numpy as np
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten,Embedding

In [2]:
reviews = ['nice food',
           'amazing restaurant',
           'too good',
           'just loved it!',
           'will go again',
           'horrible food',
           'never go there',
           'poor service',
           'poor quality',
           'needs improvement']
sentiment = np.array([1,1,1,1,1,0,0,0,0,0])

In [3]:
one_hot('amazing restaurant',30)

[27, 27]

In [4]:
vocab_size = 30
encoded_reviews = [one_hot(d,vocab_size) for d in reviews]
encoded_reviews

[[2, 17],
 [27, 27],
 [3, 26],
 [5, 4, 26],
 [11, 1, 16],
 [2, 17],
 [1, 1, 24],
 [3, 4],
 [3, 24],
 [22, 19]]

* need some padding cause max lenght is 3 so rest of the need padding

In [5]:
max_length = 3

padded_reviews = pad_sequences(encoded_reviews,maxlen=max_length,padding='post')
padded_reviews

array([[ 2, 17,  0],
       [27, 27,  0],
       [ 3, 26,  0],
       [ 5,  4, 26],
       [11,  1, 16],
       [ 2, 17,  0],
       [ 1,  1, 24],
       [ 3,  4,  0],
       [ 3, 24,  0],
       [22, 19,  0]], dtype=int32)

In [14]:
embeded_vector_size = 4

model = Sequential()
model.add(Embedding(vocab_size, embeded_vector_size, input_length=max_length, name='embedding'))
model.add(Flatten())
model.add(Dense(1,activation='sigmoid'))

In [15]:
X = padded_reviews
y = sentiment

In [16]:
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [17]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 3, 4)              120       
                                                                 
 flatten_1 (Flatten)         (None, 12)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 13        
                                                                 
Total params: 133
Trainable params: 133
Non-trainable params: 0
_________________________________________________________________


In [18]:
model.fit(X,y,epochs=50,verbose=0)

<keras.callbacks.History at 0x7f3fc8b0ccd0>

In [19]:
loss , accuracy = model.evaluate(X,y)
accuracy



0.800000011920929

In [21]:
weights = model.get_layer('embedding').get_weights()[0]
weights

array([[-0.0314142 , -0.06493254, -0.09706067, -0.07242422],
       [ 0.07028067, -0.00894138, -0.04103758, -0.03243817],
       [-0.02880857,  0.05978336,  0.00173136, -0.02060972],
       [ 0.07652824, -0.03864245, -0.02338296, -0.06870939],
       [ 0.03593595, -0.0434747 ,  0.00350312, -0.01341594],
       [-0.0145924 ,  0.0876888 ,  0.07066885,  0.09504179],
       [-0.02577316, -0.04372619,  0.00270408, -0.04185338],
       [ 0.02973056,  0.01609527,  0.00668732,  0.00611676],
       [ 0.02118429,  0.03436064, -0.04781644, -0.03411738],
       [-0.01425415, -0.03154204,  0.04329867,  0.01399295],
       [ 0.00653674, -0.00736402,  0.04275322, -0.02391659],
       [-0.01343706,  0.07420679,  0.01538229,  0.00585263],
       [ 0.02675606,  0.03737288,  0.00606962, -0.0258383 ],
       [-0.02730844,  0.00063884,  0.02533522,  0.02656407],
       [-0.03734788, -0.04574281, -0.01447355,  0.01092777],
       [-0.01251636, -0.04405355,  0.01587956,  0.02618636],
       [ 0.04999624,  0.

In [22]:
len(weights)

30

In [23]:
weights[2]

array([-0.02880857,  0.05978336,  0.00173136, -0.02060972], dtype=float32)

In [24]:
weights[27]

array([-0.08742663,  0.03401563,  0.04934322,  0.02689881], dtype=float32)

In [26]:
from numpy.linalg import norm
cosine = np.dot(weights[2],weights[27])/(norm(weights[2])*norm(weights[27]))
cosine

0.5371698