In [1]:
import numpy as np
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense

In [29]:
reviews = ["Never coming back", "horrible service", "rude waitress", "cold food", "horrible food",
           "awesome", "awesome services!", "rocks", "poor work", "couldn\'t have done better"]

labels = [1 for i in range(5)] + [0 for i in range(5)]   # [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
labels = np.array(labels)

In [8]:
vocab_size = 50
encoded_reviews = [one_hot(d, vocab_size) for d in reviews]
print(f"encoded_reviews : {encoded_reviews}")

encoded_reviews : [[33, 24, 28], [49, 23], [27, 11], [32, 28], [49, 28], [12], [12, 25], [33], [12, 7], [44, 36, 26, 27]]


In [9]:
max_length = 4
padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')
print(padded_reviews)

[[33 24 28  0]
 [49 23  0  0]
 [27 11  0  0]
 [32 28  0  0]
 [49 28  0  0]
 [12  0  0  0]
 [12 25  0  0]
 [33  0  0  0]
 [12  7  0  0]
 [44 36 26 27]]


In [27]:
model = Sequential()
embedding_layer = Embedding(input_dim=vocab_size, output_dim=8, input_length=max_length)
model.add(embedding_layer)
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 4, 8)              400       
                                                                 
 flatten (Flatten)           (None, 32)                0         
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 433
Trainable params: 433
Non-trainable params: 0
_________________________________________________________________


In [30]:
model.fit(padded_reviews, labels, epochs=100, verbose=0)

<keras.callbacks.History at 0x20d1fd78f40>

In [34]:
for embeddings in embedding_layer.get_weights()[0]:
    print([np.round(x, 4) for x in embeddings])

[0.1326, 0.1034, 0.1217, -0.1528, 0.0588, 0.0855, 0.0986, -0.09]
[0.0238, 0.0045, -0.0195, -0.0091, 0.0074, -0.0092, -0.0201, -0.04]
[0.0283, 0.0378, 0.0009, -0.006, -0.0097, 0.0166, -0.0457, -0.0025]
[0.0048, -0.0231, 0.0434, -0.0258, -0.0229, -0.0254, 0.0272, -0.0046]
[0.038, 0.0136, 0.0188, 0.04, -0.0005, -0.048, -0.0094, 0.0316]
[-0.0053, 0.0443, 0.0086, -0.0293, -0.0268, -0.0097, 0.0102, 0.0254]
[0.0412, 0.0227, -0.0143, -0.0398, 0.0419, -0.0434, 0.0052, -0.0402]
[0.1363, 0.1136, 0.1122, -0.0797, 0.0817, 0.0838, 0.0712, -0.1221]
[0.041, -0.0181, 0.0454, 0.0104, 0.0334, 0.0044, -0.0177, 0.0235]
[-0.0287, -0.0489, 0.0124, -0.044, -0.017, 0.0352, 0.042, -0.0149]
[0.0286, -0.0476, 0.0016, 0.042, -0.0074, 0.0153, 0.0244, 0.0167]
[-0.1447, -0.0824, -0.1357, 0.1227, -0.1388, -0.1136, -0.1229, 0.1024]
[0.0504, 0.1039, -0.1, 0.0799, 0.1543, -0.134, -0.0763, 0.066]
[-0.0264, -0.0378, 0.0079, -0.0389, -0.0288, -1e-04, 0.0223, 0.0314]
[0.0022, 0.0365, -0.0085, -0.0198, 0.0185, 0.0392, -0.005,

In [40]:
embeddings = embedding_layer.get_weights()[0]

In [41]:
print(dir(embeddings))

['T', '__abs__', '__add__', '__and__', '__array__', '__array_finalize__', '__array_function__', '__array_interface__', '__array_prepare__', '__array_priority__', '__array_struct__', '__array_ufunc__', '__array_wrap__', '__bool__', '__class__', '__class_getitem__', '__complex__', '__contains__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dir__', '__divmod__', '__dlpack__', '__dlpack_device__', '__doc__', '__eq__', '__float__', '__floordiv__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__ilshift__', '__imatmul__', '__imod__', '__imul__', '__index__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__lshift__', '__lt__', '__matmul__', '__mod__', '__mul__', '__ne__', '__neg__', '__new__', '__or__', '__pos__', '__pow__', '__radd__', '__rand__', '__rdivmod__', '__reduce__', '