# Import libs

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, Flatten, Embedding

# Prepare Dataset

In [2]:
# Toy corpus: ten short restaurant reviews. The first five are labelled
# positive and the last five negative (see `sentiment` below).
reviews = [
    'nice food',
    'amazing restaurant',
    'too good',
    'just loved it!',
    'will go again',
    'horrible food',
    'never go there',
    'poor service',
    'poor quality',
    'needs improvement',
]

# Binary labels aligned index-by-index with `reviews` (1 = positive, 0 = negative).
sentiment = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

# Guard against the two collections drifting apart when reviews are edited.
assert len(reviews) == len(sentiment), "each review needs exactly one label"

In [3]:
vocab_size = 50

# Despite its name, one_hot() does not build one-hot vectors: it hashes every
# word of a review to an integer index below vocab_size (0 is never assigned),
# so two different words can collide on the same index.
# NOTE: the default hash is Python's built-in hash(), which Keras documents as
# not stable across runs -- the indices in the saved output may differ after a
# kernel restart.
embed_reviews = [one_hot(review, vocab_size) for review in reviews]
embed_reviews

[[3, 13],
 [37, 13],
 [18, 36],
 [2, 46, 32],
 [16, 48, 38],
 [8, 13],
 [2, 48, 35],
 [24, 27],
 [24, 15],
 [23, 49]]

In [7]:
max_seq_length = 3

# Right-pad every encoded review with zeros so that all rows have exactly
# max_seq_length entries and can be stacked into a single 2-D array.
padded_reviews = pad_sequences(
    embed_reviews,
    maxlen=max_seq_length,
    padding='post',
)
padded_reviews

array([[ 3, 13,  0],
       [37, 13,  0],
       [18, 36,  0],
       [ 2, 46, 32],
       [16, 48, 38],
       [ 8, 13,  0],
       [ 2, 48, 35],
       [24, 27,  0],
       [24, 15,  0],
       [23, 49,  0]])

In [9]:
embed_vector_size = 5

# Fix TensorFlow's global seed before any layer is created so that the random
# weight initialisation does not change from one run to the next.
tf.random.set_seed(42)

model = keras.Sequential([
    # Trainable lookup table: one embed_vector_size-dimensional vector per
    # word index. The layer is named so it can be fetched with get_layer()
    # once training is done.
    Embedding(vocab_size, embed_vector_size, input_length=max_seq_length, name="Embedding"),
    # Concatenate the per-word vectors into one flat feature vector per review.
    Flatten(),
    # Single sigmoid unit producing the predicted probability of label 1.
    Dense(1, activation="sigmoid"),
])

model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Embedding (Embedding)       (None, 3, 5)              250       
                                                                 
 flatten_1 (Flatten)         (None, 15)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 16        
                                                                 
Total params: 266
Trainable params: 266
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Train for 50 epochs on the whole toy dataset; verbose=0 hides the per-epoch log.
model.fit(x=padded_reviews, y=sentiment, epochs=50, verbose=0)

<keras.callbacks.History at 0x2a77a5fdf60>

In [15]:
# Returns [loss, accuracy]. This scores the same samples the model was trained
# on, so it shows how well the data was fitted, not how well it generalises.
model.evaluate(x=padded_reviews, y=sentiment)



[0.39829856157302856, 1.0]

In [22]:
# Fetch the trained embedding layer by name; its first (and only) weight array
# is the lookup matrix with one row per word index.
embedding_layer = model.get_layer("Embedding")
weights = embedding_layer.get_weights()[0]

In [27]:
## embeddings
### Resolve the word indices at run time instead of hard-coding them:
### one_hot() relies on Python's built-in hash(), which is not stable across
### kernel restarts, so "nice -> 3" and "good -> 36" only held for the session
### that produced the saved output.
nice_idx = one_hot("nice", vocab_size)[0]
good_idx = one_hot("good", vocab_size)[0]

print(weights[nice_idx])
print(weights[good_idx])

[0.19070357 0.10064964 0.17045629 0.12621589 0.11716728]
[-0.1582198   0.15729532 -0.17512059 -0.18721211 -0.14537744]


In [28]:
## embeddings
### Resolve the word indices at run time instead of hard-coding them:
### one_hot() relies on Python's built-in hash(), which is not stable across
### kernel restarts, so "horrible -> 8" and "poor -> 24" only held for the
### session that produced the saved output.
horrible_idx = one_hot("horrible", vocab_size)[0]
poor_idx = one_hot("poor", vocab_size)[0]

print(weights[horrible_idx])
print(weights[poor_idx])

[-0.12102377 -0.12180194 -0.13093184 -0.18050157 -0.12543216]
[-0.17251255 -0.18605933 -0.13183947 -0.15586282 -0.11665632]


In [29]:
# Persist the trained model to the relative path "model"; in the recorded run
# this wrote a directory containing an assets/ folder.
model.save(filepath="model")



INFO:tensorflow:Assets written to: model\assets


INFO:tensorflow:Assets written to: model\assets
