<a href="https://colab.research.google.com/github/namantam1/ml-ai-dnn/blob/main/Word_embedding_using_tesorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Supervised Learning

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np

In [None]:
# Toy corpus: ten short restaurant reviews, five positive followed by five negative.
reviews = [
    'nice food',
    'amazing restaurant',
    'too good',
    'just loved it!',
    'will go again',
    'horrible food',
    'never go there',
    'poor service',
    'poor quality',
    'needs improvement',
]

# Labels aligned by position with `reviews` (1 = positive, 0 = negative).
sentiment = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

# Hyper-parameters shared by the encoding, padding and embedding cells.
vocabolary_size = 100   # number of hash buckets for word indices; also the Embedding input_dim
max_length = 4          # every encoded review is padded to this many tokens
out_embedded_size = 5   # dimensionality of each word vector produced by the Embedding layer

# Build the frame from a dict of columns rather than zip(): zip() silently
# truncates to the shorter list, whereas the dict constructor raises a
# ValueError if the two lists ever get out of sync.
df = pd.DataFrame({"review": reviews, "sentiment": sentiment})
df.head()

Unnamed: 0,review,sentiment
0,nice food,1
1,amazing restaurant,1
2,too good,1
3,just loved it!,1
4,will go again,1


In [None]:
# Quick look at the encoder on a single review: each word is hashed to an
# integer index, using a deliberately tiny vocabulary of 10 buckets here.
sample_review = reviews[1]
tf.keras.preprocessing.text.one_hot(sample_review, n=10)

[2, 1]

In [None]:
# Encode every review as a list of integer word indices.
# one_hot() hashes words with Python's built-in hash(), which is salted per
# process, so the indices change each time the kernel restarts.
# hashing_trick() with hash_function="md5" performs the same encoding with a
# stable hash, making the encoded reviews reproducible across runs.
# NOTE: hashing can still map two different words to the same index.
encoded_review = df.review.apply(
    lambda text: tf.keras.preprocessing.text.hashing_trick(
        text, vocabolary_size, hash_function="md5"
    )
)
encoded_review

0        [48, 22]
1        [47, 46]
2         [36, 5]
3    [20, 35, 84]
4     [1, 54, 19]
5        [32, 22]
6    [52, 54, 55]
7        [64, 94]
8        [64, 82]
9         [11, 2]
Name: review, dtype: object

In [None]:
# Bring every encoded review to the same length so the batch forms a
# rectangular integer array; zeros are appended at the end ("post")
# of reviews shorter than max_length.
padded_review = tf.keras.preprocessing.sequence.pad_sequences(
    encoded_review,
    maxlen=max_length,
    padding="post",
)
padded_review

array([[48, 22,  0,  0],
       [47, 46,  0,  0],
       [36,  5,  0,  0],
       [20, 35, 84,  0],
       [ 1, 54, 19,  0],
       [32, 22,  0,  0],
       [52, 54, 55,  0],
       [64, 94,  0,  0],
       [64, 82,  0,  0],
       [11,  2,  0,  0]], dtype=int32)

In [None]:
# A model consisting of nothing but an (untrained) Embedding layer, used to
# inspect what the layer emits: one out_embedded_size-dimensional vector per
# token position of the padded review.
model = tf.keras.models.Sequential()
model.add(
    tf.keras.layers.Embedding(
        input_dim=vocabolary_size,
        output_dim=out_embedded_size,
        input_length=max_length,
    )
)

model.summary()

# Run only the first padded review through the layer and show the shape
# of the result followed by the vectors themselves.
res = model.predict(padded_review[:1])
display(res.shape)
display(res)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 4, 5)              500       
                                                                 
Total params: 500
Trainable params: 500
Non-trainable params: 0
_________________________________________________________________


(1, 4, 5)

array([[[ 0.04483915,  0.04730549,  0.03211022, -0.03175589,
         -0.03915942],
        [-0.02853294,  0.02670611, -0.04709234,  0.04388363,
         -0.03390338],
        [ 0.0480032 ,  0.01492996, -0.00394572, -0.01552893,
          0.03739699],
        [ 0.0480032 ,  0.01492996, -0.00394572, -0.01552893,
          0.03739699]]], dtype=float32)

In [None]:
# Seed the Python, NumPy and TensorFlow random generators before any weights
# are created, so the initial weights (and therefore the trained embeddings)
# are the same on every Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Sentiment classifier: embedding lookup -> flatten -> two hidden layers ->
# a single sigmoid unit giving the probability of a positive review.
model = tf.keras.models.Sequential(layers=[
    # Named explicitly so the trained layer can be retrieved later with
    # model.get_layer("embedding").
    tf.keras.layers.Embedding(input_dim=vocabolary_size, output_dim=out_embedded_size,
                              input_length=max_length, name="embedding"),
    # Collapse the (max_length, out_embedded_size) matrix into one vector per review.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=100, activation="relu"),
    tf.keras.layers.Dense(units=50, activation="relu"),
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 4, 5)              500       
                                                                 
 flatten_1 (Flatten)         (None, 20)                0         
                                                                 
 dense_3 (Dense)             (None, 100)               2100      
                                                                 
 dense_4 (Dense)             (None, 50)                5050      
                                                                 
 dense_5 (Dense)             (None, 1)                 51        
                                                                 
Total params: 7,701
Trainable params: 7,701
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Number of passes over the training data; affordable because the training
# set is only ten short reviews.
EPOCHS = 1000

# Binary labels with a sigmoid output, hence binary cross-entropy.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# verbose=0 keeps the notebook free of a 1000-line per-epoch log; the History
# object is kept so the training curve can still be inspected afterwards.
history = model.fit(padded_review, df.sentiment, epochs=EPOCHS, verbose=0)

# Show only the metrics reached in the final epoch.
{metric: values[-1] for metric, values in history.history.items()}

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7fb0499561c0>

In [None]:
# Score the training reviews with the fitted model and show the true labels
# next to the predicted probabilities (flattened to one value per review).
predicted = model.predict(padded_review)

true_labels = df["sentiment"].to_numpy()
display(true_labels, predicted.ravel())



array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

array([9.9996042e-01, 9.9998915e-01, 9.9999154e-01, 9.9999666e-01,
       9.9997663e-01, 9.1626185e-05, 7.6990378e-05, 3.1882966e-05,
       2.6149066e-05, 4.3791046e-05], dtype=float32)

In [None]:
# Fetch the embedding layer by the name given when the model was built and
# copy out its first weight array.
embedded_layer = model.get_layer(name="embedding")
layer_weights = embedded_layer.get_weights()
weights = np.array(layer_weights[0])

# Show the array's shape and its first row.
display(weights.shape, weights[0, :])

(100, 5)

array([ 0.14726722,  0.14564395, -0.15086071,  0.1396672 , -0.13746569],
      dtype=float32)