<a href="https://colab.research.google.com/github/namantam1/ml-ai-dnn/blob/main/Word_embedding_using_tesorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Supervised Learning

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np

In [None]:
reviews = ['nice food',
        'amazing restaurant',
        'too good',
        'just loved it!',
        'will go again',
        'horrible food',
        'never go there',
        'poor service',
        'poor quality',
        'needs improvement']

sentiment = [1,1,1,1,1,0,0,0,0,0]

vocabolary_size = 100
max_length = 4
out_embedded_size = 5

df = pd.DataFrame(zip(reviews, sentiment), columns=["review", "sentiment"])
df.head()

Unnamed: 0,review,sentiment
0,nice food,1
1,amazing restaurant,1
2,too good,1
3,just loved it!,1
4,will go again,1


In [None]:
tf.keras.preprocessing.text.one_hot(reviews[1], 10)

[2, 1]

In [None]:
encoded_review = df.review.apply(lambda x: tf.keras.preprocessing.text.one_hot(x, vocabolary_size))
encoded_review

0        [48, 22]
1        [47, 46]
2         [36, 5]
3    [20, 35, 84]
4     [1, 54, 19]
5        [32, 22]
6    [52, 54, 55]
7        [64, 94]
8        [64, 82]
9         [11, 2]
Name: review, dtype: object

In [None]:
padded_review = tf.keras.preprocessing.sequence.pad_sequences(encoded_review, maxlen=max_length, padding="post")
padded_review

array([[48, 22,  0,  0],
       [47, 46,  0,  0],
       [36,  5,  0,  0],
       [20, 35, 84,  0],
       [ 1, 54, 19,  0],
       [32, 22,  0,  0],
       [52, 54, 55,  0],
       [64, 94,  0,  0],
       [64, 82,  0,  0],
       [11,  2,  0,  0]], dtype=int32)

In [None]:
model = tf.keras.models.Sequential(layers=[
    tf.keras.layers.Embedding(input_dim=vocabolary_size, output_dim=out_embedded_size, input_length=max_length)
])

model.summary()

res = model.predict(padded_review[:1])
display(res.shape, res)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 4, 5)              500       
                                                                 
Total params: 500
Trainable params: 500
Non-trainable params: 0
_________________________________________________________________


(1, 4, 5)

array([[[ 0.04483915,  0.04730549,  0.03211022, -0.03175589,
         -0.03915942],
        [-0.02853294,  0.02670611, -0.04709234,  0.04388363,
         -0.03390338],
        [ 0.0480032 ,  0.01492996, -0.00394572, -0.01552893,
          0.03739699],
        [ 0.0480032 ,  0.01492996, -0.00394572, -0.01552893,
          0.03739699]]], dtype=float32)

In [17]:
model = tf.keras.models.Sequential(layers=[
    tf.keras.layers.Embedding(input_dim=vocabolary_size, output_dim=out_embedded_size, 
                              input_length=max_length, name="embedding"),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=100, activation="relu"),
    tf.keras.layers.Dense(units=50, activation="relu"),
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 4, 5)              500       
                                                                 
 flatten_2 (Flatten)         (None, 20)                0         
                                                                 
 dense_6 (Dense)             (None, 100)               2100      
                                                                 
 dense_7 (Dense)             (None, 50)                5050      
                                                                 
 dense_8 (Dense)             (None, 1)                 51        
                                                                 
Total params: 7,701
Trainable params: 7,701
Non-trainable params: 0
_________________________________________________________________


In [18]:
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.fit(padded_review, df.sentiment, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fb048016af0>

In [None]:
predicted = model.predict(padded_review)

display(df.sentiment.to_numpy(), predicted.reshape(-1))



array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

array([9.9996042e-01, 9.9998915e-01, 9.9999154e-01, 9.9999666e-01,
       9.9997663e-01, 9.1626185e-05, 7.6990378e-05, 3.1882966e-05,
       2.6149066e-05, 4.3791046e-05], dtype=float32)

In [None]:
embedded_layer = model.get_layer("embedding")
weights = np.array(embedded_layer.get_weights()[0])

display(weights.shape, weights[0])

(100, 5)

array([ 0.14726722,  0.14564395, -0.15086071,  0.1396672 , -0.13746569],
      dtype=float32)