# Creating Word Embedding Vector using Supervised Learning Technique

In [1]:
import numpy as np 
import tensorflow as tf 

In [2]:
reviews = [
    'nice food', 
    'amazing resturant', 
    'too good', 
    'just loved it!',
    'will go again', 
    'horrible food', 
    'never go there',
    'poor service', 
    'poor quality', 
    'needs improvement'
]

sentiments = np.array([1,1,1,1,1,0,0,0,0,0])

In [3]:
# initialize the vocab size. Say its 30 
VOCAB_SIZE=30

In [4]:
# for demo - below will change the text to one hot encoded matrix for values between 0 to 30 
tf.keras.preprocessing.text.one_hot(reviews[2], 30)

[28, 24]

In [5]:
encoded_reviews = [tf.keras.preprocessing.text.one_hot(reviews[i], VOCAB_SIZE) for i, value in enumerate(reviews)]
encoded_reviews

[[3, 5],
 [25, 27],
 [28, 24],
 [19, 24, 5],
 [8, 21, 2],
 [15, 5],
 [5, 21, 17],
 [2, 22],
 [2, 27],
 [9, 21]]

In [6]:
# Since we need to keep number of neurons stagnant; we can define the max size 
MAX_SIZE = 3

In [7]:
# Lets pad sequences using keras 
padded_reviews = tf.keras.preprocessing.sequence.pad_sequences(encoded_reviews, maxlen=MAX_SIZE, padding="post")
padded_reviews

array([[ 3,  5,  0],
       [25, 27,  0],
       [28, 24,  0],
       [19, 24,  5],
       [ 8, 21,  2],
       [15,  5,  0],
       [ 5, 21, 17],
       [ 2, 22,  0],
       [ 2, 27,  0],
       [ 9, 21,  0]], dtype=int32)

In [8]:
# Create Embedded Vector size 
EMBEDDED_VECTOR_SIZE=5

# create a model 
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Embedding(VOCAB_SIZE, EMBEDDED_VECTOR_SIZE, input_length=MAX_SIZE, name="embedding"))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(1, activation="sigmoid"))



In [9]:
# my X are reviews and y are sentiment vectors 
X=padded_reviews
y=sentiments

In [10]:
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

2025-06-22 17:58:28.616878: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2025-06-22 17:58:28.616943: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-06-22 17:58:28.616950: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-06-22 17:58:28.616987: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-06-22 17:58:28.617009: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [11]:
model.summary()

In [12]:
model.fit(X, y, epochs=50, verbose=0)

2025-06-22 17:58:33.907880: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


<keras.src.callbacks.history.History at 0x344d88ed0>

In [13]:
loss ,accuracy = model.evaluate(X, y)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 171ms/step - accuracy: 1.0000 - loss: 0.6223


In [15]:
loss ,accuracy

(0.6222866773605347, 1.0)

In [19]:
weights = model.get_layer('embedding').get_weights()[0]

In [20]:
weights.shape

(30, 5)

In [21]:
# Lets check the weights given for nice and amazing i.e. 3 and 25 as per the existing model 
weights[3]

array([-0.0251201 , -0.00345522, -0.09038085,  0.02633   , -0.00059451],
      dtype=float32)

In [22]:
weights[25]

array([-0.00650306, -0.00773259, -0.03421186,  0.04236356, -0.0897515 ],
      dtype=float32)

In [23]:
# measure consine similarity of these 2 vectors 
cosine_distance_loss = tf.keras.losses.CosineSimilarity()(weights[3], weights[25])
cosine_similarity_keras = 1 - cosine_distance_loss
cosine_similarity_keras

<tf.Tensor: shape=(), dtype=float32, numpy=1.4328821>

In [24]:
# for amazing and poor - 25 and 2 
cosine_distance_loss = tf.keras.losses.CosineSimilarity()(weights[25], weights[2])
cosine_similarity_keras = 1 - cosine_distance_loss
cosine_similarity_keras

<tf.Tensor: shape=(), dtype=float32, numpy=0.3105951>