# Word Embedding

In [24]:
from tensorflow.keras.preprocessing.text import one_hot

In [31]:
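# Outline of the steps in this notebook:
# - Initialize the example sentences.
# - Apply one_hot encoding to each sentence, using a chosen vocabulary size.
# - Outcome: each sentence becomes a vector of integer word indexes.
# - Pass those indexes through an Embedding layer.
# - Initialize the embedding dimension value.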

In [26]:
# Small toy corpus used throughout the notebook; each entry is one
# lowercase, whitespace-separated sentence.
sentences = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'i am a good boy',
    'i am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [27]:
# Echo the corpus so the reader can confirm its contents before encoding.
sentences

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'i am a good boy',
 'i am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [28]:
# Vocabulary size handed to one_hot() and to the Embedding layer below.
voc_size = 10_000

## One hot representation

In [29]:
# Encode every sentence as a list of integer word indexes (one per word).
# Despite the function's name, the result is a list of indexes, not one-hot
# vectors: the same word gets the same index in every sentence, as the
# printed output in the next cell shows.
onehot_rep = [
    one_hot(sentence, voc_size)
    for sentence in sentences
]

In [30]:
# Inspect the encoded corpus: one inner list of integer indexes per sentence,
# with as many entries as the sentence has words.
print(onehot_rep)

[[6722, 9324, 2002, 3275], [6722, 9324, 2002, 8921], [6722, 4320, 2002, 3172], [2852, 1651, 5082, 736, 6961], [2852, 1651, 5082, 736, 3338], [5789, 6722, 7582, 2002, 8574], [2003, 8084, 5712, 736]]


## Word embedding representation

In [32]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [33]:
import numpy as np

In [34]:
# Bring every encoded sentence to a common length so they can be stacked into
# a single 2-D integer array. With padding='pre' the zeros are added on the
# left, so the actual word indexes stay at the end of each row.
sent_length = 8
embedded_docs = pad_sequences(onehot_rep, maxlen=sent_length, padding='pre')
print(embedded_docs)

[[   0    0    0    0 6722 9324 2002 3275]
 [   0    0    0    0 6722 9324 2002 8921]
 [   0    0    0    0 6722 4320 2002 3172]
 [   0    0    0 2852 1651 5082  736 6961]
 [   0    0    0 2852 1651 5082  736 3338]
 [   0    0    0 5789 6722 7582 2002 8574]
 [   0    0    0    0 2003 8084 5712  736]]


In [35]:
# Embedding dimension: the number of values in each word vector.
dim = 10

In [36]:
# Build a minimal model made of a single Embedding layer. It maps each integer
# word index in a padded sentence (length `sent_length`) to a vector of
# length `dim`.
model = Sequential()
# Use `dim` instead of repeating the literal 10, so the embedding size is
# controlled from the one place where it is defined.
model.add(Embedding(voc_size, dim, input_length=sent_length))
# Compile with the Adam optimizer and mean-squared-error loss. No fit() call
# appears in this section, so the model is only used for look-ups below.
model.compile('adam', 'mse')

2021-10-08 22:59:26.315742: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [37]:
# Print the architecture. The output shape (None, 8, 10) is
# (batch, sentence length, embedding size), and the parameter count is
# vocabulary size x embedding size = 10000 x 10 = 100,000.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [38]:
# Look up the embedding vector for every position of every padded sentence.
# No training call precedes this cell, so the values are the weights the
# Embedding layer was created with. Positions holding the padding index 0 all
# print the same vector.
print(model.predict(embedded_docs))

[[[ 0.03389004  0.02536752  0.02545383  0.03708256  0.04841951
   -0.02678344 -0.01469102 -0.02894928  0.04415143  0.02525022]
  [ 0.03389004  0.02536752  0.02545383  0.03708256  0.04841951
   -0.02678344 -0.01469102 -0.02894928  0.04415143  0.02525022]
  [ 0.03389004  0.02536752  0.02545383  0.03708256  0.04841951
   -0.02678344 -0.01469102 -0.02894928  0.04415143  0.02525022]
  [ 0.03389004  0.02536752  0.02545383  0.03708256  0.04841951
   -0.02678344 -0.01469102 -0.02894928  0.04415143  0.02525022]
  [-0.04636281 -0.02601916  0.02530989 -0.03642897 -0.02429405
   -0.03466612 -0.00718997  0.04947514 -0.01854759 -0.02173584]
  [-0.00043516 -0.01049237 -0.03603063  0.04718112 -0.03791835
   -0.01329758 -0.01817858  0.04206738 -0.01716094 -0.01423267]
  [-0.00724452 -0.03487508  0.01413577  0.0155651   0.01115178
    0.02716875 -0.01442585  0.04754141  0.02046839 -0.04003128]
  [ 0.00542656  0.03600279  0.00405061  0.03336746  0.01424668
    0.014463    0.002432    0.04648768  0.029357

2021-10-08 23:00:37.751723: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


In [40]:
# First padded sentence ('the glass of milk'): four leading zeros of padding
# followed by its four word indexes.
embedded_docs[0]

array([   0,    0,    0,    0, 6722, 9324, 2002, 3275], dtype=int32)

In [42]:
# Embedding output for the first sentence only: 8 positions, each a vector of
# 10 values. The first four rows are identical because they all correspond to
# the padding index 0.
print(model.predict(embedded_docs)[0])

[[ 0.03389004  0.02536752  0.02545383  0.03708256  0.04841951 -0.02678344
  -0.01469102 -0.02894928  0.04415143  0.02525022]
 [ 0.03389004  0.02536752  0.02545383  0.03708256  0.04841951 -0.02678344
  -0.01469102 -0.02894928  0.04415143  0.02525022]
 [ 0.03389004  0.02536752  0.02545383  0.03708256  0.04841951 -0.02678344
  -0.01469102 -0.02894928  0.04415143  0.02525022]
 [ 0.03389004  0.02536752  0.02545383  0.03708256  0.04841951 -0.02678344
  -0.01469102 -0.02894928  0.04415143  0.02525022]
 [-0.04636281 -0.02601916  0.02530989 -0.03642897 -0.02429405 -0.03466612
  -0.00718997  0.04947514 -0.01854759 -0.02173584]
 [-0.00043516 -0.01049237 -0.03603063  0.04718112 -0.03791835 -0.01329758
  -0.01817858  0.04206738 -0.01716094 -0.01423267]
 [-0.00724452 -0.03487508  0.01413577  0.0155651   0.01115178  0.02716875
  -0.01442585  0.04754141  0.02046839 -0.04003128]
 [ 0.00542656  0.03600279  0.00405061  0.03336746  0.01424668  0.014463
   0.002432    0.04648768  0.029357    0.04126594]]
