In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import one_hot

In [4]:
sentences=[  'the glass of milk',
     'the glass of juice',
     'the cup of tea',
    'I am a good boy',
     'I am a good developer',
     'understand the meaning of words',
     'your videos are good',]
sentences

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [5]:
# initialise vocab size (needed by one_hot encoder)
vocabulary_size = 10000

### One Hot Representation

In [8]:
one_hot_representation = [one_hot(word, vocabulary_size) for word in sentences]
one_hot_representation

[[5305, 9219, 3091, 8300],
 [5305, 9219, 3091, 5134],
 [5305, 8467, 3091, 5836],
 [8488, 7639, 1940, 2775, 8139],
 [8488, 7639, 1940, 2775, 6517],
 [4465, 5305, 413, 3091, 9732],
 [6112, 2885, 5253, 2775]]

### Word Embedding Representation

In [17]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import math


In [19]:
# sentences_length = [len(rep) for rep in one_hot_representation]
# max_length = max(sentences_length)

padded = pad_sequences(one_hot_representation, padding="post", maxlen=8)
padded


array([[5305, 9219, 3091, 8300,    0,    0,    0,    0],
       [5305, 9219, 3091, 5134,    0,    0,    0,    0],
       [5305, 8467, 3091, 5836,    0,    0,    0,    0],
       [8488, 7639, 1940, 2775, 8139,    0,    0,    0],
       [8488, 7639, 1940, 2775, 6517,    0,    0,    0],
       [4465, 5305,  413, 3091, 9732,    0,    0,    0],
       [6112, 2885, 5253, 2775,    0,    0,    0,    0]], dtype=int32)

In [20]:
# set number of word features / dimensions
dims = 15

In [24]:
model = Sequential()
model.add(Embedding(vocabulary_size, dims, input_length=8))
model.compile(optimizer="adam", loss="mse")

In [25]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 15)             150000    
Total params: 150,000
Trainable params: 150,000
Non-trainable params: 0
_________________________________________________________________


In [27]:
y_predict = model.predict(padded)

In [28]:
y_predict[0]

array([[-0.03243159,  0.03894584, -0.02255585,  0.03705763, -0.02539045,
         0.0050212 ,  0.01286564, -0.01472089,  0.01243739, -0.0386668 ,
         0.03280267, -0.03563291, -0.03540761,  0.001395  ,  0.01222966],
       [-0.04214566,  0.03893587,  0.047633  , -0.01741006,  0.00444462,
         0.02162782, -0.01613265,  0.02670724,  0.00922976,  0.03591191,
        -0.00718588, -0.03414162, -0.02848676, -0.00556102, -0.01119045],
       [ 0.01458139,  0.03671593, -0.0064689 ,  0.0128732 ,  0.02358225,
         0.00536178,  0.00716005, -0.04985807,  0.04758209,  0.02042777,
         0.0055116 , -0.01175649, -0.03521267, -0.02941729,  0.02394195],
       [ 0.04132408, -0.04446583, -0.02941412, -0.02392665, -0.02145866,
        -0.00569179,  0.01903151, -0.03258298,  0.03748168,  0.03108878,
         0.00434745, -0.01412719,  0.00450758,  0.0111858 , -0.01274691],
       [-0.0263589 , -0.02272595, -0.04362947,  0.01302197, -0.03842043,
        -0.01965245, -0.04014252,  0.00728333, 

In [30]:
model.get_weights()

[array([[-0.0263589 , -0.02272595, -0.04362947, ..., -0.03716709,
          0.03092046, -0.03345747],
        [-0.00259513,  0.04482089, -0.02629181, ..., -0.03773407,
          0.04211772,  0.01599195],
        [ 0.01623168, -0.03530085,  0.02973595, ..., -0.00976204,
          0.00884013, -0.04394472],
        ...,
        [-0.00578544,  0.00283064, -0.02904643, ..., -0.00318997,
          0.0263673 , -0.00819948],
        [-0.02025265,  0.00412619,  0.02375838, ..., -0.02619876,
          0.00672618, -0.03050303],
        [-0.01006657,  0.03020277,  0.02829039, ..., -0.0373757 ,
         -0.02255172,  0.01047849]], dtype=float32)]