<a href="https://colab.research.google.com/github/sarasafaee/nlp_basics/blob/main/word_embedding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding
from tensorflow.keras.models import Sequential

In [6]:
sentences = ['This YouTube channel is perfect',
            'Your videos are quite good',
            'I enjoy this channel',
            'I did not learn much from your videos',
            'Excellent',
            'poor work',
            'it is not a good channel']

In [7]:
vocab_size = 500
encodded_docs = [one_hot(sent, vocab_size) for sent in sentences]
encodded_docs

[[92, 287, 23, 473, 221],
 [430, 186, 414, 16, 467],
 [304, 335, 92, 23],
 [304, 264, 423, 104, 405, 116, 430, 186],
 [27],
 [156, 312],
 [168, 473, 423, 427, 467, 23]]

In [8]:
max_len = 8
padded_docs = pad_sequences(encodded_docs, padding = 'pre', maxlen= max_len)
padded_docs

array([[  0,   0,   0,  92, 287,  23, 473, 221],
       [  0,   0,   0, 430, 186, 414,  16, 467],
       [  0,   0,   0,   0, 304, 335,  92,  23],
       [304, 264, 423, 104, 405, 116, 430, 186],
       [  0,   0,   0,   0,   0,   0,   0,  27],
       [  0,   0,   0,   0,   0,   0, 156, 312],
       [  0,   0, 168, 473, 423, 427, 467,  23]], dtype=int32)

In [9]:
dim = 10
model = Sequential()
model.add(Embedding(vocab_size, dim, input_length= max_len))
model.compile(optimizer = 'adam', loss = 'mse')
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             5000      
                                                                 
Total params: 5000 (19.53 KB)
Trainable params: 5000 (19.53 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [10]:
model.predict(padded_docs)



array([[[-0.03847998,  0.04121744,  0.0134051 , -0.01443634,
         -0.03348687, -0.0378849 ,  0.01684905, -0.01580058,
         -0.00466969, -0.02822484],
        [-0.03847998,  0.04121744,  0.0134051 , -0.01443634,
         -0.03348687, -0.0378849 ,  0.01684905, -0.01580058,
         -0.00466969, -0.02822484],
        [-0.03847998,  0.04121744,  0.0134051 , -0.01443634,
         -0.03348687, -0.0378849 ,  0.01684905, -0.01580058,
         -0.00466969, -0.02822484],
        [-0.02787707, -0.03256278,  0.03265903, -0.03301863,
         -0.03605138, -0.04742096, -0.00700595, -0.02377251,
         -0.03965864,  0.02398414],
        [ 0.04557702,  0.02900311, -0.01810764, -0.04300305,
         -0.04701379,  0.04893518, -0.04627411, -0.03056859,
         -0.04473313,  0.00149549],
        [-0.04340382,  0.01499641,  0.04011906, -0.01922567,
         -0.03915721,  0.01604437,  0.01757279, -0.03581861,
         -0.02045586,  0.00971531],
        [-0.00130011,  0.02439971, -0.04833846, -0.0

In [11]:
padded_docs[1]
model.predict(padded_docs[1])



array([[-0.03847998,  0.04121744,  0.0134051 , -0.01443634, -0.03348687,
        -0.0378849 ,  0.01684905, -0.01580058, -0.00466969, -0.02822484],
       [-0.03847998,  0.04121744,  0.0134051 , -0.01443634, -0.03348687,
        -0.0378849 ,  0.01684905, -0.01580058, -0.00466969, -0.02822484],
       [-0.03847998,  0.04121744,  0.0134051 , -0.01443634, -0.03348687,
        -0.0378849 ,  0.01684905, -0.01580058, -0.00466969, -0.02822484],
       [-0.02018186,  0.02380833,  0.04545284, -0.01947712, -0.02304385,
        -0.02988519,  0.02453799, -0.02957512, -0.03210346, -0.017826  ],
       [-0.01096687,  0.01862052, -0.03590324,  0.04238141,  0.00129429,
        -0.02543373, -0.01807926,  0.00539577, -0.03661361,  0.03836587],
       [-0.01417483,  0.03341857,  0.03185537, -0.01905475,  0.03654763,
         0.03677703, -0.01622742, -0.04338193,  0.01351453,  0.01121348],
       [ 0.0400859 , -0.04938477,  0.00185959,  0.04310516,  0.04228666,
         0.01056092, -0.04954561, -0.02434204