<a href="https://colab.research.google.com/github/setthawut8/ai/blob/main/%5BEmbedding%20Layer%5D%20tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Word Embedding Layer
(Inspired by https://youtu.be/Fuw0wv3X-0o)

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers 
from tensorflow.keras.layers import Dense, Flatten, Embedding

In [47]:
# Toy corpus: the first five reviews are positive, the last five are negative.
positive_reviews = ['nice food',
                    'amazing restaurant',
                    'too good',
                    'just loved it',
                    'will go again']
negative_reviews = ['horrible food',
                    'never go there',
                    'poor service',
                    'poor quality',
                    'needs improvement']
reviews = positive_reviews + negative_reviews

# Labels aligned position-by-position with `reviews`: 1 = positive, 0 = negative.
sentiments = np.array([1] * len(positive_reviews) + [0] * len(negative_reviews))

## 1) One-Hot Encoding

In [48]:
# Despite its name, `one_hot` returns one integer index per word, not a 0/1 vector.
# Arguments: the text to encode, and the vocabulary size `n` the indices are drawn from.
one_hot("amazing restaurant", n=30)

[25, 2]

In [49]:
# Size of the index space that word indices are drawn from.
vocab_size = 50
# Encode every review as a list of word indices.
# NOTE(review): distinct words can share an index - in the recorded output
# 'food' and 'will' both map to 19 - so indices are not unique per word.
encoded_reviews = [one_hot(review, n=vocab_size) for review in reviews]
encoded_reviews

[[22, 19],
 [38, 42],
 [4, 31],
 [41, 27, 9],
 [19, 37, 31],
 [40, 19],
 [35, 37, 32],
 [7, 44],
 [7, 1],
 [27, 6]]

## 2) Padding

In [50]:
# Reviews contain 2 or 3 words, so the shorter ones are zero-padded at the
# end ('post') to give every row the same length.
maxlen = 3
padded_reviews = pad_sequences(
    sequences=encoded_reviews,
    maxlen=maxlen,
    padding='post',
)
padded_reviews

array([[22, 19,  0],
       [38, 42,  0],
       [ 4, 31,  0],
       [41, 27,  9],
       [19, 37, 31],
       [40, 19,  0],
       [35, 37, 32],
       [ 7, 44,  0],
       [ 7,  1,  0],
       [27,  6,  0]], dtype=int32)

## 3) Embedding Vector Size (features of each word) & Model

In [51]:
# Embedding vector size = number of learned features per word index.
embeded_vector_size = 4

# Fix TensorFlow's global seed before any weights are created so that weight
# initialisation (and hence the trained embeddings inspected later) is
# repeatable when the notebook is re-run from a fresh kernel.
tf.random.set_seed(42)

# Architecture:
#   Embedding: (batch, maxlen) word indices -> (batch, maxlen, embeded_vector_size)
#   Flatten:   -> (batch, maxlen * embeded_vector_size)
#   Dense(1, sigmoid): -> a single probability-like output per review
model = Sequential()
# The layer is named explicitly so its weights can be looked up by name later.
model.add(Embedding(vocab_size, embeded_vector_size, input_length=maxlen, name='embedding'))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))

In [52]:
# Model inputs are the padded index sequences; targets are the sentiment labels.
X, y = padded_reviews, sentiments

In [53]:
# Binary-classification setup: Adam optimizer, binary cross-entropy loss,
# and accuracy reported as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 3, 4)              200       
                                                                 
 flatten_4 (Flatten)         (None, 12)                0         
                                                                 
 dense_4 (Dense)             (None, 1)                 13        
                                                                 
Total params: 213
Trainable params: 213
Non-trainable params: 0
_________________________________________________________________


In [54]:
# Train for 50 epochs with progress logging switched off.
model.fit(x=X, y=y, epochs=50, verbose=0)

<keras.callbacks.History at 0x7f4265cac910>

In [55]:
# Score the model on the same data it was trained on, so this accuracy
# measures fit to the training set rather than generalization.
loss, accuracy = model.evaluate(x=X, y=y)
accuracy



1.0

In [56]:
# Pull the learned embedding matrix out of the layer named 'embedding';
# it holds one row per vocabulary index.
weights = model.get_layer(name='embedding').get_weights()[0]
weights.shape[0]

50

In [63]:
# Show the learned embedding vectors for two vocabulary indices.
# NOTE(review): in the recorded encodings 22 is the index of 'nice', while
# index 3 is not used by any review - confirm these are the intended rows.
print(weights[22], weights[3], sep='\n')

[ 0.09223264 -0.00312139 -0.01635139  0.01045673]
[ 0.01558313 -0.01015184 -0.04265321  0.01204729]


In [62]:
# See the link below for an example of loading pre-trained GloVe weights into the embedding layer:
#https://machinelearningmastery.com/use-word-embedding-layers-deep-learning-keras/#:~:text=4.%20Example%20of%20Using%20Pre%2DTrained%20GloVe%20Embedding

# Word2Vec
(https://youtu.be/hQwFeIupNP0)