# Embeddings Explained

In [2]:
# Toy corpus of ten short phrases used throughout the notebook.
sentences = [
    # machine-learning flavoured phrases
    "Machine Learning",
    "Model training",
    "Supervised learning",
    "Unsupervised learning",
    "Reinforcement learning",
    # computer-systems flavoured phrases
    "Computer Hardware Engineering",
    "Computer Security",
    "Computer architecture",
    "Operating System",
    "Parallel computing",
]

In [3]:
# Tokenise every sentence on whitespace: one list of word tokens per sentence.
token_sentences = list(map(str.split, sentences))
token_sentences

[['Machine', 'Learning'],
 ['Model', 'training'],
 ['Supervised', 'learning'],
 ['Unsupervised', 'learning'],
 ['Reinforcement', 'learning'],
 ['Computer', 'Hardware', 'Engineering'],
 ['Computer', 'Security'],
 ['Computer', 'architecture'],
 ['Operating', 'System'],
 ['Parallel', 'computing']]

In [4]:
# Collect the vocabulary: every distinct token that occurs in any sentence.
# Tokens are compared as-is, so 'Learning' and 'learning' stay separate entries.
all_tokens = {token for tokens in token_sentences for token in tokens}

In [5]:
# Turn the vocabulary into a sorted list so that it is easier to read.
# Plain string ordering is used, so capitalised tokens come before lowercase ones.
all_tokens = list(all_tokens)
all_tokens.sort()
all_tokens

['Computer',
 'Engineering',
 'Hardware',
 'Learning',
 'Machine',
 'Model',
 'Operating',
 'Parallel',
 'Reinforcement',
 'Security',
 'Supervised',
 'System',
 'Unsupervised',
 'architecture',
 'computing',
 'learning',
 'training']

In [None]:
# Integer encoding using Sklearn
from sklearn.preprocessing import LabelEncoder

# Learn the token -> integer mapping from the vocabulary first ...
label_encoder = LabelEncoder().fit(all_tokens)
# ... then apply it to the same vocabulary to see the id assigned to each token.
integer_encoded = label_encoder.transform(all_tokens)
integer_encoded

In [None]:
# Encode each sentence as an array of token ids.
# Every id is shifted by +1, presumably so that 0 stays free for the padding
# value that is appended when the sentences are padded to a common length.
encoded_sentences = [
    label_encoder.transform(sentence.split(' ')) + 1
    for sentence in sentences
]

encoded_sentences

In [None]:
from keras.preprocessing.sequence import pad_sequences
# The sentences have different lengths, so they are padded to a common length.
# The target length is derived from the data rather than hard-coded: with a fixed
# maxlen, any sentence longer than that value would be silently truncated.
max_length = max(len(encoded) for encoded in encoded_sentences)  # 3 for the corpus above
# padding='post' puts the zeros after the real token ids (at the end of each row).
padded_enc_sentences = pad_sequences(encoded_sentences, maxlen=max_length, padding='post')
padded_enc_sentences

## Building an Embedding Layer 

In [None]:
# Embedding is imported from the public keras.layers namespace: the
# keras.layers.embeddings submodule path is not available in newer Keras releases.
from keras.models import Sequential
from keras.layers import Embedding
from keras.layers import Flatten
from keras.layers import Dense

In [None]:
# Build a model that consists of a single embedding layer.
model = Sequential()
# The token ids were shifted by +1 and 0 is used as the padding value, so the ids
# fed to the layer span 0 .. len(all_tokens). input_dim has to be (largest id + 1),
# i.e. len(all_tokens) + 1; with len(all_tokens) the highest token id would fall
# outside the embedding table.
embedding_layer = Embedding(
    input_dim=len(all_tokens) + 1,
    output_dim=2,             # every token id is mapped to a 2-dimensional vector
    input_length=max_length,  # length of each padded sentence
)
model.add(embedding_layer)
# Compiled with the Adam optimizer and mean-squared-error loss.
model.compile('adam', 'mse')

In [None]:
# Look up the embedding vectors for the first sentence.
first_padded = padded_enc_sentences[0]
print(sentences[0])
print(first_padded.shape)
# Add a leading batch axis so the single sentence is passed as a batch of one.
model.predict(first_padded[None, :])