In [5]:
# Toy corpus: three short sentences, each separated by a period.
data = (
    "Deep Learning(also known as deep structured learning) is a family of machine learning. "
    "It consists of artificial neural network. "
    "Learning can be supervised or unsupervised"
)

In [6]:
# Split the corpus into sentences on periods. Fragments that are empty or
# whitespace-only (produced by a trailing "." or by "..") are dropped so they
# do not become empty training sentences; kept fragments are left untouched.
sentences = [fragment for fragment in data.split(".") if fragment.strip()]

In [7]:
# Display the sentence fragments produced by the split above.
sentences

['Deep Learning(also known as deep structured learning) is a family of machine learning',
 ' It consists of artificial neural network',
 ' Learning can be supervised or unsupervised']

In [19]:
# cleaning: every character that is neither alphanumeric nor whitespace is
# replaced by a single space, then the sentence is lower-cased.
clean_sent = [
    "".join(ch if (ch.isalnum() or ch.isspace()) else " " for ch in raw_sentence).lower()
    for raw_sentence in sentences
]

clean_sent
    

['deep learning also known as deep structured learning  is a family of machine learning',
 ' it consists of artificial neural network',
 ' learning can be supervised or unsupervised']

In [30]:
# Tokenize the cleaned sentences and encode each one as a list of integer
# word ids.
from tensorflow.keras.preprocessing.text import Tokenizer
tokenize= Tokenizer()
# Build the vocabulary from the cleaned sentences (must run before encoding).
tokenize.fit_on_texts(clean_sent)
# One list of word ids per sentence, in the same order as clean_sent.
sequences = tokenize.texts_to_sequences(clean_sent)
# Display the encoded sentences.
sequences

[[2, 1, 4, 5, 6, 2, 7, 1, 8, 9, 10, 3, 11, 1],
 [12, 13, 3, 14, 15, 16],
 [1, 17, 18, 19, 20, 21]]

In [35]:
# Build two lookup tables: word id -> word and word -> word id.
# Each encoded sentence is paired position-by-position with the
# whitespace-split words of the matching cleaned sentence.
index_words = {}
words_to_index = {}
for sent_no, encoded in enumerate(sequences):
    tokens = clean_sent[sent_no].split()
    for pos in range(len(encoded)):
        token, token_id = tokens[pos], encoded[pos]
        index_words[token_id] = token
        words_to_index[token] = token_id

print(index_words)
print("\n")
print(words_to_index)

{2: 'deep', 1: 'learning', 4: 'also', 5: 'known', 6: 'as', 7: 'structured', 8: 'is', 9: 'a', 10: 'family', 3: 'of', 11: 'machine', 12: 'it', 13: 'consists', 14: 'artificial', 15: 'neural', 16: 'network', 17: 'can', 18: 'be', 19: 'supervised', 20: 'or', 21: 'unsupervised'}


{'deep': 2, 'learning': 1, 'also': 4, 'known': 5, 'as': 6, 'structured': 7, 'is': 8, 'a': 9, 'family': 10, 'of': 3, 'machine': 11, 'it': 12, 'consists': 13, 'artificial': 14, 'neural': 15, 'network': 16, 'can': 17, 'be': 18, 'supervised': 19, 'or': 20, 'unsupervised': 21}


In [41]:
# Build (context, target) training pairs.
# For every position that has `context_size` words on both sides, the target
# is the word at that position and the context is the `context_size` word ids
# before it followed by the `context_size` word ids after it.
context_size=2
targets=[]
contexts=[]

# Vocabulary size plus one: used later as the embedding input dimension and
# as the number of output classes.
total_words = len(tokenize.word_index)+1
for sequence in sequences:
    for i in range(context_size, len(sequence)-context_size):
        target = sequence[i]
        # Derive the window from context_size instead of hard-coding the
        # offsets, so changing context_size changes the window consistently.
        context = sequence[i-context_size:i] + sequence[i+1:i+context_size+1]
        targets.append(target)
        contexts.append(context)

print(contexts)
print("\n")
print(targets)

[[2, 1, 5, 6], [1, 4, 6, 2], [4, 5, 2, 7], [5, 6, 7, 1], [6, 2, 1, 8], [2, 7, 8, 9], [7, 1, 9, 10], [1, 8, 10, 3], [8, 9, 3, 11], [9, 10, 11, 1], [12, 13, 14, 15], [13, 3, 15, 16], [1, 17, 19, 20], [17, 18, 20, 21]]


[4, 5, 6, 2, 7, 1, 8, 9, 10, 3, 3, 14, 18, 19]


In [43]:
# Convert the training pairs to NumPy arrays:
# x holds one row of context word ids per example, y the matching target ids.
import numpy as np
x, y = np.array(contexts), np.array(targets)

In [57]:
# Model: embed each context word id, average the embeddings over the context,
# then classify the averaged vector into one of `total_words` word ids.
emb_size = 10
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
model = Sequential()
# No `input_length`: the argument is deprecated in Keras 3 (it only emits a
# warning), and the average-pooling layer below does not need a fixed context
# length, so the model also accepts shorter contexts at prediction time.
model.add(layers.Embedding(input_dim=total_words, output_dim=emb_size))
# Average the word embeddings across the context positions.
model.add(layers.GlobalAveragePooling1D())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(512, activation='relu'))
# One probability per vocabulary id.
model.add(layers.Dense(total_words, activation='softmax'))


In [60]:
# Targets are integer word ids, hence the sparse categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [61]:
# Train on all (context, target) pairs for 50 epochs and keep the history.
history = model.fit(
    x,
    y,
    epochs=50,
)

Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.0000e+00 - loss: 3.0920
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step - accuracy: 0.1429 - loss: 3.0843
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step - accuracy: 0.1429 - loss: 3.0776
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step - accuracy: 0.1429 - loss: 3.0706
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - accuracy: 0.1429 - loss: 3.0628
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.1429 - loss: 3.0537
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.1429 - loss: 3.0433
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step - accuracy: 0.1429 - loss: 3.0314
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [91]:
# Space-separated context phrases to feed to the trained model.
text_sent = [
    'deep learning also',
    'learning can be',
]

In [92]:
# Predict a word for each test phrase and print it.
for sent in text_sent:
    # Lower-case to match the lower-cased vocabulary, and split on any run of
    # whitespace so repeated spaces do not produce empty tokens.
    words = sent.lower().split()
    if not words:
        print("Skipping empty input", "\n\n")
        continue
    # Words missing from the vocabulary would otherwise become None and make
    # the input array unusable for the model; report them and move on.
    unknown = [word for word in words if word not in words_to_index]
    if unknown:
        print("Skipping ", words, "\n unknown words: ", unknown, "\n\n")
        continue
    # Batch of one: a single row of word ids.
    test_sent = np.array([[words_to_index[word] for word in words]])

    pred = model.predict(test_sent)
    # Index of the highest-probability class, as a plain int for the lookup.
    pred = int(np.argmax(pred))
    print("Pred ", words, "\n = ", index_words.get(pred), "\n\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Pred  ['deep', 'learning', 'also'] 
 =  known 


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
Pred  ['learning', 'can', 'be'] 
 =  supervised 


