In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Lambda, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib.pylab as plt
from sklearn.decomposition import PCA

In [None]:
# Toy corpus: four short sentences of seven words each. This is the only text
# the tokenizer is fitted on and the only source of training examples below.
corpus = [
    "The dog jump over a little fox",
    "A bird is singing song on tree",
    "A cat is playing videogames on laptop",
    "The boy is clicking photos with camera"
]

In [None]:
# Build the word -> integer-id vocabulary from the corpus.
# As the printed dictionary shows, words are lowercased and ids start at 1.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
print("dictionary:", tokenizer.word_index)
print("length of dictionary:", len(tokenizer.word_index))

dictionary: {'a': 1, 'is': 2, 'the': 3, 'on': 4, 'dog': 5, 'jump': 6, 'over': 7, 'little': 8, 'fox': 9, 'bird': 10, 'singing': 11, 'song': 12, 'tree': 13, 'cat': 14, 'playing': 15, 'videogames': 16, 'laptop': 17, 'boy': 18, 'clicking': 19, 'photos': 20, 'with': 21, 'camera': 22}
length of dictionary: 22


In [None]:
# Encode every sentence as a list of vocabulary ids (one list per sentence).
sequences = tokenizer.texts_to_sequences(corpus)
print("sequences of texts:", sequences)

sequences of texts: [[3, 5, 6, 7, 1, 8, 9], [1, 10, 2, 11, 12, 4, 13], [1, 14, 2, 15, 16, 4, 17], [3, 18, 2, 19, 20, 21, 22]]


In [None]:
# Hyper-parameters for the CBOW-style training set and model.
# +1 because the tokenizer's word ids start at 1, so index 0 must also be
# representable as an embedding row / output class.
vocab_size = 1 + len(tokenizer.word_index)
embedding_size = 22
window_size = 2

contexts = []
targets = []

# For each position that has a full window on both sides, pair the
# `window_size` ids before and after it (the context) with the id at that
# position (the target). Positions closer than `window_size` to either end
# of a sentence are skipped.
for sequence in sequences:
  stop = len(sequence) - window_size
  for center in range(window_size, stop):
    left = sequence[center - window_size:center]
    right = sequence[center + 1:center + 1 + window_size]
    contexts.append(left + right)
    targets.append(sequence[center])

print(contexts)
print(targets)

[[3, 5, 7, 1], [5, 6, 1, 8], [6, 7, 8, 9], [1, 10, 11, 12], [10, 2, 12, 4], [2, 11, 4, 13], [1, 14, 15, 16], [14, 2, 16, 4], [2, 15, 4, 17], [3, 18, 19, 20], [18, 2, 20, 21], [2, 19, 21, 22]]
[6, 7, 1, 2, 11, 12, 2, 15, 16, 2, 19, 20]


In [None]:
# Stack the context windows into an integer array, one row per training example.
X = np.array(contexts)
# One-hot encode each target id over `vocab_size` classes, matching the softmax
# output and categorical cross-entropy loss used by the model.
y = to_categorical(targets, num_classes=vocab_size)

In [None]:
# Seed Python, NumPy and TensorFlow so that weight initialisation and training
# are repeatable across "Restart Kernel -> Run All".
tf.keras.utils.set_random_seed(42)

# CBOW-style network: embed the context word ids, average their vectors, and
# predict the centre word with a softmax over the vocabulary.
models = Sequential()
# Declare the input shape explicitly instead of passing the deprecated
# `input_length` argument to Embedding (Keras 3 warns about it and ignores it).
models.add(tf.keras.Input(shape=(2 * window_size,), dtype="int32"))
models.add(Embedding(input_dim=vocab_size, output_dim=embedding_size))
# Mean over axis 1 (the context-position axis): one averaged vector per example.
models.add(Lambda(lambda x: tf.reduce_mean(x, axis=1)))
models.add(Dense(units=vocab_size, activation='softmax'))

models.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

models.fit(X, y, epochs=50, verbose=1)

Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.0000e+00 - loss: 3.1378
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 0.0000e+00 - loss: 3.1338
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.0000e+00 - loss: 3.1298
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.0000e+00 - loss: 3.1258
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.0833 - loss: 3.1217
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.1667 - loss: 3.1177
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.1667 - loss: 3.1137
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 0.2500 - loss: 3.1097
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[

<keras.src.callbacks.history.History at 0x7cf10965a410>