In [15]:
#Step 1: Import Libraries
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Lambda, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import tensorflow.keras.backend as K


In [16]:
#Step 2: Text Data
text = "I love playing cricket and watching cricket matches"

# Tokenizer: convert words into numbers
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
word2idx = tokenizer.word_index
vocab_size = len(word2idx) + 1
seq = tokenizer.texts_to_sequences([text])


In [17]:
#Step 3: Create Context–Target Pairs
pairs = []
window = 2  # context words around target word

for i, target in enumerate(seq[0]):
    for j in range(max(0, i - window), min(len(seq[0]), i + window + 1)):
        if i != j:
            pairs.append((seq[0][j], target))

contexts = np.array([x[0] for x in pairs])
targets = np.array([x[1] for x in pairs])
targets = to_categorical(targets, vocab_size)  # one-hot encode targets


In [23]:
#Step 4: Create CBOW Model
from keras.models import Model
from keras.layers import Input, Embedding, Dense, GlobalAveragePooling1D

input_layer = Input(shape=(1,))
embedding_layer = Embedding(vocab_size, 8, name="embedding")(input_layer)
x = GlobalAveragePooling1D()(embedding_layer)     # ✅ replaces Lambda
output_layer = Dense(vocab_size, activation='softmax')(x)

model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.summary()


In [24]:
#Step 5: Train Model
model.fit(contexts, targets, epochs=10)


Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - loss: 2.0796
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - loss: 2.0780
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - loss: 2.0765
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step - loss: 2.0750
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - loss: 2.0734
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - loss: 2.0719
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step - loss: 2.0704
Epoch 8/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step - loss: 2.0689
Epoch 9/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - loss: 2.0673
Epoch 10/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step - loss: 2.0658


<keras.src.callbacks.history.History at 0x1cffabe72d0>

In [25]:
#Step 6: Predict a Word
test_word = "playing"
test_idx = np.array([[word2idx[test_word]]])
pred = model.predict(test_idx)
predicted_idx = np.argmax(pred)

for w, i in word2idx.items():
    if i == predicted_idx:
        predicted_word = w
        break

print(f"Context word: '{test_word}'")
print(f"Predicted target word: '{predicted_word}'")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 224ms/step
Context word: 'playing'
Predicted target word: 'and'
