In [1]:
import tensorflow as tf
import tqdm


In [2]:
# Single example sentence that the following cells tokenize, index and
# turn into skip-gram pairs.
sentence = "The information was used as the basis for decision"

In [3]:
# Lower-case the sentence and split it on whitespace; str.split() already
# returns a list, so no extra conversion is needed.
tokens = sentence.lower().split()
print(f"Length of tokens list is: {len(tokens)} \n Tokens are: {tokens}")

Length of tokens list is: 9 
 Tokens are: ['the', 'information', 'was', 'used', 'as', 'the', 'basis', 'for', 'decision']


In [4]:
# Build the token -> integer id mapping.
# Id 0 is reserved for the padding token; real tokens receive ids in order
# of first appearance, starting at 1.
vocab = {'<pad>': 0}
index = 1  # next id to hand out
for token in tokens:
    if token in vocab:
        # Repeated token (e.g. a second "the"): keep the id it already has.
        continue
    vocab[token] = index
    index += 1

# Number of distinct entries, padding token included.
vocab_size = len(vocab)
print(vocab)

{'<pad>': 0, 'the': 1, 'information': 2, 'was': 3, 'used': 4, 'as': 5, 'basis': 6, 'for': 7, 'decision': 8}


In [5]:
# Reverse lookup table: integer id -> token.
# Pairing vocab.values() with vocab.keys() swaps each (token, id) entry
# while keeping the original insertion order.
inverse_vocab = dict(zip(vocab.values(), vocab.keys()))
print(inverse_vocab)

{0: '<pad>', 1: 'the', 2: 'information', 3: 'was', 4: 'used', 5: 'as', 6: 'basis', 7: 'for', 8: 'decision'}


In [6]:
# Encode the tokenized sentence as its sequence of vocabulary ids.
# vocab.__getitem__ is a plain vocab[...] lookup, so an unknown token
# would raise KeyError rather than being silently mapped.
test_sentence = list(map(vocab.__getitem__, tokens))
print(test_sentence)

[1, 2, 3, 4, 5, 1, 6, 7, 8]


In [7]:
# Produce the (target, context) skip-gram pairs for the encoded sentence.
# window_size is how far to either side of a target word a context word
# may lie. negative_samples=0 requests no negative pairs, so the labels
# returned as the second value are not needed and go to `_`.
# The pairs are shuffled by default, so their order can differ between runs.
window_size = 2
positive_skip_grams, _ = tf.keras.preprocessing.sequence.skipgrams(
    sequence=test_sentence,
    vocabulary_size=vocab_size,
    window_size=window_size,
    negative_samples=0,
)
print(len(positive_skip_grams))

30


In [8]:
# Show every generated pair, first as ids and then decoded back to words.
# The list is only read here, so it is iterated directly without copying.
for target, context in positive_skip_grams:
    print(f"({target}, {context}): ({inverse_vocab[target]}, {inverse_vocab[context]})")

(6, 5): (basis, as)
(3, 4): (was, used)
(3, 5): (was, as)
(1, 7): (the, for)
(7, 8): (for, decision)
(4, 1): (used, the)
(6, 8): (basis, decision)
(8, 7): (decision, for)
(1, 5): (the, as)
(6, 1): (basis, the)
(5, 6): (as, basis)
(2, 4): (information, used)
(1, 2): (the, information)
(5, 3): (as, was)
(3, 1): (was, the)
(3, 2): (was, information)
(1, 3): (the, was)
(7, 6): (for, basis)
(4, 5): (used, as)
(6, 7): (basis, for)
(4, 3): (used, was)
(2, 3): (information, was)
(5, 4): (as, used)
(7, 1): (for, the)
(1, 6): (the, basis)
(4, 2): (used, information)
(2, 1): (information, the)
(5, 1): (as, the)
(1, 4): (the, used)
(8, 6): (decision, basis)
