In [12]:
import tensorflow as tf
import numpy as np

In [2]:
# Toy corpus: three short sentences, each separated by a space-delimited "." token.
corpus_raw = "He is the king . The king is royal . She is the royal queen"

In [6]:
# Cut the corpus at every "." and turn each piece into a list of
# whitespace-separated tokens (one token list per sentence).
raw_sentence = corpus_raw.split(".")
sentences = [chunk.strip().split() for chunk in raw_sentence]

sentences

[['He', 'is', 'the', 'king'],
 ['The', 'king', 'is', 'royal'],
 ['She', 'is', 'the', 'royal', 'queen']]

In [7]:
# Build [center_word, neighbour_word] training pairs: every word is paired
# with each other word at most WINDOW_SIZE positions away in the same sentence.
WINDOW_SIZE = 2
data = []

for sentence in sentences:
    for center, word in enumerate(sentence):
        # Clamp the window to the sentence boundaries.
        lo = max(center - WINDOW_SIZE, 0)
        hi = min(center + WINDOW_SIZE + 1, len(sentence))

        # Left neighbours first, then right neighbours; the center itself is skipped.
        data.extend(
            [word, sentence[pos]] for pos in range(lo, hi) if pos != center
        )


In [8]:
# Inspect the generated [center_word, neighbour_word] pairs.
data

[['He', 'is'],
 ['He', 'the'],
 ['is', 'He'],
 ['is', 'the'],
 ['is', 'king'],
 ['the', 'He'],
 ['the', 'is'],
 ['the', 'king'],
 ['king', 'is'],
 ['king', 'the'],
 ['The', 'king'],
 ['The', 'is'],
 ['king', 'The'],
 ['king', 'is'],
 ['king', 'royal'],
 ['is', 'The'],
 ['is', 'king'],
 ['is', 'royal'],
 ['royal', 'king'],
 ['royal', 'is'],
 ['She', 'is'],
 ['She', 'the'],
 ['is', 'She'],
 ['is', 'the'],
 ['is', 'royal'],
 ['the', 'She'],
 ['the', 'is'],
 ['the', 'royal'],
 ['the', 'queen'],
 ['royal', 'is'],
 ['royal', 'the'],
 ['royal', 'queen'],
 ['queen', 'the'],
 ['queen', 'royal']]

In [9]:
# Vocabulary: every distinct whitespace token of the corpus except the "."
# separator. Tokens are case-sensitive, so "The" and "the" are separate entries.
# NOTE: iteration order of a set of strings is not guaranteed to be the same in
# a different interpreter session, so the ids assigned below can change after a
# kernel restart.
words = set(token for token in corpus_raw.split() if token != ".")
vocab_size = len(words)

# One enumeration of the vocabulary defines both lookup directions:
# id -> word, and its inverse word -> id.
int2word = dict(enumerate(words))
word2int = {token: idx for idx, token in int2word.items()}

print(word2int)
print(int2word)

{'She': 0, 'He': 1, 'king': 2, 'is': 3, 'queen': 4, 'the': 5, 'royal': 6, 'The': 7}
{0: 'She', 1: 'He', 2: 'king', 3: 'is', 4: 'queen', 5: 'the', 6: 'royal', 7: 'The'}


In [10]:
def to_one_hot(word_index, vocab_size):
    """Return a one-hot vector of length ``vocab_size``.

    Args:
        word_index: Position that is set to 1. It is used as a plain NumPy
            index into the vector.
        vocab_size: Length of the returned vector.

    Returns:
        A 1-D ``numpy.ndarray`` of zeros (NumPy's default float dtype) whose
        entry at ``word_index`` is 1.
    """
    one_hot = np.zeros(vocab_size)
    one_hot[word_index] = 1
    return one_hot

In [13]:
# One-hot encode every [center_word, neighbour_word] pair: the center word
# becomes the model input, the neighbour word becomes the target.
# The loop unpacks into its own names on purpose: iterating with a variable
# called `words` would overwrite the notebook-level vocabulary set `words`.
x_train = []
y_train = []
for center_word, context_word in data:
    x_train.append(to_one_hot(word2int[center_word], vocab_size))
    y_train.append(to_one_hot(word2int[context_word], vocab_size))

In [15]:
# Peek at the first three one-hot encoded input (center-word) vectors.
x_train[:3]

[array([0., 1., 0., 0., 0., 0., 0., 0.]),
 array([0., 1., 0., 0., 0., 0., 0., 0.]),
 array([0., 0., 0., 1., 0., 0., 0., 0.])]

In [16]:
# Peek at the first three one-hot encoded target (neighbour-word) vectors.
y_train[:3]

[array([0., 0., 0., 1., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 1., 0., 0.]),
 array([0., 1., 0., 0., 0., 0., 0., 0.])]

In [17]:
# Convert both training collections to float32 arrays, the dtype the
# tf.random.normal-initialised model weights use by default.
x_train, y_train = [
    np.asarray(rows, dtype=np.float32) for rows in (x_train, y_train)
]

In [18]:
# First three input rows again, to check the result of the float32 conversion.
x_train[:3]

array([[0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0.]], dtype=float32)

In [27]:
class Word2Vec:
    """Two-layer linear network with a softmax output, trained full-batch.

    The model maps an input row of width ``vocab_size`` to ``embedding_dim``
    hidden units (``W1``, ``b1``) and back to ``vocab_size`` logits
    (``W2``, ``b2``). All four parameters are drawn from ``tf.random.normal``
    at construction time.

    Args:
        vocab_size: Width of the input and output layers.
        embedding_dim: Width of the hidden (embedding) layer.
        optimizer: Optimizer name. ``'adam'`` (matched case-insensitively)
            selects ``tf.optimizers.Adam``; any other value selects
            ``tf.optimizers.SGD``.
        epochs: Number of full-batch gradient steps performed by ``train``.
        learning_rate: Learning rate handed to the optimizer.
    """

    def __init__(self, vocab_size=10, embedding_dim=5, optimizer='sgd',
                 epochs=1000, learning_rate=0.01):
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.epochs = epochs
        # Compare case-insensitively so that e.g. 'Adam' is not silently
        # treated as "anything else" and replaced by SGD.
        if str(optimizer).lower() == 'adam':
            self.optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
        else:
            self.optimizer = tf.optimizers.SGD(learning_rate=learning_rate)

        # Input -> hidden projection; its rows (plus b1) are the word vectors.
        self.W1 = tf.Variable(tf.random.normal([self.vocab_size,
                                                self.embedding_dim]))
        self.b1 = tf.Variable(tf.random.normal([self.embedding_dim]))

        # Hidden -> output projection producing one logit per vocabulary entry.
        self.W2 = tf.Variable(tf.random.normal([self.embedding_dim,
                                                self.vocab_size]))
        self.b2 = tf.Variable(tf.random.normal([self.vocab_size]))

    def vectorized(self, word_index):
        """Return the hidden-layer vector for ``word_index``.

        This is row ``word_index`` of ``W1 + b1``, i.e. the hidden activation
        the model computes for a one-hot input at that position.
        """
        return (self.W1 + self.b1)[word_index]

    def train(self, x_train, y_train):
        """Run ``self.epochs`` full-batch gradient steps on softmax cross-entropy.

        Args:
            x_train: Input batch with one row per example and ``vocab_size``
                columns; it is fed to ``tf.matmul`` with ``W1``.
            y_train: Target distribution per example, same layout as the
                logits (one row per example, ``vocab_size`` columns).

        Prints the loss tensor every 1000 epochs, starting at epoch 0.
        Returns ``None``; the trained weights live on the instance.
        """
        params = [self.W1, self.b1, self.W2, self.b2]
        for i in range(self.epochs):
            # Only the forward pass needs to be recorded by the tape.
            with tf.GradientTape() as tape:
                hidden_layer = tf.add(tf.matmul(x_train, self.W1), self.b1)
                output_layer = tf.add(tf.matmul(hidden_layer, self.W2), self.b2)

                # Fused softmax + cross-entropy on the logits. Mathematically
                # this equals mean(-sum(y * log(softmax(logits)), axis=1)) but
                # avoids log(0) -> NaN when a softmax probability underflows.
                labels = tf.cast(y_train, output_layer.dtype)
                loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        labels=labels, logits=output_layer))

            # Differentiate and update outside the tape context so these ops
            # are not themselves recorded.
            grads = tape.gradient(loss, params)
            self.optimizer.apply_gradients(zip(grads, params))
            if i % 1000 == 0:
                print(loss)


In [28]:
# Build the model over the corpus vocabulary and fit it on the one-hot pairs.
# train() prints the loss every 1000 epochs, so 10000 epochs yields ten lines.
w2v = Word2Vec(
    vocab_size=vocab_size,
    embedding_dim=5,
    optimizer='SGD',
    epochs=10000,
    learning_rate=0.1,
)
w2v.train(x_train, y_train)

tf.Tensor(3.7162118, shape=(), dtype=float32)
tf.Tensor(1.3957883, shape=(), dtype=float32)
tf.Tensor(1.3800758, shape=(), dtype=float32)
tf.Tensor(1.3773054, shape=(), dtype=float32)
tf.Tensor(1.3762228, shape=(), dtype=float32)
tf.Tensor(1.3756589, shape=(), dtype=float32)
tf.Tensor(1.3753175, shape=(), dtype=float32)
tf.Tensor(1.3750902, shape=(), dtype=float32)
tf.Tensor(1.374929, shape=(), dtype=float32)
tf.Tensor(1.3748091, shape=(), dtype=float32)
