# Word2Vec using Tensorflow
## This notebook outlines the concepts of applying Word2Vec using Tensorflow library

In [1]:
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
corpus_raw = 'He is the king . The king is royal . She is the royal  queen '

### Convert to lower case

In [3]:
corpus_raw = corpus_raw.lower()



[Quick Brown Fox Example](https://drive.google.com/open?id=1n8ElxWjh1a9kdPTCBbCmvP7MKj5vtD4U)

In [4]:
words = []
for word in corpus_raw.split():
    if word != '.': # because we don't want to treat . as a word
        words.append(word)
words = set(words) # so that all duplicate words are removed
word2int = {}
int2word = {}
vocab_size = len(words) # gives the total number of unique words
for i,word in enumerate(words):
    word2int[word] = i
    int2word[i] = word

In [5]:
print(word2int['queen'])

1


In [7]:
print(int2word[1])

queen


In [8]:
print(word2int['king'])

0


In [9]:
print(int2word[0])

king


### Creation of sentences

In [10]:
# raw sentences is a list of sentences.
raw_sentences = corpus_raw.split('.')
sentences = []
for sentence in raw_sentences:
    sentences.append(sentence.split())

In [11]:
print(sentences)

[['he', 'is', 'the', 'king'], ['the', 'king', 'is', 'royal'], ['she', 'is', 'the', 'royal', 'queen']]


### Creation of windows for word pairs

In [12]:
data = []
WINDOW_SIZE = 2
for sentence in sentences:
    for word_index, word in enumerate(sentence):
        for nb_word in sentence[max(word_index - WINDOW_SIZE, 0) : min(word_index + WINDOW_SIZE, len(sentence)) + 1] : 
            if nb_word != word:
                data.append([word, nb_word])

In [13]:
print(data)

[['he', 'is'], ['he', 'the'], ['is', 'he'], ['is', 'the'], ['is', 'king'], ['the', 'he'], ['the', 'is'], ['the', 'king'], ['king', 'is'], ['king', 'the'], ['the', 'king'], ['the', 'is'], ['king', 'the'], ['king', 'is'], ['king', 'royal'], ['is', 'the'], ['is', 'king'], ['is', 'royal'], ['royal', 'king'], ['royal', 'is'], ['she', 'is'], ['she', 'the'], ['is', 'she'], ['is', 'the'], ['is', 'royal'], ['the', 'she'], ['the', 'is'], ['the', 'royal'], ['the', 'queen'], ['royal', 'is'], ['royal', 'the'], ['royal', 'queen'], ['queen', 'the'], ['queen', 'royal']]


### One-hot vector encoding
say we have a vocabulary of 3 words : pen, pineapple, apple

- word2int['pen'] -> 0 -> [1 0 0]
- word2int['pineapple'] -> 1 -> [0 1 0]
- word2int['apple'] -> 2 -> [0 0 1]

In [14]:
# function to convert numbers to one hot vectors
def to_one_hot(data_point_index, vocab_size):
    temp = np.zeros(vocab_size)
    temp[data_point_index] = 1
    return temp
x_train = [] # input word
y_train = [] # output word
for data_word in data:
    x_train.append(to_one_hot(word2int[ data_word[0] ], vocab_size))
    y_train.append(to_one_hot(word2int[ data_word[1] ], vocab_size))
# convert them to numpy arrays
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)

In [15]:
print(x_train)

[[0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]]


In [16]:
print(x_train.shape, y_train.shape)

(34, 7) (34, 7)


In [17]:
print(y_train)
print(y_train.shape, y_train.shape)

[[0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1.]]
(34, 7) (34, 7)


### Creation of Tensorflow model
#### Create Placeholders

In [18]:
# making placeholders for x_train and y_train
x = tf.placeholder(tf.float32, shape=(None, vocab_size))
y_label = tf.placeholder(tf.float32, shape=(None, vocab_size))

[Architecture](https://drive.google.com/open?id=1svyZ4AxL5YX4j1Wl_QGYVcCC9iiP6iXR)

In [19]:
EMBEDDING_DIM = 5 # you can choose your own number
W1 = tf.Variable(tf.random_normal([vocab_size, EMBEDDING_DIM]))
b1 = tf.Variable(tf.random_normal([EMBEDDING_DIM])) #bias
hidden_representation = tf.add(tf.matmul(x,W1), b1)

[Predictions](https://drive.google.com/open?id=122xvSIqD1G5tkAdO7BmT4gLAtKL990A4)

In [20]:
W2 = tf.Variable(tf.random_normal([EMBEDDING_DIM, vocab_size]))
b2 = tf.Variable(tf.random_normal([vocab_size]))
prediction = tf.nn.softmax(tf.add( tf.matmul(hidden_representation, W2), b2))

[Summary of embeddings](https://drive.google.com/open?id=1vlUxYlCAp_U0QmWgYizn8SRgxEpPsDDm)

In [21]:
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init) #make sure you do this!
# define the loss function:
cross_entropy_loss = tf.reduce_mean(-tf.reduce_sum(y_label * tf.log(prediction), reduction_indices=[1]))
# define the training step:
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(cross_entropy_loss)
n_iters = 10000
# train for n_iter iterations
for _ in range(n_iters):
    sess.run(train_step, feed_dict={x: x_train, y_label: y_train})
    print('loss is : ', sess.run(cross_entropy_loss, feed_dict={x: x_train, y_label: y_train}))

loss is :  4.3288918
loss is :  4.1128755
loss is :  3.9284754
loss is :  3.7681947
loss is :  3.626952
loss is :  3.5011601
loss is :  3.3881974
loss is :  3.2860737
loss is :  3.1932223
loss is :  3.1083705
loss is :  3.0304596
loss is :  2.9585981
loss is :  2.8920295
loss is :  2.8301113
loss is :  2.7723022
loss is :  2.7181454
loss is :  2.667259
loss is :  2.6193233
loss is :  2.5740712
loss is :  2.5312786
loss is :  2.4907572
loss is :  2.4523466
loss is :  2.4159086
loss is :  2.3813238
loss is :  2.3484874
loss is :  2.3173056
loss is :  2.2876923
loss is :  2.2595708
loss is :  2.2328682
loss is :  2.207517
loss is :  2.1834517
loss is :  2.1606116
loss is :  2.138936
loss is :  2.118367
loss is :  2.0988474
loss is :  2.0803204
loss is :  2.0627308
loss is :  2.0460234
loss is :  2.0301442
loss is :  2.01504
loss is :  2.0006588
loss is :  1.9869506
loss is :  1.9738666
loss is :  1.9613607
loss is :  1.9493891
loss is :  1.9379104
loss is :  1.9268864
loss is :  1.9162813

loss is :  1.3563391
loss is :  1.3562165
loss is :  1.356095
loss is :  1.3559737
loss is :  1.3558532
loss is :  1.3557333
loss is :  1.3556141
loss is :  1.3554955
loss is :  1.3553773
loss is :  1.35526
loss is :  1.3551433
loss is :  1.3550271
loss is :  1.3549112
loss is :  1.3547964
loss is :  1.354682
loss is :  1.354568
loss is :  1.3544549
loss is :  1.3543422
loss is :  1.35423
loss is :  1.3541185
loss is :  1.3540075
loss is :  1.3538973
loss is :  1.3537874
loss is :  1.353678
loss is :  1.3535694
loss is :  1.3534613
loss is :  1.3533536
loss is :  1.3532465
loss is :  1.35314
loss is :  1.353034
loss is :  1.3529285
loss is :  1.3528236
loss is :  1.3527193
loss is :  1.3526152
loss is :  1.352512
loss is :  1.352409
loss is :  1.3523068
loss is :  1.3522049
loss is :  1.3521036
loss is :  1.3520029
loss is :  1.3519025
loss is :  1.3518028
loss is :  1.3517033
loss is :  1.3516045
loss is :  1.3515062
loss is :  1.3514084
loss is :  1.351311
loss is :  1.3512142
loss i

loss is :  1.3314503
loss is :  1.3314329
loss is :  1.3314155
loss is :  1.3313981
loss is :  1.3313806
loss is :  1.3313633
loss is :  1.3313462
loss is :  1.331329
loss is :  1.3313118
loss is :  1.3312947
loss is :  1.3312777
loss is :  1.3312606
loss is :  1.3312438
loss is :  1.3312267
loss is :  1.33121
loss is :  1.331193
loss is :  1.3311763
loss is :  1.3311597
loss is :  1.3311429
loss is :  1.3311262
loss is :  1.3311095
loss is :  1.3310931
loss is :  1.3310766
loss is :  1.3310602
loss is :  1.3310436
loss is :  1.3310273
loss is :  1.3310109
loss is :  1.3309946
loss is :  1.3309784
loss is :  1.3309622
loss is :  1.3309458
loss is :  1.3309299
loss is :  1.3309139
loss is :  1.3308978
loss is :  1.3308817
loss is :  1.330866
loss is :  1.3308499
loss is :  1.3308343
loss is :  1.3308182
loss is :  1.3308024
loss is :  1.3307867
loss is :  1.330771
loss is :  1.3307555
loss is :  1.3307397
loss is :  1.3307241
loss is :  1.3307086
loss is :  1.3306932
loss is :  1.330677

loss is :  1.3264552
loss is :  1.3264493
loss is :  1.3264431
loss is :  1.3264371
loss is :  1.3264309
loss is :  1.3264251
loss is :  1.326419
loss is :  1.3264132
loss is :  1.3264071
loss is :  1.3264011
loss is :  1.326395
loss is :  1.3263891
loss is :  1.326383
loss is :  1.326377
loss is :  1.3263712
loss is :  1.3263655
loss is :  1.3263595
loss is :  1.3263535
loss is :  1.3263477
loss is :  1.3263417
loss is :  1.3263358
loss is :  1.32633
loss is :  1.3263241
loss is :  1.3263181
loss is :  1.3263124
loss is :  1.3263066
loss is :  1.3263009
loss is :  1.3262949
loss is :  1.3262892
loss is :  1.3262835
loss is :  1.3262777
loss is :  1.326272
loss is :  1.3262663
loss is :  1.3262606
loss is :  1.3262547
loss is :  1.3262491
loss is :  1.3262433
loss is :  1.3262376
loss is :  1.326232
loss is :  1.3262262
loss is :  1.3262205
loss is :  1.3262148
loss is :  1.3262092
loss is :  1.3262035
loss is :  1.326198
loss is :  1.3261925
loss is :  1.3261867
loss is :  1.3261812
l

loss is :  1.3244911
loss is :  1.3244882
loss is :  1.3244851
loss is :  1.3244821
loss is :  1.3244791
loss is :  1.3244761
loss is :  1.3244733
loss is :  1.3244703
loss is :  1.3244672
loss is :  1.3244642
loss is :  1.3244613
loss is :  1.3244582
loss is :  1.3244553
loss is :  1.3244524
loss is :  1.3244495
loss is :  1.3244464
loss is :  1.3244435
loss is :  1.3244406
loss is :  1.3244376
loss is :  1.3244348
loss is :  1.3244319
loss is :  1.3244289
loss is :  1.3244259
loss is :  1.324423
loss is :  1.32442
loss is :  1.3244171
loss is :  1.3244143
loss is :  1.3244114
loss is :  1.3244085
loss is :  1.3244056
loss is :  1.3244027
loss is :  1.3243998
loss is :  1.324397
loss is :  1.324394
loss is :  1.3243911
loss is :  1.3243881
loss is :  1.3243854
loss is :  1.3243825
loss is :  1.3243798
loss is :  1.3243768
loss is :  1.324374
loss is :  1.324371
loss is :  1.3243685
loss is :  1.3243654
loss is :  1.3243626
loss is :  1.3243597
loss is :  1.324357
loss is :  1.3243542


loss is :  1.3234121
loss is :  1.3234102
loss is :  1.3234087
loss is :  1.3234069
loss is :  1.323405
loss is :  1.3234034
loss is :  1.3234016
loss is :  1.3233999
loss is :  1.3233982
loss is :  1.3233966
loss is :  1.3233949
loss is :  1.3233932
loss is :  1.3233914
loss is :  1.3233898
loss is :  1.323388
loss is :  1.3233863
loss is :  1.3233845
loss is :  1.323383
loss is :  1.3233813
loss is :  1.3233796
loss is :  1.3233778
loss is :  1.3233762
loss is :  1.3233745
loss is :  1.3233728
loss is :  1.323371
loss is :  1.3233694
loss is :  1.3233677
loss is :  1.323366
loss is :  1.3233644
loss is :  1.3233627
loss is :  1.3233612
loss is :  1.3233594
loss is :  1.3233576
loss is :  1.3233559
loss is :  1.3233542
loss is :  1.3233527
loss is :  1.323351
loss is :  1.3233494
loss is :  1.3233478
loss is :  1.3233461
loss is :  1.3233445
loss is :  1.3233427
loss is :  1.323341
loss is :  1.3233393
loss is :  1.3233378
loss is :  1.323336
loss is :  1.3233346
loss is :  1.3233328


loss is :  1.3228105
loss is :  1.3228095
loss is :  1.3228083
loss is :  1.3228071
loss is :  1.3228061
loss is :  1.3228048
loss is :  1.3228036
loss is :  1.3228025
loss is :  1.3228015
loss is :  1.3228003
loss is :  1.3227992
loss is :  1.322798
loss is :  1.3227968
loss is :  1.3227957
loss is :  1.3227947
loss is :  1.3227935
loss is :  1.3227924
loss is :  1.3227912
loss is :  1.32279
loss is :  1.3227891
loss is :  1.3227878
loss is :  1.3227867
loss is :  1.3227856
loss is :  1.3227844
loss is :  1.3227834
loss is :  1.3227822
loss is :  1.3227811
loss is :  1.32278
loss is :  1.3227788
loss is :  1.3227777
loss is :  1.3227766
loss is :  1.3227756
loss is :  1.3227743
loss is :  1.3227732
loss is :  1.3227721
loss is :  1.322771
loss is :  1.3227699
loss is :  1.3227688
loss is :  1.3227677
loss is :  1.3227665
loss is :  1.3227655
loss is :  1.3227644
loss is :  1.3227633
loss is :  1.3227621
loss is :  1.3227609
loss is :  1.3227599
loss is :  1.3227589
loss is :  1.322757

loss is :  1.3223712
loss is :  1.3223705
loss is :  1.3223697
loss is :  1.3223691
loss is :  1.3223681
loss is :  1.3223673
loss is :  1.3223666
loss is :  1.3223658
loss is :  1.322365
loss is :  1.3223641
loss is :  1.3223635
loss is :  1.3223627
loss is :  1.322362
loss is :  1.322361
loss is :  1.3223602
loss is :  1.3223596
loss is :  1.3223586
loss is :  1.322358
loss is :  1.322357
loss is :  1.3223563
loss is :  1.3223556
loss is :  1.3223548
loss is :  1.3223542
loss is :  1.3223532
loss is :  1.3223524
loss is :  1.3223518
loss is :  1.3223509
loss is :  1.3223501
loss is :  1.3223494
loss is :  1.3223486
loss is :  1.3223479
loss is :  1.3223469
loss is :  1.3223462
loss is :  1.3223454
loss is :  1.3223448
loss is :  1.322344
loss is :  1.3223432
loss is :  1.3223424
loss is :  1.3223417
loss is :  1.3223408
loss is :  1.32234
loss is :  1.3223393
loss is :  1.3223386
loss is :  1.3223379
loss is :  1.3223369
loss is :  1.3223362
loss is :  1.3223354
loss is :  1.3223348


loss is :  1.3220751
loss is :  1.3220744
loss is :  1.3220739
loss is :  1.3220732
loss is :  1.3220726
loss is :  1.3220721
loss is :  1.3220714
loss is :  1.322071
loss is :  1.3220704
loss is :  1.3220698
loss is :  1.3220692
loss is :  1.3220685
loss is :  1.3220681
loss is :  1.3220675
loss is :  1.3220668
loss is :  1.3220663
loss is :  1.3220656
loss is :  1.3220651
loss is :  1.3220645
loss is :  1.3220639
loss is :  1.3220633
loss is :  1.3220627
loss is :  1.3220623
loss is :  1.3220615
loss is :  1.322061
loss is :  1.3220606
loss is :  1.3220598
loss is :  1.3220594
loss is :  1.3220587
loss is :  1.3220581
loss is :  1.3220576
loss is :  1.322057
loss is :  1.3220564
loss is :  1.3220557
loss is :  1.3220552
loss is :  1.3220547
loss is :  1.3220541
loss is :  1.3220537
loss is :  1.322053
loss is :  1.3220525
loss is :  1.3220519
loss is :  1.3220512
loss is :  1.3220508
loss is :  1.3220501
loss is :  1.3220495
loss is :  1.322049
loss is :  1.3220483
loss is :  1.32204

loss is :  1.3218595
loss is :  1.3218589
loss is :  1.3218585
loss is :  1.3218582
loss is :  1.3218576
loss is :  1.3218572
loss is :  1.3218566
loss is :  1.3218563
loss is :  1.3218557
loss is :  1.3218554
loss is :  1.3218548
loss is :  1.3218545
loss is :  1.3218541
loss is :  1.3218535
loss is :  1.3218529
loss is :  1.3218527
loss is :  1.3218521
loss is :  1.3218516
loss is :  1.3218513
loss is :  1.3218508
loss is :  1.3218504
loss is :  1.3218501
loss is :  1.3218496
loss is :  1.321849
loss is :  1.3218486
loss is :  1.3218482
loss is :  1.3218476
loss is :  1.3218472
loss is :  1.3218468
loss is :  1.3218462
loss is :  1.3218459
loss is :  1.3218454
loss is :  1.3218449
loss is :  1.3218446
loss is :  1.3218441
loss is :  1.3218436
loss is :  1.3218431
loss is :  1.3218428
loss is :  1.3218422
loss is :  1.321842
loss is :  1.3218414
loss is :  1.321841
loss is :  1.3218405
loss is :  1.32184
loss is :  1.3218397
loss is :  1.3218391
loss is :  1.3218389
loss is :  1.32183

loss is :  1.3216426
loss is :  1.3216424
loss is :  1.321642
loss is :  1.3216418
loss is :  1.3216412
loss is :  1.321641
loss is :  1.3216407
loss is :  1.3216404
loss is :  1.3216398
loss is :  1.3216395
loss is :  1.3216392
loss is :  1.3216391
loss is :  1.3216386
loss is :  1.3216383
loss is :  1.321638
loss is :  1.3216376
loss is :  1.3216373
loss is :  1.3216369
loss is :  1.3216367
loss is :  1.3216362
loss is :  1.321636
loss is :  1.3216355
loss is :  1.3216352
loss is :  1.321635
loss is :  1.3216347
loss is :  1.3216342
loss is :  1.3216339
loss is :  1.3216336
loss is :  1.3216333
loss is :  1.321633
loss is :  1.3216326
loss is :  1.3216323
loss is :  1.3216319
loss is :  1.3216316
loss is :  1.3216313
loss is :  1.3216311
loss is :  1.3216305
loss is :  1.3216302
loss is :  1.32163
loss is :  1.3216295
loss is :  1.3216293
loss is :  1.3216288
loss is :  1.3216286
loss is :  1.3216282
loss is :  1.321628
loss is :  1.3216276
loss is :  1.3216274
loss is :  1.3216271
l

loss is :  1.3214602
loss is :  1.3214598
loss is :  1.3214597
loss is :  1.3214594
loss is :  1.3214592
loss is :  1.3214589
loss is :  1.3214587
loss is :  1.3214583
loss is :  1.3214581
loss is :  1.3214579
loss is :  1.3214577
loss is :  1.3214575
loss is :  1.321457
loss is :  1.3214569
loss is :  1.3214567
loss is :  1.3214564
loss is :  1.3214562
loss is :  1.3214558
loss is :  1.3214556
loss is :  1.3214552
loss is :  1.3214552
loss is :  1.3214549
loss is :  1.3214548
loss is :  1.3214543
loss is :  1.321454
loss is :  1.321454
loss is :  1.3214538
loss is :  1.3214535
loss is :  1.3214531
loss is :  1.321453
loss is :  1.3214527
loss is :  1.3214525
loss is :  1.3214521
loss is :  1.321452
loss is :  1.3214517
loss is :  1.3214515
loss is :  1.3214511
loss is :  1.321451
loss is :  1.3214507
loss is :  1.3214504
loss is :  1.3214502
loss is :  1.3214499
loss is :  1.3214496
loss is :  1.3214494
loss is :  1.3214494
loss is :  1.3214488
loss is :  1.3214486
loss is :  1.321448

loss is :  1.3213669
loss is :  1.3213665
loss is :  1.3213664
loss is :  1.3213663
loss is :  1.321366
loss is :  1.3213657
loss is :  1.3213657
loss is :  1.3213654
loss is :  1.3213651
loss is :  1.3213649
loss is :  1.3213646
loss is :  1.3213644
loss is :  1.3213644
loss is :  1.321364
loss is :  1.3213639
loss is :  1.3213637
loss is :  1.3213634
loss is :  1.3213632
loss is :  1.3213632
loss is :  1.321363
loss is :  1.3213626
loss is :  1.3213625
loss is :  1.3213623
loss is :  1.321362
loss is :  1.3213619
loss is :  1.3213618
loss is :  1.3213615
loss is :  1.3213612
loss is :  1.321361
loss is :  1.3213607
loss is :  1.3213606
loss is :  1.3213603
loss is :  1.3213602
loss is :  1.3213599
loss is :  1.3213598
loss is :  1.3213595
loss is :  1.3213593
loss is :  1.3213592
loss is :  1.3213588
loss is :  1.3213587
loss is :  1.3213586
loss is :  1.3213583
loss is :  1.3213581
loss is :  1.3213578
loss is :  1.3213577
loss is :  1.3213576
loss is :  1.3213573
loss is :  1.32135

loss is :  1.3212851
loss is :  1.3212848
loss is :  1.3212847
loss is :  1.3212845
loss is :  1.3212843
loss is :  1.3212842
loss is :  1.321284
loss is :  1.3212839
loss is :  1.3212837
loss is :  1.3212833
loss is :  1.3212832
loss is :  1.3212831
loss is :  1.3212829
loss is :  1.3212826
loss is :  1.3212826
loss is :  1.3212824
loss is :  1.3212821
loss is :  1.3212821
loss is :  1.3212819
loss is :  1.3212817
loss is :  1.3212814
loss is :  1.3212813
loss is :  1.3212812
loss is :  1.321281
loss is :  1.3212806
loss is :  1.3212806
loss is :  1.3212805
loss is :  1.3212802
loss is :  1.32128
loss is :  1.3212799
loss is :  1.3212798
loss is :  1.3212794
loss is :  1.3212793
loss is :  1.3212792
loss is :  1.3212789
loss is :  1.3212787
loss is :  1.3212787
loss is :  1.3212786
loss is :  1.3212785
loss is :  1.3212781
loss is :  1.3212777
loss is :  1.3212777
loss is :  1.3212775
loss is :  1.3212774
loss is :  1.3212773
loss is :  1.321277
loss is :  1.3212768
loss is :  1.32127

loss is :  1.3212134
loss is :  1.3212131
loss is :  1.321213
loss is :  1.3212128
loss is :  1.3212126
loss is :  1.3212124
loss is :  1.3212123
loss is :  1.3212122
loss is :  1.321212
loss is :  1.3212118
loss is :  1.3212118
loss is :  1.3212116
loss is :  1.3212115
loss is :  1.3212113
loss is :  1.3212111
loss is :  1.3212111
loss is :  1.3212109
loss is :  1.3212106
loss is :  1.3212105
loss is :  1.3212105
loss is :  1.3212101
loss is :  1.32121
loss is :  1.3212099
loss is :  1.3212098
loss is :  1.3212097
loss is :  1.3212094
loss is :  1.3212093
loss is :  1.3212092
loss is :  1.3212092
loss is :  1.3212088
loss is :  1.3212087
loss is :  1.3212086
loss is :  1.3212084
loss is :  1.3212081
loss is :  1.3212081
loss is :  1.321208
loss is :  1.3212079
loss is :  1.3212076
loss is :  1.3212075
loss is :  1.3212074
loss is :  1.3212072
loss is :  1.3212069
loss is :  1.3212069
loss is :  1.3212067
loss is :  1.3212067
loss is :  1.3212065
loss is :  1.3212061
loss is :  1.32120

loss is :  1.3211285
loss is :  1.3211284
loss is :  1.3211281
loss is :  1.321128
loss is :  1.3211279
loss is :  1.3211278
loss is :  1.3211278
loss is :  1.3211277
loss is :  1.3211275
loss is :  1.3211274
loss is :  1.3211273
loss is :  1.321127
loss is :  1.321127
loss is :  1.3211267
loss is :  1.3211267
loss is :  1.3211265
loss is :  1.3211265
loss is :  1.3211262
loss is :  1.3211262
loss is :  1.321126
loss is :  1.321126
loss is :  1.321126
loss is :  1.3211259
loss is :  1.3211255
loss is :  1.3211255
loss is :  1.3211254
loss is :  1.3211253
loss is :  1.3211251
loss is :  1.321125
loss is :  1.321125
loss is :  1.3211248
loss is :  1.3211247
loss is :  1.3211246
loss is :  1.3211244
loss is :  1.3211243
loss is :  1.3211241
loss is :  1.3211241
loss is :  1.321124
loss is :  1.3211238
loss is :  1.3211236
loss is :  1.3211235
loss is :  1.3211234
loss is :  1.3211234
loss is :  1.3211231
loss is :  1.3211231
loss is :  1.3211228
loss is :  1.3211228
loss is :  1.3211226
l

loss is :  1.3210754
loss is :  1.3210752
loss is :  1.3210752
loss is :  1.321075
loss is :  1.321075
loss is :  1.3210748
loss is :  1.3210747
loss is :  1.3210746
loss is :  1.3210745
loss is :  1.3210745
loss is :  1.3210744
loss is :  1.3210742
loss is :  1.3210741
loss is :  1.321074
loss is :  1.3210739
loss is :  1.3210738
loss is :  1.3210738
loss is :  1.3210735
loss is :  1.3210735
loss is :  1.3210733
loss is :  1.3210733
loss is :  1.321073
loss is :  1.3210732
loss is :  1.3210728
loss is :  1.3210728
loss is :  1.3210727
loss is :  1.3210726
loss is :  1.3210726
loss is :  1.3210725
loss is :  1.3210723
loss is :  1.3210723
loss is :  1.3210722
loss is :  1.3210721
loss is :  1.321072
loss is :  1.3210719
loss is :  1.3210715
loss is :  1.3210715
loss is :  1.3210713
loss is :  1.3210713
loss is :  1.3210711
loss is :  1.321071
loss is :  1.321071
loss is :  1.3210709
loss is :  1.3210708
loss is :  1.3210708
loss is :  1.3210707
loss is :  1.3210707
loss is :  1.3210703

loss is :  1.3210158
loss is :  1.3210157
loss is :  1.3210157
loss is :  1.3210156
loss is :  1.3210154
loss is :  1.3210154
loss is :  1.3210154
loss is :  1.3210152
loss is :  1.3210151
loss is :  1.3210151
loss is :  1.3210149
loss is :  1.3210149
loss is :  1.3210149
loss is :  1.3210146
loss is :  1.3210146
loss is :  1.3210144
loss is :  1.3210144
loss is :  1.3210144
loss is :  1.3210142
loss is :  1.321014
loss is :  1.3210139
loss is :  1.3210139
loss is :  1.3210139
loss is :  1.3210137
loss is :  1.3210137
loss is :  1.3210136
loss is :  1.3210135
loss is :  1.3210135
loss is :  1.3210133
loss is :  1.3210135
loss is :  1.3210133
loss is :  1.3210132
loss is :  1.3210131
loss is :  1.3210129
loss is :  1.3210127
loss is :  1.3210126
loss is :  1.3210126
loss is :  1.3210124
loss is :  1.3210124
loss is :  1.3210124
loss is :  1.3210121
loss is :  1.3210121
loss is :  1.321012
loss is :  1.321012
loss is :  1.3210118
loss is :  1.3210118
loss is :  1.3210117
loss is :  1.321

loss is :  1.3209743
loss is :  1.3209742
loss is :  1.3209742
loss is :  1.3209741
loss is :  1.3209741
loss is :  1.3209741
loss is :  1.3209739
loss is :  1.3209739
loss is :  1.3209738
loss is :  1.3209736
loss is :  1.3209735
loss is :  1.3209735
loss is :  1.3209734
loss is :  1.3209734
loss is :  1.3209734
loss is :  1.3209732
loss is :  1.3209732
loss is :  1.3209732
loss is :  1.3209729
loss is :  1.3209729
loss is :  1.3209729
loss is :  1.3209728
loss is :  1.3209727
loss is :  1.3209727
loss is :  1.3209723
loss is :  1.3209724
loss is :  1.3209723
loss is :  1.3209721
loss is :  1.3209721
loss is :  1.3209721
loss is :  1.320972
loss is :  1.320972
loss is :  1.3209718
loss is :  1.3209717
loss is :  1.3209718
loss is :  1.3209716
loss is :  1.3209715
loss is :  1.3209715
loss is :  1.3209715
loss is :  1.3209715
loss is :  1.3209711
loss is :  1.3209711
loss is :  1.3209711
loss is :  1.320971
loss is :  1.3209709
loss is :  1.3209708
loss is :  1.3209708
loss is :  1.320

loss is :  1.3209301
loss is :  1.32093
loss is :  1.3209299
loss is :  1.3209298
loss is :  1.3209296
loss is :  1.3209295
loss is :  1.3209295
loss is :  1.3209295
loss is :  1.3209295
loss is :  1.3209294
loss is :  1.3209294
loss is :  1.3209294
loss is :  1.3209293
loss is :  1.3209292
loss is :  1.3209292
loss is :  1.3209292
loss is :  1.3209289
loss is :  1.3209289
loss is :  1.3209288
loss is :  1.3209288
loss is :  1.3209287
loss is :  1.3209286
loss is :  1.3209287
loss is :  1.3209285
loss is :  1.3209283
loss is :  1.3209283
loss is :  1.3209282
loss is :  1.3209282
loss is :  1.3209281
loss is :  1.320928
loss is :  1.3209279
loss is :  1.3209279
loss is :  1.3209279
loss is :  1.3209279
loss is :  1.3209277
loss is :  1.3209276
loss is :  1.3209276
loss is :  1.3209276
loss is :  1.3209275
loss is :  1.3209275
loss is :  1.3209274
loss is :  1.3209273
loss is :  1.3209273
loss is :  1.3209271
loss is :  1.3209269
loss is :  1.320927
loss is :  1.3209269
loss is :  1.3209

loss is :  1.3209016
loss is :  1.3209016
loss is :  1.3209016
loss is :  1.3209015
loss is :  1.3209014
loss is :  1.3209014
loss is :  1.3209013
loss is :  1.3209012
loss is :  1.320901
loss is :  1.3209009
loss is :  1.320901
loss is :  1.3209009
loss is :  1.3209009
loss is :  1.3209008
loss is :  1.3209009
loss is :  1.3209008
loss is :  1.3209007
loss is :  1.3209007
loss is :  1.3209006
loss is :  1.3209004
loss is :  1.3209004
loss is :  1.3209004
loss is :  1.3209004
loss is :  1.3209003
loss is :  1.3209002
loss is :  1.3209002
loss is :  1.3209001
loss is :  1.3208998
loss is :  1.3208998
loss is :  1.3208998
loss is :  1.3208997
loss is :  1.3208998
loss is :  1.3208996
loss is :  1.3208996
loss is :  1.3208996
loss is :  1.3208995
loss is :  1.3208994
loss is :  1.3208994
loss is :  1.3208994
loss is :  1.3208991
loss is :  1.3208992
loss is :  1.3208991
loss is :  1.3208991
loss is :  1.320899
loss is :  1.320899
loss is :  1.320899
loss is :  1.3208989
loss is :  1.32089

In [22]:
print(sess.run(W1))
print('----------')
print(sess.run(b1))
print('----------')

[[ 0.28948116  0.08470685  0.03620474 -1.0481737  -0.1368796 ]
 [ 0.54309297 -0.72021765  1.589704   -1.5140609  -1.1206442 ]
 [ 2.517031   -1.054853   -1.1518962   1.3571837   1.4983346 ]
 [ 1.4725986  -1.9899138   1.2463851   1.6770995  -0.9671374 ]
 [-0.7009469  -1.4029232  -1.2938435  -0.4064642   0.72156113]
 [-0.8952895   1.240494    0.03559423  0.2306714   0.23430994]
 [-0.26531518  0.95159227  1.378734    1.6397108   2.0329866 ]]
----------
[-1.4924787   0.45229056 -0.10834695 -1.6470788  -0.9546042 ]
----------


In [23]:
vectors = sess.run(W1 + b1)

# if you work it out, you will see that it has the same effect as running the node hidden representation
print(vectors)

[[-1.2029976   0.53699744 -0.07214221 -2.6952524  -1.0914838 ]
 [-0.94938576 -0.26792708  1.4813571  -3.1611395  -2.0752485 ]
 [ 1.0245522  -0.6025624  -1.2602432  -0.28989506  0.54373044]
 [-0.01988018 -1.5376233   1.1380382   0.03002071 -1.9217416 ]
 [-2.1934257  -0.9506327  -1.4021904  -2.0535429  -0.23304307]
 [-2.3877683   1.6927845  -0.07275272 -1.4164073  -0.72029424]
 [-1.7577939   1.4038829   1.270387   -0.00736797  1.0783825 ]]


In [24]:
print(vectors[ word2int['queen'] ])

[-0.94938576 -0.26792708  1.4813571  -3.1611395  -2.0752485 ]


In [25]:
def euclidean_dist(vec1, vec2):
    return np.sqrt(np.sum((vec1-vec2)**2))
def find_closest(word_index, vectors):
    min_dist = 10000 # to act like positive infinity
    min_index = -1
    query_vector = vectors[word_index]
    for index, vector in enumerate(vectors):
        if euclidean_dist(vector, query_vector) < min_dist and not np.array_equal(vector, query_vector):
            min_dist = euclidean_dist(vector, query_vector)
            min_index = index
    return min_index

In [26]:
print(int2word[find_closest(word2int['queen'], vectors)])
print(int2word[find_closest(word2int['he'], vectors)])
print(int2word[find_closest(word2int['she'], vectors)])

king
king
king
