In [54]:
# Word2vec basics using tensorflow

In [55]:
import numpy as np
import tensorflow as tf

In [56]:
corpus_raw = 'He is the king . The King is royal . She is the royal queen '
corpus_raw = corpus_raw.lower()
corpus_raw

'he is the king . the king is royal . she is the royal queen '

In [57]:
words = []

for word in corpus_raw.split():
    # we can't treat '.' as a word
    if word != '.':
        words.append(word)
        
words = set(words) # remove duplicate words

word2int = {}
int2word = {}

vocab_size = len(words) # total number of unique words

for i, word in enumerate(words):
    word2int[word] = i
    int2word[i] = word

In [58]:
# print(word2int['queen'])
# -> 42 (say)
# print(int2word[42])
# -> 'queen'

In [59]:
# raw sentence is a list of sentences.
raw_sentences = corpus_raw.split('.')

sentences = []

for sentence in raw_sentences:
    sentences.append(sentence.split())

In [60]:
sentences

[['he', 'is', 'the', 'king'],
 ['the', 'king', 'is', 'royal'],
 ['she', 'is', 'the', 'royal', 'queen']]

In [61]:
# Now we will generate our training data

data = []
WINDOW_SIZE = 2

for sentence in sentences:
    for word_index, word in enumerate(sentence):
        for nb_word in sentence[max(word_index - WINDOW_SIZE, 0) : min(word_index + WINDOW_SIZE, len(sentence)) + 1]:
            if nb_word != word:
                data.append([word, nb_word])

data

[['he', 'is'],
 ['he', 'the'],
 ['is', 'he'],
 ['is', 'the'],
 ['is', 'king'],
 ['the', 'he'],
 ['the', 'is'],
 ['the', 'king'],
 ['king', 'is'],
 ['king', 'the'],
 ['the', 'king'],
 ['the', 'is'],
 ['king', 'the'],
 ['king', 'is'],
 ['king', 'royal'],
 ['is', 'the'],
 ['is', 'king'],
 ['is', 'royal'],
 ['royal', 'king'],
 ['royal', 'is'],
 ['she', 'is'],
 ['she', 'the'],
 ['is', 'she'],
 ['is', 'the'],
 ['is', 'royal'],
 ['the', 'she'],
 ['the', 'is'],
 ['the', 'royal'],
 ['the', 'queen'],
 ['royal', 'is'],
 ['royal', 'the'],
 ['royal', 'queen'],
 ['queen', 'the'],
 ['queen', 'royal']]

In [62]:
# covert number to one hot vector

def one_hot_vector(data_point_index, vocab_size):
    temp = np.zeros(vocab_size)
    temp[data_point_index] = 1
    return temp

x_train = [] # input word
y_train = [] # output word

for data_word in data:
    x_train.append(one_hot_vector(word2int[data_word[0]], vocab_size))
    y_train.append(one_hot_vector(word2int[data_word[1]], vocab_size))
    
#convert them to numpy arrays
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)

In [63]:
for i in range(len(x_train)):
    print(x_train[i], y_train[i])
print(x_train.shape, y_train.shape)

[0. 1. 0. 0. 0. 0. 0.] [0. 0. 0. 1. 0. 0. 0.]
[0. 1. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 1. 0.]
[0. 0. 0. 1. 0. 0. 0.] [0. 1. 0. 0. 0. 0. 0.]
[0. 0. 0. 1. 0. 0. 0.] [0. 0. 0. 0. 0. 1. 0.]
[0. 0. 0. 1. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 1.]
[0. 0. 0. 0. 0. 1. 0.] [0. 1. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0.] [0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0.] [0. 0. 0. 0. 0. 0. 1.]
[0. 0. 0. 0. 0. 0. 1.] [0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 1.] [0. 0. 0. 0. 0. 1. 0.]
[0. 0. 0. 0. 0. 1. 0.] [0. 0. 0. 0. 0. 0. 1.]
[0. 0. 0. 0. 0. 1. 0.] [0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 1.] [0. 0. 0. 0. 0. 1. 0.]
[0. 0. 0. 0. 0. 0. 1.] [0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 1.] [1. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 1. 0. 0. 0.] [0. 0. 0. 0. 0. 1. 0.]
[0. 0. 0. 1. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 1.]
[0. 0. 0. 1. 0. 0. 0.] [1. 0. 0. 0. 0. 0. 0.]
[1. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 1.]
[1. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 1. 0. 0. 0.]
[0. 0. 1. 0. 0. 0. 0.] [0. 0. 0. 1. 0. 0. 0.]
[0. 0. 1. 0. 0. 0. 0.] [0. 0. 0. 0

In [64]:
# making tensorflow model for x_train and y_train

x_label = tf.placeholder(tf.float32, shape=(None, vocab_size))
y_label = tf.placeholder(tf.float32, shape=(None, vocab_size))

In [65]:
# hidden layer

EMBEDDING_DIM = 5 # random

W1 = tf.Variable(tf.random_normal([vocab_size, EMBEDDING_DIM])) # weights
b1 = tf.Variable(tf.random_normal([EMBEDDING_DIM])) # bias

hidden_representation = tf.add(tf.matmul(x_label, W1), b1)

In [66]:
# output layer

W2 = tf.Variable(tf.random_normal([EMBEDDING_DIM, vocab_size]))
b2 = tf.Variable(tf.random_normal([vocab_size]))
prediction = tf.nn.softmax(tf.add( tf.matmul(hidden_representation, W2), b2))

In [67]:
# input_one_hot  --->  embedded repr. ---> predicted_neighbour_prob
# predicted_prob will be compared against a one hot vector to correct it.

In [73]:
# training network

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init) #make sure you do this!
# define the loss function:
cross_entropy_loss = tf.reduce_mean(-tf.reduce_sum(y_label * tf.log(prediction), reduction_indices=[1]))
# define the training step:
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(cross_entropy_loss)
n_iters = 10000
# train for n_iter iterations
for _ in range(n_iters):
    sess.run(train_step, feed_dict={x_label: x_train, y_label: y_train})
    print('loss is : ', sess.run(cross_entropy_loss, feed_dict={x_label: x_train, y_label: y_train}))

loss is :  3.8392348
loss is :  3.613927
loss is :  3.4238331
loss is :  3.2595308
loss is :  3.1155307
loss is :  2.988256
loss is :  2.87513
loss is :  2.7741444
loss is :  2.683645
loss is :  2.6022263
loss is :  2.528674
loss is :  2.461939
loss is :  2.401119
loss is :  2.3454418
loss is :  2.294253
loss is :  2.2470033
loss is :  2.2032309
loss is :  2.162551
loss is :  2.1246405
loss is :  2.0892298
loss is :  2.0560904
loss is :  2.025028
loss is :  1.9958758
loss is :  1.9684886
loss is :  1.9427387
loss is :  1.9185127
loss is :  1.8957077
loss is :  1.8742305
loss is :  1.8539958
loss is :  1.8349239
loss is :  1.8169405
loss is :  1.7999767
loss is :  1.7839671
loss is :  1.7688507
loss is :  1.7545696
loss is :  1.7410704
loss is :  1.7283016
loss is :  1.7162158
loss is :  1.7047685
loss is :  1.693918
loss is :  1.6836252
loss is :  1.6738541
loss is :  1.6645709
loss is :  1.6557443
loss is :  1.6473453
loss is :  1.639347
loss is :  1.6317242
loss is :  1.6244539
loss 

loss is :  1.3580256
loss is :  1.3579028
loss is :  1.3577805
loss is :  1.3576586
loss is :  1.3575373
loss is :  1.3574163
loss is :  1.357296
loss is :  1.357176
loss is :  1.3570564
loss is :  1.3569372
loss is :  1.3568183
loss is :  1.3567003
loss is :  1.3565824
loss is :  1.3564651
loss is :  1.3563482
loss is :  1.3562317
loss is :  1.3561156
loss is :  1.356
loss is :  1.3558848
loss is :  1.3557701
loss is :  1.3556557
loss is :  1.3555417
loss is :  1.3554282
loss is :  1.3553152
loss is :  1.3552027
loss is :  1.3550904
loss is :  1.3549786
loss is :  1.3548672
loss is :  1.3547564
loss is :  1.3546457
loss is :  1.3545357
loss is :  1.3544259
loss is :  1.3543166
loss is :  1.3542076
loss is :  1.3540993
loss is :  1.3539912
loss is :  1.3538834
loss is :  1.3537761
loss is :  1.3536694
loss is :  1.3535627
loss is :  1.3534569
loss is :  1.3533511
loss is :  1.3532459
loss is :  1.353141
loss is :  1.3530365
loss is :  1.3529325
loss is :  1.3528287
loss is :  1.3527253

loss is :  1.3323735
loss is :  1.3323505
loss is :  1.3323275
loss is :  1.3323046
loss is :  1.3322817
loss is :  1.3322589
loss is :  1.3322363
loss is :  1.3322136
loss is :  1.3321913
loss is :  1.3321689
loss is :  1.3321466
loss is :  1.3321242
loss is :  1.3321021
loss is :  1.3320801
loss is :  1.332058
loss is :  1.332036
loss is :  1.3320141
loss is :  1.3319923
loss is :  1.3319706
loss is :  1.3319489
loss is :  1.3319273
loss is :  1.3319057
loss is :  1.3318844
loss is :  1.3318629
loss is :  1.3318416
loss is :  1.3318205
loss is :  1.3317993
loss is :  1.3317783
loss is :  1.3317572
loss is :  1.3317363
loss is :  1.3317153
loss is :  1.3316946
loss is :  1.3316737
loss is :  1.3316531
loss is :  1.3316325
loss is :  1.3316121
loss is :  1.3315915
loss is :  1.3315711
loss is :  1.3315508
loss is :  1.3315306
loss is :  1.3315103
loss is :  1.3314902
loss is :  1.3314701
loss is :  1.3314502
loss is :  1.3314302
loss is :  1.3314104
loss is :  1.3313905
loss is :  1.33

loss is :  1.326536
loss is :  1.3265294
loss is :  1.3265224
loss is :  1.3265158
loss is :  1.326509
loss is :  1.3265022
loss is :  1.3264954
loss is :  1.326489
loss is :  1.3264822
loss is :  1.3264755
loss is :  1.3264688
loss is :  1.3264621
loss is :  1.3264555
loss is :  1.3264489
loss is :  1.3264422
loss is :  1.3264358
loss is :  1.3264291
loss is :  1.3264226
loss is :  1.3264161
loss is :  1.3264093
loss is :  1.326403
loss is :  1.3263965
loss is :  1.32639
loss is :  1.3263834
loss is :  1.326377
loss is :  1.3263706
loss is :  1.326364
loss is :  1.3263578
loss is :  1.3263513
loss is :  1.3263447
loss is :  1.3263385
loss is :  1.326332
loss is :  1.3263255
loss is :  1.3263193
loss is :  1.3263129
loss is :  1.3263065
loss is :  1.3263004
loss is :  1.326294
loss is :  1.3262877
loss is :  1.3262814
loss is :  1.3262751
loss is :  1.3262689
loss is :  1.3262626
loss is :  1.3262564
loss is :  1.3262502
loss is :  1.3262439
loss is :  1.3262377
loss is :  1.3262315
lo

loss is :  1.3244693
loss is :  1.3244661
loss is :  1.3244629
loss is :  1.3244598
loss is :  1.3244566
loss is :  1.3244534
loss is :  1.3244503
loss is :  1.3244472
loss is :  1.3244438
loss is :  1.3244407
loss is :  1.3244375
loss is :  1.3244344
loss is :  1.3244313
loss is :  1.3244281
loss is :  1.324425
loss is :  1.3244219
loss is :  1.3244188
loss is :  1.3244157
loss is :  1.3244125
loss is :  1.3244095
loss is :  1.3244065
loss is :  1.324403
loss is :  1.3244002
loss is :  1.3243971
loss is :  1.324394
loss is :  1.324391
loss is :  1.3243877
loss is :  1.3243846
loss is :  1.3243818
loss is :  1.3243785
loss is :  1.3243755
loss is :  1.3243725
loss is :  1.3243695
loss is :  1.3243665
loss is :  1.3243632
loss is :  1.3243604
loss is :  1.3243573
loss is :  1.3243543
loss is :  1.3243511
loss is :  1.3243481
loss is :  1.3243452
loss is :  1.3243421
loss is :  1.3243392
loss is :  1.324336
loss is :  1.324333
loss is :  1.3243301
loss is :  1.3243271
loss is :  1.324324

loss is :  1.3233794
loss is :  1.3233775
loss is :  1.3233758
loss is :  1.3233739
loss is :  1.3233721
loss is :  1.3233705
loss is :  1.3233687
loss is :  1.3233668
loss is :  1.3233649
loss is :  1.3233632
loss is :  1.3233614
loss is :  1.3233595
loss is :  1.3233578
loss is :  1.3233559
loss is :  1.3233544
loss is :  1.3233526
loss is :  1.3233508
loss is :  1.3233491
loss is :  1.3233472
loss is :  1.3233454
loss is :  1.3233436
loss is :  1.323342
loss is :  1.3233403
loss is :  1.3233385
loss is :  1.3233366
loss is :  1.3233349
loss is :  1.323333
loss is :  1.3233314
loss is :  1.3233297
loss is :  1.3233279
loss is :  1.3233261
loss is :  1.3233246
loss is :  1.3233229
loss is :  1.323321
loss is :  1.3233193
loss is :  1.3233175
loss is :  1.3233159
loss is :  1.323314
loss is :  1.3233123
loss is :  1.3233106
loss is :  1.3233088
loss is :  1.3233072
loss is :  1.3233055
loss is :  1.3233037
loss is :  1.3233019
loss is :  1.3233004
loss is :  1.3232987
loss is :  1.3232

loss is :  1.3227369
loss is :  1.3227357
loss is :  1.3227345
loss is :  1.3227334
loss is :  1.3227322
loss is :  1.3227311
loss is :  1.32273
loss is :  1.3227289
loss is :  1.3227278
loss is :  1.3227265
loss is :  1.3227254
loss is :  1.3227243
loss is :  1.3227232
loss is :  1.322722
loss is :  1.3227209
loss is :  1.3227198
loss is :  1.3227186
loss is :  1.3227174
loss is :  1.3227164
loss is :  1.3227152
loss is :  1.3227141
loss is :  1.3227129
loss is :  1.3227117
loss is :  1.3227108
loss is :  1.3227096
loss is :  1.3227085
loss is :  1.3227073
loss is :  1.3227061
loss is :  1.322705
loss is :  1.322704
loss is :  1.3227029
loss is :  1.3227016
loss is :  1.3227006
loss is :  1.3226994
loss is :  1.3226982
loss is :  1.3226972
loss is :  1.322696
loss is :  1.3226948
loss is :  1.3226938
loss is :  1.3226926
loss is :  1.3226917
loss is :  1.3226906
loss is :  1.3226895
loss is :  1.3226882
loss is :  1.3226871
loss is :  1.3226861
loss is :  1.3226849
loss is :  1.322683

loss is :  1.322324
loss is :  1.3223233
loss is :  1.3223225
loss is :  1.3223218
loss is :  1.3223208
loss is :  1.3223201
loss is :  1.3223193
loss is :  1.3223186
loss is :  1.3223177
loss is :  1.3223169
loss is :  1.3223162
loss is :  1.3223155
loss is :  1.3223145
loss is :  1.3223137
loss is :  1.3223128
loss is :  1.3223122
loss is :  1.3223113
loss is :  1.3223106
loss is :  1.3223097
loss is :  1.322309
loss is :  1.3223083
loss is :  1.3223075
loss is :  1.3223066
loss is :  1.3223059
loss is :  1.322305
loss is :  1.3223042
loss is :  1.3223034
loss is :  1.3223027
loss is :  1.3223017
loss is :  1.322301
loss is :  1.3223003
loss is :  1.3222996
loss is :  1.3222986
loss is :  1.3222979
loss is :  1.3222971
loss is :  1.3222964
loss is :  1.3222955
loss is :  1.322295
loss is :  1.322294
loss is :  1.3222933
loss is :  1.3222923
loss is :  1.3222916
loss is :  1.3222909
loss is :  1.3222902
loss is :  1.3222893
loss is :  1.3222886
loss is :  1.3222877
loss is :  1.322287

loss is :  1.3220404
loss is :  1.32204
loss is :  1.3220394
loss is :  1.3220388
loss is :  1.3220382
loss is :  1.3220375
loss is :  1.322037
loss is :  1.3220363
loss is :  1.3220358
loss is :  1.3220353
loss is :  1.3220345
loss is :  1.322034
loss is :  1.3220333
loss is :  1.3220327
loss is :  1.3220322
loss is :  1.3220316
loss is :  1.322031
loss is :  1.3220305
loss is :  1.32203
loss is :  1.3220292
loss is :  1.3220286
loss is :  1.3220282
loss is :  1.3220276
loss is :  1.322027
loss is :  1.3220263
loss is :  1.3220257
loss is :  1.3220251
loss is :  1.3220245
loss is :  1.322024
loss is :  1.3220234
loss is :  1.3220228
loss is :  1.3220222
loss is :  1.3220216
loss is :  1.322021
loss is :  1.3220205
loss is :  1.3220199
loss is :  1.3220192
loss is :  1.3220187
loss is :  1.322018
loss is :  1.3220176
loss is :  1.3220171
loss is :  1.3220164
loss is :  1.3220158
loss is :  1.3220152
loss is :  1.3220147
loss is :  1.3220141
loss is :  1.3220134
loss is :  1.3220129
los

loss is :  1.3218203
loss is :  1.3218199
loss is :  1.3218194
loss is :  1.321819
loss is :  1.3218185
loss is :  1.3218181
loss is :  1.3218175
loss is :  1.3218172
loss is :  1.3218166
loss is :  1.3218163
loss is :  1.3218158
loss is :  1.3218154
loss is :  1.3218148
loss is :  1.3218144
loss is :  1.3218141
loss is :  1.3218135
loss is :  1.3218129
loss is :  1.3218126
loss is :  1.3218122
loss is :  1.3218117
loss is :  1.3218112
loss is :  1.3218107
loss is :  1.3218104
loss is :  1.32181
loss is :  1.3218094
loss is :  1.3218089
loss is :  1.3218086
loss is :  1.3218081
loss is :  1.3218076
loss is :  1.3218071
loss is :  1.3218068
loss is :  1.3218063
loss is :  1.3218058
loss is :  1.3218054
loss is :  1.3218049
loss is :  1.3218045
loss is :  1.321804
loss is :  1.3218037
loss is :  1.3218031
loss is :  1.3218027
loss is :  1.3218024
loss is :  1.3218018
loss is :  1.3218014
loss is :  1.3218011
loss is :  1.3218005
loss is :  1.3218001
loss is :  1.3217995
loss is :  1.3217

loss is :  1.3216466
loss is :  1.3216462
loss is :  1.3216459
loss is :  1.3216455
loss is :  1.321645
loss is :  1.3216447
loss is :  1.3216444
loss is :  1.3216442
loss is :  1.3216436
loss is :  1.3216434
loss is :  1.3216429
loss is :  1.3216426
loss is :  1.3216424
loss is :  1.3216419
loss is :  1.3216417
loss is :  1.3216411
loss is :  1.321641
loss is :  1.3216405
loss is :  1.3216403
loss is :  1.3216397
loss is :  1.3216394
loss is :  1.3216392
loss is :  1.3216388
loss is :  1.3216383
loss is :  1.321638
loss is :  1.3216376
loss is :  1.3216373
loss is :  1.321637
loss is :  1.3216366
loss is :  1.3216363
loss is :  1.3216358
loss is :  1.3216355
loss is :  1.3216351
loss is :  1.3216349
loss is :  1.3216345
loss is :  1.321634
loss is :  1.3216337
loss is :  1.3216333
loss is :  1.3216331
loss is :  1.3216326
loss is :  1.3216323
loss is :  1.321632
loss is :  1.3216317
loss is :  1.3216314
loss is :  1.321631
loss is :  1.3216307
loss is :  1.3216302
loss is :  1.3216296

loss is :  1.3215117
loss is :  1.3215115
loss is :  1.3215113
loss is :  1.3215109
loss is :  1.3215107
loss is :  1.3215103
loss is :  1.32151
loss is :  1.3215097
loss is :  1.3215095
loss is :  1.3215094
loss is :  1.321509
loss is :  1.3215086
loss is :  1.3215083
loss is :  1.321508
loss is :  1.3215077
loss is :  1.3215076
loss is :  1.3215072
loss is :  1.3215069
loss is :  1.3215066
loss is :  1.3215064
loss is :  1.3215058
loss is :  1.3215055
loss is :  1.3215055
loss is :  1.3215053
loss is :  1.321505
loss is :  1.3215046
loss is :  1.3215042
loss is :  1.321504
loss is :  1.3215038
loss is :  1.3215035
loss is :  1.3215032
loss is :  1.3215028
loss is :  1.3215027
loss is :  1.3215023
loss is :  1.3215021
loss is :  1.3215017
loss is :  1.3215016
loss is :  1.3215013
loss is :  1.3215009
loss is :  1.3215007
loss is :  1.3215002
loss is :  1.3215
loss is :  1.3214998
loss is :  1.3214996
loss is :  1.3214992
loss is :  1.3214988
loss is :  1.3214985
loss is :  1.3214983
l

loss is :  1.3213904
loss is :  1.3213903
loss is :  1.3213899
loss is :  1.3213897
loss is :  1.3213893
loss is :  1.3213892
loss is :  1.3213891
loss is :  1.3213888
loss is :  1.3213886
loss is :  1.3213882
loss is :  1.321388
loss is :  1.3213878
loss is :  1.3213875
loss is :  1.3213874
loss is :  1.3213873
loss is :  1.3213869
loss is :  1.3213866
loss is :  1.3213866
loss is :  1.3213861
loss is :  1.3213859
loss is :  1.3213856
loss is :  1.3213855
loss is :  1.3213853
loss is :  1.3213851
loss is :  1.3213849
loss is :  1.3213845
loss is :  1.3213844
loss is :  1.3213842
loss is :  1.3213838
loss is :  1.3213836
loss is :  1.3213834
loss is :  1.3213832
loss is :  1.321383
loss is :  1.3213826
loss is :  1.3213824
loss is :  1.3213823
loss is :  1.3213822
loss is :  1.3213818
loss is :  1.3213817
loss is :  1.3213816
loss is :  1.3213812
loss is :  1.3213809
loss is :  1.3213807
loss is :  1.3213805
loss is :  1.3213803
loss is :  1.3213799
loss is :  1.3213797
loss is :  1.32

loss is :  1.3213053
loss is :  1.3213052
loss is :  1.3213049
loss is :  1.3213048
loss is :  1.3213046
loss is :  1.3213042
loss is :  1.3213041
loss is :  1.3213038
loss is :  1.3213036
loss is :  1.3213034
loss is :  1.3213034
loss is :  1.3213031
loss is :  1.3213029
loss is :  1.3213027
loss is :  1.3213025
loss is :  1.3213023
loss is :  1.3213022
loss is :  1.3213019
loss is :  1.3213016
loss is :  1.3213015
loss is :  1.3213015
loss is :  1.3213012
loss is :  1.3213011
loss is :  1.3213007
loss is :  1.3213005
loss is :  1.3213004
loss is :  1.3213001
loss is :  1.3212999
loss is :  1.3212998
loss is :  1.3212997
loss is :  1.3212994
loss is :  1.3212992
loss is :  1.321299
loss is :  1.3212988
loss is :  1.3212986
loss is :  1.3212985
loss is :  1.3212982
loss is :  1.321298
loss is :  1.3212978
loss is :  1.3212976
loss is :  1.3212975
loss is :  1.3212973
loss is :  1.3212973
loss is :  1.3212969
loss is :  1.3212967
loss is :  1.3212963
loss is :  1.3212962
loss is :  1.32

loss is :  1.3212304
loss is :  1.32123
loss is :  1.3212298
loss is :  1.3212296
loss is :  1.3212296
loss is :  1.3212293
loss is :  1.3212292
loss is :  1.3212291
loss is :  1.321229
loss is :  1.3212287
loss is :  1.3212286
loss is :  1.3212285
loss is :  1.3212283
loss is :  1.3212281
loss is :  1.3212279
loss is :  1.3212279
loss is :  1.3212276
loss is :  1.3212273
loss is :  1.3212271
loss is :  1.3212271
loss is :  1.321227
loss is :  1.3212267
loss is :  1.3212266
loss is :  1.3212265
loss is :  1.3212261
loss is :  1.3212261
loss is :  1.3212259
loss is :  1.3212258
loss is :  1.3212255
loss is :  1.3212254
loss is :  1.3212252
loss is :  1.3212252
loss is :  1.3212249
loss is :  1.3212248
loss is :  1.3212246
loss is :  1.3212245
loss is :  1.3212243
loss is :  1.3212242
loss is :  1.321224
loss is :  1.3212236
loss is :  1.3212235
loss is :  1.3212234
loss is :  1.3212233
loss is :  1.3212233
loss is :  1.321223
loss is :  1.3212229
loss is :  1.3212225
loss is :  1.321222

loss is :  1.3211647
loss is :  1.3211645
loss is :  1.3211643
loss is :  1.3211641
loss is :  1.3211641
loss is :  1.321164
loss is :  1.3211638
loss is :  1.3211638
loss is :  1.3211635
loss is :  1.3211633
loss is :  1.3211633
loss is :  1.3211632
loss is :  1.3211628
loss is :  1.3211628
loss is :  1.3211627
loss is :  1.3211625
loss is :  1.3211623
loss is :  1.3211621
loss is :  1.3211621
loss is :  1.3211619
loss is :  1.3211619
loss is :  1.3211616
loss is :  1.3211616
loss is :  1.3211615
loss is :  1.3211613
loss is :  1.3211612
loss is :  1.3211609
loss is :  1.3211608
loss is :  1.3211606
loss is :  1.3211604
loss is :  1.3211603
loss is :  1.3211603
loss is :  1.3211602
loss is :  1.3211601
loss is :  1.32116
loss is :  1.3211597
loss is :  1.3211595
loss is :  1.3211594
loss is :  1.3211592
loss is :  1.3211591
loss is :  1.3211589
loss is :  1.321159
loss is :  1.3211586
loss is :  1.3211584
loss is :  1.3211584
loss is :  1.3211583
loss is :  1.3211582
loss is :  1.3211

loss is :  1.3211092
loss is :  1.3211089
loss is :  1.3211088
loss is :  1.3211088
loss is :  1.3211086
loss is :  1.3211085
loss is :  1.3211083
loss is :  1.3211082
loss is :  1.3211081
loss is :  1.3211081
loss is :  1.3211077
loss is :  1.3211076
loss is :  1.3211075
loss is :  1.3211075
loss is :  1.3211073
loss is :  1.321107
loss is :  1.321107
loss is :  1.3211069
loss is :  1.3211069
loss is :  1.3211069
loss is :  1.3211067
loss is :  1.3211066
loss is :  1.3211063
loss is :  1.3211063
loss is :  1.3211061
loss is :  1.3211061
loss is :  1.3211058
loss is :  1.3211058
loss is :  1.3211056
loss is :  1.3211055
loss is :  1.3211055
loss is :  1.3211051
loss is :  1.321105
loss is :  1.321105
loss is :  1.321105
loss is :  1.3211048
loss is :  1.3211046
loss is :  1.3211046
loss is :  1.3211044
loss is :  1.3211043
loss is :  1.3211042
loss is :  1.321104
loss is :  1.3211038
loss is :  1.3211037
loss is :  1.3211036
loss is :  1.3211035
loss is :  1.3211033
loss is :  1.321103

loss is :  1.3210607
loss is :  1.3210607
loss is :  1.3210605
loss is :  1.3210604
loss is :  1.3210603
loss is :  1.3210602
loss is :  1.32106
loss is :  1.3210598
loss is :  1.3210598
loss is :  1.3210597
loss is :  1.3210596
loss is :  1.3210595
loss is :  1.3210595
loss is :  1.3210592
loss is :  1.3210592
loss is :  1.3210591
loss is :  1.321059
loss is :  1.3210589
loss is :  1.3210586
loss is :  1.3210586
loss is :  1.3210585
loss is :  1.3210584
loss is :  1.3210582
loss is :  1.3210582
loss is :  1.3210582
loss is :  1.321058
loss is :  1.3210577
loss is :  1.3210577
loss is :  1.3210576
loss is :  1.3210574
loss is :  1.3210574
loss is :  1.3210573
loss is :  1.3210572
loss is :  1.3210572
loss is :  1.3210572
loss is :  1.321057
loss is :  1.321057
loss is :  1.3210566
loss is :  1.3210566
loss is :  1.3210565
loss is :  1.3210562
loss is :  1.3210561
loss is :  1.3210561
loss is :  1.3210561
loss is :  1.3210559
loss is :  1.3210559
loss is :  1.3210558
loss is :  1.321055

loss is :  1.3210199
loss is :  1.3210198
loss is :  1.3210198
loss is :  1.3210196
loss is :  1.3210195
loss is :  1.3210193
loss is :  1.3210192
loss is :  1.3210192
loss is :  1.321019
loss is :  1.3210192
loss is :  1.3210189
loss is :  1.3210189
loss is :  1.3210188
loss is :  1.3210187
loss is :  1.3210187
loss is :  1.3210186
loss is :  1.3210185
loss is :  1.3210183
loss is :  1.3210181
loss is :  1.321018
loss is :  1.321018
loss is :  1.3210177
loss is :  1.3210177
loss is :  1.3210177
loss is :  1.3210175
loss is :  1.3210175
loss is :  1.3210173
loss is :  1.3210174
loss is :  1.3210173
loss is :  1.3210171
loss is :  1.321017
loss is :  1.3210169
loss is :  1.3210168
loss is :  1.3210167
loss is :  1.3210168
loss is :  1.3210166
loss is :  1.3210166
loss is :  1.3210163
loss is :  1.3210162
loss is :  1.3210163
loss is :  1.3210161
loss is :  1.3210161
loss is :  1.3210158
loss is :  1.3210157
loss is :  1.3210157
loss is :  1.3210155
loss is :  1.3210154
loss is :  1.3210

loss is :  1.3209817
loss is :  1.3209816
loss is :  1.3209815
loss is :  1.3209813
loss is :  1.3209813
loss is :  1.3209811
loss is :  1.3209811
loss is :  1.320981
loss is :  1.320981
loss is :  1.3209809
loss is :  1.3209809
loss is :  1.3209809
loss is :  1.3209807
loss is :  1.3209807
loss is :  1.3209807
loss is :  1.3209804
loss is :  1.3209804
loss is :  1.3209801
loss is :  1.3209801
loss is :  1.3209801
loss is :  1.32098
loss is :  1.3209798
loss is :  1.3209797
loss is :  1.3209797
loss is :  1.3209796
loss is :  1.3209796
loss is :  1.3209796
loss is :  1.3209794
loss is :  1.3209794
loss is :  1.3209794
loss is :  1.3209791
loss is :  1.320979
loss is :  1.3209789
loss is :  1.3209789
loss is :  1.3209788
loss is :  1.3209786
loss is :  1.3209786
loss is :  1.3209786
loss is :  1.3209784
loss is :  1.3209784
loss is :  1.3209782
loss is :  1.3209782
loss is :  1.3209782
loss is :  1.3209779
loss is :  1.320978
loss is :  1.3209778
loss is :  1.3209776
loss is :  1.320977

loss is :  1.3209479
loss is :  1.3209478
loss is :  1.3209478
loss is :  1.3209476
loss is :  1.3209476
loss is :  1.3209475
loss is :  1.3209474
loss is :  1.3209473
loss is :  1.3209473
loss is :  1.3209472
loss is :  1.320947
loss is :  1.320947
loss is :  1.3209468
loss is :  1.3209469
loss is :  1.3209468
loss is :  1.3209468
loss is :  1.3209467
loss is :  1.3209467
loss is :  1.3209466
loss is :  1.3209467
loss is :  1.3209463
loss is :  1.3209463
loss is :  1.3209465
loss is :  1.3209461
loss is :  1.320946
loss is :  1.3209459
loss is :  1.3209459
loss is :  1.3209459
loss is :  1.3209457
loss is :  1.3209457
loss is :  1.3209455
loss is :  1.3209455
loss is :  1.3209456
loss is :  1.3209454
loss is :  1.3209454
loss is :  1.3209454
loss is :  1.3209453
loss is :  1.3209451
loss is :  1.320945
loss is :  1.3209448
loss is :  1.3209448
loss is :  1.3209448
loss is :  1.3209447
loss is :  1.3209447
loss is :  1.3209445
loss is :  1.3209445
loss is :  1.3209444
loss is :  1.3209

loss is :  1.3209176
loss is :  1.3209176
loss is :  1.3209174
loss is :  1.3209174
loss is :  1.3209174
loss is :  1.3209171
loss is :  1.3209171
loss is :  1.320917
loss is :  1.320917
loss is :  1.3209168
loss is :  1.3209169
loss is :  1.3209168
loss is :  1.3209167
loss is :  1.3209165
loss is :  1.3209165
loss is :  1.3209165
loss is :  1.3209164
loss is :  1.3209164
loss is :  1.3209163
loss is :  1.3209163
loss is :  1.3209162
loss is :  1.3209162
loss is :  1.320916
loss is :  1.320916
loss is :  1.3209159
loss is :  1.3209158
loss is :  1.3209158
loss is :  1.3209157
loss is :  1.3209156
loss is :  1.3209156
loss is :  1.3209156
loss is :  1.3209155
loss is :  1.3209155
loss is :  1.3209153
loss is :  1.3209153
loss is :  1.3209152
loss is :  1.3209151
loss is :  1.320915
loss is :  1.320915
loss is :  1.3209147
loss is :  1.3209147
loss is :  1.3209147
loss is :  1.3209145
loss is :  1.3209145
loss is :  1.3209145
loss is :  1.3209144
loss is :  1.3209144
loss is :  1.320914

loss is :  1.3208902
loss is :  1.3208901
loss is :  1.3208901
loss is :  1.3208901
loss is :  1.3208898
loss is :  1.32089
loss is :  1.3208898
loss is :  1.3208898
loss is :  1.3208897
loss is :  1.3208895
loss is :  1.3208895
loss is :  1.3208895
loss is :  1.3208895
loss is :  1.3208894
loss is :  1.3208892
loss is :  1.3208892
loss is :  1.3208892
loss is :  1.3208891
loss is :  1.3208891
loss is :  1.320889
loss is :  1.320889
loss is :  1.320889
loss is :  1.3208888
loss is :  1.3208889
loss is :  1.3208886
loss is :  1.3208886
loss is :  1.3208885
loss is :  1.3208886
loss is :  1.3208885
loss is :  1.3208885
loss is :  1.3208883
loss is :  1.3208883
loss is :  1.3208882
loss is :  1.3208882
loss is :  1.3208882
loss is :  1.3208879
loss is :  1.3208879
loss is :  1.3208879
loss is :  1.3208879
loss is :  1.3208878
loss is :  1.3208877
loss is :  1.3208876
loss is :  1.3208876
loss is :  1.3208876
loss is :  1.3208876
loss is :  1.3208876
loss is :  1.3208872
loss is :  1.32088

loss is :  1.3208653
loss is :  1.3208653
loss is :  1.3208653
loss is :  1.320865
loss is :  1.320865
loss is :  1.320865
loss is :  1.3208649
loss is :  1.3208649
loss is :  1.3208648
loss is :  1.3208648
loss is :  1.3208647
loss is :  1.3208647
loss is :  1.3208647
loss is :  1.3208646
loss is :  1.3208646
loss is :  1.3208646
loss is :  1.3208643
loss is :  1.3208644
loss is :  1.3208643
loss is :  1.3208643
loss is :  1.3208642
loss is :  1.3208642
loss is :  1.3208641
loss is :  1.3208641
loss is :  1.3208641
loss is :  1.3208638
loss is :  1.320864
loss is :  1.3208638
loss is :  1.3208638
loss is :  1.3208636
loss is :  1.3208636
loss is :  1.3208636
loss is :  1.3208636
loss is :  1.3208636
loss is :  1.3208634
loss is :  1.3208632
loss is :  1.3208632
loss is :  1.3208632
loss is :  1.3208632
loss is :  1.3208631
loss is :  1.3208631
loss is :  1.320863
loss is :  1.3208629
loss is :  1.3208628
loss is :  1.3208628
loss is :  1.3208628
loss is :  1.3208628
loss is :  1.32086

In [74]:
print(sess.run(W1))
print('-' * 60)
print(sess.run(b1))

[[ 1.9302212   0.28671357  1.2116195   0.8683782  -0.8565541 ]
 [-0.4237613  -2.8019907   0.0730492  -0.8162556   1.3001902 ]
 [ 0.09848256 -2.336158    0.8140551   0.9311209   0.28391376]
 [ 0.09168796  0.99068713 -1.9715261   1.1108449  -0.20437998]
 [-1.7500073  -0.8484117  -1.5506805   0.06899809 -0.72594327]
 [-0.3333322   2.7256303   0.79834116 -1.3317351   1.3549297 ]
 [-1.2417868  -0.83650094  0.3195036  -0.7158945  -0.8043391 ]]
------------------------------------------------------------
[-0.44400597 -0.54203844  0.27943832 -1.2235019  -0.75923127]


In [75]:
# varibale 'vectors' will work as a lookup table for finding vectors for words

vectors = sess.run(W1 + b1)
print(vectors)
print("\n", vectors[word2int['queen']])

[[ 1.4862152  -0.25532487  1.4910579  -0.3551237  -1.6157854 ]
 [-0.8677673  -3.3440292   0.3524875  -2.0397575   0.54095894]
 [-0.34552342 -2.8781965   1.0934935  -0.29238105 -0.4753175 ]
 [-0.352318    0.4486487  -1.6920879  -0.11265707 -0.96361125]
 [-2.194013   -1.3904501  -1.2712421  -1.1545038  -1.4851745 ]
 [-0.77733815  2.1835918   1.0777795  -2.555237    0.5956984 ]
 [-1.6857928  -1.3785393   0.5989419  -1.9393964  -1.5635704 ]]

 [-2.194013  -1.3904501 -1.2712421 -1.1545038 -1.4851745]


In [76]:
# We have word vector using word2vec
# Now we have closest vector to a given vector

def euclidean_dist(vec1, vec2):
    return np.sqrt(np.sum((vec1-vec2)**2))

def find_closest(word_index, vectors):
    min_dist = 10000 # to act like positive infinity
    min_index = -1
    query_vector = vectors[word_index]
    for index, vector in enumerate(vectors):
        if euclidean_dist(vector, query_vector) < min_dist and not np.array_equal(vector, query_vector):
            min_dist = euclidean_dist(vector, query_vector)
            min_index = index
    return min_index

In [77]:
print(int2word[find_closest(word2int['king'], vectors)])
print(int2word[find_closest(word2int['queen'], vectors)])
print(int2word[find_closest(word2int['royal'], vectors)])

queen
king
she
