In [73]:
import tensorflow as tf
import math
from random import randint
import numpy as np
from nltk.corpus import stopwords
import string

In [74]:
# Toy corpus: a list of messages, each message being a list of sentences.
chat = [[
    "king is husband of queen",
    "queen is wife of king",
    "king is male",
    "queen is female",
    "king marry queen",
]]

# Words whose nearest neighbours are printed while the model trains.
valid_words = ["king", "queen"]

In [75]:
# Build the vocabulary (`words`) and the list of training sentences
# (`sentences`) from every sentence of every message in `chat`.
words = set()
sentences = []

# Held as a set so each per-token membership test is a hash lookup.
stops = set(stopwords.words('english'))

# Characters removed from every token: punctuation and ASCII digits.
strip_chars = set(string.punctuation) | set('0123456789')

for message in chat:
    for sentence in message:
        # Lower-case, split on whitespace, then strip punctuation and digits.
        tokens = [
            ''.join(c for c in token if c not in strip_chars)
            for token in sentence.lower().split()
        ]
        # Tokens made only of stripped characters are now empty; drop them.
        tokens = [token for token in tokens if token]

        # Store the *normalised* sentence so that the tokens later split out
        # of it match the vocabulary entries exactly (same case, no
        # punctuation).  Stopwords stay in the sentence, so the distance
        # between the remaining words is the same as in the original text.
        sentences.append(' '.join(tokens))

        # Only non-stopwords become vocabulary entries.
        words.update(token for token in tokens if token not in stops)

# Sorting makes the word -> index assignment reproducible: the iteration
# order of a set of strings can differ from one interpreter run to the next.
words = sorted(words)
# index -> word
word_dictionary_rev = dict(enumerate(words))

In [76]:
# Hyper-parameters of the model and of the training run.
wordVectorDim = 3                    # length of each embedding vector
vocabSize = len(words)               # number of distinct vocabulary words
batchSize = 4                        # training pairs fed per step
numNegativeSample = batchSize // 2   # passed to the NCE loss as num_sampled
windowSize = 3                       # context words considered on each side
learning_rate = 0.1
iterations = 100000                  # total number of training steps
# Loss and nearest neighbours are printed every `debug_iterations` steps.
# (An earlier assignment of 20000 was immediately overwritten and is removed.)
debug_iterations = 200

In [77]:
# Output-side parameters handed to the NCE loss: one weight row of length
# wordVectorDim and one bias per vocabulary word.
nceWeights = tf.Variable(
    initial_value=tf.random.truncated_normal(
        shape=[vocabSize, wordVectorDim],
        stddev=1.0 / math.sqrt(wordVectorDim),
    )
)
nceBiases = tf.Variable(initial_value=tf.zeros(shape=[vocabSize]))

In [78]:
# Graph inputs for one batch: `train_inputs` holds batchSize word indices,
# `train_outputs` holds one label index per row (shape [batchSize, 1]).
train_inputs = tf.placeholder(dtype=tf.int32, shape=[batchSize])
train_outputs = tf.placeholder(dtype=tf.int32, shape=[batchSize, 1])

In [79]:
# Embedding table: one wordVectorDim-sized row per vocabulary word,
# initialised uniformly in [-1, 1).
embeddings = tf.Variable(
    initial_value=tf.random.uniform(
        shape=[vocabSize, wordVectorDim], minval=-1.0, maxval=1.0
    )
)
# Rows of the table selected by the indices fed through `train_inputs`.
embed = tf.nn.embedding_lookup(params=embeddings, ids=train_inputs)

In [80]:
# NCE loss for every example of the batch, then averaged into a scalar.
per_example_nce = tf.nn.nce_loss(
    weights=nceWeights,
    biases=nceBiases,
    labels=train_outputs,
    inputs=embed,
    num_sampled=numNegativeSample,
    num_classes=vocabSize,
)
loss = tf.reduce_mean(per_example_nce)

# `optimizer` is the training op: running it applies one SGD update.
sgd = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
optimizer = sgd.minimize(loss)

In [81]:
# Build the skip-gram training pairs: x[n] is the index of a centre word and
# y[n] the index of a vocabulary word found within `windowSize` positions of
# it in the same sentence.
x = []
y = []

# Resolve each token with one dictionary lookup instead of scanning the
# `words` list twice (`in words` followed by `words.index(...)`).
# setdefault keeps the first index of a word, exactly like list.index().
word_to_index = {}
for index, known_word in enumerate(words):
    word_to_index.setdefault(known_word, index)

for sentence in sentences:
    sentenceWords = sentence.split(" ")
    sentenceWordsLen = len(sentenceWords)
    for i, centre in enumerate(sentenceWords):
        centre_index = word_to_index.get(centre)
        if centre_index is None:
            # Centre word is not in the vocabulary: it produces no pairs.
            continue
        wordsAfter = sentenceWords[i + 1:min(sentenceWordsLen, i + windowSize + 1)]
        wordsBefore = sentenceWords[max(0, i - windowSize):i]
        # Words after the centre are emitted first, then the words before it.
        for context in wordsAfter + wordsBefore:
            context_index = word_to_index.get(context)
            # Skip out-of-vocabulary context words and self-pairs.
            if context_index is not None and context != centre:
                x.append(centre_index)
                y.append(context_index)

# Show the generated (centre, context) pairs as words.
for a, b in zip(x, y):
    print(words[a], words[b])

king husband
husband queen
husband king
queen husband
queen wife
wife king
wife queen
king wife
king male
male king
queen female
female queen
king marry
king queen
marry queen
marry king
queen king
queen marry


In [82]:
def getTrainingBatch(inputIds=None, labelIds=None, size=None):
    """Return one batch of consecutive training pairs starting at a random offset.

    Args:
        inputIds: sequence of input word indices; defaults to the global `x`.
        labelIds: sequence of label word indices aligned with `inputIds`;
            defaults to the global `y`.
        size: number of pairs in the batch; defaults to the global `batchSize`.

    Returns:
        A tuple `(arr, label_data)` where `arr` is the slice of `inputIds`
        holding `size` items and `label_data` is a NumPy array of shape
        `(size, 1)` holding the matching labels.

    Raises:
        ValueError: if fewer than `size` pairs are available.
    """
    if inputIds is None:
        inputIds = x
    if labelIds is None:
        labelIds = y
    if size is None:
        size = batchSize

    # randint() includes both end points, so the largest valid start offset
    # is len - size.  The previous bound (len - size - 1) meant the final
    # pair could never be part of a batch, and it raised when exactly `size`
    # pairs were available.
    lastStart = len(inputIds) - size
    if lastStart < 0:
        raise ValueError(
            "need at least {} training pairs, got {}".format(size, len(inputIds)))

    start = randint(0, lastStart)
    arr = inputIds[start:start + size]
    labels = labelIds[start:start + size]
    # Column vector: one label per row.
    label_data = np.array(labels).reshape(-1, 1)
    return arr, label_data

In [None]:
# Draw a single batch up front; both names are re-bound to a fresh batch on
# every step of the training loop further down.
trainInputs, trainLabels  = getTrainingBatch()

In [None]:
# Vocabulary indices of the words listed in `valid_words`, stored in the
# graph as an int32 constant.
valid_examples = list(map(words.index, valid_words))
valid_dataset = tf.constant(value=valid_examples, dtype=tf.int32)

In [None]:
# Cosine similarity between each validation word and every vocabulary word:
# scale every embedding row to unit length, then take dot products.
squared_row_sums = tf.reduce_sum(tf.square(embeddings), axis=1, keepdims=True)
norm = tf.sqrt(squared_row_sums)
normalized_embeddings = embeddings / norm
valid_embeddings = tf.nn.embedding_lookup(
    params=normalized_embeddings, ids=valid_dataset)
similarity = tf.matmul(
    a=valid_embeddings, b=normalized_embeddings, transpose_b=True)

# Create the variable initialiser, open a session and initialise all variables.
init = tf.global_variables_initializer()

sess = tf.Session()
sess.run(init)

In [None]:
# Train the skip-gram model; every `debug_iterations` steps record the loss
# and print the nearest neighbours of each validation word.
loss_vec = []
loss_x_vec = []
# top_k = number of nearest neighbors
top_k = 3

for i in range(iterations):
    trainInputs, trainLabels = getTrainingBatch()
    feed_dict = {train_inputs: trainInputs, train_outputs: trainLabels}

    # One optimisation step on this batch.
    sess.run(optimizer, feed_dict=feed_dict)

    step = i + 1
    if step % debug_iterations != 0:
        continue

    # Loss on the batch that was just used for the update.
    loss_val = sess.run(loss, feed_dict=feed_dict)
    loss_vec.append(loss_val)
    loss_x_vec.append(step)
    print('\nLoss at step {} : {}'.format(step, loss_val))

    # Validation: for every validation word list its most similar words.
    sim = sess.run(similarity, feed_dict=feed_dict)
    for row, valid_word in enumerate(valid_words):
        # Sort by descending similarity; position 0 is skipped, the next
        # top_k entries are reported.
        nearest = (-sim[row, :]).argsort()[1:top_k+1]
        neighbours = ''.join(
            ' {},'.format(word_dictionary_rev[nearest[k]]) for k in range(top_k))
        print("Nearest to {}:".format(valid_word) + neighbours)


Loss at step 200 : 1.346443772315979
Nearest to king: marry, male, female,
Nearest to queen: wife, female, male,

Loss at step 400 : 0.9364078044891357
Nearest to king: marry, male, female,
Nearest to queen: male, wife, husband,

Loss at step 600 : 2.0917153358459473
Nearest to king: male, marry, female,
Nearest to queen: male, husband, female,

Loss at step 800 : 1.0701323747634888
Nearest to king: male, marry, female,
Nearest to queen: male, husband, female,

Loss at step 1000 : 1.4059538841247559
Nearest to king: marry, male, female,
Nearest to queen: male, female, wife,

Loss at step 1200 : 0.6607356071472168
Nearest to king: marry, female, queen,
Nearest to queen: male, female, wife,

Loss at step 1400 : 1.8066346645355225
Nearest to king: female, marry, queen,
Nearest to queen: male, wife, husband,

Loss at step 1600 : 1.1467047929763794
Nearest to king: female, marry, queen,
Nearest to queen: male, husband, wife,

Loss at step 1800 : 0.9927818775177002
Nearest to king: female, 


Loss at step 14400 : 1.1545634269714355
Nearest to king: female, queen, husband,
Nearest to queen: male, king, female,

Loss at step 14600 : 1.2386170625686646
Nearest to king: female, queen, marry,
Nearest to queen: male, king, husband,

Loss at step 14800 : 0.9302080869674683
Nearest to king: female, marry, queen,
Nearest to queen: male, king, wife,

Loss at step 15000 : 1.826368808746338
Nearest to king: female, marry, wife,
Nearest to queen: male, king, female,

Loss at step 15200 : 1.3812000751495361
Nearest to king: female, marry, queen,
Nearest to queen: male, king, husband,

Loss at step 15400 : 0.8740431070327759
Nearest to king: female, queen, marry,
Nearest to queen: male, king, husband,

Loss at step 15600 : 1.531395435333252
Nearest to king: female, queen, marry,
Nearest to queen: male, king, husband,

Loss at step 15800 : 0.8150807619094849
Nearest to king: female, husband, marry,
Nearest to queen: male, king, husband,

Loss at step 16000 : 1.8211631774902344
Nearest to 


Loss at step 28400 : 1.0233253240585327
Nearest to king: female, queen, marry,
Nearest to queen: male, king, husband,

Loss at step 28600 : 1.5426623821258545
Nearest to king: female, marry, wife,
Nearest to queen: male, husband, wife,

Loss at step 28800 : 1.2026007175445557
Nearest to king: female, marry, queen,
Nearest to queen: male, husband, wife,

Loss at step 29000 : 0.6736055016517639
Nearest to king: female, marry, husband,
Nearest to queen: male, wife, husband,

Loss at step 29200 : 1.3125309944152832
Nearest to king: female, marry, wife,
Nearest to queen: male, husband, wife,

Loss at step 29400 : 1.1157325506210327
Nearest to king: female, queen, marry,
Nearest to queen: male, husband, king,

Loss at step 29600 : 1.3983553647994995
Nearest to king: female, wife, queen,
Nearest to queen: male, king, marry,

Loss at step 29800 : 1.4603530168533325
Nearest to king: female, marry, wife,
Nearest to queen: male, wife, husband,

Loss at step 30000 : 1.2816299200057983
Nearest to 


Loss at step 42800 : 1.0075734853744507
Nearest to king: female, queen, marry,
Nearest to queen: male, wife, husband,

Loss at step 43000 : 1.4791101217269897
Nearest to king: female, husband, marry,
Nearest to queen: male, marry, wife,

Loss at step 43200 : 1.1439077854156494
Nearest to king: female, husband, marry,
Nearest to queen: male, wife, marry,

Loss at step 43400 : 1.2216920852661133
Nearest to king: female, queen, wife,
Nearest to queen: male, marry, husband,

Loss at step 43600 : 0.946738600730896
Nearest to king: female, wife, marry,
Nearest to queen: male, husband, marry,

Loss at step 43800 : 1.140629768371582
Nearest to king: female, wife, husband,
Nearest to queen: male, husband, marry,

Loss at step 44000 : 0.9732373952865601
Nearest to king: female, wife, marry,
Nearest to queen: male, husband, marry,

Loss at step 44200 : 1.2283761501312256
Nearest to king: female, wife, marry,
Nearest to queen: male, husband, marry,

Loss at step 44400 : 1.985844612121582
Nearest 


Loss at step 56800 : 1.6366573572158813
Nearest to king: female, wife, marry,
Nearest to queen: male, husband, marry,

Loss at step 57000 : 1.1860289573669434
Nearest to king: female, marry, wife,
Nearest to queen: male, husband, wife,

Loss at step 57200 : 1.1906875371932983
Nearest to king: female, marry, husband,
Nearest to queen: male, husband, wife,

Loss at step 57400 : 1.0208133459091187
Nearest to king: female, marry, wife,
Nearest to queen: male, husband, wife,

Loss at step 57600 : 0.8815642595291138
Nearest to king: female, wife, marry,
Nearest to queen: male, husband, marry,

Loss at step 57800 : 0.6878843307495117
Nearest to king: female, marry, wife,
Nearest to queen: male, husband, wife,

Loss at step 58000 : 1.2256181240081787
Nearest to king: female, marry, wife,
Nearest to queen: male, husband, wife,

Loss at step 58200 : 1.0446738004684448
Nearest to king: female, marry, wife,
Nearest to queen: male, husband, wife,

Loss at step 58400 : 1.2588250637054443
Nearest to


Loss at step 71000 : 1.201339602470398
Nearest to king: female, wife, marry,
Nearest to queen: male, husband, marry,

Loss at step 71200 : 1.1523070335388184
Nearest to king: female, wife, marry,
Nearest to queen: male, husband, marry,

Loss at step 71400 : 1.706369400024414
Nearest to king: female, marry, wife,
Nearest to queen: male, husband, wife,

Loss at step 71600 : 0.6435823440551758
Nearest to king: female, marry, wife,
Nearest to queen: male, husband, wife,

Loss at step 71800 : 0.8783795237541199
Nearest to king: female, wife, marry,
Nearest to queen: male, husband, marry,

Loss at step 72000 : 0.6152595281600952
Nearest to king: female, marry, queen,
Nearest to queen: male, husband, wife,

Loss at step 72200 : 1.4246459007263184
Nearest to king: female, marry, wife,
Nearest to queen: male, husband, wife,

Loss at step 72400 : 0.7749946117401123
Nearest to king: female, marry, wife,
Nearest to queen: male, husband, wife,

Loss at step 72600 : 1.5925226211547852
Nearest to ki


Loss at step 85200 : 1.9448246955871582
Nearest to king: female, marry, wife,
Nearest to queen: male, husband, wife,

Loss at step 85400 : 1.3977744579315186
Nearest to king: female, marry, queen,
Nearest to queen: male, husband, wife,

Loss at step 85600 : 0.6800675392150879
Nearest to king: female, marry, wife,
Nearest to queen: male, husband, wife,

Loss at step 85800 : 1.1961207389831543
Nearest to king: female, wife, marry,
Nearest to queen: male, husband, marry,

Loss at step 86000 : 0.8571227192878723
Nearest to king: female, queen, marry,
Nearest to queen: male, husband, wife,

Loss at step 86200 : 1.0401958227157593
Nearest to king: female, queen, marry,
Nearest to queen: male, husband, wife,

Loss at step 86400 : 1.1655471324920654
Nearest to king: female, marry, wife,
Nearest to queen: male, husband, wife,
