In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

import torch.nn.functional as F
from torch.autograd import Variable

In [3]:
#CUDA = torch.cuda.is_available()

from nltk.tokenize import word_tokenize

text = '''As the eight strange beings applauded, one of them even cupping a hand over her lipsticked mouth to cheer, Joel tried to grasp what was happening. The nine of them sat in a fire rimmed cavern around a conference table shaped from warm volcanic rock. A chandelier of human bones dangled from the cavern’s ceiling, and it rattled around at random like wind chimes. A massive goat-man with reddish-black skin and wicked horns on his head towered above the seven others, who flanked him to either side. They looked like pure stereotype. A fat slob with sixteen chins, a used car saleman looking guy with gold and silver jewelry all over him, a sultry dominatrix in skin tight leather. On the other side a disheveled looking college drop out, a pretty boy staring in a mirror, a bald, muscular dude who looked like someone’s pissed off step-dad and a sour faced woman glancing jealously around the room. Just where the hell was he? Joel concentrated on his last memory. He remembered highlighting pages as his private jet, “The Holy Gust,” flew over the sapphire waters of the Bahamas. He had been reviewing his sermon for Sunday – dotting the I’s and crossing the crosses, a little god humor there, praise him – and the pilot’s voice had crackled over the intercom about turbulence. Kimberly, his personal assistant, had taken his plow out of her mouth and put on her seat belt. The plane had shook and then'''.lower()

words = word_tokenize(text)

vocab = set(words)
w2i = {w:i for i,w in enumerate(vocab)}
i2w = {i:w for i,w in enumerate(vocab)}


from types import SimpleNamespace
import random
random.seed(42)

def generate_negative_samples(target_index, index_range, k):
    '''
    index_range: ranges of index to select from
    '''
    random_index = random.sample(index_range, 6)
    
    return  SimpleNamespace(
                target=w2i[words[target_index]],
                context=[w2i[word] for word in [words[index] for index in random_index]],
                label=0
    )
        
    

def text_to_train(words, context_window=2, k=6):
    '''
    Make training data from words.
    
    For 1 positive sample, generate `k` negative samples
    '''
    pos = []
    neg = []
    context_range = range(-context_window, context_window+1)
    for current_index in range(context_window, len(words) - context_window ) :
        #Positive Samples
        for relative_index in context_range:
            if current_index + relative_index != current_index:
                pos.append(SimpleNamespace(
                    target=w2i[words[current_index]],
                    context=w2i[words[current_index+relative_index]],
                    label=1
                ))
        #Negative Samples
        for _ in context_range:
            
            rand = random.random()
            
            lhs_index_range = None
            rhs_index_range = None
            # select from lhs of target
            if  (current_index - context_window - 2*k) > 0:
                #This also accounts for the fact that there should be ample samples on the LHS to select from
                lhs_index_range = range(0, current_index - context_window)
                
            if (current_index + context_window + 2*k ) < len(words):
                # If random value is >= 0.5 or there are not enough samples on the LHS
                rhs_index_range = range(current_index + context_window, len(words))
            
            if lhs_index_range and rhs_index_range:
                index_range = random.choice([lhs_index_range, rhs_index_range])
            elif lhs_index_range:
                index_range = lhs_index_range
            else:
                index_range = rhs_index_range

            neg.append(
                    generate_negative_samples(
                        current_index,
                        index_range=index_range,
                        k=k
                    )
                )
    return pos, neg
            
        

In [3]:
pos_data, neg_data = text_to_train(words)

print(pos_data[0])
print(neg_data[0])

namespace(context=37, label=1, target=140)
namespace(context=[45, 77, 88, 145, 40, 62], label=0, target=140)


In [4]:
def unpack_data_point(data_point):
    return data_point.target, data_point.context, data_point.label

def data_to_variable(data, dtype=torch.LongTensor):
    
    tensor = Variable(dtype(data))
    
    if CUDA:
        return tensor.cuda()
    return tensor

In [5]:


class SkigGram(nn.Module):
    
    def __init__(self, vocab_size, embedding_size):
        super().__init__()
        self.target_embedding = nn.Embedding(vocab_size, embedding_size)
        self.context_embedding = nn.Embedding(vocab_size, embedding_size)
        
    def forward(self, target, positive_ctx, negative_ctx):
        
        target = data_to_variable([target])
        positive_ctx = data_to_variable([positive_ctx])
        negative_ctx = data_to_variable(negative_ctx)
        
        
        
        pos_ = self.context_embedding(positive_ctx)
        neg_ = self.context_embedding(negative_ctx)
        target_ = self.target_embedding(target)
        
        
        pos_dot = torch.matmul(pos_,torch.t(target_))
        
        neg_dot = torch.matmul(target_, torch.t(-neg_.squeeze()))
        
        # Calculate the loss
        
        loss = -(F.logsigmoid(pos_dot) + F.logsigmoid(neg_dot).sum())
        
        #Maximize `loss`, hence, minimize `-loss`
        return loss

## Test the Model with dummies

In [6]:
from torch.autograd import Variable

pos = 1
neg = [10,11,12]

target = 0

In [7]:
model = SkigGram(20, 10).cuda()
loss = model(target, pos, neg)

In [8]:
loss

Variable containing:
 6.2671
[torch.cuda.FloatTensor of size 1x1 (GPU 0)]

In [9]:
del model
del loss

## Train the Model

### Training Params

In [10]:
learning_rate = 0.001
epochs = 100

In [11]:
model = SkigGram(len(vocab), 300)

if CUDA:
    model = model.cuda()

optimizer = optim.SGD(model.parameters(), lr = learning_rate)

In [12]:
print(neg_data[1])
tgt, ctx, lbl = unpack_data_point(neg_data[1])

namespace(context=[125, 96, 85, 109, 3, 139], label=0, target=140)


In [13]:
print([tgt])
print(ctx)
print([lbl])

[140]
[125, 96, 85, 109, 3, 139]
[0]


In [14]:
data_to_variable([tgt])

Variable containing:
 140
[torch.cuda.LongTensor of size 1 (GPU 0)]

In [15]:
%%time

losses = []

for epoch in range(epochs):
    total_loss = 0
    for pos,neg in zip(pos_data, neg_data):
        target, pos_ctx, label = unpack_data_point(pos)
        target, neg_ctx, label = unpack_data_point(neg)
        
        
        model.zero_grad()
        loss = model(target, pos_ctx, neg_ctx)
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.data[0]
        
    if epoch%10 == 0:
        print("Loss is", total_loss)
    losses.append(total_loss)
        
        
        

Loss is 
 52470.0156
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Loss is 
 24601.7188
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Loss is 
 13144.7793
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Loss is 
 7350.9214
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Loss is 
 4317.2886
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Loss is 
 2652.1287
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Loss is 
 1772.1620
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Loss is 
 1362.9666
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Loss is 
 1184.9821
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Loss is 
 1107.6483
[torch.cuda.FloatTensor of size 1 (GPU 0)]

CPU times: user 1min 29s, sys: 9.97 s, total: 1min 39s
Wall time: 1min 39s
