# Global Vectors for Word Representation

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from collections import Counter, defaultdict

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

## Load Datasets

In [3]:
class GloveDataset:
    
    def __init__(self, text, n_words=200000, window_size=5):
        
        self._window_size = window_size
        self._tokens = text.split(' ')[:n_words]
        word_counter = Counter()
        word_counter.update(self._tokens)
        
        self._word2idx = {w:i for i, (w, _) in enumerate(word_counter.most_common())}
        self._idx2word = {i:w for w, i in self._word2idx.items()}
        self._vocab_len = len(self._word2idx)
        self._id_tokens = [self._word2idx[w] for w in self._tokens]
        self._create_coocurrence_matrix()
        
        print('Total of words: {}'.format(len(self._tokens)))
        print('Vocabulary length: {}'.format(self._vocab_len))
        
    def _create_coocurrence_matrix(self):
        
        co_matrix = defaultdict(Counter)
        for i, w in enumerate(self._id_tokens):
            start_i = max(i - self._window_size, 0)
            end_i = min(i + self._window_size + 1, len(self._id_tokens))
            for j in range(start_i, end_i):
                if i != j:
                    c = self._id_tokens[j]
                    co_matrix[w][c] += 1 / abs(j-i)
        
        self._i_idx = list()
        self._j_idx = list()
        self._xij = list()
        
        # create indexes and x values tensors
        for w, co in co_matrix.items():
            for c, v in co.items():
                self._i_idx.append(w)
                self._j_idx.append(c)
                self._xij.append(v)
        
        self._i_idx = torch.LongTensor(self._i_idx)
        self._j_idx = torch.LongTensor(self._j_idx)
        self._xij = torch.FloatTensor(self._xij)
    
    def get_batches(self, batch_size):
        # generate random idx
        random_idx = torch.LongTensor(np.random.choice(len(self._xij), len(self._xij), replace=False))
        
        for p in range(0, len(random_idx), batch_size):
            batch_idx = random_idx[p:p+batch_size]
            yield self._xij[batch_idx], self._i_idx[batch_idx], self._j_idx[batch_idx]

In [4]:
datasets = GloveDataset(open('./datasets/text8').read(), 10000000)

Total of words: 10000000
Vocabulary length: 189075


## Set Configs

In [5]:
LR = 0.05
N_EPOCHS = 100

BATCH_SIZE = 2048
EMBEDDING_DIM = 300
X_MAX = 100
ALPHA = 0.75

In [6]:
is_cuda = torch.cuda.is_available()

if is_cuda: device = torch.device('cuda')
else: device = torch.device('cpu')

## Build [Glove](https://nlp.stanford.edu/pubs/glove.pdf) Network

In [7]:
class Glove(nn.Module):
    
    def __init__(self, input_size, embedding_dim):
        super(Glove, self).__init__()
        
        self.vocab_size = input_size
        
        self.wi = nn.Embedding(self.vocab_size, embedding_dim)
        self.wj = nn.Embedding(self.vocab_size, embedding_dim)
        self.bi = nn.Embedding(self.vocab_size, 1)
        self.bj = nn.Embedding(self.vocab_size, 1)
        
        self.wi.weight.data.uniform_(-1, 1)
        self.wj.weight.data.uniform_(-1, 1)
        self.bi.weight.data.zero_()
        self.bj.weight.data.zero_()
        
    def forward(self, i_indices, h_indices):
        w_i = self.wi(i_indices)
        w_j = self.wj(j_indices)
        b_i = self.bi(i_indices).squeeze()
        b_j = self.bj(j_indices).squeeze()
        
        x = torch.sum(w_i * w_j, dim=1) + b_i + b_j
        
        return x

#### Initialize Glove Network

In [8]:
glove = Glove(datasets._vocab_len, EMBEDDING_DIM)
glove.to(device)

Glove(
  (wi): Embedding(189075, 300)
  (wj): Embedding(189075, 300)
  (bi): Embedding(189075, 1)
  (bj): Embedding(189075, 1)
)

## Set Loss Function

In [9]:
def wmse_loss(weights, inputs, targets):
    loss = weights * F.mse_loss(inputs, targets, reduction='none')
    loss = torch.mean(loss)
    
    return loss

In [12]:
def weight_loss(x, x_max, alpha):
    wx = (x/x_max) ** alpha
    wx = torch.min(wx, torch.ones_like(wx))
    
    return wx

## Set Optimizer

In [10]:
optimizer = torch.optim.Adagrad(glove.parameters(), lr=LR)

## Train Glove Network

---