In [1]:
import sys
sys.path.insert(0,"/work/pip")

In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import math, copy, time
from torch.autograd import Variable

In [33]:
class Encoder(nn.Module):
    """Feed-forward encoder: flattened sequence -> latent vector of size ``d_model``.

    Args:
        d_input: Number of features per time step.
        d_model: Width of the latent representation produced by ``encode``.
        seq_len: Number of time steps. The last dimension of the input passed
            to ``forward`` must equal ``d_input * seq_len``.
        deepN: Optional number of additional ``Linear`` + ReLU layers applied
            after the initial projection. ``None`` (default) adds none.
    """

    def __init__(self, d_input, d_model, seq_len, deepN = None):
        super(Encoder, self).__init__()
        self.encode = nn.Linear(d_input*seq_len, d_model)
        self.deep = deepN is not None
        if self.deep:
            # These layers run on the output of ``encode``, which is d_model
            # wide, so they have to be d_model -> d_model. Sizing them as
            # d_model*seq_len made every deep forward pass fail with a shape
            # mismatch.
            self.linears = nn.ModuleList([nn.Linear(d_model, d_model) for _ in range(deepN)])

    def forward(self, x):
        """Project ``x`` to the latent space, then apply the optional deep stack.

        Every layer is followed by a ReLU, so the returned latent is
        non-negative and has ``d_model`` entries along its last dimension.
        """
        x = F.relu(self.encode(x))
        if self.deep:
            for layer in self.linears:
                x = F.relu(layer(x))
        return x

In [34]:
class Decoder(nn.Module):
    """Feed-forward decoder: latent vector of size ``d_model`` -> flattened sequence.

    Args:
        d_input: Number of features per time step of the reconstructed output.
        d_model: Width of the latent vector passed to ``forward``.
        seq_len: Number of time steps; the output's last dimension is
            ``d_input * seq_len``.
        deepN: Optional number of additional ``Linear`` + ReLU layers applied
            to the latent before the final projection. ``None`` adds none.
    """

    def __init__(self, d_input, d_model, seq_len, deepN = None):
        super(Decoder, self).__init__()
        self.decode = nn.Linear(d_model, d_input*seq_len)
        self.deep = deepN is not None
        if self.deep:
            # These layers run on the latent *before* ``decode``, so they see
            # d_model features and must be d_model -> d_model. Sizing them as
            # d_model*seq_len made every deep forward pass fail with a shape
            # mismatch.
            self.linears = nn.ModuleList([nn.Linear(d_model, d_model) for _ in range(deepN)])

    def forward(self, x):
        """Apply the optional deep stack, then project back to the input size.

        NOTE(review): the final ReLU makes every output non-negative, so
        negative target values cannot be reconstructed -- confirm this is
        intended for the data this model is trained on.
        """
        if self.deep:
            for layer in self.linears:
                x = F.relu(layer(x))
        x = F.relu(self.decode(x))
        return x

In [20]:
class EncoderDecoder(nn.Module):
    """Chain an encoder and a decoder, optionally corrupting the input first.

    Args:
        encoder: Module mapping the source to a latent ("memory").
        decoder: Module mapping that latent back to an output.
        noise: Optional module applied to the source before encoding;
            ``None`` feeds the source to the encoder untouched.
    """

    def __init__(self, encoder, decoder, noise = None):
        super(EncoderDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.noise = noise

    def forward(self, src):
        """Encode ``src`` and decode the resulting memory."""
        memory = self.encode(src)
        return self.decode(memory)

    def encode(self, src):
        """Run the encoder on ``src``, passing it through ``noise`` first when set."""
        encoder_input = src if self.noise is None else self.noise(src)
        return self.encoder(encoder_input)

    def decode(self, memory):
        """Run the decoder on an encoded ``memory``."""
        return self.decoder(memory)

In [32]:
def make_model(d_input, N = None, max_len = 24, dropout=0.15):
    """Assemble an ``EncoderDecoder`` and Xavier-initialise its weight matrices.

    Args:
        d_input: Number of features per time step.
        N: Forwarded to both ``Encoder`` and ``Decoder`` as their ``deepN``
            argument (``None`` means no extra layers).
        max_len: Sequence length used to size the encoder and decoder.
        dropout: Probability for the ``nn.Dropout`` applied to the input
            before encoding; ``None`` disables the noise module entirely.

    Returns:
        The constructed model.
    """
    # Latent width: a quarter of the flattened input size, truncated to an int.
    d_model = int(d_input*max_len/4)

    noise = None
    if dropout is not None:
        noise = nn.Dropout(dropout)

    encoder = Encoder(d_input, d_model, max_len, N)
    decoder = Decoder(d_input, d_model, max_len, N)
    model = EncoderDecoder(encoder, decoder, noise)

    # Only tensors with more than one dimension (weight matrices) are
    # re-initialised; one-dimensional parameters such as biases keep their
    # default initialisation.
    for weight in (p for p in model.parameters() if p.dim() > 1):
        nn.init.xavier_uniform_(weight)
    return model

In [37]:
class LossCompute:
    """Sum a per-sample criterion over a batch and optionally take a training step.

    Args:
        criterion: Callable ``criterion(prediction, target)`` returning a
            scalar tensor.
        opt: Optional optimizer. Either a wrapper exposing ``step()`` and an
            inner ``optimizer`` attribute (such as ``NoamOpt``) or a plain
            optimizer exposing ``step()`` and ``zero_grad()``.
    """

    def __init__(self, criterion, opt = None):
        self.criterion = criterion
        self.opt = opt

    def __call__(self, x, y, norm = None, train = True):
        """Return the summed loss as a Python float.

        Args:
            x: Iterable of predictions (iterating a tensor yields its rows).
            y: Iterable of targets, paired element-wise with ``x``.
            norm: Optional iterable of per-sample divisors, paired with ``x``.
            train: When true, back-propagate the loss and, if an optimizer
                was supplied, step it and clear the gradients.

        Returns:
            The summed loss, or ``0.0`` when there are no samples.
        """
        loss = 0
        if norm is not None:
            for pred, target, scale in zip(x, y, norm):
                loss += self.criterion(pred.contiguous(), target.contiguous())/scale
        else:
            for pred, target in zip(x, y):
                loss += self.criterion(pred.contiguous(), target.contiguous())

        # With no samples ``loss`` is still the int 0, which has neither
        # ``backward`` nor ``item``; report a zero loss instead of crashing.
        if not torch.is_tensor(loss):
            return 0.0

        if train:
            loss.backward()
            if self.opt is not None:
                self.opt.step()
                # Wrappers keep the real optimizer in ``.optimizer``; a plain
                # optimizer is cleared directly.
                getattr(self.opt, "optimizer", self.opt).zero_grad()

        return loss.item()
            

In [38]:
class NoamOpt:
    """Optimizer wrapper that sets the learning rate before every step.

    The rate is
    ``factor * model_size ** -0.5 * min(step ** -0.5, step * warmup ** -1.5)``.

    Args:
        model_size: Model dimension used in the ``model_size ** -0.5`` factor.
        factor: Constant multiplier applied to the rate.
        warmup: Warm-up constant used in the ``step * warmup ** -1.5`` term.
        optimizer: Wrapped optimizer; must expose ``param_groups`` and ``step()``.
    """

    def __init__(self, model_size, factor, warmup, optimizer):
        self.optimizer = optimizer
        self._step = 0
        self.warmup = warmup
        self.factor = factor
        self.model_size = model_size
        self._rate = 0

    def step(self):
        "Update parameters and rate"
        self._step += 1
        rate = self.rate()
        for group in self.optimizer.param_groups:
            group['lr'] = rate
        self._rate = rate
        self.optimizer.step()

    def rate(self, step = None):
        """Return the learning rate for ``step`` (defaults to the current step).

        ``step`` is expected to be non-negative. Step 0 returns ``0.0``, the
        value of the ``step * warmup ** -1.5`` term at that point; evaluating
        ``0 ** -0.5`` there would raise ``ZeroDivisionError``, which made the
        rate impossible to query before the first ``step()`` call.
        """
        if step is None:
            step = self._step
        if step == 0:
            return 0.0
        return self.factor * \
            (self.model_size ** (-0.5) *
            min(step ** (-0.5), step * self.warmup ** (-1.5)))
        
def get_std_opt(model):
    """Return a ``NoamOpt`` (factor 2, warmup 4000) wrapping Adam for ``model``.

    The model size is read from ``model.src_embed[0].d_model`` when the model
    has a ``src_embed`` attribute. The ``EncoderDecoder`` defined in this file
    has no such attribute, so otherwise the output width of the encoder's
    input projection (``model.encoder.encode.out_features``) is used; without
    this fallback the function raised ``AttributeError`` for those models.
    """
    src_embed = getattr(model, "src_embed", None)
    if src_embed is not None:
        model_size = src_embed[0].d_model
    else:
        model_size = model.encoder.encode.out_features
    return NoamOpt(model_size, 2, 4000,
            torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

In [39]:
def data_gen(batch, d_input, seq_len, nbatches):
    """Yield ``nbatches`` random batches for a copy task (target equals source).

    Each batch holds values drawn uniformly between -1 and 1 with shape
    ``(batch, 1, d_input * seq_len)``. ``src`` and ``tgt`` are both built from
    the same underlying tensor.
    """
    for _ in range(nbatches):
        samples = np.random.uniform(-1, 1, size=(batch, 1, d_input*seq_len))
        data = torch.from_numpy(samples)
        yield Batch(
            Variable(data, requires_grad=False),
            Variable(data, requires_grad=False),
        )

In [40]:
class Batch():
    """Plain container pairing a source with its target."""

    def __init__(self, src, tgt):
        # Both values are stored as given; nothing is copied or validated.
        self.src, self.tgt = src, tgt

In [42]:
# Smoke test: build an untrained model and score it on one random batch.
d_input = 17
seq_len = 24
batches = 512  # samples per batch (passed to data_gen as its `batch` argument)
loss_fn = LossCompute(nn.MSELoss())

model = make_model(d_input = d_input, max_len = seq_len).float()

for i, batch in enumerate(data_gen(batches, d_input, seq_len, 1)):
    # Call the module itself rather than .forward() so that any registered
    # hooks run as part of the forward pass.
    tgt_pred = model(batch.src.float())
    # No optimizer was given to LossCompute, so with the default train=True
    # this back-propagates the summed per-sample loss but updates no weights.
    loss = loss_fn(tgt_pred, batch.tgt.float())
    print(loss)

201.0474853515625
