In [1]:
%load_ext autoreload

In [30]:
%autoreload 2
from tqdm import tqdm_notebook as tqdm
from readers import DstcDatasetReader
from allennlp.data import Vocabulary
from allennlp.data.iterators import BucketIterator, BasicIterator

# read data
reader = DstcDatasetReader()
train_dataset = reader.read("data/train") 

# build the vocab
vocab = Vocabulary.from_instances(train_dataset)

# litmus test
iterator = BasicIterator(batch_size=5)
iterator.index_with(vocab)
batch = next(iter(iterator(train_dataset)))

16142it [05:55, 45.37it/s] 

  0%|          | 0/16142 [00:00<?, ?it/s][A
  3%|▎         | 533/16142 [00:00<00:02, 5328.76it/s][A
  6%|▌         | 959/16142 [00:00<00:03, 4951.19it/s][A
  8%|▊         | 1247/16142 [00:00<00:03, 4070.34it/s][A
 10%|▉         | 1566/16142 [00:00<00:03, 3756.11it/s][A
 11%|█▏        | 1852/16142 [00:00<00:04, 3430.18it/s][A
 13%|█▎        | 2129/16142 [00:00<00:05, 2767.96it/s][A
 15%|█▌        | 2442/16142 [00:00<00:04, 2866.33it/s][A
 17%|█▋        | 2807/16142 [00:00<00:04, 3063.08it/s][A
 21%|██        | 3317/16142 [00:00<00:03, 3479.87it/s][A
 24%|██▍       | 3872/16142 [00:01<00:03, 3916.92it/s][A
 28%|██▊       | 4443/16142 [00:01<00:02, 4323.54it/s][A
 30%|███       | 4908/16142 [00:01<00:03, 3588.62it/s][A
 33%|███▎      | 5311/16142 [00:01<00:03, 3004.55it/s][A
 35%|███▌      | 5658/16142 [00:01<00:03, 3030.48it/s][A
 37%|███▋      | 6028/16142 [00:01<00:03, 3202.32it/s][A
 39%|███▉      | 6374/16142 [00:01<00:03, 2970.86it/s][A


In [29]:
from allennlp.models import Model
from allennlp.modules.token_embedders import PretrainedBertEmbedder
from allennlp.training.trainer import Trainer
from allennlp.nn.util import get_text_field_mask

import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn

In [31]:
class UserIntentPredictor(Model):
    
    def __init__(self, vocab):
        super().__init__(vocab)
        
        # pretrained embedding/language model
        self.emb = PretrainedBertEmbedder("bert-base-uncased", requires_grad=False, top_layer_only=True)
        
        # layers
        self.l0 = nn.Linear(self.emb.output_dim, self.emb.output_dim)
        self.l1 = nn.Linear(self.emb.output_dim, self.emb.output_dim)
        self.l2 = nn.Linear(1, 1)
        
    def forward_turn(self, batch, turn):
        # encode current utterance
        # utter = [batch, seq, emb]
        utter, _ = self.lm(
            input_ids=batch["x_utterance_tokens"][:,turn,:],
            attention_mask=batch["x_utterance_mask"][:,turn,:]
        )
        utter = self.l0(utter)

        # individually encode description
        # desc = [desc, batch, seq, emb]
        desc = []
        num_descriptions = batch["y_descriptions_tokens"].shape[2]
        for i in range(num_descriptions):
            each_desc, _ = self.lm(
                input_ids=batch["y_descriptions_tokens"][:,turn,i,:],
                attention_mask=batch["y_descriptions_mask"][:,turn,i,:]
            )
            each_desc = self.l1(each_desc)
            desc.append(each_desc)

        # score = [batch, desc, emb]
        scores = []
        for each_desc in desc:
            utter = utter.sum(1, keepdim=True) # [b,1,e] collapse across sequence length
            each_desc = each_desc.sum(1, keepdim=True).permute(0, 2, 1) # [b,e,1]
            each_score = torch.bmm(utter, each_desc) # [b,1,1]
            scores.append(each_score)
        scores = torch.cat(scores, dim=1)

        # [batch]
        turn_mask = batch["x_turn_mask"][:,turn:turn+1]

        # score = [batch, desc]
        scores = self.l2(scores).squeeze(2)
        scores = scores * turn_mask.float()
        return scores
    
    def forward(self, **params):
        # user utterance: [batch, turn seq]
        # intent desc: [batch, turn, desc, seq]
        # active intent (list): [batch, turn, desc]
        loss_value = 0
        num_turns = params["user_utterance"]["tokens"].shape[1]

        for turn in range(num_turns):
            target_scores = batch["y_scores"][:,turn,:].float()
            predicted_scores = model(batch, turn).float()

            # calculate loss
            loss = F.mse_loss(
                predicted_scores,
                target_scores,
            )

            loss.backward()
            model_optim.step()

            loss_value += loss.item()
        return loss_value # loss per dialogue
    


model = UserIntentPredictor(vocab).to("cuda")

optimizer = optim.SGD(model.parameters(), lr=0.0001)

trainer = Trainer(
    model=model,
    optimizer=optimizer,
    iterator=iterator,
    train_dataset=train_dataset,
    num_epochs=2,
    cuda_device=0
)

trainer.train()


  0%|          | 0/251 [00:00<?, ?it/s][A

> [0;32m<ipython-input-31-a195e4e913a8>[0m(54)[0;36mforward[0;34m()[0m
[0;32m     53 [0;31m        [0;32mimport[0m [0mipdb[0m[0;34m;[0m[0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m;[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 54 [0;31m        [0mloss_value[0m [0;34m=[0m [0;36m0[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     55 [0;31m        [0mnum_turns[0m [0;34m=[0m [0mbatch[0m[0;34m[[0m[0;34m"y_scores"[0m[0;34m][0m[0;34m.[0m[0mshape[0m[0;34m[[0m[0;36m1[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  params


{'user_utterance': {'tokens': tensor([[[  101,  1045,  2342,  ...,     0,     0,     0],
         [  101,  1045,  1005,  ...,     0,     0,     0],
         [  101,  2054,  2060,  ...,     0,     0,     0],
         ...,
         [  101,  1045,  2074,  ...,     0,     0,     0],
         [  101,  2748,  1010,  ...,     0,     0,     0],
         [  101,  2008,  1005,  ...,     0,     0,     0]],

        [[  101,  3531,  2393,  ...,     0,     0,     0],
         [  101,  1045,  2215,  ...,     0,     0,     0],
         [  101,  2064,  2017,  ...,     0,     0,     0],
         ...,
         [    0,     0,     0,  ...,     0,     0,     0],
         [    0,     0,     0,  ...,     0,     0,     0],
         [    0,     0,     0,  ...,     0,     0,     0]],

        [[  101,  1045,  2342,  ...,     0,     0,     0],
         [  101,  1045,  2342,  ...,     0,     0,     0],
         [  101,  2053,  2008,  ...,     0,     0,     0],
         ...,
         [    0,     0,     0,  ...,   

ipdb>  params["user_utterance"]["tokens"].shape


torch.Size([5, 13, 29])


ipdb>  params["user_utterance"]["mask"].shape


torch.Size([5, 13, 27])


ipdb>  from allennlp.nn.util import get_text_field_mask
ipdb>  get_text_field_mask(params["user_utterance"]["tokens"]).shape


*** IndexError: too many indices for tensor of dimension 3


ipdb>  get_text_field_mask(params["user_utterance"])


tensor([[[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         ...,
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0]],

        [[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]],

        [[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]],

        [[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 1, 1, 1],
         [1, 1, 1,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]],

        [[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0

ipdb>  get_text_field_mask(params["user_utterance"]).shape


torch.Size([5, 13, 27])


ipdb>  params["intent_description"]["tokens"].shape


torch.Size([5, 13, 2, 15])


ipdb>  q


BdbQuit: 