# Setup

In [2]:
!pip install transformers tensorboardX gdown sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m89.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorboardX
  Downloading tensorboardX-2.6-py2.py3-none-any.whl (114 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.5/114.5 KB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0m
Collecting sentencepiece
  Downloading sentencepiece-0.1.97-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m55.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.12.0-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
[?25hColle

In [6]:
import gdown

url = "https://drive.google.com/uc?id=10GjAH-Qgv5G2HLzftg2AybUyAHZCnPDF&confirm=t"
output = "trained_model.zip"
gdown.download(url, output, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=10GjAH-Qgv5G2HLzftg2AybUyAHZCnPDF&confirm=t
To: /content/trained_model.zip
100%|██████████| 384M/384M [00:14<00:00, 25.8MB/s]


'trained_model.zip'

In [7]:
!unzip trained_model.zip
!mkdir trained_model
!mv config.json trained_model/config.json
!mv pytorch_model.bin trained_model/pytorch_model.bin

Archive:  trained_model.zip
  inflating: config.json             
  inflating: pytorch_model.bin       


# Inference implementation

In [5]:
from itertools import chain

import torch
from transformers import T5Tokenizer
from transformers.modeling_outputs import BaseModelOutput

import definitions
from model import T5WithSpan

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [8]:
def init_tokenizer(backbone):
  tokenizer = T5Tokenizer.from_pretrained(backbone)

  special_tokens = []

  # add domains
  domains = definitions.ALL_DOMAINS + ["general"]
  for domain in sorted(domains):
    token = "[" + domain + "]"
    special_tokens.append(token)

  # add intents
  intents = list(set(chain(*definitions.DIALOG_ACTS.values())))
  for intent in sorted(intents):
    token = "[" + intent + "]"
    special_tokens.append(token)

  # add slots
  slots = list(set(definitions.ALL_INFSLOT + definitions.ALL_REQSLOT))

  for slot in sorted(slots):
    token = "[value_" + slot + "]"
    special_tokens.append(token)

  special_tokens.extend(definitions.SPECIAL_TOKENS)
  tokenizer.add_special_tokens({"additional_special_tokens": special_tokens})

  return tokenizer

In [9]:
tokenizer = init_tokenizer("t5-small")

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


In [10]:
# max_seq_len is set to a constant for the Gumbel-Softmax variant
model = T5WithSpan.from_pretrained("trained_model",
                                    num_span=7,
                                    consistency_task=True,
                                    max_seq_len=104,
                                    expected_vocab_size=len(tokenizer)).eval().to(device)

Some weights of the model checkpoint at trained_model were not used when initializing T5WithSpan: ['span_head.weight', 'span_head.bias']
- This IS expected if you are initializing T5WithSpan from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing T5WithSpan from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [11]:
def extract_sequence_fragment(sequence, bos_token, eos_token, tokenizer_eos):
  if bos_token in sequence:
    sequence = sequence[sequence.index(bos_token):]

  if sequence[-1] == tokenizer_eos:
    sequence = sequence[:-1]

  if eos_token in sequence:
    sequence = sequence[:sequence.index(eos_token)+1]
  
  return sequence

In [34]:
def generate_answer(utterance, context=[], max_history=4):
  input = ""

  for turn in context[-4:]:
    input += turn
  
  input += utterance

  bos_action_token_id = tokenizer.convert_tokens_to_ids(definitions.BOS_ACTION_TOKEN)
  eos_action_token_id = tokenizer.convert_tokens_to_ids(definitions.EOS_ACTION_TOKEN)
  bos_resp_token_id = tokenizer.convert_tokens_to_ids(definitions.BOS_RESP_TOKEN)
  eos_resp_token_id = tokenizer.convert_tokens_to_ids(definitions.EOS_RESP_TOKEN)
  bos_db_token_id = tokenizer.convert_tokens_to_ids(definitions.BOS_DB_TOKEN)
  eos_db_token_id = tokenizer.convert_tokens_to_ids(definitions.EOS_DB_TOKEN)
  db_null_token_id = tokenizer.convert_tokens_to_ids(definitions.DB_NULL_TOKEN)

  input = definitions.BOS_USER_TOKEN + input + definitions.EOS_USER_TOKEN
  tokenized_input = tokenizer([input])

  input_ids = torch.tensor(tokenized_input.input_ids).to(device)
  attention_mask = torch.tensor(tokenized_input.attention_mask).to(device)

  encoder_outputs = model(input_ids=input_ids,
                          attention_mask=attention_mask,
                          return_dict=False,
                          encoder_only=True,
                          span_task=True)

  span_outputs, encoder_hidden_states = encoder_outputs

  if isinstance(encoder_hidden_states, tuple):
      last_hidden_state = encoder_hidden_states[0]
  else:
      last_hidden_state = encoder_hidden_states

  encoder_outputs = BaseModelOutput(
      last_hidden_state=last_hidden_state)

  belief_outputs = model.generate(encoder_outputs=encoder_outputs,
                                  attention_mask=attention_mask,
                                  max_length=100,
                                  top_p=0.7,
                                  decoder_type="belief")
  

  generated_belief = belief_outputs.cpu().numpy().tolist()[0]

  resp_decoder_input_ids = generated_belief
  if resp_decoder_input_ids[-1] == tokenizer.eos_token_id:
    resp_decoder_input_ids = resp_decoder_input_ids[:-1]

  resp_decoder_input_ids += [bos_db_token_id, db_null_token_id, eos_db_token_id]
  resp_decoder_input_ids = torch.tensor([resp_decoder_input_ids]).to(device)

  encoder_outputs = BaseModelOutput(last_hidden_state=last_hidden_state)

  resp_outputs = model.generate(
      encoder_outputs=encoder_outputs,
      attention_mask=attention_mask,
      decoder_input_ids=resp_decoder_input_ids,
      eos_token_id=tokenizer.eos_token_id,
      max_length=200,
      top_p=0.7,
      decoder_type="resp")

  generated_resp = resp_outputs.cpu().numpy().tolist()[0]
  
  resp_tokens = extract_sequence_fragment(generated_resp,
                                          bos_resp_token_id,
                                          eos_resp_token_id,
                                          tokenizer.eos_token_id)


  full_context_tokens = tokenized_input.input_ids[0] + generated_belief + generated_resp
  context.append(tokenizer.decode(full_context_tokens))

  return tokenizer.decode(resp_tokens)

# DEMO

In [40]:
CONTEXT = []

In [43]:
resp_string = generate_answer("On Thursday at 19:00.", CONTEXT)
resp_string

'<bos_resp> i am sorry, that time is not available. would you like to try a different time? <eos_resp>'

In [44]:
CONTEXT

['<bos_user> I would like to book a table in the Seni restaurant. <eos_user> </s><pad> <bos_belief> [restaurant] [value_name] Senii restaurant <eos_belief> </s><pad> <bos_belief> [restaurant] [value_name] Senii restaurant <eos_belief> <bos_db> [db_null] <eos_db> <bos_act> [restaurant] [request] time day <eos_act> <bos_resp> what day and time would you like to book? <eos_resp> </s>',
 '<bos_user> <bos_user> I would like to book a table in the Seni restaurant. <eos_user> </s><pad> <bos_belief> [restaurant] [value_name] Senii restaurant <eos_belief> </s><pad> <bos_belief> [restaurant] [value_name] Senii restaurant <eos_belief> <bos_db> [db_null] <eos_db> <bos_act> [restaurant] [request] time day <eos_act> <bos_resp> what day and time would you like to book? <eos_resp> </s>On Thursday at 19:00. <eos_user> </s><pad> <bos_belief> [restaurant] [value_name] Senii restaurant [value_time] 19:00 [value_day] friday <eos_belief> </s><pad> <bos_belief> [restaurant] [value_name] Senii restaurant [val