In [None]:
%load_ext autoreload
%autoreload 2
!git clone https://github.com/osmanio2/modular-bert-dst.git
import sys
root = '/content/modular-bert-dst'
sys.path.append(root)
!gdown https://drive.google.com/uc?id=1-6vYegAJyMd5O9qXccl_cnUAMXspldj8 -O {root}/data/train/dailogues.json
!{sys.executable} -m pip install torch==1.5.0+cu101 torchvision==0.6.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
!{sys.executable} -m pip install transformers==2.10
import os
import json
import random
import time
import argparse
import pickle
import torch as T
import numpy as np
from collections import defaultdict
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict, namedtuple
import transformers as tf

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## DST design

In [None]:
from copy import deepcopy
import transformers as tf
import numpy as np
import dst_model as dst
import json
import pickle


def softmax(v):
    """Return the softmax of ``v``, normalised over *all* of its elements.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but keeps ``np.exp`` from overflowing to ``inf``
    (and the quotient from becoming ``nan``) when the logits are large.

    Args:
        v: array-like of scores/logits.

    Returns:
        ``numpy.ndarray`` with the same shape as ``v`` whose entries sum to 1
        (an empty array is returned unchanged in shape).
    """
    v = np.asarray(v)
    if v.size == 0:
        # Nothing to normalise; v.max() would raise on an empty array.
        return np.exp(v)
    v = np.exp(v - v.max())
    return v / v.sum()

class DSTModel:
    """Dialogue state tracker assembled from several ``dst`` sub-modules.

    For every user turn the tracker decides, slot by slot, whether a slot is
    ``filled``, ``transferred``, ``dontcare`` or absent, and then resolves the
    concrete value with the categorical, span or transfer module. Separate
    modules predict the active intent and the requested slots. All modules are
    built on the same BERT encoder and tokenizer instances.
    """

    def __init__(self, schema_url, slot_class_url, cat_slot_url, span_slot_url, transfer_slot_url,
                 intent_url, req_slot_url, device='cpu'):
        """Load the schema and the pickled head of every sub-module.

        Args:
            schema_url: path to the schema JSON (a list of services, each with
                ``service_name`` and ``slots``).
            slot_class_url, cat_slot_url, span_slot_url, transfer_slot_url,
            intent_url, req_slot_url: paths to the pickled layers handed to the
                corresponding ``dst`` module.
            device: device string forwarded to every ``dst`` module.
        """
        with open(schema_url) as f:
            self.schema = json.load(f)
        # Lookup "<service>--<slot>" -> is_categorical flag from the schema.
        self.is_categorical = {f'{srv["service_name"]}--{s["name"]}': s['is_categorical']
                               for srv in self.schema for s in srv['slots']}
        slt_cls_lyr = self._load_pickle(slot_class_url)
        cat_slot_layer = self._load_pickle(cat_slot_url)
        span_slot_layer = self._load_pickle(span_slot_url)
        tran_slot_layer = self._load_pickle(transfer_slot_url)
        intent_layer = self._load_pickle(intent_url)
        req_slot_layer = self._load_pickle(req_slot_url)
        # One encoder/tokenizer pair is shared by all six modules.
        bert_model = dst.add_adapter_layers(tf.BertModel.from_pretrained('bert-base-uncased'),
                                            hidden_size=256)
        tokenizer = tf.BertTokenizer.from_pretrained('bert-base-uncased')
        self.slot_class = dst.SlotClassificationModule(bert_model, tokenizer, slt_cls_lyr,
                                                       self.schema, device)
        self.cat_slot = dst.CategoricalSlotModule(bert_model, tokenizer, cat_slot_layer,
                                                  self.schema, device)
        self.span_slot = dst.SpanSlotModule(bert_model, tokenizer, span_slot_layer,
                                            self.schema, device)
        self.transfer_slot = dst.TransferSlotModule(bert_model, tokenizer, tran_slot_layer,
                                                    self.schema, device)
        self.intent_class = dst.IntentClassificationModule(bert_model, tokenizer, intent_layer,
                                                           self.schema, device)
        self.req_slot = dst.RequestSlotModule(bert_model, tokenizer, req_slot_layer,
                                              self.schema, device)

    @staticmethod
    def _load_pickle(path):
        """Unpickle ``path`` and close the file afterwards."""
        # SECURITY: pickle.load can execute arbitrary code; only use trusted checkpoint files.
        with open(path, 'rb') as f:
            return pickle.load(f)

    @staticmethod
    def _is_uncertain(scores):
        """Return True when the entropy of ``scores`` exceeds ``0.5 / log(n)``.

        Zero entries contribute nothing to the entropy (``0 * log 0`` is taken
        as 0) instead of turning it into ``nan``, and fewer than two scores are
        never considered uncertain (``log(1)`` would make the threshold
        undefined).
        """
        scores = np.array(list(scores))
        if len(scores) < 2:
            return False
        positive = scores[scores > 0]
        entropy = -(positive * np.log(positive)).sum()
        # NOTE(review): the threshold divides by log(n) rather than multiplying;
        # kept as in the original - confirm this is the intended normalisation.
        return bool(entropy > 0.5/np.log(len(scores)))

    @staticmethod
    def _extract_acts(frames):
        """Flatten system frames into a set of ``service--act--slot--value`` strings.

        An action without values yields a single entry with an empty value part.
        """
        acts = set()
        for frame in frames:
            service = frame['service']
            for action in frame['actions']:
                act = action['act']
                slot = action['slot']
                if len(action['values']) == 0:
                    acts.add(f'{service}--{act}--{slot}--')
                    continue
                for value in action['values']:
                    acts.add(f'{service}--{act}--{slot}--{value}')
        return acts

    @staticmethod
    def _slot_value_in_act(act, serv_slot):
        """Return the value offered for ``serv_slot`` by ``act``, else ``False``.

        ``act`` has the form produced by ``_extract_acts``. Only ``OFFER`` acts
        for the same service and slot match. The split is limited to three
        separators so a value that itself contains ``--`` is kept intact.
        """
        service1, act_type, slot1, value1 = act.split('--', 3)
        service2, slot2 = serv_slot.split('--')
        if act_type == 'OFFER' and service1 == service2 and slot1 == slot2:
            return value1
        return False

    @staticmethod
    def extract_slots_class(slot_scores, use_labels=True):
        """Map each slot to its class from ``(score, slot, value)`` triples.

        Args:
            slot_scores: iterable of ``(score, slot, value)`` triples.
            use_labels: when True (the default, and the only behaviour that was
                reachable before) the ``value`` element of each triple is used
                as the class and ``'None'`` entries are skipped. When False the
                class is the argmax of ``score``; ``dontcare`` is accepted only
                if its score is above 0.9 and the ``None`` class is dropped.

        Returns:
            dict ``slot -> class``.
        """
        CLASSES = ['filled', 'transferred', 'dontcare', 'None']
        states = {}
        for score, slot, value in slot_scores:
            if use_labels:
                if value != 'None':
                    states[slot] = value
                continue
            idx = int(np.argmax(score))
            if idx < 3 and (idx != 2 or score[idx] > 0.9):
                states[slot] = CLASSES[idx]
        return states

    def _process_filled(self, system, user, serv_slot):
        """Predict the value the user supplied for ``serv_slot`` in this turn.

        Categorical slots use the categorical module and are rejected when the
        prediction distribution is uncertain. Other slots use the span module
        and are rejected when the span is ``'Not mentioned'``, longer than half
        of the (space-stripped) turn text, or scored below 0.4.

        Returns:
            the predicted value, or ``None`` when the prediction is rejected.
        """
        if self.is_categorical[serv_slot]:
            pred = self.cat_slot.predict(system, user, serv_slot)
            if self._is_uncertain(pred['predictions'].values()):
                return None
            return pred['state'][serv_slot]
        pred = self.span_slot.predict(system, user, serv_slot)
        # NOTE(review): index 1 of 'predictions' is used as the span confidence - confirm in dst.
        score, pred = pred['predictions'][1], pred['state'][serv_slot]
        text = (system + user).replace(' ', '')
        if pred == 'Not mentioned' or len(pred.replace(' ', '')) > len(text)/2 or score < 0.4:
            return None
        return pred

    def _process_transferred(self, system, user, serv_slot, prev_states, acts):
        """Find a value for ``serv_slot`` that was mentioned earlier in the dialogue.

        Search order: (1) values offered by the system, most recent turn first;
        (2) walking previous states from newest to oldest, a slot chosen by the
        transfer module, or the same service/slot if it appears within the
        three most recent states.

        Returns:
            the transferred value, or ``None`` when nothing is found.
        """
        # Most recent system offers take precedence.
        for serv_act in acts[::-1]:
            for act in serv_act:
                in_act = self._slot_value_in_act(act, serv_slot)
                if in_act:
                    return in_act
        service, slot = serv_slot.split('--')
        for i, state in enumerate(prev_states[::-1]):
            for other_serv, slot_values in state.items():
                pred = self.transfer_slot.predict(system, user, serv_slot, other_serv,
                                                  list(slot_values.keys()))
                if pred is not None:
                    # The predicted state is "<service>--<slot>"; take the slot part.
                    return slot_values[pred['state'][serv_slot].split('--')[1]][0]
                for other_slot, other_val in slot_values.items():
                    if other_serv == service and other_slot == slot and i < 3:
                        return other_val[0]
        return None

    @staticmethod
    def transfer_label(state, labels, target_slot):
        """Return the first reference value of ``target_slot`` if it is marked ``transferred``.

        ``state`` is an iterable of ``(score, slot, value)`` triples and
        ``labels`` maps slot names to lists of values. Returns ``None`` when
        the slot is not marked as transferred.
        """
        for score, slot, value in state:
            if value == 'transferred' and slot == target_slot:
                return labels[slot][0]
        return None

    def predict_state(self, system, user, frame, acts, prev_states, states_=None):
        """Overwrite ``frame['state']['slot_values']`` with the predicted state (in place).

        The original slot values are kept under ``frame['state']['labels']``.
        The prediction starts from the previous state of the same service and
        is updated slot by slot.

        Args:
            system, user: last system and current user utterances.
            frame: user-turn frame; mutated in place.
            acts: list of system act sets, one per earlier system turn.
            prev_states: list of ``{service: slot_values}`` dicts of earlier user turns.
            states_: optional pre-computed slot scores for this turn, keyed by
                service. ``None`` runs the slot-classification module; any
                other falsy value stops after carrying over the previous state.
        """
        service = frame['service']
        frame['state']['labels'] = frame['state']['slot_values']
        frame['state']['slot_values'] = {}
        if len(prev_states) > 0 and service in prev_states[-1]:
            frame['state']['slot_values'] = prev_states[-1][service].copy()

        if states_ is None:
            states = self.slot_class.predict(system, user, service)['state']
        elif not states_:
            return
        else:
            states = self.extract_slots_class(states_[frame['service']])
        for slot, value in states.items():
            serv_slot = f'{service}--{slot}'
            if value == 'filled':
                # Fall back to a transferred value when no value can be extracted.
                value = self._process_filled(system, user, serv_slot)
                if value is None:
                    value = self._process_transferred(system, user, serv_slot, prev_states, acts)
            elif value == 'transferred':
                # Fall back to extraction when nothing can be transferred.
                value = self._process_transferred(system, user, serv_slot, prev_states, acts)
                if value is None:
                    value = self._process_filled(system, user, serv_slot)

            # Any other class string (e.g. 'dontcare') is stored as the value itself.
            if value is not None:
                frame['state']['slot_values'][slot] = [value]

    def predict_active_intent(self, system, user, prev_intents, intents, frame):
        """Replace the frame's active intent with the prediction (in place).

        The original intent is kept under ``intent_labels`` and the prediction
        is also recorded in ``intents`` under the frame's service.
        """
        frame['state']['intent_labels'] = frame['state']['active_intent']
        preds = self.intent_class.predict(system, user, frame['service'], prev_intents)
        frame['state']['active_intent'] = preds['state'][frame['service']]
        intents[frame['service']] = frame['state']['active_intent']

    def predict_request_slot(self, system, user, frame):
        """Replace the frame's requested slots with the prediction (in place).

        The original list is kept under ``req_slots_labels``.
        """
        frame['state']['req_slots_labels'] = frame['state']['requested_slots']
        frame['state']['requested_slots'] = self.req_slot.predict(system, user,
                                                                  frame['service'])['state']

    def attach_prediction(self, dialog, slot_scores=None):
        """Return a deep copy of ``dialog`` whose user frames carry predictions.

        Args:
            dialog: dialogue dict with ``dialogue_id`` and ``turns``.
            slot_scores: optional mapping ``"<dialogue_id>-<turn index>"`` ->
                per-service slot scores; when given, it replaces the
                slot-classification module (a missing key yields ``False``).

        Returns:
            the annotated copy; the input dialogue is not modified.
        """
        dialog = deepcopy(dialog)
        prev_states, acts = [], []
        system = ''
        prev_intents = {}
        dialog_id = dialog['dialogue_id']
        for i, turn in enumerate(dialog['turns']):
            if turn['speaker'] == 'USER':
                states = None
                if slot_scores is not None:
                    states = slot_scores.get(f'{dialog_id}-{i}', False)
                user = turn['utterance']
                prev_state, intents = {}, {}
                for frame in turn['frames']:
                    self.predict_state(system, user, frame, acts, prev_states, states)
                    self.predict_active_intent(system, user, prev_intents, intents, frame)
                    self.predict_request_slot(system, user, frame)
                    prev_state[frame['service']] = frame['state']['slot_values']
                prev_intents = intents
                prev_states.append(prev_state)
            elif turn['speaker'] == 'SYSTEM':
                system = turn['utterance']
                acts.append(self._extract_acts(turn['frames']))
        return dialog

In [None]:
class DSTModel2(DSTModel):
    """DSTModel variant that weighs every plausible slot class instead of picking one.

    Each slot class whose softmax probability exceeds 0.1 proposes a candidate
    value; candidates are weighted by the class probability (times the value
    module's own score for ``filled``) and the heaviest candidate wins.
    Construction is inherited unchanged from ``DSTModel``.
    """

    @staticmethod
    def extract_slots_class(slot_scores):
        """Return ``slot -> [(class, probability), ...]`` for classes above 0.1.

        ``slot_scores`` is an iterable of ``(score, slot, value)`` triples; the
        score vector is passed through ``softmax`` and the ``value`` element is
        ignored.
        """
        CLASSES = ['filled', 'transferred', 'dontcare', 'None']
        states = {}
        for score, slot, value in slot_scores:
            score = softmax(score)
            states[slot] = [(CLASSES[i], score[i]) for i in range(len(score)) if score[i] > 0.1]
        return states

    def _process_filled(self, system, user, serv_slot):
        """Return ``(value, score)`` for ``serv_slot`` without any rejection rule.

        Categorical slots use the categorical module and the score of the
        predicted value; other slots use the span module.
        """
        if self.is_categorical[serv_slot]:
            pred = self.cat_slot.predict(system, user, serv_slot)
            score = pred['predictions'][pred['state'][serv_slot]]
        else:
            pred = self.span_slot.predict(system, user, serv_slot)
            # NOTE(review): index 1 of 'predictions' is used as the span confidence - confirm in dst.
            score = pred['predictions'][1]
        return pred['state'][serv_slot], score

    def predict_state(self, system, user, frame, acts, prev_states, states_=None):
        """Overwrite ``frame['state']['slot_values']`` with the weighted-vote prediction.

        The original slot values are kept under ``frame['state']['labels']`` and
        the prediction starts from the previous state of the same service.
        Unlike the base class, nothing beyond that carry-over happens when
        ``states_`` is missing or empty.

        Evidence that a slot has no value - the ``None`` class, a span module
        answer of ``'Not mentioned'`` and a failed transfer - is pooled under
        the single key ``'None'``, so neither ``'Not mentioned'`` nor Python
        ``None`` can be written as a slot value. Masses proposed for the same
        candidate by different classes are summed rather than overwritten.
        """
        service = frame['service']
        frame['state']['labels'] = frame['state']['slot_values']
        frame['state']['slot_values'] = {}
        if len(prev_states) > 0 and service in prev_states[-1]:
            frame['state']['slot_values'] = prev_states[-1][service].copy()

        if not states_:
            return
        states = self.extract_slots_class(states_[frame['service']])
        for slot, values in states.items():
            serv_slot = f'{service}--{slot}'
            pred = {}
            for cls, score in values:
                if cls == 'filled':
                    val, s = self._process_filled(system, user, serv_slot)
                    mass = s*score
                    if val == 'Not mentioned':
                        val = 'None'
                elif cls == 'transferred':
                    val = self._process_transferred(system, user, serv_slot, prev_states, acts)
                    mass = score
                    if val is None:
                        # Nothing to transfer counts as evidence for "no value".
                        val = 'None'
                elif cls == 'dontcare':
                    val, mass = 'dontcare', score
                else:
                    val, mass = 'None', score
                pred[val] = pred.get(val, 0) + mass
            if not pred:
                # No class passed the probability threshold; keep the carried-over value.
                continue
            # max() returns the first maximal key, matching a stable descending sort.
            value = max(pred, key=pred.get)
            if value != 'None':
                frame['state']['slot_values'][slot] = [value]
    

In [None]:
!nvidia-smi

Sun May 24 15:33:42 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.82       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P8    27W / 149W |      0MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [None]:
from collections import defaultdict
import json
def group_by_id(r):
    """Regroup flat score records into ``{id: {service: [(score, slot, value), ...]}}``.

    ``r`` holds three parallel sequences under ``'ids'``, ``'scores'`` and
    ``'labels'``. Every label must consist of exactly four ``--``-separated
    fields: service, slot, value and a trailing field that is discarded.
    The result is a nested ``defaultdict``, so unknown keys yield empty groups.
    """
    grouped = defaultdict(lambda: defaultdict(list))
    records = zip(r['ids'], r['scores'], r['labels'])
    for record_id, score, label in records:
        serv, slot, value, _ = label.split('--')
        per_service = grouped[record_id]
        per_service[serv].append((score, slot, value))
    return grouped
# Load the pre-computed slot-class scores and index them by id and service.
# os.path.join is used because `root` has no trailing slash, so plain string
# concatenation would glue the file name onto the directory name.
# NOTE(review): confirm the file really lives directly under `root`.
with open(os.path.join(root, 'test-slot-scores-02-06.json'), 'r') as f:
    slot_scores = group_by_id(json.load(f))

In [None]:

# Locations of the test schema and of the pickled sub-module checkpoints.
# Paths are built with os.path.join: `root` has no trailing slash, so
# `root + 'name'` would produce a malformed path.
# NOTE(review): confirm these files live directly under `root`.
schema_url = os.path.join(root, 'test/schema.json')
slot_class_url = os.path.join(root, 'model-slot-class.pkl')
cat_slot_url = os.path.join(root, 'model_cat_slot.pkl')
intent_url = os.path.join(root, 'model_intent.pkl')
req_slot_url = os.path.join(root, 'model_req_slot2.pkl')
span_slot_url = os.path.join(root, 'model-span-slot.pkl')
transfer_slot_url = os.path.join(root, 'model-transfer-slot-best.pkl')

# Reference dialogues and schema for the test split (files are closed after reading).
with open(os.path.join(root, 'test/dialogues.json')) as f:
    dialogs = json.load(f)
with open(schema_url) as f:
    schema = json.load(f)

In [None]:
# Build the tracker on the GPU; arguments are passed by name so the order of
# the checkpoint paths cannot be mixed up.
model = DSTModel(
    schema_url=schema_url,
    slot_class_url=slot_class_url,
    cat_slot_url=cat_slot_url,
    span_slot_url=span_slot_url,
    transfer_slot_url=transfer_slot_url,
    intent_url=intent_url,
    req_slot_url=req_slot_url,
    device='cuda',
)

In [None]:
# `tqdm.tqdm_notebook` is deprecated; `tqdm.notebook.tqdm` is its replacement.
from tqdm.notebook import tqdm
# Annotate every test dialogue with predictions, using the pre-computed slot scores.
pred_dialogs = [model.attach_prediction(d, slot_scores) for d in tqdm(dialogs)]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(FloatProgress(value=0.0, max=4201.0), HTML(value='')))




In [None]:
# Persist the annotated dialogues. os.path.join avoids gluing the file name
# onto `root`, which has no trailing slash.
with open(os.path.join(root, 'test_dialogues_output-02-06-class-solved.json'), 'w') as f:
    json.dump(pred_dialogs, f)

In [None]:
# Reload previously saved predictions (file is closed after reading).
# NOTE(review): this reads 'test_dialogues_output-02-06.json', not the
# '-class-solved' file written by the cell above - confirm this is intended.
with open(os.path.join(root, 'test_dialogues_output-02-06.json')) as f:
    pred_dialogs = json.load(f)

In [None]:
import json
#pred_dialogs = json.load(open(root + 'dialogues_output-26-05.json'))
# Reference dialogues and schema of the test split; os.path.join is used
# because `root` has no trailing slash, and the files are closed after reading.
with open(os.path.join(root, 'test/dialogues.json')) as f:
    dialogs = json.load(f)
with open(os.path.join(root, 'test/schema.json')) as f:
    schema = json.load(f)

In [None]:
!{sys.executable} -m pip install fuzzywuzzy
import evaluate
def as_dict(dialogs):
    """Index dialogues by ``dialogue_id``; a later duplicate id replaces an earlier one."""
    by_id = {}
    for dialog in dialogs:
        by_id[dialog['dialogue_id']] = dialog
    return by_id

# Services present in both the test and the train schema. Paths are joined with
# os.path.join because `root` has no trailing slash.
in_domain_services = evaluate.get_in_domain_services(os.path.join(root, "test/schema.json"),
                                                     os.path.join(root, "train/schema.json"))
# Schema entries indexed by service name, as expected by evaluate.get_metrics.
eval_services = {service["service_name"]: service for service in schema}

# Compare the reference dialogues with the predicted ones.
all_metric_aggregate, per_frame_metric = evaluate.get_metrics(as_dict(dialogs), as_dict(pred_dialogs),
                                                              eval_services, in_domain_services)

INFO:tensorflow:len(dataset_hyp)=4201, len(dataset_ref)=4201


In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 0.9227599965304883,
  'average_cat_accuracy': 0.7564844488648224,
  'average_goal_accuracy': 0.8036272152357345,
  'average_noncat_accuracy': 0.8129780508973931,
  'joint_cat_accuracy': 0.7348025048169556,
  'joint_goal_accuracy': 0.48836374262728766,
  'joint_noncat_accuracy': 0.6284840325049874,
  'requested_slots_f1': 0.9623297770838755,
  'requested_slots_precision': 0.9991579206060079,
  'requested_slots_recall': 0.9608378870673953,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0},
 '#SEEN_SERVICES': {'active_intent_accuracy': 0.9638616864546321,
  'average_cat_accuracy': 0.8630276564774383,
  'average_goal_accuracy': 0.8516266181440647,
  'average_noncat_accuracy': 0.813388327929597,
  'joint_cat_accuracy': 0.876949051544344,
  'joint_goal_accuracy': 0.671536541901378,
  'joint_noncat_accuracy': 0.7480892033105121,
  'requested_slots_f1': 0.9915070630037265,
  'requested_slots_precision': 0.99896

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 0.9227599965304883,
  'requested_slots_f1': 0.9623297770838755,
  'requested_slots_precision': 0.9991579206060079,
  'requested_slots_recall': 0.9608378870673953,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 0.883035388492409,
  'average_noncat_accuracy': 0.8866487984329462,
  'joint_goal_accuracy': 0.702005330942406,
  'joint_cat_accuracy': 0.8620183044315992,
  'joint_noncat_accuracy': 0.7976014128324226,
  'average_cat_accuracy': 0.8549624881816285},
 'Restaurants_2': {'active_intent_accuracy': 0.9800121138703816,
  'requested_slots_f1': 0.9814153038562488,
  'requested_slots_precision': 0.9979305471431457,
  'requested_slots_recall': 0.9800121138703816,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 0.9740186142356207,
  'average_noncat_accuracy': 0.9720949475487047,
  'joint_goal_accuracy': 0.900067

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 0.9227599965304883,
  'requested_slots_f1': 0.9623297770838755,
  'requested_slots_precision': 0.9991579206060079,
  'requested_slots_recall': 0.9608378870673953,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 0.8036272152357345,
  'average_noncat_accuracy': 0.8129780508973931,
  'joint_goal_accuracy': 0.48836374262728766,
  'joint_cat_accuracy': 0.7348025048169556,
  'joint_noncat_accuracy': 0.6284840325049874,
  'average_cat_accuracy': 0.7564844488648224},
 'Restaurants_2': {'active_intent_accuracy': 0.9800121138703816,
  'requested_slots_f1': 0.9814153038562488,
  'requested_slots_precision': 0.9979305471431457,
  'requested_slots_recall': 0.9800121138703816,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 0.9211568428030015,
  'average_noncat_accuracy': 0.9116983515307214,
  'joint_goal_accuracy': 0.489

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 0.9496107681098286,
  'requested_slots_f1': 0.9752566118290703,
  'requested_slots_precision': 0.9988751262287328,
  'requested_slots_recall': 0.9745752962380642,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 1.0,
  'average_noncat_accuracy': 1.0,
  'joint_goal_accuracy': 1.0,
  'joint_cat_accuracy': 1.0,
  'joint_noncat_accuracy': 1.0,
  'average_cat_accuracy': 1.0},
 'Media_2': {'active_intent_accuracy': 0.960668633235005,
  'requested_slots_f1': 0.9676499508357915,
  'requested_slots_precision': 0.9860701409373975,
  'requested_slots_recall': 0.9729596853490659,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 1.0,
  'average_noncat_accuracy': 1.0,
  'joint_goal_accuracy': 1.0,
  'joint_cat_accuracy': 1.0,
  'joint_noncat_accuracy': 1.0,
  'average_cat_accuracy': 1.0},
 'Media': {'active_intent_accuracy'

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 0.6290984392376424,
  'requested_slots_f1': 0.5362176114333187,
  'requested_slots_precision': 0.5857350755153417,
  'requested_slots_recall': 0.9247421099052805,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 0.9188969298116612,
  'average_noncat_accuracy': 0.9008233448723496,
  'joint_goal_accuracy': 0.6773245848832304,
  'joint_cat_accuracy': 0.8658129863713098,
  'joint_noncat_accuracy': 0.7608355869156729,
  'average_cat_accuracy': 0.9444507511257425},
 'Media_2': {'active_intent_accuracy': 0.6548672566371682,
  'requested_slots_f1': 0.6506765931544692,
  'requested_slots_precision': 0.6799246148803672,
  'requested_slots_recall': 0.9552605703048181,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 0.7840828828828829,
  'average_noncat_accuracy': 0.8340882882882884,
  'joint_goal_accuracy': 0.4075292035

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 1.0,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 0.918816633332353,
  'average_noncat_accuracy': 0.900668649935093,
  'joint_goal_accuracy': 0.6773245848832304,
  'joint_cat_accuracy': 0.8658129863713098,
  'joint_noncat_accuracy': 0.7608355869156729,
  'average_cat_accuracy': 0.9444507511257425},
 'Media_2': {'active_intent_accuracy': 1.0,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 0.7819747747747747,
  'average_noncat_accuracy': 0.8302234234234235,
  'joint_goal_accuracy': 0.407529203539823,
  'joint_cat_accuracy': 0.727630285152409,
  'joint_noncat_accuracy': 0.49349773844641104,
  'average_cat_accuracy'

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 1.0,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 0.9018511088561428,
  'average_noncat_accuracy': 0.8977216861387565,
  'joint_goal_accuracy': 0.6476674540783067,
  'joint_cat_accuracy': 0.8316383250298514,
  'joint_noncat_accuracy': 0.7553107489358438,
  'average_cat_accuracy': 0.8866074469292767},
 'Media_2': {'active_intent_accuracy': 1.0,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 0.7787165765765767,
  'average_noncat_accuracy': 0.8251585585585587,
  'joint_goal_accuracy': 0.44472409046214356,
  'joint_cat_accuracy': 0.7305801376597837,
  'joint_noncat_accuracy': 0.5404623402163226,
  'average_cat_accur

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 1.0,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 0.9169894972401149,
  'average_noncat_accuracy': 0.8985353887206116,
  'joint_goal_accuracy': 0.6719021721056869,
  'joint_cat_accuracy': 0.8658129863713098,
  'joint_noncat_accuracy': 0.7549156308624457,
  'average_cat_accuracy': 0.9444507511257425},
 'Media_2': {'active_intent_accuracy': 1.0,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 0.7778612612612612,
  'average_noncat_accuracy': 0.8252720720720721,
  'joint_goal_accuracy': 0.4019634218289086,
  'joint_cat_accuracy': 0.727630285152409,
  'joint_noncat_accuracy': 0.4896035398230089,
  'average_cat_accurac

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 1.0,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 0.905871452591743,
  'average_noncat_accuracy': 0.9037218015289197,
  'joint_goal_accuracy': 0.6470431061855275,
  'joint_cat_accuracy': 0.8268209330094289,
  'joint_noncat_accuracy': 0.7665138831153888,
  'average_cat_accuracy': 0.8938017936201612},
 'Media_2': {'active_intent_accuracy': 1.0,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 0.8321864864864865,
  'average_noncat_accuracy': 0.8231936936936937,
  'joint_goal_accuracy': 0.4318096361848574,
  'joint_cat_accuracy': 0.8839724680432645,
  'joint_noncat_accuracy': 0.47703087512291054,
  'average_cat_accura

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.89550932758164,
  'average_goal_accuracy': 0.8991869932448994,
  'average_noncat_accuracy': 0.8929840545218519,
  'joint_cat_accuracy': 0.8172273232593569,
  'joint_goal_accuracy': 0.6308647990949878,
  'joint_noncat_accuracy': 0.74714218337999,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0},
 '#SEEN_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.958382995629718,
  'average_goal_accuracy': 0.92831089833025,
  'average_noncat_accuracy': 0.9094322383442586,
  'joint_cat_accuracy': 0.9399750202042466,
  'joint_goal_accuracy': 0.7709880105834465,
  'joint_noncat_accuracy': 0.8089804597014926,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'sl

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.8954241873841147,
  'average_goal_accuracy': 0.9113732771552103,
  'average_noncat_accuracy': 0.9088642362613587,
  'joint_cat_accuracy': 0.9045168196977807,
  'joint_goal_accuracy': 0.7493351208344519,
  'joint_noncat_accuracy': 0.8195854180312153,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0},
 '#SEEN_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.9580519136538206,
  'average_goal_accuracy': 0.9351997619564356,
  'average_noncat_accuracy': 0.9180592300704623,
  'joint_cat_accuracy': 0.9476158989052972,
  'joint_goal_accuracy': 0.8105495648575304,
  'joint_noncat_accuracy': 0.8453709759837178,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.8983946342755514,
  'average_goal_accuracy': 0.9138459479200369,
  'average_noncat_accuracy': 0.9108776792153469,
  'joint_cat_accuracy': 0.7681475686581298,
  'joint_goal_accuracy': 0.5796289603865475,
  'joint_noncat_accuracy': 0.7328380373509222,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0},
 '#SEEN_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.9583333333333334,
  'average_goal_accuracy': 0.9296012184661177,
  'average_noncat_accuracy': 0.9080512508899856,
  'joint_cat_accuracy': 0.9226361031518625,
  'joint_goal_accuracy': 0.7172483785617367,
  'joint_noncat_accuracy': 0.7681333853459974,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.8983946342755514,
  'average_goal_accuracy': 0.9142272277616008,
  'average_noncat_accuracy': 0.9114783931919803,
  'joint_cat_accuracy': 0.7633713509284803,
  'joint_goal_accuracy': 0.5699532308164283,
  'joint_noncat_accuracy': 0.7265100855159721,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0},
 '#SEEN_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.9583333333333334,
  'average_goal_accuracy': 0.930147388927357,
  'average_noncat_accuracy': 0.9088399008126488,
  'joint_cat_accuracy': 0.9145544045257512,
  'joint_goal_accuracy': 0.7059767367706921,
  'joint_noncat_accuracy': 0.7626901017639078,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.8984324743633405,
  'average_goal_accuracy': 0.9125424811514063,
  'average_noncat_accuracy': 0.9096778162411654,
  'joint_cat_accuracy': 0.7612302878082925,
  'joint_goal_accuracy': 0.5607319517582544,
  'joint_noncat_accuracy': 0.7175864154619012,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0},
 '#SEEN_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.9583995497285127,
  'average_goal_accuracy': 0.9356482225965695,
  'average_noncat_accuracy': 0.9184554270702905,
  'joint_cat_accuracy': 0.9111013151127765,
  'joint_goal_accuracy': 0.706089531614654,
  'joint_noncat_accuracy': 0.7669772113975576,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.8858979452832331,
  'average_goal_accuracy': 0.8979495445944489,
  'average_noncat_accuracy': 0.8969898673013126,
  'joint_cat_accuracy': 0.7354551817844938,
  'joint_goal_accuracy': 0.5163622223491966,
  'joint_noncat_accuracy': 0.6579375206963991,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0},
 '#SEEN_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.9519103430009271,
  'average_goal_accuracy': 0.9200158552171501,
  'average_noncat_accuracy': 0.9036669244555745,
  'joint_cat_accuracy': 0.8983175372860187,
  'joint_goal_accuracy': 0.6371919154138398,
  'joint_noncat_accuracy': 0.6984340063229307,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.7942350626253453,
  'average_goal_accuracy': 0.8592235448101415,
  'average_noncat_accuracy': 0.870393321794317,
  'joint_cat_accuracy': 0.7467369374562524,
  'joint_goal_accuracy': 0.5521871637841776,
  'joint_noncat_accuracy': 0.6887890466311308,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0},
 '#SEEN_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.8699841080651569,
  'average_goal_accuracy': 0.8857732161373556,
  'average_noncat_accuracy': 0.8815852790258034,
  'joint_cat_accuracy': 0.8598927338182353,
  'joint_goal_accuracy': 0.6629819466757123,
  'joint_noncat_accuracy': 0.7367413089552239,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.8362801301699019,
  'average_goal_accuracy': 0.8811610655215351,
  'average_noncat_accuracy': 0.8882705682965527,
  'joint_cat_accuracy': 0.7896405484415531,
  'joint_goal_accuracy': 0.5883157820301415,
  'joint_noncat_accuracy': 0.7079445725735323,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0},
 '#SEEN_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.9183551847437426,
  'average_goal_accuracy': 0.9074757081882163,
  'average_noncat_accuracy': 0.8947275294002112,
  'joint_cat_accuracy': 0.8986848872235692,
  'joint_goal_accuracy': 0.6870622519674355,
  'joint_noncat_accuracy': 0.7500743550881953,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0

In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.9173761683127104,
  'average_goal_accuracy': 0.930597285214271,
  'average_noncat_accuracy': 0.9266816169046589,
  'joint_cat_accuracy': 0.9290155227076213,
  'joint_goal_accuracy': 0.8022980210913833,
  'joint_noncat_accuracy': 0.8598737698354871,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0},
 '#SEEN_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.9767083829956297,
  'average_goal_accuracy': 0.9486577305594885,
  'average_noncat_accuracy': 0.9280338931035329,
  'joint_cat_accuracy': 0.9696568951583279,
  'joint_goal_accuracy': 0.842696256173677,
  'joint_noncat_accuracy': 0.8638970635006785,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,


In [None]:
all_metric_aggregate

{'#ALL_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.8057952094448858,
  'average_goal_accuracy': 0.8190136317734835,
  'average_noncat_accuracy': 0.8240520553872782,
  'joint_cat_accuracy': 0.5296660765018323,
  'joint_goal_accuracy': 0.2599274480062891,
  'joint_noncat_accuracy': 0.4470958241392799,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0},
 '#SEEN_SERVICES': {'active_intent_accuracy': 1.0,
  'average_cat_accuracy': 0.8802973116143556,
  'average_goal_accuracy': 0.8527528050213068,
  'average_noncat_accuracy': 0.8327925413076036,
  'joint_cat_accuracy': 0.6826096539563589,
  'joint_goal_accuracy': 0.34901596231750337,
  'joint_noncat_accuracy': 0.49124878984124837,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1

In [None]:
!nvidia-smi

Sat May  9 22:16:31 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.82       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [None]:
# Debug lookup: walk every record that DD.extract_labels yields for each
# dialogue and print the ones whose user utterance contains the phrase below.
# Relies on `dialogs` and `schema` having been defined by earlier cells.
import slot_class_data2 as DD
for d in dialogs:
    for r in DD.extract_labels(d, schema):
        # Case-insensitive substring match on the user utterance.
        if 'can you find me a bus going there' in r['user'].lower():
            print(r)

{'id': '17_00066-6', 'user': 'Maybe later. Can you find me a bus going there?', 'system': 'Would you like to buy tickets to this event?', 'acts': {'Events_1--OFFER_INTENT--intent--BuyEventTickets'}, 'labels': {'Buses_1--to_location--transferred'}, 'states': {'Events_1--category--Sports', 'Events_1--event_name--Whitecaps Vs Colorado Rapids', 'Events_1--date--March 8th', 'Buses_1--to_location--Vancouver', 'Events_1--city_of_event--Vancouver'}, 'other_states': {'Events_1--address_of_location': 'in_act--OFFER event location BC Place', 'Flights_3--outbound_departure_time': 'other_domain--Events_1--date--March 8th', 'Events_1--event_name': 'in_act--OFFER event name Whitecaps Vs Colorado Rapids', 'Events_1--date': 'in_act--OFFER date March 8th', 'Events_1--city_of_event': 'in_act--OFFER event location BC Place', 'Buses_1--to_location': 'other_domain--Events_1--city_of_event--Vancouver', 'Events_1--time': 'in_act--OFFER date March 8th', 'Buses_1--leaving_date': 'other_domain--Events_1--date--M

In [None]:
# Spot-check the intent classifier on one hand-written turn: the system and
# user utterances, the service being queried, and the previous intent per service.
system = 'Do you need any other help?'
user = 'No. Thank you so much.'
service = 'Weather_1'
previous_intents = {'Weather_1': 'GetWeather'}
# Last expression of the cell, so the returned prediction is displayed.
model.intent_class.predict(system, user, service, previous_intents)

{'state': {'Weather_1': 'GetWeather'},
 'predictions': {'NONE': 0.00032196927, 'GetWeather': 0.999678}}

In [None]:
# Build the intent dataset together with the BERT tokenizer it is given.
import intent_data as D
tokenizer = tf.BertTokenizer.from_pretrained('bert-base-uncased')
# NOTE(review): `root + 'dev'` concatenates without a path separator, so this
# only resolves to a "dev" directory if `root` ends with "/" at this point.
# Confirm the value of `root` set by earlier cells.
data = D.IntentDataset(root + 'dev', tokenizer)

In [None]:
# Take only the first item from the iterator returned by data.dataLoader(128).
# The argument 128 is presumably the batch size - TODO confirm against IntentDataset.
b = next(data.dataLoader(128))

In [None]:
# Call attach_prediction on the first dialogue only.
# NOTE(review): `slot_scores` is not defined in any nearby cell; it must be
# left over from an earlier cell, so this line fails on a fresh kernel unless
# that cell has been run first.
d = model.attach_prediction(dialogs[0], slot_scores)

{'context': 'System: Hello, how can I help? [SEP] User: I wish to search a movie to watch online. Drama movie will be great. Search something directed by Likarion Wainaina. Search something iwth Stycie Waweru in it. [SEP] Service description: The widest selection and lowest prices for movie rentals', 'choices': ['None of the intentions are active', 'Find movies to watch by genre and, optionally, director or actors', 'Rent a movie to watch, with optional subtitles'], 'id2intent': ['NONE', 'FindMovies', 'RentMovie']}
{'context': 'System: I found 1 movie for you. What about Supa Modo? [SEP] User: Supa Modo is nice. [SEP] Service description: The widest selection and lowest prices for movie rentals', 'choices': ['None of the intentions are active', 'Find movies to watch by genre and, optionally, director or actors [SEP] Previous user intention', 'Rent a movie to watch, with optional subtitles'], 'id2intent': ['NONE', 'FindMovies', 'RentMovie']}
{'context': 'System: Will you rent the movie 

In [None]:
pred_dialogs[102]

{'dialogue_id': '10_00102',
 'services': ['Media_2', 'Music_1'],
 'turns': [{'frames': [{'service': 'Media_2',
     'slots': [{'exclusive_end': 60, 'slot': 'actors', 'start': 45},
      {'exclusive_end': 90, 'slot': 'director', 'start': 77},
      {'exclusive_end': 29, 'slot': 'genre', 'start': 22}],
     'state': {'active_intent': 'FindMovies',
      'requested_slots': [],
      'slot_values': {'actors': ['quentin kenihan'],
       'director': ['george miller'],
       'genre': ['fantasy']},
      'intent_labels': 'FindMovies'},
     'metrics': {'active_intent_accuracy': 1.0,
      'requested_slots_f1': 1.0,
      'requested_slots_precision': 1.0,
      'requested_slots_recall': 1.0,
      'slot_tagging_f1': 1.0,
      'slot_tagging_precision': 1.0,
      'slot_tagging_recall': 1.0,
      'average_goal_accuracy': 1.0,
      'average_cat_accuracy': 'NA',
      'average_noncat_accuracy': 1.0,
      'joint_goal_accuracy': 1.0,
      'joint_cat_accuracy': 1.0,
      'joint_noncat_accuracy

In [None]:
def evaluate(model, batch):
    """Score one batch with `model` and compute its cross-entropy loss.

    Args:
        model: callable invoked as
            ``model(input_ids, attention_mask, token_type_ids)``; only the
            first element of what it returns is used as the score tensor.
        batch: object exposing ``x.input_ids``, ``x.attention_mask``,
            ``x.token_type_ids`` and the target tensor ``y``.

    Returns:
        tuple: ``(scores, loss)`` where ``scores`` are the model scores after
        masking and ``loss`` is ``cross_entropy(scores, batch.y)``.
    """
    inputs = batch.x
    scores = model(inputs.input_ids, inputs.attention_mask, inputs.token_type_ids)[0]
    # Keep only the first position of the last attention-mask axis: one flag
    # per score entry (presumably shaped batch x choices - TODO confirm).
    choice_mask = inputs.attention_mask[..., 0]
    # Flag 1 leaves the score untouched; flag 0 subtracts 1e8 so that entry
    # can effectively never win the softmax/argmax.
    scores = scores + (choice_mask.float() - 1) * 1e8
    loss = T.nn.functional.cross_entropy(scores, batch.y)
    return scores, loss

def metrics(scores, labels):
    """Compare the highest-scoring index of each row with `labels`.

    Args:
        scores: tensor reduced with ``max`` over its last dimension.
        labels: tensor compared element-wise with the winning indices.

    Returns:
        tuple: ``(accuracy, correct_idx, wrong_idx)`` where ``accuracy`` is
        the number of matches divided by ``scores.shape[0]`` and the two
        index arrays are numpy arrays holding the positions (along the first
        axis) of the matching and non-matching entries.
    """
    with T.no_grad():
        _, predicted = scores.max(dim=-1)
        hits = predicted == labels
        # First element of the where-tuple: indices along the leading axis.
        hit_idx = T.where(hits)[0]
        miss_idx = T.where(~hits)[0]
        accuracy = hit_idx.shape[0] / scores.shape[0]
    return accuracy, hit_idx.cpu().numpy(), miss_idx.cpu().numpy()

def evaluate_valid_data(model, valid_data):
    """Evaluate `model` on every batch of `valid_data` without gradients.

    Each batch is passed through ``D.IntentDataset.map_to_cuda``, scored with
    ``evaluate`` and graded with ``metrics``. Per-batch accuracy is weighted by
    ``len(batch.y)`` so the result is the accuracy over all examples.
    Progress is written to stderr after each batch.

    The model is put in eval mode for the duration of the call and is then
    returned to whatever mode it was in before, even if a batch raises.
    Objects without a ``training`` attribute are switched to train mode
    afterwards, which is what this function always did previously.

    Args:
        model: model passed on to ``evaluate``; must provide ``eval()`` and
            ``train(mode)``.
        valid_data: sized iterable of batches exposing ``y``, ``intent`` and
            ``id``.

    Returns:
        dict: ``{'acc', 'scores', 'labels', 'ids'}``. ``scores`` is the
        concatenated score array as nested lists. For an empty `valid_data`
        ``acc`` is NaN and ``scores`` is an empty list.
    """
    # Remember the caller's mode so an inference model is not left in train mode.
    was_training = getattr(model, 'training', True)
    weighted_acc, count = 0, 0
    scores, intents, ids = [], [], []
    model.eval()
    try:
        with T.no_grad():
            total_batches = len(valid_data)
            for i, batch in enumerate(valid_data):
                batch = D.IntentDataset.map_to_cuda(batch)
                scores_, _ = evaluate(model, batch)
                acc_, _, _ = metrics(scores_, batch.y)
                batch_size = len(batch.y)
                weighted_acc += acc_ * batch_size
                count += batch_size
                scores.append(scores_.cpu().numpy())
                intents += batch.intent
                ids += batch.id
                # The reported accuracy is that of the current batch only.
                sys.stderr.write(f'\r{i+1}/{total_batches} acc: {acc_}{"": <30}')
    finally:
        model.train(was_training)
    if count == 0:
        # Nothing was evaluated: avoid dividing by zero / concatenating nothing.
        return {'acc': float('nan'), 'scores': [], 'labels': intents, 'ids': ids}
    return {'acc': weighted_acc / count,
            'scores': np.concatenate(scores).tolist(),
            'labels': intents,
            'ids': ids}

In [None]:
# Load the validation batches from the dataset built above, 128 examples per batch.
valid_data = data.load_valid_data(batch_size=128)

In [None]:
# Prepare the intent classifier via its private _attach_module() hook
# (NOTE(review): presumably plugs the intent head into the shared model - confirm),
# then run the validation loop on its underlying model and show only the accuracy.
model.intent_class._attach_module()
evaluate_valid_data(model.intent_class.model, valid_data)['acc']

89/204 acc: 0.71875                              

In [None]:
# Import the metrics module under an alias. A plain `import evaluate` would
# rebind the name `evaluate`, which an earlier cell defines as the
# `evaluate(model, batch)` helper that `evaluate_valid_data` calls by name;
# re-running the validation cell afterwards would then fail because a module
# is not callable.
import evaluate as sgd_evaluate
# Score only the first three dialogues and display the first element of the result.
sgd_evaluate.get_metrics(as_dict(dialogs[:3]), as_dict(pred_dialogs[:3]), eval_services, in_domain_services)[0]

INFO:tensorflow:len(dataset_hyp)=3, len(dataset_ref)=3


{'#ALL_SERVICES': {'active_intent_accuracy': 0.8461538461538461,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 1.0,
  'average_noncat_accuracy': 1.0,
  'joint_goal_accuracy': 1.0,
  'joint_cat_accuracy': 1.0,
  'joint_noncat_accuracy': 1.0,
  'average_cat_accuracy': 1.0},
 'Media_2': {'active_intent_accuracy': 0.8571428571428571,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  'requested_slots_recall': 1.0,
  'slot_tagging_f1': 1.0,
  'slot_tagging_precision': 1.0,
  'slot_tagging_recall': 1.0,
  'average_goal_accuracy': 1.0,
  'average_noncat_accuracy': 1.0,
  'joint_goal_accuracy': 1.0,
  'joint_cat_accuracy': 1.0,
  'joint_noncat_accuracy': 1.0,
  'average_cat_accuracy': 1.0},
 'Media': {'active_intent_accuracy': 0.8571428571428571,
  'requested_slots_f1': 1.0,
  'requested_slots_precision': 1.0,
  

In [None]:
8 + 8 + 9

25