In [None]:
!git clone https://github.com/VinAIResearch/MISCA.git

In [None]:
!pip install pytorch-crf==0.7.2
!pip install scikit-learn==1.2.2
!pip install scipy==1.10.0
!pip install sentencepiece==0.1.97
!pip install seqeval==0.0.12
!pip install tensorboard==2.15

!pip install tokenizers==0.13.2
!pip install transformers==4.26.1
!pip install tqdm==4.64.1

!pip install six

# processdata

In [None]:
import os
import numpy as np
import torch
import logging
import copy
import json

from transformers import AutoTokenizer
from torch.utils.data import Dataset, TensorDataset
from torch.utils.data import DataLoader

#test
logger = logging.getLogger(__name__)

def convert_examples_to_features(examples, max_seq_len, tokenizer,
                                 pad_token_label_id=-100,
                                 cls_token_segment_id=0,
                                 pad_token_segment_id=0,
                                 sequence_a_segment_id=0,
                                 mask_padding_with_zero=True):
    """Turn word-level examples into fixed-length sub-word features.

    Every word of ``example.text`` is tokenized on its own; the position of
    its first sub-word piece (shifted by one for the leading CLS token) is
    recorded in ``heads``. The sequence is wrapped as ``CLS pieces SEP`` and
    padded up to ``max_seq_len``.

    Args:
        examples: iterable of objects exposing ``guid``, ``text``, ``chars``,
            ``intent_label`` and ``slot_labels``.
        max_seq_len: length of the padded ``input_ids`` / masks.
        tokenizer: object exposing ``cls_token``, ``sep_token``, ``unk_token``,
            ``pad_token_id``, ``tokenize`` and ``convert_tokens_to_ids``.
        pad_token_label_id: accepted for interface compatibility; not read
            anywhere in this function.
        cls_token_segment_id: segment id assigned to the CLS position.
        pad_token_segment_id: segment id assigned to padding positions.
        sequence_a_segment_id: segment id assigned to pieces and SEP.
        mask_padding_with_zero: when true real tokens get mask 1 and padding 0,
            otherwise the two values are swapped.

    Returns:
        list of ``InputExample`` whose ``words`` field holds the padded
        ``input_ids``.
    """
    cls_token = tokenizer.cls_token
    sep_token = tokenizer.sep_token
    unk_token = tokenizer.unk_token
    pad_token_id = tokenizer.pad_token_id

    # Room is reserved for the CLS and SEP tokens.
    special_tokens_count = 2
    piece_budget = max_seq_len - special_tokens_count
    real_mask = 1 if mask_padding_with_zero else 0
    pad_mask = 0 if mask_padding_with_zero else 1

    features = []
    for ex_index, example in enumerate(examples):
        pieces = []
        word_starts = []
        # Zipping against the inner slot labels limits the loop to the
        # shorter of the two sequences.
        for word, _ in zip(example.text, example.slot_labels[1:-1]):
            # An empty tokenization is replaced by a single unknown token.
            word_pieces = tokenizer.tokenize(word) or [unk_token]
            word_starts.append(len(pieces) + 1)  # +1 for the CLS token
            pieces.extend(word_pieces)

        # NOTE(review): only the pieces are truncated; word_starts keeps its
        # entries for words that were cut off -- confirm this is intended.
        pieces = pieces[:piece_budget]

        sep_position = len(pieces) + 1
        tokens = [cls_token] + pieces + [sep_token]
        heads = [0] + word_starts + [sep_position]
        token_type_ids = [cls_token_segment_id] + [sequence_a_segment_id] * (len(pieces) + 1)

        input_ids = tokenizer.convert_tokens_to_ids(tokens)
        attention_mask = [real_mask] * len(input_ids)

        # Pad every sequence up to max_seq_len.
        padding_length = max_seq_len - len(input_ids)
        input_ids = input_ids + ([pad_token_id] * padding_length)
        attention_mask = attention_mask + ([pad_mask] * padding_length)
        token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length)

        assert len(input_ids) == max_seq_len, "Error with input length {} vs {}".format(len(input_ids), max_seq_len)
        assert len(attention_mask) == max_seq_len, "Error with attention mask length {} vs {}".format(len(attention_mask), max_seq_len)
        assert len(token_type_ids) == max_seq_len, "Error with token type length {} vs {}".format(len(token_type_ids), max_seq_len)
        assert len(heads) == len(example.slot_labels)

        # Log the first few examples for inspection.
        if ex_index < 5:
            logger.info("*** Example ***")
            logger.info("guid: %s" % example.guid)
            for label, values in (("tokens", tokens),
                                  ("input_ids", input_ids),
                                  ("attention_mask", attention_mask),
                                  ("token_type_ids", token_type_ids),
                                  ("heads", heads)):
                logger.info("%s: %s" % (label, " ".join([str(x) for x in values])))

        feature = InputExample(guid=example.guid,
                               words=input_ids,
                               chars=example.chars,
                               heads=heads,
                               attention_mask=attention_mask,
                               token_type_ids=token_type_ids,
                               intent_label=example.intent_label,
                               slot_labels=example.slot_labels,
                               text=example.text)
        features.append(feature)

    return features


class Vocab(object):
    """Bidirectional token <-> index table with four reserved special tokens.

    ``<PAD>``, ``<UNK>``, ``<s>`` and ``</s>`` are registered first and so
    occupy indices 0-3. Any other token is registered once it has been seen
    ``min_freq`` times.

    Args:
        min_freq: number of occurrences required before a regular token gets
            an index. When greater than 1, one leading and one trailing
            non-alphanumeric character are stripped from tokens in ``add``.
    """

    def __init__(self, min_freq=1):
        self.min_freq = min_freq
        self.word2index = {}
        self.index2word = []
        self.special_tokens = ['<PAD>', '<UNK>', '<s>', '</s>']

        # Occurrence counter for every token passed to ``add``.
        self.count = {}

        self.pad_token = '<PAD>'
        self.pad_index = 0
        self.add(self.pad_token)

        self.unk_token = '<UNK>'
        self.unk_index = 1
        self.add(self.unk_token)

        self.start_token = '<s>'
        self.start_index = 2
        self.add(self.start_token)

        self.end_token = '</s>'
        self.end_index = 3
        self.add(self.end_token)

    def add(self, token):
        """Count ``token`` and register it once it is eligible.

        Lists and tuples are traversed recursively. A token that already has
        an index is never registered again, so seeing a special token in the
        data cannot duplicate it or move its index.
        """
        if isinstance(token, (list, tuple)):
            for element in token:
                self.add(element)
            return

        assert isinstance(token, str)

        if self.min_freq > 1 and token not in self.special_tokens:
            # Strip one surrounding punctuation character on each side.
            if len(token) > 1 and not token[0].isalnum():
                token = token[1:]

            if len(token) > 1 and not token[-1].isalnum():
                token = token[:-1]

        self.count[token] = self.count.get(token, 0) + 1

        if token in self.word2index:
            return

        if token in self.special_tokens or self.count[token] >= self.min_freq:
            self.word2index[token] = len(self.index2word)
            self.index2word.append(token)

    def get_index(self, token):
        """Return the index of ``token`` (``unk_index`` when unknown).

        A list or tuple is mapped element-wise and returned as a list.
        """
        if isinstance(token, (list, tuple)):
            return [self.get_index(element) for element in token]

        assert isinstance(token, str)

        return self.word2index.get(token, self.unk_index)

    def get_token(self, index):
        """Return the token stored at ``index``.

        A list or tuple is mapped element-wise and returned as a list.
        """
        if isinstance(index, (list, tuple)):
            return [self.get_token(element) for element in index]

        assert isinstance(index, int)
        return self.index2word[index]

    def save(self, path):
        """Persist the index -> token list to ``path`` with ``torch.save``."""
        torch.save(self.index2word, path)

    def load(self, path):
        """Replace the table with the token list stored at ``path``."""
        self.index2word = torch.load(path)
        self.word2index = {word: i for i, word in enumerate(self.index2word)}

    def __len__(self):
        return len(self.index2word)

    def __str__(self):
        return f'Vocab object with {len(self.index2word)} instances'


class InputExample(object):
    """
    Container for one example of the joint intent / slot task.

    Args:
        guid: Unique id for the example.
        words: list of ids for the sequence.
        chars: (Optional) per-word lists of character ids.
        heads: (Optional) list of positions, one per slot label.
        attention_mask: (Optional) list. Attention mask for ``words``.
        token_type_ids: (Optional) list. Segment ids for ``words``.
        intent_label: (Optional) The intent label of the example.
        slot_labels: (Optional) list. The slot labels of the example.
        text: (Optional) list. The original words of the sentence.
    """

    def __init__(
        self,
        guid,
        words,
        chars=None,
        heads=None,
        attention_mask=None,
        token_type_ids=None,
        intent_label=None,
        slot_labels=None,
        text=None,
    ):
        self.guid = guid
        self.words = words
        self.chars = chars
        self.heads = heads
        self.attention_mask = attention_mask
        self.token_type_ids = token_type_ids
        self.intent_label = intent_label
        self.slot_labels = slot_labels
        self.text = text

    def __repr__(self):
        # The printable form is the JSON dump of all fields.
        return str(self.to_json_string())

    def to_dict(self):
        """Return a deep copy of the instance attributes as a dictionary."""
        return copy.deepcopy(vars(self))

    def to_json_string(self):
        """Return the attributes as key-sorted, indented JSON plus a newline."""
        payload = json.dumps(self.to_dict(), indent=2, sort_keys=True)
        return payload + "\n"



class TextLoader(Dataset):
    """Dataset of joint intent / slot examples read from ``<data_dir>/<task>/<mode>.txt``.

    On construction the label inventories are loaded, the word and character
    vocabularies are loaded from (or, for ``mode == 'train'``, written to)
    cache files inside ``args.data_dir``, and every sentence is converted to
    an ``InputExample``.

    Args:
        args: namespace providing ``data_dir``, ``task``, ``min_freq`` and the
            label-file settings read by ``get_intent_labels`` /
            ``get_slots_all``; ``load_bert`` and ``__getitem__`` additionally
            read ``ignore_index``, ``max_seq_len`` and ``model_type``.
        mode: split name; also the stem of the data file.
    """

    def __init__(self, args, mode):
        self.args = args
        self.intent_labels = get_intent_labels(args)
        self.slot_labels, self.hiers = get_slots_all(args)

        self.vocab = Vocab(min_freq=self.args.min_freq)
        self.chars = Vocab()
        self.examples = self.build(mode)

    def load_bert(self, tokenizer):
        """Replace the examples with their sub-word features for ``tokenizer``."""
        pad_token_label_id = self.args.ignore_index
        self.examples = convert_examples_to_features(self.examples, self.args.max_seq_len, tokenizer,
                                                     pad_token_label_id=pad_token_label_id)

    @classmethod
    def read_file(cls, input_file, quotechar=None):
        """Read a data file.

        A line with two whitespace-separated items is a ``word slot`` pair of
        the current sentence. A line with a single item closes the sentence
        and carries its intent; if that item contains ``/`` only the part
        after the first ``/`` is kept. All other lines are ignored.

        :param input_file: path of data file.
        :param quotechar: unused.
        :return: list of sentence, list of slot and list of intent.
        """
        texts, slots, intents = [], [], []
        text, slot = [], []

        with open(input_file, 'r', encoding="utf8") as fr:
            # Stream the file instead of materialising every line up front.
            for line in fr:
                items = line.strip().split()

                if len(items) == 1:
                    texts.append(text)
                    slots.append(slot)
                    if "/" not in items[0]:
                        intents.append(items)
                    else:
                        intents.append([items[0].split("/")[1]])

                    # Start a fresh sentence buffer.
                    text, slot = [], []

                elif len(items) == 2:
                    text.append(items[0])
                    slot.append(items[1])

        return texts, slots, intents

    @staticmethod
    def _first_index_map(labels):
        """Map each label to the index of its first occurrence in ``labels``."""
        mapping = {}
        for position, label in enumerate(labels):
            mapping.setdefault(label, position)
        return mapping

    def _create_examples(self, texts, chars, intents, slots, set_type):
        """Build one ``InputExample`` per sentence.

        Word ids are wrapped in start/end ids, character ids are right-padded
        with 0 to the longest word of the sentence and wrapped in all-zero
        rows, intents (joined by ``#`` in the data) become a multi-hot vector
        and slot ids are wrapped in the ``PAD`` slot id. Unknown intents and
        slots fall back to the ``UNK`` label.
        """
        # Built once so each label lookup is O(1) instead of a list scan.
        intent_index = self._first_index_map(self.intent_labels)
        slot_index = self._first_index_map(self.slot_labels)

        examples = []
        for i, (text, char, intent, slot) in enumerate(zip(texts, chars, intents, slots)):
            guid = "%s-%s" % (set_type, i)
            # 1. input_text
            words = self.vocab.get_index(text)
            words = [self.vocab.start_index] + words + [self.vocab.end_index]
            # char
            char = self.chars.get_index(char)
            # default=0 keeps an empty sentence from crashing max().
            max_char = max((len(ids) for ids in char), default=0)
            char = [ids + [0] * (max_char - len(ids)) for ids in char]
            char = [[0] * max_char] + char + [[0] * max_char]
            # 2. intent
            intent_label = [0] * len(self.intent_labels)
            for name in intent[0].split('#'):
                idx = intent_index.get(name)
                if idx is None:
                    idx = self.intent_labels.index("UNK")
                intent_label[idx] = 1
            # 3. slot
            slot_labels = []
            for s in slot:
                idx = slot_index.get(s)
                if idx is None:
                    idx = self.slot_labels.index("UNK")
                slot_labels.append(idx)
            pad_slot = self.slot_labels.index('PAD')
            slot_labels = [pad_slot] + slot_labels + [pad_slot]
            assert len(words) == len(slot_labels)
            examples.append(InputExample(guid=guid, words=words, chars=char, intent_label=intent_label, slot_labels=slot_labels, text=text))
        return examples

    def build(self, mode):
        """Read the ``mode`` split, prepare the vocabularies and build the examples.

        An existing cache file always wins; otherwise the vocabulary is built
        and cached only when ``mode == 'train'``.
        """
        data_path = os.path.join(self.args.data_dir, self.args.task, mode + '.txt')
        logger.info("LOOKING AT {}".format(data_path))
        texts, slots, intents = self.read_file(data_path)

        # Character view of every word of every sentence.
        chars = [[list(word) for word in text] for text in texts]

        cache = os.path.join(self.args.data_dir, f'vocab_{self.args.task}')
        if os.path.exists(cache):
            self.vocab.load(cache)
        elif mode == 'train':
            self.vocab.add(texts)
            self.vocab.save(cache)
        cache_chars = os.path.join(self.args.data_dir, f'chars_{self.args.task}')
        if os.path.exists(cache_chars):
            self.chars.load(cache_chars)
        elif mode == 'train':
            self.chars.add(chars)
            self.chars.save(cache_chars)

        return self._create_examples(texts=texts,
                                     chars=chars,
                                     intents=intents,
                                     slots=slots,
                                     set_type=mode)

    def __getitem__(self, index):
        """Return the tensors of one example.

        With ``'bert'`` in ``args.model_type`` the tuple is
        ``(words, chars, heads, attention_mask, token_type_ids, intent, slot)``,
        otherwise ``(words, chars, intent, slot)``.
        """
        example = self.examples[index]

        words = torch.tensor(example.words, dtype=torch.long)

        intent = torch.tensor(example.intent_label, dtype=torch.float)
        slot = torch.tensor(example.slot_labels, dtype=torch.long)
        chars = torch.tensor(example.chars, dtype=torch.long)

        if 'bert' in self.args.model_type:
            attention_mask = torch.tensor(example.attention_mask, dtype=torch.long)
            token_type_ids = torch.tensor(example.token_type_ids, dtype=torch.long)
            heads = torch.tensor(example.heads, dtype=torch.long)
            return (words, chars, heads, attention_mask, token_type_ids, intent, slot)
        else:
            return (words, chars, intent, slot)

    def __len__(self):
        return len(self.examples)

class TextCollate():
    """Collate function that pads a list of ``TextLoader`` items into batch tensors.

    Samples are ordered by decreasing slot-sequence length. A sample with
    more than four fields is treated as a BERT sample.

    Args:
        pad_index: stored on the instance (padding itself is done with zeros).
        num_intents: width of the intent tensor.
        max_seq_len: width of the BERT input tensors.
    """

    def __init__(self, pad_index, num_intents, max_seq_len):
        self.pad_index = pad_index
        self.num_intents = num_intents
        self.max_seq_len = max_seq_len

    def __call__(self, batch):
        """Pad and stack ``batch``.

        Returns ``(text, chars, intent, slot, seq_lens)`` for non-BERT samples
        and ``(input_ids, chars, heads, attention_mask, token_type_ids,
        intent, slot, seq_lens)`` for BERT samples.
        """
        n_samples = len(batch)
        len_list = [len(sample[-1]) for sample in batch]
        len_char = [sample[1].size(1) for sample in batch]
        max_len = max(len_list)
        max_char = max(len_char)

        bert = len(batch[0]) > 4

        # All outputs start out zero-filled.
        char_padded = torch.zeros(n_samples, max_len, max_char, dtype=torch.long)
        slot_padded = torch.zeros(n_samples, max_len, dtype=torch.long)
        intent = torch.zeros(n_samples, self.num_intents, dtype=torch.float32)

        if bert:
            input_ids = torch.zeros(n_samples, self.max_seq_len, dtype=torch.long)
            attention_mask = torch.zeros(n_samples, self.max_seq_len, dtype=torch.long)
            token_type_ids = torch.zeros(n_samples, self.max_seq_len, dtype=torch.long)
            heads = torch.zeros(n_samples, max_len, dtype=torch.long)
        else:
            text_padded = torch.zeros(n_samples, max_len, dtype=torch.long)

        # Longest sequence first.
        order = np.argsort(len_list)[::-1]
        seq_lens = [len_list[src] for src in order]

        for row, src in enumerate(order):
            sample = batch[src]

            intent[row] = sample[-2]

            slot = sample[-1]
            slot_padded[row, :slot.size(0)] = slot

            char = sample[1]
            char_padded[row, :char.size(0), :char.size(1)] = char

            if bert:
                input_ids[row] = sample[0]
                attention_mask[row] = sample[3]
                token_type_ids[row] = sample[4]
                head = sample[2]
                heads[row, :head.size(0)] = head
            else:
                text = sample[0]
                text_padded[row, :text.size(0)] = text

        lengths = torch.tensor(seq_lens, dtype=torch.long)
        if bert:
            return input_ids, char_padded, heads, attention_mask, token_type_ids, intent, slot_padded, lengths
        return text_padded, char_padded, intent, slot_padded, lengths


#train_dataset = TextLoader(args, 'train')
#print(train_dataset[0])
# for x in train_dataset[0]:
#   print(x)
#   print(x.shape)
#   print("--")

#print([x for x in train_dataset[0]])


# utils

In [None]:
import os
import random
import logging

import torch
import numpy as np
from seqeval.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import average_precision_score, precision_recall_curve

from transformers import BertConfig
from transformers import BertTokenizer

# Registry keyed by model type. Each value is a 3-tuple; load_tokenizer()
# reads the entry at index 2 and calls from_pretrained() on it.
# NOTE(review): JointBERT is neither defined nor imported in the visible part
# of this file -- confirm it is in scope before this statement runs.
MODEL_CLASSES = {
    #"lstm": (None, JointLSTM, None),
    'bert': (BertConfig, JointBERT, BertTokenizer)
}

# String associated with each model type.
# NOTE(review): presumably the default pretrained checkpoint name; no visible
# code reads this map -- confirm against callers.
MODEL_PATH_MAP = {
    #"lstm": "",
    'bert': 'bert-base-uncased'
}


def get_intent_labels(args):
    """Return the stripped lines of ``<data_dir>/<task>/<intent_label_file>``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the labels have been read.
    """
    label_path = os.path.join(args.data_dir, args.task, args.intent_label_file)
    with open(label_path, 'r', encoding='utf-8') as label_file:
        return [label.strip() for label in label_file]


def get_slot_labels(args):
    """Return the stripped lines of ``<data_dir>/<task>/<slot_label_file>``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the labels have been read.
    """
    label_path = os.path.join(args.data_dir, args.task, args.slot_label_file)
    with open(label_path, 'r', encoding='utf-8') as label_file:
        return [label.strip() for label in label_file]

def get_clean_labels(args):
    """Return the stripped lines of ``<data_dir>/<task>/<slot_label_clean>``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the labels have been read.
    """
    label_path = os.path.join(args.data_dir, args.task, args.slot_label_clean)
    with open(label_path, 'r', encoding='utf-8') as label_file:
        return [label.strip() for label in label_file]

def get_slots_all(args):
    """Return the slot labels together with a tuple of label groupings.

    The tuple always ends with the sorted, de-duplicated slot names obtained
    by dropping the ``B-`` / ``I-`` prefix. For the ``mixatis`` task it is
    preceded by the labels returned by ``get_clean_labels``.
    """
    slot_labels = get_slot_labels(args)

    levels = []
    if args.task == 'mixatis':
        levels.append(get_clean_labels(args))

    slot_types = {name[2:] for name in slot_labels if name.startswith(('B-', 'I-'))}
    levels.append(sorted(slot_types))

    return slot_labels, tuple(levels)



def load_tokenizer(args):
    """Instantiate the tokenizer registered for ``args.model_type``.

    The tokenizer class is the third entry of the ``MODEL_CLASSES`` tuple and
    is loaded from ``args.model_name_or_path``.
    """
    tokenizer_cls = MODEL_CLASSES[args.model_type][2]
    return tokenizer_cls.from_pretrained(args.model_name_or_path)


def init_logger():
    """Configure root logging at INFO level with a timestamped format."""
    settings = {
        'format': '%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        'datefmt': '%m/%d/%Y %H:%M:%S',
        'level': logging.INFO,
    }
    logging.basicConfig(**settings)


def set_seed(args):
    """Seed Python, NumPy and PyTorch RNGs with ``args.seed``.

    The CUDA generators are seeded as well unless ``args.no_cuda`` is set or
    CUDA is unavailable.
    """
    seed = args.seed
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.manual_seed_all(seed)


def compute_metrics(intent_preds, intent_labels, slot_preds, slot_labels):
    """Gather intent, slot and sentence-level metrics into one dictionary.

    All four inputs must have the same length. In addition to the metrics of
    the three helper functions the result contains ``mean_intent_slot``, the
    average of ``intent_acc`` and ``slot_f1``.
    """
    sizes = {len(intent_preds), len(intent_labels), len(slot_preds), len(slot_labels)}
    assert len(sizes) == 1

    intent_result = get_intent_acc(intent_preds, intent_labels)
    slot_result = get_slot_metrics(slot_preds, slot_labels)
    semantic_result = get_sentence_frame_acc(intent_preds, intent_labels, slot_preds, slot_labels)

    mean_intent_slot = (intent_result["intent_acc"] + slot_result["slot_f1"]) / 2

    return {
        **intent_result,
        **slot_result,
        **semantic_result,
        "mean_intent_slot": mean_intent_slot,
    }


def get_slot_metrics(preds, labels):
    """Return slot precision, recall and F1 of ``preds`` against ``labels``.

    Each scorer is called as ``scorer(labels, preds)``.
    """
    assert len(preds) == len(labels)

    scorers = (
        ("slot_precision", precision_score),
        ("slot_recall", recall_score),
        ("slot_f1", f1_score),
    )
    return {metric: scorer(labels, preds) for metric, scorer in scorers}


def get_intent_acc(preds, labels):
    """Score multi-label intent predictions.

    ``intent_acc`` is the fraction of rows in which every entry of ``preds``
    equals ``labels``. ``intent_f1`` is the F1 computed over all entries
    equal to 1; it is 0.0 when precision and recall are both zero.
    """
    row_matches = (preds == labels).all(1)
    acc = row_matches.mean()

    predicted_on = preds == 1.
    gold_on = labels == 1.
    hits = np.logical_and(predicted_on, gold_on).sum()

    n_predicted = np.sum(predicted_on)
    n_gold = np.sum(gold_on)

    # Guard both ratios against an empty denominator.
    precision = hits / n_predicted if n_predicted > 0 else 0.0
    recall = hits / n_gold if n_gold > 0 else 0.0

    if precision + recall == 0.0:
        f1 = 0.0
    else:
        f1 = 2 * precision * recall / (precision + recall)

    return {
        "intent_acc": acc,
        "intent_f1": f1,
    }


def read_prediction_text(args):
    """Return the stripped lines of ``<pred_dir>/<pred_input_file>``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the lines have been read.
    """
    input_path = os.path.join(args.pred_dir, args.pred_input_file)
    with open(input_path, 'r', encoding='utf-8') as text_file:
        return [text.strip() for text in text_file]


def get_sentence_frame_acc(intent_preds, intent_labels, slot_preds, slot_labels):
    """Sentence-level accuracy of slots alone and of intent plus slots.

    ``slot_acc`` is the fraction of sentences whose slot sequence is entirely
    correct; ``semantic_frame_acc`` additionally requires every intent entry
    of the sentence to match.
    """
    # One boolean per sentence: are all intent entries right?
    intent_ok = (intent_preds == intent_labels).all(1)

    # One boolean per sentence: are all slots right?
    slot_flags = []
    for sent_preds, sent_labels in zip(slot_preds, slot_labels):
        assert len(sent_preds) == len(sent_labels)
        has_mismatch = any(p != l for p, l in zip(sent_preds, sent_labels))
        slot_flags.append(not has_mismatch)
    slot_ok = np.array(slot_flags)

    return {
        "semantic_frame_acc": np.multiply(intent_ok, slot_ok).mean(),
        "slot_acc": slot_ok.mean()
    }


# Model

In [None]:
from __future__ import annotations

from typing import Callable, Optional

import torch
import torch.nn as nn
import numpy as np
import math
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence
from torch.nn.utils.rnn import pad_packed_sequence

class Biaffine(nn.Module):
    r"""
    Biaffine layer for first-order scoring :cite:`dozat-etal-2017-biaffine`.
    This function has a tensor of weights :math:`W` and bias terms if needed.
    The score :math:`s(x, y)` of the vector pair :math:`(x, y)` is computed as :math:`x^T W y / d^s`,
    where `d` is the feature size of :math:`x` and `s` is the scaling factor.
    :math:`x` and :math:`y` can be concatenated with bias terms.
    Args:
        n_x (int):
            The feature size of :math:`x`.
        n_y (int):
            The feature size of :math:`y`.
        n_out (int):
            The number of output channels. Default: 1.
        dropout (Optional[float]):
            Stored on the module; not applied by this class. Default: 0.
        scale (int):
            Exponent `s` of the scaling divisor. Default: 0 (no scaling).
        bias_x (bool):
            If ``True``, appends a constant 1 feature to :math:`x`. Default: ``False``.
        bias_y (bool):
            If ``True``, appends a constant 1 feature to :math:`y`. Default: ``False``.
        init (Callable):
            Callable initialization method applied to the weight. Default: `nn.init.zeros_`.
    """

    def __init__(
        self,
        n_x: int,
        n_y: int,
        n_out: int = 1,
        dropout: Optional[float] = 0,
        scale: int = 0,
        bias_x: bool = False,
        bias_y: bool = False,
        init: Callable = nn.init.zeros_
    ) -> None:
        super().__init__()

        self.n_x = n_x
        self.n_y = n_y
        self.n_out = n_out
        self.dropout = dropout
        self.scale = scale
        self.bias_x = bias_x
        self.bias_y = bias_y
        self.init = init

        # Each enabled bias adds one row / column to the weight.
        # The tensor is allocated uninitialised and filled by reset_parameters().
        self.weight = nn.Parameter(torch.empty(n_out, n_x + int(bias_x), n_y + int(bias_y)))

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the weight with the configured ``init`` callable."""
        self.init(self.weight)

    def forward(
        self,
        x: torch.Tensor,
        y: torch.Tensor
    ) -> torch.Tensor:
        r"""
        Args:
            x (torch.Tensor): ``[batch_size, len_x, n_x]``.
            y (torch.Tensor): ``[batch_size, len_y, n_y]``.
        Returns:
            ~torch.Tensor:
                A scoring tensor of shape ``[batch_size, n_out, len_x, len_y]``.
                If ``n_out=1``, the dimension for ``n_out`` will be squeezed automatically.
        """
        if self.bias_x:
            x = torch.cat((x, torch.ones_like(x[..., :1])), -1)
        if self.bias_y:
            y = torch.cat((y, torch.ones_like(y[..., :1])), -1)
        # [batch_size, n_out, len_x, len_y]
        s = torch.einsum('bxi,oij,byj->boxy', x, self.weight, y)
        # The divisor is n_x ** scale, i.e. 1 for the default scale of 0.
        return s.squeeze(1) / self.n_x ** self.scale


class IntentClassifier(nn.Module):
    """Dropout followed by a linear projection to the intent labels.

    Args:
        input_dim: size of the incoming feature vector.
        num_intent_labels: number of output units.
        dropout_rate: dropout probability applied to the input.
    """

    def __init__(self, input_dim, num_intent_labels, dropout_rate=0.):
        super().__init__()
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(input_dim, num_intent_labels)

    def forward(self, x):
        """Return one score per intent label for ``x``."""
        dropped = self.dropout(x)
        logits = self.linear(dropped)
        return logits


class SlotClassifier(nn.Module):
    """Project token features to a hidden slot space and score slot labels.

    Args:
        input_dim: size of the incoming token features.
        num_intent_labels: stored on the module.
        num_slot_labels: number of slot scores produced per token.
        max_seq_len: stored on the module.
        attention_embedding_size: size of the hidden slot representation.
        dropout_rate: dropout probability applied to the hidden representation.
    """

    def __init__(
        self,
        input_dim,
        num_intent_labels,
        num_slot_labels,
        max_seq_len=50,
        attention_embedding_size=200,
        dropout_rate=0.0,
    ):
        super().__init__()
        self.max_seq_len = max_seq_len
        self.num_intent_labels = num_intent_labels
        self.num_slot_labels = num_slot_labels
        self.attention_embedding_size = attention_embedding_size

        hidden_size = self.attention_embedding_size
        self.linear_slot = nn.Linear(input_dim, hidden_size, bias=False)

        # Output head.
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(hidden_size, num_slot_labels)
        self.tanh = nn.Tanh()
        self.relu = nn.LeakyReLU(0.2)

    def forward(self, x):
        """Return ``(hidden, logits)`` for the token features ``x``."""
        hidden = self.dropout(self.relu(self.linear_slot(x)))
        logits = self.linear(hidden)
        return hidden, logits
class ScaledDotProductAttention(nn.Module):
    ''' Scaled dot-product attention over 4-D query / key / value tensors.

    Args:
        temperature: divisor applied to the queries before the dot product.
        attn_dropout: dropout probability applied to the attention weights.
    '''

    def __init__(self, temperature, attn_dropout=0.1):
        super().__init__()
        self.temperature = temperature
        self.dropout = nn.Dropout(attn_dropout)

    def forward(self, q, k, v, mask=None):
        ''' Return ``(output, attention_weights)``.

        Positions where ``mask`` is true are filled with -1e9 before the
        softmax.
        '''
        scaled_q = q / self.temperature
        scores = torch.matmul(scaled_q, k.transpose(2, 3))
        if mask is not None:
            scores = scores.masked_fill(mask, -1e9)
        weights = self.dropout(F.softmax(scores, dim=-1))
        return torch.matmul(weights, v), weights

class MultiHeadAttention(nn.Module):
    ''' Multi-head attention with an optional residual connection and layer norm.

    Args:
        n_head: number of attention heads.
        d_model: size of the input and output features.
        d_k: per-head size of queries and keys.
        d_v: per-head size of values.
        dropout: dropout probability applied after the output projection.
        residual: when true the query input is added back before the norm.
    '''

    def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1, residual=True):
        super().__init__()

        self.n_head = n_head
        self.d_model = d_model
        self.d_k = d_k
        self.d_v = d_v

        self.w_qs = nn.Linear(d_model, n_head * d_k, bias=False)
        self.w_ks = nn.Linear(d_model, n_head * d_k, bias=False)
        self.w_vs = nn.Linear(d_model, n_head * d_v, bias=False)
        self.fc = nn.Linear(n_head * d_v, d_model, bias=False)

        self.attention = ScaledDotProductAttention(temperature=d_k ** 0.5)

        self.dropout = nn.Dropout(dropout)
        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)

        self.residual = residual

    def forward(self, q, k, v, mask=None):
        ''' Attend from ``q`` over ``k`` / ``v`` and return the normalised result.

        The inputs are indexed as ``(batch, length, d_model)``; the output
        has the batch size and length of ``q``.
        '''
        n_head = self.n_head
        batch_size = q.size(0)
        skip = q

        def split_heads(projection, tensor, width):
            # b x len x (n*width) -> b x n x len x width
            projected = projection(tensor).view(batch_size, tensor.size(1), n_head, width)
            return projected.transpose(1, 2)

        len_q = q.size(1)
        q = split_heads(self.w_qs, q, self.d_k)
        k = split_heads(self.w_ks, k, self.d_k)
        v = split_heads(self.w_vs, v, self.d_v)

        if mask is not None:
            # Extra axis so the mask broadcasts over the heads.
            mask = mask.unsqueeze(1)
        context, _ = self.attention(q, k, v, mask=mask)

        # Move the head axis back and concatenate the heads: b x lq x (n*dv)
        merged = context.transpose(1, 2).contiguous().view(batch_size, len_q, -1)
        out = self.dropout(self.fc(merged))
        if self.residual:
            out = out + skip

        return self.layer_norm(out)


class LSTMEncoder(nn.Module):
    """
    Encoder structure based on bidirectional LSTM.

    Each direction uses ``hidden_dim // 2`` units, so the concatenated output
    has ``2 * (hidden_dim // 2)`` features. Dropout is applied to the input
    embeddings only.

    :param embedding_dim: size of the input embeddings.
    :param hidden_dim: total hidden size shared by both directions.
    :param dropout_rate: dropout probability applied to the embeddings.
    """

    def __init__(self, embedding_dim, hidden_dim, dropout_rate):
        super(LSTMEncoder, self).__init__()

        # Parameter recording.
        self.__embedding_dim = embedding_dim
        self.__hidden_dim = hidden_dim // 2
        self.__dropout_rate = dropout_rate

        # Network attributes.
        self.__dropout_layer = nn.Dropout(self.__dropout_rate)
        # nn.LSTM's own ``dropout`` only acts between stacked layers; with a
        # single layer it has no effect and only triggers a UserWarning, so
        # it is not passed here.
        self.__lstm_layer = nn.LSTM(
            input_size=self.__embedding_dim,
            hidden_size=self.__hidden_dim,
            batch_first=True,
            bidirectional=True,
            num_layers=1
        )

    def forward(self, embedded_text, seq_lens):
        """ Forward process for LSTM Encoder.

        (batch_size, max_sent_len, word_dim)
        -> (batch_size, max_sent_len, hidden_dim)

        :param embedded_text: padded and embedded input text.
        :param seq_lens: lengths of the original sequences (list or tensor).
        :return: encoded word hidden vectors, padded with zeros.
        """
        dropout_text = self.__dropout_layer(embedded_text)

        # pack_padded_sequence requires the lengths to live on the CPU.
        if torch.is_tensor(seq_lens):
            seq_lens = seq_lens.cpu()

        # Pack and Pad process for input of variable length.
        packed_text = pack_padded_sequence(dropout_text, seq_lens, batch_first=True, enforce_sorted=False)
        lstm_hiddens, _ = self.__lstm_layer(packed_text)
        padded_hiddens, _ = pad_packed_sequence(lstm_hiddens, batch_first=True)

        return padded_hiddens

class Encoder(nn.Module):
    """Sentence encoder that concatenates self-attention and BiLSTM features.

    Depending on ``args.use_charlstm`` / ``args.use_charcnn`` character-level
    features are appended along the last dimension.

    Args:
        args: namespace providing ``word_embedding_dim``,
            ``encoder_hidden_dim``, ``dropout_rate``, ``attention_hidden_dim``,
            ``attention_output_dim``, ``use_charlstm``, ``use_charcnn`` and,
            for the character encoders, ``n_chars``, ``char_embed``,
            ``char_out`` and ``no_cuda``.
    """

    def __init__(self, args):
        super().__init__()

        self.__args = args

        # Initialize an LSTM Encoder object.
        self.__encoder = LSTMEncoder(
            self.__args.word_embedding_dim,
            self.__args.encoder_hidden_dim,
            self.__args.dropout_rate
        )

        if args.use_charlstm:
            self.charlstm = CharLSTM(
                self.__args.n_chars,
                self.__args.char_embed,
                self.__args.char_out
            )

        if args.use_charcnn:
            # Fall back to the CPU when CUDA is requested but unavailable,
            # mirroring the check done in set_seed().
            use_cuda = not args.no_cuda and torch.cuda.is_available()
            device = 'cuda' if use_cuda else 'cpu'
            self.charcnn = CharCNN(
                input_length=15,
                input_dim=args.n_chars,
                n_fc_neurons=args.char_out,
                device=device
            )

        # Initialize a self-attention layer.
        self.__attention = SelfAttention(
            self.__args.word_embedding_dim,
            self.__args.attention_hidden_dim,
            self.__args.attention_output_dim,
            self.__args.dropout_rate
        )

    def forward(self, word_tensor, char_tensor, seq_lens):
        """Encode ``word_tensor`` and append the enabled character features.

        :param word_tensor: embedded words passed to both the BiLSTM and the
            self-attention layer.
        :param char_tensor: character input for the optional char encoders.
        :param seq_lens: sequence lengths.
        :return: attention, LSTM and character features concatenated on dim 2.
        """
        lstm_hiddens = self.__encoder(word_tensor, seq_lens)
        attention_hiddens = self.__attention(word_tensor, seq_lens)
        hiddens = torch.cat([attention_hiddens, lstm_hiddens], dim=2)
        if self.__args.use_charlstm:
            char_lstm = self.charlstm(char_tensor)
            hiddens = torch.cat([hiddens, char_lstm], dim=2)
        if self.__args.use_charcnn:
            char_cnn = self.charcnn(char_tensor)
            hiddens = torch.cat([hiddens, char_cnn], dim=2)
        return hiddens

class QKVAttention(nn.Module):
    """
    Query-Key-Value attention. Queries and keys are projected to
    ``hidden_dim``, values to ``output_dim``; feeding the same tensor as
    query, key and value gives self-attention.
    """

    def __init__(self, query_dim, key_dim, value_dim, hidden_dim, output_dim, dropout_rate):
        super(QKVAttention, self).__init__()

        # Hyper-parameters.
        self.__query_dim = query_dim
        self.__key_dim = key_dim
        self.__value_dim = value_dim
        self.__hidden_dim = hidden_dim
        self.__output_dim = output_dim
        self.__dropout_rate = dropout_rate

        # Layers.
        self.__query_layer = nn.Linear(self.__query_dim, self.__hidden_dim)
        self.__key_layer = nn.Linear(self.__key_dim, self.__hidden_dim)
        self.__value_layer = nn.Linear(self.__value_dim, self.__output_dim)
        self.__dropout_layer = nn.Dropout(p=self.__dropout_rate)

    def forward(self, input_query, input_key, input_value):
        """ Apply attention.

        The key and value tensors must have the same number of rows.

        :param input_query: is query tensor, (n, d_q)
        :param input_key:  is key tensor, (m, d_k)
        :param input_value:  is value tensor, (m, d_v)
        :return: attention based tensor, (n, output_dim)
        """
        projected_query = self.__query_layer(input_query)
        projected_key = self.__key_layer(input_key)
        projected_value = self.__value_layer(input_value)

        # Dot-product scores scaled by sqrt(hidden_dim).
        raw_scores = torch.matmul(projected_query, projected_key.transpose(-2, -1))
        weights = F.softmax(raw_scores / math.sqrt(self.__hidden_dim), dim=-1)

        attended = torch.matmul(weights, projected_value)
        return self.__dropout_layer(attended)


class SelfAttention(nn.Module):
    """Self-attention block: input dropout followed by QKV attention of the input on itself."""

    def __init__(self, input_dim, hidden_dim, output_dim, dropout_rate):
        super().__init__()

        # Hyper-parameters.
        self.__input_dim = input_dim
        self.__hidden_dim = hidden_dim
        self.__output_dim = output_dim
        self.__dropout_rate = dropout_rate

        # Sub-modules: query, key and value all share the input dimension.
        self.__dropout_layer = nn.Dropout(dropout_rate)
        self.__attention_layer = QKVAttention(
            query_dim=input_dim,
            key_dim=input_dim,
            value_dim=input_dim,
            hidden_dim=hidden_dim,
            output_dim=output_dim,
            dropout_rate=dropout_rate,
        )

    def forward(self, input_x, seq_lens):
        """Apply dropout to ``input_x`` and attend over it.

        :param input_x: input tensor whose last dimension is ``input_dim``
        :param seq_lens: accepted for interface compatibility; not used here,
            so padded positions are attended like any other position
        :return: attended tensor whose last dimension is ``output_dim``
        """
        x = self.__dropout_layer(input_x)
        return self.__attention_layer(x, x, x)

class CharLSTM(nn.Module):
    r"""
    Character-level token encoder.

    Every token is represented by the final hidden states of a bidirectional
    LSTM that reads the token's character embeddings.

    Args:
        n_chars (int):
            The number of characters in the vocabulary.
        n_embed (int):
            The size of each character embedding fed to the LSTM.
        n_out (int):
            The size of each output vector (``n_out // 2`` per direction).
        pad_index (int):
            The index of the padding character. Default: 0.
    """

    def __init__(self, n_chars, n_embed, n_out, pad_index=0):
        super().__init__()

        self.n_chars = n_chars
        self.n_embed = n_embed
        self.n_out = n_out
        self.pad_index = pad_index

        # character embeddings
        self.embed = nn.Embedding(num_embeddings=n_chars, embedding_dim=n_embed)
        # bidirectional encoder; the two directions are concatenated in forward()
        self.lstm = nn.LSTM(
            input_size=n_embed,
            hidden_size=n_out // 2,
            batch_first=True,
            bidirectional=True,
        )

    def __repr__(self):
        cls_name = type(self).__name__
        return f"{cls_name}({self.n_chars}, {self.n_embed}, n_out={self.n_out}, pad_index={self.pad_index})"

    def forward(self, x):
        r"""
        Args:
            x (~torch.Tensor): ``[batch_size, seq_len, fix_len]``.
                Character ids of all tokens, padded with ``pad_index``.
        Returns:
            ~torch.Tensor:
                Embeddings of shape ``[batch_size, seq_len, n_out]``; tokens made
                only of padding get an all-zero vector.
        """
        # [batch_size, seq_len]: number of real characters per token
        lens = x.ne(self.pad_index).sum(-1)
        # tokens that contain at least one real character
        char_mask = lens.gt(0)

        # [n, fix_len, n_embed]
        chars = self.embed(x[char_mask])
        packed = pack_padded_sequence(
            chars,
            lens[char_mask].tolist(),
            batch_first=True,
            enforce_sorted=False,
        )
        _, (hidden, _) = self.lstm(packed)
        # [n, n_out]: last hidden state of both directions side by side
        hidden = torch.cat(torch.unbind(hidden), -1)

        # [batch_size, seq_len, n_out]: write results back to their token slots
        embed = hidden.new_zeros(*lens.shape, self.n_out)
        return embed.masked_scatter_(char_mask.unsqueeze(-1), hidden)

class CharCNN(nn.Module):
    """Character-level token encoder built on ``CharacterLevelCNN``.

    Every character id is turned into a one-hot vector, each token is padded
    or truncated to ``input_length`` characters, and the resulting matrix is
    encoded by the CNN into one vector per token.

    Args:
        input_length (int): number of characters per token fed to the CNN.
        input_dim (int): size of the character vocabulary (one-hot width).
        n_conv_filters (int): number of filters of every convolution.
        n_fc_neurons (int): size of the output vector of each token.
        pad_index (int): id of the padding character. Default: 0.
        device: device on which the one-hot lookup table is first created.
    """

    def __init__(self, input_length=15, input_dim=50,
                 n_conv_filters=32,
                 n_fc_neurons=32, pad_index=0, device='cpu'):
        super(CharCNN, self).__init__()
        self.layer = CharacterLevelCNN(input_length, input_dim, n_conv_filters, n_fc_neurons)
        self.pad_index = pad_index
        self.n_vocab = input_dim
        # One-hot lookup table. Registered as a buffer so that it follows the
        # module through .to()/.cuda()/.half(); non-persistent so that the
        # state_dict keys stay exactly as before.
        self.register_buffer('identity', torch.eye(input_dim, device=device), persistent=False)
        self.n_out = n_fc_neurons
        self.input_length = input_length

    def forward(self, x):
        """Encode the characters of every token.

        Args:
            x (torch.Tensor): ``[batch_size, seq_len, fix_len]`` integer
                character ids, padded with ``pad_index``.
        Returns:
            torch.Tensor: ``[batch_size, seq_len, n_out]``; tokens made only of
            padding get an all-zero vector.
        """
        # [batch_size, seq_len]: number of real characters per token
        lens = x.ne(self.pad_index).sum(-1)
        # tokens that contain at least one real character
        char_mask = lens.gt(0)

        # [n, fix_len]
        feat = x[char_mask]
        if feat.size(0) == 0:
            # Nothing to encode (every token is padding): skip the CNN, which
            # cannot process an empty batch.
            return self.identity.new_zeros(*lens.shape, self.n_out)

        # [n, fix_len, n_vocab]: one-hot rows looked up in a single indexing op
        feat = self.identity[feat]
        n_char = feat.size(1)
        if n_char < self.input_length:
            # right-pad the character axis with all-zero rows
            padding = feat.new_zeros(feat.size(0), self.input_length - n_char, self.n_vocab)
            feat = torch.cat([feat, padding], dim=1)
        elif n_char > self.input_length:
            feat = feat[:, :self.input_length, :]
        # [n, n_out]
        out = self.layer(feat)

        # [batch_size, seq_len, n_out]: write results back to their token slots
        embed = out.new_zeros(*lens.shape, self.n_out)
        embed = embed.masked_scatter_(char_mask.unsqueeze(-1), out)

        return embed




class CharacterLevelCNN(nn.Module):
    """Three 1-D convolutions followed by two fully connected layers.

    The input is a ``[batch, input_length, input_dim]`` tensor (one row per
    character). It is convolved along the character axis, flattened and
    projected to ``n_fc_neurons`` features.

    Args:
        input_length (int): number of characters per item; must be at least 14
            so that the convolution stack leaves at least one position.
        input_dim (int): feature size of every character (channel count).
        n_conv_filters (int): number of filters of every convolution.
        n_fc_neurons (int): size of both fully connected layers.

    Raises:
        ValueError: if ``input_length`` is too short for the convolution stack.
    """

    def __init__(self, input_length=15, input_dim=30,
                 n_conv_filters=256,
                 n_fc_neurons=1024):
        super(CharacterLevelCNN, self).__init__()
        self.conv1 = nn.Sequential(nn.Conv1d(input_dim, n_conv_filters, kernel_size=3, padding=0), nn.ReLU(),
                                   nn.MaxPool1d(2))
        self.conv2 = nn.Sequential(nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=3, padding=0), nn.ReLU())
        self.conv3 = nn.Sequential(nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=3, padding=0), nn.ReLU(),
                                   nn.MaxPool1d(2))

        # Size of the flattened convolution output. It used to be hard-coded
        # to 32, which only matched input_length=15 with 32 filters; that
        # configuration still yields 32 here.
        conv_length = self._conv_output_length(input_length)
        if conv_length <= 0:
            raise ValueError(
                f"input_length={input_length} is too short for the convolution stack (minimum is 14)"
            )
        dimension = conv_length * n_conv_filters
        self.fc1 = nn.Sequential(nn.Linear(dimension, n_fc_neurons), nn.Dropout(0.5))
        self.fc2 = nn.Sequential(nn.Linear(n_fc_neurons, n_fc_neurons), nn.Dropout(0.5))

        self._create_weights(mean=0.0, std=0.05)

    @staticmethod
    def _conv_output_length(input_length):
        """Return the sequence length left after conv1, conv2 and conv3."""
        length = (input_length - 2) // 2  # conv1 (kernel 3, no padding) + max-pool(2)
        length = length - 2               # conv2 (kernel 3, no padding)
        length = (length - 2) // 2        # conv3 (kernel 3, no padding) + max-pool(2)
        return length

    def _create_weights(self, mean=0.0, std=0.05):
        """Re-initialise every convolution and linear weight from N(mean, std)."""
        for module in self.modules():
            if isinstance(module, nn.Conv1d) or isinstance(module, nn.Linear):
                module.weight.data.normal_(mean, std)

    def forward(self, input):
        """Encode ``input`` of shape ``[batch, input_length, input_dim]`` to ``[batch, n_fc_neurons]``."""
        # Conv1d expects channels first: [batch, input_dim, input_length]
        input = input.transpose(1, 2)
        output = self.conv1(input)
        output = self.conv2(output)
        output = self.conv3(output)

        # flatten filters x positions for the fully connected layers
        output = output.view(output.size(0), -1)
        output = self.fc1(output)
        output = self.fc2(output)

        return output

attention layer

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class AttentionLayer(nn.Module):
    """Label-wise attention over a sequence, with optional hierarchical label levels.

    For every label of a level the layer computes its own attention
    distribution over the sequence positions, builds a label-specific context
    vector and scores it with a label-specific weight vector.
    """

    def __init__(self,
                 args,
                 size: int,
                 level_projection_size: int = 0,
                 n_labels=None,
                 n_level: int = 1
                 ):
        """
        The init function
        :param args: the input parameters from commandline; ``attention_mode`` and ``d_a`` are read
        :param size: the input size of the layer, it is normally the output size of other DNN models,
            such as CNN, RNN
        :param level_projection_size: size of the previous-level projection that is
            concatenated to the context vectors of every level after the first
        :param n_labels: sequence with the number of labels of every level (indexed by level)
        :param n_level: the number of label levels
        """
        super(AttentionLayer, self).__init__()
        self.attention_mode = args.attention_mode

        self.size = size
        # d_a is the output size of the first linear layer; it falls back to the
        # input size when args.d_a is not positive.
        self.d_a = args.d_a if args.d_a > 0 else self.size

        self.n_labels = n_labels
        self.n_level = n_level

        self.level_projection_size = level_projection_size

        # Not used by forward(); kept so that the parameter set (and therefore
        # the state_dict keys) stays unchanged.
        self.linear = nn.Linear(self.size, self.size, bias=False)

        self.first_linears = nn.ModuleList([nn.Linear(self.size, self.d_a, bias=False) for _ in range(self.n_level)])
        self.second_linears = nn.ModuleList([nn.Linear(self.d_a, self.n_labels[label_lvl], bias=False) for label_lvl in range(self.n_level)])
        self.third_linears = nn.ModuleList([nn.Linear(self.size +
                                            (self.level_projection_size if label_lvl > 0 else 0),
                                            self.n_labels[label_lvl], bias=True) for label_lvl in range(self.n_level)])

        self._init_weights(mean=0.0, std=0.03)

    def _init_weights(self, mean=0.0, std=0.03) -> None:
        """
        Initialise the weights from a normal distribution (biases of the first
        two layer groups, if any, are zeroed)
        :param mean: mean of the normal distribution
        :param std: standard deviation of the normal distribution
        :return: None
        """
        for first_linear in self.first_linears:
            torch.nn.init.normal_(first_linear.weight, mean, std)
            if first_linear.bias is not None:
                first_linear.bias.data.fill_(0)

        for linear in self.second_linears:
            torch.nn.init.normal_(linear.weight, mean, std)
            if linear.bias is not None:
                linear.bias.data.fill_(0)
        for linear in self.third_linears:
            torch.nn.init.normal_(linear.weight, mean, std)

    def forward(self, x, previous_level_projection=None, label_level=0, masks=None):
        """
        :param x: [batch_size x max_len x dim (i.e., self.size)]

        :param previous_level_projection: the embeddings for the previous level output,
            [batch_size x level_projection_size]; concatenated to every context vector when given
        :param label_level: the current label level
        :param masks: accepted for interface compatibility; currently not used, so the
            attention is normalised over all max_len positions
        :return:
            Context vectors: [batch_size x n_labels x dim (+ level_projection_size)]
            Label scores: [batch_size x n_labels]
            Attention weights: [batch_size x n_labels x max_len]
        """
        weights = torch.tanh(self.first_linears[label_level](x))

        # One attention distribution over the sequence positions per label.
        att_weights = self.second_linears[label_level](weights)
        att_weights = F.softmax(att_weights, 1).transpose(1, 2)
        context_vector = att_weights @ x

        batch_size = context_vector.size(0)

        if previous_level_projection is not None:
            # Give every label of this level a copy of the previous-level projection.
            temp = [context_vector,
                    previous_level_projection.repeat(1, self.n_labels[label_level]).view(batch_size, self.n_labels[label_level], -1)]
            context_vector = torch.cat(temp, dim=2)

        # Label-wise dot product: row l of the weight matrix scores context vector l.
        weighted_output = self.third_linears[label_level].weight.mul(context_vector).sum(dim=2).add(
            self.third_linears[label_level].bias)

        return context_vector, weighted_output, att_weights

    # Using when use_regularisation = True
    @staticmethod
    def l2_matrix_norm(m):
        """
        Frobenius norm calculation
        :param m: {Variable} ||AAT - I||
        :return: regularized value
        """
        return torch.sum(torch.sum(torch.sum(m ** 2, 1), 1) ** 0.5)

def init_attention_layer(model, name, n_labels, n_levels, output_size):
    """Attach the attention and per-level output layers for ``name`` to ``model``.

    Registers, as sub-modules of ``model``:
      * ``attention_{name}`` - an ``AttentionLayer`` (only when
        ``model.attention_mode`` is not None);
      * ``linears_{name}`` - one classifier per level; levels after the first
        additionally take the previous-level projection as input;
      * ``projection_linears_{name}`` - one bias-free projection per level from
        the label scores to ``level_projection_size``.

    ``model.level_projection_size`` is set from ``model.args`` as a side effect.
    """
    model.level_projection_size = model.args.level_projection_size

    if model.attention_mode is not None:
        attention = AttentionLayer(
            args=model.args,
            size=output_size,
            level_projection_size=model.level_projection_size,
            n_labels=n_labels,
            n_level=n_levels,
        )
        model.add_module(f'attention_{name}', attention)

    classifiers = nn.ModuleList()
    projections = nn.ModuleList()
    for level in range(n_levels):
        # Only levels after the first receive the previous-level projection.
        extra_inputs = model.level_projection_size if level != 0 else 0
        classifiers.append(nn.Linear(output_size + extra_inputs, n_labels[level]))
        projections.append(nn.Linear(n_labels[level], model.level_projection_size, bias=False))

    model.add_module(f'linears_{name}', classifiers)
    model.add_module(f'projection_linears_{name}', projections)



def perform_attention(model, name, all_output, last_output, n_labels, n_levels):
    """Run the ``attention_{name}`` layer of ``model`` level by level.

    After every level the label scores are squashed (sigmoid when
    ``model.attention_mode`` is "label" or "caml", softmax over the labels
    otherwise), projected with ``projection_linears_{name}[level]``, passed
    through a sigmoid and handed to the next level as its previous-level
    projection.

    :param model: module owning ``attention_{name}`` and ``projection_linears_{name}``
    :param name: suffix that selects the sub-modules to use
    :param all_output: sequence features given to the attention layer
    :param last_output: not used; kept for interface compatibility
    :param n_labels: not used; kept for interface compatibility
    :param n_levels: number of label levels to run
    :return: three lists with one entry per level: context vectors, label
        scores and attention weights
    """
    previous_level_projection = None
    weighted_outputs = []
    attention_weights = []
    context_vectors = []
    for level in range(n_levels):
        attention = getattr(model, f'attention_{name}')
        context_vector, weighted_output, attention_weight = attention(
            all_output, previous_level_projection, label_level=level)

        if model.attention_mode in ["label", "caml"]:
            squashed = torch.sigmoid(weighted_output)
        else:
            squashed = torch.softmax(weighted_output, 1)
        projection = getattr(model, f'projection_linears_{name}')[level]
        previous_level_projection = torch.sigmoid(projection(squashed))

        weighted_outputs.append(weighted_output)
        attention_weights.append(attention_weight)
        context_vectors.append(context_vector)

    return context_vectors, weighted_outputs, attention_weights

class AttentionFlow(nn.Module):
    """Bidirectional attention between intent features (``x``) and token features (``y``).

    A ``Biaffine`` scorer relates every token to every intent; the scores are
    normalised in both directions and used to mix the linearly projected
    features of the other side.
    """

    def __init__(self, args, dim_x, dim_y, out_dim, dropout_rate=0.):
        super().__init__()

        self.linear_x = nn.Linear(dim_x, out_dim)
        self.linear_y = nn.Linear(dim_y, out_dim)
        # NOTE: the scorer takes its dropout from args, not from ``dropout_rate``.
        self.scorer = Biaffine(dim_y, dim_x, dropout=args.dropout_rate)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x, y):
        """Exchange information between intents and tokens.

        :param x: intent features, [bz, num_intent, dim_x]
        :param y: token features, [bz, seq_len, dim_y]
        :return: ``(out_intent, out_slot)``, the token-informed intent features
            and the intent-informed token features, both with ``out_dim`` as
            last dimension
        """
        # presumably [bz, seq_len, num_intent]; depends on Biaffine, defined elsewhere
        score = self.scorer(y, x)

        intent_proj = self.dropout(self.linear_x(x))
        token_proj = self.dropout(self.linear_y(y))

        # Each token attends over the intents, each intent attends over the tokens.
        token_to_intent = F.softmax(score, dim=-1)
        intent_to_token = F.softmax(score.transpose(1, 2), dim=-1)

        out_slot = torch.tanh(torch.bmm(token_to_intent, intent_proj))
        out_intent = torch.tanh(torch.bmm(intent_to_token, token_proj))

        return out_intent, out_slot

class HierCoAttention(nn.Module):
    """Hierarchical co-attention across an ordered chain of representations.

    Neighbouring inputs of the chain are related by a ``Biaffine`` scorer.
    Information is propagated once from the first input to the last (the
    "slots" output) and once from the last back to the first (the "intents"
    output).
    """

    def __init__(self, args, dims, out_dim, dropout_rate=0.):
        super().__init__()

        self.n_layers = len(dims)
        # Separate projections for the forward and the backward pass.
        self.linears = nn.ModuleList([nn.Linear(d, out_dim, bias=True) for d in dims])
        self.reverse = nn.ModuleList([nn.Linear(d, out_dim, bias=True) for d in dims])

        # One scorer per pair of neighbouring inputs.
        self.scorers = nn.ModuleList(
            [Biaffine(dims[k], dims[k + 1], dropout=dropout_rate) for k in range(self.n_layers - 1)]
        )
        self.dropout = nn.Dropout(dropout_rate)
        self.relu = nn.LeakyReLU(0.2)

    def forward(self, inps):
        """Propagate information along the chain in both directions.

        :param inps: list of ``n_layers`` tensors ordered [intent, ..., slots]
        :return: ``(intents, slots)``: the result of the backward pass, aligned
            with the first input, and of the forward pass, aligned with the
            last input (both None when there is only one input)
        """
        assert len(inps) == self.n_layers

        # Affinity between every pair of neighbouring inputs.
        Cs = [scorer(left, right) for scorer, left, right in zip(self.scorers, inps, inps[1:])]

        projs = []
        revers = []
        for forward_proj, backward_proj, inp in zip(self.linears, self.reverse, inps):
            projs.append(forward_proj(inp))
            revers.append(backward_proj(inp))

        last_pair = self.n_layers - 2

        # Forward pass: first input -> last input. The final step is left
        # without tanh, except when it is also the first step.
        slots = None
        for i, affinity in enumerate(Cs):
            affinity_t = affinity.transpose(1, 2)
            if i == 0:
                slots = torch.tanh(torch.bmm(affinity_t, projs[0]) + projs[1])
                continue
            slots = torch.bmm(affinity_t, slots) + projs[i + 1]
            if i < last_pair:
                slots = torch.tanh(slots)

        # Backward pass: last input -> first input, mirroring the loop above.
        intents = None
        for i in reversed(range(1, self.n_layers)):
            if i == self.n_layers - 1:
                intents = torch.tanh(torch.bmm(Cs[-1], revers[-1]) + revers[-2])
                continue
            intents = torch.bmm(Cs[i - 1], intents) + revers[i - 1]
            if i > 1:
                intents = torch.tanh(intents)

        return intents, slots

joint bert

In [None]:
import torch
import torch.nn as nn
from torchcrf import CRF
import torch.nn.functional as F
from transformers.models.bert.modeling_bert import BertModel, BertPreTrainedModel



class JointBERT(BertPreTrainedModel):
    """Joint multi-intent detection and slot filling on top of a BERT encoder.

    The BERT output feeds two separate ``LSTMEncoder`` branches. The intent
    branch is scored by a label-wise attention layer, the slot branch by a
    ``SlotClassifier``. With ``intent_slot_attn_type == 'coattention'`` both
    predictions are embedded and refined against each other by a
    ``HierCoAttention`` module before the losses are computed.
    """

    def __init__(self, config, args, intent_label_lst, slot_label_lst, slot_hier):
        """
        :param config: BERT configuration
        :param args: run configuration (decoder sizes, dropout, attention options, loss weights, ...)
        :param intent_label_lst: list of intent labels
        :param slot_label_lst: list of slot labels
        :param slot_hier: list of slot-label groups, one per hierarchy level; only the group sizes are used
        """
        super(JointBERT, self).__init__(config)
        self.args = args
        self.attn_type = args.intent_slot_attn_type
        self.n_levels = args.n_levels
        self.num_intent_labels = len(intent_label_lst)
        self.num_slot_labels = len(slot_label_lst)
        self.slot_hier = [len(x) for x in slot_hier]
        self.bert = BertModel(config)

        self.lstm_intent = LSTMEncoder(
            config.hidden_size,
            args.decoder_hidden_dim,
            args.dropout_rate
        )
        self.lstm_slot = LSTMEncoder(
            config.hidden_size,
            args.decoder_hidden_dim,
            args.dropout_rate
        )

        # Predicts the number of intents from the intent scores (see the count loss in forward()).
        self.intent_detection = IntentClassifier(self.num_intent_labels, self.num_intent_labels, args.dropout_rate)
        self.slot_classifier = SlotClassifier(
            args.decoder_hidden_dim,
            self.num_intent_labels,
            self.num_slot_labels,
            self.args.max_seq_len,
            self.args.slot_decoder_size,
            args.dropout_rate,
        )
        self.output_size = args.decoder_hidden_dim
        self.attention_mode = args.attention_mode

        if args.intent_slot_attn_type == 'coattention':
            # Chain: intent embeddings -> slot context vectors of every level -> slot embeddings.
            dims = [self.args.label_embedding_size] + [args.slot_decoder_size] + [args.slot_decoder_size + args.level_projection_size] * (len(self.slot_hier) - 1) + [self.args.label_embedding_size]
            self.attn = HierCoAttention(args, dims, args.intent_slot_attn_size, args.dropout_rate)
        if args.intent_slot_attn_type:
            # The dropout rate used to be passed as nn.Linear's third positional
            # argument, which is ``bias``; with dropout_rate == 0 that silently
            # dropped the bias and forward() then failed on ``.add(None)``.
            # forward() needs the bias, so it is always created.
            self.intent_refine = nn.Linear(args.decoder_hidden_dim + args.intent_slot_attn_size, self.num_intent_labels)
            self.slot_refine = IntentClassifier(args.slot_decoder_size + args.intent_slot_attn_size, self.num_slot_labels, args.dropout_rate)
            self.slot_proj = IntentClassifier(self.num_slot_labels, self.args.label_embedding_size, args.dropout_rate)
            self.intent_proj = IntentClassifier(1, self.args.label_embedding_size, args.dropout_rate)

        init_attention_layer(self, 'intent', [self.num_intent_labels], 1, args.decoder_hidden_dim)
        if args.intent_slot_attn_type == 'coattention':
            init_attention_layer(self, 'slot', self.slot_hier, len(self.slot_hier), self.args.slot_decoder_size)

        self.relu = nn.LeakyReLU(0.2)
        # Not used by forward(). The dropout rate was historically passed as
        # ``bias`` here as well; that is kept (now explicitly) so the parameter
        # set of existing checkpoints does not change.
        self.intent_classifier = nn.Linear(args.decoder_hidden_dim, 1, bias=bool(args.dropout_rate))
        if args.use_crf:
            self.crf = CRF(num_tags=self.num_slot_labels, batch_first=True)

    def sequence_mask(self, length, max_length=None):
        """Return a boolean mask ``[len(length), max_length]`` that is True at positions below each length."""
        if max_length is None:
            max_length = length.max()
        x = torch.arange(max_length, dtype=length.dtype, device=length.device)
        mask = x.unsqueeze(0) < length.unsqueeze(1)
        # mask[:, 0] = 0
        return mask

    def forward(self, input_ids, attention_mask, token_type_ids, heads, intent_label_ids, slot_labels_ids, seq_lens):
        """Compute intent/slot predictions and, when labels are given, the losses.

        :param input_ids: word-piece ids for BERT
        :param attention_mask: word-piece attention mask for BERT
        :param token_type_ids: segment ids for BERT
        :param heads: per example, the word-piece positions gathered from the encoder output
            (presumably the first word-piece of every word - confirm against the data pipeline)
        :param intent_label_ids: multi-hot intent labels, or None to skip the intent loss
        :param slot_labels_ids: slot label ids, or None to skip the slot loss
        :param seq_lens: number of valid positions per example after gathering ``heads``
        :return: ``((total_loss, intent_loss, slot_loss, count_loss), (intent_logits, slot_logits, intent_dec))``
        :raises ValueError: if an attention type other than 'coattention' is configured
        """
        if self.attn_type and self.attn_type != 'coattention':
            # Only 'coattention' builds the refinement inputs; any other truthy
            # value used to fail later with a NameError.
            raise ValueError(
                f"Unsupported intent_slot_attn_type: {self.attn_type!r} (only 'coattention' is implemented)"
            )

        lens = torch.sum(attention_mask, dim=-1).cpu()
        encoded = self.bert(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)[0]
        intent_output = self.lstm_intent(encoded, lens)
        slot_output = self.lstm_slot(encoded, lens)

        # Keep only the positions listed in ``heads`` for every example.
        intent_output = torch.cat(
            [torch.index_select(intent_output[i], 0, heads[i]).unsqueeze(0) for i in range(intent_output.size(0))],
            dim=0
        )
        slot_output = torch.cat(
            [torch.index_select(slot_output[i], 0, heads[i]).unsqueeze(0) for i in range(slot_output.size(0))],
            dim=0
        )

        i_context_vector, intent_logits, i_attn = perform_attention(self, 'intent', intent_output, None, [self.num_intent_labels], 1)
        intent_logits = intent_logits[-1]

        i_context_vector = i_context_vector[-1]
        intent_dec = self.intent_detection(intent_logits)
        x, slot_logits = self.slot_classifier(slot_output)

        if self.args.intent_slot_attn_type == 'coattention':
            s_context_vector, s_logits, s_attn = perform_attention(self, 'slot', x, None, self.slot_hier, len(self.slot_hier))

        if self.attn_type == 'coattention':
            if self.args.embedding_type == 'soft':
                # Soft label embeddings: project the predicted distributions.
                slots = self.slot_proj(F.softmax(slot_logits, -1))
                intents = self.intent_proj(torch.sigmoid(intent_logits.unsqueeze(2)))
            else:
                # Hard label embeddings: project one-hot / multi-hot decisions.
                slot_label = torch.argmax(slot_logits, dim=-1)
                hard_label = F.one_hot(slot_label, num_classes=self.num_slot_labels)
                for i in range(len(seq_lens)):
                    # blank out positions beyond the sequence length
                    hard_label[i, seq_lens[i]:, :] = 0
                slots = self.slot_proj(hard_label.float())

                # Switch on the top-k intents, k being the predicted intent count.
                int_labels = torch.zeros_like(intent_logits)
                num = torch.argmax(intent_dec, dim=-1)
                for i in range(len(intent_logits)):
                    num_i = num[i]
                    ids = torch.topk(intent_logits[i], num_i).indices
                    int_labels[i, ids] = 1.0

                intents = self.intent_proj(int_labels.unsqueeze(2))
            intent_vec, slot_vec = self.attn([intents] + s_context_vector + [slots])

        if self.attn_type:
            # Label-wise refinement: row l of the weight matrix scores the features of intent l.
            intent_logits = self.intent_refine.weight.mul(torch.tanh(torch.cat([i_context_vector, intent_vec], dim=-1))).sum(dim=2).add(self.intent_refine.bias)
            slot_logits = self.relu(self.slot_refine(torch.cat([x, self.relu(slot_vec)], dim=-1)))

        # Mask over the gathered positions (replaces the word-piece mask from here on).
        max_len = torch.max(seq_lens)
        attention_mask = self.sequence_mask(seq_lens, max_length=max_len)
        total_loss = 0
        aux_loss = 0
        intent_loss = 0
        slot_loss = 0
        count_loss = 0
        # 1. Intent Softmax
        if intent_label_ids is not None:
            if self.num_intent_labels == 1:
                intent_loss_fct = nn.MSELoss()
                intent_loss = intent_loss_fct(intent_logits.view(-1), intent_label_ids.view(-1))
            else:
                intent_loss_fct = nn.BCEWithLogitsLoss()
                intent_loss_cnt = nn.CrossEntropyLoss()
                # NOTE(review): the count is used as a class index over num_intent_labels
                # classes, so an example carrying every intent would be out of range.
                intent_count = torch.sum(intent_label_ids, dim=-1).long()
                intent_loss = intent_loss_fct(intent_logits.view(-1, self.num_intent_labels), intent_label_ids.float())
                count_loss = intent_loss_cnt(intent_dec.view(-1, self.num_intent_labels), intent_count)
            total_loss += (intent_loss + count_loss) * self.args.intent_loss_coef

        # 2. Slot Softmax
        if slot_labels_ids is not None:
            if self.args.use_crf:
                slot_loss = self.crf(slot_logits, slot_labels_ids, mask=attention_mask.byte().to(slot_logits.device), reduction='mean')
                slot_loss = -1 * slot_loss  # negative log-likelihood
            else:
                slot_loss_fct = nn.CrossEntropyLoss(ignore_index=self.args.ignore_index)
                # Only keep active parts of the loss (the mask is always available here).
                active_loss = attention_mask.view(-1) == 1
                active_logits = slot_logits.view(-1, self.num_slot_labels)[active_loss]
                active_labels = slot_labels_ids.reshape(-1)[active_loss]
                slot_loss = slot_loss_fct(active_logits, active_labels)
            total_loss += slot_loss * (1 - self.args.intent_loss_coef)

        outputs = ((intent_logits, slot_logits, intent_dec),)  # add hidden states and attention if they are here
        outputs = ((total_loss, intent_loss, slot_loss, count_loss),) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions) # Logits is a tuple of intent and slot logits


# Train model

In [None]:
import os

import numpy as np
import torch


class EarlyStopping:
    """Early stops the training if the tuning metric doesn't improve after a given patience."""

    def __init__(self, patience=7, verbose=False):
        """
        Args:
            patience (int): How many non-improving evaluations to tolerate before
                            ``early_stop`` is set. Values <= 0 disable stopping.
                            Default: 7
            verbose (bool): If True, prints a message for each improvement.
                            Default: False
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.Inf was removed in NumPy 2.0; np.inf exists in every version.
        self.val_loss_min = np.inf

    def __call__(self, val_loss, model, args):
        """Record a new evaluation result and checkpoint the model when it is the best so far.

        Args:
            val_loss: value of the tuning metric (a loss when
                ``args.tuning_metric == "loss"``, otherwise a score where higher is better).
            model: object to checkpoint.
            args: run configuration; ``tuning_metric`` and ``model_dir`` are read.
        """
        # Normalise so that a higher score is always better.
        if args.tuning_metric == "loss":
            score = -val_loss
        else:
            score = val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, args)
        elif score < self.best_score:
            if self.patience > 0:
                self.counter += 1
                print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
                if self.counter >= self.patience:
                    self.early_stop = True
        else:
            # Equal or better: new best, restart the patience counter.
            self.best_score = score
            self.save_checkpoint(val_loss, model, args)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, args):
        """Saves model when validation loss decreases or accuracy/f1 increases."""
        if self.verbose:
            if args.tuning_metric == "loss":
                print(f"Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...")
            else:
                print(
                    f"{args.tuning_metric} increased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ..."
                )
        # torch.save does not create missing directories.
        os.makedirs(args.model_dir, exist_ok=True)
        # The whole model object is pickled here, not just its state_dict.
        torch.save(model, os.path.join(args.model_dir, "model.bin"))
        torch.save(args, os.path.join(args.model_dir, "training_args.bin"))
        self.val_loss_min = val_loss

In [None]:
import logging
import os

import numpy as np
import torch
import torch.nn.functional as F

from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.tensorboard import SummaryWriter
from tqdm.auto import tqdm, trange
from transformers import AdamW, get_linear_schedule_with_warmup, AutoTokenizer

logger = logging.getLogger(__name__)


class Trainer(object):
    def __init__(self, args, collate, train_dataset=None, dev_dataset=None, test_dataset=None):
        """Build the model selected by ``args.model_type`` and move it to the target device.

        Args:
            args: Run configuration namespace. ``args.n_chars`` is written
                from the training set before the model is constructed.
            collate: Callable handed to every ``DataLoader`` as ``collate_fn``.
            train_dataset: Training split. Required in practice: its ``chars``
                (and, for non-BERT models, its ``vocab``) are read here.
            dev_dataset: Validation split; ``None`` is tolerated by this
                constructor but not by ``evaluate("dev", ...)``.
            test_dataset: Test split; ``None`` is tolerated by this
                constructor but not by ``evaluate("test", ...)``.
        """
        self.args = args
        self.train_dataset = train_dataset
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset
        self.collate_fn = collate
        args.n_chars = len(self.train_dataset.chars)

        is_bert = 'bert' in self.args.model_type
        if is_bert:
            self.tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
            # The dataset parameters default to None, so skip absent splits
            # instead of failing with an AttributeError.
            for dataset in (train_dataset, dev_dataset, test_dataset):
                if dataset is not None:
                    dataset.load_bert(self.tokenizer)

        self.intent_label_lst = get_intent_labels(args)
        self.slot_label_lst, self.hiers = get_slots_all(args)

        self.pad_token_label_id = args.ignore_index
        self.config_class, self.model_class, _ = MODEL_CLASSES[args.model_type]
        if is_bert:
            self.config = self.config_class.from_pretrained(args.model_name_or_path, finetuning_task=args.task)
            self.model = self.model_class.from_pretrained(
                args.model_name_or_path,
                config=self.config,
                args=args,
                intent_label_lst=self.intent_label_lst,
                slot_label_lst=self.slot_label_lst,
                slot_hier=self.hiers
            )
        else:
            self.model = self.model_class(args, len(self.train_dataset.vocab), self.intent_label_lst, self.slot_label_lst, self.hiers)

        if args.base_model:
            # Warm start: copy every tensor whose name and shape match the
            # freshly built model; everything else keeps its initial value.
            model_state = self.model.state_dict()
            # map_location="cpu" lets a checkpoint saved on a GPU be loaded on
            # a CPU-only host; the model is moved to its device further down.
            pretrained_state = torch.load(os.path.join(args.base_model, 'model.bin'), map_location="cpu")
            pretrained_state = {
                k: v for k, v in pretrained_state.items()
                if k in model_state and v.size() == model_state[k].size()
            }
            model_state.update(pretrained_state)
            self.model.load_state_dict(model_state)

        self.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
        self.model.to(self.device)

    def train(self):
        """Fine-tune ``self.model`` on the training set.

        The dev set is evaluated once before training and then every
        ``args.logging_steps`` optimizer steps (every ``len(train_dataloader)``
        steps when that value is negative). Whenever dev sentence accuracy or
        slot F1 is at least the value recorded at the previous save, the model
        is saved, the test set is evaluated and its metrics are appended to
        the log file ``args.logging`` inside ``args.model_dir``.

        The log file and the TensorBoard writer are closed even when training
        is interrupted by an exception.

        Returns:
            tuple: ``(global_step, mean_loss)`` where ``mean_loss`` is the sum
            of the per-batch losses divided by ``global_step`` (``0.0`` when
            no optimizer step was taken).
        """
        from contextlib import ExitStack  # local import: only this method needs it

        train_sampler = RandomSampler(self.train_dataset)
        train_dataloader = DataLoader(self.train_dataset, sampler=train_sampler, batch_size=self.args.train_batch_size, collate_fn=self.collate_fn)

        global_step = 0
        tr_loss = 0.0

        with ExitStack() as resources:
            writer = SummaryWriter(log_dir=self.args.model_dir)
            resources.callback(writer.close)

            if self.args.max_steps > 0:
                t_total = self.args.max_steps
                self.args.num_train_epochs = (
                    self.args.max_steps // (len(train_dataloader) // self.args.gradient_accumulation_steps) + 1
                )
            else:
                t_total = len(train_dataloader) // self.args.gradient_accumulation_steps * self.args.num_train_epochs

            # Baseline evaluation of the untrained / warm-started model.
            print("check init")
            results = self.evaluate("dev", -1)
            print(results)
            logfile = resources.enter_context(open(os.path.join(self.args.model_dir, self.args.logging), 'w'))

            # Prepare optimizer and schedule (linear warmup and decay)
            no_decay = ["bias", "LayerNorm.weight"]
            optimizer_grouped_parameters = [
                {
                    "params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
                    "weight_decay": self.args.weight_decay,
                },
                {
                    "params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],
                    "weight_decay": 0.0,
                },
            ]
            optimizer = AdamW(optimizer_grouped_parameters, lr=self.args.learning_rate, eps=self.args.adam_epsilon)
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=self.args.warmup_steps, num_training_steps=t_total
            )

            if self.args.logging_steps < 0:
                self.args.logging_steps = len(train_dataloader)

            # Train!
            logger.info("***** Running training *****")
            logger.info("  Num examples = %d", len(self.train_dataset))
            logger.info("  Num Epochs = %d", self.args.num_train_epochs)
            logger.info("  Total train batch size = %d", self.args.train_batch_size)
            logger.info("  Gradient Accumulation steps = %d", self.args.gradient_accumulation_steps)
            logger.info("  Total optimization steps = %d", t_total)
            logger.info("  Logging steps = %d", self.args.logging_steps)

            self.model.zero_grad()
            best_sent = 0
            best_slot = 0

            train_iterator = trange(int(self.args.num_train_epochs), desc="Epoch")
            # NOTE(review): nothing in this method ever calls `early_stopping(...)`,
            # so `early_stop` stays False and the check below never fires. Calling it
            # would also make EarlyStopping.save_checkpoint overwrite model.bin with a
            # whole-model pickle, which load_model() cannot read as a state dict.
            early_stopping = EarlyStopping(patience=self.args.early_stopping, verbose=True)

            for epoch in train_iterator:
                epoch_iterator = tqdm(train_dataloader, desc="Iteration", position=0, leave=True)
                print("\nEpoch", epoch)

                for step, batch in enumerate(epoch_iterator):
                    self.model.train()
                    # Move every element but the last to the target device.
                    batch = tuple(t.to(self.device) for t in batch[:-1]) + (batch[-1], )
                    if 'bert' in self.args.model_type:
                        inputs = {
                            "input_ids": batch[0],
                            "attention_mask": batch[3],
                            "intent_label_ids": batch[5],
                            "slot_labels_ids": batch[6],
                            "token_type_ids": batch[4],
                            "heads": batch[2],
                            "seq_lens": batch[-1].cpu()
                        }
                    else:
                        inputs = {
                            "input_ids": batch[0],
                            "char_ids": batch[1],
                            "intent_label_ids": batch[2],
                            "slot_labels_ids": batch[3],
                            "seq_lens": batch[4],
                        }
                    outputs = self.model(**inputs)
                    total_loss, intent_loss, slot_loss, count_loss = outputs[0]

                    # During the first `only_intent` fraction of the epochs only the
                    # intent and count losses are optimised.
                    if epoch < self.args.num_train_epochs * self.args.only_intent:
                        total_loss = intent_loss + count_loss
                    # Scale whichever loss was selected, so the intent-only phase is
                    # also averaged over the accumulation window.
                    if self.args.gradient_accumulation_steps > 1:
                        total_loss = total_loss / self.args.gradient_accumulation_steps
                    total_loss.backward()

                    tr_loss += total_loss.item()
                    if (step + 1) % self.args.gradient_accumulation_steps == 0:
                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args.max_grad_norm)

                        optimizer.step()
                        scheduler.step()  # Update learning rate schedule
                        self.model.zero_grad()
                        global_step += 1

                        if self.args.logging_steps > 0 and global_step % (self.args.logging_steps) == 0:
                            print("\nTuning metrics:", self.args.tuning_metric)
                            results = self.evaluate("dev", epoch)
                            writer.add_scalar("Loss/validation", results["loss"], epoch)
                            writer.add_scalar("Intent Accuracy/validation", results["intent_acc"], epoch)
                            writer.add_scalar("Intent F1", results["intent_f1"], epoch)
                            writer.add_scalar("Slot F1/validation", results["slot_f1"], epoch)
                            writer.add_scalar("Mean Intent Slot", results["mean_intent_slot"], epoch)
                            writer.add_scalar("Sentence Accuracy/validation", results["semantic_frame_acc"], epoch)

                            if results['semantic_frame_acc'] >= best_sent or results['slot_f1'] >= best_slot:
                                # Both reference values are replaced by the current dev
                                # scores, even if only one of them improved.
                                best_sent = results['semantic_frame_acc']
                                best_slot = results['slot_f1']
                                self.save_model()
                                results = self.evaluate('test', epoch)
                                logfile.write('\n\nEPOCH = ' + str(epoch) + '\n')
                                for key in sorted(results.keys()):
                                    logfile.write(" {key} = {value}".format(key=key, value=str(results[key])))
                                    logfile.write("\n")

                    if 0 < self.args.max_steps < global_step:
                        epoch_iterator.close()
                        break

                if 0 < self.args.max_steps < global_step or early_stopping.early_stop:
                    train_iterator.close()
                    break
                # No optimizer step yet means there is no mean loss to report.
                if global_step > 0:
                    writer.add_scalar("Loss/train", tr_loss / global_step, epoch)

        mean_loss = tr_loss / global_step if global_step > 0 else 0.0
        return global_step, mean_loss

    def write_evaluation_result(self, out_file, results):
        out_file = self.args.model_dir + "/" + out_file
        w = open(out_file, "w", encoding="utf-8")
        w.write("***** Eval results *****\n")
        for key in sorted(results.keys()):
            to_write = " {key} = {value}".format(key=key, value=str(results[key]))
            w.write(to_write)
            w.write("\n")
        w.close()

    def evaluate(self, mode, epoch):
        """Run the model over the dev or test split and compute metrics.

        Args:
            mode: ``"dev"`` or ``"test"``.
            epoch: Value stored under ``results["epoch"]``; not otherwise used.

        Returns:
            dict: ``"loss"`` (mean per-batch loss), everything returned by
            ``compute_metrics``, ``"num_acc"`` (fraction of examples whose
            predicted intent count equals the gold count; NaN when intent-count
            detection is disabled) and ``"epoch"``.

        Raises:
            Exception: If ``mode`` is neither ``"dev"`` nor ``"test"``.

        Side effects:
            Writes ``eval_dev_results.txt`` / ``eval_test_results.txt`` into
            ``args.model_dir``.
        """
        if mode == "test":
            dataset = self.test_dataset
        elif mode == "dev":
            dataset = self.dev_dataset
        else:
            raise Exception("Only dev and test dataset available")

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.eval_batch_size, collate_fn=self.collate_fn)

        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        slot_label_map = {i: label for i, label in enumerate(self.slot_label_lst)}
        out_slot_label_list = []
        slot_preds_list = []
        predictions = []
        intent_labels = []
        int_len_gold = []
        int_len_pred = []
        # Intent scores below are log-sigmoid values (always <= 0); a score above
        # log(0.5) corresponds to a probability above 0.5.
        intent_threshold = np.log(0.5)

        results = {}
        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            # Move every element but the last to the target device.
            batch = tuple(t.to(self.device) for t in batch[:-1]) + (batch[-1], )
            with torch.no_grad():
                if 'bert' in self.args.model_type:
                    inputs = {
                        "input_ids": batch[0],
                        "attention_mask": batch[3],
                        "intent_label_ids": batch[5],
                        "slot_labels_ids": batch[6],
                        "token_type_ids": batch[4],
                        "heads": batch[2],
                        "seq_lens": batch[-1].cpu()
                    }
                else:
                    inputs = {
                        "input_ids": batch[0],
                        "char_ids": batch[1],
                        "intent_label_ids": batch[2],
                        "slot_labels_ids": batch[3],
                        "seq_lens": batch[4],
                    }
                outputs = self.model(**inputs)

                if self.args.num_intent_detection:
                    tmp_eval_loss, (intent_logits, slot_logits, intent_dec) = outputs[:2]
                else:
                    tmp_eval_loss, (intent_logits, slot_logits) = outputs[:2]

                # The first element of the loss tuple is accumulated as the batch loss.
                eval_loss += tmp_eval_loss[0].mean().item()
            nb_eval_steps += 1

            # Intent prediction
            intent_logits = F.logsigmoid(intent_logits).detach().cpu()
            intent_preds = intent_logits.numpy()
            if self.args.num_intent_detection:
                intent_nums = intent_dec.detach().cpu().numpy()
            out_intent_label_ids = inputs["intent_label_ids"].detach().cpu().numpy()
            intent_labels.extend(out_intent_label_ids.tolist())

            # Slot prediction
            if self.args.use_crf:
                slot_preds = np.array(self.model.crf.decode(slot_logits))
            else:
                slot_preds = slot_logits.detach().cpu()
            out_slot_labels_ids = inputs["slot_labels_ids"].detach().cpu().numpy()

            if self.args.num_intent_detection:
                # Two-stage decoding: predict how many intents there are, then
                # keep that many top-scoring intents.
                num_intents = intent_logits.size(1)
                intent_nums = np.argmax(intent_nums, axis=-1)
                gold_nums = np.sum(out_intent_label_ids, axis=-1)
                int_len_gold.extend(gold_nums.tolist())
                int_len_pred.extend(intent_nums.tolist())
                for num, preds in zip(intent_nums, intent_preds):
                    p = np.zeros(num_intents)
                    # A slice of [-0:] would select every intent, so a predicted
                    # count of zero must be handled explicitly.
                    if num > 0:
                        p[preds.argsort()[-num:]] = 1.
                    predictions.append(p)
            else:
                # Threshold at probability 0.5. Rounding the log-sigmoid scores
                # directly could never yield a positive prediction.
                predictions.extend((intent_preds > intent_threshold).astype(float).tolist())

            if not self.args.use_crf:
                slot_preds_arg = np.argmax(slot_preds.numpy(), axis=2)
            else:
                slot_preds_arg = slot_preds

            for i in range(out_slot_labels_ids.shape[0]):
                slt = None  # slot type opened by the most recent B- tag
                out_slot_label_list.append([])
                slot_preds_list.append([])
                for j in range(out_slot_labels_ids.shape[1]):
                    if out_slot_labels_ids[i, j] != self.pad_token_label_id:
                        out_slot_label_list[-1].append(slot_label_map[out_slot_labels_ids[i][j]])

                        # BIO repair: an I- tag that does not continue an open span
                        # of the same type is replaced by 'O'.
                        predict_label = slot_label_map[slot_preds_arg[i][j]]
                        if predict_label[:2] == 'B-':
                            slt = predict_label[2:]
                        elif predict_label[:2] == 'I-':
                            if slt is None:
                                predict_label = 'O'
                            elif slt != predict_label[2:]:
                                predict_label = 'O'
                        else:
                            slt = None
                        slot_preds_list[-1].append(predict_label)

        # Guard against an empty dataloader (no batches processed).
        eval_loss = eval_loss / nb_eval_steps if nb_eval_steps > 0 else 0.0
        results['loss'] = eval_loss
        predictions = np.array(predictions)
        intent_labels = np.array(intent_labels)
        total_result = compute_metrics(predictions, intent_labels, slot_preds_list, out_slot_label_list)
        results.update(total_result)
        int_len_gold = np.array(int_len_gold)
        int_len_pred = np.array(int_len_pred)
        # The count lists stay empty when intent-count detection is off; report
        # NaN directly rather than taking the mean of an empty array.
        if int_len_gold.size > 0:
            results['num_acc'] = (int_len_gold == int_len_pred).mean()
        else:
            results['num_acc'] = float("nan")
        results['epoch'] = epoch
        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  %s = %s", key, str(results[key]))
        if mode == "test":
            self.write_evaluation_result("eval_test_results.txt", results)
        elif mode == "dev":
            self.write_evaluation_result("eval_dev_results.txt", results)
        return results

    def save_model(self):
        # Save model checkpoint (Overwrite)
        if not os.path.exists(self.args.model_dir):
            os.makedirs(self.args.model_dir)
        model_to_save = self.model.module if hasattr(self.model, "module") else self.model
        torch.save(model_to_save.state_dict(), os.path.join(self.args.model_dir, 'model.bin'))

        # Save training arguments together with the trained model
        torch.save(self.args, os.path.join(self.args.model_dir, "training_args.bin"))
        logger.info("Saving model checkpoint to %s", self.args.model_dir)

    def load_model(self):
        # Check whether model exists
        if not os.path.exists(self.args.model_dir):
            raise Exception("Model doesn't exists! Train first!")

        try:
            self.model.load_state_dict(torch.load(os.path.join(self.args.model_dir, 'model.bin')), strict=False)
            self.model.to(self.device)
            logger.info("***** Model Loaded *****")
        except Exception:
            raise Exception("Some model files might be missing...")

# Run

In [None]:
import argparse


def main(args):
    """Set up logging, seeding and data, then train and/or evaluate as requested by ``args``."""
    init_logger()
    set_seed(args)
    # Call kept from the original flow; its return value is not used in this function.
    get_slots_all(args)

    n_intents = len(get_intent_labels(args))
    collate = TextCollate(0, n_intents, args.max_seq_len)

    # Splits are loaded in the order train, dev, test.
    train_dataset, dev_dataset, test_dataset = (
        TextLoader(args, 'train'),
        TextLoader(args, 'dev'),
        TextLoader(args, 'test'),
    )

    trainer = Trainer(args, collate, train_dataset, dev_dataset, test_dataset)

    if args.do_train:
        trainer.train()

    if not args.do_eval:
        return
    # Evaluate the checkpoint stored in args.model_dir on both held-out splits.
    trainer.load_model()
    trainer.evaluate('dev', 0)
    trainer.evaluate("test", -1)




In [None]:
import argparse



# Command-line style configuration for the run. In this notebook the parser is
# fed an empty argument list (see the end of the cell), so every option takes
# its declared default unless `args` is modified afterwards.
parser = argparse.ArgumentParser()

# Task selection and file locations.
parser.add_argument("--task", default="mixatis", type=str, help="The name of the task to train")
parser.add_argument("--model_dir", default="dir_base", type=str, help="Path to save, load model")
parser.add_argument("--data_dir", default="./MISCA/data", type=str, help="The input data dir")
parser.add_argument("--intent_label_file", default="intent_label.txt", type=str, help="Intent Label file")
parser.add_argument("--slot_label_file", default="slot_label.txt", type=str, help="Slot Label file")
parser.add_argument("--slot_label_clean", default="slot_clean.txt", type=str, help="Slot Label file")
parser.add_argument("--logging", default="log.txt", type=str, help="Logging file")

# LAAT
parser.add_argument("--n_levels", default=1, type=int, help="Number of attention")
parser.add_argument("--attention_mode", default="label", type=str)
parser.add_argument("--level_projection_size", default=32, type=int)
parser.add_argument("--d_a", default=-1, type=int)

# Character- and word-level encoder sizes.
# NOTE: `store_false` means these two switches are True by default and passing
# the flag turns the feature OFF, despite the "Whether to use" help text.
parser.add_argument("--char_embed", default=64, type=int)
parser.add_argument("--char_out", default=64, type=int)
parser.add_argument("--use_charcnn", action="store_false", help="Whether to use CharCNN")
parser.add_argument("--use_charlstm", action="store_false", help="Whether to use CharLSTM")
parser.add_argument("--word_embedding_dim", default=128, type=int)
parser.add_argument("--encoder_hidden_dim", default=128, type=int)
parser.add_argument("--decoder_hidden_dim", default=256, type=int)
parser.add_argument("--attention_hidden_dim", default=256, type=int)
parser.add_argument("--attention_output_dim", default=256, type=int)

# Training configuration.
parser.add_argument("--model_type", default="bert", type=str, help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()))
parser.add_argument('--seed', type=int, default=1234, help="random seed for initialization")
parser.add_argument("--train_batch_size", default=32, type=int, help="Batch size for training.")
parser.add_argument("--eval_batch_size", default=64, type=int, help="Batch size for evaluation.")
parser.add_argument("--max_seq_len", default=100, type=int, help="The maximum total input sequence length after tokenization.")
parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.")
parser.add_argument("--num_train_epochs", default=20, type=float, help="Total number of training epochs to perform.")
parser.add_argument("--weight_decay", default=0, type=float, help="Weight decay if we apply some.")
parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                    help="Number of updates steps to accumulate before performing a backward/update pass.")
parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.")
parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
parser.add_argument("--max_steps", default=-1, type=int, help="If > 0: set total number of training steps to perform. Override num_train_epochs.")
parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")
parser.add_argument("--dropout_rate", default=0.1, type=float, help="Dropout for fully-connected layers")

# A negative value makes the trainer evaluate every len(train_dataloader) steps.
parser.add_argument('--logging_steps', type=int, default=-1, help="Log every X updates steps.")

# NOTE: `store_true` combined with `default=True` means --do_train and --do_eval
# are always True; they cannot be disabled from the command line.
parser.add_argument("--do_train",default=True, action="store_true", help="Whether to run training.")
parser.add_argument("--do_eval",default=True, action="store_true", help="Whether to run eval on the test set.")
parser.add_argument("--no_cuda", action="store_true", help="Avoid using CUDA when available")
parser.add_argument("--tuning_metric", default="mean_intent_slot", type=str, help="Metric to save checkpoint")

# Fraction of the epochs during which the trainer optimises only the intent
# and count losses (compared against num_train_epochs * only_intent).
parser.add_argument("--only_intent", default=0, type=float, help="The first epochs to optimize intent")

# Also used by the trainer as the slot-label id that marks padding positions.
parser.add_argument("--ignore_index", default=0, type=int,
                    help='Specifies a target value that is ignored and does not contribute to the input gradient')

parser.add_argument(
    "--token_level",
    type=str,
    default="word-level",
    help="Tokens are at syllable level or word level (Vietnamese) [word-level, syllable-level]",
)

# Loss weighting and early stopping (passed to EarlyStopping as its patience).
parser.add_argument('--intent_loss_coef', type=float, default=0.5, help='Coefficient for the intent loss.')
parser.add_argument('--aux_loss_coef', type=float, default=0.5, help='Coefficient for the aux task.')
parser.add_argument('--early_stopping', type=float, default=-1, help='Early stopping strategy')

# Directory containing a `model.bin` used to warm-start the model.
parser.add_argument("--base_model", default=None, type=str, help="The pretrained model path")

# NOTE: always True for the same reason as --do_train above.
parser.add_argument(
    "--num_intent_detection",
    action="store_true",
    default=True,
    help="Whether to use two-stage intent detection",
)

parser.add_argument(
    "--auxiliary_tasks",
    action="store_true",
    help="Whether to optimize with auxiliary tasks",
)

parser.add_argument(
    "--slot_decoder_size", type=int, default=512, help="hidden size of attention output vector"
)

parser.add_argument(
    "--intent_slot_attn_size", type=int, default=256, help="hidden size of attention output vector"
)

parser.add_argument(
    "--min_freq", type=int, default=1, help="Minimum number of frequency to be considered in the vocab"
)

# No default is declared, so this is None unless set explicitly.
parser.add_argument(
    '--intent_slot_attn_type', choices=['coattention', 'attention_flow'],
)

parser.add_argument(
    '--embedding_type', choices=['soft', 'hard'], default='soft',
)

parser.add_argument(
    "--label_embedding_size", type=int, default=256, help="hidden size of label embedding vector"
)

# CRF option
# NOTE: always True for the same reason as --do_train above.
parser.add_argument("--use_crf",default=True, action="store_true", help="Whether to use CRF")
parser.add_argument("--slot_pad_label", default="PAD", type=str, help="Pad token for slot label pad (to be ignore when calculate loss)")

# Parse an empty argument list: inside a notebook sys.argv belongs to the
# kernel, so the declared defaults are used as the configuration.
args = parser.parse_args([])

# Resolve the pretrained checkpoint name/path for the chosen model type.
args.model_name_or_path = MODEL_PATH_MAP[args.model_type]
# main(args) is invoked from a later cell.


# mixAtis task

In [None]:
# Re-bind the module-level logger and lower its threshold to INFO so that the
# trainer's logger.info(...) progress messages are emitted.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

In [None]:
# Run the full pipeline (training, then evaluation) with the configuration parsed above.
main(args)

INFO:__main__:LOOKING AT ./MISCA/data/mixatis/train.txt
INFO:__main__:LOOKING AT ./MISCA/data/mixatis/dev.txt
INFO:__main__:LOOKING AT ./MISCA/data/mixatis/test.txt
INFO:__main__:*** Example ***
INFO:__main__:guid: train-0
INFO:__main__:tokens: [CLS] define airline ua , names of airports and also show me city served both by nation ##air and canadian airlines international [SEP]
INFO:__main__:input_ids: 101 9375 8582 25423 1010 3415 1997 13586 1998 2036 2265 2033 2103 2366 2119 2011 3842 11215 1998 3010 7608 2248 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:__main__:attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:__main__:token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

check init


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = -1
INFO:__main__:  intent_acc = 0.0013175230566534915
INFO:__main__:  intent_f1 = 0.2060562623185809
INFO:__main__:  loss = 57.006303469340004
INFO:__main__:  mean_intent_slot = 0.004264136815117962
INFO:__main__:  num_acc = 0.03689064558629776
INFO:__main__:  semantic_frame_acc = 0.0
INFO:__main__:  slot_acc = 0.0
INFO:__main__:  slot_f1 = 0.007210750573582432
INFO:__main__:  slot_precision = 0.00610207100591716
INFO:__main__:  slot_recall = 0.008811748998664886
INFO:__main__:***** Running training *****
INFO:__main__:  Num examples = 13162
INFO:__main__:  Num Epochs = 20
INFO:__main__:  Total train batch size = 32
INFO:__main__:  Gradient Accumulation steps = 1
INFO:__main__:  Total optimization steps = 8240
INFO:__main__:  Logging steps = 412


{'loss': 57.006303469340004, 'intent_acc': 0.0013175230566534915, 'intent_f1': 0.2060562623185809, 'slot_precision': 0.00610207100591716, 'slot_recall': 0.008811748998664886, 'slot_f1': 0.007210750573582432, 'semantic_frame_acc': 0.0, 'slot_acc': 0.0, 'mean_intent_slot': 0.004264136815117962, 'num_acc': 0.03689064558629776, 'epoch': -1}


Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 0


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 0
INFO:__main__:  intent_acc = 0.005270092226613966
INFO:__main__:  intent_f1 = 0.17449664429530204
INFO:__main__:  loss = 5.011628309885661
INFO:__main__:  mean_intent_slot = 0.42289162950290865
INFO:__main__:  num_acc = 0.6007905138339921
INFO:__main__:  semantic_frame_acc = 0.0013175230566534915
INFO:__main__:  slot_acc = 0.43478260869565216
INFO:__main__:  slot_f1 = 0.8405131667792033
INFO:__main__:  slot_precision = 0.8502732240437159
INFO:__main__:  slot_recall = 0.8309746328437917
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 0
INFO:__main__:  intent_acc = 0.0012077294685990338
INFO:__main__:  intent_f1 = 0.13178984861976847
INFO:__main__:  loss = 5.354785222273606
INFO:__main__:  mean_intent_slot = 0.3843110666233196
INFO:__main__:  num_acc = 0.5857487922705314
INFO:__main__:  semantic_frame_acc = 0.0012077294685990338
INFO:__main__:  slot_acc = 0.32971014492753625
INFO:__main__:  slot_f1 = 0.7674144037780402
INFO:__main__:  slot_precision = 0.7619171659286272
INFO:__main__:  slot_recall = 0.7729915433403806


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 1


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 1
INFO:__main__:  intent_acc = 0.007905138339920948
INFO:__main__:  intent_f1 = 0.1322512774271115
INFO:__main__:  loss = 2.071782350540161
INFO:__main__:  mean_intent_slot = 0.4747853437441861
INFO:__main__:  num_acc = 0.8616600790513834
INFO:__main__:  semantic_frame_acc = 0.0013175230566534915
INFO:__main__:  slot_acc = 0.7457180500658761
INFO:__main__:  slot_f1 = 0.9416655491484512
INFO:__main__:  slot_precision = 0.9458512931034483
INFO:__main__:  slot_recall = 0.9375166889185581
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 1
INFO:__main__:  intent_acc = 0.0012077294685990338
INFO:__main__:  intent_f1 = 0.10030820958251611
INFO:__main__:  loss = 4.141474100259634
INFO:__main__:  mean_intent_slot = 0.4212213560144959
INFO:__main__:  num_acc = 0.8272946859903382
INFO:__main__:  semantic_frame_acc = 0.0012077294685990338
INFO:__main__:  slot_acc = 0.4528985507246377
INFO:__main__:  slot_f1 = 0.8412349825603928
INFO:__main__:  slot_precision = 0.8228455900935052
INFO:__main__:  slot_recall = 0.8604651162790697


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 2


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 2
INFO:__main__:  intent_acc = 0.039525691699604744
INFO:__main__:  intent_f1 = 0.2732070785470351
INFO:__main__:  loss = 1.6926356504360835
INFO:__main__:  mean_intent_slot = 0.49840103009546327
INFO:__main__:  num_acc = 0.9986824769433466
INFO:__main__:  semantic_frame_acc = 0.034255599472990776
INFO:__main__:  slot_acc = 0.7971014492753623
INFO:__main__:  slot_f1 = 0.9572763684913218
INFO:__main__:  slot_precision = 0.9572763684913218
INFO:__main__:  slot_recall = 0.9572763684913218
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 2
INFO:__main__:  intent_acc = 0.016908212560386472
INFO:__main__:  intent_f1 = 0.22183304144775248
INFO:__main__:  loss = 3.990798968535203
INFO:__main__:  mean_intent_slot = 0.4382524146731665
INFO:__main__:  num_acc = 0.9951690821256038
INFO:__main__:  semantic_frame_acc = 0.010869565217391304
INFO:__main__:  slot_acc = 0.49033816425120774
INFO:__main__:  slot_f1 = 0.8595966167859466
INFO:__main__:  slot_precision = 0.8467059728274802
INFO:__main__:  slot_recall = 0.8728858350951374


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 3


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 3
INFO:__main__:  intent_acc = 0.17654808959156784
INFO:__main__:  intent_f1 = 0.4852438645542094
INFO:__main__:  loss = 1.5952865928411484
INFO:__main__:  mean_intent_slot = 0.5713961261833757
INFO:__main__:  num_acc = 0.9960474308300395
INFO:__main__:  semantic_frame_acc = 0.15151515151515152
INFO:__main__:  slot_acc = 0.8326745718050066
INFO:__main__:  slot_f1 = 0.9662441627751835
INFO:__main__:  slot_precision = 0.9656
INFO:__main__:  slot_recall = 0.9668891855807744
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 3
INFO:__main__:  intent_acc = 0.11594202898550725
INFO:__main__:  intent_f1 = 0.45281916447560616
INFO:__main__:  loss = 4.479249532406147
INFO:__main__:  mean_intent_slot = 0.48656097428001527
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.0640096618357488
INFO:__main__:  slot_acc = 0.4867149758454106
INFO:__main__:  slot_f1 = 0.8571799195745233
INFO:__main__:  slot_precision = 0.8417834394904459
INFO:__main__:  slot_recall = 0.8731501057082452


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 4


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 4
INFO:__main__:  intent_acc = 0.466403162055336
INFO:__main__:  intent_f1 = 0.6910897236882956
INFO:__main__:  loss = 1.4852108259995778
INFO:__main__:  mean_intent_slot = 0.7170294345427709
INFO:__main__:  num_acc = 0.9986824769433466
INFO:__main__:  semantic_frame_acc = 0.40843214756258234
INFO:__main__:  slot_acc = 0.8274044795783926
INFO:__main__:  slot_f1 = 0.9676557070302058
INFO:__main__:  slot_precision = 0.9686914637409687
INFO:__main__:  slot_recall = 0.9666221628838452
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 4
INFO:__main__:  intent_acc = 0.39492753623188404
INFO:__main__:  intent_f1 = 0.6682242990654206
INFO:__main__:  loss = 4.201558406536396
INFO:__main__:  mean_intent_slot = 0.6305724017690403
INFO:__main__:  num_acc = 0.9951690821256038
INFO:__main__:  semantic_frame_acc = 0.23792270531400966
INFO:__main__:  slot_acc = 0.5253623188405797
INFO:__main__:  slot_f1 = 0.8662172673061965
INFO:__main__:  slot_precision = 0.8501272264631043
INFO:__main__:  slot_recall = 0.8829281183932347


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 5


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 5
INFO:__main__:  intent_acc = 0.6719367588932806
INFO:__main__:  intent_f1 = 0.8072027320707854
INFO:__main__:  loss = 1.6568872183561325
INFO:__main__:  mean_intent_slot = 0.8183542881736219
INFO:__main__:  num_acc = 0.9986824769433466
INFO:__main__:  semantic_frame_acc = 0.5797101449275363
INFO:__main__:  slot_acc = 0.8089591567852438
INFO:__main__:  slot_f1 = 0.9647718174539632
INFO:__main__:  slot_precision = 0.9642571352360629
INFO:__main__:  slot_recall = 0.965287049399199
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 5
INFO:__main__:  intent_acc = 0.5422705314009661
INFO:__main__:  intent_f1 = 0.7497810218978103
INFO:__main__:  loss = 4.485178617330698
INFO:__main__:  mean_intent_slot = 0.7093448441945455
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.3321256038647343
INFO:__main__:  slot_acc = 0.5664251207729468
INFO:__main__:  slot_f1 = 0.8764191569881248
INFO:__main__:  slot_precision = 0.865687032740397
INFO:__main__:  slot_recall = 0.8874207188160677


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 6


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 6
INFO:__main__:  intent_acc = 0.7444005270092227
INFO:__main__:  intent_f1 = 0.8562558211735487
INFO:__main__:  loss = 1.6614648525913556
INFO:__main__:  mean_intent_slot = 0.8553912987233947
INFO:__main__:  num_acc = 0.9986824769433466
INFO:__main__:  semantic_frame_acc = 0.6600790513833992
INFO:__main__:  slot_acc = 0.8300395256916996
INFO:__main__:  slot_f1 = 0.9663820704375667
INFO:__main__:  slot_precision = 0.9656091708877632
INFO:__main__:  slot_recall = 0.9671562082777037
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 6
INFO:__main__:  intent_acc = 0.6086956521739131
INFO:__main__:  intent_f1 = 0.7896119054566676
INFO:__main__:  loss = 4.368861565223107
INFO:__main__:  mean_intent_slot = 0.7407020062975923
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.37077294685990336
INFO:__main__:  slot_acc = 0.5410628019323671
INFO:__main__:  slot_f1 = 0.8727083604212715
INFO:__main__:  slot_precision = 0.8589710775531098
INFO:__main__:  slot_recall = 0.8868921775898521


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 7


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 7
INFO:__main__:  intent_acc = 0.8432147562582345
INFO:__main__:  intent_f1 = 0.9140018627755355
INFO:__main__:  loss = 1.5306957339247067
INFO:__main__:  mean_intent_slot = 0.9072414820981136
INFO:__main__:  num_acc = 0.9986824769433466
INFO:__main__:  semantic_frame_acc = 0.758893280632411
INFO:__main__:  slot_acc = 0.8484848484848485
INFO:__main__:  slot_f1 = 0.9712682079379927
INFO:__main__:  slot_precision = 0.9721776350989834
INFO:__main__:  slot_recall = 0.9703604806408545
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 7
INFO:__main__:  intent_acc = 0.6968599033816425
INFO:__main__:  intent_f1 = 0.8412142440163457
INFO:__main__:  loss = 4.412041058907142
INFO:__main__:  mean_intent_slot = 0.7865823525258944
INFO:__main__:  num_acc = 0.9975845410628019
INFO:__main__:  semantic_frame_acc = 0.42995169082125606
INFO:__main__:  slot_acc = 0.5531400966183575
INFO:__main__:  slot_f1 = 0.8763048016701461
INFO:__main__:  slot_precision = 0.8654639175257732
INFO:__main__:  slot_recall = 0.8874207188160677


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 8


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 8
INFO:__main__:  intent_acc = 0.857707509881423
INFO:__main__:  intent_f1 = 0.9267080745341616
INFO:__main__:  loss = 1.5433942427237828
INFO:__main__:  mean_intent_slot = 0.9142893508787125
INFO:__main__:  num_acc = 0.997364953886693
INFO:__main__:  semantic_frame_acc = 0.7628458498023716
INFO:__main__:  slot_acc = 0.8432147562582345
INFO:__main__:  slot_f1 = 0.9708711918760021
INFO:__main__:  slot_precision = 0.9716501738432736
INFO:__main__:  slot_recall = 0.9700934579439252
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 8
INFO:__main__:  intent_acc = 0.7342995169082126
INFO:__main__:  intent_f1 = 0.8621495327102803
INFO:__main__:  loss = 5.2012924047616815
INFO:__main__:  mean_intent_slot = 0.800152718938081
INFO:__main__:  num_acc = 0.9975845410628019
INFO:__main__:  semantic_frame_acc = 0.4528985507246377
INFO:__main__:  slot_acc = 0.5253623188405797
INFO:__main__:  slot_f1 = 0.8660059209679496
INFO:__main__:  slot_precision = 0.8441656210790465
INFO:__main__:  slot_recall = 0.8890063424947146


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 9


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 9
INFO:__main__:  intent_acc = 0.8761528326745718
INFO:__main__:  intent_f1 = 0.9391304347826087
INFO:__main__:  loss = 1.4477488150199254
INFO:__main__:  mean_intent_slot = 0.9261843799930849
INFO:__main__:  num_acc = 0.997364953886693
INFO:__main__:  semantic_frame_acc = 0.7944664031620553
INFO:__main__:  slot_acc = 0.8603425559947299
INFO:__main__:  slot_f1 = 0.976215927311598
INFO:__main__:  slot_precision = 0.9769991976464295
INFO:__main__:  slot_recall = 0.97543391188251
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 9
INFO:__main__:  intent_acc = 0.7367149758454107
INFO:__main__:  intent_f1 = 0.8639018691588785
INFO:__main__:  loss = 4.536816872083223
INFO:__main__:  mean_intent_slot = 0.8090110826939472
INFO:__main__:  num_acc = 0.9975845410628019
INFO:__main__:  semantic_frame_acc = 0.463768115942029
INFO:__main__:  slot_acc = 0.5845410628019324
INFO:__main__:  slot_f1 = 0.8813071895424837
INFO:__main__:  slot_precision = 0.871960682876358
INFO:__main__:  slot_recall = 0.8908562367864693


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 10


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 10
INFO:__main__:  intent_acc = 0.8787878787878788
INFO:__main__:  intent_f1 = 0.9397515527950311
INFO:__main__:  loss = 1.3858883492648602
INFO:__main__:  mean_intent_slot = 0.9279027582074082
INFO:__main__:  num_acc = 0.997364953886693
INFO:__main__:  semantic_frame_acc = 0.7957839262187089
INFO:__main__:  slot_acc = 0.8603425559947299
INFO:__main__:  slot_f1 = 0.9770176376269376
INFO:__main__:  slot_precision = 0.9778015512169029
INFO:__main__:  slot_recall = 0.9762349799732978
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 10
INFO:__main__:  intent_acc = 0.7439613526570048
INFO:__main__:  intent_f1 = 0.8706569343065693
INFO:__main__:  loss = 5.1201348304748535
INFO:__main__:  mean_intent_slot = 0.8103779616326006
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.46980676328502413
INFO:__main__:  slot_acc = 0.5652173913043478
INFO:__main__:  slot_f1 = 0.8767945706081963
INFO:__main__:  slot_precision = 0.8661681279009799
INFO:__main__:  slot_recall = 0.8876849894291755


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 11


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 11
INFO:__main__:  intent_acc = 0.9064558629776021
INFO:__main__:  intent_f1 = 0.9556038497361069
INFO:__main__:  loss = 1.4102720096707344
INFO:__main__:  mean_intent_slot = 0.9413358951446
INFO:__main__:  num_acc = 0.9986824769433466
INFO:__main__:  semantic_frame_acc = 0.8142292490118577
INFO:__main__:  slot_acc = 0.8537549407114624
INFO:__main__:  slot_f1 = 0.976215927311598
INFO:__main__:  slot_precision = 0.9769991976464295
INFO:__main__:  slot_recall = 0.97543391188251
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 11
INFO:__main__:  intent_acc = 0.7391304347826086
INFO:__main__:  intent_f1 = 0.8663164039696438
INFO:__main__:  loss = 4.962585284159734
INFO:__main__:  mean_intent_slot = 0.8054426836426023
INFO:__main__:  num_acc = 0.9951690821256038
INFO:__main__:  semantic_frame_acc = 0.4577294685990338
INFO:__main__:  slot_acc = 0.535024154589372
INFO:__main__:  slot_f1 = 0.8717549325025961
INFO:__main__:  slot_precision = 0.8566326530612245
INFO:__main__:  slot_recall = 0.8874207188160677


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 12


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 12
INFO:__main__:  intent_acc = 0.8722002635046113
INFO:__main__:  intent_f1 = 0.9382179447376592
INFO:__main__:  loss = 1.515500370413065
INFO:__main__:  mean_intent_slot = 0.9203205115329958
INFO:__main__:  num_acc = 0.9986824769433466
INFO:__main__:  semantic_frame_acc = 0.766798418972332
INFO:__main__:  slot_acc = 0.83399209486166
INFO:__main__:  slot_f1 = 0.9684407595613801
INFO:__main__:  slot_precision = 0.9699973211893919
INFO:__main__:  slot_recall = 0.9668891855807744


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 13


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 13
INFO:__main__:  intent_acc = 0.9143610013175231
INFO:__main__:  intent_f1 = 0.9590062111801242
INFO:__main__:  loss = 1.4318149089813232
INFO:__main__:  mean_intent_slot = 0.9438836945649489
INFO:__main__:  num_acc = 0.997364953886693
INFO:__main__:  semantic_frame_acc = 0.8221343873517787
INFO:__main__:  slot_acc = 0.852437417654809
INFO:__main__:  slot_f1 = 0.9734063878123748
INFO:__main__:  slot_precision = 0.9743178170144462
INFO:__main__:  slot_recall = 0.9724966622162884
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 13
INFO:__main__:  intent_acc = 0.7596618357487923
INFO:__main__:  intent_f1 = 0.8732476635514018
INFO:__main__:  loss = 5.019077887901893
INFO:__main__:  mean_intent_slot = 0.8174366433135634
INFO:__main__:  num_acc = 0.9975845410628019
INFO:__main__:  semantic_frame_acc = 0.4758454106280193
INFO:__main__:  slot_acc = 0.5362318840579711
INFO:__main__:  slot_f1 = 0.8752114508783345
INFO:__main__:  slot_precision = 0.8620866444501409
INFO:__main__:  slot_recall = 0.8887420718816068


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 14


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 14
INFO:__main__:  intent_acc = 0.922266139657444
INFO:__main__:  intent_f1 = 0.9630549518782987
INFO:__main__:  loss = 1.4601129926741123
INFO:__main__:  mean_intent_slot = 0.9483708087035049
INFO:__main__:  num_acc = 0.9986824769433466
INFO:__main__:  semantic_frame_acc = 0.8274044795783926
INFO:__main__:  slot_acc = 0.852437417654809
INFO:__main__:  slot_f1 = 0.9744754777495658
INFO:__main__:  slot_precision = 0.9753879079721777
INFO:__main__:  slot_recall = 0.9735647530040054
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 14
INFO:__main__:  intent_acc = 0.7644927536231884
INFO:__main__:  intent_f1 = 0.8750729713952131
INFO:__main__:  loss = 5.231186023125281
INFO:__main__:  mean_intent_slot = 0.8203960649612824
INFO:__main__:  num_acc = 0.9951690821256038
INFO:__main__:  semantic_frame_acc = 0.4746376811594203
INFO:__main__:  slot_acc = 0.532608695652174
INFO:__main__:  slot_f1 = 0.8762993762993764
INFO:__main__:  slot_precision = 0.8619631901840491
INFO:__main__:  slot_recall = 0.8911205073995772


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 15


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 15
INFO:__main__:  intent_acc = 0.9117259552042161
INFO:__main__:  intent_f1 = 0.9590062111801242
INFO:__main__:  loss = 1.4163890903194745
INFO:__main__:  mean_intent_slot = 0.9438341083158209
INFO:__main__:  num_acc = 0.997364953886693
INFO:__main__:  semantic_frame_acc = 0.8234519104084321
INFO:__main__:  slot_acc = 0.857707509881423
INFO:__main__:  slot_f1 = 0.9759422614274258
INFO:__main__:  slot_precision = 0.9769868878779769
INFO:__main__:  slot_recall = 0.9748998664886516
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 15
INFO:__main__:  intent_acc = 0.7669082125603864
INFO:__main__:  intent_f1 = 0.8779205607476636
INFO:__main__:  loss = 5.298106670379639
INFO:__main__:  mean_intent_slot = 0.8193782029789195
INFO:__main__:  num_acc = 0.9975845410628019
INFO:__main__:  semantic_frame_acc = 0.46859903381642515
INFO:__main__:  slot_acc = 0.5253623188405797
INFO:__main__:  slot_f1 = 0.8718481933974526
INFO:__main__:  slot_precision = 0.8578005115089514
INFO:__main__:  slot_recall = 0.8863636363636364


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 16


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 16
INFO:__main__:  intent_acc = 0.9143610013175231
INFO:__main__:  intent_f1 = 0.9596273291925466
INFO:__main__:  loss = 1.4146946395436923
INFO:__main__:  mean_intent_slot = 0.9452232528297703
INFO:__main__:  num_acc = 0.997364953886693
INFO:__main__:  semantic_frame_acc = 0.8234519104084321
INFO:__main__:  slot_acc = 0.8537549407114624
INFO:__main__:  slot_f1 = 0.9760855043420175
INFO:__main__:  slot_precision = 0.9767379679144385
INFO:__main__:  slot_recall = 0.97543391188251
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 16
INFO:__main__:  intent_acc = 0.7644927536231884
INFO:__main__:  intent_f1 = 0.8776642335766424
INFO:__main__:  loss = 5.275090144230769
INFO:__main__:  mean_intent_slot = 0.8185934799552679
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.47101449275362317
INFO:__main__:  slot_acc = 0.5277777777777778
INFO:__main__:  slot_f1 = 0.8726942062873475
INFO:__main__:  slot_precision = 0.8582013285641288
INFO:__main__:  slot_recall = 0.8876849894291755


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 17


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 17
INFO:__main__:  intent_acc = 0.9169960474308301
INFO:__main__:  intent_f1 = 0.9614906832298136
INFO:__main__:  loss = 1.4533914898832638
INFO:__main__:  mean_intent_slot = 0.9456706586699848
INFO:__main__:  num_acc = 0.997364953886693
INFO:__main__:  semantic_frame_acc = 0.8194993412384717
INFO:__main__:  slot_acc = 0.8484848484848485
INFO:__main__:  slot_f1 = 0.9743452699091395
INFO:__main__:  slot_precision = 0.975127039315325
INFO:__main__:  slot_recall = 0.9735647530040054


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 18


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 18
INFO:__main__:  intent_acc = 0.919631093544137
INFO:__main__:  intent_f1 = 0.9627329192546583
INFO:__main__:  loss = 1.4405018823842208
INFO:__main__:  mean_intent_slot = 0.9481907471996474
INFO:__main__:  num_acc = 0.997364953886693
INFO:__main__:  semantic_frame_acc = 0.8274044795783926
INFO:__main__:  slot_acc = 0.8537549407114624
INFO:__main__:  slot_f1 = 0.9767504008551577
INFO:__main__:  slot_precision = 0.9775341000267451
INFO:__main__:  slot_recall = 0.9759679572763685
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 18
INFO:__main__:  intent_acc = 0.7669082125603864
INFO:__main__:  intent_f1 = 0.8761682242990654
INFO:__main__:  loss = 5.282381021059477
INFO:__main__:  mean_intent_slot = 0.820694231020318
INFO:__main__:  num_acc = 0.9975845410628019
INFO:__main__:  semantic_frame_acc = 0.4746376811594203
INFO:__main__:  slot_acc = 0.5289855072463768
INFO:__main__:  slot_f1 = 0.8744802494802495
INFO:__main__:  slot_precision = 0.8601738241308794
INFO:__main__:  slot_recall = 0.8892706131078224


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 19


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 19
INFO:__main__:  intent_acc = 0.9183135704874835
INFO:__main__:  intent_f1 = 0.962111801242236
INFO:__main__:  loss = 1.4442115873098373
INFO:__main__:  mean_intent_slot = 0.9475319856713207
INFO:__main__:  num_acc = 0.997364953886693
INFO:__main__:  semantic_frame_acc = 0.8274044795783926
INFO:__main__:  slot_acc = 0.8537549407114624
INFO:__main__:  slot_f1 = 0.9767504008551577
INFO:__main__:  slot_precision = 0.9775341000267451
INFO:__main__:  slot_recall = 0.9759679572763685
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 19
INFO:__main__:  intent_acc = 0.7681159420289855
INFO:__main__:  intent_f1 = 0.8785046728971962
INFO:__main__:  loss = 5.263315751002385
INFO:__main__:  mean_intent_slot = 0.8212249625674491
INFO:__main__:  num_acc = 0.9975845410628019
INFO:__main__:  semantic_frame_acc = 0.4746376811594203
INFO:__main__:  slot_acc = 0.5301932367149759
INFO:__main__:  slot_f1 = 0.8743339831059128
INFO:__main__:  slot_precision = 0.8601380721043211
INFO:__main__:  slot_recall = 0.8890063424947146
INFO:__main__:***** Model Loaded *****
INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 0
INFO:__main__:  intent_acc = 0.9183135704874835
INFO:__main__:  intent_f1 = 0.962111801242236
INFO:__main__:  loss = 1.4442115873098373
INFO:__main__:  mean_intent_slot = 0.9475319856713207
INFO:__main__:  num_acc = 0.997364953886693
INFO:__main__:  semantic_frame_acc = 0.8274044795783926
INFO:__main__:  slot_acc = 0.8537549407114624
INFO:__main__:  slot_f1 = 0.9767504008551577
INFO:__main__:  slot_precision = 0.9775341000267451
INFO:__main__:  slot_recall = 0.9759679572763685
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = -1
INFO:__main__:  intent_acc = 0.7681159420289855
INFO:__main__:  intent_f1 = 0.8785046728971962
INFO:__main__:  loss = 5.263315751002385
INFO:__main__:  mean_intent_slot = 0.8212249625674491
INFO:__main__:  num_acc = 0.9975845410628019
INFO:__main__:  semantic_frame_acc = 0.4746376811594203
INFO:__main__:  slot_acc = 0.5301932367149759
INFO:__main__:  slot_f1 = 0.8743339831059128
INFO:__main__:  slot_precision = 0.8601380721043211
INFO:__main__:  slot_recall = 0.8890063424947146


# mixSnips task

In [None]:
# Build the run configuration for the "mixsnips" task with 5 training epochs.
# Options not listed here fall back to whatever defaults `parser` declares.
# NOTE(review): the captured logs show this run and the preceding one both
# saving checkpoints to "dir_base" -- confirm that overwriting is intended.
args = parser.parse_args(
    [
        "--task", "mixsnips",
        "--num_train_epochs", "5",
    ]
)
# Store the MODEL_PATH_MAP entry for the selected model type on the namespace.
setattr(args, "model_name_or_path", MODEL_PATH_MAP[args.model_type])
# Hand the completed configuration to the notebook's entry point.
main(args)

INFO:__main__:LOOKING AT ./MISCA/data/mixsnips/train.txt
INFO:__main__:LOOKING AT ./MISCA/data/mixsnips/dev.txt
INFO:__main__:LOOKING AT ./MISCA/data/mixsnips/test.txt
INFO:__main__:*** Example ***
INFO:__main__:guid: train-0
INFO:__main__:tokens: [CLS] play is ##ham jones and sw ##ine not deserves four points [SEP]
INFO:__main__:input_ids: 101 2377 2003 3511 3557 1998 25430 3170 2025 17210 2176 2685 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:__main__:attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:__main__:token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

check init


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = -1
INFO:__main__:  intent_acc = 0.017288444040036398
INFO:__main__:  intent_f1 = 0.3374582241893235
INFO:__main__:  loss = 49.09063535417829
INFO:__main__:  mean_intent_slot = 0.011920864241893685
INFO:__main__:  num_acc = 0.3380345768880801
INFO:__main__:  semantic_frame_acc = 0.0
INFO:__main__:  slot_acc = 0.0
INFO:__main__:  slot_f1 = 0.006553284443750975
INFO:__main__:  slot_precision = 0.004658729571840568
INFO:__main__:  slot_recall = 0.011044880785413744
INFO:__main__:***** Running training *****
INFO:__main__:  Num examples = 39776
INFO:__main__:  Num Epochs = 5
INFO:__main__:  Total train batch size = 32
INFO:__main__:  Gradient Accumulation steps = 1
INFO:__main__:  Total optimization steps = 6215
INFO:__main__:  Logging steps = 1243


{'loss': 49.09063535417829, 'intent_acc': 0.017288444040036398, 'intent_f1': 0.3374582241893235, 'slot_precision': 0.004658729571840568, 'slot_recall': 0.011044880785413744, 'slot_f1': 0.006553284443750975, 'semantic_frame_acc': 0.0, 'slot_acc': 0.0, 'mean_intent_slot': 0.011920864241893685, 'num_acc': 0.3380345768880801, 'epoch': -1}


Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 0


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 0
INFO:__main__:  intent_acc = 0.7443130118289354
INFO:__main__:  intent_f1 = 0.9240059245755953
INFO:__main__:  loss = 2.100152264322553
INFO:__main__:  mean_intent_slot = 0.8429324621986207
INFO:__main__:  num_acc = 0.7565969062784349
INFO:__main__:  semantic_frame_acc = 0.5609645131938126
INFO:__main__:  slot_acc = 0.7415832575068244
INFO:__main__:  slot_f1 = 0.941551912568306
INFO:__main__:  slot_precision = 0.9391296764628935
INFO:__main__:  slot_recall = 0.9439866760168303
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 0
INFO:__main__:  intent_acc = 0.7157798999545247
INFO:__main__:  intent_f1 = 0.9127363864205968
INFO:__main__:  loss = 1.93958774294172
INFO:__main__:  mean_intent_slot = 0.8273007932459076
INFO:__main__:  num_acc = 0.7426102773988177
INFO:__main__:  semantic_frame_acc = 0.53706230104593
INFO:__main__:  slot_acc = 0.7280582082764893
INFO:__main__:  slot_f1 = 0.9388216865372903
INFO:__main__:  slot_precision = 0.9375977750738745
INFO:__main__:  slot_recall = 0.9400487974904148


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 1


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 1
INFO:__main__:  intent_acc = 0.9513193812556869
INFO:__main__:  intent_f1 = 0.9788526434195726
INFO:__main__:  loss = 1.6115931851523264
INFO:__main__:  mean_intent_slot = 0.9548518570905118
INFO:__main__:  num_acc = 0.9849863512283894
INFO:__main__:  semantic_frame_acc = 0.781164695177434
INFO:__main__:  slot_acc = 0.8061874431301183
INFO:__main__:  slot_f1 = 0.9583843329253368
INFO:__main__:  slot_precision = 0.9558772235786537
INFO:__main__:  slot_recall = 0.9609046283309958
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 1
INFO:__main__:  intent_acc = 0.9472487494315598
INFO:__main__:  intent_f1 = 0.9755987855616777
INFO:__main__:  loss = 1.3048747556550162
INFO:__main__:  mean_intent_slot = 0.9529099334175877
INFO:__main__:  num_acc = 0.9854479308776717
INFO:__main__:  semantic_frame_acc = 0.789449749886312
INFO:__main__:  slot_acc = 0.810368349249659
INFO:__main__:  slot_f1 = 0.9585711174036157
INFO:__main__:  slot_precision = 0.9584458576531056
INFO:__main__:  slot_recall = 0.9586964098989195


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 2


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 2
INFO:__main__:  intent_acc = 0.967697907188353
INFO:__main__:  intent_f1 = 0.9824521934758154
INFO:__main__:  loss = 1.9453666499682836
INFO:__main__:  mean_intent_slot = 0.9630912651563621
INFO:__main__:  num_acc = 0.9977252047315741
INFO:__main__:  semantic_frame_acc = 0.8066424021838035
INFO:__main__:  slot_acc = 0.8139217470427661
INFO:__main__:  slot_f1 = 0.9584846231243711
INFO:__main__:  slot_precision = 0.9566850056763602
INFO:__main__:  slot_recall = 0.9602910238429172
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 2
INFO:__main__:  intent_acc = 0.9686221009549796
INFO:__main__:  intent_f1 = 0.9820062977957713
INFO:__main__:  loss = 1.3324510182653155
INFO:__main__:  mean_intent_slot = 0.9677235793584349
INFO:__main__:  num_acc = 0.9986357435197817
INFO:__main__:  semantic_frame_acc = 0.8290131878126421
INFO:__main__:  slot_acc = 0.8403819918144612
INFO:__main__:  slot_f1 = 0.9668250577618903
INFO:__main__:  slot_precision = 0.9673732879699904
INFO:__main__:  slot_recall = 0.9662774485883583


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 3


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 3
INFO:__main__:  intent_acc = 0.9658780709736123
INFO:__main__:  intent_f1 = 0.9815564552406658
INFO:__main__:  loss = 2.325549827303205
INFO:__main__:  mean_intent_slot = 0.9611637840657306
INFO:__main__:  num_acc = 0.997270245677889
INFO:__main__:  semantic_frame_acc = 0.7952684258416742
INFO:__main__:  slot_acc = 0.8034576888080073
INFO:__main__:  slot_f1 = 0.9564494971578488
INFO:__main__:  slot_precision = 0.9541964753097191
INFO:__main__:  slot_recall = 0.9587131837307152


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 4


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 4
INFO:__main__:  intent_acc = 0.9699727024567789
INFO:__main__:  intent_f1 = 0.9825744800449691
INFO:__main__:  loss = 2.250480318069458
INFO:__main__:  mean_intent_slot = 0.964582204790914
INFO:__main__:  num_acc = 0.9995450409463148
INFO:__main__:  semantic_frame_acc = 0.8093721565059144
INFO:__main__:  slot_acc = 0.816196542311192
INFO:__main__:  slot_f1 = 0.9591917071250491
INFO:__main__:  slot_precision = 0.9572239196857267
INFO:__main__:  slot_recall = 0.9611676016830295
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 4
INFO:__main__:  intent_acc = 0.9749886311959982
INFO:__main__:  intent_f1 = 0.9842625899280576
INFO:__main__:  loss = 1.3895104442323958
INFO:__main__:  mean_intent_slot = 0.9730716576459294
INFO:__main__:  num_acc = 1.0
INFO:__main__:  semantic_frame_acc = 0.8549340609367895
INFO:__main__:  slot_acc = 0.8635743519781719
INFO:__main__:  slot_f1 = 0.9711546840958606
INFO:__main__:  slot_precision = 0.9712393236883389
INFO:__main__:  slot_recall = 0.9710700592540955
INFO:__main__:***** Model Loaded *****
INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 0
INFO:__main__:  intent_acc = 0.9699727024567789
INFO:__main__:  intent_f1 = 0.9825744800449691
INFO:__main__:  loss = 2.250480318069458
INFO:__main__:  mean_intent_slot = 0.964582204790914
INFO:__main__:  num_acc = 0.9995450409463148
INFO:__main__:  semantic_frame_acc = 0.8093721565059144
INFO:__main__:  slot_acc = 0.816196542311192
INFO:__main__:  slot_f1 = 0.9591917071250491
INFO:__main__:  slot_precision = 0.9572239196857267
INFO:__main__:  slot_recall = 0.9611676016830295
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = -1
INFO:__main__:  intent_acc = 0.9749886311959982
INFO:__main__:  intent_f1 = 0.9842625899280576
INFO:__main__:  loss = 1.3895104442323958
INFO:__main__:  mean_intent_slot = 0.9730716576459294
INFO:__main__:  num_acc = 1.0
INFO:__main__:  semantic_frame_acc = 0.8549340609367895
INFO:__main__:  slot_acc = 0.8635743519781719
INFO:__main__:  slot_f1 = 0.9711546840958606
INFO:__main__:  slot_precision = 0.9712393236883389
INFO:__main__:  slot_recall = 0.9710700592540955


# ATIS task

In [None]:
# Unzip the ATIS dataset into ./data
! mkdir data
! unzip atis.zip -d data

Archive:  atis.zip
   creating: data/atis/
   creating: data/atis/dev/
  inflating: data/atis/dev/label     
  inflating: data/atis/dev/seq.in    
  inflating: data/atis/dev/seq.out   
  inflating: data/atis/intent_label.txt  
  inflating: data/atis/slot_label.txt  
   creating: data/atis/test/
  inflating: data/atis/test/label    
  inflating: data/atis/test/seq.in   
  inflating: data/atis/test/seq.out  
   creating: data/atis/train/
  inflating: data/atis/train/label   
  inflating: data/atis/train/seq.in  
  inflating: data/atis/train/seq.out  


In [None]:
import argparse



# Run configuration for the ATIS task, expressed as an argparse parser.
# The parser is evaluated against an empty argument list (see parse_args([])
# below), so every option ends up with the default declared here.
parser = argparse.ArgumentParser()

# Task name, output directory, data location and label files.
parser.add_argument("--task", default="atis", type=str, help="The name of the task to train")
parser.add_argument("--model_dir", default="dir_base", type=str, help="Path to save, load model")
parser.add_argument("--data_dir", default="./data", type=str, help="The input data dir")
parser.add_argument("--intent_label_file", default="intent_label.txt", type=str, help="Intent Label file")
parser.add_argument("--slot_label_file", default="slot_label.txt", type=str, help="Slot Label file")
parser.add_argument("--slot_label_clean", default="slot_clean.txt", type=str, help="Slot Label file")
parser.add_argument("--logging", default="log.txt", type=str, help="Logging file")

# LAAT
parser.add_argument("--n_levels", default=1, type=int, help="Number of attention")
parser.add_argument("--attention_mode", default="label", type=str)
parser.add_argument("--level_projection_size", default=32, type=int)
parser.add_argument("--d_a", default=-1, type=int)

parser.add_argument("--char_embed", default=64, type=int)
parser.add_argument("--char_out", default=64, type=int)
# NOTE(review): both flags below use action="store_false", so they default to
# True and passing the flag turns the feature OFF; the help text reads the
# other way round.
parser.add_argument("--use_charcnn", action="store_false", help="Whether to use CharCNN")
parser.add_argument("--use_charlstm", action="store_false", help="Whether to use CharLSTM")
parser.add_argument("--word_embedding_dim", default=128, type=int)
parser.add_argument("--encoder_hidden_dim", default=128, type=int)
parser.add_argument("--decoder_hidden_dim", default=256, type=int)
parser.add_argument("--attention_hidden_dim", default=256, type=int)
parser.add_argument("--attention_output_dim", default=256, type=int)

# Config training
# MODEL_CLASSES is not defined in this cell; it must already exist in the
# notebook namespace when this line runs.
parser.add_argument("--model_type", default="bert", type=str, help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()))
parser.add_argument('--seed', type=int, default=1234, help="random seed for initialization")
parser.add_argument("--train_batch_size", default=32, type=int, help="Batch size for training.")
parser.add_argument("--eval_batch_size", default=64, type=int, help="Batch size for evaluation.")
parser.add_argument("--max_seq_len", default=100, type=int, help="The maximum total input sequence length after tokenization.")
parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.")
# NOTE(review): declared with type=float although the default is the int 50.
parser.add_argument("--num_train_epochs", default=50, type=float, help="Total number of training epochs to perform.")
parser.add_argument("--weight_decay", default=0, type=float, help="Weight decay if we apply some.")
parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                    help="Number of updates steps to accumulate before performing a backward/update pass.")
parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.")
parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
parser.add_argument("--max_steps", default=-1, type=int, help="If > 0: set total number of training steps to perform. Override num_train_epochs.")
parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")
parser.add_argument("--dropout_rate", default=0.1, type=float, help="Dropout for fully-connected layers")

parser.add_argument('--logging_steps', type=int, default=-1, help="Log every X updates steps.")

# NOTE(review): action="store_true" combined with default=True means these two
# values are True whether or not the flag is passed; they cannot be disabled
# from the command line.
parser.add_argument("--do_train",default=True, action="store_true", help="Whether to run training.")
parser.add_argument("--do_eval",default=True, action="store_true", help="Whether to run eval on the test set.")
parser.add_argument("--no_cuda", action="store_true", help="Avoid using CUDA when available")
parser.add_argument("--tuning_metric", default="mean_intent_slot", type=str, help="Metric to save checkpoint")

parser.add_argument("--only_intent", default=0, type=float, help="The first epochs to optimize intent")

parser.add_argument("--ignore_index", default=0, type=int,
                    help='Specifies a target value that is ignored and does not contribute to the input gradient')

parser.add_argument(
    "--token_level",
    type=str,
    default="word-level",
    help="Tokens are at syllable level or word level (Vietnamese) [word-level, syllable-level]",
)

parser.add_argument('--intent_loss_coef', type=float, default=0.5, help='Coefficient for the intent loss.')
parser.add_argument('--aux_loss_coef', type=float, default=0.5, help='Coefficient for the aux task.')
parser.add_argument('--early_stopping', type=float, default=-1, help='Early stopping strategy')

parser.add_argument("--base_model", default=None, type=str, help="The pretrained model path")

# NOTE(review): same store_true + default=True pattern as --do_train, so this
# is always True.
parser.add_argument(
    "--num_intent_detection",
    action="store_true",
    default=True,
    help="Whether to use two-stage intent detection",
)

parser.add_argument(
    "--auxiliary_tasks",
    action="store_true",
    help="Whether to optimize with auxiliary tasks",
)

parser.add_argument(
    "--slot_decoder_size", type=int, default=512, help="hidden size of attention output vector"
)

parser.add_argument(
    "--intent_slot_attn_size", type=int, default=256, help="hidden size of attention output vector"
)

parser.add_argument(
    "--min_freq", type=int, default=1, help="Minimum number of frequency to be considered in the vocab"
)

# No default is declared here, so the value is None unless it is set explicitly.
parser.add_argument(
    '--intent_slot_attn_type', choices=['coattention', 'attention_flow'],
)

parser.add_argument(
    '--embedding_type', choices=['soft', 'hard'], default='soft',
)

parser.add_argument(
    "--label_embedding_size", type=int, default=256, help="hidden size of label embedding vector"
)

# CRF option
# NOTE(review): store_true + default=True again, so the CRF is always enabled.
parser.add_argument("--use_crf",default=True, action="store_true", help="Whether to use CRF")
parser.add_argument("--slot_pad_label", default="PAD", type=str, help="Pad token for slot label pad (to be ignore when calculate loss)")

# Parse an explicit empty argument list instead of sys.argv, so only the
# defaults declared above are used.
args = parser.parse_args([])

# Look up the pretrained checkpoint name for the chosen model type.
# MODEL_PATH_MAP is defined elsewhere in the notebook.
args.model_name_or_path = MODEL_PATH_MAP[args.model_type]
#main(args)


In [None]:
import os
import numpy as np
import torch
import logging
import copy
import json

from transformers import AutoTokenizer
from torch.utils.data import Dataset, TensorDataset
from torch.utils.data import DataLoader

#test
logger = logging.getLogger(__name__)

def convert_examples_to_features(examples, max_seq_len, tokenizer,
                                 pad_token_label_id=-100,
                                 cls_token_segment_id=0,
                                 pad_token_segment_id=0,
                                 sequence_a_segment_id=0,
                                 mask_padding_with_zero=True):
    """Convert word-level examples into fixed-length subword features.

    Every word of ``example.text`` is tokenized on its own. The position of
    each word's first subword (shifted by one for the leading CLS token) is
    recorded in ``heads``, together with position 0 for CLS and the position
    of SEP. The subword sequence is cut to ``max_seq_len - 2`` tokens, wrapped
    in CLS/SEP, converted to ids and padded to ``max_seq_len``.

    Args:
        examples: iterable of objects exposing ``guid``, ``text``, ``chars``,
            ``intent_label`` and ``slot_labels``.
        max_seq_len: length of ``input_ids`` / masks after padding.
        tokenizer: object providing ``cls_token``, ``sep_token``,
            ``unk_token``, ``pad_token_id``, ``tokenize`` and
            ``convert_tokens_to_ids``.
        pad_token_label_id: accepted but not used inside this function.
        cls_token_segment_id: segment id assigned to the CLS position.
        pad_token_segment_id: segment id assigned to padding positions.
        sequence_a_segment_id: segment id assigned to the subwords and SEP.
        mask_padding_with_zero: when true, real tokens get mask 1 and padding
            gets 0; otherwise the two values are swapped.

    Returns:
        A list of ``InputExample`` objects whose ``words`` field holds the
        padded input ids.

    Note:
        ``heads`` is not shortened when the subword sequence is truncated, so
        for over-long inputs it can contain positions beyond the kept tokens.
    """
    # Setting based on the current model type
    cls_token = tokenizer.cls_token
    sep_token = tokenizer.sep_token
    unk_token = tokenizer.unk_token
    pad_token_id = tokenizer.pad_token_id

    # Two positions are reserved for CLS and SEP.
    subword_budget = max_seq_len - 2
    if mask_padding_with_zero:
        real_flag, pad_flag = 1, 0
    else:
        real_flag, pad_flag = 0, 1

    converted = []
    for position, example in enumerate(examples):
        pieces = []
        word_starts = []
        # Zipping against slot_labels[1:-1] stops at the shorter of the two,
        # so only words that have a matching label are tokenized.
        for word, _ in zip(example.text, example.slot_labels[1:-1]):
            # A word the tokenizer cannot encode falls back to the unknown token.
            sub_tokens = tokenizer.tokenize(word) or [unk_token]
            word_starts.append(len(pieces) + 1)  # +1 for the cls token
            pieces.extend(sub_tokens)

        # Slicing is a no-op when the sequence already fits the budget.
        pieces = pieces[:subword_budget]

        # Positions: CLS at 0, one entry per word, then the SEP position.
        heads = [0] + word_starts + [len(pieces) + 1]
        tokens = [cls_token] + pieces + [sep_token]
        token_type_ids = [cls_token_segment_id] + [sequence_a_segment_id] * (len(pieces) + 1)

        input_ids = tokenizer.convert_tokens_to_ids(tokens)
        attention_mask = [real_flag] * len(input_ids)

        # Pad ids, mask and segment ids up to the fixed sequence length.
        missing = max_seq_len - len(input_ids)
        input_ids = input_ids + [pad_token_id] * missing
        attention_mask = attention_mask + [pad_flag] * missing
        token_type_ids = token_type_ids + [pad_token_segment_id] * missing

        assert len(input_ids) == max_seq_len, "Error with input length {} vs {}".format(len(input_ids), max_seq_len)
        assert len(attention_mask) == max_seq_len, "Error with attention mask length {} vs {}".format(len(attention_mask), max_seq_len)
        assert len(token_type_ids) == max_seq_len, "Error with token type length {} vs {}".format(len(token_type_ids), max_seq_len)
        assert len(heads) == len(example.slot_labels)

        # Dump the first few converted examples for manual inspection.
        if position < 5:
            logger.info("*** Example ***")
            logger.info("guid: %s" % example.guid)
            for template, values in (
                ("tokens: %s", tokens),
                ("input_ids: %s", input_ids),
                ("attention_mask: %s", attention_mask),
                ("token_type_ids: %s", token_type_ids),
                ("heads: %s", heads),
            ):
                logger.info(template % " ".join([str(value) for value in values]))

        converted.append(
            InputExample(
                guid=example.guid,
                words=input_ids,
                chars=example.chars,
                heads=heads,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
                intent_label=example.intent_label,
                slot_labels=example.slot_labels,
                text=example.text,
            )
        )

    return converted


class Vocab(object):
    """Token <-> index vocabulary with four reserved special tokens.

    The special tokens ``<PAD>``, ``<UNK>``, ``<s>`` and ``</s>`` are
    registered in that order by the constructor and therefore occupy
    indices 0-3 (``pad_index``, ``unk_index``, ``start_index``,
    ``end_index``).

    Args:
        min_freq: number of times a regular token has to be passed to
            :meth:`add` before it receives an index.
    """

    def __init__(self, min_freq=1):
        self.min_freq = min_freq
        self.word2index = {}
        self.index2word = []
        self.special_tokens = ['<PAD>', '<UNK>', '<s>', '</s>']

        # Number of times each (normalized) token has been passed to add().
        self.count = {}

        self.pad_token = '<PAD>'
        self.pad_index = 0
        self.add(self.pad_token)

        self.unk_token = '<UNK>'
        self.unk_index = 1
        self.add(self.unk_token)

        self.start_token = '<s>'
        self.start_index = 2
        self.add(self.start_token)

        self.end_token = '</s>'
        self.end_index = 3
        self.add(self.end_token)

    def add(self, token):
        """Count ``token`` and give it an index once it is frequent enough.

        Lists and tuples are walked recursively, so a nested structure such
        as a list of sentences can be added in one call. A token is indexed
        at most once; special tokens are indexed on first sight regardless
        of ``min_freq``.
        """
        if isinstance(token, (list, tuple)):
            for element in token:
                self.add(element)
            return

        assert isinstance(token, str)

        # With a frequency threshold, strip one leading and one trailing
        # non-alphanumeric character so that e.g. "(word)" counts as "word".
        if self.min_freq > 1 and token not in self.special_tokens:
            if len(token) > 1 and not token[0].isalnum():
                token = token[1:]

            if len(token) > 1 and not token[-1].isalnum():
                token = token[:-1]

        self.count[token] = self.count.get(token, 0) + 1

        # Never index a token twice. Previously a special token that was
        # added again (e.g. because the literal string occurs in the data)
        # was appended a second time and its word2index entry moved away
        # from pad_index / unk_index / start_index / end_index.
        if token in self.word2index:
            return

        if token in self.special_tokens or self.count[token] >= self.min_freq:
            self.word2index[token] = len(self.index2word)
            self.index2word.append(token)

    def get_index(self, token):
        """Return the index of ``token``, or ``unk_index`` when it is unknown.

        Lists and tuples are mapped element-wise (recursively) and returned
        as lists.
        """
        if isinstance(token, (list, tuple)):
            return [self.get_index(element) for element in token]

        assert isinstance(token, str)

        return self.word2index.get(token, self.unk_index)

    def get_token(self, index):
        """Return the token stored at ``index``.

        Lists and tuples are mapped element-wise (recursively) and returned
        as lists. An out-of-range index raises ``IndexError``.
        """
        if isinstance(index, (list, tuple)):
            return [self.get_token(element) for element in index]

        assert isinstance(index, int)
        return self.index2word[index]

    def save(self, path):
        """Write the index-ordered token list to ``path`` with ``torch.save``."""
        torch.save(self.index2word, path)

    def load(self, path):
        """Replace the vocabulary with the token list stored at ``path``.

        NOTE(review): ``torch.load`` unpickles the file, so only load caches
        that were produced by :meth:`save` on a trusted machine.
        """
        self.index2word = torch.load(path)
        self.word2index = {word: i for i, word in enumerate(self.index2word)}

    def __len__(self):
        return len(self.index2word)

    def __str__(self):
        return f'Vocab object with {len(self.index2word)} instances'


class InputExample(object):
    """
    Plain container for one utterance and everything derived from it.

    Args:
        guid: Unique id for the example.
        words: list. The word (or input) ids of the sequence.
        chars: (Optional) per-word character ids.
        heads: (Optional) positions of the first subword of each word.
        attention_mask: (Optional) list. Attention mask for the input ids.
        token_type_ids: (Optional) list. Segment ids for the input ids.
        intent_label: (Optional) The intent label of the example.
        slot_labels: (Optional) list. The slot labels of the example.
        text: (Optional) the original words of the utterance.
    """

    def __init__(self, guid, words, chars=None, heads=None, attention_mask=None, token_type_ids=None, intent_label=None, slot_labels=None, text=None):
        # Identification and raw content.
        self.guid = guid
        self.words = words
        self.chars = chars
        # Model-input side information.
        self.heads = heads
        self.attention_mask = attention_mask
        self.token_type_ids = token_type_ids
        # Targets and original text.
        self.intent_label = intent_label
        self.slot_labels = slot_labels
        self.text = text

    def __repr__(self):
        # The printable form is the pretty-printed JSON dump.
        return str(self.to_json_string())

    def to_dict(self):
        """Return a deep copy of the instance attributes as a dictionary."""
        return copy.deepcopy(vars(self))

    def to_json_string(self):
        """Return the attributes as indented, key-sorted JSON ending in a newline."""
        serialized = json.dumps(self.to_dict(), indent=2, sort_keys=True)
        return serialized + "\n"



class TextLoader(Dataset):
    """Dataset of intent/slot examples read from ``<data_dir>/<task>/<mode>``.

    The directory is expected to contain three line-aligned files:
    ``seq.in`` (whitespace-separated words), ``seq.out`` (one slot label per
    word) and ``label`` (intent labels, several joined by ``#``).

    Args:
        args: configuration object; this class reads ``data_dir``, ``task``,
            ``min_freq``, ``max_seq_len``, ``ignore_index`` and
            ``model_type`` from it and forwards it to ``get_intent_labels``
            and ``get_slots_all``.
        mode: sub-directory to read, e.g. ``'train'``, ``'dev'`` or ``'test'``.
    """

    def __init__(self, args, mode):
        self.args = args
        self.intent_labels = get_intent_labels(args)
        self.slot_labels, self.hiers = get_slots_all(args)

        self.vocab = Vocab(min_freq=self.args.min_freq)
        self.chars = Vocab()
        self.examples = self.build(mode)

    def load_bert(self, tokenizer):
        """Replace ``self.examples`` with their subword-level features."""
        pad_token_label_id = self.args.ignore_index
        self.examples = convert_examples_to_features(self.examples, self.args.max_seq_len, tokenizer,
                                                     pad_token_label_id=pad_token_label_id)

    def read(self, input_file, quotechar=None):
        """Return the lines of ``input_file`` with surrounding whitespace removed.

        ``quotechar`` is accepted for backward compatibility and ignored.
        """
        with open(input_file, "r", encoding="utf-8") as f:
            return [line.strip() for line in f]

    def read_file(self, input_file, quotechar=None):
        """Read the ``seq.in`` / ``label`` / ``seq.out`` triple of a split.

        :param input_file: directory that contains the three files.
        :param quotechar: accepted for backward compatibility and ignored.
        :return: list of sentences (lists of words), list of slot label
            lists, and list of single-element intent lists, in that order.
        """
        texts = self.read(os.path.join(input_file, 'seq.in'))
        intents = self.read(os.path.join(input_file, 'label'))
        slots = self.read(os.path.join(input_file, 'seq.out'))

        converted_texts = []
        converted_intents = []
        converted_slots = []

        for text, intent, slot in zip(texts, intents, slots):
            # Words and slot labels are paired positionally; zip drops any
            # surplus on either side, so both lists always have equal length.
            pairs = list(zip(text.split(), slot.split()))
            converted_texts.append([word for word, _ in pairs])
            converted_slots.append([label for _, label in pairs])
            converted_intents.append([intent])

        return converted_texts, converted_slots, converted_intents

    def _create_examples(self, texts, chars, intents, slots, set_type):
        """Build one ``InputExample`` per utterance.

        Words and characters are mapped to vocabulary indices, the intent
        string is turned into a multi-hot vector over ``self.intent_labels``
        and slot labels into indices of ``self.slot_labels``. Labels that are
        not in the label list fall back to the ``"UNK"`` entry. Word ids are
        wrapped in start/end indices and slot ids in ``'PAD'`` indices so the
        two stay aligned.
        """
        examples = []
        for i, (text, char, intent, slot) in enumerate(zip(texts, chars, intents, slots)):
            guid = "%s-%s" % (set_type, i)
            # 1. input_text
            words = self.vocab.get_index(text)  # Some are spaced twice
            words = [self.vocab.start_index] + words + [self.vocab.end_index]
            # char: pad every word to the longest word of this utterance.
            char = self.chars.get_index(char)
            # default=0 keeps an empty utterance (blank input line) from
            # raising ValueError in max().
            max_char = max((len(x) for x in char), default=0)
            char = [x + [0] * (max_char - len(x)) for x in char]
            # All-zero rows stand in for the start and end positions.
            char = [[0] * max_char] + char + [[0] * max_char]
            # 2. intent
            intent_label = [0 for _ in self.intent_labels]
            for _int in intent[0].split('#'):
                idx = self.intent_labels.index(_int) if _int in self.intent_labels else self.intent_labels.index("UNK")
                intent_label[idx] = 1
            # 3. slot
            slot_labels = [
                self.slot_labels.index(s) if s in self.slot_labels else self.slot_labels.index("UNK")
                for s in slot
            ]
            slot_labels = [self.slot_labels.index('PAD')] + slot_labels + [self.slot_labels.index('PAD')]
            assert len(words) == len(slot_labels)
            examples.append(InputExample(guid=guid, words=words, chars=char, intent_label=intent_label, slot_labels=slot_labels, text=text))
        return examples

    def build(self, mode):
        """Read the ``mode`` split, prepare the vocabularies and build examples.

        Word and character vocabularies are loaded from cache files in
        ``data_dir`` when those exist; otherwise they are built and saved,
        but only while reading the ``'train'`` split.
        """
        data_path = os.path.join(self.args.data_dir, self.args.task, mode)
        logger.info("LOOKING AT {}".format(data_path))
        texts, slots, intents = self.read_file(data_path)

        # Per utterance, a list of character lists (one per word).
        chars = [[list(word) for word in text] for text in texts]

        cache = os.path.join(self.args.data_dir, f'vocab_{self.args.task}')
        if os.path.exists(cache):
            self.vocab.load(cache)
        elif mode == 'train':
            self.vocab.add(texts)
            self.vocab.save(cache)
        cache_chars = os.path.join(self.args.data_dir, f'chars_{self.args.task}')
        if os.path.exists(cache_chars):
            self.chars.load(cache_chars)
        elif mode == 'train':
            self.chars.add(chars)
            self.chars.save(cache_chars)

        return self._create_examples(texts=texts,
                                     chars=chars,
                                     intents=intents,
                                     slots=slots,
                                     set_type=mode)

    def __getitem__(self, index):
        """Return the tensors of one example.

        For model types containing ``'bert'`` the tuple is
        ``(words, chars, heads, attention_mask, token_type_ids, intent, slot)``;
        otherwise it is ``(words, chars, intent, slot)``.
        """
        example = self.examples[index]

        words = torch.tensor(example.words, dtype=torch.long)

        intent = torch.tensor(example.intent_label, dtype=torch.float)
        slot = torch.tensor(example.slot_labels, dtype=torch.long)
        chars = torch.tensor(example.chars, dtype=torch.long)

        if 'bert' in self.args.model_type:
            attention_mask = torch.tensor(example.attention_mask, dtype=torch.long)
            token_type_ids = torch.tensor(example.token_type_ids, dtype=torch.long)
            heads = torch.tensor(example.heads, dtype=torch.long)
            return (words, chars, heads, attention_mask, token_type_ids, intent, slot)
        else:
            return (words, chars, intent, slot)

    def __len__(self):
        return len(self.examples)

class TextCollate():
    """Collate function that zero-pads a batch and orders it by length.

    Samples are either 4-tuples ``(words, chars, intent, slot)`` or longer
    tuples ``(words, chars, heads, attention_mask, token_type_ids, intent,
    slot)``; the second layout is assumed whenever the first sample has more
    than four fields. Output rows are ordered by decreasing slot-label
    length, and all padding uses the value 0.

    Args:
        pad_index: stored on the instance; not used by ``__call__``.
        num_intents: width of the intent vector of every sample.
        max_seq_len: length of the input id / mask / segment id tensors in
            the longer sample layout.
    """

    def __init__(self, pad_index, num_intents, max_seq_len):
        self.pad_index = pad_index
        self.num_intents = num_intents
        self.max_seq_len = max_seq_len

    def __call__(self, batch):
        """Pad ``batch`` and return the stacked tensors plus sequence lengths."""
        batch_size = len(batch)

        # The slot tensor is always the last field of a sample.
        len_list = [len(sample[-1]) for sample in batch]
        max_len = max(len_list)
        max_char = max([sample[1].size(1) for sample in batch])

        bert = len(batch[0]) > 4

        char_padded = torch.zeros(batch_size, max_len, max_char, dtype=torch.long)
        slot_padded = torch.zeros(batch_size, max_len, dtype=torch.long)
        intent = torch.zeros(batch_size, self.num_intents, dtype=torch.float)

        if bert:
            input_ids = torch.zeros(batch_size, self.max_seq_len, dtype=torch.long)
            attention_mask = torch.zeros(batch_size, self.max_seq_len, dtype=torch.long)
            token_type_ids = torch.zeros(batch_size, self.max_seq_len, dtype=torch.long)
            heads = torch.zeros(batch_size, max_len, dtype=torch.long)
        else:
            text_padded = torch.zeros(batch_size, max_len, dtype=torch.long)

        # Sample indices ordered from longest to shortest.
        order = np.argsort(len_list)[::-1]
        seq_lens = [len_list[source] for source in order]

        for row, source in enumerate(order):
            sample = batch[source]

            intent[row] = sample[-2]

            slot = sample[-1]
            slot_padded[row, :slot.size(0)] = slot

            char = sample[1]
            char_padded[row, :char.size(0), :char.size(1)] = char

            if bert:
                # These three are already max_seq_len long and copied whole.
                input_ids[row] = sample[0]
                attention_mask[row] = sample[3]
                token_type_ids[row] = sample[4]
                head = sample[2]
                heads[row, :head.size(0)] = head
            else:
                text = sample[0]
                text_padded[row, :text.size(0)] = text

        lengths = torch.tensor(seq_lens, dtype=torch.long)
        if bert:
            return input_ids, char_padded, heads, attention_mask, token_type_ids, intent, slot_padded, lengths
        return text_padded, char_padded, intent, slot_padded, lengths


# Build the training split once at cell level. When no vocabulary cache exists
# under args.data_dir, reading the 'train' split also creates and saves the
# word and character vocabularies.
train_dataset = TextLoader(args, 'train')



INFO:__main__:LOOKING AT ./data/atis/train


In [None]:
main(args)

INFO:__main__:LOOKING AT ./data/atis/train
INFO:__main__:LOOKING AT ./data/atis/dev
INFO:__main__:LOOKING AT ./data/atis/test
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

INFO:__main__:*** Example ***
INFO:__main__:guid: train-0
INFO:__main__:tokens: [CLS] i want to fly from baltimore to dallas round trip [SEP]
INFO:__main__:input_ids: 101 1045 2215 2000 4875 2013 6222 2000 5759 2461 4440 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:__main__:attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:__main__:token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
INFO:__main__:heads: 0 1 2 3 4 5 6 7 8 9 10 11
INFO:__main__:*** Example ***
INFO:__main__:guid: train-1
INFO:__main__:tokens: [CLS] round 

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

  torch.nn.init.normal(first_linear.weight, mean, std)
  torch.nn.init.normal(linear.weight, mean, std)
  torch.nn.init.normal(linear.weight, mean, std)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing JointBERT: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing JointBERT from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing JointBERT from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of JointBERT were not i

check init


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

  score = torch.where(mask[i].unsqueeze(1), next_score, score)
INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = -1
INFO:__main__:  intent_acc = 0.0
INFO:__main__:  intent_f1 = 0.10928787637406405
INFO:__main__:  loss = 33.922916412353516
INFO:__main__:  mean_intent_slot = 0.0005330490405117271
INFO:__main__:  num_acc = 0.0
INFO:__main__:  semantic_frame_acc = 0.0
INFO:__main__:  slot_acc = 0.0
INFO:__main__:  slot_f1 = 0.0010660980810234541
INFO:__main__:  slot_precision = 0.0007656967840735069
INFO:__main__:  slot_recall = 0.0017543859649122807


{'loss': 33.922916412353516, 'intent_acc': 0.0, 'intent_f1': 0.10928787637406405, 'slot_precision': 0.0007656967840735069, 'slot_recall': 0.0017543859649122807, 'slot_f1': 0.0010660980810234541, 'semantic_frame_acc': 0.0, 'slot_acc': 0.0, 'mean_intent_slot': 0.0005330490405117271, 'num_acc': 0.0, 'epoch': -1}


INFO:__main__:***** Running training *****
INFO:__main__:  Num examples = 4478
INFO:__main__:  Num Epochs = 50
INFO:__main__:  Total train batch size = 32
INFO:__main__:  Gradient Accumulation steps = 1
INFO:__main__:  Total optimization steps = 7000
INFO:__main__:  Logging steps = 140


Epoch:   0%|          | 0/50 [00:00<?, ?it/s]

Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 0


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 0
INFO:__main__:  intent_acc = 0.05
INFO:__main__:  intent_f1 = 0.04985044865403789
INFO:__main__:  loss = 9.33890688419342
INFO:__main__:  mean_intent_slot = 0.025
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.0
INFO:__main__:  slot_acc = 0.004
INFO:__main__:  slot_f1 = 0
INFO:__main__:  slot_precision = 0
INFO:__main__:  slot_recall = 0.0
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 0
INFO:__main__:  intent_acc = 0.040313549832026875
INFO:__main__:  intent_f1 = 0.039977790116601894
INFO:__main__:  loss = 9.112785509654454
INFO:__main__:  mean_intent_slot = 0.020156774916013438
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.0
INFO:__main__:  slot_acc = 0.0022396416573348264
INFO:__main__:  slot_f1 = 0
INFO:__main__:  slot_precision = 0
INFO:__main__:  slot_recall = 0.0


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 1


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 1
INFO:__main__:  intent_acc = 0.036
INFO:__main__:  intent_f1 = 0.03589232303090728
INFO:__main__:  loss = 5.115483105182648
INFO:__main__:  mean_intent_slot = 0.2903048327137547
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.01
INFO:__main__:  slot_acc = 0.2
INFO:__main__:  slot_f1 = 0.5446096654275093
INFO:__main__:  slot_precision = 0.5790513833992095
INFO:__main__:  slot_recall = 0.5140350877192983
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 1
INFO:__main__:  intent_acc = 0.0425531914893617
INFO:__main__:  intent_f1 = 0.04441976679622432
INFO:__main__:  loss = 4.966466614178249
INFO:__main__:  mean_intent_slot = 0.28065788807490233
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.008958566629339306
INFO:__main__:  slot_acc = 0.17357222844344905
INFO:__main__:  slot_f1 = 0.518762584660443
INFO:__main__:  slot_precision = 0.540015243902439
INFO:__main__:  slot_recall = 0.49911940824233886


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 2


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 2
INFO:__main__:  intent_acc = 0.714
INFO:__main__:  intent_f1 = 0.7158524426719841
INFO:__main__:  loss = 2.777223229408264
INFO:__main__:  mean_intent_slot = 0.7492734026745913
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.37
INFO:__main__:  slot_acc = 0.476
INFO:__main__:  slot_f1 = 0.7845468053491828
INFO:__main__:  slot_precision = 0.797583081570997
INFO:__main__:  slot_recall = 0.7719298245614035
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 2
INFO:__main__:  intent_acc = 0.7077267637178052
INFO:__main__:  intent_f1 = 0.7173792337590227
INFO:__main__:  loss = 2.7204665626798357
INFO:__main__:  mean_intent_slot = 0.7448208286674132
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.41881298992161253
INFO:__main__:  slot_acc = 0.5027995520716685
INFO:__main__:  slot_f1 = 0.7819148936170213
INFO:__main__:  slot_precision = 0.7872188504105676
INFO:__main__:  slot_recall = 0.7766819302571328


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 3


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 3
INFO:__main__:  intent_acc = 0.714
INFO:__main__:  intent_f1 = 0.7158524426719841
INFO:__main__:  loss = 1.7294695973396301
INFO:__main__:  mean_intent_slot = 0.7846024771453848
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.468
INFO:__main__:  slot_acc = 0.612
INFO:__main__:  slot_f1 = 0.8552049542907697
INFO:__main__:  slot_precision = 0.8625817965496728
INFO:__main__:  slot_recall = 0.847953216374269
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 3
INFO:__main__:  intent_acc = 0.7077267637178052
INFO:__main__:  intent_f1 = 0.7173792337590227
INFO:__main__:  loss = 1.8609136853899275
INFO:__main__:  mean_intent_slot = 0.7705300485255693
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.4748040313549832
INFO:__main__:  slot_acc = 0.5890257558790594
INFO:__main__:  slot_f1 = 0.8333333333333334
INFO:__main__:  slot_precision = 0.8389860764012853
INFO:__main__:  slot_recall = 0.8277562522014794


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 4


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 4
INFO:__main__:  intent_acc = 0.714
INFO:__main__:  intent_f1 = 0.7158524426719841
INFO:__main__:  loss = 1.2210425660014153
INFO:__main__:  mean_intent_slot = 0.8059736070381232
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.528
INFO:__main__:  slot_acc = 0.702
INFO:__main__:  slot_f1 = 0.8979472140762464
INFO:__main__:  slot_precision = 0.9005882352941177
INFO:__main__:  slot_recall = 0.8953216374269006
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 4
INFO:__main__:  intent_acc = 0.7077267637178052
INFO:__main__:  intent_f1 = 0.7173792337590227
INFO:__main__:  loss = 1.4567035053457533
INFO:__main__:  mean_intent_slot = 0.7924784675328118
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.5296752519596865
INFO:__main__:  slot_acc = 0.6741321388577828
INFO:__main__:  slot_f1 = 0.8772301713478184
INFO:__main__:  slot_precision = 0.8798724309000708
INFO:__main__:  slot_recall = 0.8746037337090525


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 5


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 5
INFO:__main__:  intent_acc = 0.714
INFO:__main__:  intent_f1 = 0.7158524426719841
INFO:__main__:  loss = 0.9727346003055573
INFO:__main__:  mean_intent_slot = 0.8249788484136311
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.582
INFO:__main__:  slot_acc = 0.8
INFO:__main__:  slot_f1 = 0.9359576968272622
INFO:__main__:  slot_precision = 0.9403778040141676
INFO:__main__:  slot_recall = 0.9315789473684211
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 5
INFO:__main__:  intent_acc = 0.7077267637178052
INFO:__main__:  intent_f1 = 0.7173792337590227
INFO:__main__:  loss = 1.240065974848611
INFO:__main__:  mean_intent_slot = 0.8060634172008357
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.574468085106383
INFO:__main__:  slot_acc = 0.7413213885778276
INFO:__main__:  slot_f1 = 0.9044000706838663
INFO:__main__:  slot_precision = 0.9074468085106383
INFO:__main__:  slot_recall = 0.9013737231419514


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 6


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 6
INFO:__main__:  intent_acc = 0.714
INFO:__main__:  intent_f1 = 0.7158524426719841
INFO:__main__:  loss = 0.7797044850885868
INFO:__main__:  mean_intent_slot = 0.8292222222222222
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.604
INFO:__main__:  slot_acc = 0.838
INFO:__main__:  slot_f1 = 0.9444444444444444
INFO:__main__:  slot_precision = 0.9444444444444444
INFO:__main__:  slot_recall = 0.9444444444444444
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 6
INFO:__main__:  intent_acc = 0.7077267637178052
INFO:__main__:  intent_f1 = 0.7173792337590227
INFO:__main__:  loss = 1.1362710275820322
INFO:__main__:  mean_intent_slot = 0.8152526625359124
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.6259798432250839
INFO:__main__:  slot_acc = 0.8029115341545353
INFO:__main__:  slot_f1 = 0.9227785613540198
INFO:__main__:  slot_precision = 0.9237557359689376
INFO:__main__:  slot_recall = 0.92180345191969


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 7


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 7
INFO:__main__:  intent_acc = 0.714
INFO:__main__:  intent_f1 = 0.7158524426719841
INFO:__main__:  loss = 0.6256816238164902
INFO:__main__:  mean_intent_slot = 0.8324672897196261
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.618
INFO:__main__:  slot_acc = 0.856
INFO:__main__:  slot_f1 = 0.9509345794392523
INFO:__main__:  slot_precision = 0.9498249708284714
INFO:__main__:  slot_recall = 0.952046783625731
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 7
INFO:__main__:  intent_acc = 0.7077267637178052
INFO:__main__:  intent_f1 = 0.7173792337590227
INFO:__main__:  loss = 1.039718012724604
INFO:__main__:  mean_intent_slot = 0.8167024096856021
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.6293393057110862
INFO:__main__:  slot_acc = 0.8118701007838746
INFO:__main__:  slot_f1 = 0.9256780556533991
INFO:__main__:  slot_precision = 0.9256780556533991
INFO:__main__:  slot_recall = 0.9256780556533991


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 8


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 8
INFO:__main__:  intent_acc = 0.782
INFO:__main__:  intent_f1 = 0.7856430707876371
INFO:__main__:  loss = 0.5438729599118233
INFO:__main__:  mean_intent_slot = 0.8688296382730455
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.694
INFO:__main__:  slot_acc = 0.878
INFO:__main__:  slot_f1 = 0.955659276546091
INFO:__main__:  slot_precision = 0.9534342258440046
INFO:__main__:  slot_recall = 0.9578947368421052
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 8
INFO:__main__:  intent_acc = 0.7368421052631579
INFO:__main__:  intent_f1 = 0.7462520821765685
INFO:__main__:  loss = 0.9437242448329926
INFO:__main__:  mean_intent_slot = 0.8345138144024854
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.6606942889137738
INFO:__main__:  slot_acc = 0.8331466965285554
INFO:__main__:  slot_f1 = 0.9321855235418129
INFO:__main__:  slot_precision = 0.9298983526112864
INFO:__main__:  slot_recall = 0.9344839732300105


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 9


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 9
INFO:__main__:  intent_acc = 0.818
INFO:__main__:  intent_f1 = 0.8215353938185442
INFO:__main__:  loss = 0.5045423582196236
INFO:__main__:  mean_intent_slot = 0.889549868382568
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.72
INFO:__main__:  slot_acc = 0.884
INFO:__main__:  slot_f1 = 0.961099736765136
INFO:__main__:  slot_precision = 0.9613809245172615
INFO:__main__:  slot_recall = 0.9608187134502923
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 9
INFO:__main__:  intent_acc = 0.77491601343785
INFO:__main__:  intent_f1 = 0.7828983897834536
INFO:__main__:  loss = 0.9343862597431455
INFO:__main__:  mean_intent_slot = 0.8576741374516609
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.6864501679731243
INFO:__main__:  slot_acc = 0.8544232922732363
INFO:__main__:  slot_f1 = 0.9404322614654718
INFO:__main__:  slot_precision = 0.938288920056101
INFO:__main__:  slot_recall = 0.9425854174004932


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 10


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 10
INFO:__main__:  intent_acc = 0.856
INFO:__main__:  intent_f1 = 0.859421734795613
INFO:__main__:  loss = 0.4604128338396549
INFO:__main__:  mean_intent_slot = 0.9105870646766169
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.768
INFO:__main__:  slot_acc = 0.9
INFO:__main__:  slot_f1 = 0.9651741293532338
INFO:__main__:  slot_precision = 0.9660222612770943
INFO:__main__:  slot_recall = 0.9643274853801169
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 10
INFO:__main__:  intent_acc = 0.812989921612542
INFO:__main__:  intent_f1 = 0.8206551915602442
INFO:__main__:  loss = 0.8849026284047535
INFO:__main__:  mean_intent_slot = 0.8796510952896366
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.7211646136618141
INFO:__main__:  slot_acc = 0.8667413213885778
INFO:__main__:  slot_f1 = 0.9463122689667313
INFO:__main__:  slot_precision = 0.9458128078817734
INFO:__main__:  slot_recall = 0.9468122578372666


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 11


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 11
INFO:__main__:  intent_acc = 0.888
INFO:__main__:  intent_f1 = 0.8913260219341974
INFO:__main__:  loss = 0.43237696774303913
INFO:__main__:  mean_intent_slot = 0.9305181711606095
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.81
INFO:__main__:  slot_acc = 0.916
INFO:__main__:  slot_f1 = 0.973036342321219
INFO:__main__:  slot_precision = 0.9753231492361927
INFO:__main__:  slot_recall = 0.9707602339181286
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 11
INFO:__main__:  intent_acc = 0.8566629339305711
INFO:__main__:  intent_f1 = 0.863964464186563
INFO:__main__:  loss = 0.9002090224197933
INFO:__main__:  mean_intent_slot = 0.9017656682319992
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.7637178051511758
INFO:__main__:  slot_acc = 0.8656215005599104
INFO:__main__:  slot_f1 = 0.9468684025334272
INFO:__main__:  slot_precision = 0.9458699472759227
INFO:__main__:  slot_recall = 0.94786896794646


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 12


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 12
INFO:__main__:  intent_acc = 0.894
INFO:__main__:  intent_f1 = 0.8973080757726819
INFO:__main__:  loss = 0.4232565928250551
INFO:__main__:  mean_intent_slot = 0.9345476120714913
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.826
INFO:__main__:  slot_acc = 0.926
INFO:__main__:  slot_f1 = 0.9750952241429827
INFO:__main__:  slot_precision = 0.9770992366412213
INFO:__main__:  slot_recall = 0.9730994152046784
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 12
INFO:__main__:  intent_acc = 0.8723404255319149
INFO:__main__:  intent_f1 = 0.880621876735147
INFO:__main__:  loss = 0.889623333300863
INFO:__main__:  mean_intent_slot = 0.912914398812469
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.7816349384098544
INFO:__main__:  slot_acc = 0.8790593505039194
INFO:__main__:  slot_f1 = 0.9534883720930233
INFO:__main__:  slot_precision = 0.9538244624603455
INFO:__main__:  slot_recall = 0.9531525184924269


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 13


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 13
INFO:__main__:  intent_acc = 0.894
INFO:__main__:  intent_f1 = 0.8973080757726819
INFO:__main__:  loss = 0.3781479364261031
INFO:__main__:  mean_intent_slot = 0.9357196015235864
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.834
INFO:__main__:  slot_acc = 0.932
INFO:__main__:  slot_f1 = 0.9774392030471727
INFO:__main__:  slot_precision = 0.9794480328831474
INFO:__main__:  slot_recall = 0.9754385964912281
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 13
INFO:__main__:  intent_acc = 0.8678611422172452
INFO:__main__:  intent_f1 = 0.8761799000555247
INFO:__main__:  loss = 0.8995907455682755
INFO:__main__:  mean_intent_slot = 0.9094009421302509
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.7849944008958567
INFO:__main__:  slot_acc = 0.8790593505039194
INFO:__main__:  slot_f1 = 0.9509407420432565
INFO:__main__:  slot_precision = 0.949438202247191
INFO:__main__:  slot_recall = 0.952448045086298


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 14


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 14
INFO:__main__:  intent_acc = 0.894
INFO:__main__:  intent_f1 = 0.8973080757726819
INFO:__main__:  loss = 0.4065073188394308
INFO:__main__:  mean_intent_slot = 0.9354401521802751
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.832
INFO:__main__:  slot_acc = 0.928
INFO:__main__:  slot_f1 = 0.9768803043605502
INFO:__main__:  slot_precision = 0.9777387229056825
INFO:__main__:  slot_recall = 0.9760233918128655


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 15


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 15
INFO:__main__:  intent_acc = 0.894
INFO:__main__:  intent_f1 = 0.8973080757726819
INFO:__main__:  loss = 0.38061691261827946
INFO:__main__:  mean_intent_slot = 0.9354333821376282
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.834
INFO:__main__:  slot_acc = 0.932
INFO:__main__:  slot_f1 = 0.9768667642752562
INFO:__main__:  slot_precision = 0.9782991202346041
INFO:__main__:  slot_recall = 0.9754385964912281
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 15
INFO:__main__:  intent_acc = 0.8745800671892497
INFO:__main__:  intent_f1 = 0.8828428650749583
INFO:__main__:  loss = 0.8816879093647003
INFO:__main__:  mean_intent_slot = 0.9159947150448184
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.7961926091825308
INFO:__main__:  slot_acc = 0.8947368421052632
INFO:__main__:  slot_f1 = 0.9574093629003873
INFO:__main__:  slot_precision = 0.9567358424199789
INFO:__main__:  slot_recall = 0.9580838323353293


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 16


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 16
INFO:__main__:  intent_acc = 0.896
INFO:__main__:  intent_f1 = 0.8993020937188434
INFO:__main__:  loss = 0.35282058641314507
INFO:__main__:  mean_intent_slot = 0.9365964912280702
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.836
INFO:__main__:  slot_acc = 0.932
INFO:__main__:  slot_f1 = 0.9771929824561404
INFO:__main__:  slot_precision = 0.9771929824561404
INFO:__main__:  slot_recall = 0.9771929824561404
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 16
INFO:__main__:  intent_acc = 0.8745800671892497
INFO:__main__:  intent_f1 = 0.8828428650749583
INFO:__main__:  loss = 0.9219836039202554
INFO:__main__:  mean_intent_slot = 0.9159872166932165
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.799552071668533
INFO:__main__:  slot_acc = 0.896976483762598
INFO:__main__:  slot_f1 = 0.9573943661971832
INFO:__main__:  slot_precision = 0.9570573741640267
INFO:__main__:  slot_recall = 0.9577315956322648


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 17


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 17
INFO:__main__:  intent_acc = 0.914
INFO:__main__:  intent_f1 = 0.9172482552342972
INFO:__main__:  loss = 0.3115303535014391
INFO:__main__:  mean_intent_slot = 0.9466289804265264
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.862
INFO:__main__:  slot_acc = 0.942
INFO:__main__:  slot_f1 = 0.9792579608530528
INFO:__main__:  slot_precision = 0.9784004670169294
INFO:__main__:  slot_recall = 0.9801169590643275
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 17
INFO:__main__:  intent_acc = 0.8756998880179171
INFO:__main__:  intent_f1 = 0.883953359244864
INFO:__main__:  loss = 0.9723366647958755
INFO:__main__:  mean_intent_slot = 0.9157665373575421
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.7973124300111982
INFO:__main__:  slot_acc = 0.8924972004479284
INFO:__main__:  slot_f1 = 0.955833186697167
INFO:__main__:  slot_precision = 0.9549929676511955
INFO:__main__:  slot_recall = 0.9566748855230714


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 18


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 18
INFO:__main__:  intent_acc = 0.936
INFO:__main__:  intent_f1 = 0.9391824526420738
INFO:__main__:  loss = 0.2971779922954738
INFO:__main__:  mean_intent_slot = 0.9584942965779468
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.882
INFO:__main__:  slot_acc = 0.942
INFO:__main__:  slot_f1 = 0.9809885931558935
INFO:__main__:  slot_precision = 0.981275599765945
INFO:__main__:  slot_recall = 0.980701754385965
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 18
INFO:__main__:  intent_acc = 0.8824188129899216
INFO:__main__:  intent_f1 = 0.8906163242642976
INFO:__main__:  loss = 0.9300091798816409
INFO:__main__:  mean_intent_slot = 0.9189500364439314
INFO:__main__:  num_acc = 0.9832026875699889
INFO:__main__:  semantic_frame_acc = 0.7984322508398656
INFO:__main__:  slot_acc = 0.8902575587905935
INFO:__main__:  slot_f1 = 0.9554812598979412
INFO:__main__:  slot_precision = 0.9546413502109705
INFO:__main__:  slot_recall = 0.9563226488200071


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 19


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 19
INFO:__main__:  intent_acc = 0.932
INFO:__main__:  intent_f1 = 0.9351944167497507
INFO:__main__:  loss = 0.37724296748638153
INFO:__main__:  mean_intent_slot = 0.9544333821376281
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.866
INFO:__main__:  slot_acc = 0.93
INFO:__main__:  slot_f1 = 0.9768667642752562
INFO:__main__:  slot_precision = 0.9782991202346041
INFO:__main__:  slot_recall = 0.9754385964912281


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 20


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 20
INFO:__main__:  intent_acc = 0.934
INFO:__main__:  intent_f1 = 0.9371884346959122
INFO:__main__:  loss = 0.3686746209859848
INFO:__main__:  mean_intent_slot = 0.9553040935672514
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.872
INFO:__main__:  slot_acc = 0.934
INFO:__main__:  slot_f1 = 0.9766081871345029
INFO:__main__:  slot_precision = 0.9766081871345029
INFO:__main__:  slot_recall = 0.9766081871345029


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 21


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 21
INFO:__main__:  intent_acc = 0.938
INFO:__main__:  intent_f1 = 0.9411764705882353
INFO:__main__:  loss = 0.348717350512743
INFO:__main__:  mean_intent_slot = 0.9580254609306409
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.872
INFO:__main__:  slot_acc = 0.932
INFO:__main__:  slot_f1 = 0.9780509218612818
INFO:__main__:  slot_precision = 0.9789103690685413
INFO:__main__:  slot_recall = 0.9771929824561404


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 22


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 22
INFO:__main__:  intent_acc = 0.938
INFO:__main__:  intent_f1 = 0.9411764705882353
INFO:__main__:  loss = 0.3491499889642
INFO:__main__:  mean_intent_slot = 0.9570187025131501
INFO:__main__:  num_acc = 0.994
INFO:__main__:  semantic_frame_acc = 0.872
INFO:__main__:  slot_acc = 0.928
INFO:__main__:  slot_f1 = 0.9760374050263003
INFO:__main__:  slot_precision = 0.9754672897196262
INFO:__main__:  slot_recall = 0.9766081871345029


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 23


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 23
INFO:__main__:  intent_acc = 0.958
INFO:__main__:  intent_f1 = 0.9602385685884692
INFO:__main__:  loss = 0.3536002244800329
INFO:__main__:  mean_intent_slot = 0.9680382928968138
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.9
INFO:__main__:  slot_acc = 0.936
INFO:__main__:  slot_f1 = 0.9780765857936276
INFO:__main__:  slot_precision = 0.9777907656341321
INFO:__main__:  slot_recall = 0.9783625730994152
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 23
INFO:__main__:  intent_acc = 0.8902575587905935
INFO:__main__:  intent_f1 = 0.8965135583840621
INFO:__main__:  loss = 1.0164017592157637
INFO:__main__:  mean_intent_slot = 0.9226252632208944
INFO:__main__:  num_acc = 0.9876819708846585
INFO:__main__:  semantic_frame_acc = 0.8073908174692049
INFO:__main__:  slot_acc = 0.8913773796192609
INFO:__main__:  slot_f1 = 0.9549929676511955
INFO:__main__:  slot_precision = 0.9533169533169533
INFO:__main__:  slot_recall = 0.9566748855230714


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 24


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 24
INFO:__main__:  intent_acc = 0.954
INFO:__main__:  intent_f1 = 0.9562624254473161
INFO:__main__:  loss = 0.3397528724744916
INFO:__main__:  mean_intent_slot = 0.9658888888888888
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.894
INFO:__main__:  slot_acc = 0.934
INFO:__main__:  slot_f1 = 0.9777777777777777
INFO:__main__:  slot_precision = 0.9777777777777777
INFO:__main__:  slot_recall = 0.9777777777777777


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 25


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 25
INFO:__main__:  intent_acc = 0.96
INFO:__main__:  intent_f1 = 0.9622266401590457
INFO:__main__:  loss = 0.327116085216403
INFO:__main__:  mean_intent_slot = 0.9693243638490787
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.9
INFO:__main__:  slot_acc = 0.934
INFO:__main__:  slot_f1 = 0.9786487276981574
INFO:__main__:  slot_precision = 0.9789350497366881
INFO:__main__:  slot_recall = 0.9783625730994152
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 25
INFO:__main__:  intent_acc = 0.9003359462486002
INFO:__main__:  intent_f1 = 0.9064748201438849
INFO:__main__:  loss = 1.0439630938427789
INFO:__main__:  mean_intent_slot = 0.927564630995892
INFO:__main__:  num_acc = 0.9876819708846585
INFO:__main__:  semantic_frame_acc = 0.8085106382978723
INFO:__main__:  slot_acc = 0.8880179171332587
INFO:__main__:  slot_f1 = 0.9547933157431837
INFO:__main__:  slot_precision = 0.9536191145467322
INFO:__main__:  slot_recall = 0.9559704121169426


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 26


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 26
INFO:__main__:  intent_acc = 0.96
INFO:__main__:  intent_f1 = 0.9622266401590457
INFO:__main__:  loss = 0.3228543857112527
INFO:__main__:  mean_intent_slot = 0.9697660818713449
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.9
INFO:__main__:  slot_acc = 0.934
INFO:__main__:  slot_f1 = 0.97953216374269
INFO:__main__:  slot_precision = 0.97953216374269
INFO:__main__:  slot_recall = 0.97953216374269
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 26
INFO:__main__:  intent_acc = 0.9137737961926092
INFO:__main__:  intent_f1 = 0.9208633093525179
INFO:__main__:  loss = 1.0390786794679505
INFO:__main__:  mean_intent_slot = 0.9347272076284889
INFO:__main__:  num_acc = 0.9876819708846585
INFO:__main__:  semantic_frame_acc = 0.8185890257558791
INFO:__main__:  slot_acc = 0.8902575587905935
INFO:__main__:  slot_f1 = 0.9556806190643686
INFO:__main__:  slot_precision = 0.954337899543379
INFO:__main__:  slot_recall = 0.957027122226136


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 27


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 27
INFO:__main__:  intent_acc = 0.964
INFO:__main__:  intent_f1 = 0.9662027833001988
INFO:__main__:  loss = 0.32186830695718527
INFO:__main__:  mean_intent_slot = 0.9719152294650687
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.908
INFO:__main__:  slot_acc = 0.938
INFO:__main__:  slot_f1 = 0.9798304589301374
INFO:__main__:  slot_precision = 0.9795441262419637
INFO:__main__:  slot_recall = 0.9801169590643275
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 27
INFO:__main__:  intent_acc = 0.9260918253079508
INFO:__main__:  intent_f1 = 0.9314159292035398
INFO:__main__:  loss = 1.0280310937336512
INFO:__main__:  mean_intent_slot = 0.9409935492731987
INFO:__main__:  num_acc = 0.9888017917133258
INFO:__main__:  semantic_frame_acc = 0.832026875699888
INFO:__main__:  slot_acc = 0.8936170212765957
INFO:__main__:  slot_f1 = 0.9558952732384467
INFO:__main__:  slot_precision = 0.9537166900420757
INFO:__main__:  slot_recall = 0.9580838323353293


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 28


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 28
INFO:__main__:  intent_acc = 0.962
INFO:__main__:  intent_f1 = 0.9642147117296223
INFO:__main__:  loss = 0.33899517077952623
INFO:__main__:  mean_intent_slot = 0.9704736842105264
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.904
INFO:__main__:  slot_acc = 0.936
INFO:__main__:  slot_f1 = 0.9789473684210527
INFO:__main__:  slot_precision = 0.9789473684210527
INFO:__main__:  slot_recall = 0.9789473684210527


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 29


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 29
INFO:__main__:  intent_acc = 0.968
INFO:__main__:  intent_f1 = 0.9701789264413518
INFO:__main__:  loss = 0.34030827693641186
INFO:__main__:  mean_intent_slot = 0.9740584795321637
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.912
INFO:__main__:  slot_acc = 0.938
INFO:__main__:  slot_f1 = 0.9801169590643275
INFO:__main__:  slot_precision = 0.9801169590643275
INFO:__main__:  slot_recall = 0.9801169590643275
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 29
INFO:__main__:  intent_acc = 0.9260918253079508
INFO:__main__:  intent_f1 = 0.9319313779745435
INFO:__main__:  loss = 1.0399497470685415
INFO:__main__:  mean_intent_slot = 0.9415821547370156
INFO:__main__:  num_acc = 0.9876819708846585
INFO:__main__:  semantic_frame_acc = 0.832026875699888
INFO:__main__:  slot_acc = 0.8947368421052632
INFO:__main__:  slot_f1 = 0.9570724841660803
INFO:__main__:  slot_precision = 0.9560632688927944
INFO:__main__:  slot_recall = 0.9580838323353293


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 30


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 30
INFO:__main__:  intent_acc = 0.968
INFO:__main__:  intent_f1 = 0.9701789264413518
INFO:__main__:  loss = 0.3364059906452894
INFO:__main__:  mean_intent_slot = 0.9744942965779467
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.914
INFO:__main__:  slot_acc = 0.94
INFO:__main__:  slot_f1 = 0.9809885931558935
INFO:__main__:  slot_precision = 0.981275599765945
INFO:__main__:  slot_recall = 0.980701754385965
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 30
INFO:__main__:  intent_acc = 0.93505039193729
INFO:__main__:  intent_f1 = 0.9407858328721638
INFO:__main__:  loss = 1.026029063122613
INFO:__main__:  mean_intent_slot = 0.9460689877379028
INFO:__main__:  num_acc = 0.9876819708846585
INFO:__main__:  semantic_frame_acc = 0.8387458006718925
INFO:__main__:  slot_acc = 0.8936170212765957
INFO:__main__:  slot_f1 = 0.9570875835385156
INFO:__main__:  slot_precision = 0.9557428872497366
INFO:__main__:  slot_recall = 0.9584360690383938


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 31


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 31
INFO:__main__:  intent_acc = 0.97
INFO:__main__:  intent_f1 = 0.9721669980119284
INFO:__main__:  loss = 0.34865615144371986
INFO:__main__:  mean_intent_slot = 0.9749093302135128
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.914
INFO:__main__:  slot_acc = 0.938
INFO:__main__:  slot_f1 = 0.9798186604270255
INFO:__main__:  slot_precision = 0.9801053247513165
INFO:__main__:  slot_recall = 0.97953216374269
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 31
INFO:__main__:  intent_acc = 0.9417693169092946
INFO:__main__:  intent_f1 = 0.9469026548672567
INFO:__main__:  loss = 1.0260709247418813
INFO:__main__:  mean_intent_slot = 0.949443533674331
INFO:__main__:  num_acc = 0.9888017917133258
INFO:__main__:  semantic_frame_acc = 0.8499440089585666
INFO:__main__:  slot_acc = 0.896976483762598
INFO:__main__:  slot_f1 = 0.9571177504393674
INFO:__main__:  slot_precision = 0.9551034724658015
INFO:__main__:  slot_recall = 0.9591405424445227


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 32


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 32
INFO:__main__:  intent_acc = 0.972
INFO:__main__:  intent_f1 = 0.974155069582505
INFO:__main__:  loss = 0.3597451187670231
INFO:__main__:  mean_intent_slot = 0.9759152294650687
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.916
INFO:__main__:  slot_acc = 0.938
INFO:__main__:  slot_f1 = 0.9798304589301374
INFO:__main__:  slot_precision = 0.9795441262419637
INFO:__main__:  slot_recall = 0.9801169590643275
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 32
INFO:__main__:  intent_acc = 0.93505039193729
INFO:__main__:  intent_f1 = 0.9407858328721638
INFO:__main__:  loss = 1.0378866397908755
INFO:__main__:  mean_intent_slot = 0.9462448582971712
INFO:__main__:  num_acc = 0.9876819708846585
INFO:__main__:  semantic_frame_acc = 0.8432250839865622
INFO:__main__:  slot_acc = 0.8958566629339306
INFO:__main__:  slot_f1 = 0.9574393246570524
INFO:__main__:  slot_precision = 0.9560941341763259
INFO:__main__:  slot_recall = 0.9587883057414582


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 33


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 33
INFO:__main__:  intent_acc = 0.966
INFO:__main__:  intent_f1 = 0.9681908548707754
INFO:__main__:  loss = 0.3429557653144002
INFO:__main__:  mean_intent_slot = 0.9730642898889539
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.91
INFO:__main__:  slot_acc = 0.938
INFO:__main__:  slot_f1 = 0.9801285797779077
INFO:__main__:  slot_precision = 0.9795560747663551
INFO:__main__:  slot_recall = 0.980701754385965
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 33
INFO:__main__:  intent_acc = 0.9384098544232923
INFO:__main__:  intent_f1 = 0.9419568822553898
INFO:__main__:  loss = 1.0560031980276108
INFO:__main__:  mean_intent_slot = 0.9482689666205131
INFO:__main__:  num_acc = 0.9899216125419933
INFO:__main__:  semantic_frame_acc = 0.8465845464725644
INFO:__main__:  slot_acc = 0.8958566629339306
INFO:__main__:  slot_f1 = 0.958128078817734
INFO:__main__:  slot_precision = 0.9571177504393673
INFO:__main__:  slot_recall = 0.9591405424445227


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 34


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 34
INFO:__main__:  intent_acc = 0.972
INFO:__main__:  intent_f1 = 0.974155069582505
INFO:__main__:  loss = 0.36017549596726894
INFO:__main__:  mean_intent_slot = 0.9759152294650687
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.914
INFO:__main__:  slot_acc = 0.936
INFO:__main__:  slot_f1 = 0.9798304589301374
INFO:__main__:  slot_precision = 0.9795441262419637
INFO:__main__:  slot_recall = 0.9801169590643275
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 34
INFO:__main__:  intent_acc = 0.9440089585666294
INFO:__main__:  intent_f1 = 0.9507470946319867
INFO:__main__:  loss = 1.0765749428953444
INFO:__main__:  mean_intent_slot = 0.9501047431355575
INFO:__main__:  num_acc = 0.9876819708846585
INFO:__main__:  semantic_frame_acc = 0.8477043673012318
INFO:__main__:  slot_acc = 0.8924972004479284
INFO:__main__:  slot_f1 = 0.9562005277044856
INFO:__main__:  slot_precision = 0.955024595924104
INFO:__main__:  slot_recall = 0.9573793589292005


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 35


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 35
INFO:__main__:  intent_acc = 0.97
INFO:__main__:  intent_f1 = 0.9721669980119284
INFO:__main__:  loss = 0.34620468877255917
INFO:__main__:  mean_intent_slot = 0.9754998538439053
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.916
INFO:__main__:  slot_acc = 0.94
INFO:__main__:  slot_f1 = 0.9809997076878105
INFO:__main__:  slot_precision = 0.9807130333138515
INFO:__main__:  slot_recall = 0.9812865497076023
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 35
INFO:__main__:  intent_acc = 0.9451287793952967
INFO:__main__:  intent_f1 = 0.9491150442477877
INFO:__main__:  loss = 1.0605685359665327
INFO:__main__:  mean_intent_slot = 0.9511848366446833
INFO:__main__:  num_acc = 0.9888017917133258
INFO:__main__:  semantic_frame_acc = 0.851063829787234
INFO:__main__:  slot_acc = 0.8947368421052632
INFO:__main__:  slot_f1 = 0.9572408938940701
INFO:__main__:  slot_precision = 0.9563994374120957
INFO:__main__:  slot_recall = 0.9580838323353293


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 36


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 36
INFO:__main__:  intent_acc = 0.972
INFO:__main__:  intent_f1 = 0.974155069582505
INFO:__main__:  loss = 0.3438690397888422
INFO:__main__:  mean_intent_slot = 0.9760642898889538
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.916
INFO:__main__:  slot_acc = 0.938
INFO:__main__:  slot_f1 = 0.9801285797779077
INFO:__main__:  slot_precision = 0.9795560747663551
INFO:__main__:  slot_recall = 0.980701754385965
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 36
INFO:__main__:  intent_acc = 0.948488241881299
INFO:__main__:  intent_f1 = 0.953012714206744
INFO:__main__:  loss = 1.0601657490645136
INFO:__main__:  mean_intent_slot = 0.9529637832691757
INFO:__main__:  num_acc = 0.9899216125419933
INFO:__main__:  semantic_frame_acc = 0.8544232922732363
INFO:__main__:  slot_acc = 0.8958566629339306
INFO:__main__:  slot_f1 = 0.9574393246570524
INFO:__main__:  slot_precision = 0.9560941341763259
INFO:__main__:  slot_recall = 0.9587883057414582


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 37


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 37
INFO:__main__:  intent_acc = 0.97
INFO:__main__:  intent_f1 = 0.9721669980119284
INFO:__main__:  loss = 0.36981993727386
INFO:__main__:  mean_intent_slot = 0.9757921660333235
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.92
INFO:__main__:  slot_acc = 0.944
INFO:__main__:  slot_f1 = 0.9815843320666472
INFO:__main__:  slot_precision = 0.9812974868497955
INFO:__main__:  slot_recall = 0.9818713450292398
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 37
INFO:__main__:  intent_acc = 0.9451287793952967
INFO:__main__:  intent_f1 = 0.9496959646213378
INFO:__main__:  loss = 1.0697524505002158
INFO:__main__:  mean_intent_slot = 0.951269071147842
INFO:__main__:  num_acc = 0.9899216125419933
INFO:__main__:  semantic_frame_acc = 0.851063829787234
INFO:__main__:  slot_acc = 0.8958566629339306
INFO:__main__:  slot_f1 = 0.9574093629003873
INFO:__main__:  slot_precision = 0.9567358424199789
INFO:__main__:  slot_recall = 0.9580838323353293


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 38


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 38
INFO:__main__:  intent_acc = 0.972
INFO:__main__:  intent_f1 = 0.974155069582505
INFO:__main__:  loss = 0.342086891643703
INFO:__main__:  mean_intent_slot = 0.9773717461245979
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.924
INFO:__main__:  slot_acc = 0.946
INFO:__main__:  slot_f1 = 0.9827434922491958
INFO:__main__:  slot_precision = 0.9830310122878877
INFO:__main__:  slot_recall = 0.9824561403508771
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 38
INFO:__main__:  intent_acc = 0.9496080627099664
INFO:__main__:  intent_f1 = 0.955174322080797
INFO:__main__:  loss = 1.0678008965083532
INFO:__main__:  mean_intent_slot = 0.9536153554274291
INFO:__main__:  num_acc = 0.9876819708846585
INFO:__main__:  semantic_frame_acc = 0.858902575587906
INFO:__main__:  slot_acc = 0.8980963045912654
INFO:__main__:  slot_f1 = 0.9576226481448918
INFO:__main__:  slot_precision = 0.9561095505617978
INFO:__main__:  slot_recall = 0.9591405424445227


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 39


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 39
INFO:__main__:  intent_acc = 0.97
INFO:__main__:  intent_f1 = 0.9721669980119284
INFO:__main__:  loss = 0.33740038238465786
INFO:__main__:  mean_intent_slot = 0.9760844782227418
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.92
INFO:__main__:  slot_acc = 0.944
INFO:__main__:  slot_f1 = 0.9821689564454837
INFO:__main__:  slot_precision = 0.9818819403857393
INFO:__main__:  slot_recall = 0.9824561403508771


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 40


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 40
INFO:__main__:  intent_acc = 0.972
INFO:__main__:  intent_f1 = 0.974155069582505
INFO:__main__:  loss = 0.33676853869110346
INFO:__main__:  mean_intent_slot = 0.9767921660333236
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.92
INFO:__main__:  slot_acc = 0.942
INFO:__main__:  slot_f1 = 0.9815843320666472
INFO:__main__:  slot_precision = 0.9812974868497955
INFO:__main__:  slot_recall = 0.9818713450292398


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 41


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 41
INFO:__main__:  intent_acc = 0.97
INFO:__main__:  intent_f1 = 0.9721669980119284
INFO:__main__:  loss = 0.33041797718033195
INFO:__main__:  mean_intent_slot = 0.9763767904121602
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.924
INFO:__main__:  slot_acc = 0.948
INFO:__main__:  slot_f1 = 0.9827535808243204
INFO:__main__:  slot_precision = 0.9824663939216832
INFO:__main__:  slot_recall = 0.9830409356725146
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 41
INFO:__main__:  intent_acc = 0.9563269876819709
INFO:__main__:  intent_f1 = 0.9618151632540122
INFO:__main__:  loss = 1.0918238461017609
INFO:__main__:  mean_intent_slot = 0.9569748179134314
INFO:__main__:  num_acc = 0.9876819708846585
INFO:__main__:  semantic_frame_acc = 0.8633818589025756
INFO:__main__:  slot_acc = 0.896976483762598
INFO:__main__:  slot_f1 = 0.9576226481448918
INFO:__main__:  slot_precision = 0.9561095505617978
INFO:__main__:  slot_recall = 0.9591405424445227


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 42


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 42
INFO:__main__:  intent_acc = 0.97
INFO:__main__:  intent_f1 = 0.9721669980119284
INFO:__main__:  loss = 0.3328621438704431
INFO:__main__:  mean_intent_slot = 0.9768176504967855
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.924
INFO:__main__:  slot_acc = 0.948
INFO:__main__:  slot_f1 = 0.983635300993571
INFO:__main__:  slot_precision = 0.9830607476635514
INFO:__main__:  slot_recall = 0.9842105263157894
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 42
INFO:__main__:  intent_acc = 0.9552071668533034
INFO:__main__:  intent_f1 = 0.9607083563918096
INFO:__main__:  loss = 1.0792430330600058
INFO:__main__:  mean_intent_slot = 0.9560556502692199
INFO:__main__:  num_acc = 0.9876819708846585
INFO:__main__:  semantic_frame_acc = 0.8600223964165733
INFO:__main__:  slot_acc = 0.8947368421052632
INFO:__main__:  slot_f1 = 0.9569041336851363
INFO:__main__:  slot_precision = 0.9557273366127899
INFO:__main__:  slot_recall = 0.9580838323353293


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 43


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 43
INFO:__main__:  intent_acc = 0.97
INFO:__main__:  intent_f1 = 0.9721669980119284
INFO:__main__:  loss = 0.32930304389446974
INFO:__main__:  mean_intent_slot = 0.9771098772647575
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.926
INFO:__main__:  slot_acc = 0.95
INFO:__main__:  slot_f1 = 0.9842197545295149
INFO:__main__:  slot_precision = 0.9836448598130841
INFO:__main__:  slot_recall = 0.9847953216374269
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 43
INFO:__main__:  intent_acc = 0.9574468085106383
INFO:__main__:  intent_f1 = 0.9623893805309734
INFO:__main__:  loss = 1.0843920026506697
INFO:__main__:  mean_intent_slot = 0.9570913254653086
INFO:__main__:  num_acc = 0.9888017917133258
INFO:__main__:  semantic_frame_acc = 0.8622620380739082
INFO:__main__:  slot_acc = 0.8947368421052632
INFO:__main__:  slot_f1 = 0.9567358424199789
INFO:__main__:  slot_precision = 0.9553916403231472
INFO:__main__:  slot_recall = 0.9580838323353293


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 44


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 44
INFO:__main__:  intent_acc = 0.97
INFO:__main__:  intent_f1 = 0.9721669980119284
INFO:__main__:  loss = 0.32656296994537115
INFO:__main__:  mean_intent_slot = 0.9771098772647575
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.926
INFO:__main__:  slot_acc = 0.95
INFO:__main__:  slot_f1 = 0.9842197545295149
INFO:__main__:  slot_precision = 0.9836448598130841
INFO:__main__:  slot_recall = 0.9847953216374269
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 44
INFO:__main__:  intent_acc = 0.9574468085106383
INFO:__main__:  intent_f1 = 0.9623893805309734
INFO:__main__:  loss = 1.0884051620960236
INFO:__main__:  mean_intent_slot = 0.957183049059258
INFO:__main__:  num_acc = 0.9888017917133258
INFO:__main__:  semantic_frame_acc = 0.8622620380739082
INFO:__main__:  slot_acc = 0.8947368421052632
INFO:__main__:  slot_f1 = 0.9569192896078776
INFO:__main__:  slot_precision = 0.9554073033707865
INFO:__main__:  slot_recall = 0.9584360690383938


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 45


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 45
INFO:__main__:  intent_acc = 0.972
INFO:__main__:  intent_f1 = 0.974155069582505
INFO:__main__:  loss = 0.32654297444969416
INFO:__main__:  mean_intent_slot = 0.9781098772647574
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.928
INFO:__main__:  slot_acc = 0.95
INFO:__main__:  slot_f1 = 0.9842197545295149
INFO:__main__:  slot_precision = 0.9836448598130841
INFO:__main__:  slot_recall = 0.9847953216374269
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 45
INFO:__main__:  intent_acc = 0.9552071668533034
INFO:__main__:  intent_f1 = 0.9601769911504425
INFO:__main__:  loss = 1.089863893176828
INFO:__main__:  mean_intent_slot = 0.955887388596337
INFO:__main__:  num_acc = 0.9888017917133258
INFO:__main__:  semantic_frame_acc = 0.8600223964165733
INFO:__main__:  slot_acc = 0.8947368421052632
INFO:__main__:  slot_f1 = 0.9565676103393705
INFO:__main__:  slot_precision = 0.9550561797752809
INFO:__main__:  slot_recall = 0.9580838323353293


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 46


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 46
INFO:__main__:  intent_acc = 0.97
INFO:__main__:  intent_f1 = 0.9721669980119284
INFO:__main__:  loss = 0.328166457824409
INFO:__main__:  mean_intent_slot = 0.9771098772647575
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.926
INFO:__main__:  slot_acc = 0.95
INFO:__main__:  slot_f1 = 0.9842197545295149
INFO:__main__:  slot_precision = 0.9836448598130841
INFO:__main__:  slot_recall = 0.9847953216374269
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 46
INFO:__main__:  intent_acc = 0.9563269876819709
INFO:__main__:  intent_f1 = 0.961283185840708
INFO:__main__:  loss = 1.0863786467484065
INFO:__main__:  mean_intent_slot = 0.9566231386449242
INFO:__main__:  num_acc = 0.9888017917133258
INFO:__main__:  semantic_frame_acc = 0.8611422172452408
INFO:__main__:  slot_acc = 0.8947368421052632
INFO:__main__:  slot_f1 = 0.9569192896078776
INFO:__main__:  slot_precision = 0.9554073033707865
INFO:__main__:  slot_recall = 0.9584360690383938


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 47


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 47
INFO:__main__:  intent_acc = 0.972
INFO:__main__:  intent_f1 = 0.974155069582505
INFO:__main__:  loss = 0.33335520792752504
INFO:__main__:  mean_intent_slot = 0.9776691026015785
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.926
INFO:__main__:  slot_acc = 0.948
INFO:__main__:  slot_f1 = 0.9833382052031571
INFO:__main__:  slot_precision = 0.9830508474576272
INFO:__main__:  slot_recall = 0.9836257309941521
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 47
INFO:__main__:  intent_acc = 0.9563269876819709
INFO:__main__:  intent_f1 = 0.961283185840708
INFO:__main__:  loss = 1.0880745298096113
INFO:__main__:  mean_intent_slot = 0.9567989782791777
INFO:__main__:  num_acc = 0.9888017917133258
INFO:__main__:  semantic_frame_acc = 0.8622620380739082
INFO:__main__:  slot_acc = 0.8958566629339306
INFO:__main__:  slot_f1 = 0.9572709688763846
INFO:__main__:  slot_precision = 0.9557584269662921
INFO:__main__:  slot_recall = 0.9587883057414582


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 48


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 48
INFO:__main__:  intent_acc = 0.972
INFO:__main__:  intent_f1 = 0.974155069582505
INFO:__main__:  loss = 0.33225065749138594
INFO:__main__:  mean_intent_slot = 0.9775204678362572
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.928
INFO:__main__:  slot_acc = 0.95
INFO:__main__:  slot_f1 = 0.9830409356725146
INFO:__main__:  slot_precision = 0.9830409356725146
INFO:__main__:  slot_recall = 0.9830409356725146
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 48
INFO:__main__:  intent_acc = 0.9552071668533034
INFO:__main__:  intent_f1 = 0.9601769911504425
INFO:__main__:  loss = 1.0894466830151421
INFO:__main__:  mean_intent_slot = 0.9564149074990976
INFO:__main__:  num_acc = 0.9888017917133258
INFO:__main__:  semantic_frame_acc = 0.8622620380739082
INFO:__main__:  slot_acc = 0.896976483762598
INFO:__main__:  slot_f1 = 0.9576226481448918
INFO:__main__:  slot_precision = 0.9561095505617978
INFO:__main__:  slot_recall = 0.9591405424445227


Iteration:   0%|          | 0/140 [00:00<?, ?it/s]


Epoch 49


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 49
INFO:__main__:  intent_acc = 0.972
INFO:__main__:  intent_f1 = 0.974155069582505
INFO:__main__:  loss = 0.33272994472645223
INFO:__main__:  mean_intent_slot = 0.9775204678362572
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.928
INFO:__main__:  slot_acc = 0.95
INFO:__main__:  slot_f1 = 0.9830409356725146
INFO:__main__:  slot_precision = 0.9830409356725146
INFO:__main__:  slot_recall = 0.9830409356725146
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 49
INFO:__main__:  intent_acc = 0.9552071668533034
INFO:__main__:  intent_f1 = 0.9601769911504425
INFO:__main__:  loss = 1.089235782623291
INFO:__main__:  mean_intent_slot = 0.9564149074990976
INFO:__main__:  num_acc = 0.9888017917133258
INFO:__main__:  semantic_frame_acc = 0.8622620380739082
INFO:__main__:  slot_acc = 0.896976483762598
INFO:__main__:  slot_f1 = 0.9576226481448918
INFO:__main__:  slot_precision = 0.9561095505617978
INFO:__main__:  slot_recall = 0.9591405424445227
INFO:__main__:***** Model Loaded *****
INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 500
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/8 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 0
INFO:__main__:  intent_acc = 0.972
INFO:__main__:  intent_f1 = 0.974155069582505
INFO:__main__:  loss = 0.33272994472645223
INFO:__main__:  mean_intent_slot = 0.9775204678362572
INFO:__main__:  num_acc = 0.996
INFO:__main__:  semantic_frame_acc = 0.928
INFO:__main__:  slot_acc = 0.95
INFO:__main__:  slot_f1 = 0.9830409356725146
INFO:__main__:  slot_precision = 0.9830409356725146
INFO:__main__:  slot_recall = 0.9830409356725146
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 893
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/14 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = -1
INFO:__main__:  intent_acc = 0.9552071668533034
INFO:__main__:  intent_f1 = 0.9601769911504425
INFO:__main__:  loss = 1.089235782623291
INFO:__main__:  mean_intent_slot = 0.9564149074990976
INFO:__main__:  num_acc = 0.9888017917133258
INFO:__main__:  semantic_frame_acc = 0.8622620380739082
INFO:__main__:  slot_acc = 0.896976483762598
INFO:__main__:  slot_f1 = 0.9576226481448918
INFO:__main__:  slot_precision = 0.9561095505617978
INFO:__main__:  slot_recall = 0.9591405424445227
