In [1]:
!git clone https://github.com/VinAIResearch/MISCA.git

Cloning into 'MISCA'...
remote: Enumerating objects: 119, done.[K
remote: Counting objects: 100% (119/119), done.[K
remote: Compressing objects: 100% (90/90), done.[K
remote: Total 119 (delta 59), reused 69 (delta 24), pack-reused 0[K
Receiving objects: 100% (119/119), 2.64 MiB | 3.25 MiB/s, done.
Resolving deltas: 100% (59/59), done.


In [2]:
!pip install pytorch-crf==0.7.2
!pip install scikit-learn==1.2.2
!pip install scipy==1.10.0
!pip install sentencepiece==0.1.97
!pip install seqeval==0.0.12
!pip install tensorboard==2.15

!pip install tokenizers==0.13.2
!pip install transformers==4.26.1
!pip install tqdm==4.64.1

!pip install six

Collecting pytorch-crf==0.7.2
  Downloading pytorch_crf-0.7.2-py3-none-any.whl (9.5 kB)
Installing collected packages: pytorch-crf
Successfully installed pytorch-crf-0.7.2
Collecting scipy==1.10.0
  Downloading scipy-1.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.4/34.4 MB[0m [31m49.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: scipy
  Attempting uninstall: scipy
    Found existing installation: scipy 1.11.4
    Uninstalling scipy-1.11.4:
      Successfully uninstalled scipy-1.11.4
Successfully installed scipy-1.10.0
Collecting sentencepiece==0.1.97
  Downloading sentencepiece-0.1.97-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
  Attempting uninstall: sentencepiece
    Found existing

# processdata

In [3]:
import os
import numpy as np
import torch
import logging
import copy
import json

from transformers import AutoTokenizer
from torch.utils.data import Dataset, TensorDataset
from torch.utils.data import DataLoader

# Module-level logger used by the data-processing helpers defined in this cell.
logger = logging.getLogger(__name__)

def convert_examples_to_features(examples, max_seq_len, tokenizer,
                                 pad_token_label_id=-100,
                                 cls_token_segment_id=0,
                                 pad_token_segment_id=0,
                                 sequence_a_segment_id=0,
                                 mask_padding_with_zero=True):
    """Convert word-level examples into fixed-length sub-word features.

    Every word of ``example.text`` is tokenized on its own; the position of its
    first sub-word (shifted by one for the leading CLS token) is recorded in
    ``heads``. The sub-word sequence is cut to ``max_seq_len - 2``, wrapped in
    CLS/SEP and padded to ``max_seq_len``.

    Args:
        examples: iterable of objects exposing ``guid``, ``text``, ``chars``,
            ``intent_label`` and ``slot_labels``.
        max_seq_len: length every ``input_ids`` / mask / segment list is padded to.
        tokenizer: object providing ``cls_token``, ``sep_token``, ``unk_token``,
            ``pad_token_id``, ``tokenize`` and ``convert_tokens_to_ids``.
        pad_token_label_id: accepted for interface compatibility; not used here.
        cls_token_segment_id: segment id given to the CLS position.
        pad_token_segment_id: segment id given to padding positions.
        sequence_a_segment_id: segment id given to sub-words and SEP.
        mask_padding_with_zero: when true, real tokens get mask 1 and padding 0;
            otherwise the two values are swapped.

    Returns:
        list of ``InputExample`` whose ``words`` field holds the padded input ids.

    Note:
        ``heads`` is not shortened when the sub-word sequence is truncated, so
        for over-long inputs it may contain positions >= ``max_seq_len``.
    """
    # Special symbols of the tokenizer in use.
    cls_token = tokenizer.cls_token
    sep_token = tokenizer.sep_token
    unk_token = tokenizer.unk_token
    pad_token_id = tokenizer.pad_token_id

    # Mask values for real tokens and for padding positions.
    real_flag, pad_flag = (1, 0) if mask_padding_with_zero else (0, 1)
    # Room left for sub-words once CLS and SEP are accounted for.
    max_subwords = max_seq_len - 2

    features = []
    for ex_index, example in enumerate(examples):
        subwords = []
        word_starts = []
        # Zipping with the inner slot labels limits the loop to labelled words.
        for word, _ in zip(example.text, example.slot_labels[1:-1]):
            # An empty tokenization (badly encoded word) falls back to UNK.
            pieces = tokenizer.tokenize(word) or [unk_token]
            word_starts.append(len(subwords) + 1)  # +1 for the CLS token
            subwords.extend(pieces)

        # Slicing is a no-op when the sequence already fits.
        subwords = subwords[:max_subwords]

        tokens = [cls_token] + subwords + [sep_token]
        # CLS sits at 0 and SEP right after the (possibly truncated) sub-words.
        heads = [0] + word_starts + [len(subwords) + 1]
        token_type_ids = [cls_token_segment_id] + [sequence_a_segment_id] * (len(subwords) + 1)

        input_ids = tokenizer.convert_tokens_to_ids(tokens)

        # Pad ids, mask and segment ids up to the fixed sequence length.
        padding_length = max_seq_len - len(input_ids)
        attention_mask = [real_flag] * len(input_ids) + [pad_flag] * padding_length
        input_ids = input_ids + [pad_token_id] * padding_length
        token_type_ids = token_type_ids + [pad_token_segment_id] * padding_length

        assert len(input_ids) == max_seq_len, "Error with input length {} vs {}".format(len(input_ids), max_seq_len)
        assert len(attention_mask) == max_seq_len, "Error with attention mask length {} vs {}".format(len(attention_mask), max_seq_len)
        assert len(token_type_ids) == max_seq_len, "Error with token type length {} vs {}".format(len(token_type_ids), max_seq_len)
        assert len(heads) == len(example.slot_labels)

        # Log the first few examples for inspection.
        if ex_index < 5:
            logger.info("*** Example ***")
            logger.info("guid: %s" % example.guid)
            logger.info("tokens: %s" % " ".join(map(str, tokens)))
            logger.info("input_ids: %s" % " ".join(map(str, input_ids)))
            logger.info("attention_mask: %s" % " ".join(map(str, attention_mask)))
            logger.info("token_type_ids: %s" % " ".join(map(str, token_type_ids)))
            logger.info("heads: %s" % " ".join(map(str, heads)))

        feature = InputExample(guid=example.guid,
                               words=input_ids,
                               chars=example.chars,
                               heads=heads,
                               attention_mask=attention_mask,
                               token_type_ids=token_type_ids,
                               intent_label=example.intent_label,
                               slot_labels=example.slot_labels,
                               text=example.text)
        features.append(feature)

    return features


class Vocab(object):
    """Token <-> index vocabulary with four reserved special tokens.

    Indices 0-3 are always ``<PAD>``, ``<UNK>``, ``<s>`` and ``</s>``. Other
    tokens are registered once they have been seen at least ``min_freq`` times.

    Args:
        min_freq: number of occurrences required before a token gets an index.
    """

    def __init__(self, min_freq=1):
        self.min_freq = min_freq
        self.word2index = {}
        self.index2word = []
        self.special_tokens = ['<PAD>', '<UNK>', '<s>', '</s>']

        # Occurrence counter for every token passed to `add`.
        self.count = {}

        self.pad_token = '<PAD>'
        self.pad_index = 0
        self.add(self.pad_token)

        self.unk_token = '<UNK>'
        self.unk_index = 1
        self.add(self.unk_token)

        self.start_token = '<s>'
        self.start_index = 2
        self.add(self.start_token)

        self.end_token = '</s>'
        self.end_index = 3
        self.add(self.end_token)

    def add(self, token):
        """Count ``token`` and register it once it is frequent enough.

        Lists and tuples are traversed recursively. When ``min_freq > 1`` one
        leading and one trailing non-alphanumeric character are stripped from
        non-special tokens before counting.
        """
        if isinstance(token, (list, tuple)):
            for element in token:
                self.add(element)
            return

        assert isinstance(token, str)

        if self.min_freq > 1 and token not in self.special_tokens:
            if len(token) > 1 and not token[0].isalnum():
                token = token[1:]

            if len(token) > 1 and not token[-1].isalnum():
                token = token[:-1]

        self.count[token] = self.count.get(token, 0) + 1

        # Register each token at most once. Previously a special token that
        # showed up again (e.g. a literal '<PAD>' in the data) was appended a
        # second time, which moved its index away from pad_index/unk_index/...
        if token in self.word2index:
            return
        if token in self.special_tokens or self.count[token] >= self.min_freq:
            self.word2index[token] = len(self.index2word)
            self.index2word.append(token)

    def get_index(self, token):
        """Return the index of ``token`` (``unk_index`` if unknown).

        Lists and tuples are mapped element-wise and returned as a list.
        """
        if isinstance(token, (list, tuple)):
            return [self.get_index(element) for element in token]

        assert isinstance(token, str)

        return self.word2index.get(token, self.unk_index)

    def get_token(self, index):
        """Return the token stored at ``index``; lists are mapped element-wise."""
        if isinstance(index, list):
            return [self.get_token(element) for element in index]

        assert isinstance(index, int)
        return self.index2word[index]

    def save(self, path):
        """Write the index-ordered token list to ``path`` with ``torch.save``."""
        torch.save(self.index2word, path)

    def load(self, path):
        """Replace the vocabulary with the token list stored at ``path``.

        NOTE: ``torch.load`` is pickle-based; only load files you created
        yourself or otherwise trust.
        """
        self.index2word = torch.load(path)
        self.word2index = {word: i for i, word in enumerate(self.index2word)}

    def __len__(self):
        return len(self.index2word)

    def __str__(self):
        return f'Vocab object with {len(self.index2word)} instances'


class InputExample(object):
    """Container for one example, either raw or converted to model features.

    Args:
        guid: unique id of the example.
        words: list of word (or sub-word) ids of the sequence.
        chars: (Optional) per-word lists of character ids.
        heads: (Optional) list of positions of each word's first sub-word.
        attention_mask: (Optional) list marking real vs. padding positions.
        token_type_ids: (Optional) list of segment ids.
        intent_label: (Optional) intent label of the example.
        slot_labels: (Optional) list of slot labels of the example.
        text: (Optional) the original words.
    """

    def __init__(self, guid, words, chars=None, heads=None, attention_mask=None, token_type_ids=None, intent_label=None, slot_labels=None, text=None):
        self.guid = guid
        self.words = words
        self.chars = chars
        self.heads = heads
        self.attention_mask = attention_mask
        self.token_type_ids = token_type_ids
        self.intent_label = intent_label
        self.slot_labels = slot_labels
        self.text = text

    def __repr__(self):
        # The JSON dump doubles as the printable representation.
        return str(self.to_json_string())

    def to_dict(self):
        """Return a deep copy of the instance attributes as a dictionary."""
        return copy.deepcopy(vars(self))

    def to_json_string(self):
        """Return the attributes as indented, key-sorted JSON plus a newline."""
        payload = json.dumps(self.to_dict(), indent=2, sort_keys=True)
        return payload + "\n"



class TextLoader(Dataset):
    """Dataset of joint intent / slot examples read from a text file.

    The file ``<data_dir>/<task>/<mode>.txt`` is parsed, word and character
    vocabularies are loaded from (or, for ``mode == 'train'``, written to)
    cache files in ``data_dir``, and every sentence becomes an ``InputExample``.

    Args:
        args: configuration object; the attributes read here are ``data_dir``,
            ``task``, ``min_freq``, ``max_seq_len``, ``ignore_index`` and
            ``model_type`` (plus whatever the label helpers read).
        mode: name of the split; used as file stem and as guid prefix.
    """

    def __init__(self, args, mode):
        self.args = args
        self.intent_labels = get_intent_labels(args)
        self.slot_labels, self.hiers = get_slots_all(args)

        self.vocab = Vocab(min_freq=self.args.min_freq)
        self.chars = Vocab()
        self.examples = self.build(mode)

    def load_bert(self, tokenizer):
        """Replace ``self.examples`` with sub-word features built by ``tokenizer``."""
        pad_token_label_id = self.args.ignore_index
        self.examples = convert_examples_to_features(self.examples, self.args.max_seq_len, tokenizer,
                                                     pad_token_label_id=pad_token_label_id)

    @classmethod
    def read_file(cls, input_file, quotechar=None):
        """Read a data file.

        Lines with two whitespace-separated items are ``word slot`` pairs; a
        line with a single item closes the current sentence and carries its
        intent (for ``a/b`` only ``b`` is kept). Other lines are ignored.

        :param input_file: path of the data file.
        :param quotechar: unused; kept for interface compatibility.
        :return: list of sentences, list of slot sequences and list of intents.
        """
        texts, slots, intents = [], [], []
        text, slot = [], []

        with open(input_file, 'r', encoding="utf8") as fr:
            # Iterate lazily instead of materialising the file with readlines().
            for line in fr:
                items = line.strip().split()

                if len(items) == 1:
                    texts.append(text)
                    slots.append(slot)
                    if "/" not in items[0]:
                        intents.append(items)
                    else:
                        new = items[0].split("/")
                        intents.append([new[1]])

                    # Start collecting the next sentence.
                    text, slot = [], []

                elif len(items) == 2:
                    text.append(items[0].strip())
                    slot.append(items[1].strip())

        return texts, slots, intents

    def _create_examples(self, texts, chars, intents, slots, set_type):
        """Build one ``InputExample`` per sentence.

        Words and characters are mapped to vocabulary indices, intents become a
        multi-hot vector over ``self.intent_labels`` (several intents are joined
        by ``#``), and slot names become indices into ``self.slot_labels``.
        Unknown intents / slots map to the ``"UNK"`` label. Word ids, character
        rows and slot ids each get one extra leading and trailing position.
        """
        examples = []
        for i, (text, char, intent, slot) in enumerate(zip(texts, chars, intents, slots)):
            guid = "%s-%s" % (set_type, i)
            # 1. input_text, wrapped in start / end markers
            words = self.vocab.get_index(text)  # Some are spaced twice
            words = [self.vocab.start_index] + words + [self.vocab.end_index]
            # char: pad every word to the longest word of the sentence.
            # default=0 keeps an empty sentence from raising ValueError.
            char = self.chars.get_index(char)
            max_char = max((len(x) for x in char), default=0)
            char = [row + [0] * (max_char - len(row)) for row in char]
            char = [[0] * max_char] + char + [[0] * max_char]
            # 2. intent (multi-hot)
            _intent = intent[0].split('#')
            intent_label = [0 for _ in self.intent_labels]
            for _int in _intent:
                idx = self.intent_labels.index(_int) if _int in self.intent_labels else self.intent_labels.index("UNK")
                intent_label[idx] = 1
            # 3. slot
            slot_labels = []
            for s in slot:
                slot_labels.append(self.slot_labels.index(s) if s in self.slot_labels else self.slot_labels.index("UNK"))
            slot_labels = [self.slot_labels.index('PAD')] + slot_labels + [self.slot_labels.index('PAD')]
            assert len(words) == len(slot_labels)
            examples.append(InputExample(guid=guid, words=words, chars=char, intent_label=intent_label, slot_labels=slot_labels, text=text))
        return examples

    def build(self, mode):
        """Read the ``mode`` split, prepare the vocabularies and return the examples.

        Existing vocabulary cache files are always loaded; they are only
        created when ``mode == 'train'``. For any other mode without a cache the
        vocabularies keep just their special tokens.
        """
        data_path = os.path.join(self.args.data_dir, self.args.task, mode + '.txt')
        logger.info("LOOKING AT {}".format(data_path))
        texts, slots, intents = self.read_file(data_path)

        # Character view of every sentence: one list of characters per word.
        chars = [[list(word) for word in text] for text in texts]

        cache = os.path.join(self.args.data_dir, f'vocab_{self.args.task}')
        if os.path.exists(cache):
            self.vocab.load(cache)
        elif mode == 'train':
            self.vocab.add(texts)
            self.vocab.save(cache)
        cache_chars = os.path.join(self.args.data_dir, f'chars_{self.args.task}')
        if os.path.exists(cache_chars):
            self.chars.load(cache_chars)
        elif mode == 'train':
            self.chars.add(chars)
            self.chars.save(cache_chars)

        return self._create_examples(texts=texts,
                                     chars=chars,
                                     intents=intents,
                                     slots=slots,
                                     set_type=mode)

    def __getitem__(self, index):
        """Return the tensors of example ``index``.

        For model types containing ``'bert'`` the tuple is
        ``(words, chars, heads, attention_mask, token_type_ids, intent, slot)``,
        otherwise ``(words, chars, intent, slot)``.
        """
        example = self.examples[index]

        words = torch.tensor(example.words, dtype=torch.long)

        intent = torch.tensor(example.intent_label, dtype=torch.float)
        slot = torch.tensor(example.slot_labels, dtype=torch.long)
        chars = torch.tensor(example.chars, dtype=torch.long)

        if 'bert' in self.args.model_type:
            attention_mask = torch.tensor(example.attention_mask, dtype=torch.long)
            token_type_ids = torch.tensor(example.token_type_ids, dtype=torch.long)
            heads = torch.tensor(example.heads, dtype=torch.long)
            return (words, chars, heads, attention_mask, token_type_ids, intent, slot)
        else:
            return (words, chars, intent, slot)

    def __len__(self):
        return len(self.examples)

class TextCollate():
    """Collate function that pads a batch and orders it by decreasing length.

    Args:
        pad_index: stored on the instance; padding below is always written as 0.
        num_intents: width of the intent vector of each sample.
        max_seq_len: fixed length of the sub-word level tensors in BERT mode.
    """

    def __init__(self, pad_index, num_intents, max_seq_len):
        self.pad_index = pad_index
        self.num_intents = num_intents
        self.max_seq_len = max_seq_len

    def __call__(self, batch):
        """Pad and stack ``batch``.

        Samples with more than four fields are treated as BERT samples
        ``(input_ids, chars, heads, attention_mask, token_type_ids, intent, slot)``;
        otherwise they are ``(words, chars, intent, slot)``. The sequence length
        of a sample is the length of its last field (the slot labels).

        Returns:
            non-BERT: ``(text, chars, intent, slot, seq_lens)``;
            BERT: ``(input_ids, chars, heads, attention_mask, token_type_ids,
            intent, slot, seq_lens)``.
        """
        batch_size = len(batch)
        len_list = [len(sample[-1]) for sample in batch]
        max_len = max(len_list)
        max_char = max(sample[1].size(1) for sample in batch)

        bert = len(batch[0]) > 4

        # Zero-filled output buffers shared by both modes.
        char_padded = torch.zeros(batch_size, max_len, max_char, dtype=torch.long)
        slot_padded = torch.zeros(batch_size, max_len, dtype=torch.long)
        intent = torch.zeros(batch_size, self.num_intents, dtype=torch.float)

        if bert:
            input_ids = torch.zeros(batch_size, self.max_seq_len, dtype=torch.long)
            attention_mask = torch.zeros(batch_size, self.max_seq_len, dtype=torch.long)
            token_type_ids = torch.zeros(batch_size, self.max_seq_len, dtype=torch.long)
            heads = torch.zeros(batch_size, max_len, dtype=torch.long)
        else:
            text_padded = torch.zeros(batch_size, max_len, dtype=torch.long)

        # Sample indices ordered from longest to shortest sequence.
        order = np.argsort(len_list)[::-1]

        seq_lens = []
        for row, index in enumerate(order):
            sample = batch[index]
            seq_lens.append(len_list[index])

            intent[row] = sample[-2]
            slot = sample[-1]
            slot_padded[row, :slot.size(0)] = slot
            char = sample[1]
            char_padded[row, :char.size(0), :char.size(1)] = char

            if bert:
                input_ids[row] = sample[0]
                attention_mask[row] = sample[3]
                token_type_ids[row] = sample[4]
                head = sample[2]
                heads[row, :head.size(0)] = head
            else:
                text = sample[0]
                text_padded[row, :text.size(0)] = text

        lengths = torch.tensor(seq_lens, dtype=torch.long)
        if bert:
            return input_ids, char_padded, heads, attention_mask, token_type_ids, intent, slot_padded, lengths
        return text_padded, char_padded, intent, slot_padded, lengths


# train_dataset = TextLoader(args, 'train')
# #print(train_dataset[0])
# for x in train_dataset[0]:
#   print(x)
#   print(x.shape)
#   print("--")

# #print([x for x in train_dataset[0]])


# utils

In [7]:
import os
import random
import logging

import torch
import numpy as np
from seqeval.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import average_precision_score, precision_recall_curve

from transformers import RobertaConfig, RobertaTokenizer

# Registry keyed by model type. Judging by the commented-out "roberta" entry and
# by load_tokenizer (which reads element [2]), each value is
# (config class, model class, tokenizer class).
# NOTE(review): JointLSTM is not defined in this cell; it presumably comes from the
# model cell, which then has to be executed before this one — confirm.
MODEL_CLASSES = {
    "lstm": (None, JointLSTM, None),
    #"roberta": (RobertaConfig, JointRoberta, RobertaTokenizer)
}

# Per-model-type path/name string; presumably the pretrained checkpoint to load
# (empty for the LSTM model) — confirm against the caller.
MODEL_PATH_MAP = {
    "lstm": "",
    #"roberta": "roberta-base"
}


def get_intent_labels(args):
    """Read the intent labels, one per line, with surrounding whitespace removed.

    Args:
        args: object with ``data_dir``, ``task`` and ``intent_label_file``; the
            file read is ``<data_dir>/<task>/<intent_label_file>``.

    Returns:
        list of label strings in file order.
    """
    path = os.path.join(args.data_dir, args.task, args.intent_label_file)
    # The context manager closes the handle; the bare open() used before leaked it.
    with open(path, 'r', encoding='utf-8') as label_file:
        return [label.strip() for label in label_file]


def get_slot_labels(args):
    """Read the slot labels, one per line, with surrounding whitespace removed.

    Args:
        args: object with ``data_dir``, ``task`` and ``slot_label_file``; the
            file read is ``<data_dir>/<task>/<slot_label_file>``.

    Returns:
        list of label strings in file order.
    """
    path = os.path.join(args.data_dir, args.task, args.slot_label_file)
    # The context manager closes the handle; the bare open() used before leaked it.
    with open(path, 'r', encoding='utf-8') as label_file:
        return [label.strip() for label in label_file]

def get_clean_labels(args):
    """Read the labels of the ``slot_label_clean`` file, one per line, stripped.

    Args:
        args: object with ``data_dir``, ``task`` and ``slot_label_clean``; the
            file read is ``<data_dir>/<task>/<slot_label_clean>``.

    Returns:
        list of label strings in file order.
    """
    path = os.path.join(args.data_dir, args.task, args.slot_label_clean)
    # The context manager closes the handle; the bare open() used before leaked it.
    with open(path, 'r', encoding='utf-8') as label_file:
        return [label.strip() for label in label_file]

def get_slots_all(args):
    """Return the slot labels together with their label hierarchy.

    The hierarchy is a tuple whose last element is the sorted list of distinct
    slot types (label names with the ``B-`` / ``I-`` prefix removed). For the
    ``'mixatis'`` task the labels from ``get_clean_labels`` are placed in front.

    Returns:
        ``(slot_labels, hier)``.
    """
    slot_labels = get_slot_labels(args)
    hier = (get_clean_labels(args),) if args.task == 'mixatis' else ()
    # Drop the two-character BIO prefix and de-duplicate.
    slot_type = sorted({name[2:] for name in slot_labels if name[:2] in ('B-', 'I-')})
    return slot_labels, hier + (slot_type,)



def load_tokenizer(args):
    """Instantiate the tokenizer registered for ``args.model_type``.

    The tokenizer class is the third element of the ``MODEL_CLASSES`` entry and
    is loaded via ``from_pretrained(args.model_name_or_path)``. Entries whose
    tokenizer slot is ``None`` cannot be loaded this way.
    """
    tokenizer_cls = MODEL_CLASSES[args.model_type][2]
    return tokenizer_cls.from_pretrained(args.model_name_or_path)


def init_logger():
    """Configure root logging at INFO level with a timestamped line format."""
    log_format = '%(asctime)s - %(levelname)s - %(name)s -   %(message)s'
    date_format = '%m/%d/%Y %H:%M:%S'
    logging.basicConfig(format=log_format, datefmt=date_format, level=logging.INFO)


def set_seed(args):
    """Seed Python, NumPy and PyTorch random generators with ``args.seed``.

    CUDA generators are seeded as well unless ``args.no_cuda`` is set or no
    CUDA device is available.
    """
    seed = args.seed
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if args.no_cuda or not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)


def compute_metrics(intent_preds, intent_labels, slot_preds, slot_labels):
    """Collect intent, slot and sentence-level metrics in one dictionary.

    Besides the keys returned by the three metric helpers, the result holds
    ``"mean_intent_slot"``, the average of ``intent_acc`` and ``slot_f1``.
    All four inputs must have the same length.
    """
    assert len(intent_preds) == len(intent_labels) == len(slot_preds) == len(slot_labels)

    intent_result = get_intent_acc(intent_preds, intent_labels)
    slot_result = get_slot_metrics(slot_preds, slot_labels)
    semantic_result = get_sentence_frame_acc(intent_preds, intent_labels, slot_preds, slot_labels)

    # Merge in the same order as before so later helpers win on key clashes.
    results = {**intent_result, **slot_result, **semantic_result}
    results["mean_intent_slot"] = (intent_result["intent_acc"] + slot_result["slot_f1"]) / 2
    return results


def get_slot_metrics(preds, labels):
    """Return precision, recall and F1 of the slot predictions.

    ``labels`` is passed as the reference and ``preds`` as the prediction to
    the imported ``precision_score`` / ``recall_score`` / ``f1_score``.
    """
    assert len(preds) == len(labels)
    precision = precision_score(labels, preds)
    recall = recall_score(labels, preds)
    f1 = f1_score(labels, preds)
    return {
        "slot_precision": precision,
        "slot_recall": recall,
        "slot_f1": f1
    }


def get_intent_acc(preds, labels):
    """Compute exact-match accuracy and micro F1 for multi-hot intent vectors.

    Args:
        preds: 2-D array of predicted intent indicators (one row per sample).
        labels: 2-D array of gold intent indicators of the same shape.

    Returns:
        dict with ``"intent_acc"`` (share of rows matching entirely) and
        ``"intent_f1"`` (F1 over all entries equal to 1).
    """
    # A sample counts as correct only if every intent position agrees.
    acc = (preds == labels).all(1).mean()

    predicted_pos = preds == 1.
    gold_pos = labels == 1.
    correct = np.multiply(predicted_pos, gold_pos).sum()

    n_predicted = np.sum(predicted_pos)
    n_gold = np.sum(gold_pos)

    if n_predicted > 0:
        p = correct / n_predicted
    else:
        p = 0.0
    if n_gold > 0:
        r = correct / n_gold
    else:
        r = 0.0

    if p + r == 0.0:
        f1 = 0.0
    else:
        f1 = 2 * p * r / (p + r)

    return {
        "intent_acc": acc,
        "intent_f1": f1,
    }


def read_prediction_text(args):
    """Read the prediction input file and return its lines, stripped.

    Args:
        args: object with ``pred_dir`` and ``pred_input_file``; the file read
            is ``<pred_dir>/<pred_input_file>``.

    Returns:
        list of strings, one per line of the file.
    """
    path = os.path.join(args.pred_dir, args.pred_input_file)
    # The context manager closes the handle; the bare open() used before leaked it.
    with open(path, 'r', encoding='utf-8') as text_file:
        return [text.strip() for text in text_file]


def get_sentence_frame_acc(intent_preds, intent_labels, slot_preds, slot_labels):
    """Sentence-level accuracy: the intents and every slot must be correct.

    Args:
        intent_preds: 2-D array of predicted intent indicators.
        intent_labels: 2-D array of gold intent indicators.
        slot_preds: per-sentence sequences of predicted slot labels.
        slot_labels: per-sentence sequences of gold slot labels.

    Returns:
        dict with ``"semantic_frame_acc"`` (intent and slots all correct) and
        ``"slot_acc"`` (all slots of the sentence correct).
    """
    # Per-sentence flag: the whole intent vector matches.
    intent_result = (intent_preds == intent_labels).all(1)

    # Per-sentence flag: no slot position differs.
    slot_flags = []
    for preds, labels in zip(slot_preds, slot_labels):
        assert len(preds) == len(labels)
        slot_flags.append(not any(p != l for p, l in zip(preds, labels)))
    slot_result = np.array(slot_flags)

    return {
        "semantic_frame_acc": np.multiply(intent_result, slot_result).mean(),
        "slot_acc": slot_result.mean()
    }


# Model

In [4]:
from __future__ import annotations

from typing import Callable, Optional

import torch
import torch.nn as nn
import numpy as np
import math
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence
from torch.nn.utils.rnn import pad_packed_sequence

class Biaffine(nn.Module):
    r"""
    Biaffine layer for first-order scoring :cite:`dozat-etal-2017-biaffine`.
    This function has a tensor of weights :math:`W` and bias terms if needed.
    The score :math:`s(x, y)` of the vector pair :math:`(x, y)` is computed as
    :math:`x^T W y / n_x^s`, where :math:`n_x` is the feature size of
    :math:`x` and :math:`s` the scaling factor.
    :math:`x` and :math:`y` can be concatenated with bias terms.
    Args:
        n_x (int):
            The feature size of input :math:`x`.
        n_y (int):
            The feature size of input :math:`y`.
        n_out (int):
            The number of output channels. Default: 1.
        dropout (Optional[float]):
            Stored on the module; not applied anywhere in this class. Default: 0.
        scale (int):
            Exponent :math:`s` of the score normaliser. Default: 0 (no scaling).
        bias_x (bool):
            If ``True``, adds a bias term for tensor :math:`x`. Default: ``False``.
        bias_y (bool):
            If ``True``, adds a bias term for tensor :math:`y`. Default: ``False``.
        init (Callable):
            Callable initialization method. Default: `nn.init.zeros_`.
    """

    def __init__(
        self,
        n_x: int,
        n_y: int,
        n_out: int = 1,
        dropout: Optional[float] = 0,
        scale: int = 0,
        bias_x: bool = False,
        bias_y: bool = False,
        init: Callable = nn.init.zeros_
    ) -> None:
        # The return annotation used to be the class itself, which is wrong for
        # __init__ and is an unresolved name while the class body is executing.
        super().__init__()

        self.n_x = n_x
        self.n_y = n_y
        self.n_out = n_out
        self.dropout = dropout
        self.scale = scale
        self.bias_x = bias_x
        self.bias_y = bias_y
        self.init = init

        # One extra row / column is reserved for each enabled bias term.
        self.weight = nn.Parameter(torch.Tensor(n_out, self.n_x + bias_x, self.n_y + bias_y))

        self.reset_parameters()

    def reset_parameters(self):
        """(Re-)initialise the weight tensor with ``self.init``."""
        self.init(self.weight)

    def forward(
        self,
        x: torch.Tensor,
        y: torch.Tensor
    ) -> torch.Tensor:
        r"""
        Args:
            x (torch.Tensor): ``[batch_size, len_x, n_x]``.
            y (torch.Tensor): ``[batch_size, len_y, n_y]``.
        Returns:
            ~torch.Tensor:
                A scoring tensor of shape ``[batch_size, n_out, len_x, len_y]``.
                If ``n_out=1``, the dimension for ``n_out`` will be squeezed automatically.
        """
        # Append a constant 1 feature so the last weight row/column acts as bias.
        if self.bias_x:
            x = torch.cat((x, torch.ones_like(x[..., :1])), -1)
        if self.bias_y:
            y = torch.cat((y, torch.ones_like(y[..., :1])), -1)
        # [batch_size, n_out, len_x, len_y]
        s = torch.einsum('bxi,oij,byj->boxy', x, self.weight, y)
        return s.squeeze(1) / self.n_x ** self.scale


class IntentClassifier(nn.Module):
    """Dropout followed by a linear projection onto the intent labels."""

    def __init__(self, input_dim, num_intent_labels, dropout_rate=0.):
        super().__init__()
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(input_dim, num_intent_labels)

    def forward(self, x):
        """Return one logit per intent label for the features ``x``."""
        return self.linear(self.dropout(x))


class SlotClassifier(nn.Module):
    """Project features to a slot embedding and score the slot labels.

    Args:
        input_dim: size of the incoming features.
        num_intent_labels: stored on the module; not used in the computation.
        num_slot_labels: number of slot logits produced.
        max_seq_len: stored on the module; not used in the computation.
        attention_embedding_size: size of the intermediate slot embedding.
        dropout_rate: dropout applied to the activated embedding.
    """

    def __init__(
        self,
        input_dim,
        num_intent_labels,
        num_slot_labels,
        max_seq_len=50,
        attention_embedding_size=200,
        dropout_rate=0.0,
    ):
        super().__init__()
        self.max_seq_len = max_seq_len
        self.num_intent_labels = num_intent_labels
        self.num_slot_labels = num_slot_labels
        self.attention_embedding_size = attention_embedding_size

        # Bias-free projection into the slot embedding space.
        self.linear_slot = nn.Linear(input_dim, attention_embedding_size, bias=False)

        # Output head working on the slot embedding.
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(attention_embedding_size, num_slot_labels)
        self.tanh = nn.Tanh()
        self.relu = nn.LeakyReLU(0.2)

    def forward(self, x):
        """Return ``(slot_embedding, slot_logits)`` for the features ``x``."""
        hidden = self.dropout(self.relu(self.linear_slot(x)))
        return hidden, self.linear(hidden)
class ScaledDotProductAttention(nn.Module):
    """Scaled dot-product attention with dropout on the attention weights.

    Args:
        temperature: divisor applied to the queries before the dot product.
        attn_dropout: dropout rate applied to the softmax output.
    """

    def __init__(self, temperature, attn_dropout=0.1):
        super().__init__()
        self.temperature = temperature
        self.dropout = nn.Dropout(attn_dropout)

    def forward(self, q, k, v, mask=None):
        """Return ``(output, attention_weights)``.

        Positions where ``mask`` is true are filled with ``-1e9`` before the
        softmax over the last dimension.
        """
        scores = torch.matmul(q / self.temperature, k.transpose(2, 3))
        if mask is not None:
            scores = scores.masked_fill(mask, -1e9)
        attn = self.dropout(F.softmax(scores, dim=-1))
        return torch.matmul(attn, v), attn

class MultiHeadAttention(nn.Module):
    """Multi-head attention followed by an optional residual and LayerNorm.

    Args:
        n_head: number of attention heads.
        d_model: feature size of the inputs and of the output.
        d_k: per-head size of queries and keys.
        d_v: per-head size of values.
        dropout: dropout rate applied after the output projection.
        residual: when true, the input query is added to the projected output.
    """

    def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1, residual=True):
        super().__init__()

        self.n_head = n_head
        self.d_model = d_model
        self.d_k = d_k
        self.d_v = d_v

        # Bias-free input projections for queries, keys, values and the output.
        self.w_qs = nn.Linear(d_model, n_head * d_k, bias=False)
        self.w_ks = nn.Linear(d_model, n_head * d_k, bias=False)
        self.w_vs = nn.Linear(d_model, n_head * d_v, bias=False)
        self.fc = nn.Linear(n_head * d_v, d_model, bias=False)

        self.attention = ScaledDotProductAttention(temperature=d_k ** 0.5)

        self.dropout = nn.Dropout(dropout)
        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)

        self.residual = residual

    def forward(self, q, k, v, mask=None):
        """Attend from ``q`` over ``k`` / ``v`` and return a tensor shaped like ``q``."""
        n_head, d_k, d_v = self.n_head, self.d_k, self.d_v
        batch, len_q, len_k, len_v = q.size(0), q.size(1), k.size(1), v.size(1)

        skip = q

        # Project, split into heads and move the head axis forward:
        # b x len x (n*d) -> b x n x len x d
        q_heads = self.w_qs(q).view(batch, len_q, n_head, d_k).transpose(1, 2)
        k_heads = self.w_ks(k).view(batch, len_k, n_head, d_k).transpose(1, 2)
        v_heads = self.w_vs(v).view(batch, len_v, n_head, d_v).transpose(1, 2)

        if mask is not None:
            mask = mask.unsqueeze(1)   # For head axis broadcasting.
        context, _ = self.attention(q_heads, k_heads, v_heads, mask=mask)

        # Bring the heads back next to the features and concatenate them:
        # b x n x lq x dv -> b x lq x (n*dv)
        context = context.transpose(1, 2).contiguous().view(batch, len_q, -1)
        out = self.dropout(self.fc(context))
        if self.residual:
            out = out + skip

        return self.layer_norm(out)


class LSTMEncoder(nn.Module):
    """
    Encoder structure based on bidirectional LSTM.

    Args:
        embedding_dim: feature size of the embedded input.
        hidden_dim: size of the returned hidden vectors; each direction of the
            LSTM uses ``hidden_dim // 2`` units.
        dropout_rate: dropout applied to the embedded input before the LSTM.
    """

    def __init__(self, embedding_dim, hidden_dim, dropout_rate):
        super(LSTMEncoder, self).__init__()

        # Parameter recording.
        self.__embedding_dim = embedding_dim
        self.__hidden_dim = hidden_dim // 2
        self.__dropout_rate = dropout_rate

        # Network attributes.
        self.__dropout_layer = nn.Dropout(self.__dropout_rate)
        # nn.LSTM's own `dropout` only acts between stacked layers. With a single
        # layer it had no effect and merely triggered a UserWarning, so it is no
        # longer passed; input dropout is handled by the layer above.
        self.__lstm_layer = nn.LSTM(
            input_size=self.__embedding_dim,
            hidden_size=self.__hidden_dim,
            batch_first=True,
            bidirectional=True,
            num_layers=1
        )

    def forward(self, embedded_text, seq_lens):
        """ Forward process for LSTM Encoder.

        (batch_size, max_sent_len, word_dim)
        -> (batch_size, max_sent_len, hidden_dim)

        :param embedded_text: padded and embedded input text.
        :param seq_lens: is the length of original input text.
        :return: is encoded word hidden vectors, zero-padded after each
                 sequence's length.
        """

        dropout_text = self.__dropout_layer(embedded_text)

        # Pack and Pad process for input of variable length; enforce_sorted=False
        # lets the batch arrive in any length order.
        packed_text = pack_padded_sequence(dropout_text, seq_lens, batch_first=True, enforce_sorted=False)
        lstm_hiddens, _ = self.__lstm_layer(packed_text)
        padded_hiddens, _ = pad_packed_sequence(lstm_hiddens, batch_first=True)

        return padded_hiddens

class Encoder(nn.Module):
    """Sentence encoder concatenating self-attention and BiLSTM features.

    Optional character-level features (``args.use_charlstm`` /
    ``args.use_charcnn``) are appended along the feature dimension.

    Args:
        args: configuration object; the attributes read here are
            ``word_embedding_dim``, ``encoder_hidden_dim``, ``dropout_rate``,
            ``use_charlstm``, ``use_charcnn``, ``n_chars``, ``char_embed``,
            ``char_out``, ``no_cuda``, ``attention_hidden_dim`` and
            ``attention_output_dim``.
    """

    def __init__(self, args):
        super().__init__()

        self.__args = args

        # Bidirectional LSTM over the word embeddings.
        self.__encoder = LSTMEncoder(
            args.word_embedding_dim,
            args.encoder_hidden_dim,
            args.dropout_rate
        )

        if args.use_charlstm:
            self.charlstm = CharLSTM(
                args.n_chars,
                args.char_embed,
                args.char_out
            )

        if args.use_charcnn:
            # NOTE(review): the device is chosen from args.no_cuda alone, without
            # checking that CUDA is actually available — confirm this is intended.
            if args.no_cuda:
                device = 'cpu'
            else:
                device = 'cuda'
            self.charcnn = CharCNN(
                input_length=15,
                input_dim=args.n_chars,
                n_fc_neurons=args.char_out,
                device=device
            )

        # Self-attention over the same word embeddings.
        self.__attention = SelfAttention(
            args.word_embedding_dim,
            args.attention_hidden_dim,
            args.attention_output_dim,
            args.dropout_rate
        )

    def forward(self, word_tensor, char_tensor, seq_lens):
        """Encode ``word_tensor`` and return the concatenated hidden features.

        The parts are joined on dimension 2 in the order: attention, LSTM,
        character LSTM (if enabled), character CNN (if enabled).
        """
        lstm_hiddens = self.__encoder(word_tensor, seq_lens)
        attention_hiddens = self.__attention(word_tensor, seq_lens)
        hiddens = torch.cat([attention_hiddens, lstm_hiddens], dim=2)

        if self.__args.use_charlstm:
            hiddens = torch.cat([hiddens, self.charlstm(char_tensor)], dim=2)
        if self.__args.use_charcnn:
            hiddens = torch.cat([hiddens, self.charcnn(char_tensor)], dim=2)
        return hiddens

class QKVAttention(nn.Module):
    """
    Scaled dot-product attention with learned query/key/value projections.
    Feeding the same tensor as query, key and value gives self-attention.
    """

    def __init__(self, query_dim, key_dim, value_dim, hidden_dim, output_dim, dropout_rate):
        super().__init__()

        # Remember the hyper-parameters.
        self.__query_dim = query_dim
        self.__key_dim = key_dim
        self.__value_dim = value_dim
        self.__hidden_dim = hidden_dim
        self.__output_dim = output_dim
        self.__dropout_rate = dropout_rate

        # Queries and keys share the hidden size so they can be compared;
        # values are projected straight to the output size.
        self.__query_layer = nn.Linear(query_dim, hidden_dim)
        self.__key_layer = nn.Linear(key_dim, hidden_dim)
        self.__value_layer = nn.Linear(value_dim, output_dim)
        self.__dropout_layer = nn.Dropout(p=dropout_rate)

    def forward(self, input_query, input_key, input_value):
        """ Apply attention of the queries over the key/value pairs.

        Keys and values must have the same number of rows.

        :param input_query: query tensor, (..., n, query_dim)
        :param input_key: key tensor, (..., m, key_dim)
        :param input_value: value tensor, (..., m, value_dim)
        :return: attended tensor, (..., n, output_dim), after dropout
        """
        queries = self.__query_layer(input_query)
        keys = self.__key_layer(input_key)
        values = self.__value_layer(input_value)

        # Similarities are scaled by sqrt(hidden_dim) before the softmax.
        scaled_scores = torch.matmul(queries, keys.transpose(-2, -1)) / math.sqrt(self.__hidden_dim)
        attention_probs = F.softmax(scaled_scores, dim=-1)

        attended = torch.matmul(attention_probs, values)
        return self.__dropout_layer(attended)


class SelfAttention(nn.Module):
    """Self-attention block: input dropout followed by QKV attention in which
    the (dropped-out) input serves as query, key and value."""

    def __init__(self, input_dim, hidden_dim, output_dim, dropout_rate):
        super().__init__()

        # Remember the hyper-parameters.
        self.__input_dim = input_dim
        self.__hidden_dim = hidden_dim
        self.__output_dim = output_dim
        self.__dropout_rate = dropout_rate

        # Sub-layers.
        self.__dropout_layer = nn.Dropout(dropout_rate)
        self.__attention_layer = QKVAttention(
            query_dim=input_dim,
            key_dim=input_dim,
            value_dim=input_dim,
            hidden_dim=hidden_dim,
            output_dim=output_dim,
            dropout_rate=dropout_rate,
        )

    def forward(self, input_x, seq_lens):
        """Attend every position of ``input_x`` over all positions of itself.

        ``seq_lens`` is part of the signature but is not used here, so no
        padding mask is applied to the attention scores.
        """
        regularised = self.__dropout_layer(input_x)
        return self.__attention_layer(regularised, regularised, regularised)

class CharLSTM(nn.Module):
    r"""
    Character-level token encoder.

    The characters of every token are embedded and run through a bidirectional
    LSTM; the final hidden states of both directions are concatenated to form
    the embedding of the token.

    Args:
        n_chars (int):
            Size of the character vocabulary.
        n_embed (int):
            Size of each character embedding fed to the LSTM.
        n_out (int):
            Size of each token vector produced (each direction contributes ``n_out // 2``).
        pad_index (int):
            Index of the padding character. Default: 0.
    """

    def __init__(self, n_chars, n_embed, n_out, pad_index=0):
        super().__init__()

        self.n_chars, self.n_embed = n_chars, n_embed
        self.n_out, self.pad_index = n_out, pad_index

        # character embeddings
        self.embed = nn.Embedding(num_embeddings=n_chars, embedding_dim=n_embed)
        # bidirectional LSTM running over the characters of one token
        self.lstm = nn.LSTM(
            input_size=n_embed,
            hidden_size=n_out // 2,
            batch_first=True,
            bidirectional=True,
        )

    def __repr__(self):
        return f"{self.__class__.__name__}({self.n_chars}, {self.n_embed}, n_out={self.n_out}, pad_index={self.pad_index})"

    def forward(self, x):
        r"""
        Args:
            x (~torch.Tensor): ``[batch_size, seq_len, fix_len]``.
                Character ids of every token; positions equal to ``pad_index`` count as padding.
        Returns:
            ~torch.Tensor:
                Token embeddings of shape ``[batch_size, seq_len, n_out]``.
                Tokens made only of padding are left as zero vectors.
        """
        # [batch_size, seq_len]: number of real characters in each token
        token_lens = x.ne(self.pad_index).sum(-1)
        # True for tokens that contain at least one character
        has_chars = token_lens.gt(0)

        # [n, fix_len, n_embed], n = number of non-empty tokens
        char_embeds = self.embed(x[has_chars])
        packed = pack_padded_sequence(
            char_embeds,
            token_lens[has_chars].tolist(),
            batch_first=True,
            enforce_sorted=False,
        )
        _, (final_hidden, _) = self.lstm(packed)
        # join the per-direction final states: [n, n_out]
        token_vectors = torch.cat(torch.unbind(final_hidden), -1)

        # [batch_size, seq_len, n_out]: write the vectors back to their token slots
        output = token_vectors.new_zeros(*token_lens.shape, self.n_out)
        return output.masked_scatter_(has_chars.unsqueeze(-1), token_vectors)

class CharCNN(nn.Module):
    """Character-level CNN token encoder.

    Every token's character ids are turned into one-hot rows, brought to
    exactly ``input_length`` characters (zero-padded or truncated) and passed
    through a ``CharacterLevelCNN``.  Tokens that consist only of ``pad_index``
    are skipped and receive a zero vector.

    Args:
        input_length: number of characters per token given to the CNN.
        input_dim: size of the character vocabulary (width of the one-hot rows).
        n_conv_filters: number of convolution filters of the CNN.
        n_fc_neurons: size of the produced token vector.
        pad_index: id of the padding character.
        device: kept for backward compatibility.  The one-hot lookup table is
            now a (non-persistent) buffer, so it follows the module whenever
            it is moved with ``.to()`` / ``.cuda()`` and no longer has to be
            placed by hand.
    """

    def __init__(self, input_length=15, input_dim=50,
                 n_conv_filters=32,
                 n_fc_neurons=32, pad_index=0, device='cpu'):
        super(CharCNN, self).__init__()
        self.layer = CharacterLevelCNN(input_length, input_dim, n_conv_filters, n_fc_neurons)
        self.pad_index = pad_index
        self.n_vocab = input_dim
        # Registered as a buffer so that module.to(...) moves it together with
        # the weights; persistent=False keeps it out of state_dict, so existing
        # checkpoints still load unchanged.
        self.register_buffer('identity', torch.eye(input_dim), persistent=False)
        self.n_out = n_fc_neurons
        self.input_length = input_length

    def forward(self, x):
        """Encode character ids into one vector per token.

        :param x: integer character ids; the last axis enumerates the
            characters of one token (assumed [batch_size, seq_len, fix_len] -- TODO confirm).
        :return: tensor of shape ``[*x.shape[:-1], n_out]``.
        """
        mask = x.ne(self.pad_index)
        lens = mask.sum(-1)

        # [batch_size, seq_len]
        char_mask = lens.gt(0)
        # [n, fix_len]
        feat = x[char_mask]
        # No-op when the table already lives next to the input.
        identity = self.identity.to(feat.device)
        if feat.size(0) == 0:
            # Nothing but padding: there is no token to run through the CNN.
            return identity.new_zeros(*lens.shape, self.n_out)

        # One-hot encode all characters at once: [n, fix_len, n_vocab]
        feat = identity[feat]
        n_char = feat.size(1)
        if n_char < self.input_length:
            feat = torch.cat([feat, feat.new_zeros(feat.size(0), self.input_length - n_char, self.n_vocab)], dim=1)
        elif n_char > self.input_length:
            feat = feat[:, :self.input_length, :]
        # [n, n_fc]
        out = self.layer(feat)

        # Scatter the token vectors back to their positions; skipped tokens stay zero.
        embed = out.new_zeros(*lens.shape, self.n_out)
        embed = embed.masked_scatter_(char_mask.unsqueeze(-1), out)

        return embed




class CharacterLevelCNN(nn.Module):
    """Three 1-D convolutions followed by two fully connected layers.

    Args:
        input_length: number of characters (time steps) per input.
        input_dim: number of input channels, i.e. the width of each character row.
        n_conv_filters: number of filters of every convolution.
        n_fc_neurons: width of both fully connected layers (and of the output).

    Raises:
        ValueError: if ``input_length`` is too short for the convolution stack
            to produce at least one time step.
    """

    def __init__(self, input_length=15, input_dim=30,
                 n_conv_filters=256,
                 n_fc_neurons=1024):
        super(CharacterLevelCNN, self).__init__()
        self.conv1 = nn.Sequential(nn.Conv1d(input_dim, n_conv_filters, kernel_size=3, padding=0), nn.ReLU(),
                                   nn.MaxPool1d(2))
        self.conv2 = nn.Sequential(nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=3, padding=0), nn.ReLU())
        self.conv3 = nn.Sequential(nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=3, padding=0), nn.ReLU(),
                                   nn.MaxPool1d(2))

        # Time steps left after the stack: every unpadded kernel-3 convolution
        # removes 2 steps and every MaxPool1d(2) halves the length (floor).
        #   conv1 + pool -> (L - 2) // 2, conv2 -> - 2, conv3 + pool -> (. - 2) // 2
        pooled_length = ((input_length - 2) // 2 - 4) // 2
        if pooled_length < 1:
            raise ValueError(
                f"input_length={input_length} is too short for the convolution stack"
            )
        # Size of the flattened convolution output (32 for the 15 x 32 setup).
        dimension = pooled_length * n_conv_filters
        self.fc1 = nn.Sequential(nn.Linear(dimension, n_fc_neurons), nn.Dropout(0.5))
        self.fc2 = nn.Sequential(nn.Linear(n_fc_neurons, n_fc_neurons), nn.Dropout(0.5))

        self._create_weights(mean=0.0, std=0.05)

    def _create_weights(self, mean=0.0, std=0.05):
        """Re-draw the weights of all convolution and linear layers from N(mean, std)."""
        for module in self.modules():
            if isinstance(module, nn.Conv1d) or isinstance(module, nn.Linear):
                module.weight.data.normal_(mean, std)

    def forward(self, input):
        """Map ``[n, input_length, input_dim]`` inputs to ``[n, n_fc_neurons]`` vectors."""
        # Conv1d expects channels first: [n, input_dim, input_length]
        input = input.transpose(1, 2)
        output = self.conv1(input)
        output = self.conv2(output)
        output = self.conv3(output)

        # Flatten channels and remaining time steps for the dense layers.
        output = output.view(output.size(0), -1)
        output = self.fc1(output)
        output = self.fc2(output)

        return output

# Attention layer

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class AttentionLayer(nn.Module):
    """Label-wise attention over a sequence, with one set of weights per label level.

    For every label of the selected level the layer computes attention weights
    over the sequence positions, builds a per-label context vector and scores
    it with a per-label linear classifier.
    """

    def __init__(self,
                 args,
                 size: int,
                 level_projection_size: int = 0,
                 n_labels=None,
                 n_level: int = 1
                 ):
        """
        The init function
        :param args: the input parameters from commandline; ``attention_mode`` and ``d_a`` are read
        :param size: the input size of the layer, it is normally the output size of other DNN models,
            such as CNN, RNN
        :param level_projection_size: size of the previous-level projection that is appended to the
            context vectors of every level after the first one
        :param n_labels: sequence with the number of labels of each level (indexed by level, required)
        :param n_level: number of label levels
        """
        super(AttentionLayer, self).__init__()
        self.attention_mode = args.attention_mode

        self.size = size
        # d_a is the output size of the first linear layer; it falls back to
        # the input size when args.d_a is not positive.
        self.d_a = args.d_a if args.d_a > 0 else self.size

        self.n_labels = n_labels
        self.n_level = n_level

        self.level_projection_size = level_projection_size

        self.linear = nn.Linear(self.size, self.size, bias=False)

        self.first_linears = nn.ModuleList([nn.Linear(self.size, self.d_a, bias=False) for _ in range(self.n_level)])
        self.second_linears = nn.ModuleList([nn.Linear(self.d_a, self.n_labels[label_lvl], bias=False) for label_lvl in range(self.n_level)])
        self.third_linears = nn.ModuleList([nn.Linear(self.size +
                                            (self.level_projection_size if label_lvl > 0 else 0),
                                            self.n_labels[label_lvl], bias=True) for label_lvl in range(self.n_level)])

        self._init_weights(mean=0.0, std=0.03)

    def _init_weights(self, mean=0.0, std=0.03) -> None:
        """
        Initialise the weights of all per-level linear layers from N(mean, std)
        and zero the biases of the first and second layers where present.
        :param mean: mean of the normal distribution
        :param std: standard deviation of the normal distribution
        :return: None
        """
        for first_linear in self.first_linears:
            # normal_ is the in-place initialiser; the un-suffixed name is deprecated.
            torch.nn.init.normal_(first_linear.weight, mean, std)
            if first_linear.bias is not None:
                first_linear.bias.data.fill_(0)

        for linear in self.second_linears:
            torch.nn.init.normal_(linear.weight, mean, std)
            if linear.bias is not None:
                linear.bias.data.fill_(0)
        for linear in self.third_linears:
            torch.nn.init.normal_(linear.weight, mean, std)

    def forward(self, x, previous_level_projection=None, label_level=0, masks=None):
        """
        :param x: [batch_size x max_len x dim (i.e., self.size)]

        :param previous_level_projection: the embeddings for the previous level output
        :param label_level: the current label level
        :param masks: accepted but not used by this layer
        :return:
            Context vectors: [batch_size x n_labels x dim (+ level_projection_size
                when previous_level_projection is given)]
            Weighted output (per-label scores): [batch_size x n_labels]
            Attention weights: [batch_size x n_labels x max_len]
        """
        weights = torch.tanh(self.first_linears[label_level](x))

        att_weights = self.second_linears[label_level](weights)
        # Normalise over the sequence positions, then put the labels first.
        att_weights = F.softmax(att_weights, 1).transpose(1, 2)
        if len(att_weights.size()) != len(x.size()):
            att_weights = att_weights.squeeze()
        context_vector = att_weights @ x

        batch_size = context_vector.size(0)

        if previous_level_projection is not None:
            # Give every label of this level a copy of the previous level's projection.
            temp = [context_vector,
                    previous_level_projection.repeat(1, self.n_labels[label_level]).view(batch_size, self.n_labels[label_level], -1)]
            context_vector = torch.cat(temp, dim=2)

        # Row-wise dot product: label i is scored with row i of the classifier
        # weight applied to the context vector of label i.
        weighted_output = self.third_linears[label_level].weight.mul(context_vector).sum(dim=2).add(
            self.third_linears[label_level].bias)

        return context_vector, weighted_output, att_weights

    # Using when use_regularisation = True
    @staticmethod
    def l2_matrix_norm(m):
        """
        Frobenius norm calculation
        :param m: {Variable} ||AAT - I||
        :return: regularized value (sum over the batch of the per-matrix norms)
        """
        return torch.sum(torch.sum(torch.sum(m ** 2, 1), 1) ** 0.5)

def init_attention_layer(model, name, n_labels, n_levels, output_size):
    """Attach the attention head called ``name`` to ``model``.

    Registers on ``model``:
      * ``attention_{name}`` -- an ``AttentionLayer`` (only when
        ``model.attention_mode`` is not ``None``),
      * ``linears_{name}`` -- one classifier per level; levels after the first
        take ``level_projection_size`` extra input features,
      * ``projection_linears_{name}`` -- one bias-free projection per level
        from the label space to ``level_projection_size``.

    ``model.level_projection_size`` is set from ``model.args`` as a side effect.
    """
    model.level_projection_size = model.args.level_projection_size

    if model.attention_mode is not None:
        attention = AttentionLayer(
            args=model.args,
            size=output_size,
            level_projection_size=model.level_projection_size,
            n_labels=n_labels,
            n_level=n_levels,
        )
        model.add_module(f'attention_{name}', attention)

    classifiers = nn.ModuleList()
    projections = nn.ModuleList()
    for level in range(n_levels):
        # Only levels after the first receive the previous level's projection.
        extra_features = model.level_projection_size if level != 0 else 0
        classifiers.append(nn.Linear(output_size + extra_features, n_labels[level]))
        projections.append(nn.Linear(n_labels[level], model.level_projection_size, bias=False))

    model.add_module(f'linears_{name}', classifiers)
    model.add_module(f'projection_linears_{name}', projections)



def perform_attention(model, name, all_output, last_output, n_labels, n_levels):
    """Run the attention head ``name`` of ``model`` level by level.

    Each level is given the projection of the previous level's output
    (``None`` for the first level).

    :param model: object exposing ``attention_{name}``,
        ``projection_linears_{name}`` and ``attention_mode``
    :param name: suffix of the head to use
    :param all_output: sequence representation handed to the attention layer
    :param last_output: not used by this function
    :param n_labels: not used by this function
    :param n_levels: number of label levels to run
    :return: three lists with one entry per level: context vectors, weighted
        outputs and attention weights
    """
    previous_level_projection = None
    weighted_outputs = []
    attention_weights = []
    context_vectors = []
    for level in range(n_levels):
        # Plain getattr instead of calling the __getattr__ hook directly, so
        # the lookup goes through regular attribute resolution.
        attention = getattr(model, f'attention_{name}')
        context_vector, weighted_output, attention_weight = attention(
            all_output, previous_level_projection, label_level=level)

        # Squash the scores before projecting them for the next level:
        # independent sigmoids for "label"/"caml", a softmax otherwise.
        if model.attention_mode in ["label", "caml"]:
            level_scores = torch.sigmoid(weighted_output)
        else:
            level_scores = torch.softmax(weighted_output, 1)
        projection = getattr(model, f'projection_linears_{name}')[level]
        previous_level_projection = torch.sigmoid(projection(level_scores))

        weighted_outputs.append(weighted_output)
        attention_weights.append(attention_weight)
        context_vectors.append(context_vector)

    return context_vectors, weighted_outputs, attention_weights

class AttentionFlow(nn.Module):
    """Bidirectional attention between two sequences ``x`` and ``y``.

    A ``Biaffine`` scorer compares ``y`` with ``x``; the scores are normalised
    in both directions and used to mix the linear projections of ``x`` and
    ``y`` into each other.
    """

    def __init__(self, args, dim_x, dim_y, out_dim, dropout_rate=0.):
        super().__init__()

        self.linear_x = nn.Linear(dim_x, out_dim)
        self.linear_y = nn.Linear(dim_y, out_dim)
        # The scorer takes its dropout from ``args``; ``dropout_rate`` only
        # drives the dropout applied to the two projections.
        self.scorer = Biaffine(dim_y, dim_x, dropout=args.dropout_rate)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x, y):
        """
        :param x: intent-side input, projected to [bz, num_intent, out_dim]
        :param y: token-side input, projected to [bz, seq_len, out_dim]
        :return: ``(out_intent, out_slot)`` -- the intent side attended over
            the tokens and the token side attended over the intents
        """
        # The scores are computed on the raw inputs, before any projection.
        # presumably [bz, seq_len, num_intent] -- depends on Biaffine, TODO confirm
        score = self.scorer(y, x)

        projected_x = self.dropout(self.linear_x(x))
        projected_y = self.dropout(self.linear_y(y))

        # Normalise the same score matrix along each of its two axes.
        over_intents = F.softmax(score, dim=-1)
        over_tokens = F.softmax(score.transpose(1, 2), dim=-1)

        out_slot = torch.tanh(torch.bmm(over_intents, projected_x))
        out_intent = torch.tanh(torch.bmm(over_tokens, projected_y))

        return out_intent, out_slot

class HierCoAttention(nn.Module):
    """Co-attention over a chain of inputs ordered from intents to slots.

    Neighbouring inputs are compared with a ``Biaffine`` scorer.  Information
    is then passed along the chain twice: forwards, to produce the slot-side
    output, and backwards, to produce the intent-side output.
    """

    def __init__(self, args, dims, out_dim, dropout_rate=0.):
        super().__init__()

        self.n_layers = len(dims)
        # Separate projections for the forward and the backward pass.
        self.linears = nn.ModuleList([nn.Linear(inp_dim, out_dim, bias=True) for inp_dim in dims])
        self.reverse = nn.ModuleList([nn.Linear(inp_dim, out_dim, bias=True) for inp_dim in dims])

        # One scorer for every pair of neighbouring inputs.
        self.scorers = nn.ModuleList(
            [Biaffine(left, right, dropout=dropout_rate) for left, right in zip(dims[:-1], dims[1:])]
        )
        self.dropout = nn.Dropout(dropout_rate)
        self.relu = nn.LeakyReLU(0.2)

    def forward(self, inps):
        """
        :param inps: list of ``n_layers`` tensors ordered [intent, ..., slots]
        :return: ``(intents, slots)``; both are ``None`` when there is only one input
        """
        assert len(inps) == self.n_layers
        n_pairs = self.n_layers - 1

        # Affinity between every pair of neighbouring inputs.
        Cs = [self.scorers[i](inps[i], inps[i + 1]) for i in range(n_pairs)]

        projs = [linear(inp) for linear, inp in zip(self.linears, inps)]
        revers = [linear(inp) for linear, inp in zip(self.reverse, inps)]

        # Forward pass (intent -> slots).  tanh is applied after every step
        # except the last one of a chain with more than two inputs.
        slots = None
        if n_pairs > 0:
            slots = torch.tanh(torch.bmm(Cs[0].transpose(1, 2), projs[0]) + projs[1])
            for i in range(1, n_pairs):
                slots = torch.bmm(Cs[i].transpose(1, 2), slots) + projs[i + 1]
                if i < n_pairs - 1:
                    slots = torch.tanh(slots)

        # Backward pass (slots -> intent), mirroring the forward one.
        intents = None
        if n_pairs > 0:
            intents = torch.tanh(torch.bmm(Cs[-1], revers[-1]) + revers[-2])
            for i in range(n_pairs - 1, 0, -1):
                intents = torch.bmm(Cs[i - 1], intents) + revers[i - 1]
                if i > 1:
                    intents = torch.tanh(intents)

        return intents, slots

# Joint LSTM

In [6]:
import torch
import torch.nn as nn
from torchcrf import CRF
import torch.nn.functional as F


class JointLSTM(nn.Module):
    """Joint intent-detection and slot-filling model built on LSTM encoders.

    A shared ``Encoder`` feeds two task-specific LSTMs.  Intent logits come
    from a label-wise attention head, slot logits from ``SlotClassifier``.
    With ``args.intent_slot_attn_type == 'coattention'`` both are refined by a
    hierarchical co-attention between intent and slot label representations.

    Args:
        args: configuration object (hyper-parameters and feature switches).
        num_vocab: size of the word vocabulary.
        intent_label_lst: list of intent labels.
        slot_label_lst: list of slot labels.
        slot_hier: slot label hierarchy; only the size of each level is used.
    """

    def __init__(self, args, num_vocab, intent_label_lst, slot_label_lst, slot_hier):
        super().__init__()
        self.args = args
        self.attn_type = args.intent_slot_attn_type
        self.n_levels = args.n_levels
        self.num_intent_labels = len(intent_label_lst)
        self.num_slot_labels = len(slot_label_lst)
        self.slot_hier = [len(x) for x in slot_hier]
        self.embedding = nn.Embedding(num_vocab, args.word_embedding_dim)
        self.encoder = Encoder(args)
        nn.init.normal_(self.embedding.weight)
        # Width of the encoder output: LSTM + self-attention (+ char features).
        out_dim = args.encoder_hidden_dim + args.attention_output_dim
        if args.use_charcnn:
            out_dim += args.char_out
        if args.use_charlstm:
            out_dim += args.char_out
        self.lstm_intent = LSTMEncoder(
            out_dim,
            args.decoder_hidden_dim,
            args.dropout_rate
        )
        self.lstm_slot = LSTMEncoder(
            out_dim,
            args.decoder_hidden_dim,
            args.dropout_rate
        )

        # Predicts the number of intents from the intent logits.
        self.intent_detection = IntentClassifier(self.num_intent_labels, self.num_intent_labels, args.dropout_rate)
        self.slot_classifier = SlotClassifier(
            args.decoder_hidden_dim,
            self.num_intent_labels,
            self.num_slot_labels,
            self.args.max_seq_len,
            self.args.slot_decoder_size,
            args.dropout_rate,
        )
        self.output_size = args.decoder_hidden_dim
        self.attention_mode = args.attention_mode

        if args.intent_slot_attn_type == 'coattention':
            dims = [self.args.label_embedding_size] + [args.slot_decoder_size] + [args.slot_decoder_size + args.level_projection_size] * (len(self.slot_hier) - 1) + [self.args.label_embedding_size]
            self.attn = HierCoAttention(args, dims, args.intent_slot_attn_size, args.dropout_rate)
        if args.intent_slot_attn_type:
            # nn.Linear's third argument is ``bias``, not a dropout rate.  The
            # bias is read explicitly in forward(), so it must always exist.
            self.intent_refine = nn.Linear(args.decoder_hidden_dim + args.intent_slot_attn_size, self.num_intent_labels, bias=True)
            self.slot_refine = IntentClassifier(args.slot_decoder_size + args.intent_slot_attn_size, self.num_slot_labels, args.dropout_rate)
            self.slot_proj = IntentClassifier(self.num_slot_labels, self.args.label_embedding_size, args.dropout_rate)
            self.intent_proj = IntentClassifier(1, self.args.label_embedding_size, args.dropout_rate)

        init_attention_layer(self, 'intent', [self.num_intent_labels], 1, args.decoder_hidden_dim)
        if args.auxiliary_tasks or args.intent_slot_attn_type == 'coattention':
            init_attention_layer(self, 'slot', self.slot_hier, len(self.slot_hier), self.args.slot_decoder_size)

        self.relu = nn.LeakyReLU(0.2)
        # Same as above: the bias is used in forward(), so request it explicitly.
        self.intent_classifier = nn.Linear(args.decoder_hidden_dim, self.num_intent_labels, bias=True)

        if args.use_crf:
            self.crf = CRF(num_tags=self.num_slot_labels, batch_first=True)

    def sequence_mask(self, length, max_length=None):
        """Build a boolean mask that is True for the first ``length[i]`` positions of row ``i``.

        :param length: 1-D tensor of sequence lengths
        :param max_length: width of the mask; defaults to ``length.max()``
        :return: bool tensor of shape [len(length), max_length]
        """
        if max_length is None:
            max_length = length.max()
        x = torch.arange(max_length, dtype=length.dtype, device=length.device)
        mask = x.unsqueeze(0) < length.unsqueeze(1)
        # mask[:, 0] = 0
        return mask

    def forward(self, input_ids, char_ids, seq_lens, intent_label_ids, slot_labels_ids, slot_hier=None):
        """Compute logits and, when labels are given, the training losses.

        :param input_ids: word ids
        :param char_ids: character ids of the words
        :param seq_lens: lengths of the sentences
        :param intent_label_ids: intent targets, or ``None`` to skip the intent loss
        :param slot_labels_ids: slot targets, or ``None`` to skip the slot loss
        :param slot_hier: not used by this method
        :return: ``((total_loss, intent_loss, slot_loss, count_loss),
            (intent_logits, slot_logits, intent_dec))``; losses that were not
            computed are the integer 0
        """
        embedded_text = self.embedding(input_ids)
        encoded = self.encoder(embedded_text, char_ids, seq_lens)
        intent_output = self.lstm_intent(encoded, seq_lens)
        slot_output = self.lstm_slot(encoded, seq_lens)

        i_context_vector, intent_logits, i_attn = perform_attention(self, 'intent', intent_output, None, [self.num_intent_labels], 1)
        intent_logits = intent_logits[-1]
        i_context_vector = i_context_vector[-1]
        # Score every intent with its own classifier row on its own context vector.
        intent_logits = self.intent_classifier.weight.mul(i_context_vector).sum(dim=2).add(self.intent_classifier.bias)
        intent_dec = self.intent_detection(intent_logits)

        x, slot_logits = self.slot_classifier(slot_output)

        if self.args.intent_slot_attn_type == 'coattention':
            s_context_vector, s_logits, s_attn = perform_attention(self, 'slot', x, None, self.slot_hier, len(self.slot_hier))

        if self.attn_type == 'coattention':
            if self.args.embedding_type == 'soft':
                # Soft label embeddings: project the predicted distributions.
                slots = self.slot_proj(F.softmax(slot_logits, -1))
                intents = self.intent_proj(torch.sigmoid(intent_logits.unsqueeze(2)))
            else:
                # Hard label embeddings: project one-hot predictions instead.
                slot_label = torch.argmax(slot_logits, dim=-1)
                hard_label = F.one_hot(slot_label, num_classes=self.num_slot_labels)
                for i in range(len(seq_lens)):
                    # Blank out the positions past the end of each sentence.
                    hard_label[i, seq_lens[i]:, :] = 0
                slots = self.slot_proj(hard_label.float())

                int_labels = torch.zeros_like(intent_logits)
                # Predicted number of intents per sample.
                num = torch.argmax(intent_dec, dim=-1)
                for i in range(len(intent_logits)):
                    # Switch on the top-k intents, k being the predicted count.
                    ids = torch.topk(intent_logits[i], int(num[i])).indices
                    int_labels[i, ids] = 1.0

                intents = self.intent_proj(int_labels.unsqueeze(2))
            intent_vec, slot_vec = self.attn([intents] + s_context_vector + [slots])

        if self.attn_type:
            # NOTE(review): intent_vec / slot_vec only exist for 'coattention';
            # any other truthy attn_type fails here -- confirm intended values.
            intent_logits = self.intent_refine.weight.mul(torch.tanh(torch.cat([i_context_vector, intent_vec], dim=-1))).sum(dim=2).add(self.intent_refine.bias)
            slot_logits = self.relu(self.slot_refine(torch.cat([x, self.relu(slot_vec)], dim=-1)))

        attention_mask = self.sequence_mask(seq_lens)
        total_loss = 0
        intent_loss = 0
        slot_loss = 0
        count_loss = 0
        # 1. Intent Softmax
        if intent_label_ids is not None:
            if self.num_intent_labels == 1:
                intent_loss_fct = nn.MSELoss()
                intent_loss = intent_loss_fct(intent_logits.view(-1), intent_label_ids.view(-1))
            else:
                # Multi-label intent loss plus a classification loss on the
                # number of intents per sample.
                intent_loss_fct = nn.BCEWithLogitsLoss()
                intent_loss_cnt = nn.CrossEntropyLoss()
                intent_count = torch.sum(intent_label_ids, dim=-1).long()
                intent_loss = intent_loss_fct(intent_logits.view(-1, self.num_intent_labels), intent_label_ids)
                count_loss = intent_loss_cnt(intent_dec.view(-1, self.num_intent_labels), intent_count)
            total_loss += (intent_loss + count_loss) * self.args.intent_loss_coef

        # 2. Slot Softmax
        if slot_labels_ids is not None:
            if self.args.use_crf:
                slot_loss = self.crf(slot_logits, slot_labels_ids, mask=attention_mask.to(slot_logits.device), reduction='mean')
                slot_loss = -1 * slot_loss  # negative log-likelihood
            else:
                slot_loss_fct = nn.CrossEntropyLoss(ignore_index=self.args.ignore_index)
                # Only keep active parts of the loss
                if attention_mask is not None:
                    active_loss = attention_mask.view(-1) == 1
                    active_logits = slot_logits.view(-1, self.num_slot_labels)[active_loss]
                    active_labels = slot_labels_ids.view(-1)[active_loss]
                    slot_loss = slot_loss_fct(active_logits, active_labels)
                else:
                    slot_loss = slot_loss_fct(slot_logits.view(-1, self.num_slot_labels), slot_labels_ids.view(-1))
            total_loss += slot_loss * (1 - self.args.intent_loss_coef)

        outputs = ((intent_logits, slot_logits, intent_dec),)  # add hidden states and attention if they are here

        outputs = ((total_loss, intent_loss, slot_loss, count_loss),) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions) # Logits is a tuple of intent and slot logits

# Train model

In [8]:
import os

import numpy as np
import torch


class EarlyStopping:
    """Early stops the training if the tuning metric doesn't improve after a given patience."""

    def __init__(self, patience=7, verbose=False):
        """
        Args:
            patience (int): How many non-improving evaluations to tolerate before
                            ``early_stop`` is set. Values <= 0 disable stopping.
                            Default: 7
            verbose (bool): If True, prints a message for each improvement.
                            Default: False
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
        self.val_loss_min = np.inf

    def __call__(self, val_loss, model, args):
        """Record one evaluation result and checkpoint the model on improvement.

        Args:
            val_loss: value of the tuning metric (a loss when
                ``args.tuning_metric == "loss"``, otherwise a score where
                higher is better).
            model: object to checkpoint.
            args: configuration; ``tuning_metric`` and ``model_dir`` are read.
        """
        # Turn everything into "higher is better".
        if args.tuning_metric == "loss":
            score = -val_loss
        else:
            score = val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, args)
        elif score < self.best_score:
            if self.patience > 0:
                self.counter += 1
                print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
                if self.counter >= self.patience:
                    self.early_stop = True
        else:
            # Ties count as an improvement: checkpoint and reset the counter.
            self.best_score = score
            self.save_checkpoint(val_loss, model, args)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, args):
        """Saves model when validation loss decreases or accuracy/f1 increases.

        Writes ``model.bin`` (the whole ``model`` object) and
        ``training_args.bin`` into ``args.model_dir``, creating the directory
        when it does not exist yet.
        """
        if self.verbose:
            if args.tuning_metric == "loss":
                print(f"Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...")
            else:
                print(
                    f"{args.tuning_metric} increased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ..."
                )
        if args.model_dir:
            # torch.save does not create missing parent directories.
            os.makedirs(args.model_dir, exist_ok=True)
        torch.save(model, os.path.join(args.model_dir, "model.bin"))
        torch.save(args, os.path.join(args.model_dir, "training_args.bin"))
        self.val_loss_min = val_loss

In [9]:
import logging
import os

import numpy as np
import torch
import torch.nn.functional as F

from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.tensorboard import SummaryWriter
from tqdm.auto import tqdm, trange
from transformers import AdamW, get_linear_schedule_with_warmup, AutoTokenizer

logger = logging.getLogger(__name__)


class Trainer(object):
    """Training / evaluation driver for the joint intent-detection and slot-filling model.

    The trainer builds the model selected by ``args.model_type``, runs the optimisation
    loop with periodic evaluation on the dev split, keeps a state-dict checkpoint in
    ``args.model_dir`` and writes metric summaries there.
    """

    # (TensorBoard tag, key in the dict returned by ``evaluate``) logged after each dev evaluation.
    _DEV_SCALARS = (
        ("Loss/validation", "loss"),
        ("Intent Accuracy/validation", "intent_acc"),
        ("Intent F1", "intent_f1"),
        ("Slot F1/validation", "slot_f1"),
        ("Mean Intent Slot", "mean_intent_slot"),
        ("Sentence Accuracy/validation", "semantic_frame_acc"),
    )

    def __init__(self, args, collate, train_dataset=None, dev_dataset=None, test_dataset=None):
        """Create the model and move it to the selected device.

        Args:
            args: Run configuration namespace. ``args.n_chars`` is set here from the
                training dataset.
            collate: Collate function handed to every ``DataLoader``.
            train_dataset: Training split. Its ``chars`` attribute is always read (and
                ``vocab`` for non-BERT models), so it is required in practice.
            dev_dataset: Development split, or ``None``.
            test_dataset: Test split, or ``None``.
        """
        self.args = args
        self.train_dataset = train_dataset
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset
        self.collate_fn = collate
        args.n_chars = len(self.train_dataset.chars)
        if 'bert' in self.args.model_type:
            self.tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
            # dev/test default to None in the signature, so skip splits that were not supplied.
            for dataset in (train_dataset, dev_dataset, test_dataset):
                if dataset is not None:
                    dataset.load_bert(self.tokenizer)

        self.intent_label_lst = get_intent_labels(args)
        self.slot_label_lst, self.hiers = get_slots_all(args)

        self.pad_token_label_id = args.ignore_index
        self.config_class, self.model_class, _ = MODEL_CLASSES[args.model_type]
        if 'bert' in self.args.model_type:
            self.config = self.config_class.from_pretrained(args.model_name_or_path, finetuning_task=args.task)
            self.model = self.model_class.from_pretrained(
                args.model_name_or_path,
                config=self.config,
                args=args,
                intent_label_lst=self.intent_label_lst,
                slot_label_lst=self.slot_label_lst,
                slot_hier=self.hiers
            )
        else:
            self.model = self.model_class(args, len(self.train_dataset.vocab), self.intent_label_lst, self.slot_label_lst, self.hiers)
        if args.base_model:
            # Warm start: copy every pretrained tensor whose name and shape match the new model.
            model_state = self.model.state_dict()
            # map_location="cpu" lets a checkpoint saved on GPU be read on a CPU-only host;
            # the model is moved to the target device below.
            pretrained_state = torch.load(os.path.join(args.base_model, 'model.bin'), map_location="cpu")
            pretrained_state = {
                name: tensor
                for name, tensor in pretrained_state.items()
                if name in model_state and tensor.size() == model_state[name].size()
            }
            model_state.update(pretrained_state)
            self.model.load_state_dict(model_state)

        self.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
        self.model.to(self.device)

    def _prepare_inputs(self, batch):
        """Move a collated batch to the device and map it to the model's keyword arguments.

        Every element except the last is moved to ``self.device``; the last element is
        left where the collate function put it. The positional layout of ``batch``
        differs between BERT-based and other model types.
        """
        batch = tuple(t.to(self.device) for t in batch[:-1]) + (batch[-1],)
        if 'bert' in self.args.model_type:
            return {
                "input_ids": batch[0],
                "attention_mask": batch[3],
                "intent_label_ids": batch[5],
                "slot_labels_ids": batch[6],
                "token_type_ids": batch[4],
                "heads": batch[2],
                "seq_lens": batch[-1].cpu(),
            }
        return {
            "input_ids": batch[0],
            "char_ids": batch[1],
            "intent_label_ids": batch[2],
            "slot_labels_ids": batch[3],
            "seq_lens": batch[4],
        }

    @staticmethod
    def _result_lines(results):
        """Format a metrics dict as `` key = value`` lines, sorted by key."""
        return [
            " {key} = {value}\n".format(key=key, value=str(results[key]))
            for key in sorted(results.keys())
        ]

    @staticmethod
    def _top_k_multi_hot(scores, k):
        """Return a multi-hot float vector marking the ``k`` highest entries of ``scores``.

        ``k == 0`` yields an all-zero vector. A plain ``argsort()[-k:]`` would select
        every entry in that case because ``[-0:]`` is the whole array.
        """
        multi_hot = np.zeros(len(scores))
        if k > 0:
            multi_hot[np.argsort(scores)[-k:]] = 1.
        return multi_hot

    @staticmethod
    def _repair_bio_tags(tags):
        """Replace ``I-x`` tags that do not continue an open ``B-x`` span with ``'O'``.

        The open span is set by a ``B-`` tag and cleared by any tag that is neither
        ``B-`` nor ``I-``. A rejected ``I-`` tag does not clear it.
        """
        repaired = []
        open_slot = None
        for tag in tags:
            prefix = tag[:2]
            if prefix == 'B-':
                open_slot = tag[2:]
            elif prefix == 'I-':
                if open_slot is None or open_slot != tag[2:]:
                    tag = 'O'
            else:
                open_slot = None
            repaired.append(tag)
        return repaired

    def train(self):
        """Run training with periodic dev/test evaluation.

        Returns:
            tuple: ``(global_step, mean_training_loss)`` where the mean is taken over the
            optimiser updates performed (``0.0`` when no update happened).
        """
        train_sampler = RandomSampler(self.train_dataset)
        train_dataloader = DataLoader(self.train_dataset, sampler=train_sampler, batch_size=self.args.train_batch_size, collate_fn=self.collate_fn)

        writer = SummaryWriter(log_dir=self.args.model_dir)
        try:
            return self._run_training(train_dataloader, writer)
        finally:
            # Flush and release the event file even when training is interrupted.
            writer.close()

    def _run_training(self, train_dataloader, writer):
        """Optimisation loop behind ``train``; ``writer`` is owned and closed by the caller."""
        updates_per_epoch = len(train_dataloader) // self.args.gradient_accumulation_steps
        if self.args.max_steps > 0:
            t_total = self.args.max_steps
            # max(1, ...) keeps this defined when an epoch is shorter than one accumulation window.
            self.args.num_train_epochs = self.args.max_steps // max(1, updates_per_epoch) + 1
        else:
            t_total = updates_per_epoch * self.args.num_train_epochs
        print("check init")
        results = self.evaluate("dev", -1)
        print(results)

        # Prepare optimizer and schedule (linear warmup and decay)
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": self.args.weight_decay,
            },
            {
                "params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": 0.0,
            },
        ]
        optimizer = AdamW(optimizer_grouped_parameters, lr=self.args.learning_rate, eps=self.args.adam_epsilon)
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=self.args.warmup_steps, num_training_steps=t_total
        )

        # A negative logging_steps means "evaluate once per epoch".
        if self.args.logging_steps < 0:
            self.args.logging_steps = len(train_dataloader)

        # Train!
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(self.train_dataset))
        logger.info("  Num Epochs = %d", self.args.num_train_epochs)
        logger.info("  Total train batch size = %d", self.args.train_batch_size)
        logger.info("  Gradient Accumulation steps = %d", self.args.gradient_accumulation_steps)
        logger.info("  Total optimization steps = %d", t_total)
        logger.info("  Logging steps = %d", self.args.logging_steps)

        global_step = 0
        tr_loss = 0.0
        self.model.zero_grad()
        best_sent = 0
        best_slot = 0

        train_iterator = trange(int(self.args.num_train_epochs), desc="Epoch")
        # NOTE(review): this object is never invoked inside the loop, so its `early_stop`
        # flag keeps its initial value and the check below never fires. Wiring it in would
        # also make it write a pickled module to model.bin, clashing with save_model().
        early_stopping = EarlyStopping(patience=self.args.early_stopping, verbose=True)

        log_path = os.path.join(self.args.model_dir, self.args.logging)
        with open(log_path, 'w') as logfile:
            for epoch in train_iterator:
                epoch_iterator = tqdm(train_dataloader, desc="Iteration", position=0, leave=True)
                print("\nEpoch", epoch)

                for step, batch in enumerate(epoch_iterator):
                    self.model.train()
                    inputs = self._prepare_inputs(batch)
                    outputs = self.model(**inputs)
                    total_loss, intent_loss, _slot_loss, count_loss = outputs[0]

                    # For the first `only_intent` fraction of the epochs only the intent and
                    # count losses are optimised.
                    if epoch < self.args.num_train_epochs * self.args.only_intent:
                        total_loss = intent_loss + count_loss
                    # Scale after choosing the loss so both phases honour gradient accumulation.
                    if self.args.gradient_accumulation_steps > 1:
                        total_loss = total_loss / self.args.gradient_accumulation_steps
                    total_loss.backward()

                    tr_loss += total_loss.item()
                    if (step + 1) % self.args.gradient_accumulation_steps == 0:
                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args.max_grad_norm)

                        optimizer.step()
                        scheduler.step()  # Update learning rate schedule
                        self.model.zero_grad()
                        global_step += 1

                        if self.args.logging_steps > 0 and global_step % (self.args.logging_steps) == 0:
                            print("\nTuning metrics:", self.args.tuning_metric)
                            results = self.evaluate("dev", epoch)
                            for tag, key in self._DEV_SCALARS:
                                writer.add_scalar(tag, results[key], epoch)

                            # Both trackers are reset to the current dev values, so they describe
                            # the last saved checkpoint rather than independent running maxima.
                            if results['semantic_frame_acc'] >= best_sent or results['slot_f1'] >= best_slot:
                                best_sent = results['semantic_frame_acc']
                                best_slot = results['slot_f1']
                                self.save_model()
                                results = self.evaluate('test', epoch)
                                logfile.write('\n\nEPOCH = ' + str(epoch) + '\n')
                                logfile.writelines(self._result_lines(results))

                    if 0 < self.args.max_steps < global_step:
                        epoch_iterator.close()
                        break

                if 0 < self.args.max_steps < global_step or early_stopping.early_stop:
                    train_iterator.close()
                    break
                # Guard against an epoch that produced no optimiser update.
                writer.add_scalar("Loss/train", tr_loss / max(global_step, 1), epoch)

        mean_loss = tr_loss / global_step if global_step else 0.0
        return global_step, mean_loss

    def write_evaluation_result(self, out_file, results):
        """Write ``results`` as sorted `` key = value`` lines to ``<model_dir>/<out_file>``."""
        out_path = os.path.join(self.args.model_dir, out_file)
        with open(out_path, "w", encoding="utf-8") as w:
            w.write("***** Eval results *****\n")
            w.writelines(self._result_lines(results))

    def evaluate(self, mode, epoch):
        """Evaluate the model on the dev or test split.

        Args:
            mode: ``"dev"`` or ``"test"``.
            epoch: Value stored under ``results['epoch']``; not otherwise used.

        Returns:
            dict: ``loss``, the metrics returned by ``compute_metrics``, ``num_acc``
            (agreement between predicted and gold intent counts, ``nan`` when the
            two-stage intent detection is disabled) and ``epoch``.

        Raises:
            Exception: If ``mode`` is neither ``"dev"`` nor ``"test"``.
        """
        if mode == "test":
            dataset = self.test_dataset
        elif mode == "dev":
            dataset = self.dev_dataset
        else:
            raise Exception("Only dev and test dataset available")

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.eval_batch_size, collate_fn=self.collate_fn)

        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        slot_label_map = dict(enumerate(self.slot_label_lst))
        out_slot_label_list = []
        slot_preds_list = []
        predictions = []
        intent_labels = []
        int_len_gold = []
        int_len_pred = []

        results = {}
        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            with torch.no_grad():
                inputs = self._prepare_inputs(batch)
                outputs = self.model(**inputs)

                if self.args.num_intent_detection:
                    tmp_eval_loss, (intent_logits, slot_logits, intent_dec) = outputs[:2]
                else:
                    tmp_eval_loss, (intent_logits, slot_logits) = outputs[:2]

                eval_loss += tmp_eval_loss[0].mean().item()
            nb_eval_steps += 1

            # Intent prediction: per-intent log-probabilities, one row per example.
            intent_preds = F.logsigmoid(intent_logits).detach().cpu().numpy()
            out_intent_label_ids = inputs["intent_label_ids"].detach().cpu().numpy()
            intent_labels.extend(out_intent_label_ids.tolist())

            if self.args.num_intent_detection:
                # Two-stage decoding: predict how many intents there are, then keep that
                # many top-scoring intents.
                intent_nums = np.argmax(intent_dec.detach().cpu().numpy(), axis=-1)
                gold_nums = np.sum(out_intent_label_ids, axis=-1)
                int_len_gold.extend(gold_nums.tolist())
                int_len_pred.extend(intent_nums.tolist())
                predictions.extend(
                    self._top_k_multi_hot(scores, num) for num, scores in zip(intent_nums, intent_preds)
                )
            else:
                # The scores are log-sigmoids (always <= 0), so rounding them can never give
                # 1. Threshold at log(0.5), i.e. probability > 0.5.
                predictions.extend((intent_preds > np.log(0.5)).astype(float).tolist())

            # Slot prediction
            if self.args.use_crf:
                slot_preds_arg = np.array(self.model.crf.decode(slot_logits))
            else:
                slot_preds_arg = np.argmax(slot_logits.detach().cpu().numpy(), axis=2)
            out_slot_labels_ids = inputs["slot_labels_ids"].detach().cpu().numpy()

            for i in range(out_slot_labels_ids.shape[0]):
                gold_tags = []
                pred_tags = []
                for j in range(out_slot_labels_ids.shape[1]):
                    # Positions labelled with the pad/ignore id are excluded from scoring.
                    if out_slot_labels_ids[i, j] != self.pad_token_label_id:
                        gold_tags.append(slot_label_map[out_slot_labels_ids[i][j]])
                        pred_tags.append(slot_label_map[slot_preds_arg[i][j]])
                out_slot_label_list.append(gold_tags)
                slot_preds_list.append(self._repair_bio_tags(pred_tags))

        # An empty split would otherwise divide by zero.
        eval_loss = eval_loss / nb_eval_steps if nb_eval_steps else 0.0
        results['loss'] = eval_loss
        predictions = np.array(predictions)
        intent_labels = np.array(intent_labels)
        total_result = compute_metrics(predictions, intent_labels, slot_preds_list, out_slot_label_list)
        results.update(total_result)
        int_len_gold = np.array(int_len_gold)
        int_len_pred = np.array(int_len_pred)
        # No counts are collected when two-stage detection is off; report nan without
        # triggering NumPy's empty-mean warning.
        if int_len_gold.size:
            results['num_acc'] = (int_len_gold == int_len_pred).mean()
        else:
            results['num_acc'] = float("nan")
        results['epoch'] = epoch
        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  %s = %s", key, str(results[key]))
        if mode == "test":
            self.write_evaluation_result("eval_test_results.txt", results)
        elif mode == "dev":
            self.write_evaluation_result("eval_dev_results.txt", results)
        return results

    def save_model(self):
        """Save the model state dict and the training arguments to ``args.model_dir`` (overwriting)."""
        os.makedirs(self.args.model_dir, exist_ok=True)
        # Unwrap a parallel wrapper (anything exposing `.module`) before saving.
        model_to_save = self.model.module if hasattr(self.model, "module") else self.model
        torch.save(model_to_save.state_dict(), os.path.join(self.args.model_dir, 'model.bin'))

        # Save training arguments together with the trained model
        torch.save(self.args, os.path.join(self.args.model_dir, "training_args.bin"))
        logger.info("Saving model checkpoint to %s", self.args.model_dir)

    def load_model(self):
        """Load ``<model_dir>/model.bin`` into the current model (non-strict).

        Raises:
            Exception: If ``args.model_dir`` does not exist, or if reading or applying
                the checkpoint fails (the underlying error is chained as the cause).
        """
        # Check whether model exists
        if not os.path.exists(self.args.model_dir):
            raise Exception("Model doesn't exists! Train first!")

        try:
            # map_location keeps a GPU-saved checkpoint loadable on a CPU-only host.
            state_dict = torch.load(os.path.join(self.args.model_dir, 'model.bin'), map_location=self.device)
            self.model.load_state_dict(state_dict, strict=False)
            self.model.to(self.device)
            logger.info("***** Model Loaded *****")
        except Exception as err:
            raise Exception("Some model files might be missing...") from err

# Run

In [10]:
import argparse


def main(args):
    """Build the datasets and the trainer from ``args`` and run the requested phases.

    Training runs when ``args.do_train`` is truthy. When ``args.do_eval`` is truthy the
    saved checkpoint is reloaded and evaluated on the dev split and then the test split.
    """
    init_logger()
    set_seed(args)
    # The returned values are not used in this function; the call is kept as in the
    # original so that execution order is unchanged.
    _slot_labels, _slot_hierarchy = get_slots_all(args)
    intent_count = len(get_intent_labels(args))
    collate = TextCollate(0, intent_count, args.max_seq_len)

    # Load the three splits in the order train -> dev -> test.
    splits = [TextLoader(args, split_name) for split_name in ('train', 'dev', 'test')]
    trainer = Trainer(args, collate, *splits)

    if args.do_train:
        trainer.train()

    if not args.do_eval:
        return
    trainer.load_model()
    trainer.evaluate('dev', 0)
    trainer.evaluate("test", -1)




In [14]:
import argparse



# Command-line style configuration for the run. The notebook parses an empty argument
# list at the end of this cell, so every option takes the default declared here; edit the
# defaults (or assign to `args.<name>` afterwards) to change a run.
parser = argparse.ArgumentParser()

parser.add_argument("--task", default="mixsnips", type=str, help="The name of the task to train")
parser.add_argument("--model_dir", default="dir_base", type=str, help="Path to save, load model")
parser.add_argument("--data_dir", default="./MISCA/data", type=str, help="The input data dir")
parser.add_argument("--intent_label_file", default="intent_label.txt", type=str, help="Intent Label file")
parser.add_argument("--slot_label_file", default="slot_label.txt", type=str, help="Slot Label file")
parser.add_argument("--slot_label_clean", default="slot_clean.txt", type=str, help="Slot Label file")
parser.add_argument("--logging", default="log.txt", type=str, help="Logging file")

# LAAT
parser.add_argument("--n_levels", default=1, type=int, help="Number of attention")
parser.add_argument("--attention_mode", default="label", type=str)
parser.add_argument("--level_projection_size", default=32, type=int)
parser.add_argument("--d_a", default=-1, type=int)

parser.add_argument("--char_embed", default=64, type=int)
parser.add_argument("--char_out", default=64, type=int)
# Both character encoders use action="store_false": they are ON by default and passing
# the flag turns them OFF, so the help text says so explicitly.
parser.add_argument("--use_charcnn", action="store_false", help="Pass to disable CharCNN (enabled by default)")
parser.add_argument("--use_charlstm", action="store_false", help="Pass to disable CharLSTM (enabled by default)")
parser.add_argument("--word_embedding_dim", default=128, type=int)
parser.add_argument("--encoder_hidden_dim", default=128, type=int)
parser.add_argument("--decoder_hidden_dim", default=256, type=int)
parser.add_argument("--attention_hidden_dim", default=256, type=int)
parser.add_argument("--attention_output_dim", default=256, type=int)

# Config training
parser.add_argument("--model_type", default="lstm", type=str, help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()))
parser.add_argument('--seed', type=int, default=1234, help="random seed for initialization")
parser.add_argument("--train_batch_size", default=32, type=int, help="Batch size for training.")
parser.add_argument("--eval_batch_size", default=64, type=int, help="Batch size for evaluation.")
parser.add_argument("--max_seq_len", default=100, type=int, help="The maximum total input sequence length after tokenization.")
parser.add_argument("--learning_rate", default=4e-5, type=float, help="The initial learning rate for Adam.")
# Parsed as float; the trainer converts it with int() when it builds the epoch loop.
parser.add_argument("--num_train_epochs", default=20, type=float, help="Total number of training epochs to perform.")
parser.add_argument("--weight_decay", default=0, type=float, help="Weight decay if we apply some.")
parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                    help="Number of updates steps to accumulate before performing a backward/update pass.")
parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.")
parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
parser.add_argument("--max_steps", default=-1, type=int, help="If > 0: set total number of training steps to perform. Override num_train_epochs.")
parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")
parser.add_argument("--dropout_rate", default=0.1, type=float, help="Dropout for fully-connected layers")

parser.add_argument('--logging_steps', type=int, default=-1, help="Log every X updates steps.")

# NOTE: store_true combined with default=True means these two switches are always True;
# passing the flag changes nothing. Set args.do_train / args.do_eval to False after
# parsing to skip a phase.
parser.add_argument("--do_train",default=True, action="store_true", help="Whether to run training.")
parser.add_argument("--do_eval",default=True, action="store_true", help="Whether to run eval on the test set.")
parser.add_argument("--no_cuda", action="store_true", help="Avoid using CUDA when available")
parser.add_argument("--tuning_metric", default="mean_intent_slot", type=str, help="Metric to save checkpoint")

parser.add_argument("--only_intent", default=0, type=float, help="The first epochs to optimize intent")

parser.add_argument("--ignore_index", default=0, type=int,
                    help='Specifies a target value that is ignored and does not contribute to the input gradient')

parser.add_argument(
    "--token_level",
    type=str,
    default="word-level",
    help="Tokens are at syllable level or word level (Vietnamese) [word-level, syllable-level]",
)

parser.add_argument('--intent_loss_coef', type=float, default=0.5, help='Coefficient for the intent loss.')
parser.add_argument('--aux_loss_coef', type=float, default=0.5, help='Coefficient for the aux task.')
# Passed to EarlyStopping as `patience`; a non-positive value disables its patience counter.
parser.add_argument('--early_stopping', type=float, default=-1, help='Early stopping strategy')

parser.add_argument("--base_model", default=None, type=str, help="The pretrained model path")

# NOTE: always True for the same reason as --do_train (store_true with default=True).
parser.add_argument(
    "--num_intent_detection",
    action="store_true",
    default=True,
    help="Whether to use two-stage intent detection",
)

parser.add_argument(
    "--auxiliary_tasks",
    action="store_true",
    help="Whether to optimize with auxiliary tasks",
)

parser.add_argument(
    "--slot_decoder_size", type=int, default=512, help="hidden size of attention output vector"
)

parser.add_argument(
    "--intent_slot_attn_size", type=int, default=256, help="hidden size of attention output vector"
)

parser.add_argument(
    "--min_freq", type=int, default=1, help="Minimum number of frequency to be considered in the vocab"
)

# No default is declared, so this is None unless set explicitly.
parser.add_argument(
    '--intent_slot_attn_type', choices=['coattention', 'attention_flow'],
)

parser.add_argument(
    '--embedding_type', choices=['soft', 'hard'], default='soft',
)

parser.add_argument(
    "--label_embedding_size", type=int, default=256, help="hidden size of label embedding vector"
)

# CRF option
# NOTE: always True (store_true with default=True).
parser.add_argument("--use_crf",default=True, action="store_true", help="Whether to use CRF")
parser.add_argument("--slot_pad_label", default="PAD", type=str, help="Pad token for slot label pad (to be ignore when calculate loss)")

# Parse an explicit empty list so the kernel's own sys.argv is ignored and only the
# defaults above apply.
args = parser.parse_args([])

# Resolve the pretrained model path / identifier registered for the chosen model type.
args.model_name_or_path = MODEL_PATH_MAP[args.model_type]


In [12]:
# Re-bind the notebook-level logger and lower its threshold to INFO so that INFO-level
# records (the evaluation/training progress messages) are not filtered out by this logger.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

In [None]:
# Launch training followed by evaluation with the parsed `args`.
# NOTE(review): the recorded output below reads ./MISCA/data/mixatis and reports 50 epochs,
# while the argument cell defaults to task="mixsnips" and 20 epochs. The arguments were
# presumably overridden before this run, so set them explicitly when reproducing it.
main(args) #mixAtis

INFO:__main__:LOOKING AT ./MISCA/data/mixatis/train.txt
INFO:__main__:LOOKING AT ./MISCA/data/mixatis/dev.txt
INFO:__main__:LOOKING AT ./MISCA/data/mixatis/test.txt
  torch.nn.init.normal(first_linear.weight, mean, std)
  torch.nn.init.normal(linear.weight, mean, std)
  torch.nn.init.normal(linear.weight, mean, std)
INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64


check init


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

  score = torch.where(mask[i].unsqueeze(1), next_score, score)
INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = -1
INFO:__main__:  intent_acc = 0.0013175230566534915
INFO:__main__:  intent_f1 = 0.1911438459490638
INFO:__main__:  loss = 57.24488131205241
INFO:__main__:  mean_intent_slot = 0.0025954056008125666
INFO:__main__:  num_acc = 0.057971014492753624
INFO:__main__:  semantic_frame_acc = 0.0
INFO:__main__:  slot_acc = 0.0
INFO:__main__:  slot_f1 = 0.0038732881449716414
INFO:__main__:  slot_precision = 0.0026136469709698495
INFO:__main__:  slot_recall = 0.007476635514018692


{'loss': 57.24488131205241, 'intent_acc': 0.0013175230566534915, 'intent_f1': 0.1911438459490638, 'slot_precision': 0.0026136469709698495, 'slot_recall': 0.007476635514018692, 'slot_f1': 0.0038732881449716414, 'semantic_frame_acc': 0.0, 'slot_acc': 0.0, 'mean_intent_slot': 0.0025954056008125666, 'num_acc': 0.057971014492753624, 'epoch': -1}


INFO:__main__:***** Running training *****
INFO:__main__:  Num examples = 13162
INFO:__main__:  Num Epochs = 50
INFO:__main__:  Total train batch size = 32
INFO:__main__:  Gradient Accumulation steps = 1
INFO:__main__:  Total optimization steps = 20600
INFO:__main__:  Logging steps = 412


Epoch:   0%|          | 0/50 [00:00<?, ?it/s]

Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 0


  return F.conv1d(input, weight, bias, self.stride,
INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 0
INFO:__main__:  intent_acc = 0.00922266139657444
INFO:__main__:  intent_f1 = 0.1654228855721393
INFO:__main__:  loss = 13.172293504079184
INFO:__main__:  mean_intent_slot = 0.15525952023784914
INFO:__main__:  num_acc = 0.6600790513833992
INFO:__main__:  semantic_frame_acc = 0.0
INFO:__main__:  slot_acc = 0.007905138339920948
INFO:__main__:  slot_f1 = 0.3012963790791238
INFO:__main__:  slot_precision = 0.3408631153068105
INFO:__main__:  slot_recall = 0.26995994659546063
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 0
INFO:__main__:  intent_acc = 0.0012077294685990338
INFO:__main__:  intent_f1 = 0.1366136034732272
INFO:__main__:  loss = 12.974015676058256
INFO:__main__:  mean_intent_slot = 0.09584976637364377
INFO:__main__:  num_acc = 0.6654589371980676
INFO:__main__:  semantic_frame_acc = 0.0
INFO:__main__:  slot_acc = 0.007246376811594203
INFO:__main__:  slot_f1 = 0.1904918032786885
INFO:__main__:  slot_precision = 0.2508635578583765
INFO:__main__:  slot_recall = 0.1535412262156448


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 1


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 1
INFO:__main__:  intent_acc = 0.02766798418972332
INFO:__main__:  intent_f1 = 0.18835720816824142
INFO:__main__:  loss = 7.885634422302246
INFO:__main__:  mean_intent_slot = 0.2517336694195903
INFO:__main__:  num_acc = 0.8669301712779973
INFO:__main__:  semantic_frame_acc = 0.0
INFO:__main__:  slot_acc = 0.03557312252964427
INFO:__main__:  slot_f1 = 0.47579935464945733
INFO:__main__:  slot_precision = 0.5278229742922226
INFO:__main__:  slot_recall = 0.4331108144192256
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 1
INFO:__main__:  intent_acc = 0.018115942028985508
INFO:__main__:  intent_f1 = 0.15678449258836943
INFO:__main__:  loss = 8.177071791428785
INFO:__main__:  mean_intent_slot = 0.21184894488622671
INFO:__main__:  num_acc = 0.8719806763285024
INFO:__main__:  semantic_frame_acc = 0.0
INFO:__main__:  slot_acc = 0.04468599033816425
INFO:__main__:  slot_f1 = 0.4055819477434679
INFO:__main__:  slot_precision = 0.4627371273712737
INFO:__main__:  slot_recall = 0.3609936575052854


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 2


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 2
INFO:__main__:  intent_acc = 0.025032938076416336
INFO:__main__:  intent_f1 = 0.1821561338289963
INFO:__main__:  loss = 5.0703043937683105
INFO:__main__:  mean_intent_slot = 0.33842024723880615
INFO:__main__:  num_acc = 0.9499341238471674
INFO:__main__:  semantic_frame_acc = 0.011857707509881422
INFO:__main__:  slot_acc = 0.1541501976284585
INFO:__main__:  slot_f1 = 0.6518075564011959
INFO:__main__:  slot_precision = 0.6637143647938002
INFO:__main__:  slot_recall = 0.6403204272363151
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 2
INFO:__main__:  intent_acc = 0.018115942028985508
INFO:__main__:  intent_f1 = 0.16055846422338568
INFO:__main__:  loss = 5.918036717634934
INFO:__main__:  mean_intent_slot = 0.3099453063196823
INFO:__main__:  num_acc = 0.9541062801932367
INFO:__main__:  semantic_frame_acc = 0.006038647342995169
INFO:__main__:  slot_acc = 0.15096618357487923
INFO:__main__:  slot_f1 = 0.6017746706103791
INFO:__main__:  slot_precision = 0.6124794745484401
INFO:__main__:  slot_recall = 0.5914376321353065


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 3


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 3
INFO:__main__:  intent_acc = 0.011857707509881422
INFO:__main__:  intent_f1 = 0.16224552745218998
INFO:__main__:  loss = 3.4048877557118735
INFO:__main__:  mean_intent_slot = 0.3945417179955746
INFO:__main__:  num_acc = 0.9578392621870883
INFO:__main__:  semantic_frame_acc = 0.005270092226613966
INFO:__main__:  slot_acc = 0.3201581027667984
INFO:__main__:  slot_f1 = 0.7772257284812677
INFO:__main__:  slot_precision = 0.7817396002160995
INFO:__main__:  slot_recall = 0.7727636849132177
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 3
INFO:__main__:  intent_acc = 0.006038647342995169
INFO:__main__:  intent_f1 = 0.13797101449275362
INFO:__main__:  loss = 4.9850087349231424
INFO:__main__:  mean_intent_slot = 0.3538988724830996
INFO:__main__:  num_acc = 0.9685990338164251
INFO:__main__:  semantic_frame_acc = 0.0012077294685990338
INFO:__main__:  slot_acc = 0.24396135265700483
INFO:__main__:  slot_f1 = 0.701759097623204
INFO:__main__:  slot_precision = 0.7133497133497133
INFO:__main__:  slot_recall = 0.6905391120507399


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 4


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 4
INFO:__main__:  intent_acc = 0.01844532279314888
INFO:__main__:  intent_f1 = 0.1655813953488372
INFO:__main__:  loss = 2.6098660031954446
INFO:__main__:  mean_intent_slot = 0.44236842774169777
INFO:__main__:  num_acc = 0.9802371541501976
INFO:__main__:  semantic_frame_acc = 0.010540184453227932
INFO:__main__:  slot_acc = 0.49538866930171277
INFO:__main__:  slot_f1 = 0.8662915326902466
INFO:__main__:  slot_precision = 0.8693197095993547
INFO:__main__:  slot_recall = 0.8632843791722297
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 4
INFO:__main__:  intent_acc = 0.01570048309178744
INFO:__main__:  intent_f1 = 0.14352392065344222
INFO:__main__:  loss = 4.583262571921716
INFO:__main__:  mean_intent_slot = 0.38254935895696623
INFO:__main__:  num_acc = 0.9855072463768116
INFO:__main__:  semantic_frame_acc = 0.004830917874396135
INFO:__main__:  slot_acc = 0.32004830917874394
INFO:__main__:  slot_f1 = 0.7493982348221451
INFO:__main__:  slot_precision = 0.758527341635084
INFO:__main__:  slot_recall = 0.7404862579281184


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 5


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 5
INFO:__main__:  intent_acc = 0.026350461133069828
INFO:__main__:  intent_f1 = 0.200185931205454
INFO:__main__:  loss = 2.1132309635480246
INFO:__main__:  mean_intent_slot = 0.4568767550383188
INFO:__main__:  num_acc = 0.9828722002635046
INFO:__main__:  semantic_frame_acc = 0.019762845849802372
INFO:__main__:  slot_acc = 0.5718050065876152
INFO:__main__:  slot_f1 = 0.8874030489435678
INFO:__main__:  slot_precision = 0.8888293597642647
INFO:__main__:  slot_recall = 0.8859813084112149
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 5
INFO:__main__:  intent_acc = 0.020531400966183576
INFO:__main__:  intent_f1 = 0.16676384839650146
INFO:__main__:  loss = 4.433552008408767
INFO:__main__:  mean_intent_slot = 0.39900564686378887
INFO:__main__:  num_acc = 0.9879227053140096
INFO:__main__:  semantic_frame_acc = 0.0036231884057971015
INFO:__main__:  slot_acc = 0.3611111111111111
INFO:__main__:  slot_f1 = 0.7774798927613942
INFO:__main__:  slot_precision = 0.7889009793253536
INFO:__main__:  slot_recall = 0.766384778012685


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 6


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 6
INFO:__main__:  intent_acc = 0.03689064558629776
INFO:__main__:  intent_f1 = 0.22566646001239926
INFO:__main__:  loss = 1.9220518271128337
INFO:__main__:  mean_intent_slot = 0.4652453227931489
INFO:__main__:  num_acc = 0.9868247694334651
INFO:__main__:  semantic_frame_acc = 0.025032938076416336
INFO:__main__:  slot_acc = 0.6060606060606061
INFO:__main__:  slot_f1 = 0.8936000000000001
INFO:__main__:  slot_precision = 0.892410119840213
INFO:__main__:  slot_recall = 0.8947930574098798
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 6
INFO:__main__:  intent_acc = 0.025362318840579712
INFO:__main__:  intent_f1 = 0.17535683076026798
INFO:__main__:  loss = 4.345833246524517
INFO:__main__:  mean_intent_slot = 0.406531426799969
INFO:__main__:  num_acc = 0.9891304347826086
INFO:__main__:  semantic_frame_acc = 0.006038647342995169
INFO:__main__:  slot_acc = 0.3756038647342995
INFO:__main__:  slot_f1 = 0.7877005347593583
INFO:__main__:  slot_precision = 0.797077922077922
INFO:__main__:  slot_recall = 0.7785412262156448


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 7


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 7
INFO:__main__:  intent_acc = 0.04216073781291173
INFO:__main__:  intent_f1 = 0.24852438645542094
INFO:__main__:  loss = 1.7017182906468709
INFO:__main__:  mean_intent_slot = 0.47320802848092397
INFO:__main__:  num_acc = 0.9907773386034255
INFO:__main__:  semantic_frame_acc = 0.03293807641633729
INFO:__main__:  slot_acc = 0.6732542819499341
INFO:__main__:  slot_f1 = 0.9042553191489362
INFO:__main__:  slot_precision = 0.9006622516556292
INFO:__main__:  slot_recall = 0.9078771695594126
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 7
INFO:__main__:  intent_acc = 0.026570048309178744
INFO:__main__:  intent_f1 = 0.19036496350364965
INFO:__main__:  loss = 4.263667601805467
INFO:__main__:  mean_intent_slot = 0.4118144359192953
INFO:__main__:  num_acc = 0.9939613526570048
INFO:__main__:  semantic_frame_acc = 0.007246376811594203
INFO:__main__:  slot_acc = 0.39009661835748793
INFO:__main__:  slot_f1 = 0.7970588235294118
INFO:__main__:  slot_precision = 0.8065476190476191
INFO:__main__:  slot_recall = 0.7877906976744186


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 8


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 8
INFO:__main__:  intent_acc = 0.05006587615283267
INFO:__main__:  intent_f1 = 0.26521739130434785
INFO:__main__:  loss = 1.5948344667752583
INFO:__main__:  mean_intent_slot = 0.4788923316948461
INFO:__main__:  num_acc = 0.9894598155467721
INFO:__main__:  semantic_frame_acc = 0.039525691699604744
INFO:__main__:  slot_acc = 0.6508563899868248
INFO:__main__:  slot_f1 = 0.9077187872368595
INFO:__main__:  slot_precision = 0.9002100840336135
INFO:__main__:  slot_recall = 0.9153538050734312
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 8
INFO:__main__:  intent_acc = 0.030193236714975844
INFO:__main__:  intent_f1 = 0.21476510067114096
INFO:__main__:  loss = 4.287693977355957
INFO:__main__:  mean_intent_slot = 0.42014132675198945
INFO:__main__:  num_acc = 0.9915458937198067
INFO:__main__:  semantic_frame_acc = 0.010869565217391304
INFO:__main__:  slot_acc = 0.4178743961352657
INFO:__main__:  slot_f1 = 0.810089416789003
INFO:__main__:  slot_precision = 0.8182798598004853
INFO:__main__:  slot_recall = 0.802061310782241


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 9


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 9
INFO:__main__:  intent_acc = 0.07905138339920949
INFO:__main__:  intent_f1 = 0.3147459727385377
INFO:__main__:  loss = 1.4922236204147339
INFO:__main__:  mean_intent_slot = 0.495369160763592
INFO:__main__:  num_acc = 0.9894598155467721
INFO:__main__:  semantic_frame_acc = 0.06455862977602109
INFO:__main__:  slot_acc = 0.6482213438735178
INFO:__main__:  slot_f1 = 0.9116869381279745
INFO:__main__:  slot_precision = 0.9028541503011259
INFO:__main__:  slot_recall = 0.9206942590120161
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 9
INFO:__main__:  intent_acc = 0.03985507246376811
INFO:__main__:  intent_f1 = 0.24489795918367346
INFO:__main__:  loss = 4.309674959916335
INFO:__main__:  mean_intent_slot = 0.4270379345211115
INFO:__main__:  num_acc = 0.9903381642512077
INFO:__main__:  semantic_frame_acc = 0.021739130434782608
INFO:__main__:  slot_acc = 0.4214975845410628
INFO:__main__:  slot_f1 = 0.8142207965784549
INFO:__main__:  slot_precision = 0.8236884802595997
INFO:__main__:  slot_recall = 0.804968287526427


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 10


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 10
INFO:__main__:  intent_acc = 0.11462450592885376
INFO:__main__:  intent_f1 = 0.3606455617628802
INFO:__main__:  loss = 1.3528520266215007
INFO:__main__:  mean_intent_slot = 0.5133569397578834
INFO:__main__:  num_acc = 0.9920948616600791
INFO:__main__:  semantic_frame_acc = 0.09354413702239789
INFO:__main__:  slot_acc = 0.6429512516469038
INFO:__main__:  slot_f1 = 0.9120893735869131
INFO:__main__:  slot_precision = 0.9085850556438791
INFO:__main__:  slot_recall = 0.9156208277703605
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 10
INFO:__main__:  intent_acc = 0.06642512077294686
INFO:__main__:  intent_f1 = 0.3017216224102714
INFO:__main__:  loss = 4.309487617932833
INFO:__main__:  mean_intent_slot = 0.44554370924296666
INFO:__main__:  num_acc = 0.9915458937198067
INFO:__main__:  semantic_frame_acc = 0.030193236714975844
INFO:__main__:  slot_acc = 0.4335748792270531
INFO:__main__:  slot_f1 = 0.8246622977129865
INFO:__main__:  slot_precision = 0.83482263742215
INFO:__main__:  slot_recall = 0.8147463002114165


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 11


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 11
INFO:__main__:  intent_acc = 0.11857707509881422
INFO:__main__:  intent_f1 = 0.38287309959664906
INFO:__main__:  loss = 1.2977645695209503
INFO:__main__:  mean_intent_slot = 0.5175389026098081
INFO:__main__:  num_acc = 0.9881422924901185
INFO:__main__:  semantic_frame_acc = 0.09354413702239789
INFO:__main__:  slot_acc = 0.6495388669301713
INFO:__main__:  slot_f1 = 0.9165007301208018
INFO:__main__:  slot_precision = 0.91129883843717
INFO:__main__:  slot_recall = 0.9217623497997329
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 11
INFO:__main__:  intent_acc = 0.0821256038647343
INFO:__main__:  intent_f1 = 0.3341107871720117
INFO:__main__:  loss = 4.400384481136616
INFO:__main__:  mean_intent_slot = 0.451236831383505
INFO:__main__:  num_acc = 0.9927536231884058
INFO:__main__:  semantic_frame_acc = 0.036231884057971016
INFO:__main__:  slot_acc = 0.4166666666666667
INFO:__main__:  slot_f1 = 0.8203480589022757
INFO:__main__:  slot_precision = 0.8312533912099838
INFO:__main__:  slot_recall = 0.8097251585623678


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 12


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 12
INFO:__main__:  intent_acc = 0.13438735177865613
INFO:__main__:  intent_f1 = 0.4017385904998448
INFO:__main__:  loss = 1.27365347246329
INFO:__main__:  mean_intent_slot = 0.5248095699290631
INFO:__main__:  num_acc = 0.9907773386034255
INFO:__main__:  semantic_frame_acc = 0.10935441370223979
INFO:__main__:  slot_acc = 0.6627140974967062
INFO:__main__:  slot_f1 = 0.9152317880794701
INFO:__main__:  slot_precision = 0.9080157687253614
INFO:__main__:  slot_recall = 0.9225634178905207
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 12
INFO:__main__:  intent_acc = 0.0893719806763285
INFO:__main__:  intent_f1 = 0.3615160349854228
INFO:__main__:  loss = 4.434512743583093
INFO:__main__:  mean_intent_slot = 0.45642709758609146
INFO:__main__:  num_acc = 0.9927536231884058
INFO:__main__:  semantic_frame_acc = 0.03985507246376811
INFO:__main__:  slot_acc = 0.4359903381642512
INFO:__main__:  slot_f1 = 0.8234822144958545
INFO:__main__:  slot_precision = 0.8335138061721711
INFO:__main__:  slot_recall = 0.8136892177589852


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 13


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 13
INFO:__main__:  intent_acc = 0.14492753623188406
INFO:__main__:  intent_f1 = 0.42670807453416154
INFO:__main__:  loss = 1.2245561083157857
INFO:__main__:  mean_intent_slot = 0.5333183328717808
INFO:__main__:  num_acc = 0.9920948616600791
INFO:__main__:  semantic_frame_acc = 0.1251646903820817
INFO:__main__:  slot_acc = 0.6745718050065876
INFO:__main__:  slot_f1 = 0.9217091295116774
INFO:__main__:  slot_precision = 0.9161171194935374
INFO:__main__:  slot_recall = 0.927369826435247
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 13
INFO:__main__:  intent_acc = 0.09782608695652174
INFO:__main__:  intent_f1 = 0.3797025371828521
INFO:__main__:  loss = 4.371569633483887
INFO:__main__:  mean_intent_slot = 0.4638635650080817
INFO:__main__:  num_acc = 0.9915458937198067
INFO:__main__:  semantic_frame_acc = 0.043478260869565216
INFO:__main__:  slot_acc = 0.44806763285024154
INFO:__main__:  slot_f1 = 0.8299010430596416
INFO:__main__:  slot_precision = 0.8400108283703303
INFO:__main__:  slot_recall = 0.820031712473573


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 14


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 14
INFO:__main__:  intent_acc = 0.1857707509881423
INFO:__main__:  intent_f1 = 0.4611559975139839
INFO:__main__:  loss = 1.1983147313197453
INFO:__main__:  mean_intent_slot = 0.5545973861111912
INFO:__main__:  num_acc = 0.9920948616600791
INFO:__main__:  semantic_frame_acc = 0.14756258234519104
INFO:__main__:  slot_acc = 0.6745718050065876
INFO:__main__:  slot_f1 = 0.9234240212342403
INFO:__main__:  slot_precision = 0.9179419525065963
INFO:__main__:  slot_recall = 0.9289719626168225
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 14
INFO:__main__:  intent_acc = 0.12681159420289856
INFO:__main__:  intent_f1 = 0.4197314652656159
INFO:__main__:  loss = 4.4417362579932576
INFO:__main__:  mean_intent_slot = 0.4832561844912436
INFO:__main__:  num_acc = 0.9927536231884058
INFO:__main__:  semantic_frame_acc = 0.06521739130434782
INFO:__main__:  slot_acc = 0.4758454106280193
INFO:__main__:  slot_f1 = 0.8397007747795886
INFO:__main__:  slot_precision = 0.8490005402485143
INFO:__main__:  slot_recall = 0.8306025369978859


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 15


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 15
INFO:__main__:  intent_acc = 0.20421607378129117
INFO:__main__:  intent_f1 = 0.4903786468032278
INFO:__main__:  loss = 1.2489878336588542
INFO:__main__:  mean_intent_slot = 0.5622697762516425
INFO:__main__:  num_acc = 0.9894598155467721
INFO:__main__:  semantic_frame_acc = 0.16337285902503293
INFO:__main__:  slot_acc = 0.6653491436100132
INFO:__main__:  slot_f1 = 0.9203234787219938
INFO:__main__:  slot_precision = 0.9139020537124802
INFO:__main__:  slot_recall = 0.9268357810413885
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 15
INFO:__main__:  intent_acc = 0.13647342995169082
INFO:__main__:  intent_f1 = 0.43861184018664334
INFO:__main__:  loss = 4.515544928037203
INFO:__main__:  mean_intent_slot = 0.487328837806286
INFO:__main__:  num_acc = 0.9915458937198067
INFO:__main__:  semantic_frame_acc = 0.07004830917874397
INFO:__main__:  slot_acc = 0.46497584541062803
INFO:__main__:  slot_f1 = 0.8381842456608811
INFO:__main__:  slot_precision = 0.847004856988667
INFO:__main__:  slot_recall = 0.8295454545454546


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 16


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 16
INFO:__main__:  intent_acc = 0.20948616600790515
INFO:__main__:  intent_f1 = 0.4942564420987271
INFO:__main__:  loss = 1.1642397940158844
INFO:__main__:  mean_intent_slot = 0.5659648625524254
INFO:__main__:  num_acc = 0.9907773386034255
INFO:__main__:  semantic_frame_acc = 0.16337285902503293
INFO:__main__:  slot_acc = 0.6627140974967062
INFO:__main__:  slot_f1 = 0.9224435590969455
INFO:__main__:  slot_precision = 0.9175693527080582
INFO:__main__:  slot_recall = 0.927369826435247
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 16
INFO:__main__:  intent_acc = 0.15579710144927536
INFO:__main__:  intent_f1 = 0.4557922381091334
INFO:__main__:  loss = 4.5768652512477
INFO:__main__:  mean_intent_slot = 0.49582295302195306
INFO:__main__:  num_acc = 0.9939613526570048
INFO:__main__:  semantic_frame_acc = 0.07367149758454106
INFO:__main__:  slot_acc = 0.46256038647342995
INFO:__main__:  slot_f1 = 0.8358488045946307
INFO:__main__:  slot_precision = 0.8449905482041588
INFO:__main__:  slot_recall = 0.8269027484143763


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 17


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 17
INFO:__main__:  intent_acc = 0.23451910408432147
INFO:__main__:  intent_f1 = 0.521901211556384
INFO:__main__:  loss = 1.1742047369480133
INFO:__main__:  mean_intent_slot = 0.5796665658572511
INFO:__main__:  num_acc = 0.9907773386034255
INFO:__main__:  semantic_frame_acc = 0.1857707509881423
INFO:__main__:  slot_acc = 0.6772068511198946
INFO:__main__:  slot_f1 = 0.9248140276301807
INFO:__main__:  slot_precision = 0.9201691779011366
INFO:__main__:  slot_recall = 0.929506008010681
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 17
INFO:__main__:  intent_acc = 0.16545893719806765
INFO:__main__:  intent_f1 = 0.4733002626203676
INFO:__main__:  loss = 4.56117740044227
INFO:__main__:  mean_intent_slot = 0.5001741205062091
INFO:__main__:  num_acc = 0.9939613526570048
INFO:__main__:  semantic_frame_acc = 0.07971014492753623
INFO:__main__:  slot_acc = 0.4553140096618358
INFO:__main__:  slot_f1 = 0.8348893038143506
INFO:__main__:  slot_precision = 0.8427571351642434
INFO:__main__:  slot_recall = 0.8271670190274841


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 18


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 18
INFO:__main__:  intent_acc = 0.2608695652173913
INFO:__main__:  intent_f1 = 0.5320074580484774
INFO:__main__:  loss = 1.1755470434824626
INFO:__main__:  mean_intent_slot = 0.5949964665948401
INFO:__main__:  num_acc = 0.9920948616600791
INFO:__main__:  semantic_frame_acc = 0.2134387351778656
INFO:__main__:  slot_acc = 0.696969696969697
INFO:__main__:  slot_f1 = 0.9291233679722888
INFO:__main__:  slot_precision = 0.9271470353629354
INFO:__main__:  slot_recall = 0.9311081441922563
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 18
INFO:__main__:  intent_acc = 0.17270531400966183
INFO:__main__:  intent_f1 = 0.472992700729927
INFO:__main__:  loss = 4.526841787191538
INFO:__main__:  mean_intent_slot = 0.5059013283895285
INFO:__main__:  num_acc = 0.9915458937198067
INFO:__main__:  semantic_frame_acc = 0.08816425120772947
INFO:__main__:  slot_acc = 0.46497584541062803
INFO:__main__:  slot_f1 = 0.8390973427693952
INFO:__main__:  slot_precision = 0.8480431848852902
INFO:__main__:  slot_recall = 0.830338266384778


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 19


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 19
INFO:__main__:  intent_acc = 0.2727272727272727
INFO:__main__:  intent_f1 = 0.5527950310559006
INFO:__main__:  loss = 1.1474109143018723
INFO:__main__:  mean_intent_slot = 0.6005923597678917
INFO:__main__:  num_acc = 0.9894598155467721
INFO:__main__:  semantic_frame_acc = 0.2147562582345191
INFO:__main__:  slot_acc = 0.689064558629776
INFO:__main__:  slot_f1 = 0.9284574468085106
INFO:__main__:  slot_precision = 0.9247682119205298
INFO:__main__:  slot_recall = 0.9321762349799733
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 19
INFO:__main__:  intent_acc = 0.19202898550724637
INFO:__main__:  intent_f1 = 0.4986876640419947
INFO:__main__:  loss = 4.569444986490103
INFO:__main__:  mean_intent_slot = 0.516884522110442
INFO:__main__:  num_acc = 0.9915458937198067
INFO:__main__:  semantic_frame_acc = 0.10024154589371981
INFO:__main__:  slot_acc = 0.4722222222222222
INFO:__main__:  slot_f1 = 0.8417400587136376
INFO:__main__:  slot_precision = 0.8501347708894879
INFO:__main__:  slot_recall = 0.8335095137420718


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 20


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 20
INFO:__main__:  intent_acc = 0.2951251646903821
INFO:__main__:  intent_f1 = 0.5693883886991616
INFO:__main__:  loss = 1.0748186508814495
INFO:__main__:  mean_intent_slot = 0.6124561993664677
INFO:__main__:  num_acc = 0.9907773386034255
INFO:__main__:  semantic_frame_acc = 0.23978919631093545
INFO:__main__:  slot_acc = 0.691699604743083
INFO:__main__:  slot_f1 = 0.9297872340425531
INFO:__main__:  slot_precision = 0.926092715231788
INFO:__main__:  slot_recall = 0.9335113484646195
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 20
INFO:__main__:  intent_acc = 0.21497584541062803
INFO:__main__:  intent_f1 = 0.5144356955380577
INFO:__main__:  loss = 4.667256043507503
INFO:__main__:  mean_intent_slot = 0.5251891641951018
INFO:__main__:  num_acc = 0.9939613526570048
INFO:__main__:  semantic_frame_acc = 0.1111111111111111
INFO:__main__:  slot_acc = 0.4577294685990338
INFO:__main__:  slot_f1 = 0.8354024829795754
INFO:__main__:  slot_precision = 0.8440787698947936
INFO:__main__:  slot_recall = 0.8269027484143763


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 21


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 21
INFO:__main__:  intent_acc = 0.29907773386034253
INFO:__main__:  intent_f1 = 0.5762185656628376
INFO:__main__:  loss = 1.0633211235205333
INFO:__main__:  mean_intent_slot = 0.6171925950881011
INFO:__main__:  num_acc = 0.9907773386034255
INFO:__main__:  semantic_frame_acc = 0.2437417654808959
INFO:__main__:  slot_acc = 0.7114624505928854
INFO:__main__:  slot_f1 = 0.9353074563158597
INFO:__main__:  slot_precision = 0.9344349680170576
INFO:__main__:  slot_recall = 0.9361815754339119
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 21
INFO:__main__:  intent_acc = 0.2222222222222222
INFO:__main__:  intent_f1 = 0.5338389731621936
INFO:__main__:  loss = 4.652418026557336
INFO:__main__:  mean_intent_slot = 0.5297592543117523
INFO:__main__:  num_acc = 0.9951690821256038
INFO:__main__:  semantic_frame_acc = 0.11231884057971014
INFO:__main__:  slot_acc = 0.4577294685990338
INFO:__main__:  slot_f1 = 0.8372962864012824
INFO:__main__:  slot_precision = 0.8465694219340897
INFO:__main__:  slot_recall = 0.8282241014799154


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 22


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 22
INFO:__main__:  intent_acc = 0.33992094861660077
INFO:__main__:  intent_f1 = 0.603656647040595
INFO:__main__:  loss = 1.1068836996952693
INFO:__main__:  mean_intent_slot = 0.6369912455884389
INFO:__main__:  num_acc = 0.9934123847167325
INFO:__main__:  semantic_frame_acc = 0.28194993412384717
INFO:__main__:  slot_acc = 0.7154150197628458
INFO:__main__:  slot_f1 = 0.9340615425602771
INFO:__main__:  slot_precision = 0.9319510898458266
INFO:__main__:  slot_recall = 0.9361815754339119
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 22
INFO:__main__:  intent_acc = 0.2427536231884058
INFO:__main__:  intent_f1 = 0.5521865889212829
INFO:__main__:  loss = 4.688493728637695
INFO:__main__:  mean_intent_slot = 0.5407359571215727
INFO:__main__:  num_acc = 0.9927536231884058
INFO:__main__:  semantic_frame_acc = 0.12318840579710146
INFO:__main__:  slot_acc = 0.46497584541062803
INFO:__main__:  slot_f1 = 0.8387182910547397
INFO:__main__:  slot_precision = 0.8475445223961144
INFO:__main__:  slot_recall = 0.8300739957716702


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 23


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 23
INFO:__main__:  intent_acc = 0.33069828722002637
INFO:__main__:  intent_f1 = 0.6008690254500311
INFO:__main__:  loss = 1.0987732758124669
INFO:__main__:  mean_intent_slot = 0.6322286328358712
INFO:__main__:  num_acc = 0.9920948616600791
INFO:__main__:  semantic_frame_acc = 0.2608695652173913
INFO:__main__:  slot_acc = 0.7114624505928854
INFO:__main__:  slot_f1 = 0.933758978451716
INFO:__main__:  slot_precision = 0.930294195600318
INFO:__main__:  slot_recall = 0.9372496662216289


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 24


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 24
INFO:__main__:  intent_acc = 0.34914361001317523
INFO:__main__:  intent_f1 = 0.6204538389804166
INFO:__main__:  loss = 1.1201986173788707
INFO:__main__:  mean_intent_slot = 0.6411321028393226
INFO:__main__:  num_acc = 0.9907773386034255
INFO:__main__:  semantic_frame_acc = 0.28063241106719367
INFO:__main__:  slot_acc = 0.7075098814229249
INFO:__main__:  slot_f1 = 0.9331205956654701
INFO:__main__:  slot_precision = 0.9292902542372882
INFO:__main__:  slot_recall = 0.9369826435246996


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 25


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 25
INFO:__main__:  intent_acc = 0.36100131752305664
INFO:__main__:  intent_f1 = 0.6310226919490208
INFO:__main__:  loss = 1.0843999336163204
INFO:__main__:  mean_intent_slot = 0.645598385127703
INFO:__main__:  num_acc = 0.9907773386034255
INFO:__main__:  semantic_frame_acc = 0.27931488801054016
INFO:__main__:  slot_acc = 0.6956521739130435
INFO:__main__:  slot_f1 = 0.9301954527323494
INFO:__main__:  slot_precision = 0.9263771186440678
INFO:__main__:  slot_recall = 0.934045393858478


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 26


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 26
INFO:__main__:  intent_acc = 0.37417654808959155
INFO:__main__:  intent_f1 = 0.6351477449455677
INFO:__main__:  loss = 1.1083785444498062
INFO:__main__:  mean_intent_slot = 0.6543057580533246
INFO:__main__:  num_acc = 0.9881422924901185
INFO:__main__:  semantic_frame_acc = 0.2951251646903821
INFO:__main__:  slot_acc = 0.7180500658761528
INFO:__main__:  slot_f1 = 0.9344349680170576
INFO:__main__:  slot_precision = 0.9326948656557595
INFO:__main__:  slot_recall = 0.9361815754339119
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 26
INFO:__main__:  intent_acc = 0.2777777777777778
INFO:__main__:  intent_f1 = 0.5884413309982487
INFO:__main__:  loss = 4.861622865383442
INFO:__main__:  mean_intent_slot = 0.5552551951230364
INFO:__main__:  num_acc = 0.9951690821256038
INFO:__main__:  semantic_frame_acc = 0.13526570048309178
INFO:__main__:  slot_acc = 0.45169082125603865
INFO:__main__:  slot_f1 = 0.8327326124682952
INFO:__main__:  slot_precision = 0.8413811707580253
INFO:__main__:  slot_recall = 0.8242600422832981


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 27


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 27
INFO:__main__:  intent_acc = 0.38603425559947296
INFO:__main__:  intent_f1 = 0.6557071960297768
INFO:__main__:  loss = 1.0903796801964443
INFO:__main__:  mean_intent_slot = 0.6575263833597311
INFO:__main__:  num_acc = 0.994729907773386
INFO:__main__:  semantic_frame_acc = 0.2938076416337286
INFO:__main__:  slot_acc = 0.691699604743083
INFO:__main__:  slot_f1 = 0.9290185111199893
INFO:__main__:  slot_precision = 0.926673751328374
INFO:__main__:  slot_recall = 0.9313751668891855


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 28


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 28
INFO:__main__:  intent_acc = 0.3965744400527009
INFO:__main__:  intent_f1 = 0.6538342129773363
INFO:__main__:  loss = 1.0625357031822205
INFO:__main__:  mean_intent_slot = 0.66599471935915
INFO:__main__:  num_acc = 0.9934123847167325
INFO:__main__:  semantic_frame_acc = 0.3188405797101449
INFO:__main__:  slot_acc = 0.7285902503293807
INFO:__main__:  slot_f1 = 0.9354149986655992
INFO:__main__:  slot_precision = 0.9349159775940251
INFO:__main__:  slot_recall = 0.9359145527369827
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 28
INFO:__main__:  intent_acc = 0.30917874396135264
INFO:__main__:  intent_f1 = 0.6088733216579101
INFO:__main__:  loss = 4.890314523990337
INFO:__main__:  mean_intent_slot = 0.5694673882451743
INFO:__main__:  num_acc = 0.9951690821256038
INFO:__main__:  semantic_frame_acc = 0.13768115942028986
INFO:__main__:  slot_acc = 0.4396135265700483
INFO:__main__:  slot_f1 = 0.829756032528996
INFO:__main__:  slot_precision = 0.8372343287597525
INFO:__main__:  slot_recall = 0.8224101479915433


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 29


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 29
INFO:__main__:  intent_acc = 0.4005270092226614
INFO:__main__:  intent_f1 = 0.6623215394165115
INFO:__main__:  loss = 1.09877743323644
INFO:__main__:  mean_intent_slot = 0.6681472999204991
INFO:__main__:  num_acc = 0.9920948616600791
INFO:__main__:  semantic_frame_acc = 0.308300395256917
INFO:__main__:  slot_acc = 0.7272727272727273
INFO:__main__:  slot_f1 = 0.9357675906183368
INFO:__main__:  slot_precision = 0.934025006650705
INFO:__main__:  slot_recall = 0.9375166889185581
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 29
INFO:__main__:  intent_acc = 0.3309178743961353
INFO:__main__:  intent_f1 = 0.620951269331777
INFO:__main__:  loss = 4.829480042824378
INFO:__main__:  mean_intent_slot = 0.5806565867707172
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.15096618357487923
INFO:__main__:  slot_acc = 0.44565217391304346
INFO:__main__:  slot_f1 = 0.8303952991452991
INFO:__main__:  slot_precision = 0.8393628509719222
INFO:__main__:  slot_recall = 0.8216173361522199


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 30


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 30
INFO:__main__:  intent_acc = 0.40711462450592883
INFO:__main__:  intent_f1 = 0.6656298600311042
INFO:__main__:  loss = 1.103081648548444
INFO:__main__:  mean_intent_slot = 0.6685466626257014
INFO:__main__:  num_acc = 0.9907773386034255
INFO:__main__:  semantic_frame_acc = 0.3069828722002635
INFO:__main__:  slot_acc = 0.6956521739130435
INFO:__main__:  slot_f1 = 0.929978700745474
INFO:__main__:  slot_precision = 0.9272630740642421
INFO:__main__:  slot_recall = 0.9327102803738317


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 31


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 31
INFO:__main__:  intent_acc = 0.4163372859025033
INFO:__main__:  intent_f1 = 0.6689483509645302
INFO:__main__:  loss = 1.0999794056018193
INFO:__main__:  mean_intent_slot = 0.6745776005625552
INFO:__main__:  num_acc = 0.9894598155467721
INFO:__main__:  semantic_frame_acc = 0.3201581027667984
INFO:__main__:  slot_acc = 0.7140974967061924
INFO:__main__:  slot_f1 = 0.9328179152226073
INFO:__main__:  slot_precision = 0.9313281873835507
INFO:__main__:  slot_recall = 0.9343124165554072
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 31
INFO:__main__:  intent_acc = 0.3321256038647343
INFO:__main__:  intent_f1 = 0.6234676007005254
INFO:__main__:  loss = 4.913754463195801
INFO:__main__:  mean_intent_slot = 0.5812854092370858
INFO:__main__:  num_acc = 0.9951690821256038
INFO:__main__:  semantic_frame_acc = 0.15338164251207728
INFO:__main__:  slot_acc = 0.4420289855072464
INFO:__main__:  slot_f1 = 0.8304452146094373
INFO:__main__:  slot_precision = 0.8378160301237224
INFO:__main__:  slot_recall = 0.8232029598308668


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 32


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 32
INFO:__main__:  intent_acc = 0.42424242424242425
INFO:__main__:  intent_f1 = 0.6735138499844382
INFO:__main__:  loss = 1.1167345096667607
INFO:__main__:  mean_intent_slot = 0.6796225440338386
INFO:__main__:  num_acc = 0.9881422924901185
INFO:__main__:  semantic_frame_acc = 0.32938076416337286
INFO:__main__:  slot_acc = 0.7206851119894598
INFO:__main__:  slot_f1 = 0.935002663825253
INFO:__main__:  slot_precision = 0.9327664097794313
INFO:__main__:  slot_recall = 0.9372496662216289
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 32
INFO:__main__:  intent_acc = 0.3357487922705314
INFO:__main__:  intent_f1 = 0.6258026853473438
INFO:__main__:  loss = 4.9131156114431525
INFO:__main__:  mean_intent_slot = 0.5844299813125517
INFO:__main__:  num_acc = 0.9951690821256038
INFO:__main__:  semantic_frame_acc = 0.1642512077294686
INFO:__main__:  slot_acc = 0.45652173913043476
INFO:__main__:  slot_f1 = 0.8331111703545722
INFO:__main__:  slot_precision = 0.8405056481979559
INFO:__main__:  slot_recall = 0.8258456659619451


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 33


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 33
INFO:__main__:  intent_acc = 0.4321475625823452
INFO:__main__:  intent_f1 = 0.6791044776119403
INFO:__main__:  loss = 1.103054717183113
INFO:__main__:  mean_intent_slot = 0.6820660613390895
INFO:__main__:  num_acc = 0.9920948616600791
INFO:__main__:  semantic_frame_acc = 0.32806324110671936
INFO:__main__:  slot_acc = 0.69433465085639
INFO:__main__:  slot_f1 = 0.9319845600958339
INFO:__main__:  slot_precision = 0.929140127388535
INFO:__main__:  slot_recall = 0.9348464619492657


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 34


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 34
INFO:__main__:  intent_acc = 0.4295125164690382
INFO:__main__:  intent_f1 = 0.6782608695652174
INFO:__main__:  loss = 1.098385140299797
INFO:__main__:  mean_intent_slot = 0.679385195767888
INFO:__main__:  num_acc = 0.9920948616600791
INFO:__main__:  semantic_frame_acc = 0.32674571805006586
INFO:__main__:  slot_acc = 0.6996047430830039
INFO:__main__:  slot_f1 = 0.9292578750667378
INFO:__main__:  slot_precision = 0.9290098745663197
INFO:__main__:  slot_recall = 0.929506008010681


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 35


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 35
INFO:__main__:  intent_acc = 0.4440052700922266
INFO:__main__:  intent_f1 = 0.6894409937888198
INFO:__main__:  loss = 1.070526550213496
INFO:__main__:  mean_intent_slot = 0.6878585751770768
INFO:__main__:  num_acc = 0.994729907773386
INFO:__main__:  semantic_frame_acc = 0.35046113306982873
INFO:__main__:  slot_acc = 0.7154150197628458
INFO:__main__:  slot_f1 = 0.931711880261927
INFO:__main__:  slot_precision = 0.9325842696629213
INFO:__main__:  slot_recall = 0.930841121495327
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 35
INFO:__main__:  intent_acc = 0.35990338164251207
INFO:__main__:  intent_f1 = 0.6456509048453006
INFO:__main__:  loss = 5.057736029991736
INFO:__main__:  mean_intent_slot = 0.5936159555663855
INFO:__main__:  num_acc = 0.9951690821256038
INFO:__main__:  semantic_frame_acc = 0.1606280193236715
INFO:__main__:  slot_acc = 0.4323671497584541
INFO:__main__:  slot_f1 = 0.8273285294902589
INFO:__main__:  slot_precision = 0.8355795148247979
INFO:__main__:  slot_recall = 0.8192389006342494


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 36


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 36
INFO:__main__:  intent_acc = 0.4440052700922266
INFO:__main__:  intent_f1 = 0.6884531590413945
INFO:__main__:  loss = 1.0521077265342076
INFO:__main__:  mean_intent_slot = 0.6895680381427497
INFO:__main__:  num_acc = 0.9881422924901185
INFO:__main__:  semantic_frame_acc = 0.35046113306982873
INFO:__main__:  slot_acc = 0.7220026350461133
INFO:__main__:  slot_f1 = 0.9351308061932728
INFO:__main__:  slot_precision = 0.9348812383239925
INFO:__main__:  slot_recall = 0.9353805073431242
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 36
INFO:__main__:  intent_acc = 0.358695652173913
INFO:__main__:  intent_f1 = 0.6433158201984822
INFO:__main__:  loss = 5.003805105502789
INFO:__main__:  mean_intent_slot = 0.5926260556563473
INFO:__main__:  num_acc = 0.9951690821256038
INFO:__main__:  semantic_frame_acc = 0.15942028985507245
INFO:__main__:  slot_acc = 0.4396135265700483
INFO:__main__:  slot_f1 = 0.8265564591387815
INFO:__main__:  slot_precision = 0.8340059187516815
INFO:__main__:  slot_recall = 0.8192389006342494


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 37


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 37
INFO:__main__:  intent_acc = 0.45454545454545453
INFO:__main__:  intent_f1 = 0.6948152747593914
INFO:__main__:  loss = 1.065601497888565
INFO:__main__:  mean_intent_slot = 0.6938681308002526
INFO:__main__:  num_acc = 0.9960474308300395
INFO:__main__:  semantic_frame_acc = 0.35046113306982873
INFO:__main__:  slot_acc = 0.7140974967061924
INFO:__main__:  slot_f1 = 0.9331908070550506
INFO:__main__:  slot_precision = 0.9339395560310243
INFO:__main__:  slot_recall = 0.9324432576769025
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 37
INFO:__main__:  intent_acc = 0.38405797101449274
INFO:__main__:  intent_f1 = 0.6600525240735338
INFO:__main__:  loss = 5.088986140031081
INFO:__main__:  mean_intent_slot = 0.6057623188405797
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.16908212560386474
INFO:__main__:  slot_acc = 0.44082125603864736
INFO:__main__:  slot_f1 = 0.8274666666666667
INFO:__main__:  slot_precision = 0.8350376749192681
INFO:__main__:  slot_recall = 0.820031712473573


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 38


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 38
INFO:__main__:  intent_acc = 0.45849802371541504
INFO:__main__:  intent_f1 = 0.69959614787201
INFO:__main__:  loss = 1.0752196212609608
INFO:__main__:  mean_intent_slot = 0.6956664240622743
INFO:__main__:  num_acc = 0.9934123847167325
INFO:__main__:  semantic_frame_acc = 0.3570487483530962
INFO:__main__:  slot_acc = 0.7127799736495388
INFO:__main__:  slot_f1 = 0.9328348244091335
INFO:__main__:  slot_precision = 0.9329594017094017
INFO:__main__:  slot_recall = 0.9327102803738317
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 38
INFO:__main__:  intent_acc = 0.3828502415458937
INFO:__main__:  intent_f1 = 0.6618033265246571
INFO:__main__:  loss = 5.068995787547185
INFO:__main__:  mean_intent_slot = 0.6059031903636681
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.16908212560386474
INFO:__main__:  slot_acc = 0.4396135265700483
INFO:__main__:  slot_f1 = 0.8289561391814425
INFO:__main__:  slot_precision = 0.8364272262577347
INFO:__main__:  slot_recall = 0.8216173361522199


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 39


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 39
INFO:__main__:  intent_acc = 0.4598155467720685
INFO:__main__:  intent_f1 = 0.7029210689869484
INFO:__main__:  loss = 1.045305664340655
INFO:__main__:  mean_intent_slot = 0.6985951815847257
INFO:__main__:  num_acc = 0.9920948616600791
INFO:__main__:  semantic_frame_acc = 0.36495388669301715
INFO:__main__:  slot_acc = 0.7259552042160737
INFO:__main__:  slot_f1 = 0.9373748163973828
INFO:__main__:  slot_precision = 0.9375
INFO:__main__:  slot_recall = 0.9372496662216289
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 39
INFO:__main__:  intent_acc = 0.38164251207729466
INFO:__main__:  intent_f1 = 0.6600525240735338
INFO:__main__:  loss = 5.02239883863009
INFO:__main__:  mean_intent_slot = 0.6065441074710257
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.16908212560386474
INFO:__main__:  slot_acc = 0.44806763285024154
INFO:__main__:  slot_f1 = 0.8314457028647568
INFO:__main__:  slot_precision = 0.8384842784197797
INFO:__main__:  slot_recall = 0.8245243128964059


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 40


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 40
INFO:__main__:  intent_acc = 0.4598155467720685
INFO:__main__:  intent_f1 = 0.7006527820951197
INFO:__main__:  loss = 1.0517555524905522
INFO:__main__:  mean_intent_slot = 0.6970016193345057
INFO:__main__:  num_acc = 0.9934123847167325
INFO:__main__:  semantic_frame_acc = 0.3570487483530962
INFO:__main__:  slot_acc = 0.7075098814229249
INFO:__main__:  slot_f1 = 0.9341876918969428
INFO:__main__:  slot_precision = 0.9340630005339028
INFO:__main__:  slot_recall = 0.9343124165554072


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 41


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 41
INFO:__main__:  intent_acc = 0.466403162055336
INFO:__main__:  intent_f1 = 0.7073094867807153
INFO:__main__:  loss = 1.059027522802353
INFO:__main__:  mean_intent_slot = 0.7008918346992301
INFO:__main__:  num_acc = 0.9907773386034255
INFO:__main__:  semantic_frame_acc = 0.3689064558629776
INFO:__main__:  slot_acc = 0.7154150197628458
INFO:__main__:  slot_f1 = 0.9353805073431242
INFO:__main__:  slot_precision = 0.9353805073431242
INFO:__main__:  slot_recall = 0.9353805073431242
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 41
INFO:__main__:  intent_acc = 0.38768115942028986
INFO:__main__:  intent_f1 = 0.6619964973730298
INFO:__main__:  loss = 5.11103263268104
INFO:__main__:  mean_intent_slot = 0.6087866300631488
INFO:__main__:  num_acc = 0.9951690821256038
INFO:__main__:  semantic_frame_acc = 0.16666666666666666
INFO:__main__:  slot_acc = 0.43719806763285024
INFO:__main__:  slot_f1 = 0.8298921007060077
INFO:__main__:  slot_precision = 0.8366908407198496
INFO:__main__:  slot_recall = 0.8232029598308668


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 42


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 42
INFO:__main__:  intent_acc = 0.47167325428194995
INFO:__main__:  intent_f1 = 0.7078931013051585
INFO:__main__:  loss = 1.0670917928218842
INFO:__main__:  mean_intent_slot = 0.7031176100469578
INFO:__main__:  num_acc = 0.9920948616600791
INFO:__main__:  semantic_frame_acc = 0.37417654808959155
INFO:__main__:  slot_acc = 0.7154150197628458
INFO:__main__:  slot_f1 = 0.9345619658119658
INFO:__main__:  slot_precision = 0.934811648410366
INFO:__main__:  slot_recall = 0.9343124165554072
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 42
INFO:__main__:  intent_acc = 0.40096618357487923
INFO:__main__:  intent_f1 = 0.6682229355121098
INFO:__main__:  loss = 5.091012679613554
INFO:__main__:  mean_intent_slot = 0.6157508775588225
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.1751207729468599
INFO:__main__:  slot_acc = 0.44323671497584544
INFO:__main__:  slot_f1 = 0.8305355715427658
INFO:__main__:  slot_precision = 0.837452982267598
INFO:__main__:  slot_recall = 0.8237315010570825


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 43


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 43
INFO:__main__:  intent_acc = 0.4756258234519104
INFO:__main__:  intent_f1 = 0.712022367194781
INFO:__main__:  loss = 1.0766249895095825
INFO:__main__:  mean_intent_slot = 0.7049691200036587
INFO:__main__:  num_acc = 0.9934123847167325
INFO:__main__:  semantic_frame_acc = 0.37549407114624506
INFO:__main__:  slot_acc = 0.7193675889328063
INFO:__main__:  slot_f1 = 0.9343124165554072
INFO:__main__:  slot_precision = 0.9343124165554072
INFO:__main__:  slot_recall = 0.9343124165554072
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 43
INFO:__main__:  intent_acc = 0.40096618357487923
INFO:__main__:  intent_f1 = 0.6688065363291509
INFO:__main__:  loss = 5.065166528408344
INFO:__main__:  mean_intent_slot = 0.6172274016503438
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.18478260869565216
INFO:__main__:  slot_acc = 0.45410628019323673
INFO:__main__:  slot_f1 = 0.8334886197258086
INFO:__main__:  slot_precision = 0.8396352909627246
INFO:__main__:  slot_recall = 0.827431289640592


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 44


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 44
INFO:__main__:  intent_acc = 0.4650856389986825
INFO:__main__:  intent_f1 = 0.7056263599626982
INFO:__main__:  loss = 1.0527716875076294
INFO:__main__:  mean_intent_slot = 0.7002244434309652
INFO:__main__:  num_acc = 0.9934123847167325
INFO:__main__:  semantic_frame_acc = 0.3675889328063241
INFO:__main__:  slot_acc = 0.7206851119894598
INFO:__main__:  slot_f1 = 0.9353632478632479
INFO:__main__:  slot_precision = 0.935613144536468
INFO:__main__:  slot_recall = 0.9351134846461949
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 44
INFO:__main__:  intent_acc = 0.40700483091787437
INFO:__main__:  intent_f1 = 0.6717245404143566
INFO:__main__:  loss = 5.07080864906311
INFO:__main__:  mean_intent_slot = 0.6190366547341837
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.17753623188405798
INFO:__main__:  slot_acc = 0.4444444444444444
INFO:__main__:  slot_f1 = 0.8310684785504929
INFO:__main__:  slot_precision = 0.837990327780763
INFO:__main__:  slot_recall = 0.8242600422832981


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 45


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 45
INFO:__main__:  intent_acc = 0.47299077733860345
INFO:__main__:  intent_f1 = 0.7132649891270582
INFO:__main__:  loss = 1.071090151866277
INFO:__main__:  mean_intent_slot = 0.7030463300499934
INFO:__main__:  num_acc = 0.9934123847167325
INFO:__main__:  semantic_frame_acc = 0.3702239789196311
INFO:__main__:  slot_acc = 0.7114624505928854
INFO:__main__:  slot_f1 = 0.9331018827613833
INFO:__main__:  slot_precision = 0.9332264957264957
INFO:__main__:  slot_recall = 0.932977303070761
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 45
INFO:__main__:  intent_acc = 0.40700483091787437
INFO:__main__:  intent_f1 = 0.67347534286548
INFO:__main__:  loss = 5.141415284230159
INFO:__main__:  mean_intent_slot = 0.6183053973545708
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.1751207729468599
INFO:__main__:  slot_acc = 0.44082125603864736
INFO:__main__:  slot_f1 = 0.8296059637912673
INFO:__main__:  slot_precision = 0.8358369098712446
INFO:__main__:  slot_recall = 0.8234672304439746


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 46


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 46
INFO:__main__:  intent_acc = 0.47167325428194995
INFO:__main__:  intent_f1 = 0.7087348461299346
INFO:__main__:  loss = 1.0730994790792465
INFO:__main__:  mean_intent_slot = 0.7026546268739167
INFO:__main__:  num_acc = 0.9934123847167325
INFO:__main__:  semantic_frame_acc = 0.3715415019762846
INFO:__main__:  slot_acc = 0.7167325428194994
INFO:__main__:  slot_f1 = 0.9336359994658834
INFO:__main__:  slot_precision = 0.9337606837606838
INFO:__main__:  slot_recall = 0.9335113484646195
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 46
INFO:__main__:  intent_acc = 0.40700483091787437
INFO:__main__:  intent_f1 = 0.6723081412313977
INFO:__main__:  loss = 5.105471280904917
INFO:__main__:  mean_intent_slot = 0.6186494938563817
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.178743961352657
INFO:__main__:  slot_acc = 0.4444444444444444
INFO:__main__:  slot_f1 = 0.8302941567948889
INFO:__main__:  slot_precision = 0.8364172700455886
INFO:__main__:  slot_recall = 0.8242600422832981


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 47


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 47
INFO:__main__:  intent_acc = 0.4782608695652174
INFO:__main__:  intent_f1 = 0.7128651336233685
INFO:__main__:  loss = 1.0738897671302159
INFO:__main__:  mean_intent_slot = 0.7060819636916753
INFO:__main__:  num_acc = 0.994729907773386
INFO:__main__:  semantic_frame_acc = 0.37812911725955206
INFO:__main__:  slot_acc = 0.7167325428194994
INFO:__main__:  slot_f1 = 0.9339030578181332
INFO:__main__:  slot_precision = 0.9340277777777778
INFO:__main__:  slot_recall = 0.9337783711615487
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 47
INFO:__main__:  intent_acc = 0.41545893719806765
INFO:__main__:  intent_f1 = 0.6775605485847681
INFO:__main__:  loss = 5.119814304205088
INFO:__main__:  mean_intent_slot = 0.6222889575529864
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.1823671497584541
INFO:__main__:  slot_acc = 0.4420289855072464
INFO:__main__:  slot_f1 = 0.8291189779079052
INFO:__main__:  slot_precision = 0.8351206434316354
INFO:__main__:  slot_recall = 0.8232029598308668


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 48


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 48
INFO:__main__:  intent_acc = 0.4756258234519104
INFO:__main__:  intent_f1 = 0.7124650295306186
INFO:__main__:  loss = 1.061816801627477
INFO:__main__:  mean_intent_slot = 0.7049603476233912
INFO:__main__:  num_acc = 0.9934123847167325
INFO:__main__:  semantic_frame_acc = 0.37812911725955206
INFO:__main__:  slot_acc = 0.7193675889328063
INFO:__main__:  slot_f1 = 0.9342948717948719
INFO:__main__:  slot_precision = 0.9345444830349987
INFO:__main__:  slot_recall = 0.934045393858478
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 48
INFO:__main__:  intent_acc = 0.4178743961352657
INFO:__main__:  intent_f1 = 0.6793113510358916
INFO:__main__:  loss = 5.088848187373235
INFO:__main__:  mean_intent_slot = 0.6241395409856307
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.1823671497584541
INFO:__main__:  slot_acc = 0.44565217391304346
INFO:__main__:  slot_f1 = 0.8304046858359957
INFO:__main__:  slot_precision = 0.8366416309012875
INFO:__main__:  slot_recall = 0.8242600422832981


Iteration:   0%|          | 0/412 [00:00<?, ?it/s]


Epoch 49


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 49
INFO:__main__:  intent_acc = 0.48221343873517786
INFO:__main__:  intent_f1 = 0.7169928549238893
INFO:__main__:  loss = 1.0634373873472214
INFO:__main__:  mean_intent_slot = 0.7082541552650249
INFO:__main__:  num_acc = 0.9934123847167325
INFO:__main__:  semantic_frame_acc = 0.380764163372859
INFO:__main__:  slot_acc = 0.7180500658761528
INFO:__main__:  slot_f1 = 0.9342948717948719
INFO:__main__:  slot_precision = 0.9345444830349987
INFO:__main__:  slot_recall = 0.934045393858478
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 49
INFO:__main__:  intent_acc = 0.41183574879227053
INFO:__main__:  intent_f1 = 0.6758097461336446
INFO:__main__:  loss = 5.109904564343966
INFO:__main__:  mean_intent_slot = 0.6206656449272148
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.178743961352657
INFO:__main__:  slot_acc = 0.4420289855072464
INFO:__main__:  slot_f1 = 0.829495541062159
INFO:__main__:  slot_precision = 0.8356127648163046
INFO:__main__:  slot_recall = 0.8234672304439746
INFO:__main__:***** Model Loaded *****
INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 759
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 0
INFO:__main__:  intent_acc = 0.48221343873517786
INFO:__main__:  intent_f1 = 0.7169928549238893
INFO:__main__:  loss = 1.0634373873472214
INFO:__main__:  mean_intent_slot = 0.7082541552650249
INFO:__main__:  num_acc = 0.9934123847167325
INFO:__main__:  semantic_frame_acc = 0.380764163372859
INFO:__main__:  slot_acc = 0.7180500658761528
INFO:__main__:  slot_f1 = 0.9342948717948719
INFO:__main__:  slot_precision = 0.9345444830349987
INFO:__main__:  slot_recall = 0.934045393858478
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 828
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/13 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = -1
INFO:__main__:  intent_acc = 0.41183574879227053
INFO:__main__:  intent_f1 = 0.6758097461336446
INFO:__main__:  loss = 5.109904564343966
INFO:__main__:  mean_intent_slot = 0.6206656449272148
INFO:__main__:  num_acc = 0.9963768115942029
INFO:__main__:  semantic_frame_acc = 0.178743961352657
INFO:__main__:  slot_acc = 0.4420289855072464
INFO:__main__:  slot_f1 = 0.829495541062159
INFO:__main__:  slot_precision = 0.8356127648163046
INFO:__main__:  slot_recall = 0.8234672304439746


In [15]:
# Launch the training + evaluation entry point for the MixSNIPS run.
# NOTE(review): `args` is built in an earlier cell (not visible here). The captured
# output of this cell shows it reading ./MISCA/data/mixsnips/{train,dev,test}.txt,
# so `args` is presumably re-pointed at MixSNIPS before this call — confirm that the
# cell doing so runs first under Restart & Run All.
# NOTE(review): the captured output reports "Saving model checkpoint to dir_base",
# the same directory name logged by the preceding run; this presumably overwrites
# that run's checkpoint — confirm the output directory in `args` is intended.
main(args) #mixSnips

INFO:__main__:LOOKING AT ./MISCA/data/mixsnips/train.txt
INFO:__main__:LOOKING AT ./MISCA/data/mixsnips/dev.txt
INFO:__main__:LOOKING AT ./MISCA/data/mixsnips/test.txt
  torch.nn.init.normal(first_linear.weight, mean, std)
  torch.nn.init.normal(linear.weight, mean, std)
  torch.nn.init.normal(linear.weight, mean, std)
INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64


check init


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = -1
INFO:__main__:  intent_acc = 0.0
INFO:__main__:  intent_f1 = 0.43847999553596345
INFO:__main__:  loss = 49.52223358154297
INFO:__main__:  mean_intent_slot = 0.003643935450286309
INFO:__main__:  num_acc = 0.0
INFO:__main__:  semantic_frame_acc = 0.0
INFO:__main__:  slot_acc = 0.0
INFO:__main__:  slot_f1 = 0.007287870900572618
INFO:__main__:  slot_precision = 0.006032056069397369
INFO:__main__:  slot_recall = 0.00920406732117812
INFO:__main__:***** Running training *****
INFO:__main__:  Num examples = 39776
INFO:__main__:  Num Epochs = 20
INFO:__main__:  Total train batch size = 32
INFO:__main__:  Gradient Accumulation steps = 1
INFO:__main__:  Total optimization steps = 24860
INFO:__main__:  Logging steps = 1243


{'loss': 49.52223358154297, 'intent_acc': 0.0, 'intent_f1': 0.43847999553596345, 'slot_precision': 0.006032056069397369, 'slot_recall': 0.00920406732117812, 'slot_f1': 0.007287870900572618, 'semantic_frame_acc': 0.0, 'slot_acc': 0.0, 'mean_intent_slot': 0.003643935450286309, 'num_acc': 0.0, 'epoch': -1}


Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 0


  return F.conv1d(input, weight, bias, self.stride,
INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 0
INFO:__main__:  intent_acc = 0.07006369426751592
INFO:__main__:  intent_f1 = 0.35625983367048775
INFO:__main__:  loss = 14.221346391950334
INFO:__main__:  mean_intent_slot = 0.16223793678698603
INFO:__main__:  num_acc = 0.9813466787989081
INFO:__main__:  semantic_frame_acc = 0.0
INFO:__main__:  slot_acc = 0.007279344858962694
INFO:__main__:  slot_f1 = 0.25441217930645615
INFO:__main__:  slot_precision = 0.3565670934091987
INFO:__main__:  slot_recall = 0.19775596072931276
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 0
INFO:__main__:  intent_acc = 0.07548885857207822
INFO:__main__:  intent_f1 = 0.3516310461192351
INFO:__main__:  loss = 14.173505892072406
INFO:__main__:  mean_intent_slot = 0.15952519089730918
INFO:__main__:  num_acc = 0.9690768531150523
INFO:__main__:  semantic_frame_acc = 0.0
INFO:__main__:  slot_acc = 0.0050022737608003635
INFO:__main__:  slot_f1 = 0.24356152322254015
INFO:__main__:  slot_precision = 0.3304958183990442
INFO:__main__:  slot_recall = 0.19283722551411642


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 1


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 1
INFO:__main__:  intent_acc = 0.09463148316651501
INFO:__main__:  intent_f1 = 0.3999102837277111
INFO:__main__:  loss = 8.304442187717983
INFO:__main__:  mean_intent_slot = 0.3268911170699996
INFO:__main__:  num_acc = 0.9904458598726115
INFO:__main__:  semantic_frame_acc = 0.004549590536851683
INFO:__main__:  slot_acc = 0.07961783439490445
INFO:__main__:  slot_f1 = 0.5591507509734842
INFO:__main__:  slot_precision = 0.5933687524596616
INFO:__main__:  slot_recall = 0.528664095371669
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 1
INFO:__main__:  intent_acc = 0.10641200545702592
INFO:__main__:  intent_f1 = 0.3955156950672646
INFO:__main__:  loss = 8.267773274012974
INFO:__main__:  mean_intent_slot = 0.31968573059465255
INFO:__main__:  num_acc = 0.9881764438381082
INFO:__main__:  semantic_frame_acc = 0.013642564802182811
INFO:__main__:  slot_acc = 0.0814006366530241
INFO:__main__:  slot_f1 = 0.5329594557322792
INFO:__main__:  slot_precision = 0.5640202374002724
INFO:__main__:  slot_recall = 0.5051411641686999


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 2


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 2
INFO:__main__:  intent_acc = 0.24385805277525022
INFO:__main__:  intent_f1 = 0.5293787214919673
INFO:__main__:  loss = 5.85592896597726
INFO:__main__:  mean_intent_slot = 0.4521130144532641
INFO:__main__:  num_acc = 0.9968152866242038
INFO:__main__:  semantic_frame_acc = 0.042311191992720654
INFO:__main__:  slot_acc = 0.14786169244767972
INFO:__main__:  slot_f1 = 0.660367976131278
INFO:__main__:  slot_precision = 0.6817884812844208
INFO:__main__:  slot_recall = 0.6402524544179523
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 2
INFO:__main__:  intent_acc = 0.2305593451568895
INFO:__main__:  intent_f1 = 0.5174699471969442
INFO:__main__:  loss = 5.779001821790422
INFO:__main__:  mean_intent_slot = 0.4411410444844687
INFO:__main__:  num_acc = 0.992269213278763
INFO:__main__:  semantic_frame_acc = 0.049567985447930875
INFO:__main__:  slot_acc = 0.16189176898590268
INFO:__main__:  slot_f1 = 0.651722743812048
INFO:__main__:  slot_precision = 0.6726008344923505
INFO:__main__:  slot_recall = 0.6321017776228651


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 3


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 3
INFO:__main__:  intent_acc = 0.40900818926296634
INFO:__main__:  intent_f1 = 0.6400719181930554
INFO:__main__:  loss = 4.552212333679199
INFO:__main__:  mean_intent_slot = 0.56403758585093
INFO:__main__:  num_acc = 0.9977252047315741
INFO:__main__:  semantic_frame_acc = 0.08826205641492266
INFO:__main__:  slot_acc = 0.21747042766151045
INFO:__main__:  slot_f1 = 0.7190669824388937
INFO:__main__:  slot_precision = 0.7333880229696472
INFO:__main__:  slot_recall = 0.7052945301542777
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 3
INFO:__main__:  intent_acc = 0.373806275579809
INFO:__main__:  intent_f1 = 0.6184269662921348
INFO:__main__:  loss = 4.505863114765712
INFO:__main__:  mean_intent_slot = 0.5442452069386052
INFO:__main__:  num_acc = 0.9972714870395635
INFO:__main__:  semantic_frame_acc = 0.10413824465666212
INFO:__main__:  slot_acc = 0.23010459299681674
INFO:__main__:  slot_f1 = 0.7146841382974014
INFO:__main__:  slot_precision = 0.7263565193917034
INFO:__main__:  slot_recall = 0.7033809689787383


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 4


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 4
INFO:__main__:  intent_acc = 0.5181983621474068
INFO:__main__:  intent_f1 = 0.7132584269662922
INFO:__main__:  loss = 3.811664765221732
INFO:__main__:  mean_intent_slot = 0.6444574365254168
INFO:__main__:  num_acc = 0.9981801637852593
INFO:__main__:  semantic_frame_acc = 0.16333030027297543
INFO:__main__:  slot_acc = 0.29981801637852595
INFO:__main__:  slot_f1 = 0.7707165109034267
INFO:__main__:  slot_precision = 0.782769842704755
INFO:__main__:  slot_recall = 0.7590287517531557
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 4
INFO:__main__:  intent_acc = 0.4806730331969077
INFO:__main__:  intent_f1 = 0.690414653331835
INFO:__main__:  loss = 3.7664776461465017
INFO:__main__:  mean_intent_slot = 0.6220995147510007
INFO:__main__:  num_acc = 0.9977262391996362
INFO:__main__:  semantic_frame_acc = 0.14870395634379263
INFO:__main__:  slot_acc = 0.2760345611641655
INFO:__main__:  slot_f1 = 0.7635259963050938
INFO:__main__:  slot_precision = 0.7709184579854326
INFO:__main__:  slot_recall = 0.7562739630533287


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 5


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 5
INFO:__main__:  intent_acc = 0.5800727934485896
INFO:__main__:  intent_f1 = 0.7522193504888189
INFO:__main__:  loss = 3.3756115300314766
INFO:__main__:  mean_intent_slot = 0.6875450411076622
INFO:__main__:  num_acc = 0.9977252047315741
INFO:__main__:  semantic_frame_acc = 0.1979071883530482
INFO:__main__:  slot_acc = 0.3248407643312102
INFO:__main__:  slot_f1 = 0.7950172887667347
INFO:__main__:  slot_precision = 0.8042152466367714
INFO:__main__:  slot_recall = 0.7860273492286115
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 5
INFO:__main__:  intent_acc = 0.5566166439290586
INFO:__main__:  intent_f1 = 0.737835711877739
INFO:__main__:  loss = 3.3019623620169503
INFO:__main__:  mean_intent_slot = 0.6715908583106495
INFO:__main__:  num_acc = 0.9977262391996362
INFO:__main__:  semantic_frame_acc = 0.19781718963165076
INFO:__main__:  slot_acc = 0.3128694861300591
INFO:__main__:  slot_f1 = 0.7865650726922403
INFO:__main__:  slot_precision = 0.790580985915493
INFO:__main__:  slot_recall = 0.7825897525270129


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 6


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 6
INFO:__main__:  intent_acc = 0.6373976342129208
INFO:__main__:  intent_f1 = 0.7871431782423016
INFO:__main__:  loss = 3.032515743800572
INFO:__main__:  mean_intent_slot = 0.7259474358909908
INFO:__main__:  num_acc = 0.997270245677889
INFO:__main__:  semantic_frame_acc = 0.23794358507734303
INFO:__main__:  slot_acc = 0.362147406733394
INFO:__main__:  slot_f1 = 0.8144972375690608
INFO:__main__:  slot_precision = 0.8214317553713114
INFO:__main__:  slot_recall = 0.8076788218793829
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 6
INFO:__main__:  intent_acc = 0.6175534333788085
INFO:__main__:  intent_f1 = 0.7749185301719294
INFO:__main__:  loss = 2.9894722734178814
INFO:__main__:  mean_intent_slot = 0.7089860677012675
INFO:__main__:  num_acc = 0.9986357435197817
INFO:__main__:  semantic_frame_acc = 0.23146884947703503
INFO:__main__:  slot_acc = 0.33788085493406095
INFO:__main__:  slot_f1 = 0.8004187020237264
INFO:__main__:  slot_precision = 0.8012574222843172
INFO:__main__:  slot_recall = 0.7995817357964448


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 7


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 7
INFO:__main__:  intent_acc = 0.6533212010919017
INFO:__main__:  intent_f1 = 0.7962692437352513
INFO:__main__:  loss = 2.7768274443490166
INFO:__main__:  mean_intent_slot = 0.7406651893211009
INFO:__main__:  num_acc = 0.9977252047315741
INFO:__main__:  semantic_frame_acc = 0.27115559599636035
INFO:__main__:  slot_acc = 0.40172884440400364
INFO:__main__:  slot_f1 = 0.8280091775503
INFO:__main__:  slot_precision = 0.8335998578535891
INFO:__main__:  slot_recall = 0.822492987377279
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 7
INFO:__main__:  intent_acc = 0.6257389722601182
INFO:__main__:  intent_f1 = 0.7821103494774695
INFO:__main__:  loss = 2.7336004734039308
INFO:__main__:  mean_intent_slot = 0.721607604208022
INFO:__main__:  num_acc = 0.9986357435197817
INFO:__main__:  semantic_frame_acc = 0.26512050932241926
INFO:__main__:  slot_acc = 0.38108231014097316
INFO:__main__:  slot_f1 = 0.8174762361559257
INFO:__main__:  slot_precision = 0.8181183452609531
INFO:__main__:  slot_recall = 0.8168351341930986


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 8


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 8
INFO:__main__:  intent_acc = 0.6787989080982711
INFO:__main__:  intent_f1 = 0.8099359334607171
INFO:__main__:  loss = 2.6261191197804044
INFO:__main__:  mean_intent_slot = 0.7574835291350613
INFO:__main__:  num_acc = 0.9977252047315741
INFO:__main__:  semantic_frame_acc = 0.29981801637852595
INFO:__main__:  slot_acc = 0.4212920837124659
INFO:__main__:  slot_f1 = 0.8361681501718515
INFO:__main__:  slot_precision = 0.8406875775296828
INFO:__main__:  slot_recall = 0.8316970546984572
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 8
INFO:__main__:  intent_acc = 0.6480218281036835
INFO:__main__:  intent_f1 = 0.7919991010225869
INFO:__main__:  loss = 2.5642849343163627
INFO:__main__:  mean_intent_slot = 0.7358115066960809
INFO:__main__:  num_acc = 0.9986357435197817
INFO:__main__:  semantic_frame_acc = 0.2869486130059118
INFO:__main__:  slot_acc = 0.396543883583447
INFO:__main__:  slot_f1 = 0.8236011852884784
INFO:__main__:  slot_precision = 0.823744769874477
INFO:__main__:  slot_recall = 0.82345765074939


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 9


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 9
INFO:__main__:  intent_acc = 0.7033666969972703
INFO:__main__:  intent_f1 = 0.824098010565359
INFO:__main__:  loss = 2.50464734349932
INFO:__main__:  mean_intent_slot = 0.7723304979593439
INFO:__main__:  num_acc = 0.9977252047315741
INFO:__main__:  semantic_frame_acc = 0.316196542311192
INFO:__main__:  slot_acc = 0.43448589626933576
INFO:__main__:  slot_f1 = 0.8412942989214175
INFO:__main__:  slot_precision = 0.8450517378615017
INFO:__main__:  slot_recall = 0.8375701262272089
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 9
INFO:__main__:  intent_acc = 0.6825829922692133
INFO:__main__:  intent_f1 = 0.8117766041128217
INFO:__main__:  loss = 2.420403402192252
INFO:__main__:  mean_intent_slot = 0.7585429453783846
INFO:__main__:  num_acc = 0.9986357435197817
INFO:__main__:  semantic_frame_acc = 0.3069577080491132
INFO:__main__:  slot_acc = 0.41564347430650295
INFO:__main__:  slot_f1 = 0.834502898487556
INFO:__main__:  slot_precision = 0.8348303828377082
INFO:__main__:  slot_recall = 0.8341756709654932


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 10


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 10
INFO:__main__:  intent_acc = 0.7434030937215651
INFO:__main__:  intent_f1 = 0.8433003597122302
INFO:__main__:  loss = 2.4190634284700665
INFO:__main__:  mean_intent_slot = 0.7962331679356353
INFO:__main__:  num_acc = 0.9981801637852593
INFO:__main__:  semantic_frame_acc = 0.3525932666060055
INFO:__main__:  slot_acc = 0.45450409463148317
INFO:__main__:  slot_f1 = 0.8490632421497054
INFO:__main__:  slot_precision = 0.8519858781994705
INFO:__main__:  slot_recall = 0.8461605890603086
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 10
INFO:__main__:  intent_acc = 0.7157798999545247
INFO:__main__:  intent_f1 = 0.8288187029335731
INFO:__main__:  loss = 2.318870449066162
INFO:__main__:  mean_intent_slot = 0.7786906462348777
INFO:__main__:  num_acc = 0.9986357435197817
INFO:__main__:  semantic_frame_acc = 0.34561164165529784
INFO:__main__:  slot_acc = 0.4392905866302865
INFO:__main__:  slot_f1 = 0.8416013925152306
INFO:__main__:  slot_precision = 0.8405771905424201
INFO:__main__:  slot_recall = 0.8426280934123388


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 11


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 11
INFO:__main__:  intent_acc = 0.732484076433121
INFO:__main__:  intent_f1 = 0.8378104979206474
INFO:__main__:  loss = 2.2832314763750348
INFO:__main__:  mean_intent_slot = 0.7945498927236769
INFO:__main__:  num_acc = 0.9977252047315741
INFO:__main__:  semantic_frame_acc = 0.3607825295723385
INFO:__main__:  slot_acc = 0.4727024567788899
INFO:__main__:  slot_f1 = 0.856615709014233
INFO:__main__:  slot_precision = 0.8585769637196196
INFO:__main__:  slot_recall = 0.854663394109397
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 11
INFO:__main__:  intent_acc = 0.7085038653933606
INFO:__main__:  intent_f1 = 0.8240053944706676
INFO:__main__:  loss = 2.2049680641719274
INFO:__main__:  mean_intent_slot = 0.7797088517567325
INFO:__main__:  num_acc = 0.9990904956798545
INFO:__main__:  semantic_frame_acc = 0.3537971805366075
INFO:__main__:  slot_acc = 0.461118690313779
INFO:__main__:  slot_f1 = 0.8509138381201046
INFO:__main__:  slot_precision = 0.8498783031988874
INFO:__main__:  slot_recall = 0.8519518996165911


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 12


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 12
INFO:__main__:  intent_acc = 0.7488626023657871
INFO:__main__:  intent_f1 = 0.8449690837549185
INFO:__main__:  loss = 2.2364263670785087
INFO:__main__:  mean_intent_slot = 0.8031787163793387
INFO:__main__:  num_acc = 0.9986351228389445
INFO:__main__:  semantic_frame_acc = 0.3689717925386715
INFO:__main__:  slot_acc = 0.4740673339399454
INFO:__main__:  slot_f1 = 0.8574948303928902
INFO:__main__:  slot_precision = 0.8607896828902041
INFO:__main__:  slot_recall = 0.8542251051893408
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 12
INFO:__main__:  intent_acc = 0.7262391996361983
INFO:__main__:  intent_f1 = 0.8335393953017871
INFO:__main__:  loss = 2.1527567556926184
INFO:__main__:  mean_intent_slot = 0.7899531220994281
INFO:__main__:  num_acc = 0.9986357435197817
INFO:__main__:  semantic_frame_acc = 0.3683492496589359
INFO:__main__:  slot_acc = 0.46839472487494316
INFO:__main__:  slot_f1 = 0.853667044562658
INFO:__main__:  slot_precision = 0.8543375807296212
INFO:__main__:  slot_recall = 0.8529975601254792


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 13


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 13
INFO:__main__:  intent_acc = 0.7634212920837125
INFO:__main__:  intent_f1 = 0.8534172661870504
INFO:__main__:  loss = 2.1710244825908114
INFO:__main__:  mean_intent_slot = 0.8128452106856557
INFO:__main__:  num_acc = 0.9981801637852593
INFO:__main__:  semantic_frame_acc = 0.3780709736123749
INFO:__main__:  slot_acc = 0.47952684258416745
INFO:__main__:  slot_f1 = 0.862269129287599
INFO:__main__:  slot_precision = 0.8651606071302507
INFO:__main__:  slot_recall = 0.8593969144460029
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 13
INFO:__main__:  intent_acc = 0.7394270122783083
INFO:__main__:  intent_f1 = 0.8401888064733649
INFO:__main__:  loss = 2.0985665321350098
INFO:__main__:  mean_intent_slot = 0.7987148144688532
INFO:__main__:  num_acc = 0.9990904956798545
INFO:__main__:  semantic_frame_acc = 0.3842655752614825
INFO:__main__:  slot_acc = 0.4815825375170532
INFO:__main__:  slot_f1 = 0.8580026166593981
INFO:__main__:  slot_precision = 0.8588266107909901
INFO:__main__:  slot_recall = 0.8571802021610317


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 14


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 14
INFO:__main__:  intent_acc = 0.762056414922657
INFO:__main__:  intent_f1 = 0.8537709340227043
INFO:__main__:  loss = 2.1464212928499493
INFO:__main__:  mean_intent_slot = 0.8131161195492405
INFO:__main__:  num_acc = 0.9977252047315741
INFO:__main__:  semantic_frame_acc = 0.3848953594176524
INFO:__main__:  slot_acc = 0.489080982711556
INFO:__main__:  slot_f1 = 0.8641758241758241
INFO:__main__:  slot_precision = 0.8666901780991006
INFO:__main__:  slot_recall = 0.8616760168302945
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 14
INFO:__main__:  intent_acc = 0.7353342428376535
INFO:__main__:  intent_f1 = 0.8392897280287706
INFO:__main__:  loss = 2.0600924355643135
INFO:__main__:  mean_intent_slot = 0.7978520158894356
INFO:__main__:  num_acc = 0.9990904956798545
INFO:__main__:  semantic_frame_acc = 0.3842655752614825
INFO:__main__:  slot_acc = 0.48522055479763526
INFO:__main__:  slot_f1 = 0.8603697889412175
INFO:__main__:  slot_precision = 0.8611208100558659
INFO:__main__:  slot_recall = 0.8596200766817707


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 15


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 15
INFO:__main__:  intent_acc = 0.7679708826205641
INFO:__main__:  intent_f1 = 0.8554406474820144
INFO:__main__:  loss = 2.10433281149183
INFO:__main__:  mean_intent_slot = 0.8168337289965704
INFO:__main__:  num_acc = 0.9981801637852593
INFO:__main__:  semantic_frame_acc = 0.38717015468607824
INFO:__main__:  slot_acc = 0.4899909008189263
INFO:__main__:  slot_f1 = 0.8656965753725766
INFO:__main__:  slot_precision = 0.868330540612047
INFO:__main__:  slot_recall = 0.863078541374474
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 15
INFO:__main__:  intent_acc = 0.7376080036380173
INFO:__main__:  intent_f1 = 0.8397392672510676
INFO:__main__:  loss = 2.018148422241211
INFO:__main__:  mean_intent_slot = 0.7996657252659025
INFO:__main__:  num_acc = 0.9990904956798545
INFO:__main__:  semantic_frame_acc = 0.3910868576625739
INFO:__main__:  slot_acc = 0.4943155979990905
INFO:__main__:  slot_f1 = 0.8617234468937877
INFO:__main__:  slot_precision = 0.861648370796306
INFO:__main__:  slot_recall = 0.8617985360752876


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 16


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 16
INFO:__main__:  intent_acc = 0.778434940855323
INFO:__main__:  intent_f1 = 0.8618635495110712
INFO:__main__:  loss = 2.0879891668047224
INFO:__main__:  mean_intent_slot = 0.823457484489269
INFO:__main__:  num_acc = 0.9977252047315741
INFO:__main__:  semantic_frame_acc = 0.39717925386715197
INFO:__main__:  slot_acc = 0.4968152866242038
INFO:__main__:  slot_f1 = 0.8684800281232149
INFO:__main__:  slot_precision = 0.8707375099127677
INFO:__main__:  slot_recall = 0.866234221598878
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 16
INFO:__main__:  intent_acc = 0.7517053206002728
INFO:__main__:  intent_f1 = 0.848505282085862
INFO:__main__:  loss = 2.0042971202305386
INFO:__main__:  mean_intent_slot = 0.8078551012381541
INFO:__main__:  num_acc = 0.9990904956798545
INFO:__main__:  semantic_frame_acc = 0.40427467030468395
INFO:__main__:  slot_acc = 0.5002273760800364
INFO:__main__:  slot_f1 = 0.8640048818760353
INFO:__main__:  slot_precision = 0.8643816500959358
INFO:__main__:  slot_recall = 0.8636284419658418


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 17


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 17
INFO:__main__:  intent_acc = 0.7802547770700637
INFO:__main__:  intent_f1 = 0.8626657675882221
INFO:__main__:  loss = 2.0601722274507797
INFO:__main__:  mean_intent_slot = 0.8239337947918973
INFO:__main__:  num_acc = 0.9981801637852593
INFO:__main__:  semantic_frame_acc = 0.3989990900818926
INFO:__main__:  slot_acc = 0.494540491355778
INFO:__main__:  slot_f1 = 0.8676128125137309
INFO:__main__:  slot_precision = 0.8697912078230993
INFO:__main__:  slot_recall = 0.865445301542777
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 17
INFO:__main__:  intent_acc = 0.7544338335607094
INFO:__main__:  intent_f1 = 0.8489548213081591
INFO:__main__:  loss = 1.9808796882629394
INFO:__main__:  mean_intent_slot = 0.8092887688905744
INFO:__main__:  num_acc = 0.9990904956798545
INFO:__main__:  semantic_frame_acc = 0.4033651659845384
INFO:__main__:  slot_acc = 0.5015916325602546
INFO:__main__:  slot_f1 = 0.8641437042204395
INFO:__main__:  slot_precision = 0.8647469458987783
INFO:__main__:  slot_recall = 0.8635413035901011


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 18


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 18
INFO:__main__:  intent_acc = 0.7788898999090081
INFO:__main__:  intent_f1 = 0.8625379341351017
INFO:__main__:  loss = 2.060855943816049
INFO:__main__:  mean_intent_slot = 0.8238016943652741
INFO:__main__:  num_acc = 0.9977252047315741
INFO:__main__:  semantic_frame_acc = 0.3980891719745223
INFO:__main__:  slot_acc = 0.4963603275705187
INFO:__main__:  slot_f1 = 0.86871348882154
INFO:__main__:  slot_precision = 0.8705871995774276
INFO:__main__:  slot_recall = 0.8668478260869565
INFO:__main__:Saving model checkpoint to dir_base
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 18
INFO:__main__:  intent_acc = 0.7521600727603456
INFO:__main__:  intent_f1 = 0.848505282085862
INFO:__main__:  loss = 1.9725887877600534
INFO:__main__:  mean_intent_slot = 0.8089047966242879
INFO:__main__:  num_acc = 0.9990904956798545
INFO:__main__:  semantic_frame_acc = 0.4051841746248295
INFO:__main__:  slot_acc = 0.5038653933606184
INFO:__main__:  slot_f1 = 0.8656495204882302
INFO:__main__:  slot_precision = 0.866102581995813
INFO:__main__:  slot_recall = 0.865196932729174


Iteration:   0%|          | 0/1243 [00:00<?, ?it/s]


Epoch 19


INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64



Tuning metrics: mean_intent_slot


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 19
INFO:__main__:  intent_acc = 0.778434940855323
INFO:__main__:  intent_f1 = 0.8623131392604249
INFO:__main__:  loss = 2.0537904228482926
INFO:__main__:  mean_intent_slot = 0.8234979148525879
INFO:__main__:  num_acc = 0.9977252047315741
INFO:__main__:  semantic_frame_acc = 0.3976342129208371
INFO:__main__:  slot_acc = 0.4968152866242038
INFO:__main__:  slot_f1 = 0.8685608888498528
INFO:__main__:  slot_precision = 0.8702807357212003
INFO:__main__:  slot_recall = 0.8668478260869565
INFO:__main__:***** Model Loaded *****
INFO:__main__:***** Running evaluation on dev dataset *****
INFO:__main__:  Num examples = 2198
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = 0
INFO:__main__:  intent_acc = 0.7788898999090081
INFO:__main__:  intent_f1 = 0.8625379341351017
INFO:__main__:  loss = 2.060855943816049
INFO:__main__:  mean_intent_slot = 0.8238016943652741
INFO:__main__:  num_acc = 0.9977252047315741
INFO:__main__:  semantic_frame_acc = 0.3980891719745223
INFO:__main__:  slot_acc = 0.4963603275705187
INFO:__main__:  slot_f1 = 0.86871348882154
INFO:__main__:  slot_precision = 0.8705871995774276
INFO:__main__:  slot_recall = 0.8668478260869565
INFO:__main__:***** Running evaluation on test dataset *****
INFO:__main__:  Num examples = 2199
INFO:__main__:  Batch size = 64


Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

INFO:__main__:***** Eval results *****
INFO:__main__:  epoch = -1
INFO:__main__:  intent_acc = 0.7521600727603456
INFO:__main__:  intent_f1 = 0.848505282085862
INFO:__main__:  loss = 1.9725887877600534
INFO:__main__:  mean_intent_slot = 0.8089047966242879
INFO:__main__:  num_acc = 0.9990904956798545
INFO:__main__:  semantic_frame_acc = 0.4051841746248295
INFO:__main__:  slot_acc = 0.5038653933606184
INFO:__main__:  slot_f1 = 0.8656495204882302
INFO:__main__:  slot_precision = 0.866102581995813
INFO:__main__:  slot_recall = 0.865196932729174
