In [1]:
# First step is to import the needed libraries
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import random
import numpy as np
import pickle
from tqdm import tqdm
%matplotlib inline
from sklearn.metrics import f1_score,accuracy_score
import math
import re
from torch.utils.data import Dataset, DataLoader

In [2]:
# in this section we define static values and variables for ease of access and testing
_fn="final" # file unique id for saving and loading models
bert_base='./bert-base-uncased/'
bert_large='./bert-large-uncased/'

snips_train="./dataset/snips_train.iob"
snips_test="./dataset/snips_test.iob"
atis_train="./dataset/atis.train.w-intent.iob"
atis_test="./dataset/atis.test.w-intent.iob"
#ENV variables directly affect the model's behaviour
ENV_DATASET_TRAIN=atis_train
ENV_DATASET_TEST=atis_test

ENV_BERT_ID_CLS=False # use cls token for id classification
ENV_EMBEDDING_SIZE=768# dimention of embbeding, bertbase=768,bertlarge&elmo=1024
ENV_BERT_ADDR=bert_base
ENV_SEED=1331
ENV_CNN_FILTERS=128
ENV_CNN_KERNELS=4
ENV_HIDDEN_SIZE=ENV_CNN_FILTERS*ENV_CNN_KERNELS

#these are related to training
BATCH_SIZE=16
LENGTH=60
STEP_SIZE=50

# you must use cuda to run this code. if this returns false, you can not proceed.
USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    print("You are using cuda. Good!")
else:
    print('You are NOT using cuda! Some problems may occur.')

torch.manual_seed(ENV_SEED)
random.seed(ENV_SEED)

You are using cuda. Good!


implement dataloader

In [3]:

#this function converts tokens to ids and then to a tensor
def prepare_sequence(seq, to_ix):
    idxs = list(map(lambda w: to_ix[w] if w in to_ix.keys() else to_ix["<UNK>"], seq))
    tensor = Variable(torch.LongTensor(idxs)).cuda() if USE_CUDA else Variable(torch.LongTensor(idxs))
    return tensor
# this function turns class text to id
def prepare_intent(intent, to_ix):
    idxs = to_ix[intent] if intent in to_ix.keys() else to_ix["UNKNOWN"]
    return idxs
# converts numbers to <NUM> TAG
def number_to_tag(txt):
    return "<NUM>" if txt.isdecimal() else txt

# Here we remove multiple spaces and punctuation which cause errors in tokenization for bert & elmo.
def remove_punc(mlist):
    mlist = [re.sub(" +"," ",t.split("\t")[0][4:-4]) for t in mlist] # remove spaces down to 1
    temp_train_tokens = []
    # punct remove example:  play samuel-el jackson from 2009 - 2010 > play samuelel jackson from 2009 - 2010
    for row in mlist:
        tokens = row.split(" ")
        newtokens = []
        for token in tokens:
            newtoken = re.sub(r"[.,'\"\\/\-:&’—=–官方杂志¡…“”~%]",r"",token) # remove punc
            newtoken = re.sub(r"[楽園追放�]",r"A",newtoken)
            newtokens.append(newtoken if len(token)>1 else token)
        if newtokens[-1]=="":
            newtokens.pop(-1)
        if newtokens[0]=="":
            newtokens.pop(0)
        temp_train_tokens.append(" ".join(newtokens))
    return temp_train_tokens
# this function returns the main tokens so that we can apply tagging on them. see original paper.
def get_subtoken_mask(current_tokens,bert_tokenizer):
    temp_mask = []
    for i in current_tokens:
        temp_row_mask = []
        temp_row_mask.append(False) # for cls token
        temp = bert_tokenizer.tokenize(i)
        for j in temp:
            temp_row_mask.append(j[:2]!="##")
        while len(temp_row_mask)<LENGTH:
            temp_row_mask.append(False)
        temp_mask.append(temp_row_mask)
        if sum(temp_row_mask)!=len(i.split(" ")):
            print(f"inconsistent:{temp}")
            print(i)
            print(sum(temp_row_mask))
            print(len(i.split(" ")))
    return torch.tensor(temp_mask).cuda()

flatten = lambda l: [number_to_tag(item) for sublist in l for item in sublist]

# Data load and Preprocessing

In [4]:
def tokenize_dataset(dataset_address):
    # added tokenizer and tokens for
    bert_tokenizer = torch.hub.load(ENV_BERT_ADDR, 'tokenizer', ENV_BERT_ADDR,verbose=False,source="local")#38toks snips,52Atis
    ##open database and read line by line
    dataset = open(dataset_address,"r").readlines()
    print("example input:"+dataset[0])
    ##remove last character of lines -\n- in train file
    dataset = [t[:-1] for t in dataset]
    #converts string to array of tokens + array of tags + target intent [array with x=3 and y dynamic]
    dataset_tokens = remove_punc(dataset)
    dataset_subtoken_mask = get_subtoken_mask(dataset_tokens,bert_tokenizer)
    dataset_toks = bert_tokenizer.batch_encode_plus(dataset_tokens,max_length=LENGTH,add_special_tokens=True,return_tensors='pt'
                                                  ,return_attention_mask=True , padding='max_length',truncation=True)
    dataset = [[re.sub(" +"," ",t.split("\t")[0]).split(" "),t.split("\t")[1].split(" ")[:-1],t.split("\t")[1].split(" ")[-1]] for t in dataset]
    #removes BOS, EOS from array of tokens and tags
    dataset = [[t[0][1:-1],t[1][1:],t[2]] for t in dataset]
    return dataset, dataset_subtoken_mask,dataset_toks
train,train_subtoken_mask,train_toks = tokenize_dataset(ENV_DATASET_TRAIN)
test, test_subtoken_mask, test_toks = tokenize_dataset(ENV_DATASET_TEST)

example input:BOS i want to fly from boston at 838 am and arrive in denver at 1110 in the morning EOS	 O O O O O O B-fromloc.city_name O B-depart_time.time I-depart_time.time O O O B-toloc.city_name O B-arrive_time.time O O B-arrive_time.period_of_day atis_flight

example input:BOS i would like to find a flight from charlotte to las vegas that makes a stop in st. louis EOS	O O O O O O O O O B-fromloc.city_name O B-toloc.city_name I-toloc.city_name O O O O O B-stoploc.city_name I-stoploc.city_name atis_flight



In [5]:
#convert above array to separate lists
seq_in,seq_out, intent = list(zip(*train))
seq_in_test,seq_out_test, intent_test = list(zip(*test.copy()))

In [6]:
# Create Sets of unique tokens
vocab = set(flatten(seq_in))
slot_tag = set(flatten(seq_out))
intent_tag = set(intent)

In [7]:
# adds paddings
sin=[] #padded input tokens
sout=[] # padded output translated tags
sin_test=[] #padded input tokens
sout_test=[] # padded output translated tags
## adds padding inside input tokens
def add_paddings(seq_in,seq_out):
    sin=[]
    sout=[]
    for i in range(len(seq_in)):
        temp = seq_in[i]
        if len(temp)<LENGTH:
            while len(temp)<LENGTH:
                temp.append('<PAD>')
        else:
            temp = temp[:LENGTH]
        sin.append(temp)
        # add padding inside output tokens
        temp = seq_out[i]
        if len(temp)<LENGTH:
            while len(temp)<LENGTH:
                temp.append('<PAD>')
        else:
            temp = temp[:LENGTH]
        sout.append(temp)
    return sin,sout
sin,sout=add_paddings(seq_in,seq_out)
sin_test,sout_test=add_paddings(seq_in_test,seq_out_test)

In [8]:
# making dictionary (token:id), initial value
word2index = {'<PAD>': 0, '<UNK>':1,'<BOS>':2,'<EOS>':3,'<NUM>':4}
# add rest of token list to dictionary
for token in vocab:
    if token not in word2index.keys():
        word2index[token]=len(word2index)
#make id to token list ( reverse )
index2word = {v:k for k,v in word2index.items()}

# initial tag2index dictionary
tag2index = {'<PAD>' : 0,'<BOS>':2,'<UNK>':1,'<EOS>':3}
# add rest of tag tokens to list
for tag in slot_tag:
    if tag not in tag2index.keys():
        tag2index[tag] = len(tag2index)
# making index to tag
index2tag = {v:k for k,v in tag2index.items()}

#initialize intent to index
intent2index={'UNKNOWN':0}
for ii in intent_tag:
    if ii not in intent2index.keys():
        intent2index[ii] = len(intent2index)
index2intent = {v:k for k,v in intent2index.items()}

# Loading PreTrained Embeddings

In [9]:
#defining datasets.
def remove_values_from_list(the_list, val):
   return [value for value in the_list if value != val]

class NLUDataset(Dataset):
    def __init__(self, sin,sout,intent,input_ids,attention_mask,token_type_ids,subtoken_mask):
        self.sin = [prepare_sequence(temp,word2index) for temp in sin]
        self.sout = [prepare_sequence(temp,tag2index) for temp in sout]
        self.intent = Variable(torch.LongTensor([prepare_intent(temp,intent2index) for temp in intent])).cuda()
        self.input_ids=input_ids.cuda()
        self.attention_mask=attention_mask.cuda()
        self.token_type_ids=token_type_ids.cuda()
        self.subtoken_mask=subtoken_mask.cuda()
        self.x_mask = [Variable(torch.BoolTensor(tuple(map(lambda s: s ==0, t )))).cuda() for t in self.sin]
    def __len__(self):
        return len(self.intent)
    def __getitem__(self, idx):
        sample = self.sin[idx],self.sout[idx],self.intent[idx],self.input_ids[idx],self.attention_mask[idx],self.token_type_ids[idx],self.subtoken_mask[idx],self.x_mask[idx]
        return sample
#making single list
train_data=NLUDataset(sin,sout,intent,train_toks['input_ids'],train_toks['attention_mask'],train_toks['token_type_ids'],train_subtoken_mask)
test_data=NLUDataset(sin_test,sout_test,intent_test,test_toks['input_ids'],test_toks['attention_mask'],test_toks['token_type_ids'],test_subtoken_mask)
train_data = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_data = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [10]:
# we put all tags inside of the batch in a flat array for F1 measure.
# we use masking so that we only non PAD tokens are counted in f1 measurement
def mask_important_tags(predictions,tags,masks):
    result_tags=[]
    result_preds=[]
    for pred,tag,mask in zip(predictions.tolist(),tags.tolist(),masks.tolist()):
        #index [0] is to get the data
        for p,t,m in zip(pred,tag,mask):
            if not m:
                result_tags.append(p)
                result_preds.append(t)
        #result_tags.pop()
        #result_preds.pop()
    return result_preds,result_tags


# Modeling

In [11]:
# generates transformer mask
def generate_square_subsequent_mask(sz: int) :
    """Generates an upper-triangular matrix of -inf, with zeros on diag."""
    return torch.triu(torch.ones(sz, sz) * float('-inf'), diagonal=1)
def generate_square_diagonal_mask(sz: int) :
    """Generates a matrix which there are zeros on diag and other indexes are -inf."""
    return torch.triu(torch.ones(sz,sz)-float('inf'), diagonal=1)+torch.tril(torch.ones(sz,sz)-float('inf'), diagonal=-1)
# positional embedding used in transformers
class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """
        Args:
            x: Tensor, shape [seq_len, batch_size, embedding_dim]
        """
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)


#start of the shared encoder
class BertLayer(nn.Module):
    def __init__(self):
        super(BertLayer, self).__init__()
        self.bert_model = torch.hub.load(ENV_BERT_ADDR, 'model', ENV_BERT_ADDR,source="local")

    def forward(self, bert_info=None):
        (bert_tokens, bert_mask, bert_tok_typeid) = bert_info
        bert_encodings = self.bert_model(bert_tokens, bert_mask, bert_tok_typeid)
        bert_last_hidden = bert_encodings['last_hidden_state']
        bert_pooler_output = bert_encodings['pooler_output']
        return bert_last_hidden, bert_pooler_output


class Encoder(nn.Module):
    def __init__(self, p_dropout=0.5):
        super(Encoder, self).__init__()
        self.filter_number = ENV_CNN_FILTERS
        self.kernel_number = ENV_CNN_KERNELS  # tedad size haye filter : 2,3,5 = 3
        self.embedding_size = ENV_EMBEDDING_SIZE
        self.activation = nn.ReLU()
        self.p_dropout = p_dropout
        self.softmax = nn.Softmax(dim=1)
        self.conv1 = nn.Conv1d(in_channels=self.embedding_size, out_channels=self.filter_number, kernel_size=(2,),
                               padding="same", padding_mode="zeros")
        self.conv2 = nn.Conv1d(in_channels=self.embedding_size, out_channels=self.filter_number, kernel_size=(3,),
                               padding="same", padding_mode="zeros")
        self.conv3 = nn.Conv1d(in_channels=self.embedding_size, out_channels=self.filter_number, kernel_size=(5,),
                               padding="same", padding_mode="zeros")
        self.conv4 = nn.Conv1d(in_channels=self.embedding_size, out_channels=self.filter_number, kernel_size=(1,),
                               padding="same", padding_mode="zeros")

    def forward(self, bert_last_hidden):
        trans_embedded = torch.transpose(bert_last_hidden, dim0=1, dim1=2)
        convolve1 = self.activation(self.conv1(trans_embedded))
        convolve2 = self.activation(self.conv2(trans_embedded))
        convolve3 = self.activation(self.conv3(trans_embedded))
        convolve4 = self.activation(self.conv4(trans_embedded))
        convolve1 = torch.transpose(convolve1, dim0=1, dim1=2)
        convolve2 = torch.transpose(convolve2, dim0=1, dim1=2)
        convolve3 = torch.transpose(convolve3, dim0=1, dim1=2)
        convolve4 = torch.transpose(convolve4, dim0=1, dim1=2)
        output = torch.cat((convolve4, convolve1, convolve2, convolve3), dim=2)
        return output


In [12]:
#Middle
class Middle(nn.Module):
    def __init__(self ,p_dropout=0.5):
        super(Middle, self).__init__()
        self.activation = nn.ReLU()
        self.p_dropout = p_dropout
        self.softmax = nn.Softmax(dim=1)
        #Transformer
        nlayers = 2  # number of nn.TransformerEncoderLayer in nn.TransformerEncoder
        self.pos_encoder = PositionalEncoding(ENV_HIDDEN_SIZE, dropout=0.1)
        encoder_layers = nn.TransformerEncoderLayer(ENV_HIDDEN_SIZE, nhead=2,batch_first=True, dim_feedforward=2048 ,activation="relu", dropout=0.1)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, nlayers,enable_nested_tensor=False)
        self.transformer_mask = generate_square_subsequent_mask(LENGTH).cuda()

    def forward(self, fromencoder,input_masking,training=True):
        src = fromencoder * math.sqrt(ENV_HIDDEN_SIZE)
        src = self.pos_encoder(src)
        output = (self.transformer_encoder(src,src_key_padding_mask=input_masking)) # outputs probably
        return output

In [13]:
#start of the decoder
class Decoder(nn.Module):

    def __init__(self,slot_size,intent_size,dropout_p=0.5):
        super(Decoder, self).__init__()
        self.slot_size = slot_size
        self.intent_size = intent_size
        self.dropout_p = dropout_p
        self.softmax= nn.Softmax(dim=1)
        # Define the layers
        self.embedding = nn.Embedding(self.slot_size, ENV_HIDDEN_SIZE)
        self.activation = nn.ReLU()
        self.dropout1 = nn.Dropout(self.dropout_p)
        self.dropout2 = nn.Dropout(self.dropout_p)
        self.dropout3 = nn.Dropout(self.dropout_p)
        self.slot_trans = nn.Linear(ENV_HIDDEN_SIZE, self.slot_size)
        self.intent_out = nn.Linear(ENV_HIDDEN_SIZE,self.intent_size)
        self.intent_out_cls = nn.Linear(ENV_EMBEDDING_SIZE,self.intent_size) # dim of bert
        self.decoder_layer = nn.TransformerDecoderLayer(d_model=ENV_HIDDEN_SIZE, nhead=2,batch_first=True,dim_feedforward=300 ,activation="relu")
        self.transformer_decoder = nn.TransformerDecoder(self.decoder_layer, num_layers=2)
        self.transformer_mask = generate_square_subsequent_mask(LENGTH).cuda()
        self.transformer_diagonal_mask = generate_square_diagonal_mask(LENGTH).cuda()
        self.pos_encoder = PositionalEncoding(ENV_HIDDEN_SIZE, dropout=0.1)
        self.self_attention = nn.MultiheadAttention(embed_dim=ENV_HIDDEN_SIZE
                                                    ,num_heads=8,dropout=0.1
                                                    ,batch_first=True)
        self.layer_norm = nn.LayerNorm(ENV_HIDDEN_SIZE)


    def forward(self, input,encoder_outputs,encoder_maskings,bert_subtoken_maskings=None,infer=False):
        # encoder outputs: BATCH,LENGTH,Dims (16,60,1024)
        batch_size = encoder_outputs.shape[0]
        length = encoder_outputs.size(1) #for every token in batches
        embedded = self.embedding(input)

        #print("NOT CLS")
        encoder_outputs2=encoder_outputs
        context,attn_weight = self.self_attention(encoder_outputs2,encoder_outputs2,encoder_outputs2
                                                  ,key_padding_mask=encoder_maskings)
        encoder_outputs2 = self.layer_norm(self.dropout2(context))+encoder_outputs2
        sum_mask = (~encoder_maskings).sum(1).unsqueeze(1)
        sum_encoder = ((((encoder_outputs2)))*((~encoder_maskings).unsqueeze(2))).sum(1)
        intent_score = self.intent_out(self.dropout1(sum_encoder/sum_mask)) # B,D


        newtensor = torch.cuda.FloatTensor(batch_size, length,ENV_HIDDEN_SIZE).fill_(0.) # size of newtensor same as original
        for i in range(batch_size): # per batch
            newtensor_index=0
            for j in range(length): # for each token
                if bert_subtoken_maskings[i][j].item()==1:
                    newtensor[i][newtensor_index] = encoder_outputs[i][j]
                    newtensor_index+=1

        if infer==False:
            embedded=embedded*math.sqrt(ENV_HIDDEN_SIZE)
            embedded = self.pos_encoder(embedded)
            zol = self.transformer_decoder(tgt=embedded,memory=newtensor
                                           ,memory_mask=self.transformer_diagonal_mask
                                           ,tgt_mask=self.transformer_mask)

            scores = self.slot_trans(self.dropout3(zol))
            slot_scores = F.log_softmax(scores,dim=2)
        else:
            bos = Variable(torch.LongTensor([[tag2index['<BOS>']]*batch_size])).cuda().transpose(1,0)
            bos = self.embedding(bos)
            tokens=bos
            for i in range(length):
                temp_embedded=tokens*math.sqrt(ENV_HIDDEN_SIZE)
                temp_embedded = self.pos_encoder(temp_embedded)
                zol = self.transformer_decoder(tgt=temp_embedded,
                                               memory=newtensor,
                                               tgt_mask=self.transformer_mask[:i+1,:i+1],
                                               memory_mask=self.transformer_diagonal_mask[:i+1,:]
                                               )
                scores = self.slot_trans(self.dropout3(zol))
                softmaxed = F.log_softmax(scores,dim=2)
                #the last token is apended to vectors
                _,input = torch.max(softmaxed,2)
                newtok = self.embedding(input)
                tokens=torch.cat((bos,newtok),dim=1)
            slot_scores = softmaxed

        return slot_scores.view(input.size(0)*length,-1), intent_score

# Training



In [14]:
bert_layer = BertLayer()
encoder = Encoder(len(word2index))
middle = Middle()
decoder = Decoder(len(tag2index),len(intent2index))
if USE_CUDA:
    encoder = encoder.cuda()
    decoder = decoder.cuda()
    middle = middle.cuda()
    bert_layer.cuda()

loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)
loss_function_2 = nn.CrossEntropyLoss()
dec_optim = optim.AdamW(decoder.parameters(),lr=0.0001)
enc_optim = optim.AdamW(encoder.parameters(),lr=0.001)
ber_optim = optim.AdamW(bert_layer.parameters(),lr=0.0001)
mid_optim = optim.AdamW(middle.parameters(), lr=0.0001)
enc_scheduler = torch.optim.lr_scheduler.StepLR(enc_optim, 1, gamma=0.96)
dec_scheduler = torch.optim.lr_scheduler.StepLR(dec_optim, 1, gamma=0.96)
mid_scheduler = torch.optim.lr_scheduler.StepLR(mid_optim, 1, gamma=0.96)
ber_scheduler = torch.optim.lr_scheduler.StepLR(ber_optim, 1, gamma=0.96)

Some weights of the model checkpoint at ./bert-base-uncased/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [15]:
max_id_prec=0.
max_sf_f1=0.
max_id_prec_both=0.
max_sf_f1_both=0.

for step in tqdm(range(STEP_SIZE)):
    losses=[]
    id_precision=[]
    sf_f1=[]

    ### TRAIN
    encoder.train() # set to train mode
    middle.train()
    decoder.train()
    bert_layer.train()
    for i,(x,tag_target,intent_target,bert_tokens,bert_mask,bert_toktype,subtoken_mask,x_mask) in enumerate(train_data):
        batch_size=tag_target.size(0)
        bert_layer.zero_grad()
        encoder.zero_grad()
        middle.zero_grad()
        decoder.zero_grad()
        bert_hidden,bert_pooler = bert_layer(bert_info=(bert_tokens,bert_mask,bert_toktype))
        encoder_output = encoder(bert_last_hidden=bert_hidden)
        output = middle(encoder_output,bert_mask==0,training=True)
        start_decode = Variable(torch.LongTensor([[tag2index['<BOS>']]*batch_size])).cuda().transpose(1,0)
        start_decode = torch.cat((start_decode,tag_target[:,:-1]),dim=1)
        tag_score, intent_score = decoder(start_decode,output,bert_mask==0,bert_subtoken_maskings=subtoken_mask)
        loss_1 = loss_function_1(tag_score,tag_target.view(-1))
        loss_2 = loss_function_2(intent_score,intent_target)
        loss = loss_1+loss_2
        losses.append(loss.data.cpu().numpy() if USE_CUDA else loss.data.numpy()[0])
        loss.backward()
        torch.nn.utils.clip_grad_norm_(encoder.parameters(), 0.5)
        torch.nn.utils.clip_grad_norm_(middle.parameters(), 0.5)
        torch.nn.utils.clip_grad_norm_(decoder.parameters(), 0.5)
        torch.nn.utils.clip_grad_norm_(bert_layer.parameters(), 0.5)
        enc_optim.step()
        mid_optim.step()
        dec_optim.step()
        ber_optim.step()
        #print(bert_tokens[0])
        #print(tag_target[0])
        id_precision.append(accuracy_score(intent_target.detach().cpu(),torch.argmax(intent_score,dim=1).detach().cpu()))
        pred_list,target_list=mask_important_tags(torch.argmax(tag_score,dim=1).view(batch_size,LENGTH),tag_target,x_mask)
        sf_f1.append(f1_score(pred_list,target_list,average="micro",zero_division=0))
    #print report
    print("Step",step," batches",i," :")
    print("Train-")
    print(f"loss:{round(float(np.mean(losses)),4)}")
    print(f"SlotFilling F1:{round(float(np.mean(sf_f1)),3)}")
    print(f"IntentDet Prec:{round(float(np.mean(id_precision)),3)}")
    losses=[]
    sf_f1=[]
    id_precision=[]
    #scheduler.step()

    #### TEST
    encoder.eval() # set to test mode
    middle.eval()
    decoder.eval()
    bert_layer.eval()
    with torch.no_grad(): # to turn off gradients computation
        for i,(x,tag_target,intent_target,bert_tokens,bert_mask,bert_toktype,subtoken_mask,x_mask) in enumerate(test_data):
            batch_size=tag_target.size(0)
            encoder.zero_grad()
            middle.zero_grad()
            decoder.zero_grad()
            bert_layer.zero_grad()
            bert_hidden,bert_pooler = bert_layer(bert_info=(bert_tokens,bert_mask,bert_toktype))
            encoder_output = encoder(bert_last_hidden=bert_hidden)
            output = middle(encoder_output,bert_mask==0,training=True)
            start_decode = Variable(torch.LongTensor([[tag2index['<BOS>']]*batch_size])).cuda().transpose(1,0)
            tag_score, intent_score = decoder(start_decode,output,bert_mask==0,bert_subtoken_maskings=subtoken_mask,infer=True)
            loss_1 = loss_function_1(tag_score,tag_target.view(-1))
            loss_2 = loss_function_2(intent_score,intent_target)
            loss = loss_1+loss_2
            losses.append(loss.data.cpu().numpy() if USE_CUDA else loss.data.numpy()[0])
            id_precision.append(accuracy_score(intent_target.detach().cpu(),torch.argmax(intent_score,dim=1).detach().cpu()))
            pred_list,target_list=mask_important_tags(torch.argmax(tag_score,dim=1).view(batch_size,LENGTH),tag_target,x_mask)
            sf_f1.append(f1_score(pred_list,target_list,average="micro",zero_division=0))
    print("Test-")
    print(f"loss:{round(float(np.mean(losses)),4)}")
    print(f"SlotFilling F1:{round(float(np.mean(sf_f1)),4)}")
    print(f"IntentDet Prec:{round(float(np.mean(id_precision)),4)}")
    print("--------------")
    max_sf_f1 = max_sf_f1 if round(float(np.mean(sf_f1)),4)<=max_sf_f1 else round(float(np.mean(sf_f1)),4)
    max_id_prec = max_id_prec if round(float(np.mean(id_precision)),4)<=max_id_prec else round(float(np.mean(id_precision)),4)
    if max_sf_f1_both<=round(float(np.mean(sf_f1)),4) and max_id_prec_both<=round(float(np.mean(id_precision)),4):
        max_sf_f1_both=round(float(np.mean(sf_f1)),4)
        max_id_prec_both=round(float(np.mean(id_precision)),4)
        torch.save(bert_layer,f"models/ctran{_fn}-bertlayer.pkl")
        torch.save(encoder,f"models/ctran{_fn}-encoder.pkl")
        torch.save(middle,f"models/ctran{_fn}-middle.pkl")
        torch.save(decoder,f"models/ctran{_fn}-decoder.pkl")
    enc_scheduler.step()
    dec_scheduler.step()
    mid_scheduler.step()
    ber_scheduler.step()
print(f"max single SF F1: {max_sf_f1}")
print(f"max single ID PR: {max_id_prec}")
print(f"max mutual SF:{max_sf_f1_both}  PR: {max_id_prec_both}")


  return F.conv1d(input, weight, bias, self.stride,


Step 0  batches 311  :
Train-
loss:0.8159
SlotFilling F1:0.911
IntentDet Prec:0.921
Test-
loss:0.5912
SlotFilling F1:0.9515
IntentDet Prec:0.9325
--------------


  2%|▏         | 1/50 [01:04<52:47, 64.63s/it]

Step 1  batches 311  :
Train-
loss:0.2482
SlotFilling F1:0.976
IntentDet Prec:0.971
Test-
loss:0.5703
SlotFilling F1:0.9697
IntentDet Prec:0.9498
--------------


  4%|▍         | 2/50 [02:08<51:14, 64.05s/it]

Step 2  batches 311  :
Train-
loss:0.1514
SlotFilling F1:0.988
IntentDet Prec:0.983
Test-
loss:0.393
SlotFilling F1:0.9783
IntentDet Prec:0.9654
--------------


  6%|▌         | 3/50 [03:10<49:35, 63.30s/it]

Step 3  batches 311  :
Train-
loss:0.1175
SlotFilling F1:0.992
IntentDet Prec:0.986


  8%|▊         | 4/50 [04:12<48:02, 62.65s/it]

Test-
loss:0.4426
SlotFilling F1:0.9768
IntentDet Prec:0.9743
--------------
Step 4  batches 311  :
Train-
loss:0.0831
SlotFilling F1:0.993
IntentDet Prec:0.991


 10%|█         | 5/50 [05:16<47:18, 63.09s/it]

Test-
loss:0.5154
SlotFilling F1:0.9763
IntentDet Prec:0.9688
--------------
Step 5  batches 311  :
Train-
loss:0.0637
SlotFilling F1:0.995
IntentDet Prec:0.991
Test-
loss:0.4438
SlotFilling F1:0.979
IntentDet Prec:0.9766
--------------


 12%|█▏        | 6/50 [06:18<46:08, 62.92s/it]

Step 6  batches 311  :
Train-
loss:0.0503
SlotFilling F1:0.996
IntentDet Prec:0.994


 14%|█▍        | 7/50 [07:20<44:51, 62.59s/it]

Test-
loss:0.4724
SlotFilling F1:0.9795
IntentDet Prec:0.9665
--------------
Step 7  batches 311  :
Train-
loss:0.0414
SlotFilling F1:0.997
IntentDet Prec:0.994


 16%|█▌        | 8/50 [08:22<43:39, 62.38s/it]

Test-
loss:0.494
SlotFilling F1:0.9816
IntentDet Prec:0.9721
--------------
Step 8  batches 311  :
Train-
loss:0.03
SlotFilling F1:0.998
IntentDet Prec:0.997


 18%|█▊        | 9/50 [09:24<42:36, 62.35s/it]

Test-
loss:0.4629
SlotFilling F1:0.9815
IntentDet Prec:0.9754
--------------
Step 9  batches 311  :
Train-
loss:0.0328
SlotFilling F1:0.998
IntentDet Prec:0.994


 20%|██        | 10/50 [10:27<41:36, 62.41s/it]

Test-
loss:0.493
SlotFilling F1:0.9812
IntentDet Prec:0.9743
--------------
Step 10  batches 311  :
Train-
loss:0.0224
SlotFilling F1:0.998
IntentDet Prec:0.997


 22%|██▏       | 11/50 [11:29<40:29, 62.30s/it]

Test-
loss:0.5784
SlotFilling F1:0.9799
IntentDet Prec:0.9743
--------------
Step 11  batches 311  :
Train-
loss:0.0147
SlotFilling F1:0.999
IntentDet Prec:0.998


 24%|██▍       | 12/50 [12:32<39:31, 62.40s/it]

Test-
loss:0.5194
SlotFilling F1:0.982
IntentDet Prec:0.9743
--------------
Step 12  batches 311  :
Train-
loss:0.0155
SlotFilling F1:0.999
IntentDet Prec:0.998


 26%|██▌       | 13/50 [13:34<38:33, 62.52s/it]

Test-
loss:0.5209
SlotFilling F1:0.9818
IntentDet Prec:0.9732
--------------
Step 13  batches 311  :
Train-
loss:0.0146
SlotFilling F1:0.999
IntentDet Prec:0.998


 28%|██▊       | 14/50 [14:37<37:30, 62.51s/it]

Test-
loss:0.5664
SlotFilling F1:0.9826
IntentDet Prec:0.9732
--------------
Step 14  batches 311  :
Train-
loss:0.0108
SlotFilling F1:0.999
IntentDet Prec:0.999


 30%|███       | 15/50 [15:39<36:24, 62.42s/it]

Test-
loss:0.5538
SlotFilling F1:0.9822
IntentDet Prec:0.9721
--------------
Step 15  batches 311  :
Train-
loss:0.0046
SlotFilling F1:0.999
IntentDet Prec:1.0


 32%|███▏      | 16/50 [16:41<35:13, 62.18s/it]

Test-
loss:0.6082
SlotFilling F1:0.9818
IntentDet Prec:0.9721
--------------
Step 16  batches 311  :
Train-
loss:0.0052
SlotFilling F1:0.999
IntentDet Prec:1.0


 34%|███▍      | 17/50 [17:42<34:07, 62.05s/it]

Test-
loss:0.5966
SlotFilling F1:0.9825
IntentDet Prec:0.971
--------------
Step 17  batches 311  :
Train-
loss:0.0051
SlotFilling F1:0.999
IntentDet Prec:1.0


 36%|███▌      | 18/50 [18:44<32:55, 61.75s/it]

Test-
loss:0.548
SlotFilling F1:0.9826
IntentDet Prec:0.9721
--------------
Step 18  batches 311  :
Train-
loss:0.0075
SlotFilling F1:0.999
IntentDet Prec:0.999


 38%|███▊      | 19/50 [19:45<31:53, 61.72s/it]

Test-
loss:0.549
SlotFilling F1:0.983
IntentDet Prec:0.9743
--------------
Step 19  batches 311  :
Train-
loss:0.0029
SlotFilling F1:1.0
IntentDet Prec:1.0
Test-
loss:0.5384
SlotFilling F1:0.9821
IntentDet Prec:0.9766
--------------


 40%|████      | 20/50 [20:47<30:51, 61.70s/it]

Step 20  batches 311  :
Train-
loss:0.0021
SlotFilling F1:1.0
IntentDet Prec:1.0


 42%|████▏     | 21/50 [21:49<29:52, 61.80s/it]

Test-
loss:0.6017
SlotFilling F1:0.9817
IntentDet Prec:0.9754
--------------
Step 21  batches 311  :
Train-
loss:0.0018
SlotFilling F1:1.0
IntentDet Prec:1.0


 44%|████▍     | 22/50 [22:51<28:54, 61.94s/it]

Test-
loss:0.57
SlotFilling F1:0.9823
IntentDet Prec:0.9754
--------------
Step 22  batches 311  :
Train-
loss:0.002
SlotFilling F1:1.0
IntentDet Prec:1.0


 46%|████▌     | 23/50 [23:53<27:52, 61.94s/it]

Test-
loss:0.6024
SlotFilling F1:0.9828
IntentDet Prec:0.9732
--------------
Step 23  batches 311  :
Train-
loss:0.0029
SlotFilling F1:1.0
IntentDet Prec:1.0


 48%|████▊     | 24/50 [24:55<26:51, 61.96s/it]

Test-
loss:0.5951
SlotFilling F1:0.9831
IntentDet Prec:0.9732
--------------
Step 24  batches 311  :
Train-
loss:0.0079
SlotFilling F1:1.0
IntentDet Prec:0.999


 50%|█████     | 25/50 [25:57<25:48, 61.95s/it]

Test-
loss:0.5759
SlotFilling F1:0.982
IntentDet Prec:0.9754
--------------
Step 25  batches 311  :
Train-
loss:0.007
SlotFilling F1:1.0
IntentDet Prec:0.999


 52%|█████▏    | 26/50 [26:59<24:50, 62.10s/it]

Test-
loss:0.6418
SlotFilling F1:0.9822
IntentDet Prec:0.9732
--------------
Step 26  batches 311  :
Train-
loss:0.0065
SlotFilling F1:1.0
IntentDet Prec:0.999


 54%|█████▍    | 27/50 [28:01<23:42, 61.87s/it]

Test-
loss:0.5885
SlotFilling F1:0.983
IntentDet Prec:0.9732
--------------
Step 27  batches 311  :
Train-
loss:0.0019
SlotFilling F1:1.0
IntentDet Prec:1.0


 56%|█████▌    | 28/50 [29:03<22:40, 61.86s/it]

Test-
loss:0.6066
SlotFilling F1:0.9818
IntentDet Prec:0.9699
--------------
Step 28  batches 311  :
Train-
loss:0.0053
SlotFilling F1:1.0
IntentDet Prec:0.999


 58%|█████▊    | 29/50 [30:04<21:37, 61.80s/it]

Test-
loss:0.6886
SlotFilling F1:0.9807
IntentDet Prec:0.9665
--------------
Step 29  batches 311  :
Train-
loss:0.0064
SlotFilling F1:1.0
IntentDet Prec:1.0


 60%|██████    | 30/50 [31:06<20:36, 61.83s/it]

Test-
loss:0.5956
SlotFilling F1:0.9825
IntentDet Prec:0.9721
--------------
Step 30  batches 311  :
Train-
loss:0.0042
SlotFilling F1:1.0
IntentDet Prec:1.0


 62%|██████▏   | 31/50 [32:08<19:33, 61.75s/it]

Test-
loss:0.6529
SlotFilling F1:0.9827
IntentDet Prec:0.9754
--------------
Step 31  batches 311  :
Train-
loss:0.0014
SlotFilling F1:1.0
IntentDet Prec:1.0


 64%|██████▍   | 32/50 [33:10<18:34, 61.89s/it]

Test-
loss:0.6132
SlotFilling F1:0.9831
IntentDet Prec:0.9743
--------------
Step 32  batches 311  :
Train-
loss:0.001
SlotFilling F1:1.0
IntentDet Prec:1.0


 66%|██████▌   | 33/50 [34:12<17:33, 61.95s/it]

Test-
loss:0.6386
SlotFilling F1:0.9824
IntentDet Prec:0.9754
--------------
Step 33  batches 311  :
Train-
loss:0.0015
SlotFilling F1:1.0
IntentDet Prec:1.0


 68%|██████▊   | 34/50 [35:14<16:29, 61.82s/it]

Test-
loss:0.6415
SlotFilling F1:0.9827
IntentDet Prec:0.9743
--------------
Step 34  batches 311  :
Train-
loss:0.0017
SlotFilling F1:1.0
IntentDet Prec:1.0


 70%|███████   | 35/50 [36:17<15:33, 62.21s/it]

Test-
loss:0.6578
SlotFilling F1:0.9829
IntentDet Prec:0.9743
--------------
Step 35  batches 311  :
Train-
loss:0.0036
SlotFilling F1:1.0
IntentDet Prec:1.0


 72%|███████▏  | 36/50 [37:21<14:38, 62.75s/it]

Test-
loss:0.661
SlotFilling F1:0.9835
IntentDet Prec:0.9743
--------------
Step 36  batches 311  :
Train-
loss:0.0004
SlotFilling F1:1.0
IntentDet Prec:1.0


 74%|███████▍  | 37/50 [38:23<13:33, 62.55s/it]

Test-
loss:0.6542
SlotFilling F1:0.9835
IntentDet Prec:0.9754
--------------
Step 37  batches 311  :
Train-
loss:0.0006
SlotFilling F1:1.0
IntentDet Prec:1.0


 76%|███████▌  | 38/50 [39:25<12:29, 62.45s/it]

Test-
loss:0.665
SlotFilling F1:0.9836
IntentDet Prec:0.9743
--------------
Step 38  batches 311  :
Train-
loss:0.0008
SlotFilling F1:1.0
IntentDet Prec:1.0


 78%|███████▊  | 39/50 [40:27<11:25, 62.30s/it]

Test-
loss:0.6587
SlotFilling F1:0.9835
IntentDet Prec:0.9754
--------------
Step 39  batches 311  :
Train-
loss:0.0004
SlotFilling F1:1.0
IntentDet Prec:1.0


 80%|████████  | 40/50 [41:30<10:26, 62.62s/it]

Test-
loss:0.664
SlotFilling F1:0.9835
IntentDet Prec:0.9754
--------------
Step 40  batches 311  :
Train-
loss:0.0004
SlotFilling F1:1.0
IntentDet Prec:1.0


 82%|████████▏ | 41/50 [42:32<09:21, 62.37s/it]

Test-
loss:0.6645
SlotFilling F1:0.9833
IntentDet Prec:0.9754
--------------
Step 41  batches 311  :
Train-
loss:0.0006
SlotFilling F1:1.0
IntentDet Prec:1.0


 84%|████████▍ | 42/50 [43:36<08:23, 62.91s/it]

Test-
loss:0.6378
SlotFilling F1:0.9833
IntentDet Prec:0.9754
--------------
Step 42  batches 311  :
Train-
loss:0.001
SlotFilling F1:1.0
IntentDet Prec:1.0


 86%|████████▌ | 43/50 [44:38<07:18, 62.69s/it]

Test-
loss:0.6583
SlotFilling F1:0.9826
IntentDet Prec:0.9743
--------------
Step 43  batches 311  :
Train-
loss:0.0026
SlotFilling F1:1.0
IntentDet Prec:1.0


 88%|████████▊ | 44/50 [45:41<06:15, 62.58s/it]

Test-
loss:0.6482
SlotFilling F1:0.9829
IntentDet Prec:0.9754
--------------
Step 44  batches 311  :
Train-
loss:0.0007
SlotFilling F1:1.0
IntentDet Prec:1.0


 90%|█████████ | 45/50 [46:43<05:12, 62.48s/it]

Test-
loss:0.6524
SlotFilling F1:0.9832
IntentDet Prec:0.9754
--------------
Step 45  batches 311  :
Train-
loss:0.0015
SlotFilling F1:1.0
IntentDet Prec:1.0
Test-
loss:0.6497
SlotFilling F1:0.983
IntentDet Prec:0.9766
--------------


 92%|█████████▏| 46/50 [47:46<04:10, 62.62s/it]

Step 46  batches 311  :
Train-
loss:0.0004
SlotFilling F1:1.0
IntentDet Prec:1.0


 94%|█████████▍| 47/50 [48:48<03:07, 62.41s/it]

Test-
loss:0.6535
SlotFilling F1:0.9829
IntentDet Prec:0.9766
--------------
Step 47  batches 311  :
Train-
loss:0.0005
SlotFilling F1:1.0
IntentDet Prec:1.0


 96%|█████████▌| 48/50 [49:52<02:05, 62.96s/it]

Test-
loss:0.639
SlotFilling F1:0.9824
IntentDet Prec:0.9766
--------------
Step 48  batches 311  :
Train-
loss:0.0005
SlotFilling F1:1.0
IntentDet Prec:1.0


 98%|█████████▊| 49/50 [50:55<01:02, 62.93s/it]

Test-
loss:0.6508
SlotFilling F1:0.9825
IntentDet Prec:0.9766
--------------
Step 49  batches 311  :
Train-
loss:0.0003
SlotFilling F1:1.0
IntentDet Prec:1.0


100%|██████████| 50/50 [51:58<00:00, 62.37s/it]

Test-
loss:0.6557
SlotFilling F1:0.9826
IntentDet Prec:0.9766
--------------
max single SF F1: 0.9836
max single ID PR: 0.9766
max mutual SF:0.983  PR: 0.9766





# Test

The following cells is for reviewing the performance of CTran.

In [16]:
# This cell reloads the best model during training from hard-drive.
bert_layer.load_state_dict(torch.load(f'models/ctran{_fn}-bertlayer.pkl').state_dict())
encoder.load_state_dict(torch.load(f'models/ctran{_fn}-encoder.pkl').state_dict())
middle.load_state_dict(torch.load(f'models/ctran{_fn}-middle.pkl').state_dict())
decoder.load_state_dict(torch.load(f'models/ctran{_fn}-decoder.pkl').state_dict())
if USE_CUDA:
    bert_layer = bert_layer.cuda()
    encoder = encoder.cuda()
    middle = middle.cuda()
    decoder = decoder.cuda()


In [17]:
global clipindex
clipindex=0
def removepads(toks,clip=False):
    global clipindex
    result = toks.copy()
    for i,t in enumerate(toks):
        if t=="<PAD>":
            result.remove(t)
        elif t=="<EOS>":
            result.remove(t)
            if not clip:
                clipindex=i
    if clip:
        result=result[:clipindex]
    return result

In [18]:
print("Example of model prediction on test dataset")
encoder.eval()
middle.eval()
decoder.eval()
bert_layer.eval()
with torch.no_grad():
    index = random.choice(range(len(test)))
    test_raw = test[index][0]
    bert_tokens = test_toks['input_ids'][index].unsqueeze(0).cuda()
    bert_mask = test_toks['attention_mask'][index].unsqueeze(0).cuda()
    bert_toktype = test_toks['token_type_ids'][index].unsqueeze(0).cuda()
    subtoken_mask = test_subtoken_mask[index].unsqueeze(0).cuda()
    test_in = prepare_sequence(test_raw,word2index)
    test_mask = Variable(torch.BoolTensor(tuple(map(lambda s: s ==0, test_in.data)))).cuda() if USE_CUDA else Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, test_in.data)))).view(1,-1)
    start_decode = Variable(torch.LongTensor([[word2index['<BOS>']]*1])).cuda().transpose(1,0) if USE_CUDA else Variable(torch.LongTensor([[word2index['<BOS>']]*1])).transpose(1,0)
    test_raw = [removepads(test_raw)]
    bert_hidden,bert_pooler = bert_layer(bert_info=(bert_tokens,bert_mask,bert_toktype))
    encoder_output = encoder(bert_last_hidden=bert_hidden)
    output = middle(encoder_output,bert_mask==0)
    tag_score, intent_score = decoder(start_decode,output,bert_mask==0,bert_subtoken_maskings=subtoken_mask,infer=True)

    v,i = torch.max(tag_score,1)
    print("Sentence           : ",*test_raw[0])
    print("Tag Truth          : ", *test[index][1][:len(test_raw[0])])
    print("Tag Prediction     : ",*(list(map(lambda ii:index2tag[ii],i.data.tolist()))[:len(test_raw[0])]))
    v,i = torch.max(intent_score,1)
    print("Intent Truth       : ", test[index][2])
    print("Intent Prediction  : ",index2intent[i.data.tolist()[0]])

Example of model prediction on test dataset
Sentence           :  show me first class flights from new york to miami round trip
Tag Truth          :  O O B-class_type I-class_type O O B-fromloc.city_name I-fromloc.city_name O B-toloc.city_name B-round_trip I-round_trip
Tag Prediction     :  O O B-class_type I-class_type O O B-fromloc.city_name I-fromloc.city_name O B-toloc.city_name B-round_trip I-round_trip
Intent Truth       :  atis_flight
Intent Prediction  :  atis_flight


In [19]:
print("Instances where model predicted intent wrong")
encoder.eval()
middle.eval()
decoder.eval()
bert_layer.eval()
total_wrong_predicted_intents = 0
with torch.no_grad():
    for i in range(len(test)):
        index = i
        test_raw = test[index][0]
        bert_tokens = test_toks['input_ids'][index].unsqueeze(0).cuda()
        bert_mask = test_toks['attention_mask'][index].unsqueeze(0).cuda()
        bert_toktype = test_toks['token_type_ids'][index].unsqueeze(0).cuda()
        subtoken_mask = test_subtoken_mask[index].unsqueeze(0).cuda()
        test_in = prepare_sequence(test_raw,word2index)
        test_mask = Variable(torch.BoolTensor(tuple(map(lambda s: s ==0, test_in.data)))).cuda() if USE_CUDA else Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, test_in.data)))).view(1,-1)
        # print(removepads(test_raw))
        start_decode = Variable(torch.LongTensor([[word2index['<BOS>']]*1])).cuda().transpose(1,0) if USE_CUDA else Variable(torch.LongTensor([[word2index['<BOS>']]*1])).transpose(1,0)
        test_raw = [removepads(test_raw)]
        bert_hidden,bert_pooler = bert_layer(bert_info=(bert_tokens,bert_mask,bert_toktype))
        encoder_output = encoder(bert_last_hidden=bert_hidden)
        output = middle(encoder_output,bert_mask==0)
        tag_score, intent_score = decoder(start_decode,output,bert_mask==0,bert_subtoken_maskings=subtoken_mask,infer=True)

        v,i = torch.max(intent_score,1)
        if test[index][2]!=index2intent[i.data.tolist()[0]]:
            v,i = torch.max(tag_score,1)
            print("Sentence           : ",*test_raw[0])
            print("Tag Truth          : ", *test[index][1][:len(test_raw[0])])
            print("Tag Prediction     : ",*list(map(lambda ii:index2tag[ii],i.data.tolist()))[:len(test_raw[0])])
            v,i = torch.max(intent_score,1)
            print("Intent Truth       : ", test[index][2])
            print("Intent Prediction  : ",index2intent[i.data.tolist()[0]])
            print("--------------------------------------")
            total_wrong_predicted_intents+=1

print("Total instances of wrong intent prediction is ",total_wrong_predicted_intents)

Instances where model predicted intent wrong
Sentence           :  show flight and prices kansas city to chicago on next wednesday arriving in chicago by 7 pm
Tag Truth          :  O O O O B-fromloc.city_name I-fromloc.city_name O B-toloc.city_name O B-depart_date.date_relative B-depart_date.day_name O O B-toloc.city_name B-arrive_time.time_relative B-arrive_time.time I-arrive_time.time
Tag Prediction     :  O O O O B-fromloc.city_name I-fromloc.city_name O B-toloc.city_name O B-depart_date.date_relative B-depart_date.day_name O O B-toloc.city_name B-arrive_time.time_relative B-arrive_time.time I-arrive_time.time
Intent Truth       :  atis_flight#atis_airfare
Intent Prediction  :  atis_flight
--------------------------------------
Sentence           :  what day of the week do flights from nashville to tacoma fly on
Tag Truth          :  O O O O O O O O B-fromloc.city_name O B-toloc.city_name O O
Tag Prediction     :  O O O O O O O O B-fromloc.city_name O B-toloc.city_name O O
Intent Tr