In [1]:
import subprocess
import argparse
import sys
import gzip
import cPickle

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import numpy as np

class Classifier(object):
    def __init__(self):
        pass

    def train(self):
        """
        Override this method in your class to implement train
        """
        raise NotImplementedError("Train method not implemented")

    def inference(self):
        """
        Override this method in your class to implement inference
        """
        raise NotImplementedError("Inference method not implemented")



def conlleval(p, g, w, filename='tempfile.txt'):
    '''
    INPUT:
    p :: predictions
    g :: groundtruth
    w :: corresponding words

    OUTPUT:
    filename :: name of the file where the predictions
    are written. it will be the input of conlleval.pl script
    for computing the performance in terms of precision
    recall and f1 score
    '''
    out = ''
    for sl, sp, sw in zip(g, p, w):
        out += 'BOS O O\n'
        for wl, wp, ww in zip(sl, sp, sw):
            out += ww + ' ' + wl + ' ' + wp + '\n'
        out += 'EOS O O\n\n'

    f = open(filename, 'w')
    f.writelines(out)
    f.close()

    return get_perf(filename)

def get_perf(filename):
    ''' run conlleval.pl perl script to obtain precision/recall and F1 score '''
    _conlleval = 'conlleval.pl'

    proc = subprocess.Popen(["perl", _conlleval], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, _ = proc.communicate(open(filename).read())
    for line in stdout.split('\n'):
        if 'accuracy' in line:
            out = line.split()
            break

    precision = float(out[6][:-2])
    recall    = float(out[8][:-2])
    f1score   = float(out[10])

    return (precision, recall, f1score)

In [2]:
def make_target(label, size):
    # print "label is ", label
    tensor = torch.zeros(size)
    tensor = tensor.long()
    tensor[label] = 1
    # print "tensor is ", tensor
    return tensor.view(1,-1)
    # return torch.LongTensor([label.tolist()])


class MyNNClassifier(Classifier):
    def __init__(self):
        pass

    def train(self):
        pass

    def inference(self):
        pass

In [3]:
class NeuralNet(nn.Module):  # inheriting from nn.Module!

    def __init__(self, num_input_nodes, num_hidden_nodes, output_dimension):
        super(NeuralNet, self).__init__()
        self.input_linear = nn.Linear(num_input_nodes, num_hidden_nodes)
        self.output_linear = nn.Linear(num_hidden_nodes, output_dimension)

    def forward(self, input_vector):
        out = self.input_linear(input_vector)
        out = F.sigmoid(out)
        out = self.output_linear(out)
        out = F.softmax(out)
        return out

In [137]:
def get_input_vector(word_embeddings, prev_label, word):
    word_embedding = word_embeddings[word]
    word_feature = autograd.Variable(word_embeddings[word])
    prev_label = autograd.Variable(prev_label)
#     input_vector = torch.cat((word_embedding.view(1,-1), prev_label.view(1,-1)), 1)
    input_vector = torch.cat((word_feature.view(1,-1), prev_label.view(1,-1)), 1)
    return input_vector

In [5]:
# argparser = argparse.ArgumentParser()
# argparser.add_argument("--data", type=str, default="atis.small.pkl.gz", help="The zipped dataset")

# parsed_args = argparser.parse_args(sys.argv[1:])

filename = "atis.small.pkl.gz"
f = gzip.open(filename,'rb')
train_set, valid_set, test_set, dicts = cPickle.load(f)

# print "train_set ", train_set

train_lex, _, train_y = train_set
valid_lex, _, valid_y = valid_set
test_lex,  _,  test_y  = test_set

# print "train_lex ", train_lex
# print "train_y ", train_y

idx2label = dict((k,v) for v,k in dicts['labels2idx'].iteritems())
idx2word  = dict((k,v) for v,k in dicts['words2idx'].iteritems())

'''
To have a look what the original data look like, commnet them before your submission
'''
print "length train data ", len(train_lex), " ", len(train_y)
# print "test lex ", test_lex[0]
# print "word dictionary is ", idx2word
# print "label dictionary is ", idx2label
# print train_lex[0], map(lambda t: idx2word[t], train_lex[0])
# print train_y[0], map(lambda t: idx2label[t], train_y[0])
# print test_lex[0], map(lambda t: idx2word[t], test_lex[0])
# print test_y[0], map(lambda t: idx2label[t], test_y[0])

length train data  3983   3983


In [117]:
def create_embedding(train_x, train_y):
    NUM_LABELS = len(idx2label)
    VOCAB_SIZE = len(idx2word)
    word_embedding_list = []
    label_list = []
    
#     word_embeddings = torch.rand(VOCAB_SIZE, 300)
    word_embeddings = torch.eye(VOCAB_SIZE, VOCAB_SIZE)
    print "VOCAB SIZE", VOCAB_SIZE
    print "NUM LABELS ", NUM_LABELS
    # tag_embeddings = torch.rand(NUM_LABELS+1, 100)
    tag_embeddings = torch.eye(NUM_LABELS+1, NUM_LABELS+1)
    for sentence, labels in zip(train_lex, train_y):
        prev_label = tag_embeddings[NUM_LABELS]
        for word, label in zip(sentence, labels):
            word_embedding = word_embeddings[word]
#             word_feature = autograd.Variable(word_embedding)
#             prev_label = autograd.Variable(prev_label)
            input_vector = torch.cat((word_embedding.view(1,-1), prev_label.view(1,-1)), 1)
#             print "input vector ", input_vector
#             input_vector = autograd.Variable(input_vector)
#             input_vector = torch.cat((word_feature, prev_label), 1)
            word_embedding_list.append(input_vector)
#             print "word embedding list ", word_embedding_list
            prev_label = tag_embeddings[label]
            # input_vector = autograd.Variable(concat_vec)
            # print "input vector ", input_vector
            label_tensor = torch.LongTensor(NUM_LABELS).zero_().view(1,-1)
            label_tensor[0,label] = 1
#             label_tensor = torch.LongTensor([label.item()])
#             target = autograd.Variable(label_tensor)
            label_list.append(label_tensor)
#             print "label list ", label_list
    print "word embedding list ", len(word_embedding_list)
    print "label list ", len(label_list)
#     print "label list 0 ", label_list[0]
    return word_embedding_list, label_list
            
    

In [129]:
word_embedding_list, label_list = create_embedding(train_lex, train_y)

'''
implement you training loop here
'''
# NUM_LABELS = len(idx2label)
VOCAB_SIZE = len(idx2word)
HIDDEN_NODES = 1000
NUM_LABELS = len(idx2label)
# word_embeddings = torch.rand(VOCAB_SIZE, 100)
# word_embeddings = torch.eye(VOCAB_SIZE, VOCAB_SIZE)
# tag_embeddings = torch.rand(NUM_LABELS+1, 100)
# tag_embeddings = torch.eye(NUM_LABELS+1, NUM_LABELS+1)
word_embedding_list = torch.stack(word_embedding_list)
word_embedding_list = torch.squeeze(word_embedding_list)
print "word_embeddings ", word_embedding_list.size()
label_list = torch.stack(label_list)
label_list = torch.squeeze(label_list)
label_list = label_list.float()
NUM_INPUT_NODES = word_embedding_list[0].size()[0]
print "number of input nodes ", NUM_INPUT_NODES
print "label list ", label_list.size()

# print "tag_embeddings ", tag_embeddings
# input dimension for neural network is concatenation of word and tag tensors
model = NeuralNet(NUM_INPUT_NODES, HIDDEN_NODES, NUM_LABELS)


loss_function = nn.MSELoss()
# loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
# optimizer = optim.Adam(model.parameters(), lr=0.0005)

VOCAB SIZE 572
NUM LABELS  127
word embedding list  45388
label list  45388
word_embeddings  torch.Size([45388, 700])
number of input nodes  700
label list  torch.Size([45388, 127])


In [130]:
for epoch in range(1000):
#         model.zero_grad()
#         if epoch % 100 == 0:
#             print "word embedding list ", word_embedding_list
#         words = autograd.Variable(word_embedding_list.view(-1, NUM_INPUT_NODES))
        words = autograd.Variable(word_embedding_list)
#         print "words ", words
        label = autograd.Variable(label_list)
        optimizer.zero_grad()
        probs = model(words)
#         if epoch % 100 == 0:
#             print "probs ", probs
# # #             print "target ", target
#             print "label list ", label_list
        loss = loss_function(probs, label)
#         if epoch % 100 == 0:
#             print loss
        print "loss ", loss 
        loss.backward()
        optimizer.step()
    
#     print "epoch number ", epoch, " epoch_loss ", epoch_loss

loss  Variable containing:
1.00000e-03 *
  7.6954
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6951
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6948
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6946
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6943
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6941
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6939
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6931
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6921
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6913
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6903
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6900
[torch.FloatTensor of size 1]

loss  Variable containing:
1

loss  Variable containing:
1.00000e-03 *
  7.6355
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6351
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6341
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6326
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6313
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6304
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6299
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6294
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6292
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6290
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6288
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.6282
[torch.FloatTensor of size 1]

loss  Variable containing:
1

loss  Variable containing:
1.00000e-03 *
  7.4887
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.4854
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.4820
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.4789
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.4762
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.4752
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.4734
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.4702
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.4665
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.4625
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.4592
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  7.4565
[torch.FloatTensor of size 1]

loss  Variable containing:
1

loss  Variable containing:
1.00000e-03 *
  6.7439
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  6.7256
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  6.7062
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  6.6855
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  6.6660
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  6.6459
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  6.6249
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  6.6045
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  6.5808
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  6.5578
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  6.5357
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  6.5114
[torch.FloatTensor of size 1]

loss  Variable containing:
1

loss  Variable containing:
1.00000e-03 *
  4.7184
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.7162
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.7140
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.7115
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.7094
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.7073
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.7055
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.7035
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.7015
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.7001
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6987
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6970
[torch.FloatTensor of size 1]

loss  Variable containing:
1

loss  Variable containing:
1.00000e-03 *
  4.6686
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6682
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6683
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6682
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6682
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6681
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6683
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6683
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6684
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6685
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6687
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6686
[torch.FloatTensor of size 1]

loss  Variable containing:
1

loss  Variable containing:
1.00000e-03 *
  4.6684
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6683
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6683
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6682
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6682
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6681
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6683
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6683
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6683
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6683
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6683
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6683
[torch.FloatTensor of size 1]

loss  Variable containing:
1

loss  Variable containing:
1.00000e-03 *
  4.6686
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6686
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6682
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6682
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6682
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6683
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6683
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6683
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6682
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6682
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6682
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6682
[torch.FloatTensor of size 1]

loss  Variable containing:
1

loss  Variable containing:
1.00000e-03 *
  4.6678
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6678
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6678
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6678
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6678
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6678
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6678
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6678
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6678
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6678
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6678
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6678
[torch.FloatTensor of size 1]

loss  Variable containing:
1

loss  Variable containing:
1.00000e-03 *
  4.6677
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6677
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6677
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6677
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6677
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6677
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6677
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6677
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6677
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6677
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6677
[torch.FloatTensor of size 1]

loss  Variable containing:
1.00000e-03 *
  4.6677
[torch.FloatTensor of size 1]

loss  Variable containing:
1

In [149]:
model.input_linear.parameters

<bound method Linear.parameters of Linear (700 -> 1000)>

In [138]:
def greedy_inference(model, sentence, word_embeddings, tag_embeddings, NUM_LABELS):
    output_labels = np.zeros(len(sentence))
    prev_label = tag_embeddings[NUM_LABELS]
    for i, word in enumerate(sentence):
        input_vector =  get_input_vector(word_embeddings, prev_label, word)
        probs = model(input_vector)
#         print "probs greedy", probs
        max_val, predicted_label = torch.max(probs, 1)
        predicted_label = predicted_label.data[0]
        prev_label = tag_embeddings[predicted_label]
        output_labels[i] = predicted_label
    return output_labels

In [139]:
def viterbi_inference(model, sentence, word_embeddings, tag_embeddings, NUM_LABELS):
        dp = np.zeros((NUM_LABELS, len(sentence)+1))
        back_pointers = np.zeros((NUM_LABELS, len(sentence)))
        dp[0][0] = 1
        for i in range(len(sentence)):
            word_table = np.zeros((NUM_LABELS, NUM_LABELS))
            if i == 0:
                input_vector = get_input_vector(word_embeddings, tag_embeddings[NUM_LABELS], sentence[i])
#                 print "input_vector ", input_vector
                probs = model(input_vector)
                probs = probs.data.numpy()
#                 print "probs is ", probs
#                 word_table[:,0] = np.log(dp[0, 0]) + np.log(probs)
#                 word_table[:,0] = dp[0,0] + probs
                word_table[:,0] = np.multiply(dp[0, 0], probs)
#                 print "word table 0", word_table[:, 0]
                dp[:,i+1] = word_table[:,0]
                back_pointers[:,i] = 128
                continue
#             print i
            for j in range(NUM_LABELS):
                input_vector = get_input_vector(word_embeddings, tag_embeddings[j], sentence[i])
                probs = model(input_vector)
                probs = probs.data.numpy()
#                 print " probs ", probs
#                 print "dp array ", dp[:,i]
#                 print np.multiply(dp[:, i], probs)
#                 word_table[:,j] = dp[:, i] + probs
                word_table[:,j] = np.multiply(dp[:, i], probs)
#             print "word table is ", word_table
            dp[:,i+1] = word_table.max(1)
            for k in range(NUM_LABELS):
                for index, element in enumerate(word_table[k]):
                    if element == dp[k, i+1]:
                        back_pointers[k, i] = index
#                 back_pointers[k, i] = word_table[k].index(dp[k,i+1])

#         print "back_pointers ", back_pointers
#         print "dp matrix ", dp[:,19]
        output_labels = np.zeros(len(sentence), dtype = np.int)
        label_index = len(sentence) - 1
        max_val = dp[:, len(sentence)].max()
        for index, element in enumerate(dp[:, len(sentence)]):
            if element == max_val:
                output_labels[label_index] = index
#                 print "debug2 ", index
                break
#         print "output labels ", output_labels
        for i in range(len(sentence)-1, 1, -1):
#             print "debug ", output_labels[label_index]
            row = back_pointers[output_labels[label_index], i]
            label_index -= 1
            output_labels[label_index] = row
        print "output labels ", output_labels
        return output_labels
            

In [142]:
predictions_test = [ map(lambda t: idx2label[t], 
                             viterbi_inference(model, x, 
                                              torch.eye(len(idx2word), len(idx2word))
#                                               torch.rand(VOCAB_SIZE, 300)
                                              ,
                                        torch.eye(NUM_LABELS+1, NUM_LABELS+1), NUM_LABELS)) 
                        for x in test_lex
                   ]


output labels  [  0  75 115  75 115  75 115  75 115  75 115  75 115  75 115  75  41  90
 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75 115  75 115  75 115  75  41
  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75 115  7

output labels  [  0 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75 115  75 115  75 115  75 115  75
 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels 

output labels  [  0 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75  41  90 126]
output labels  [  0 115  75 115  75  41  90 126]
output labels  [  0  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75  41  90 126]
output labels  [  0 115  75  41  90 126]
output labels  [  0 115  75 115  75  41  90 126]
output labels  [  0  75 115  75  41  90 126]
output labels  [  0 115  75  41  90 126]
output labels  [  0  41  90 126]
output labels  [  0 115  75  41  90 126]
output labels  [  0  75 115  75 115  75  41  90 126]
output labels  [  0  75  41  90 126]
output l

output labels  [  0 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 11

output labels  [  0 115  75  41  90 126]
output labels  [  0 115  75  41  90 126]
output labels  [  0 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75  41  90 126]
output labels  [  0 115  75  41  90 126]
output labels  [  0  41  90 126]
output labels  [  0  75  41  90 126]
output labels  [  0  75  41  90 126]
output labels  [  0  75  41  90 126]
output labels  [  0  75  41  90 126]
output labels  [  0  75  41  90 126]
output labels  

output labels  [  0 115  75 115  75  41  90 126]
output labels  [  0  75  41  90 126]
output labels  [  0 115  75  41  90 126]
output labels  [  0 115  75  41  90 126]
output labels  [  0  75 115  75  41  90 126]
output labels  [  0 115  75  41  90 126]
output labels  [  0  75  41  90 126]
output labels  [  0  75  41  90 126]
output labels  [  0  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75  41  90 126]
output labels  [  0  75 115  75  41  90 126]
output labels  [  0 115  75 115  75  41  90 126]
output labels  [  0  75 115  75  41  90 126]
output labels  [  0  75 115  75  41  90 126]
output labels  [  0 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  41  90 126]

output labels  [  0  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75 115  75 115  75 115  75 115  75
  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75 115  75 115  75 115  75 115  75
  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75 115  75 115  75 115  75 115  75 115  75 115  75
  41  90 126]
output labels  [  0 115  75 115  75 115  75 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75 115  75  41  90 126]
output labels  [  0 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 115  75  41  90 126]
output labels  [  0  75 115  75 

In [143]:
print "predictions ", predictions_test[100]
groundtruth_test = [ map(lambda t: idx2label[t], y) for y in test_y ]

predictions  ['B-aircraft_code', 'B-today_relative', 'I-return_date.day_number', 'B-today_relative', 'I-return_date.day_number', 'B-today_relative', 'I-return_date.day_number', 'B-today_relative', 'I-return_date.day_number', 'B-today_relative', 'I-return_date.day_number', 'B-today_relative', 'B-flight_days', 'I-arrive_time.time_relative', 'O']


In [144]:
print "groundtruth ", groundtruth_test[100]
words_test = [ map(lambda t: idx2word[t], w) for w in test_lex ]
test_precision, test_recall, test_f1score = conlleval(predictions_test, groundtruth_test, words_test)

groundtruth  ['O', 'O', 'O', 'O', 'O', 'B-depart_date.month_name', 'B-depart_date.day_number', 'I-depart_date.day_number', 'O', 'O', 'B-depart_time.period_of_day', 'O', 'B-fromloc.city_name', 'O', 'B-toloc.city_name']


In [145]:
print test_precision, test_recall, test_f1score

0.0 0.0 0.0
