# Data Transformation

**Idea:** The original data has 127 labels. To improve prediction accuracy, these are grouped into 10 "meta-labels", each served by its own RNN model. To train on a children-labelset (i.e. the set of labels under one meta-label) separately, we modify the labels in the original data: every label in the children-labelset is mapped to a new index encoding, whereas every label outside the children-labelset is treated as the 'O' label.

In [110]:
from collections import defaultdict
from copy import deepcopy

In [None]:
# IMPORT LABEL SCHEME (I.E. META-CHILDREN HIERARCHY)
import json, gzip, pickle, os
# Switch to the directory that holds the label-scheme file, then parse it.
os.chdir('/Users/jacobsw/Desktop')
with open('new_ojoatis.json') as json_file:
    ojoatis = json.loads(json_file.read())
# 'entities' is the meta-label -> children hierarchy (see the sample below);
# 'utterances' is presumably the LUIS utterance list -- TODO confirm.
entities, luis_utterances = ojoatis['entities'], ojoatis['utterances']
# entities = 
# [{u'children': [u'arrive_time.end_time',
#    u'arrive_time.period_mod',
#    u'arrive_time.period_of_day',
#    u'arrive_time.start_time',
#    u'arrive_time.time_relative',
#    u'arrive_time.time'],
#   u'name': u'arrive_time'},
#  { ... },
#  ... ]

In [None]:
def load_atis(path, folder=0):
    """
    Load one fold of the gzip-pickled ATIS dataset.

    # path: directory containing the 'atis.fold<k>.pkl.gz' files
        (a trailing path separator is optional).
    # folder: fold number, 0-4 (0 by default); any other key raises KeyError.
    # return: {'train':train, 'valid':valid, 'test':test, 'dicts':dicts}
        where each split keeps only elements 0 and 2 of the pickled 3-tuple
        (consumed downstream as (encoded_sentences, encoded_labels)).
    """
    folders = {0: 'atis.fold0.pkl.gz',
               1: 'atis.fold1.pkl.gz',
               2: 'atis.fold2.pkl.gz',
               3: 'atis.fold3.pkl.gz',
               4: 'atis.fold4.pkl.gz'}

    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    # The with-block closes the file even if unpickling fails.
    with gzip.open(os.path.join(path, folders[folder]), 'rb') as f:
        train, valid, test, dicts = pickle.load(f)

    # Drop the middle element of every split.
    train, valid, test = [(split[0], split[2]) for split in (train, valid, test)]

    return {'train':train, 'valid':valid, 'test':test, 'dicts':dicts}

# ATIS PATH: /Users/jacobsw/Desktop/IMPLEMENTATION_CAMP/CODE/OJO/LUIS/DATA

In [None]:
def entities_normalize(entities):
    """
    Flatten the meta-label hierarchy into a child -> parent lookup.

    # entities:
        a list of dictionaries, each with
        keys = {'name', 'children'}.
      TRANSFORMED EXAMPLE:
        {u'aircraft_code': u'codes_types',
         u'airline_code': u'codes_types',
         u'airline_name': u'from_loc',
         u'arrive_date.date_relative': u'arrive_date',
         ...
    # return: dict mapping every child label to its parent's name; when a
        child is listed under several parents, the last one in the list wins.
    """
    return {child: parent['name']
            for parent in entities
            for child in parent['children']}

In [111]:
class DataTransform:
    '''
    Builds, for every meta-label, a copy of the dataset whose labels are
    re-indexed to that meta-label's children (everything else becomes 'O').

    After construction:
      self.meta_dict[meta_label]    -> {label: {'new_idx':.., 'old_idx':..}}
      self.new_datasets[meta_label] -> re-labelled dataset (same structure as data)
    '''
    
    def __init__(self, data, entities):
        '''
        # data: original data, with the structure
          {'train':(X_train,Y_train),'valid':(X_valid,Y_valid),'test':(X_test,Y_test),
            'dicts':dicts}
        # entities:
            a list of dictionaries for each
            keys = {'name', 'children'}.
        '''
        self.data = data
        self.entities = entities
        self.entities_normalized = self.__entities_normalize(entities)
        self.meta_labels = [entity['name'] for entity in self.entities]
        self.meta_dict = defaultdict(dict)
        self.new_datasets = defaultdict(dict)
        # All per-meta-label datasets are built eagerly, one after another.
        for meta_label in self.meta_labels:
            self.meta_dict[meta_label] = self.__create_new_labels2idx(meta_label)
            self.new_datasets[meta_label] = self.__label_transform(meta_label, self.data)
    
    def __entities_normalize(self, entities):
        '''
        Turns a list of {'name':parent,'children':[child,...]}, i.e. entities
        into a mapping from children to parent.
        
        # entities:
            a list of dictionaries for each
            keys = {'name', 'children'}.    
          TRANSFORMED EXAMPLE:
            {u'aircraft_code': u'codes_types',
             u'airline_code': u'codes_types',
             u'airline_name': u'from_loc',
             u'arrive_date.date_relative': u'arrive_date',
             ...
        @ return: transformed children-to-parent dictionary.
        '''
        new_dict = {}
        for entry in entities:
            for child in entry['children']:
                new_dict[child] = entry['name']
        return new_dict
    
    def __create_new_labels2idx(self, meta_label):
        '''
        Create a dictionary with keys being the labels in the original dataset,
        and values being {'new_idx':.., 'old_idx':..} sub-dictionaries.
        Labels under meta_label get new indices 1, 2, ... in the iteration
        order of the original labels2idx; every other label gets new index 0.
        
        # meta_label: e.g. 'arrive_time'.
        @ return: new dictionary.
        @ raises: KeyError if a non-'O' label (minus its 2-character prefix)
                  is missing from the label scheme.
        '''
        # Read the instance's own data (not a module-level variable).
        old_l2i = self.data['dicts']['labels2idx']
        new_dict = defaultdict(dict)
        new_dict['O']['new_idx'] = 0 # 'O' will always have index 0
        new_dict['O']['old_idx'] = old_l2i['O']
        idx = 1
        for key in old_l2i:
            # key[2:] strips the 2-character chunk prefix (e.g. 'B-' / 'I-').
            if key != 'O' and self.entities_normalized[key[2:]] == meta_label:
                new_dict[key]['new_idx'] = idx
                idx += 1
            else:
                new_dict[key]['new_idx'] = 0
            new_dict[key]['old_idx'] = old_l2i[key]
        return new_dict
    
    def __label_transform(self, meta_label, data):
        '''
        Transform the original 127-label dataset into a k-label new dataset
        k depends on the meta_label.
        
        # meta_label: must already have an entry in self.meta_dict.
        # data: original data, with the structure
          {'train':(X_train,Y_train),'valid':(X_valid,Y_valid),'test':(X_test,Y_test),
            'dicts':dicts}
        @ return: label-transformed data. The sentence arrays and words2idx are
                  shared with the input; only the label arrays are copied.
        '''
        X_train, X_valid, X_test = data['train'][0], data['valid'][0], data['test'][0]
        # Deep-copy the labels so the in-place rewrite below leaves data untouched.
        Y_train, Y_valid, Y_test = deepcopy(data['train'][1]), deepcopy(data['valid'][1]), deepcopy(data['test'][1])
        old_i2l = {i:l for l,i in data['dicts']['labels2idx'].items()}
        l2_o_n_idx = self.meta_dict[meta_label]
        # New label dictionary: 'O' plus the labels that kept a non-zero index.
        new_l2i = defaultdict(int)
        new_l2i['O'] = 0
        for label,o_n_pair in l2_o_n_idx.items():
            if o_n_pair['new_idx'] != 0:
                new_l2i[label] = o_n_pair['new_idx']
        for labelidx_lists in [Y_train,Y_valid,Y_test]:
            for i,labelidx_list in enumerate(labelidx_lists):
                for j,labelidx in enumerate(labelidx_list):
                    # old index -> label string -> new index
                    labelidx_lists[i][j] = l2_o_n_idx[old_i2l[labelidx]]['new_idx']
        new_dicts = {'words2idx':data['dicts']['words2idx'], 'labels2idx':new_l2i}
        return {'train':(X_train,Y_train),'valid':(X_valid,Y_valid),'test':(X_test,Y_test),
                'dicts':new_dicts}


In [129]:
# Names of the meta-labels, in the order they appear in the label scheme.
meta_labels = [entity['name'] for entity in entities]
# [arrive_time, depart_time_meal, return_cost, flight, from_loc, 
#  codes_types, misc_date_time, depart_date_mod_or, stop_to_loc,
#  arrive_date ]

In [130]:
# Load the default fold (fold 0) of ATIS from a hard-coded local directory.
data = load_atis(path='/Users/jacobsw/Desktop/IMPLEMENTATION_CAMP/CODE/OJO/LUIS/DATA/')

In [131]:
# Build the re-labelled datasets for every meta-label (done in the constructor).
dtf = DataTransform(data, entities)

In [132]:
# meta-label name -> re-labelled dataset dict.
datasets = dtf.new_datasets

# Model: RNN-Keras-Embedding

In [74]:
import numpy as np
import gzip, pickle, random

In [75]:
from keras.models import Sequential
from keras.layers import Embedding, SimpleRNN, LSTM, GRU, Dense, Activation, TimeDistributed
from keras.optimizers import SGD
from keras.utils.np_utils import to_categorical

In [154]:
class NER:
    '''
    Per-word sequence labeller: Embedding -> (SimpleRNN | LSTM | GRU) ->
    per-time-step Dense + softmax, trained one sentence at a time.
    '''
    
    def __init__(self, data, model_dir, mode='SimpleRNN'):
        '''
        # data: a dictionary which contains ...
            {'train':train, 'valid':valid, 'test':test, 'dicts':dicts}
            each value in the dictionary is a 2-tuple ...
            (encoded_sentences, encoded_labels)
            dicts includes ...
            'words2idx', 'labels2idx'
        # model_dir: path prefix for saved models; file names are appended
            to it directly, so it should end with a path separator.
        # mode: 'SimpleRNN', 'LSTM', 'GRU'.
        # raises: ValueError for an unknown mode. Errors hit while converting
            data are re-raised after a format hint is printed, so a
            half-initialised object is never handed back.
        '''
        if mode not in ['SimpleRNN','LSTM','GRU']:
            raise ValueError("MODE ERROR: only 'SimpleRNN', 'LSTM', 'GRU'. \n")
        self.mode = mode
        self.model_dir = model_dir
        try:
            nlabels = len(data['dicts']['labels2idx'])
            to_x = self.__transform(nlabels,'x')
            to_y = self.__transform(nlabels,'y')
            # Lists (not lazy maps) are required: the splits are indexed and shuffled.
            data_transformed = [ ([to_x(sent) for sent in x],
                                  [to_y(labels) for labels in y])
                                 for x,y in [data['train'],
                                             data['valid'],
                                             data['test']] ]
            self.X_train, self.Y_train = data_transformed[0]
            self.X_valid, self.Y_valid = data_transformed[1]
            self.X_test, self.Y_test = data_transformed[2]
            self.dicts = data['dicts']
        except Exception:
            print("DATA FORMAT ERROR: \n"
                  "data = {'train':train, 'valid':valid, 'test':test, 'dicts':dicts} \n"
                  "value = (encoded_sentences, encoded_labels) \n")
            raise
    
    def __transform(self, nlabels, mode='x'):
        '''
        Return the converter for one sentence ('x') or one label list ('y').

        # x: (sent_len, ) => (1, sent_len)
        # y: (sent_len, ) => (1, sent_len, nlabels)
        # raises: ValueError for any other mode.
        '''
        if mode=='x': return lambda x: np.asarray([x])
        if mode=='y': return lambda y: to_categorical(np.asarray(y)[:,np.newaxis],
                                                  nlabels)[np.newaxis,:,:]
        raise ValueError("MODE ERROR: only 'x' and 'y'. \n")
    
    def __shuffle(self, X, Y, seed):
        '''
        Shuffle X and Y in place with the same permutation.

        # X, Y: data and corresponding labels (equal length).
        # seed: ensure the same after-shuffle order for X and Y.
        NOTE: this re-seeds the module-level random generator, so later
        random.sample calls are deterministic as well.
        '''
        random.seed(seed)
        random.shuffle(X)
        random.seed(seed)
        random.shuffle(Y)
    
    def __sample(self, X, Y, nsamples=100):
        '''
        # X, Y: data and corresponding labels.
        # nsamples: number of pairs to draw, capped at len(X).
        # return: (X_samples, Y_samples), index-aligned random subset.
        '''
        sample_idxs = random.sample(range(len(X)), min(nsamples, len(X)))
        return ([X[k] for k in sample_idxs], [Y[k] for k in sample_idxs])
    
    def __get_mean_evaluation(self, X, Y):
        '''
        # X, Y: data and corresponding labels.
        # return: average loss and accuracy on X and Y
        '''
        losses, accuracies = [], []
        # Evaluate with this instance's model, over however many pairs were given.
        for x, y in zip(X, Y):
            loss,accuracy = self.model.evaluate(x,y,verbose=0)
            losses.append(loss)
            accuracies.append(accuracy)
        return (np.mean(losses),np.mean(accuracies))

    def __save_best_model(self,mode):
        '''
        Write the architecture to <model_dir><mode>.json and the weights to
        <model_dir><mode>.h5, replacing earlier files of the same name.

        # mode: file-name stem, e.g. 'best_valid_model'.
        '''
        # Remove each stale file on its own so a missing .json does not
        # leave an old .h5 behind (presumably so save_weights never meets an
        # existing file -- TODO confirm against the installed Keras version).
        for ext in ('.json', '.h5'):
            try:
                os.remove(self.model_dir+mode+ext)
            except OSError:
                pass
        with open(self.model_dir+mode+'.json','w') as json_out:
            json_out.write(self.model.to_json())
        self.model.save_weights(self.model_dir+mode+'.h5')
        print("New %s Saved!" % mode)
    
    def train(self, validation=False, validation_freq=1, verbose=0,
                    lr=.1, nhidden=100, emb_dim=100, nepochs=1):
        '''
        # validation, validation_freq: 
            if true, evaluate on up to 100 random validation sentences whenever
            the index of the sentence just trained on is a multiple of
            validation_freq (1 by default). Early stopping is only checked
            at these points.
        # verbose: 
            0: only print out simple messages (e.g. '... building models').
            1: also print the epoch number.
            Validation/test results are printed whenever a new best is found.
        # lr: learning rate.
        # nhidden: number of hidden neurons.
        # emb_dim: dimension of word embeddings.
        # nepochs: number of epochs.
        # return: the trained model; training ends early when max patience
            is reached.
        Side effects: sets self.model, self.best_loss, self.best_accuracy and
        adds 'idx2labels' / 'idx2words' to self.dicts.
        '''
        print("... configuring model")
        vocsize = len(self.dicts['words2idx'])
        nlabels = len(self.dicts['labels2idx'])
        nsents = len(self.X_train)
        self.dicts['idx2labels'] = {i:l for l,i in self.dicts['labels2idx'].items()}
        self.dicts['idx2words'] = {i:w for w,i in self.dicts['words2idx'].items()}
            
        print("... building model")
        self.model = Sequential()
        self.model.add(Embedding(input_dim=vocsize, output_dim=emb_dim))
        if self.mode=='LSTM':
            self.model.add(LSTM(output_dim=nhidden, activation='sigmoid', return_sequences=True))
        elif self.mode=='GRU':
            self.model.add(GRU(output_dim=nhidden, activation='sigmoid', return_sequences=True))
        else: 
            self.model.add(SimpleRNN(output_dim=nhidden, activation='sigmoid', return_sequences=True))
        self.model.add(TimeDistributed(Dense(output_dim=nlabels)))
        self.model.add(Activation('softmax'))
        sgd = SGD(lr=lr, momentum=.0, decay=.0, nesterov=False)
        self.model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
        
        print("... training model")
        patience = 3000
        patience_increase_ratio = 2
        improvement_threshold = .995
        self.best_loss = np.inf
        self.best_accuracy = 0.
        for e in range(1, nepochs+1):
            if verbose: print("... Epoch: %d" % e)
            self.__shuffle(self.X_train,self.Y_train,seed=0)
            for i in range(nsents):
                # One-word sentences are skipped entirely (no training step, no validation).
                if self.X_train[i].shape[1]==1: continue
                self.model.train_on_batch(self.X_train[i],self.Y_train[i])
        
                if validation and i%validation_freq==0:
                    X_valid_samples, Y_valid_samples = self.__sample(self.X_valid, self.Y_valid)
                    avg_loss, avg_accuracy = self.__get_mean_evaluation(X_valid_samples, 
                                                                        Y_valid_samples)
                    current_iter = ((e-1)*nsents+i)
                    if avg_loss < self.best_loss*improvement_threshold and \
                       avg_accuracy > self.best_accuracy:
                        self.__save_best_model(mode='best_valid_model')
                        patience = max(patience, current_iter*patience_increase_ratio)
                        self.best_loss = avg_loss
                        self.best_accuracy = avg_accuracy
                        X_test_samples, Y_test_samples = self.__sample(self.X_test, self.Y_test)
                        avg_test_loss, avg_test_accuracy = self.__get_mean_evaluation(X_test_samples, 
                                                                                      Y_test_samples)
                        print("Validation: Loss = %.6f | Accuracy = %.6f" % (avg_loss, avg_accuracy))
                        print("Test: Loss = %.6f | Accuracy = %.6f" % (avg_test_loss, avg_test_accuracy))
                        # NOTE(review): this overwrites the running best with *test*
                        # metrics, which then act as the bar for later validation
                        # checks. Kept as-is because callers read best_loss /
                        # best_accuracy; confirm whether this is intended.
                        if avg_test_loss<self.best_loss and \
                           avg_test_accuracy>self.best_accuracy:
                            self.best_loss = avg_test_loss
                            self.best_accuracy = avg_test_accuracy
                            self.__save_best_model(mode='best_test_model')
                    if patience < current_iter:
                        print("TRAINING COMPLETE (at iteration %d)" % current_iter)
                        return self.model
        return self.model
                
    def predict(self, x): 
        '''
        # x: sentences in string (e.g. "i'd like to book a flight from london to paris")
        # return: a list of predicted labels associated with the words in the sentence x.
            Words missing from words2idx are dropped before prediction, so the
            list can be shorter than the sentence; it is empty when no word is known.
        '''
        words2idx = self.dicts['words2idx']
        # train() stores the inverse label map; rebuild it when it is absent.
        idx2labels = self.dicts.get('idx2labels')
        if idx2labels is None:
            idx2labels = {i:l for l,i in self.dicts['labels2idx'].items()}
        x_encoded = [words2idx[w] for w in x.split() if w in words2idx]
        if not x_encoded:
            return []
        scores = self.model.predict_on_batch(np.asarray([x_encoded]))[0]
        return [idx2labels.get(np.argmax(word_scores)) for word_scores in scores]

In [155]:
# Path prefix under which NER saves models; file names are appended to it
# directly, hence the trailing slash.
model_dir = '/Users/jacobsw/Desktop/IMPLEMENTATION_CAMP/CODE/OJO/ojo_ner/ojo_ner/models/'

# Performance of Individual Models

In [151]:
# Train one model per meta-label and keep the best loss/accuracy each reached.
performances = defaultdict(dict)
for meta_label in meta_labels:
    print("CURRENT DATASET'S META-LABEL: %s" % meta_label)
    dataset = datasets[meta_label]
    # The module-level name `ner` is kept: other cells in this notebook refer to it.
    ner = NER(dataset, model_dir=model_dir)
    ner.train(validation=1, validation_freq=1000, verbose=1, nepochs=20)
    performance = defaultdict(None,
                              best_loss=ner.best_loss,
                              best_accuracy=ner.best_accuracy)
    performances[meta_label] = performance

CURRENT DATASET'S META-LABEL: arrive_time
... configuring model
... building model
... training model
... Epoch: 1
New best_valid_model Saved!
Validation: Loss = 0.428441 | Accuracy = 0.995222
Test: Loss = 0.431019 | Accuracy = 0.994464
... Epoch: 2
TRAINING COMPLETE (at iteration 3983)
CURRENT DATASET'S META-LABEL: depart_time_meal
... configuring model
... building model
... training model
... Epoch: 1
New best_valid_model Saved!
Validation: Loss = 1.505589 | Accuracy = 0.966046
Test: Loss = 1.486618 | Accuracy = 0.979804
New best_test_model Saved!
... Epoch: 2
TRAINING COMPLETE (at iteration 3983)
CURRENT DATASET'S META-LABEL: return_cost
... configuring model
... building model
... training model
... Epoch: 1
New best_valid_model Saved!
Validation: Loss = 0.704529 | Accuracy = 0.996136
Test: Loss = 0.705529 | Accuracy = 0.996758
New best_valid_model Saved!
Validation: Loss = 0.025847 | Accuracy = 0.996867
Test: Loss = 0.040120 | Accuracy = 0.994011
... Epoch: 2
TRAINING COMPLETE (a

In [153]:
# One summary line per meta-label model.
for meta_label, performance in performances.items():
    summary = (meta_label, performance['best_loss'], performance['best_accuracy'])
    print("%s: best_loss: %.6f | best_accuracy: %.6f" % summary)

stop_to_loc: best_loss: 0.043699 | best_accuracy: 0.990506
flight: best_loss: 0.131216 | best_accuracy: 0.982539
codes_types: best_loss: 0.082974 | best_accuracy: 0.989159
depart_date_mod_or: best_loss: 0.006323 | best_accuracy: 1.000000
misc_date_time: best_loss: 0.875108 | best_accuracy: 1.000000
arrive_time: best_loss: 0.428441 | best_accuracy: 0.995222
arrive_date: best_loss: 0.002105 | best_accuracy: 1.000000
depart_time_meal: best_loss: 1.486618 | best_accuracy: 0.979804
return_cost: best_loss: 0.025847 | best_accuracy: 0.996867
from_loc: best_loss: 0.022983 | best_accuracy: 0.996912


# Model Ensemble vs. Single Model

### A. Model Ensemble

In [157]:
%%time
# Train one NER model per meta-label and keep the trained wrappers for the ensemble.
# (defaultdict() without a factory behaves like a plain dict.)
models = defaultdict()
for meta_label in meta_labels:
    print "CURRENT DATASET'S META-LABEL: %s" % meta_label
    dataset = datasets[meta_label]
    # The module-level name `ner` is rebound on every pass; other cells in
    # this notebook refer to it, so it should not be renamed.
    ner = NER(dataset,model_dir=model_dir)
    ner.train(validation=1,validation_freq=1000,verbose=1,nepochs=20)
    models[meta_label] = ner

CURRENT DATASET'S META-LABEL: arrive_time
... configuring model
... building model
... training model
... Epoch: 1
New best_valid_model Saved!
Validation: Loss = 0.421883 | Accuracy = 0.995222
Test: Loss = 0.424165 | Accuracy = 0.994464
... Epoch: 2
TRAINING COMPLETE (at iteration 3983)
CURRENT DATASET'S META-LABEL: depart_time_meal
... configuring model
... building model
... training model
... Epoch: 1
New best_valid_model Saved!
Validation: Loss = 1.132829 | Accuracy = 0.966046
Test: Loss = 1.117769 | Accuracy = 0.979804
New best_test_model Saved!
... Epoch: 2
TRAINING COMPLETE (at iteration 3983)
CURRENT DATASET'S META-LABEL: return_cost
... configuring model
... building model
... training model
... Epoch: 1
New best_valid_model Saved!
Validation: Loss = 0.798324 | Accuracy = 0.996136
Test: Loss = 0.797337 | Accuracy = 0.996758
New best_test_model Saved!
New best_valid_model Saved!
Validation: Loss = 0.025757 | Accuracy = 0.996867
Test: Loss = 0.040035 | Accuracy = 0.994011
... Ep

In [385]:
def sample_data(data, nsamples=100):
    '''
    Draw a random, index-aligned subset of sentences and labels.

    # data: a 2-tuple (sents, labels) from valid/test set.
    # nsamples: number of pairs to draw (100 by default); capped at the
        number of sentences, so sets smaller than nsamples are returned
        whole (in random order) instead of raising ValueError.
    # return: a 2-tuple (sent_samples, label_samples).
    '''
    sents,labels = data
    sample_idxs = random.sample(range(len(sents)), min(nsamples, len(sents)))
    sent_samples = [sents[i] for i in sample_idxs]
    label_samples = [labels[i] for i in sample_idxs]
    return (sent_samples, label_samples)

In [386]:
def extract_rough_label(label):
    '''
    Split a label into the name parts used for "rough" matching: two labels
    match roughly when their part sets intersect.

    # label: e.g. 'B-depart_time.period_of_day', 'B-city_name', 'O'.
    # return: set of name parts with the chunk prefix (text before the first
        '-') removed, e.g. {'depart_time', 'period_of_day'}, {'city_name'},
        {'O'}.
    No empty-string filler is added: a shared '' would make unrelated labels
    (e.g. 'B-meal' and 'O') count as a rough match.
    '''
    name = label.split('-', 1)[1] if '-' in label else label
    return set(name.split('.'))
# s = 'B-depart_time.period_of_day'
# s = 'B-depart_time.period_of_day'

In [387]:
# Wrap the original (un-grouped) dataset in NER only to get it converted into
# the array format the models consume; this instance is not trained here.
ner = NER(data,model_dir=model_dir) # for transforming data into fitting format.
# Random evaluation sentences drawn from the converted test split.
X_samples, Y_samples = sample_data((ner.X_test,ner.Y_test))
# Inverse of the original label dictionary: index -> label string.
i2l = {i:l for l,i in data['dicts']['labels2idx'].iteritems()}

In [388]:
# Flat child-label -> meta-label lookup derived from the label scheme.
entities_normalized = entities_normalize(entities)

In [395]:
# Evaluate the ensemble: every per-meta-label model labels every sampled
# sentence, and each token is scored against the gold label.
#   exact: the gold label appears among the models' predictions for the token.
#   rough: some prediction shares a name part with the gold label.
#   confusion: the models produced more than two distinct labels for the token.
# NOTE(review): "exact" counts a gold 'O' as correct whenever any model says
# 'O', even if another model predicts an entity -- confirm this is intended.
confusion = defaultdict(int)
prediction_count, exact_correct_count, rough_correct_count = 0, 0, 0
confusion_record = []
for sent_idx, X in enumerate(X_samples):
    # Decode the one-hot gold labels of this sentence back to label strings.
    Y = [i2l.get(np.argmax(onehot)) for onehot in Y_samples[sent_idx][0]]
    predictions = defaultdict()
    for meta_label, model in models.items():
        idx2labels = model.dicts['idx2labels']
        predictions[meta_label] = [idx2labels.get(np.argmax(scores))
                                   for scores in model.model.predict(X)[0]]
    # A separate index name keeps the token position from shadowing the sentence index.
    for tok_idx, label in enumerate(Y):
        prediction_count += 1
        predicted_label = [predictions[meta_label][tok_idx] for meta_label in meta_labels]
        if label in predicted_label: exact_correct_count += 1
        gold_parts = extract_rough_label(label)
        if any(gold_parts.intersection(extract_rough_label(pred))
               for pred in predicted_label):
            rough_correct_count += 1
        unique_labels = set(predicted_label)
        if len(unique_labels) > 2:
            confusion_record.append((label,unique_labels))
            # Sorted so the same label set always maps to the same key.
            confusion[tuple(sorted(unique_labels))] += 1
        print("===== TEST ====:")
        print("True Label:  %s" % (label,))
        print("Predicted Label:  %s" % (predicted_label,))
# Guard against an empty sample so the summary never divides by zero.
denominator = float(max(prediction_count, 1))
print("Exact Accuracy: %.2f%%" % (exact_correct_count/denominator*100))
print("Rough Accuracy: %.2f%%" % (rough_correct_count/denominator*100))
print("Confusion Rate: %.2f%%" % (sum(confusion.values())/denominator*100))

===== TEST ====:
True Label:  O
Predicted Label:  ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
===== TEST ====:
True Label:  O
Predicted Label:  ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
===== TEST ====:
True Label:  B-airline_name
Predicted Label:  ['O', 'O', 'O', 'O', 'B-airline_name', 'O', 'O', 'O', 'O', 'O']
===== TEST ====:
True Label:  I-airline_name
Predicted Label:  ['O', 'O', 'O', 'O', 'I-airline_name', 'O', 'O', 'O', 'O', 'O']
===== TEST ====:
True Label:  O
Predicted Label:  ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
===== TEST ====:
True Label:  O
Predicted Label:  ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
===== TEST ====:
True Label:  B-meal
Predicted Label:  ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
===== TEST ====:
True Label:  O
Predicted Label:  ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
===== TEST ====:
True Label:  O
Predicted Label:  ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
===== TEST ====:
True Label:  O
Pr

In [396]:
# Display the tokens for which the ensemble produced more than two distinct labels.
confusion_record

[('B-toloc.city_name', {'B-city_name', 'B-toloc.city_name', 'O'})]

### B. Single Model

In [200]:
# Inspect the top-level keys of the loaded dataset.
data.keys()

['test', 'train', 'valid', 'dicts']

In [391]:
# Baseline: a single model trained on the original label set, with the same
# training settings as the per-meta-label models.
ner = NER(data,model_dir=model_dir)
ner.train(validation=1,validation_freq=1000,verbose=1,nepochs=20)

... configuring model
... building model
... training model
... Epoch: 1
New best_valid_model Saved!
Validation: Loss = 4.059187 | Accuracy = 0.646422
Test: Loss = 4.096358 | Accuracy = 0.619585
New best_valid_model Saved!
Validation: Loss = 1.246188 | Accuracy = 0.726121
Test: Loss = 1.490649 | Accuracy = 0.673407
New best_valid_model Saved!
Validation: Loss = 0.979305 | Accuracy = 0.788543
Test: Loss = 1.242404 | Accuracy = 0.749781
New best_valid_model Saved!
Validation: Loss = 0.728996 | Accuracy = 0.837931
Test: Loss = 1.030074 | Accuracy = 0.780446
... Epoch: 2
New best_valid_model Saved!
Validation: Loss = 0.619675 | Accuracy = 0.863305
Test: Loss = 0.768520 | Accuracy = 0.825924
New best_valid_model Saved!
Validation: Loss = 0.594467 | Accuracy = 0.865076
Test: Loss = 0.825065 | Accuracy = 0.833799
... Epoch: 3
New best_valid_model Saved!
Validation: Loss = 0.572398 | Accuracy = 0.883848
Test: Loss = 0.677916 | Accuracy = 0.857985
New best_valid_model Saved!
Validation: Loss = 

<keras.models.Sequential at 0x146f0a1d0>

In [394]:
def sample_data(data, nsamples=100):
    '''
    Draw a random, index-aligned subset of sentences and labels.

    # data: a 2-tuple (sents, labels) from valid/test set.
    # nsamples: number of pairs to draw (100 by default); capped at the
        number of sentences, so sets smaller than nsamples are returned
        whole (in random order) instead of raising ValueError.
    # return: a 2-tuple (sent_samples, label_samples).
    '''
    sents,labels = data
    sample_idxs = random.sample(range(len(sents)), min(nsamples, len(sents)))
    sent_samples = [sents[i] for i in sample_idxs]
    label_samples = [labels[i] for i in sample_idxs]
    return (sent_samples, label_samples)
# Ten independent random draws from the test split; for each draw report the
# mean per-sentence token accuracy of the single model.
for k in range(10):
    X_samples, Y_samples = sample_data((ner.X_test, ner.Y_test))
    print("Sample No.%d" % (k+1))
    idx2labels = ner.dicts['idx2labels']
    accuracies = []
    for sent, gold in zip(X_samples, Y_samples):
        pred = [idx2labels.get(np.argmax(scores)) for scores in ner.model.predict(sent)[0]]
        true = [idx2labels.get(np.argmax(onehot)) for onehot in gold[0]]
        hits = sum(1 for p_i, t_i in zip(pred, true) if p_i == t_i)
        accuracies.append(hits / float(len(pred)))
    print("Exact Accuracy: %.2f%%" % (np.mean(accuracies)*100))

Sample No.1
Exact Accuracy: 94.81%
Sample No.2
Exact Accuracy: 95.91%
Sample No.3
Exact Accuracy: 96.84%
Sample No.4
Exact Accuracy: 97.43%
Sample No.5
Exact Accuracy: 95.59%
Sample No.6
Exact Accuracy: 97.54%
Sample No.7
Exact Accuracy: 93.31%
Sample No.8
Exact Accuracy: 94.16%
Sample No.9
Exact Accuracy: 95.64%
Sample No.10
Exact Accuracy: 95.24%
