In [1]:
import sys, os, inspect, codecs
import copy
import random 

import numpy as np
import time
import tensorflow as tf
from tensorflow.python.ops import rnn
from tensorflow.contrib.layers.python.layers import linear


# token과 target 의 데이터를 처리하여 int(id)와 str(symbol)의 두 형식으로 양방향 처리해주도록 설계된 모듈
class Vocab:
    def __init__(self, fn, mode='token'):
        self.mode = mode 
        self.token_unk_id = 0 #in 'token.vocab.txt', the id of '_UNK' is 0.
        self.token_unk_symbol = '_UNK' #'UNK' means unknown word, a word that doesn't exist the vocabulary set.

        self.token_pad_id = 1 #in 'token.vocab.txt', the id of '_PAD' is 1.
        self.token_pad_symbol = '_PAD' #'PAD' means non-character on 128 blank.
        
        self.token_2_id = {} #input from Vocab.load_token_vocab() / output to Vocab.get_id()
        self.id_2_token = {} #input from Vocab.load_token_vocab() / output to Vocab.get_symbol()

        self.target_2_id = {} #input from Vocab.load_target_vocab() / output to Vocab.get_id()
        self.id_2_target = {} #input from Vocab.load_target_vocab() / output to Vocab.get_symbol()

        self.target_out_symbol = 'O'
        
        if mode == 'token'  : self.load_token_vocab(fn)
        if mode == 'target' : self.load_target_vocab(fn)

    def load_token_vocab(self, fn):
        with codecs.open(fn, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.rstrip('\n\r') #.rstrip() 문자열의 오른쪽에 있는 한칸 이상의 공백들을 지우는 문자열 함수
                token, id = line.split('\t') #.split() ()안에 주어진 문자열을 기준으로 나누어 리스트에 넣어 반환함.

                id = int(id)

                if token == self.token_unk_symbol: #if token is '_UNK', set id to 0.
                    self.token_2_id[token] = self.token_unk_id
                    continue 

                if token == self.token_pad_symbol: #if token is '_PAD', set id to 1.
                    self.token_2_id[token] = self.token_pad_id
                    continue 

                # other tokens
                self.token_2_id[token] = id
                self.id_2_token[id] = token 

    def load_target_vocab(self, fn):
        with codecs.open(fn, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.rstrip('\n\r')
                target, id = line.split('\t')
                id = int(id)

                self.target_2_id[target] = id
                self.id_2_target[id] = target

    def get_id(self, symbol): #input from load_token_vocab() and load_target_vocab()
        if self.mode == 'token':
            return self.token_2_id.get(symbol, self.token_unk_id)

        if self.mode == 'target':
            return self.target_2_id.get(symbol)

    def get_symbol(self, id): #input from load_token_vocab() and load_target_vocab()
        if self.mode == 'token':
            return self.id_2_token.get(id)

        if self.mode == 'target':
            return self.id_2_target.get(id)

    def get_num_tokens(self):
        if self.mode == 'token': return len(self.token_2_id)
        return None 

    def get_num_targets(self):
        if self.mode == 'target': return len(self.target_2_id)
        return None 

    def get_token_pad_id(self):   return self.token_pad_id
    def get_target_null_id(self): return self.get_id(self.target_out_symbol)
    

    
    
 
class N21Item:
    def __init__(self, target, text):
        self.target = target
        self.text   = text

        self.target_id   = None
        self.token_ids   = None  # it should be array 

    def set_id(self, target_id, token_ids):
        self.target_id = target_id
        self.token_ids = token_ids

    def get_tokens(self):
        # currently, only support 'character' 
        return list(self.text) 

    
# txt file 또는 prediction()에서 개별 입력된 sentence 를 읽어와서 target, text 의 각각의 개체로 반환하여 data라는 리스트로 반환하는 모듈.
class N21TextData:
    def __init__(self, src=None, mode='file'):  # mode = 'file' | 'sentence'
        self.data = []
        
        if mode == 'file':      self.load_text_file_data(src)
        if mode == 'sentence':  self.load_text_data(src)

    def add_to_data(self, target, text):
        # normalize
        target = target.upper()
        text   = text.upper()
        self.data.append( N21Item(target, text) )

    def load_text_data(self, line):
        # mode = 'sentence'
        # format of line : "TAG  \t  SENTENCE"
        line = line.rstrip('\n\r')
        target, text = line.split('\t')
        self.add_to_data(target, text)

    def load_text_file_data(self, fn):
        # mode = 'file' 
        with codecs.open(fn, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.rstrip('\n\r')
                target, text = line.split('\t')

                self.add_to_data(target, text)
                

class Dataset():
    def __init__(self, id_data, batch_size, num_steps, target_num_step=None, pad_id=1, target_null_id=0, deterministic=False):
        self.data            = id_data       # it should be id-based data
        
        self.token_pad_id    = pad_id
        self.target_null_id  = target_null_id

        self.batch_size      = batch_size
        self.num_steps       = num_steps

        self.src_num_steps   = num_steps
        self.tar_num_steps   = target_num_step  # for sequence to sequence dataset

        self.deterministic   = deterministic  # if deterministic is True, data is shuffled and retrieved
        self.iterator           = self.iterate_forever() #during trian, shuffle index
        self.predict_iterator   = self.iterate_once() #during test, don't shuffle index

        self.epoch = 0 

    def get_num_examples(self): return len( self.data ) 
    def get_epoch_num(self): return self.epoch 

    def _iterate(self, index_gen, batch_size, max_len):
        """ Abstraction method for _iterate function"""
        raise NotImplementedError("Abstract method.".format( self.run.__name__))
    
    # for training
    def iterate_forever(self):
        def index_stream():
            # yield data index 
            self.indexs = list( range( self.get_num_examples() ) )
            while True:
                self.epoch += 1 
                if not self.deterministic:
                    random.shuffle( self.indexs ) 
                for index in self.indexs:
                    yield index 

        for a_data in self._iterate(index_stream()):
            yield a_data

    # for testing
    def iterate_once(self):
        def index_stream():
            # yield data index 
            self.indexs = list( range( self.get_num_examples() ) )
            for index in self.indexs:
                yield index 

        for a_data in self._iterate(index_stream()):
            yield a_data 


  return f(*args, **kwds)


In [2]:
# through Vocab class, convert text data to id_data which is tokenized to int.

class N21Converter:

    @staticmethod
    def convert(txt_data, target_vocab, token_vocab):
        # txt_data : it should be N21TextData
        # target_vocab    : it should be Vocab
        # token_vocab    : it should be Vocab

        id_data = []  # it should be list of N21Item

        for item in txt_data.data:
            target_id = target_vocab.get_id(item.target)
            text_tokens = item.get_tokens()

            token_ids = [ token_vocab.get_id(token) for token in text_tokens ] # for each token

            new_item = copy.deepcopy(item)
            new_item.set_id(target_id,token_ids)

            id_data.append( new_item )
        return id_data

In [3]:

# for sentiment dataset
class SentimentDataset(Dataset):
    def _iterate(self, index_gen):
        B = self.batch_size
        N = self.num_steps

        # vectorize id data
        sentiment  = np.zeros([B],    np.int64)  
        token      = np.zeros([B, N], np.int64)
        weight     = np.zeros([B, N], np.int64)

        while True:
            sentiment[:]  = 0
            token[:]      = 0
            weight[:]     = 0

            for b in range(B):
                try:
                    while True:
                        index = next(index_gen)
                        _num_steps = len( self.data[index].token_ids )
                        if _num_steps <= N: break 

                    _sentiment_id = copy.deepcopy( self.data[index].target_id )
                    _token_ids    = copy.deepcopy( self.data[index].token_ids )

                    # fill pad for weight
                    _weight_ids   = [0] * self.num_steps
                    for _idx, _ in enumerate(_token_ids): _weight_ids[_idx] = 1

                    # fill pad to token
                    _token_ids += [self.token_pad_id] * ( self.num_steps - len( _token_ids ) ) 

                    # output
                    sentiment[b] = -1 if _sentiment_id is None else _sentiment_id

                    # input
                    token[b]  = _token_ids
                    weight[b] = _weight_ids

                except StopIteration:
                    pass
            if not np.any(weight):
                return
            yield sentiment, token, weight  # tuple for (target, input)


def load_data():

    # vocab loader
    token_vocab_fn  = os.path.join( os.path.dirname('__file__'), 'data', 'token.vocab.txt')
    #__file__ 은 현재 프로글매의 파일 위치를 알아냄.
    #os.path.dirname()은 입력된 파일의 디렉토리 이름을 알아냄.
    #os.path.join() 은 디렉토리와 파일명을 이어주는 함수.
    token_vocab     = Vocab(token_vocab_fn, mode='token')
    target_vocab_fn = os.path.join( os.path.dirname('__file__'), 'data', 'target.vocab.txt')
    target_vocab    = Vocab(target_vocab_fn, mode='target')

    # load train data 
    #train_data_fn  = os.path.join( os.path.dirname('__file__'), 'data', 'train.sent_data.txt')
    train_data_fn  = os.path.join( os.path.dirname('__file__'), 'data', 'train_data.txt')
    train_txt_data = N21TextData(train_data_fn)

    # convert text data to id data
    train_id_data  = N21Converter.convert(train_txt_data, target_vocab, token_vocab)
    
    return train_id_data, token_vocab, target_vocab

In [4]:
train_id_data, token_vocab, target_vocab = load_data()
num_vocabs       = token_vocab.get_num_tokens() #Vocab.get_num_tokens()
num_target_class = target_vocab.get_num_targets() #Vocab.get_num_targets()

In [5]:
class HParams(object):
    def __init__(self, **kwargs):
        self._items = {}
        for k, v in kwargs.items():
            self._set(k, v)

    def _set(self, k, v):
        self._items[k] = v
        setattr(self, k, v)

    def parse(self, str_value):
        hps = HParams(**self._items)
        for entry in str_value.strip().split(","):
            entry = entry.strip()
            if not entry:
                continue
            key, sep, value = entry.partition("=")
            if not sep:
                raise ValueError("Unable to parse: %s" % entry)
            default_value = hps._items[key]
            if isinstance(default_value, bool):
                hps._set(key, value.lower() == "true")
            elif isinstance(default_value, int):
                hps._set(key, int(value))
            elif isinstance(default_value, float):
                hps._set(key, float(value))
            else:
                hps._set(key, value)
        return hps

    def update(self, **kwargs):
        for k, v in kwargs.items():
            self._set(k, v)

    def show(self):
        for k, v in self._items.items():
            print( u'{} : {}'.format(k,v) )

In [6]:
#deploy

def freeze_graph(model_dir, output_node_names, frozen_graph_name):
    """Extract the sub graph defined by the output nodes and convert 
    all its variables into constant 
    Args:
        model_dir: the root folder containing the checkpoint state file
        output_node_names: a string, containing all the output node's names, 
                            comma separated

    thiso code is from : https://blog.metaflow.fr/tensorflow-how-to-freeze-a-model-and-serve-it-with-a-python-api-d4f3596b3adc
    """
    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            "directory: %s" % model_dir)

    if not output_node_names:
        print("You need to supply the name of a node to --output_node_names.")
        return -1

    # We retrieve our checkpoint fullpath
    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path
    
    # We precise the file fullname of our freezed graph
    absolute_model_dir = "/".join(input_checkpoint.split('/')[:-1])
    output_graph = os.path.join(absolute_model_dir, frozen_graph_name)

    # We clear devices to allow TensorFlow to control on which device it will load operations
    clear_devices = True

    # We start a session using a temporary fresh Graph
    with tf.Session(graph=tf.Graph()) as sess:
        # We import the meta graph in the current default Graph
        saver = tf.train.import_meta_graph(input_checkpoint + '.meta', clear_devices=clear_devices)

        # We restore the weights
        saver.restore(sess, input_checkpoint)

        # We use a built-in TF helper to export variables to constants
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess, # The session is used to retrieve the weights
            tf.get_default_graph().as_graph_def(), # The graph_def is used to retrieve the nodes 
            output_node_names.split(",") # The output node names are used to select the usefull nodes
        ) 

        # Finally we serialize and dump the output graph to the filesystem
        with tf.gfile.GFile(output_graph, "wb") as f:
            f.write(output_graph_def.SerializeToString())
        print("%d ops in the final graph." % len(output_graph_def.node))

    return output_graph_def


def load_graph(frozen_graph_filename):
    # We load the protobuf file from the disk and parse it to retrieve the 
    # unserialized graph_def
    #
    # this code is from https://blog.metaflow.fr/tensorflow-how-to-freeze-a-model-and-serve-it-with-a-python-api-d4f3596b3adc
    #

    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Then, we import the graph_def into a new Graph and returns it 
    with tf.Graph().as_default() as graph:
        # The name var will prefix every op/nodes in your graph
        # Since we load everything in a new graph, this is not needed
        tf.import_graph_def(graph_def, name="prefix")
    return graph


In [7]:

class SentimentAnalysis():
    def __init__(self, hps, mode="train"):
        self.hps = hps
        self.x = tf.placeholder(tf.int32,   [None, hps.num_steps], name="pl_tokens")
        self.y = tf.placeholder(tf.int32,   [None], name="pl_target")
        self.w = tf.placeholder(tf.float32, [None, hps.num_steps], name="pl_weight")
        self.keep_prob = tf.placeholder(tf.float32, [], name="pl_keep_prob")

        def _embedding(x):
            # character embedding 
            shape       = [hps.vocab_size, hps.emb_size]
            initializer = tf.initializers.variance_scaling(distribution="uniform", dtype=tf.float32)
            emb_mat     = tf.get_variable("emb", shape, initializer=initializer, dtype=tf.float32)
            input_emb   = tf.nn.embedding_lookup(emb_mat, x)   # [batch_size, sent_len, emb_dim]

            # split input_emb -> num_steps
            step_inputs = tf.unstack(input_emb, axis=1)
            return step_inputs

        def _sequence_dropout(step_inputs, keep_prob):
            # apply dropout to each input
            # input : a list of input tensor which shape is [None, input_dim]
            with tf.name_scope('sequence_dropout') as scope:
                step_outputs = []
                for t, input in enumerate(step_inputs):
                    step_outputs.append( tf.nn.dropout(input, keep_prob) )
            return step_outputs

        def sequence_encoding_n21_rnn(step_inputs, cell_size, scope_name):
            # rnn based N21 encoding (GRU)
            step_inputs = list( reversed( step_inputs ) )
            f_rnn_cell = tf.contrib.rnn.GRUCell(cell_size, reuse=None)
            _inputs = tf.stack(step_inputs, axis=1)
            step_outputs, final_state = tf.contrib.rnn.static_rnn(f_rnn_cell,
                                                                  step_inputs,
                                                                  dtype=tf.float32,
                                                                  scope=scope_name)
            
            out = step_outputs[-1]
            return out

        def _to_class(input, num_class):
            out = linear(input, num_class, scope="Rnn2Sentiment") # out = [batch_size, 4]
            return out

        def _loss(out, ref):
            # out : [batch_size, num_class] float - unscaled logits
            # ref : [batch_size] integer
            # calculate loss function using cross-entropy
            # sparce_softmax_cross_entropy_with_logits()는 소프트맥스 활성화 함수를 적용한 다음 크로스 엔트로피를 계산한 것과 같음.
            # 추가적으로 크로스엔트로피 함수는 로짓이 커지면 부동소수점 반올림 오차로 소프트맥스 출력이 0 또는 1이 되는 문제가 있는데, 
            # 이는 음의 무한대가 되는 log(0)이 공식에 포함되어 있기 때문. 이를 작은 양수 e에 대해 log(e)를 적용함으로 문제를 해결함.
            batch_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=out, labels=ref, name="sentiment_loss") # [batch_size]
            loss = tf.reduce_mean(batch_loss)
            return loss
        
        seq_length    = tf.reduce_sum(self.w, 1) # [batch_size]

        step_inputs   = _embedding(self.x)
        step_inputs   = _sequence_dropout(step_inputs, self.keep_prob)
        sent_encoding = sequence_encoding_n21_rnn(step_inputs, hps.enc_dim, scope_name="encoder")
        out           = _to_class(sent_encoding, hps.num_target_class)
        loss          = _loss(out, self.y) 

        out_probs     = tf.nn.softmax(out, name="out_probs")
        out_pred      = tf.argmax(out_probs, 1, name="out_pred")

        self.loss      = loss
        self.out_probs = out_probs
        self.out_pred  = out_pred

        self.global_step = tf.get_variable("global_step", [], tf.int32, initializer=tf.zeros_initializer, trainable=False)

        if mode == "train":
            optimizer       = tf.train.AdamOptimizer(hps.learning_rate)
            self.train_op   = optimizer.minimize(self.loss, global_step=self.global_step)
        else:
            self.train_op = tf.no_op()


    @staticmethod
    def get_default_hparams():
        return HParams(
            learning_rate     = 0.001,
            keep_prob         = 0.5,
        )





In [8]:
#train
def train(train_id_data, num_vocabs, num_taget_class):
    #
    # train sentiment analysis using given train_id_data
    #
    max_epoch = 2000
    model_dir = "./trained_models"
    hps = SentimentAnalysis.get_default_hparams()
    hps.update(
                    batch_size= 100,
                    num_steps = 70,
                    emb_size  = 50,
                    enc_dim   = 100,
                    vocab_size=num_vocabs,
                    num_target_class=num_taget_class
               )

    with tf.variable_scope("model"):
        model = SentimentAnalysis(hps, "train")

    #create 'supervisor' to review training process
    #supervisor manage initialize of session, restore a model from checkpoint, 
    #and close the program when error is raise or operation is done.
    #reference: https://www.tensorflow.org/api_docs/python/tf/train/Supervisor
    sv = tf.train.Supervisor(is_chief=True,
                             logdir=model_dir,
                             summary_op=None,  
                             global_step=model.global_step)

    # tf assign compatible operators for gpu and cpu 
    tf_config = tf.ConfigProto(allow_soft_placement=True)

    with sv.managed_session(config=tf_config) as sess:
        local_step       = 0
        prev_global_step = sess.run(model.global_step)

        train_data_set = SentimentDataset(train_id_data, hps.batch_size, hps.num_steps)
        losses = []
        while not sv.should_stop():
            fetches = [model.global_step, model.loss, model.train_op]
            a_batch_data = next( train_data_set.iterator )
            y, x, w = a_batch_data
            fetched = sess.run(fetches, {
                                            model.x: x, 
                                            model.y: y, 
                                            model.w: w,

                                            model.keep_prob: hps.keep_prob,
                                        }
                              )

            local_step += 1

            _global_step = fetched[0]
            _loss        = fetched[1]
            losses.append( _loss )
            if local_step < 10 or local_step % 10 == 0:
                epoch = train_data_set.get_epoch_num()
                print("Epoch = {:3d} Step = {:7d} loss = {:5.3f}".format(epoch, _global_step, np.mean(losses)) )
                _loss = []                
                if epoch >= max_epoch : break 

        print("Training is done.")
    sv.stop()

    # model.out_pred, model.out_probs
    freeze_graph(model_dir, "model/out_pred,model/out_probs", "frozen_graph.tf.pb") ## freeze graph with params to probobuf format
    

In [9]:
#predict
from tensorflow.core.framework import graph_pb2

def predict(token_vocab, target_vocab, sent): # mode = 'file' | 'sentence'
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force to use cpu only (prediction)
    model_dir = "./trained_models"

    # prepare sentence converting
    # to make raw sentence to id data easily
    in_sent       = '{}\t{}'.format('___DUMMY_CLASS___', sent)
    pred_data     = N21TextData(in_sent, mode='sentence')       
    pred_id_data  = N21Converter.convert(pred_data, target_vocab, token_vocab)
    pred_data_set = SentimentDataset(pred_id_data, 1, 70)

    #
    a_batch_data = next(pred_data_set.predict_iterator) # a result
    b_sentiment_id, b_token_ids, b_weight = a_batch_data

    # Restore graph
    # note that frozen_graph.tf.pb contains graph definition with parameter values in binary format
    _graph_fn =  os.path.join(model_dir, 'frozen_graph.tf.pb')
    with tf.gfile.GFile(_graph_fn, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)

    with tf.Session(graph=graph) as sess:
        # to check load graph
        #for n in tf.get_default_graph().as_graph_def().node: print(n.name)

        # make interface for input
        pl_token     = graph.get_tensor_by_name('import/model/pl_tokens:0')
        pl_keep_prob = graph.get_tensor_by_name('import/model/pl_keep_prob:0')

        # make interface for output
        out_pred  = graph.get_tensor_by_name('import/model/out_pred:0')
        out_probs = graph.get_tensor_by_name('import/model/out_probs:0')
        

        # predict sentence 
        b_best_pred_index, b_pred_probs = sess.run([out_pred, out_probs], feed_dict={
                                                                                        pl_token : b_token_ids,
                                                                                        pl_keep_prob : 1.0,
                                                                                    }
                                          )

        best_pred_index = b_best_pred_index[0]
        pred_probs = b_pred_probs[0]

        best_target_class = target_vocab.get_symbol(best_pred_index)
        print( 'pred_target:', best_target_class,'pred_probs:', pred_probs[best_pred_index] )

    return best_target_class, pred_probs[best_pred_index]


In [10]:
#run train()
train(train_id_data, num_vocabs, num_target_class)

Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Starting standard services.
INFO:tensorflow:Saving checkpoint to path ./trained_models/model.ckpt
INFO:tensorflow:model/global_step/sec: 0
INFO:tensorflow:Starting queue runners.
Epoch =   1 Step =       1 loss = 1.386
Epoch =   1 Step =       2 loss = 1.379
Epoch =   1 Step =       3 loss = 1.374
Epoch =   1 Step =       4 loss = 1.365
Epoch =   1 Step =       5 loss = 1.362
Epoch =   1 Step =       6 loss = 1.349
Epoch =   1 Step =       7 loss = 1.335
Epoch =   1 Step =       8 loss = 1.326
Epoch =   1 Step =       9 loss = 1.327
Epoch =   2 Step =      10 loss = 1.323
Epoch =   3 Step =      20 loss = 1.243
Epoch =   4 Step =      30 loss = 1.189
Epoch =   5 Step =      40 loss = 1.157
Epoch =   6 Step =      50 loss = 1.131
Epoch =   7 Step =      60 loss = 1.103
Epoch =   8 Step =      70 loss = 1.072
Epoc

In [11]:
fn_pred = './data/pred_data.txt'

def pred_data(path):
    sent = []
    
    with codecs.open(path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.rstrip('\n\r')
            sent.append(line)
    return sent


In [12]:
sents = pred_data(fn_pred)

In [13]:
results = []

for sent in sents:
    results.append(predict(token_vocab, target_vocab, sent))

pred_target: POS pred_probs: 0.839555
pred_target: DES pred_probs: 1.0
pred_target: DES pred_probs: 0.999999
pred_target: DES pred_probs: 0.997163
pred_target: DES pred_probs: 1.0
pred_target: DES pred_probs: 1.0
pred_target: DES pred_probs: 0.999999
pred_target: DES pred_probs: 1.0
pred_target: DES pred_probs: 0.999999
pred_target: POS pred_probs: 0.999996
pred_target: POS pred_probs: 0.999561
pred_target: DES pred_probs: 1.0
pred_target: POS pred_probs: 0.999982
pred_target: POS pred_probs: 0.999968
pred_target: NEG pred_probs: 0.999993
pred_target: POS pred_probs: 0.999999


In [14]:
pred_label = []

with codecs.open('./data/pred_label.txt', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.rstrip('\n\r')
        pred_label.append(line)


score = 0
for i in range(len(results)):
    if pred_label[i] == results[i][0]:
        score += 1

average_score = score / len(results)
print('num_predict:', len(results))
print('score: {}, average: {}'.format(score, average_score))

num_predict: 16
score: 11, average: 0.6875
