# Triplet-loss model using extracted features from BERT

- Extract features from pre-trained BERT
- Create and train a model with triplet loss

## Extract features from pre-trained BERT

In [1]:
import sys
sys.path.append("../notebook/bert")

In [2]:
import os
import re
import csv
import time
import codecs
import collections
import tempfile

import modeling
import optimization
import tokenization
import tensorflow as tf

In [3]:
tf.__version__

'1.10.1'

In [4]:
from extract_features import convert_examples_to_features
from extract_features import InputExample
from extract_features import read_examples
from extract_features import _truncate_seq_pair
from extract_features import InputFeatures
from extract_features import input_fn_builder
from extract_features import model_fn_builder

In [5]:
import pandas as pd
import numpy as np

In [6]:
train_data = pd.read_csv("../data/bert_train_1000.tsv", sep="\t")
test_data = pd.read_csv("../data/bert_dev_1000.tsv", sep="\t")

In [7]:
train_data.head(2)

Unnamed: 0,index,claim_app,claim_cited_grant,label
0,0,1 . A process comprising the following steps:(...,"1. A liquid supply apparatus, comprising:a wal...",not_entailment
1,1,1 - 10 . (canceled) 11 . A method for open-loo...,"1. A fuel supply apparatus for an engine, comp...",entailment


In [8]:
train_data.tail(2)

Unnamed: 0,index,claim_app,claim_cited_grant,label
2562,2562,1 . A method implemented at least in part by a...,"1. In a caching device, a method for providing...",entailment
2563,2563,1 . A nonvolatile memory device comprising:a m...,"1. A non-volatile storage system, comprising:a...",entailment


In [9]:
test_data.head(2)

Unnamed: 0,index,claim_app,claim_cited_grant,label
0,0,1 . A rotational angle detection device for a ...,1. A tangible computer-readable medium having ...,not_entailment
1,1,1 . A method comprising:sensing electrocardiog...,"1. A medical system, comprising:an implantable...",entailment


In [10]:
# train_data = train_data.sort_values(['claim_app', 'label'])
# test_data = test_data.sort_values(['claim_app', 'label'])
# train_data = train_data[0:50]
# test_data = test_data[0:50]

In [11]:
tr_claim_app_txt = tempfile.NamedTemporaryFile(mode='r+')
train_data['claim_app'].to_csv(tr_claim_app_txt.name, header=None, index=None)
!sed -i -e 's/\"//g' {tr_claim_app_txt.name}

tr_claim_cited_grant_txt = tempfile.NamedTemporaryFile(mode='r+')
train_data['claim_cited_grant'].to_csv(tr_claim_cited_grant_txt.name, header=None, index=None)
!sed -i -e 's/\"//g' {tr_claim_cited_grant_txt.name}

In [12]:
len(train_data)

2564

In [13]:
te_claim_app_txt = tempfile.NamedTemporaryFile(mode='r+')
test_data['claim_app'].to_csv(te_claim_app_txt.name, header=None, index=None)
!sed -i -e 's/\"//g' {te_claim_app_txt.name}

te_claim_cited_grant_txt = tempfile.NamedTemporaryFile(mode='r+')
test_data['claim_cited_grant'].to_csv(te_claim_cited_grant_txt.name, header=None, index=None)
!sed -i -e 's/\"//g' {te_claim_cited_grant_txt.name}

In [14]:
len( test_data )

2502

In [15]:
class FLAGS(object):
    '''Parameters.'''
    def __init__(self):
        self.vocab_file = "./bert/model/uncased_L-12_H-768_A-12/vocab.txt"
        self.do_lower_case = True
        self.use_tpu = False
        self.layers = "-1"
        self.bert_config_file = "./bert/model/uncased_L-12_H-768_A-12/bert_config.json"
        self.max_seq_length = 512
        self.init_checkpoint = "./bert/model/uncased_L-12_H-768_A-12/bert_model.ckpt"
        self.use_one_hot_embeddings = False
        self.batch_size = 16
        
        # The following parameters are not used in predictions.
        # Just use to create RunConfig.
        self.master = None
        self.save_checkpoints_steps = 1
        self.iterations_per_loop = 1
        self.num_tpu_cores = 1
        self.learning_rate = 0
        self.num_warmup_steps = 0
        self.num_train_steps = 0
        self.train_batch_size = 0
        self.eval_batch_size = 0

FLAGS = FLAGS()

In [16]:
layer_indexes = [int(x) for x in FLAGS.layers.split(",")]

bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

tokenizer = tokenization.FullTokenizer(
  vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
run_config = tf.contrib.tpu.RunConfig(
  master=FLAGS.master,
  tpu_config=tf.contrib.tpu.TPUConfig(
      num_shards=FLAGS.num_tpu_cores,
      per_host_input_for_training=is_per_host))

In [17]:
model_fn = model_fn_builder(
  bert_config=bert_config,
  init_checkpoint=FLAGS.init_checkpoint,
  layer_indexes=layer_indexes,
  use_tpu=FLAGS.use_tpu,
  use_one_hot_embeddings=FLAGS.use_one_hot_embeddings)

In [18]:
estimator = tf.contrib.tpu.TPUEstimator(
  use_tpu=FLAGS.use_tpu,
  model_fn=model_fn,
  config=run_config,
  predict_batch_size=FLAGS.batch_size)

INFO:tensorflow:Using config: {'_save_summary_steps': 100, '_session_config': None, '_save_checkpoints_secs': 600, '_train_distribute': None, '_task_id': 0, '_num_ps_replicas': 0, '_evaluation_master': '', '_num_worker_replicas': 1, '_is_chief': True, '_save_checkpoints_steps': None, '_service': None, '_model_dir': '/tmp/tmpg1qwdqun', '_cluster': None, '_keep_checkpoint_every_n_hours': 10000, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f0c2ccb50b8>, '_log_step_count_steps': None, '_keep_checkpoint_max': 5, '_tpu_config': TPUConfig(iterations_per_loop=2, num_shards=1, num_cores_per_replica=None, per_host_input_for_training=3, tpu_job_name=None, initial_infeed_sleep_secs=None), '_tf_random_seed': None, '_master': '', '_task_type': 'worker', '_device_fn': None, '_global_id_in_cluster': 0}
INFO:tensorflow:_TPUContext: eval_on_tpu True


In [19]:
start = time.time()

In [20]:
examples = read_examples(tr_claim_app_txt.name)
features = convert_examples_to_features(examples=examples, seq_length=FLAGS.max_seq_length, tokenizer=tokenizer)
unique_id_to_feature = {}
for feature in features:
    unique_id_to_feature[feature.unique_id] = feature

input_fn = input_fn_builder(features=features, seq_length=FLAGS.max_seq_length)

results =  np.empty((0,768), float)

for result in estimator.predict(input_fn, yield_single_examples=True):
    results = np.append(results, result['layer_output_0'][0].reshape(1,768), axis=0 )

train_data['feature_claim_app'] = [elem for elem in results]

INFO:tensorflow:*** Example ***
INFO:tensorflow:unique_id: 0
INFO:tensorflow:tokens: [CLS] 1 . a process comprising the following steps : ( a ) cent ##ri ##fu ##ging a micro ##org ##ani ##sm - containing suspension to provide a first micro ##org ##ani ##sm - containing concentrate and a super ##nat ##ant liquid ; ( b ) filtering said first micro ##org ##ani ##sm - containing concentrate , to provide a per ##me ##ate and a second micro ##org ##ani ##sm - containing concentrate . 2 . the process of claim 1 , whereby said process is a continuous flow process comprising : ( a 1 ) cent ##ri ##fu ##ging a micro ##org ##ani ##sm - containing suspension to provide a first micro ##org ##ani ##sm - containing concentrate and a super ##nat ##ant liquid ; ( a 2 ) continuously withdrawing said first micro ##org ##ani ##sm - containing concentrate during cent ##ri ##fu ##gation ; and ( b ) filtering said first micro ##org ##ani ##sm - containing concentrate to provide a per ##me ##ate and a second m

INFO:tensorflow:input_ids: 101 1015 1011 2184 1012 1006 13261 1007 2340 1012 1037 4118 2005 2330 1011 7077 1998 2701 1011 7077 2491 1997 2019 4722 16513 3194 1010 9605 1996 4084 1997 1024 9756 4334 3778 2011 2701 1011 7077 2491 2076 3671 3169 1025 1998 1010 2065 1037 28829 4334 3778 13617 2003 11156 1010 5278 2013 3671 4082 5549 2000 5057 4082 5549 1010 1999 2029 1996 4334 3778 2003 4758 2011 2330 1011 7077 2491 1010 16726 1999 1996 5057 4082 5549 1010 1996 4334 3778 2003 24288 3445 2127 1037 13135 3778 2491 10764 16412 1010 2029 1999 2019 2330 2110 2417 7442 16649 4762 2013 1996 4334 2046 1037 4762 4951 1012 2260 1012 1996 4118 1999 10388 2007 4366 2340 1010 2164 1010 1999 1996 5057 4082 5549 1010 4852 1996 4334 3778 2138 1037 10514 7542 24420 2006 1037 2659 1011 3778 2217 1010 2029 4240 2004 1037 3778 21618 1010 2003 6051 2588 1999 2019 3098 3257 1012 2410 1012 1996 4118 1999 10388 2007 4366 2260 1010 2164 4292 1037 2275 2783 1010 2029 4240 2004 1037 29170 4742 1997 1996 10514 7542 2

INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 

INFO:tensorflow:input_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

INFO:tensorflow:Could not find trained model in model_dir: /tmp/tmpg1qwdqun, running initialization to predict.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Running infer on CPU
INFO:tensorflow:**** Trainable Variables ****
INFO:tensorflow:  name = bert/embeddings/word_embeddings:0, shape = (30522, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/token_type_embeddings:0, shape = (2, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/position_embeddings:0, shape = (512, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/ke

INFO:tensorflow:  name = bert/encoder/layer_4/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tenso

INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT

In [21]:
examples = read_examples(tr_claim_cited_grant_txt.name)
features = convert_examples_to_features(examples=examples, seq_length=FLAGS.max_seq_length, tokenizer=tokenizer)
unique_id_to_feature = {}
for feature in features:
    unique_id_to_feature[feature.unique_id] = feature

input_fn = input_fn_builder(features=features, seq_length=FLAGS.max_seq_length)

results =  np.empty((0,768), float)

for result in estimator.predict(input_fn, yield_single_examples=True):
    results = np.append(results, result['layer_output_0'][0].reshape(1,768), axis=0 )

train_data['feature_claim_cited_grant'] = [elem for elem in results]

INFO:tensorflow:*** Example ***
INFO:tensorflow:unique_id: 0
INFO:tensorflow:tokens: [CLS] 1 . a liquid supply apparatus , comprising : a wall that substantially defines a reservoir ; an expand ##able chamber formed between the wall and a mo ##vable piston , wherein the chamber is selective ##ly communicated with the reservoir through an aperture in the wall ; and a portal configured to enable fluid ##ic communication of the chamber with a vessel , wherein fluid ##ic communication between the reservoir and the vessel is established through the aperture and the chamber after the piston is moved a pre ##de ##ter ##mined amount , and wherein the aperture is blocked by the piston until the chamber expands beyond a given volume and the piston is moved the pre ##de ##ter ##mined amount . 2 . the liquid supply apparatus of claim 1 , wherein : the piston is mo ##vable relative to the wall ; and the chamber is substantially defined by the piston and the wall . 3 . the liquid supply apparatus of

INFO:tensorflow:input_ids: 101 1015 1012 1037 4762 4425 14709 2005 2019 3194 1010 9605 1024 1037 4762 13341 10764 5214 1997 1999 20614 2075 4762 2000 1996 3194 1025 1037 4762 4951 5214 1997 23977 1996 4762 2005 1996 3194 1025 1037 4762 10216 5214 1997 17731 1996 4762 1999 1996 4762 4951 2000 1996 4762 13341 10764 3081 1037 4762 8667 1025 1037 6228 4335 10764 5214 1997 4192 1996 4762 1999 1996 4762 8667 2046 1996 4762 4951 2043 1037 3778 1999 1996 4762 8667 23651 1037 11207 1025 1037 3778 13617 5214 1997 25952 1037 3778 1997 1996 4762 1999 1996 4762 8667 1025 1998 1037 2491 3131 26928 2000 7953 2045 3406 1037 4742 11156 2013 1996 3778 13617 2000 6434 2045 19699 5358 1037 20063 8023 2005 1996 4762 10216 1010 16726 1996 2491 3131 2003 26928 2000 16463 3251 1996 3778 13617 2003 1999 1037 3671 3169 2110 2030 1999 2019 19470 3169 2110 1025 16726 2043 1996 3778 13617 2003 4340 2000 2022 1999 1996 3671 3169 2110 1010 1996 2491 3131 18422 2015 1996 20063 8023 2061 2008 1996 4762 3778 11156 2011

INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 

INFO:tensorflow:input_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

INFO:tensorflow:Could not find trained model in model_dir: /tmp/tmpg1qwdqun, running initialization to predict.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Running infer on CPU
INFO:tensorflow:**** Trainable Variables ****
INFO:tensorflow:  name = bert/embeddings/word_embeddings:0, shape = (30522, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/token_type_embeddings:0, shape = (2, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/position_embeddings:0, shape = (512, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/ke

INFO:tensorflow:  name = bert/encoder/layer_4/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tenso

INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT

In [22]:
train_data.to_pickle("../data/bert_extracted_feature_train_1000.pkl")

In [23]:
examples = read_examples(te_claim_app_txt.name)
features = convert_examples_to_features(examples=examples, seq_length=FLAGS.max_seq_length, tokenizer=tokenizer)
unique_id_to_feature = {}
for feature in features:
    unique_id_to_feature[feature.unique_id] = feature

input_fn = input_fn_builder(features=features, seq_length=FLAGS.max_seq_length)

results =  np.empty((0,768), float)

for result in estimator.predict(input_fn, yield_single_examples=True):
    results = np.append(results, result['layer_output_0'][0].reshape(1,768), axis=0 )

test_data['feature_claim_app'] = [elem for elem in results]

INFO:tensorflow:*** Example ***
INFO:tensorflow:unique_id: 0
INFO:tensorflow:tokens: [CLS] 1 . a rotational angle detection device for a permanent magnet dynamo - electric machine comprising : a stat ##or that includes arm ##at ##ure winding ; a rotor that includes a rotor core and a plurality of permanent magnet ##s ; and a conduct ##ive circuit that includes first conductors extending in an axial direction of the rotor and being disposed in at least two places in a ci ##rc ##um ##fer ##ential direction of the rotor , and second conductors for electrically inter ##con ##ne ##cting the first conductors , wherein , the first conductors are each disposed within one of a range of an electrical angle between & # x ##22 ##12 ; 45 & # x ##b ##0 ; and + 45 & # x ##b ##0 ; in the ci ##rc ##um ##fer ##ential direction with a magnetic pole center of each of the plurality of permanent magnet ##s being set as a reference and a range of an electrical angle between & # x ##22 ##12 ; 45 & # x ##b ##0

INFO:tensorflow:input_ids: 101 1015 1012 1037 4118 9605 1024 13851 16175 11522 3695 13113 1006 14925 2290 1007 2951 2012 2019 14925 2290 13617 2291 1025 6364 1996 10596 14925 2290 2951 2012 1996 14925 2290 13617 2291 2000 5646 2028 2030 2062 5005 5300 1010 16726 2169 5005 3643 2003 24668 1997 1037 10903 1997 1037 6922 1997 1996 10596 14925 2290 2951 2008 2003 9280 3141 2000 1037 2512 1011 2540 3786 3120 1025 1998 6016 1996 2028 2030 2062 5005 5300 2000 2019 12978 13460 23416 2075 2291 2000 13460 23416 1996 14925 2290 13617 2291 1012 1016 1012 1996 4118 1997 4366 1015 1010 16726 1996 12978 13460 23416 2075 2291 10438 2019 4106 1997 1996 2028 2030 2062 5005 5300 1998 14847 1037 3120 1997 1037 3327 6922 1997 1996 10596 14925 2290 2951 2008 2003 3141 2000 1037 3327 2512 1011 2540 3786 3120 2241 2006 1996 4106 1012 1017 1012 1996 4118 1997 4366 1016 1010 2582 9605 1010 2044 6016 1996 2028 2030 2062 5005 5300 1010 4909 2019 7953 1998 1010 26651 2000 1996 7953 1010 26709 6593 17441 2028 2030 

INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 

INFO:tensorflow:input_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

INFO:tensorflow:Could not find trained model in model_dir: /tmp/tmpg1qwdqun, running initialization to predict.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Running infer on CPU
INFO:tensorflow:**** Trainable Variables ****
INFO:tensorflow:  name = bert/embeddings/word_embeddings:0, shape = (30522, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/token_type_embeddings:0, shape = (2, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/position_embeddings:0, shape = (512, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/ke

INFO:tensorflow:  name = bert/encoder/layer_4/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tenso

INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT

In [24]:
examples = read_examples(te_claim_cited_grant_txt.name)
features = convert_examples_to_features(examples=examples, seq_length=FLAGS.max_seq_length, tokenizer=tokenizer)
unique_id_to_feature = {}
for feature in features:
    unique_id_to_feature[feature.unique_id] = feature

input_fn = input_fn_builder(features=features, seq_length=FLAGS.max_seq_length)

results =  np.empty((0,768), float)

for result in estimator.predict(input_fn, yield_single_examples=True):
    results = np.append(results, result['layer_output_0'][0].reshape(1,768), axis=0 )

test_data['feature_claim_cited_grant'] = [elem for elem in results]

INFO:tensorflow:*** Example ***
INFO:tensorflow:unique_id: 0
INFO:tensorflow:tokens: [CLS] 1 . a tangible computer - read ##able medium having stored there ##on computer - ex ##ec ##utable instructions for implementing a custom ##iza ##ble visual user interface on a screen display of a tel ##ep ##hony device comprising : an application program for providing services to a user on the tel ##ep ##hony device ; a shell program that expose ##s an application programming interface for custom ##izing the custom ##iza ##ble visual user interface ; a message center operative ##ly connected to the shell program for providing access to a user message ; and a tel ##ep ##hony module for managing tel ##ep ##hony functions , wherein the application programming interface exposed by the shell program comprises one or more methods , the application programming interface receiving input from an application , and wherein execution of the one or more methods of the application programming interface is resp

INFO:tensorflow:input_ids: 101 1015 1012 1037 2966 2291 1010 9605 1024 2019 27159 3085 5080 2164 1037 3847 1998 1037 4742 5527 5783 10837 1999 1996 3847 1010 1996 3847 5716 2019 28688 1997 1996 5080 1025 2019 27159 3085 2966 5992 2599 2164 1037 2034 28688 1998 1037 2117 28688 1010 1996 2034 1998 2117 28688 2015 5716 1037 13851 3940 2043 1996 2599 2003 11211 2000 1996 5080 1025 1998 1037 5080 13151 10837 1999 1996 3847 1998 16984 2000 11487 5005 2006 1996 13851 3940 1010 6709 1996 5005 2004 2291 5005 1010 1998 3573 2012 2560 2028 20810 1041 21693 7099 2013 1996 13851 3940 1998 1037 16483 1041 21693 7099 2013 2169 1997 1037 29018 1997 3405 7689 1999 3433 2000 12151 1996 2291 5005 1010 16726 1996 29018 1997 3405 7689 2950 1037 2034 3940 2719 2011 1996 2034 2599 28688 1998 1996 5080 28688 1010 2043 1996 2599 2003 11211 2000 1996 5080 1010 1998 1037 2117 3940 2719 2011 1996 2117 2599 28688 1998 1996 5080 28688 1010 2043 1996 2599 2003 11211 2000 1996 5080 1012 1016 1012 1996 2291 1997 4366 

INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 

INFO:tensorflow:input_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

INFO:tensorflow:Could not find trained model in model_dir: /tmp/tmpg1qwdqun, running initialization to predict.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Running infer on CPU
INFO:tensorflow:**** Trainable Variables ****
INFO:tensorflow:  name = bert/embeddings/word_embeddings:0, shape = (30522, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/token_type_embeddings:0, shape = (2, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/position_embeddings:0, shape = (512, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/ke

INFO:tensorflow:  name = bert/encoder/layer_4/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tenso

INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT

In [25]:
test_data.to_pickle("../data/bert_extracted_feature_test_1000.pkl")

In [26]:
elapsed_time = time.time() - start
print ("elapsed_time:{0}".format(elapsed_time) + "[sec]")

elapsed_time:24374.943607330322[sec]


## Create and train a model with triplet loss

Please restart kernel before executing the following cells.

In [1]:
import os
import pandas as pd
import tensorflow as tf
import numpy as np

tf.enable_eager_execution()
tfe = tf.contrib.eager

In [2]:
train_feature = pd.read_pickle("../data/bert_extracted_feature_train_1000.pkl")
test_feature = pd.read_pickle("../data/bert_extracted_feature_test_1000.pkl")

In [3]:
train_feature = train_feature.sort_values(['claim_app', 'label'])
test_feature = test_feature.sort_values(['claim_app', 'label'])

In [4]:
train_feature.head(2)

Unnamed: 0,index,claim_app,claim_cited_grant,label,feature_claim_app,feature_claim_cited_grant
2246,2246,"1 An LED (light emitting diode) lamp, compris...",1. An LED lighting device having heat convecti...,entailment,"[-0.718012809753418, 0.4575798809528351, -0.58...","[-0.819080114364624, 0.6711320877075195, -0.60..."
2408,2408,"1 An LED (light emitting diode) lamp, compris...",1. A method of operating a turbocharged intern...,not_entailment,"[-0.718012809753418, 0.4575798809528351, -0.58...","[-1.1225836277008057, 0.0022592851892113686, -..."


In [5]:
class Model(object):
    def __init__(self, input_shape, output_shape):
        self.input_shape = input_shape
        self.output_shape = output_shape
        self.W = tfe.Variable( tf.random_normal( [self.input_shape, self.output_shape] ), name='weight' )
        self.B = tfe.Variable( tf.random_normal( [self.output_shape] ), name='bias' ) 
        self.variables = [ self.W, self.B ]
    
    def frwrd_pass(self,X_train):
        out = tf.matmul( X_train, self.W ) + self.B
        
        return out

In [6]:
def tripletloss(anchor_out, positive_out, negative_out, margin=0.2):
    norm_a_out = tf.nn.l2_normalize(anchor_out, axis=1)
    norm_p_out = tf.nn.l2_normalize(positive_out, axis=1)
    norm_n_out = tf.nn.l2_normalize(negative_out, axis=1)
    
    d_pos = tf.losses.cosine_distance(norm_a_out, norm_p_out, axis=1)
    d_neg = tf.losses.cosine_distance(norm_a_out, norm_n_out, axis=1)
    
    loss = tf.maximum(0.0, margin + d_pos - d_neg)
    
    return tf.reduce_mean(loss)

In [7]:
def train(input_data_np, batch_size, epochs):
#     optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.001)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.00001)
    data_num = int(input_data_np.shape[1])

    for i in range(epochs):
        rand_idx = np.random.permutation(data_num)
        index_data_np = np.array([
            input_data_np[0][rand_idx], 
            input_data_np[1][rand_idx], 
            input_data_np[2][rand_idx]])

        input_data = tf.convert_to_tensor(input_data_np, dtype=tf.float32)
        anchor_data, positive_data, negative_data = input_data

        for iter_id in range(data_num // batch_size):        
            with tf.GradientTape() as tape:
                anchor_out = model.frwrd_pass(anchor_data[iter_id*batch_size : (iter_id+1)*batch_size])
                positive_out = model.frwrd_pass(positive_data[iter_id*batch_size : (iter_id+1)*batch_size])
                negative_out = model.frwrd_pass(negative_data[iter_id*batch_size : (iter_id+1)*batch_size])
                curr_loss = tripletloss(anchor_out, positive_out, negative_out)
            grads = tape.gradient( curr_loss, model.variables )
            optimizer.apply_gradients(zip(grads, model.variables), global_step=tf.train.get_or_create_global_step())

        if i % 10 == 0:
            print( "Loss at step {:d}: {:.5f}".format(i, curr_loss) )

In [8]:
# def train(input_data, epochs):
#     optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.005)
#     anchor_data, positive_data, negative_data = input_data

#     for i in range(epochs):
#         with tf.GradientTape() as tape:
#             anchor_out = model.frwrd_pass(anchor_data)
#             positive_out = model.frwrd_pass(positive_data)
#             negative_out = model.frwrd_pass(negative_data)
#             curr_loss = tripletloss(anchor_out, positive_out, negative_out)
#         grads = tape.gradient( curr_loss, model.variables )
#         optimizer.apply_gradients(zip(grads, model.variables), global_step=tf.train.get_or_create_global_step())

#         if i % 10 == 0:
#             print( "Loss at step {:d}: {:.5f}".format(i, curr_loss) )

In [9]:
# class Model(tf.keras.Model):
#     """
#     Model which extracts features for computing similarities.
#     """
#     def __init__(self, hidden_units, output_units):
#         super(Model, self).__init__()
#         self.hidden = tf.keras.layers.Dense(hidden_units, activation='relu')
#         self.output_feature = tf.keras.layers.Dense(output_units)

#     def call(self, inputs, training=None, mask=None):
#         x = self.hidden(inputs)
#         output = self.output_feature(x)

#         return output

In [10]:
anchor_list = []
positive_list = []
negative_list = []

for row in train_feature.itertuples():
    if row.label == 'entailment':
        anchor_list.append(row.feature_claim_app)
        positive_list.append(row.feature_claim_cited_grant)
    elif row.label == 'not_entailment':
        negative_list.append(row.feature_claim_cited_grant)

In [11]:
input_data = np.array([
    np.array(anchor_list),
    np.array(positive_list),
    np.array(negative_list),    
])

In [12]:
model = Model(input_shape=768, output_shape=100)

In [13]:
%%time
train(input_data, 10, 50)

Loss at step 0: 0.16513
Loss at step 10: 0.15407
Loss at step 20: 0.13694
Loss at step 30: 0.11066
Loss at step 40: 0.07395
CPU times: user 1min 23s, sys: 6.09 s, total: 1min 29s
Wall time: 1min 13s


In [15]:
test = np.array(input_data[0])
test = tf.convert_to_tensor(test[0:1,:], dtype=tf.float32)
test

<tf.Tensor: id=31, shape=(1, 768), dtype=float32, numpy=
array([[-7.18012810e-01,  4.57579881e-01, -5.84138095e-01,
         1.02933601e-01,  2.07249269e-01,  2.49588534e-01,
         1.70106426e-01,  7.55572021e-01, -4.79396135e-01,
        -4.63076293e-01, -2.48222440e-01, -2.84224927e-01,
        -3.57413650e-01,  6.64952338e-01, -3.01618040e-01,
         1.44136354e-01,  6.80678725e-01,  4.44230616e-01,
         7.32292533e-02, -1.66640431e-03, -6.15086615e-01,
        -3.31324786e-01,  7.39133894e-01,  1.44808859e-01,
        -2.26765350e-02, -2.41386011e-01, -7.70546019e-01,
        -3.77424717e-01, -6.00395918e-01,  1.97793871e-01,
         4.26622719e-01,  3.50045592e-01,  1.28916930e-02,
        -8.72512400e-01,  4.67004716e-01, -1.19301522e+00,
         5.63755929e-01, -5.98323166e-01,  5.48169613e-01,
        -4.92580712e-01, -5.76701045e-01,  2.63832718e-01,
         5.74676871e-01, -2.74352282e-01, -5.03228486e-01,
         7.46622503e-01, -3.70890880e+00,  1.11215971e-01,

In [15]:
model.frwrd_pass(test)

<tf.Tensor: id=2528761, shape=(1, 100), dtype=float32, numpy=
array([[ -5.9848123 ,   3.3136501 ,   0.7599023 ,  -0.30176213,
          9.084582  ,   0.6801322 ,  -0.26698658,   0.24088414,
         -2.3610084 ,  -0.21283185, -10.991645  , -11.912355  ,
         -8.895027  ,   8.2631235 ,   4.320994  ,  13.29192   ,
          7.327576  , -15.107805  ,   3.8141959 ,  12.2694    ,
          7.5517645 ,   6.52964   ,  -0.42170656,   2.541043  ,
          0.29088554,  11.865561  ,   4.453747  ,   1.2210617 ,
          2.1004543 , -17.643057  ,   5.0789804 ,  -2.4557247 ,
        -13.13237   ,   8.253451  ,  12.37809   ,  12.356433  ,
         10.254477  ,  -6.0443463 , -12.009161  ,  11.098864  ,
         -9.91517   ,  -9.4919405 ,   6.6668797 ,   3.5451522 ,
        -14.211459  ,  -6.225542  ,  -7.812647  ,  -4.996532  ,
          1.2727321 ,  -8.647376  ,   4.619104  ,   6.6735597 ,
          3.4074397 ,   6.2721453 ,  13.323231  ,   1.4377658 ,
          2.546699  ,   6.272154  ,   4.19

Save the trained model.

In [16]:
os.makedirs('./bert/model/trained_tripletloss_model', exist_ok=True)

In [17]:
saver = tfe.Saver(model.variables)

In [18]:
saver.save("./bert/model/trained_tripletloss_model/ckpt")

'./bert/model/trained_tripletloss_model/ckpt'

### Load the trained model and extract features of test datasets.

In [3]:
model = Model(input_shape=768, output_shape=100)

In [13]:
tfe.Saver((model.variables)).restore("./bert/model/trained_tripletloss_model/ckpt")

INFO:tensorflow:Restoring parameters from ./bert/model/trained_tripletloss_model/ckpt


In [16]:
model.frwrd_pass(test)

<tf.Tensor: id=36, shape=(1, 100), dtype=float32, numpy=
array([[ -5.9848123 ,   3.3136501 ,   0.7599023 ,  -0.30176213,
          9.084582  ,   0.6801322 ,  -0.26698658,   0.24088414,
         -2.3610084 ,  -0.21283185, -10.991645  , -11.912355  ,
         -8.895027  ,   8.2631235 ,   4.320994  ,  13.29192   ,
          7.327576  , -15.107805  ,   3.8141959 ,  12.2694    ,
          7.5517645 ,   6.52964   ,  -0.42170656,   2.541043  ,
          0.29088554,  11.865561  ,   4.453747  ,   1.2210617 ,
          2.1004543 , -17.643057  ,   5.0789804 ,  -2.4557247 ,
        -13.13237   ,   8.253451  ,  12.37809   ,  12.356433  ,
         10.254477  ,  -6.0443463 , -12.009161  ,  11.098864  ,
         -9.91517   ,  -9.4919405 ,   6.6668797 ,   3.5451522 ,
        -14.211459  ,  -6.225542  ,  -7.812647  ,  -4.996532  ,
          1.2727321 ,  -8.647376  ,   4.619104  ,   6.6735597 ,
          3.4074397 ,   6.2721453 ,  13.323231  ,   1.4377658 ,
          2.546699  ,   6.272154  ,   4.1985755

In [17]:
test_feature.head(2)

Unnamed: 0,index,claim_app,claim_cited_grant,label,feature_claim_app,feature_claim_cited_grant
1070,1070,"1 ) Body protective clothing, comprising a har...",1. A protective headgear assembly that reduces...,entailment,"[-1.1071923971176147, -0.04645990580320358, -0...","[-0.9353654980659485, 0.26779890060424805, -0...."
406,406,"1 ) Body protective clothing, comprising a har...","1. A method of making a molded coupler, the me...",not_entailment,"[-1.1071923971176147, -0.04645990580320358, -0...","[-0.874859094619751, 0.3370829224586487, -0.17..."


Use extracted features from BERT.

In [18]:
target_v = test_feature['feature_claim_app'][1070]
target_v = target_v / np.linalg.norm(target_v)

In [19]:
dist_dict = { 
    idx: np.sum( target_v * (elem / np.linalg.norm(elem)) ) 
    for idx,elem 
    in zip(test_feature['index'], test_feature['feature_claim_cited_grant']) }

In [35]:
sorted(dist_dict.items(), key=lambda x: x[1])[-10:]

[(1143, 0.8790275587640799),
 (1699, 0.8790275587640799),
 (403, 0.8790691807517872),
 (2388, 0.8806933127862158),
 (2471, 0.8806933127862158),
 (1229, 0.8845689904418945),
 (1948, 0.8845689904418945),
 (584, 0.8849547994259004),
 (1620, 0.8849547994259004),
 (807, 0.8852307968930513)]

In [36]:
test_feature[ test_feature['index'] == 807 ]

Unnamed: 0,index,claim_app,claim_cited_grant,label,feature_claim_app,feature_claim_cited_grant
807,807,1 - 11 . (canceled) 12 . A thermoelectric powe...,1. A thermoelectric generating device includin...,entailment,"[-1.1499402523040771, -0.04302491992712021, -0...","[-0.9175975322723389, 0.23365767300128937, -0...."


Use extracted features from the trained model.

In [21]:
target_f = tf.convert_to_tensor(test_feature['feature_claim_app'][1017].reshape(1,768), dtype=tf.float32)

In [22]:
target_o = model.frwrd_pass(target_f).numpy()
target_o = target_o / np.linalg.norm(target_o)

In [23]:
grant_f = tf.convert_to_tensor(test_feature['feature_claim_cited_grant'], dtype=tf.float32)

In [24]:
grant_o = model.frwrd_pass(grant_f).numpy()

In [25]:
dist_dict_model = { 
    idx: np.sum( target_o * (elem / np.linalg.norm(elem)) ) 
    for idx,elem 
    in zip(test_feature['index'], grant_o) }

In [26]:
sorted(dist_dict_model.items(), key=lambda x: x[1])[-10:]

[(2310, 0.74123454),
 (529, 0.7500526),
 (1550, 0.7500526),
 (1816, 0.7500526),
 (43, 0.7528553),
 (534, 0.7571601),
 (773, 0.7571601),
 (1470, 0.7571601),
 (1675, 0.7571601),
 (2354, 0.7571601)]

In [37]:
test_feature[ test_feature['index'] == 2354 ]

Unnamed: 0,index,claim_app,claim_cited_grant,label,feature_claim_app,feature_claim_cited_grant
2354,2354,1 . A method for analyzing data from a continu...,1. A method for use in calibrating a signal fr...,entailment,"[-0.986760139465332, 0.11502363532781601, -0.0...","[-0.9303633570671082, 0.4443694055080414, -0.1..."


In [38]:
test_feature[ test_feature['index'] == 1675 ]

Unnamed: 0,index,claim_app,claim_cited_grant,label,feature_claim_app,feature_claim_cited_grant
1675,1675,1 . Pre-distancing collapsible system comprisi...,1. A method for use in calibrating a signal fr...,not_entailment,"[-0.6365559101104736, 0.5404213070869446, 0.06...","[-0.9303633570671082, 0.4443694055080414, -0.1..."


In [39]:
test_feature[ test_feature['index'] == 1470 ]

Unnamed: 0,index,claim_app,claim_cited_grant,label,feature_claim_app,feature_claim_cited_grant
1470,1470,1 . A failure diagnostic apparatus for failure...,1. A method for use in calibrating a signal fr...,not_entailment,"[-0.9877709150314331, 0.20471225678920746, -0....","[-0.9303633570671082, 0.4443694055080414, -0.1..."


## Trial and errors.

In [None]:
!echo 'Who was Jim Henson ? ||| Jim Henson was a puppeteer' >> ./bert/tmp/input.txt

In [None]:
!python3 ./bert/extract_features.py \
  --input_file=./bert/tmp/input.txt \
  --output_file=./bert/tmp/output.json \
  --vocab_file=./bert/model/uncased_L-12_H-768_A-12/vocab.txt \
  --bert_config_file=./bert/model/uncased_L-12_H-768_A-12/bert_config.json \
  --init_checkpoint=./bert/model/uncased_L-12_H-768_A-12/bert_model.ckpt \
  --layers=-1 \
  --max_seq_length=512 \
  --batch_size=8

In [None]:
import json
with open("./bert/tmp/output.json") as f:
    output = json.load(f)

In [20]:
results =  np.empty((0,768), float)

for result in estimator.predict(input_fn, yield_single_examples=True):
    results = np.append(results, result['layer_output_0'][0].reshape(1,768), axis=0 )

INFO:tensorflow:Could not find trained model in model_dir: /tmp/tmpgwpsyy12, running initialization to predict.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Running infer on CPU
INFO:tensorflow:**** Trainable Variables ****
INFO:tensorflow:  name = bert/embeddings/word_embeddings:0, shape = (30522, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/token_type_embeddings:0, shape = (2, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/position_embeddings:0, shape = (512, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/embeddings/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_0/attention/self/ke

INFO:tensorflow:  name = bert/encoder/layer_4/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_4/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tenso

INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*
INFO:tensorflow:  name = bert/encoder/layer_9/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT

In [21]:
results.shape

(2, 768)

In [22]:
result['layer_output_0'].shape

(512, 768)

In [23]:
result['layer_output_0'][0].shape

(768,)

In [24]:
train_data.head()

Unnamed: 0,index,claim_app,claim_cited_grant,label
0,0,1 . A process comprising the following steps:(...,"1. A liquid supply apparatus, comprising:a wal...",not_entailment
1,1,1 - 10 . (canceled) 11 . A method for open-loo...,"1. A fuel supply apparatus for an engine, comp...",entailment
2,2,1 . A handpiece for treating biological tissue...,1. A method for irradiating tissue having abso...,entailment
3,3,1 . A power cable comprising:a power input com...,1. A temperature regulating system for a vehic...,not_entailment
4,4,1 . A cutting insert having a substantially cu...,1. A toolholder comprising:a) a cutter body ro...,entailment


In [28]:
test = []

for _ in range(len(train_data)):
    test.append(results[0])

In [29]:
train_data['test'] = test

In [32]:
train_data

Unnamed: 0,index,claim_app,claim_cited_grant,label,test
0,0,1 . A process comprising the following steps:(...,"1. A liquid supply apparatus, comprising:a wal...",not_entailment,"[-0.628110945224762, 0.19321474432945251, -0.7..."
1,1,1 - 10 . (canceled) 11 . A method for open-loo...,"1. A fuel supply apparatus for an engine, comp...",entailment,"[-0.628110945224762, 0.19321474432945251, -0.7..."
2,2,1 . A handpiece for treating biological tissue...,1. A method for irradiating tissue having abso...,entailment,"[-0.628110945224762, 0.19321474432945251, -0.7..."
3,3,1 . A power cable comprising:a power input com...,1. A temperature regulating system for a vehic...,not_entailment,"[-0.628110945224762, 0.19321474432945251, -0.7..."
4,4,1 . A cutting insert having a substantially cu...,1. A toolholder comprising:a) a cutter body ro...,entailment,"[-0.628110945224762, 0.19321474432945251, -0.7..."
5,5,"1 . A multimedia system, comprising:a multimed...","1. An illumination module, comprising:an integ...",entailment,"[-0.628110945224762, 0.19321474432945251, -0.7..."
6,6,1 .- 10 . (canceled) 11 . A blade for a comput...,1. A slide apparatus comprising:a slider beam ...,entailment,"[-0.628110945224762, 0.19321474432945251, -0.7..."
7,7,1 . A method for performing radio usage measur...,1. A mechanical torque wrench for engaging a w...,not_entailment,"[-0.628110945224762, 0.19321474432945251, -0.7..."
8,8,1 . A system configured to exchange energy wir...,1. A submersible table and seat assembly for u...,not_entailment,"[-0.628110945224762, 0.19321474432945251, -0.7..."
9,9,1 . A computer-implemented method for gray bal...,"1. A method for calibrating a printing device,...",entailment,"[-0.628110945224762, 0.19321474432945251, -0.7..."
