In [12]:
import tensorflow as tf
from transformers import TFBertForNextSentencePrediction
from transformers import AutoTokenizer
from tensorflow.keras.layers import Softmax



In [3]:
# Load the next-sentence-prediction model and its tokenizer from the same
# checkpoint. from_pt=True asks transformers to build the TensorFlow model
# from the checkpoint's PyTorch weights.
CHECKPOINT = 'bert-base-uncased'

model = TFBertForNextSentencePrediction.from_pretrained(
    CHECKPOINT,
    from_pt=True,
)
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)


All PyTorch model weights were used when initializing TFBertForNextSentencePrediction.

All the weights of TFBertForNextSentencePrediction were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForNextSentencePrediction for predictions without further training.


In [4]:
# First sentence of the pair (sentence A).
prompt = (
    "In Italy, pizza served in formal settings, "
    "such as at a restaurant, is presented unsliced."
)
# Candidate follow-up text (sentence B) that continues the same topic.
next_sentence = (
    "pizza is eaten with the use of a knife and fork. "
    "In casual settings, however, it is cut into wedges "
    "to be eaten while held in the hand."
)


In [6]:
# Tokenize the two sentences together as one pair and get TensorFlow tensors back.
sentence_pair = (prompt, next_sentence)
encoding = tokenizer(*sentence_pair, return_tensors='tf')

# Show the token ids produced for the pair.
input_ids = encoding['input_ids']
print(input_ids)


tf.Tensor(
[[  101  1999  3304  1010 10733  2366  1999  5337 10906  1010  2107  2004
   2012  1037  4825  1010  2003  3591  4895 14540  6610  2094  1012   102
  10733  2003  8828  2007  1996  2224  1997  1037  5442  1998  9292  1012
   1999 10017 10906  1010  2174  1010  2009  2003  3013  2046 17632  2015
   2000  2022  8828  2096  2218  1999  1996  2192  1012   102]], shape=(1, 58), dtype=int32)


In [7]:
# Look up the [CLS] token that marks the start of the input and the [SEP]
# token that marks the end of each sentence, together with their ids.
special_tokens = (
    (tokenizer.cls_token, tokenizer.cls_token_id),
    (tokenizer.sep_token, tokenizer.sep_token_id),
)
for token_text, token_id in special_tokens:
    print(token_text, ':', token_id)


[CLS] : 101
[SEP] : 102


In [8]:
# Decode the first row of token ids back to text so the inserted
# special tokens become visible.
first_row_ids = encoding['input_ids'][0]
decoded_text = tokenizer.decode(first_row_ids)
print(decoded_text)


[CLS] in italy, pizza served in formal settings, such as at a restaurant, is presented unsliced. [SEP] pizza is eaten with the use of a knife and fork. in casual settings, however, it is cut into wedges to be eaten while held in the hand. [SEP]


In [9]:
# Inspect the segment (token type) ids that the tokenizer assigned to the two sentences.
segment_ids = encoding['token_type_ids']
print(segment_ids)


tf.Tensor(
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1
  1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]], shape=(1, 58), dtype=int32)


In [None]:
# Run the sentence pair through the model; element 0 of the output holds the
# next-sentence-prediction logits.
logits = model(encoding['input_ids'], token_type_ids=encoding['token_type_ids'])[0]

# Turn the logits into probabilities over the last axis.
# Pitfall: the imported `Softmax` is a Keras layer *class*, not a function, so
# `Softmax(logits)` would pass the logits as the layer's `axis` argument
# instead of applying softmax. A layer must be instantiated first and then
# called, i.e. `Softmax()(logits)`. The functional op below computes the same
# result without creating a layer object.
probs = tf.nn.softmax(logits, axis=-1)

print(probs)


tf.Tensor([[9.9999714e-01 2.8381860e-06]], shape=(1, 2), dtype=float32)


In [16]:
# Pick the index with the highest probability as the predicted label.
# Per the Hugging Face NSP documentation, label 0 means sentence B continues
# sentence A and label 1 means sentence B is a random sentence.
predicted_label = tf.math.argmax(probs, axis=-1).numpy()
print('최종 예측 레이블 :', predicted_label)

최종 예측 레이블 : [0]


In [17]:
# Repeat the test with two unrelated sentences, i.e. a pair in which the
# second sentence does not actually follow the first.
prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
next_sentence = "The sky is blue due to the shorter wavelength of blue light."
encoding = tokenizer(prompt, next_sentence, return_tensors='tf')

# Element 0 of the model output holds the next-sentence-prediction logits.
logits = model(encoding['input_ids'], token_type_ids=encoding['token_type_ids'])[0]

# Apply softmax over the last axis with the functional op; it gives the same
# probabilities as instantiating and calling a Keras Softmax layer.
probs = tf.nn.softmax(logits, axis=-1)

# Print the predicted label (index of the highest probability).
print('최종 예측 레이블 :', tf.math.argmax(probs, axis=-1).numpy())


최종 예측 레이블 : [1]
