# The Stanford Sentiment Treebank 
The Stanford Sentiment Treebank consists of sentences from movie reviews and human annotations of their sentiment. The task is to predict the sentiment of a given sentence. We use the two-way (positive/negative) class split, and use only sentence-level labels.

In [90]:
import tensorflow as tf
import tensorflow_datasets
from transformers import (
    BertConfig,
    BertTokenizer,
    TFBertModel,
    TFBertForSequenceClassification,
    glue_convert_examples_to_features,
    glue_processors,
)
import math
import numpy as np

In [2]:
# Load dataset, tokenizer, model from pretrained model/vocabulary
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

In [3]:
data, info = tensorflow_datasets.load(name='glue/sst2',with_info=True)

INFO:absl:Overwrite dataset info from restored data version.
INFO:absl:Reusing dataset glue (/Users/tarrade/tensorflow_datasets/glue/sst2/1.0.0)
INFO:absl:Constructing tf.data.Dataset for split None, from /Users/tarrade/tensorflow_datasets/glue/sst2/1.0.0


In [4]:
info

tfds.core.DatasetInfo(
    name='glue',
    version=1.0.0,
    description='GLUE, the General Language Understanding Evaluation benchmark
(https://gluebenchmark.com/) is a collection of resources for training,
evaluating, and analyzing natural language understanding systems.

            The Stanford Sentiment Treebank consists of sentences from movie reviews and
            human annotations of their sentiment. The task is to predict the sentiment of a
            given sentence. We use the two-way (positive/negative) class split, and use only
            sentence-level labels.',
    homepage='https://nlp.stanford.edu/sentiment/index.html',
    features=FeaturesDict({
        'idx': tf.int32,
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=2),
        'sentence': Text(shape=(), dtype=tf.string),
    }),
    total_num_examples=70042,
    splits={
        'test': 1821,
        'train': 67349,
        'validation': 872,
    },
    supervised_keys=None,
    citation="""@

In [5]:
info.features["label"].names

['negative', 'positive']

In [6]:
info.features["label"].num_classes

2

In [7]:
data

{'test': <DatasetV1Adapter shapes: {idx: (), label: (), sentence: ()}, types: {idx: tf.int32, label: tf.int64, sentence: tf.string}>,
 'train': <DatasetV1Adapter shapes: {idx: (), label: (), sentence: ()}, types: {idx: tf.int32, label: tf.int64, sentence: tf.string}>,
 'validation': <DatasetV1Adapter shapes: {idx: (), label: (), sentence: ()}, types: {idx: tf.int32, label: tf.int64, sentence: tf.string}>}

In [8]:
data.keys()

dict_keys(['test', 'train', 'validation'])

In [9]:
data['train']

<DatasetV1Adapter shapes: {idx: (), label: (), sentence: ()}, types: {idx: tf.int32, label: tf.int64, sentence: tf.string}>

In [10]:
from tensorflow.python.data.ops import dataset_ops
dataset_ops.get_legacy_output_shapes(data['train'])

{'idx': TensorShape([]), 'label': TensorShape([]), 'sentence': TensorShape([])}

In [11]:
dataset_ops.get_legacy_output_types(data['train'])

{'idx': tf.int32, 'label': tf.int64, 'sentence': tf.string}

In [12]:
dataset_ops.get_legacy_output_classes(data['train'])

{'idx': tensorflow.python.framework.ops.Tensor,
 'label': tensorflow.python.framework.ops.Tensor,
 'sentence': tensorflow.python.framework.ops.Tensor}

In [13]:
for l in data['train']:
    print(l.keys())
    print(l)
    print(l['idx'])
    print(l['label'])
    print(l['sentence'])
    break

dict_keys(['idx', 'label', 'sentence'])
{'idx': <tf.Tensor: shape=(), dtype=int32, numpy=16399>, 'label': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'sentence': <tf.Tensor: shape=(), dtype=string, numpy=b'for the uninitiated plays better on video with the sound '>}
tf.Tensor(16399, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(b'for the uninitiated plays better on video with the sound ', shape=(), dtype=string)


In [14]:
np_array=np.array(list(data['train'].as_numpy_iterator()))

In [15]:
for element in np_array: 
    print(element) 
    break

{'idx': 16399, 'label': 0, 'sentence': b'for the uninitiated plays better on video with the sound '}


In [16]:
np.shape(np_array)

(67349,)

In [17]:
len(np_array)

67349

In [18]:
np_array[0]

{'idx': 16399,
 'label': 0,
 'sentence': b'for the uninitiated plays better on video with the sound '}

In [19]:
# Prepare dataset for GLUE as a tf.data.Dataset instance
train_dataset = glue_convert_examples_to_features(data['train'], tokenizer, max_length=128, task='sst-2')
valid_dataset = glue_convert_examples_to_features(data['validation'], tokenizer, max_length=128, task='sst-2')
train_dataset = train_dataset.shuffle(100).batch(32).repeat(2)
valid_dataset = valid_dataset.batch(64)

In [20]:
list(train_dataset.take(1).as_numpy_iterator())

[({'input_ids': array([[  101,   112,   188, ...,     0,     0,     0],
          [  101,  2434,   102, ...,     0,     0,     0],
          [  101,  1103,  1273, ...,     0,     0,     0],
          ...,
          [  101,   112,   188, ...,     0,     0,     0],
          [  101,  1191,  5411, ...,     0,     0,     0],
          [  101,   170, 13533, ...,     0,     0,     0]], dtype=int32),
   'attention_mask': array([[1, 1, 1, ..., 0, 0, 0],
          [1, 1, 1, ..., 0, 0, 0],
          [1, 1, 1, ..., 0, 0, 0],
          ...,
          [1, 1, 1, ..., 0, 0, 0],
          [1, 1, 1, ..., 0, 0, 0],
          [1, 1, 1, ..., 0, 0, 0]], dtype=int32),
   'token_type_ids': array([[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]], dtype=int32)},
  array([0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0,
         0,

In [21]:
list(valid_dataset.take(1).as_numpy_iterator())

[({'input_ids': array([[ 101,  170, 2860, ...,    0,    0,    0],
          [ 101, 3022,  170, ...,    0,    0,    0],
          [ 101,  178, 1821, ...,    0,    0,    0],
          ...,
          [ 101, 1122,  112, ...,    0,    0,    0],
          [ 101, 1122,  112, ...,    0,    0,    0],
          [ 101, 1103, 1273, ...,    0,    0,    0]], dtype=int32),
   'attention_mask': array([[1, 1, 1, ..., 0, 0, 0],
          [1, 1, 1, ..., 0, 0, 0],
          [1, 1, 1, ..., 0, 0, 0],
          ...,
          [1, 1, 1, ..., 0, 0, 0],
          [1, 1, 1, ..., 0, 0, 0],
          [1, 1, 1, ..., 0, 0, 0]], dtype=int32),
   'token_type_ids': array([[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]], dtype=int32)},
  array([0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,

In [22]:
valid_dataset

<BatchDataset shapes: ({input_ids: (None, None), attention_mask: (None, None), token_type_ids: (None, None)}, (None,)), types: ({input_ids: tf.int32, attention_mask: tf.int32, token_type_ids: tf.int32}, tf.int64)>

In [23]:
train_dataset

<RepeatDataset shapes: ({input_ids: (None, None), attention_mask: (None, None), token_type_ids: (None, None)}, (None,)), types: ({input_ids: tf.int32, attention_mask: tf.int32, token_type_ids: tf.int32}, tf.int64)>

In [24]:
np_train_dataset=list(train_dataset.as_numpy_iterator())

In [25]:
len(np_train_dataset)

4210

In [26]:
#sample size/batch size and repeat 2 times
math.ceil((67349/32)*2)

4210

In [27]:
# label
np_train_dataset[0][1]

array([1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1,
       0, 0, 1, 0, 0, 1, 1, 1, 1, 0])

In [28]:
np_train_dataset[0][0].keys()

dict_keys(['input_ids', 'attention_mask', 'token_type_ids'])

In [29]:
np_train_dataset[0][0]

{'input_ids': array([[  101,  1120,  1551, ...,     0,     0,     0],
        [  101, 12104,  2269, ...,     0,     0,     0],
        [  101,   172, 27195, ...,     0,     0,     0],
        ...,
        [  101, 12545,  4923, ...,     0,     0,     0],
        [  101, 22593, 21449, ...,     0,     0,     0],
        [  101,  1167,   175, ...,     0,     0,     0]], dtype=int32),
 'attention_mask': array([[1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        ...,
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0],
        [1, 1, 1, ..., 0, 0, 0]], dtype=int32),
 'token_type_ids': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=int32)}

In [30]:
np_train_dataset[0][0]['input_ids'][0]

array([  101,  1120,  1551, 14124,  1193,  2232,   102,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0]

In [31]:
for i in np_train_dataset[0][0]['input_ids'][0]:
    print('{:7d}    ---->    {}'.format(i, tokenizer.decode(int(i))))

    101    ---->    [ C L S ]
   1120    ---->    a t
   1551    ---->    t i m e s
  14124    ---->    u n c o m m o n
   1193    ---->    # # l y
   2232    ---->    m o v i n g
    102    ---->    [ S E P ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0    ---->    [ P A D ]
      0   

In [32]:
np_train_dataset[0][0]['input_ids']

array([[  101,  1120,  1551, ...,     0,     0,     0],
       [  101, 12104,  2269, ...,     0,     0,     0],
       [  101,   172, 27195, ...,     0,     0,     0],
       ...,
       [  101, 12545,  4923, ...,     0,     0,     0],
       [  101, 22593, 21449, ...,     0,     0,     0],
       [  101,  1167,   175, ...,     0,     0,     0]], dtype=int32)

In [33]:
np_train_dataset[0][0]['input_ids'].shape

(32, 128)

In [34]:
np_train_dataset[0][0]['attention_mask']

array([[1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0]], dtype=int32)

In [35]:
np_train_dataset[0][0]['attention_mask'].shape

(32, 128)

In [36]:
np_train_dataset[0][0]['token_type_ids']

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int32)

In [37]:
np_train_dataset[0][0]['token_type_ids'].shape

(32, 128)

In [38]:
text=np_array[0]['sentence'].decode("utf-8") 

In [39]:
text

'for the uninitiated plays better on video with the sound '

In [40]:
for word in text.split(' ') :
    print('{:10}    ---->    {}'.format(word, tokenizer.encode(word)))

for           ---->    [101, 1111, 102]
the           ---->    [101, 1103, 102]
uninitiated    ---->    [101, 8362, 4729, 10691, 1906, 102]
plays         ---->    [101, 2399, 102]
better        ---->    [101, 1618, 102]
on            ---->    [101, 1113, 102]
video         ---->    [101, 1888, 102]
with          ---->    [101, 1114, 102]
the           ---->    [101, 1103, 102]
sound         ---->    [101, 1839, 102]
              ---->    [101, 102]


In [41]:
for i in tokenizer.encode(text):
    print('{:7d}    ---->    {}'.format(i, tokenizer.decode(int(i))))

    101    ---->    [ C L S ]
   1111    ---->    f o r
   1103    ---->    t h e
   8362    ---->    u n
   4729    ---->    # # i n i
  10691    ---->    # # t i a
   1906    ---->    # # t e d
   2399    ---->    p l a y s
   1618    ---->    b e t t e r
   1113    ---->    o n
   1888    ---->    v i d e o
   1114    ---->    w i t h
   1103    ---->    t h e
   1839    ---->    s o u n d
    102    ---->    [ S E P ]


In [42]:
len(tokenizer.encode(text))

15

In [43]:
model = TFBertForSequenceClassification.from_pretrained('bert-base-cased',num_labels=4)

In [44]:
# Prepare training: Compile tf.keras model with optimizer, loss and learning rate schedule
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

In [60]:
model.summary()

Model: "tf_bert_for_sequence_classification"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bert (TFBertMainLayer)       multiple                  108310272 
_________________________________________________________________
dropout_37 (Dropout)         multiple                  0         
_________________________________________________________________
classifier (Dense)           multiple                  3076      
Total params: 108,313,348
Trainable params: 108,313,348
Non-trainable params: 0
_________________________________________________________________


In [58]:
model.inputs

{'attention_mask': <tf.Tensor 'attention_mask:0' shape=(None, 128) dtype=int32>,
 'input_ids': <tf.Tensor 'input_ids:0' shape=(None, 128) dtype=int32>,
 'token_type_ids': <tf.Tensor 'token_type_ids:0' shape=(None, 128) dtype=int32>}

In [59]:
model.outputs

[<tf.Tensor 'tf_bert_for_sequence_classification/Identity:0' shape=(None, 4) dtype=float32>]

In [69]:
model.layers

[<transformers.modeling_tf_bert.TFBertMainLayer at 0x1a3d14f690>,
 <tensorflow.python.keras.layers.core.Dropout at 0x1a41861550>,
 <tensorflow.python.keras.layers.core.Dense at 0x1a41861b10>]

In [81]:
for layer in model.layers:
    print(layer.name, layer._inbound_nodes, layer._outbound_nodes)

bert [] []
dropout_37 [] []
classifier [] []


In [86]:
model._inbound_nodes

[]

In [66]:
model.layers[0].inbound_nodes

[]

In [78]:
for i in model.layers:
    print(i.inbound_nodes)


[]
[]
[]


In [52]:
dir(model.layers[2])

['_TF_MODULE_IGNORED_PROPERTIES',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_activity_regularizer',
 '_add_inbound_node',
 '_add_trackable',
 '_add_variable_with_custom_getter',
 '_attribute_sentinel',
 '_autocast',
 '_call_accepts_kwargs',
 '_call_arg_was_passed',
 '_call_fn_args',
 '_call_full_argspec',
 '_callable_losses',
 '_checkpoint_dependencies',
 '_clear_losses',
 '_collect_input_masks',
 '_compute_dtype',
 '_dedup_weights',
 '_deferred_dependencies',
 '_dtype',
 '_dtype_defaulted_to_floatx',
 '_dtype_policy',
 '_dynamic',
 '_eager_add_metric',
 '_eager_losses',
 '_expects_mask_arg',
 '_expects_training_arg

In [53]:
model.summary()

Model: "tf_bert_for_sequence_classification"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bert (TFBertMainLayer)       multiple                  108310272 
_________________________________________________________________
dropout_37 (Dropout)         multiple                  0         
_________________________________________________________________
classifier (Dense)           multiple                  3076      
Total params: 108,313,348
Trainable params: 108,313,348
Non-trainable params: 0
_________________________________________________________________


In [54]:
# Train and evaluate using tf.keras.Model.fit()
history = model.fit(train_dataset, epochs=2, steps_per_epoch=10)

Train for 10 steps
Epoch 1/2
Epoch 2/2


In [55]:
history.epoch

[0, 1]

In [56]:
history.history

{'loss': [1.1529567956924438, 0.8439195096492768],
 'accuracy': [0.46875, 0.559375]}

In [91]:
out_val=model.predict(valid_dataset)

In [92]:
y_pred = tf.nn.softmax(out_val)
y_pred

<tf.Tensor: shape=(872, 4), dtype=float32, numpy=
array([[0.42255393, 0.49657318, 0.03445925, 0.04641363],
       [0.43725368, 0.49089155, 0.03565618, 0.03619868],
       [0.4424509 , 0.4681127 , 0.04147896, 0.04795737],
       ...,
       [0.4033167 , 0.51713747, 0.0366515 , 0.04289435],
       [0.43461922, 0.48780352, 0.03541361, 0.04216358],
       [0.4509448 , 0.45976418, 0.04092937, 0.0483616 ]], dtype=float32)>

In [93]:
y_pred_argmax = tf.math.argmax(y_pred, axis=1)
y_pred_argmax

<tf.Tensor: shape=(872,), dtype=int64, numpy=
array([1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1,
       1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1,
       0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1,
       1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
       0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0,
       1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0,
     

In [57]:
input_layer = tf.keras.Input(shape = (128,), dtype='int64')  
bert_ini = TFBertModel.from_pretrained('bert-base-cased')(input_layer)
bert = bert_ini[1]    
dropout = tf.keras.layers.Dropout(0.1)(bert)
flat = tf.keras.layers.Flatten()(dropout)
classifier = tf.keras.layers.Dense(units=5)(flat)                  
model2 = tf.keras.Model(inputs=input_layer, outputs=classifier)

NameError: name 'TFBertModel' is not defined

In [None]:
bert_ini

In [None]:
# Prepare training: Compile tf.keras model with optimizer, loss and learning rate schedule
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
model2.compile(optimizer=optimizer, loss=loss, metrics=[metric])

In [None]:
model2.summary()

In [None]:
# Train and evaluate using tf.keras.Model.fit()
history = model2.fit(train_dataset, epochs=2, steps_per_epoch=115)

In [None]:
# Train and evaluate using tf.keras.Model.fit()
history = model.fit(train_dataset, epochs=2, steps_per_epoch=115,
                    validation_data=valid_dataset, validation_steps=7)

In [None]:
# Load the TensorFlow model in PyTorch for inspection
model.save_pretrained('./save/')
pytorch_model = BertForSequenceClassification.from_pretrained('./save/', from_tf=True)

In [None]:
# Quickly test a few predictions - MRPC is a paraphrasing task, let's see if our model learned the task
sentence_0 = "This research was consistent with his findings."
sentence_1 = "His findings were compatible with this research."
sentence_2 = "His findings were not compatible with this research."
inputs_1 = tokenizer.encode_plus(sentence_0, sentence_1, add_special_tokens=True, return_tensors='pt')
inputs_2 = tokenizer.encode_plus(sentence_0, sentence_2, add_special_tokens=True, return_tensors='pt')

In [None]:
pred_1 = pytorch_model(inputs_1['input_ids'], token_type_ids=inputs_1['token_type_ids'])[0].argmax().item()
pred_2 = pytorch_model(inputs_2['input_ids'], token_type_ids=inputs_2['token_type_ids'])[0].argmax().item()

print("sentence_1 is", "a paraphrase" if pred_1 else "not a paraphrase", "of sentence_0")
print("sentence_2 is", "a paraphrase" if pred_2 else "not a paraphrase", "of sentence_0")