In [4]:
# Load the annotated discussions dataset (read from xlsx).

import utils

# NOTE(review): keep_punctuation=True presumably leaves punctuation in the message
# text (relevant for the cased BERT tokenizer used later) - confirm in utils.
data_df = utils.load_annotated_discussions_data(keep_punctuation=True)

# Model input (message column) and raw label (target column) of the loaded frame.
messages = data_df[utils.COL_MESSAGE]
target = data_df[utils.COL_TARGET]

In [5]:
# change target values to numbers

from sklearn import preprocessing

label_encoder = preprocessing.LabelEncoder()

# Fit on the raw target column instead of on `target` itself. With the
# self-referential form (`target = f(target)`), re-running this cell would fit
# the encoder on already-encoded integers, so `label_encoder.classes_` would no
# longer hold the original label names needed for `inverse_transform`.
target = label_encoder.fit_transform(data_df[utils.COL_TARGET])


In [6]:
# split data to train, development and test sets

import numpy as np
from sklearn.model_selection import train_test_split

X, y = np.array(messages), np.array(target)

# Two-stage split: hold out 20 % of the data, then cut the hold-out in half
# to obtain the validation and test sets (roughly 80/10/10 overall).
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X, y, test_size=0.2, random_state=13
)
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.5, random_state=42
)

print("Train dataset shape: {0}, \nTest dataset shape: {1} \nValidation dataset shape: {2}".format(X_train.shape, X_test.shape, X_val.shape))

Train dataset shape: (568,), 
Test dataset shape: (72,) 
Validation dataset shape: (71,)


In [7]:
# load model and tokenizer from transformers

from transformers import TFBertForSequenceClassification, BertTokenizer

# The classifier is loaded from the local checkpoint directory '../models/bert/'.
# That is the same directory the "save model" cell further down writes to, so this
# cell presumably needs a previously saved checkpoint to exist. To start from the
# base weights instead, use:
#   bert_model = TFBertForSequenceClassification.from_pretrained("bert-base-cased", num_labels=15)
# NOTE(review): num_labels=15 is hard-coded; presumably the number of distinct
# target classes - confirm against the label encoder.
bert_model = TFBertForSequenceClassification.from_pretrained('../models/bert/', num_labels=15)
# The tokenizer is loaded by hub name, not from the local checkpoint directory.
bert_tokenizer = BertTokenizer.from_pretrained("bert-base-cased")

Some layers from the model checkpoint at ../models/bert/ were not used when initializing TFBertForSequenceClassification: ['dropout_113']
- This IS expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertForSequenceClassification were initialized from the model checkpoint at ../models/bert/.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


In [8]:
from tqdm import tqdm

def convert_to_input(messages, pad_token=0, pad_token_segment_id=0, max_length=128):
    """Tokenize messages into fixed-length BERT input arrays.

    Each message is encoded with the module-level ``bert_tokenizer``, explicitly
    truncated to ``max_length`` tokens (special tokens included) and then
    right-padded so every row has exactly ``max_length`` entries.

    Args:
        messages: Iterable of message strings to encode.
        pad_token: Token id used to pad ``input_ids``.
        pad_token_segment_id: Segment id used to pad ``token_type_ids``.
        max_length: Length of every encoded row after truncation and padding.

    Returns:
        A list ``[input_ids, attention_masks, token_type_ids]`` of NumPy arrays,
        each with one row per message. The attention mask is 1 for real tokens
        and 0 for padding.
    """
    input_ids, attention_masks, token_type_ids = [], [], []

    for message in tqdm(messages, position=0, leave=True):
        # truncation=True makes the cut-off at max_length explicit; passing only
        # max_length triggers a tokenizer warning and relies on an implicit
        # fallback truncation strategy.
        encoded = bert_tokenizer.encode_plus(
            message,
            add_special_tokens=True,
            max_length=max_length,
            truncation=True,
        )

        ids, segments = encoded["input_ids"], encoded["token_type_ids"]
        mask = [1] * len(ids)

        # Non-negative because the encoding is truncated to max_length above.
        padding_length = max_length - len(ids)

        input_ids.append(ids + [pad_token] * padding_length)
        attention_masks.append(mask + [0] * padding_length)
        token_type_ids.append(segments + [pad_token_segment_id] * padding_length)

    return [np.asarray(input_ids),
            np.asarray(attention_masks),
            np.asarray(token_type_ids)]

In [9]:
# Encode each split into BERT input arrays (processed in test, train, validation order).
X_test_input, X_train_input, X_val_input = [
    convert_to_input(split) for split in (X_test, X_train, X_val)
]

  0%|          | 0/72 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
100%|██████████| 72/72 [00:00<00:00, 1915.28it/s]
100%|██████████| 568/568 [00:00<00:00, 2476.71it/s]
100%|██████████| 71/71 [00:00<00:00, 2409.56it/s]


In [10]:
import tensorflow as tf

def example_to_features(input_ids,attention_masks,token_type_ids,y):
  """Pack one example into a ``(features, label)`` pair.

  The features are a dict keyed by ``input_ids``, ``attention_mask`` and
  ``token_type_ids``; the label ``y`` is passed through unchanged.
  """
  features = {
      "input_ids": input_ids,
      "attention_mask": attention_masks,
      "token_type_ids": token_type_ids,
  }
  return features, y

def _make_dataset(encoded_inputs, labels):
    """Build an unbatched dataset of (feature dict, label) pairs for one split.

    ``encoded_inputs`` is the ``[input_ids, attention_masks, token_type_ids]``
    list returned by ``convert_to_input``; ``labels`` holds one label per row.
    """
    input_ids, attention_masks, token_type_ids = encoded_inputs
    slices = tf.data.Dataset.from_tensor_slices(
        (input_ids, attention_masks, token_type_ids, labels)
    )
    return slices.map(example_to_features)


# The shuffle buffer covers the whole training set: a buffer smaller than the
# dataset only shuffles within a sliding window, so examples never move far
# from their original position.
# .repeat(5) means one pass over train_ds iterates the training data 5 times.
train_ds = (
    _make_dataset(X_train_input, y_train)
    .shuffle(len(y_train))
    .batch(12)
    .repeat(5)
)
# Validation and test data keep their original order (no shuffling).
val_ds = _make_dataset(X_val_input, y_val).batch(12)
test_ds = _make_dataset(X_test_input, y_test).batch(12)

In [11]:
# Adam with a small learning rate and gradient-norm clipping.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=3e-5,
    epsilon=1e-08,
    clipnorm=1.0,
)
# Labels are integer class ids and the loss is told to expect raw logits.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')

bert_model.compile(loss=loss, optimizer=optimizer, metrics=[metric])

In [32]:
# Train the model for 3 Keras epochs, with val_ds passed as validation data.
# train_ds is built with .repeat(5), so each Keras epoch iterates the training
# data 5 times. The History object returned by fit is kept in bert_history.
bert_history = bert_model.fit(train_ds, epochs=3, validation_data=val_ds)

Epoch 1/3


KeyboardInterrupt: 

In [35]:
# Save the model to '../models/bert/'.
# NOTE(review): this is the same path the model is loaded from in the loading
# cell, so saving presumably replaces that checkpoint - confirm this is
# intended, especially after an interrupted training run.
bert_model.save_pretrained('../models/bert/')

In [12]:
# Recover the ground-truth labels by walking the test dataset example by
# example; each element is a (features, label) pair and only the label is kept.
results_true = np.asarray(
    [label.numpy() for features, label in test_ds.unbatch()]
)
print(results_true)

[ 2 14  2  2 14  9 11  2  1 11 11 14  2 11  2 12 14 14 13  2  2 11  6  2
 14  2  2  4  2 11  2  2  6  0  2  4  2  2 14 11  2  8  6  7  0  2  2  2
  2  2  2 11 14  1 14 14 14 11  2 11  2 13  2  9 11  7  1  2 11  2 14  9]


In [13]:
# Run inference on the batched test set; the raw per-class scores are read
# from the `.logits` attribute of the returned object.
results = bert_model.predict(test_ds)
print(f"Model predictions:\n {results.logits}")

# Predicted class index = position of the largest logit along axis 1.
results_predicted = np.argmax(results.logits, axis=1)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method
Model predictions:
 [[-0.6188839  -1.9589926   8.020284   ... -0.4137999   1.0120095
  -2.3402874 ]
 [ 0.28942007 -1.0124781  -1.1985749  ...  1.6433005   1.1098207
  -1.5175879 ]
 [-0.98468506 -2.0506408   8.866593   ... -0.99820065  0.04651871
  -0.93529224]
 ...
 [-0.78769237 -1.9857748   7.7307553  ...  0.06558775  0.9302552
  -2.1287682 ]
 [-0.9669965  -2.1618223   0.35727975 ... -1.4432449  -2.027272
   5.8343353 ]
 [-1.5184282   0.3841

In [14]:
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

# Possible values for `average`: None, 'micro', 'macro', 'weighted'.
# 'weighted' is used for the F1 score below; accuracy is reported alongside it.
print(f"F1 score: {f1_score(results_true, results_predicted, average='weighted')}")
print(f"Accuracy score: {accuracy_score(results_true, results_predicted)}")

F1 score: 0.6700228557780046
Accuracy score: 0.6666666666666666
