In [41]:
from farm.utils import initialize_device_settings
from farm.modeling.tokenization import Tokenizer
from farm.data_handler.processor import TextClassificationProcessor, SquadProcessor
from farm.data_handler.data_silo import DataSilo
from farm.eval import Evaluator
from farm.modeling.adaptive_model import AdaptiveModel
from pathlib import Path

In [42]:
# --- Runtime configuration -------------------------------------------------
# Local checkpoint directory; used below to load both the tokenizer and the model.
lang_model = "./models/bert-multi-toxic-comment"
# Forwarded to the tokenizer loader.
# NOTE(review): confirm the checkpoint was trained on lower-cased text.
do_lower_case = True
# Batch size handed to the DataSilo.
batch_size = 32
# CUDA is explicitly disabled here, so evaluation runs on the CPU.
device, n_gpu = initialize_device_settings(use_cuda=False)

07/07/2020 01:20:06 - INFO - farm.utils -   device: cpu n_gpu: 0, distributed training: False, automatic mixed precision training: None


In [43]:
# --- Evaluation data configuration ----------------------------------------
# Directory holding the evaluation file, relative to the notebook's working
# directory. This previously pointed at "../data/toxic-comments", which did not
# match the location the processor actually reads the test set from.
data_dir = Path("./data/toxic-comments")
# File inside the data directory that is passed to the processor as the test set.
evaluation_filename = "val.tsv"
# Label names handed to the processor (the task is configured as multilabel).
label_list = ["toxic","severe_toxic","obscene","threat","insult","identity_hate"]
# Metric name handed to the processor; also the key read from the evaluation results.
metric = "f1_macro"

In [44]:
# Step 1: load the tokenizer from the same directory as the model checkpoint.
tokenizer = Tokenizer.load(
    do_lower_case=do_lower_case,
    pretrained_model_name_or_path=lang_model,
)

07/07/2020 01:20:06 - INFO - farm.modeling.tokenization -   Loading tokenizer of type 'BertTokenizer'
07/07/2020 01:20:06 - INFO - transformers.tokenization_utils -   Model name './models/bert-multi-toxic-comment' not found in model shortcut name list (bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese, bert-base-german-cased, bert-large-uncased-whole-word-masking, bert-large-cased-whole-word-masking, bert-large-uncased-whole-word-masking-finetuned-squad, bert-large-cased-whole-word-masking-finetuned-squad, bert-base-cased-finetuned-mrpc, bert-base-german-dbmdz-cased, bert-base-german-dbmdz-uncased, TurkuNLP/bert-base-finnish-cased-v1, TurkuNLP/bert-base-finnish-uncased-v1, wietsedv/bert-base-dutch-cased). Assuming './models/bert-multi-toxic-comment' is a path, a model identifier, or url to a directory containing tokenizer files.
07/07/2020 01:20:06 - INFO - transformers.tokenization_

In [45]:
# Step 2: create the processor that converts the evaluation file into model inputs.
# Only a test set is configured: no train file, no dev file and no dev split.
# NOTE(review): the data directory is spelled out here rather than reusing the
# `data_dir` variable from the configuration cell -- keep the two in sync.
processor = TextClassificationProcessor(
    # tokenization
    tokenizer=tokenizer,
    max_seq_len=128,
    # input files
    data_dir=Path("./data/toxic-comments"),
    test_filename=evaluation_filename,
    train_filename=None,
    dev_filename=None,
    dev_split=0,
    quote_char='"',
    # labels and metric
    label_list=label_list,
    label_column_name="label",
    multilabel=True,
    metric=metric,
)

In [46]:
# Step 3: build the DataSilo. It loads the dataset(s) configured on the processor,
# exposes DataLoaders for them and logs a few descriptive statistics.
data_silo = DataSilo(batch_size=batch_size, processor=processor)

07/07/2020 01:20:07 - INFO - farm.data_handler.data_silo -   
Loading data into the data silo ... 
              ______
               |o  |   !
   __          |:`_|---'-.
  |__|______.-/ _ \-----.|       
 (o)(o)------'\ _ /     ( )      
 
07/07/2020 01:20:07 - INFO - farm.data_handler.data_silo -   No train set is being loaded
07/07/2020 01:20:07 - INFO - farm.data_handler.data_silo -   No dev set is being loaded
07/07/2020 01:20:07 - INFO - farm.data_handler.data_silo -   Loading test set from: data/toxic-comments/val.tsv
07/07/2020 01:20:07 - INFO - farm.data_handler.data_silo -   Got ya 7 parallel workers to convert 10000 dictionaries to pytorch datasets (chunksize = 286)...
07/07/2020 01:20:07 - INFO - farm.data_handler.data_silo -    0    0    0    0    0    0    0 
07/07/2020 01:20:07 - INFO - farm.data_handler.data_silo -   /w\  /w\  /w\  /|\  /w\  /w\  /|\
07/07/2020 01:20:07 - INFO - farm.data_handler.data_silo -   /'\  / \  /'\  /'\  /'\  /'\  /'\
07/07/2020 01:20:07 - INF

Preprocessing Dataset data/toxic-comments/val.tsv: 100%|██████████| 10000/10000 [00:09<00:00, 1045.53 Dicts/s]
07/07/2020 01:20:17 - INFO - farm.data_handler.data_silo -   Examples in train: 0
07/07/2020 01:20:17 - INFO - farm.data_handler.data_silo -   Examples in dev  : 0
07/07/2020 01:20:17 - INFO - farm.data_handler.data_silo -   Examples in test : 10000
07/07/2020 01:20:17 - INFO - farm.data_handler.data_silo -   


In [47]:
# Step 4: build the Evaluator on the "test" DataLoader, using the task
# definitions held by the processor and the device selected earlier.
evaluator = Evaluator(
    device=device,
    tasks=data_silo.processor.tasks,
    data_loader=data_silo.get_data_loader("test"),
)

In [48]:
# Step 5: load the model from the local checkpoint directory.
# `AdaptiveModel.load` is for models that were trained with FARM. The alternative is:
#   model = AdaptiveModel.convert_from_transformers(lang_model, device=device, task_type="text_classification")
model = AdaptiveModel.load(lang_model, device=device)
# Connect the prediction heads with the processor's tasks (labels are required).
model.connect_heads_with_processor(
    data_silo.processor.tasks,
    require_labels=True,
)

# Step 6: run the evaluation and report the macro-averaged F1 score,
# read from the first entry of the results.
results = evaluator.eval(model)
f1_score = results[0]["f1_macro"]
print("Macro-averaged F1-Score:", f1_score)

07/07/2020 01:20:17 - INFO - transformers.modeling_utils -   loading weights file models/bert-multi-toxic-comment/language_model.bin from cache at models/bert-multi-toxic-comment/language_model.bin
07/07/2020 01:20:19 - INFO - farm.modeling.adaptive_model -   Found files for loading 1 prediction heads
07/07/2020 01:20:19 - INFO - farm.modeling.prediction_head -   Prediction head initialized with size [768, 6]
07/07/2020 01:20:19 - INFO - farm.modeling.prediction_head -   Loading prediction head from models/bert-multi-toxic-comment/prediction_head_0.bin
Evaluating: 100%|██████████| 313/313 [17:35<00:00,  3.37s/it]

Macro-averaged F1-Score: 0.6080237118447319



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
