In [1]:
import torch
import transformers

from life_after_bert import LaBEvaluator

In [2]:
# Run on the GPU when torch reports a usable CUDA device, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the tokenizer and the masked-LM head from the same BART checkpoint so
# that their vocabularies match.
tokenizer = transformers.AutoTokenizer.from_pretrained("facebook/bart-large")
model = transformers.AutoModelForMaskedLM.from_pretrained("facebook/bart-large")

In [3]:
# Tasks to evaluate, as (task name, integer) pairs.
# NOTE(review): the integer is presumably the number of answer choices for the
# task -- confirm against LaBEvaluator.evaluate.
task_infos = [
    ("Age Comparison", 2),
    ("Always Never", 5),
    ("Antonym Negation", 2),
    ("Multihop Composition", 3),
    ("Size Comparison", 2),
    ("Taxonomy Conjunction", 3),
]

# One evaluator instance is created here and reused by the later cells.
evaluator = LaBEvaluator()

# Evaluate the currently loaded model on every task, treating it as an
# encoder-style architecture.
task_accs = evaluator.evaluate(
    model,
    tokenizer,
    task_infos,
    model_arch="encoder",
    device=device,
)

# Bare final expression so the notebook renders the per-task accuracy mapping.
task_accs

2022-04-21 11:54:42 | INFO | data.py | Loading jsonl file from /home/kzhao/life-after-bert/tests/data/oLMpics_age_comparison_dev.jsonl


Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]

2022-04-21 11:54:47 | INFO | eval.py | Accuracy on Age Comparison: 0.862
2022-04-21 11:54:47 | INFO | data.py | Loading jsonl file from /home/kzhao/life-after-bert/tests/data/oLMpics_always_never_dev.jsonl


Evaluating:   0%|          | 0/18 [00:00<?, ?it/s]

2022-04-21 11:54:48 | INFO | eval.py | Accuracy on Always Never: 0.14285714285714285
2022-04-21 11:54:48 | INFO | data.py | Loading jsonl file from /home/kzhao/life-after-bert/tests/data/oLMpics_antonym_negation_dev.jsonl


Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]

2022-04-21 11:54:50 | INFO | eval.py | Accuracy on Antonym Negation: 0.538
2022-04-21 11:54:50 | INFO | data.py | Loading jsonl file from /home/kzhao/life-after-bert/tests/data/oLMpics_multihop_composition_dev.jsonl


Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]

2022-04-21 11:54:52 | INFO | eval.py | Accuracy on Multihop Composition: 0.338
2022-04-21 11:54:52 | INFO | data.py | Loading jsonl file from /home/kzhao/life-after-bert/tests/data/oLMpics_size_comparison_dev.jsonl


Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]

2022-04-21 11:54:54 | INFO | eval.py | Accuracy on Size Comparison: 0.508
2022-04-21 11:54:54 | INFO | data.py | Loading jsonl file from /home/kzhao/life-after-bert/tests/data/oLMpics_taxonomy_conjunction_dev.jsonl


Evaluating:   0%|          | 0/38 [00:00<?, ?it/s]

2022-04-21 11:54:56 | INFO | eval.py | Accuracy on Taxonomy Conjunction: 0.4257095158597663


{'Age Comparison': 0.862,
 'Always Never': 0.14285714285714285,
 'Antonym Negation': 0.538,
 'Multihop Composition': 0.338,
 'Size Comparison': 0.508,
 'Taxonomy Conjunction': 0.4257095158597663}

In [4]:
# Swap in the Pegasus checkpoint, loaded with a seq2seq head.
# NOTE(review): this rebinds `model`, `tokenizer` and `task_accs` from the
# earlier BART cell; re-running that cell's evaluate call after this one would
# score Pegasus with model_arch="encoder". Run cells top to bottom.
tokenizer = transformers.AutoTokenizer.from_pretrained("google/pegasus-large")
model = transformers.AutoModelForSeq2SeqLM.from_pretrained("google/pegasus-large")

# Same tasks and evaluator as before, but evaluated as an encoder-decoder model.
task_accs = evaluator.evaluate(
    model,
    tokenizer,
    task_infos,
    model_arch="encoder-decoder",
    device=device,
)

# Bare final expression so the notebook renders the per-task accuracy mapping.
task_accs

2022-04-21 11:55:05 | INFO | data.py | Loading jsonl file from /home/kzhao/life-after-bert/tests/data/oLMpics_age_comparison_dev.jsonl


Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]

2022-04-21 11:55:08 | INFO | eval.py | Accuracy on Age Comparison: 0.506
2022-04-21 11:55:08 | INFO | data.py | Loading jsonl file from /home/kzhao/life-after-bert/tests/data/oLMpics_always_never_dev.jsonl


Evaluating:   0%|          | 0/18 [00:00<?, ?it/s]

2022-04-21 11:55:09 | INFO | eval.py | Accuracy on Always Never: 0.25357142857142856
2022-04-21 11:55:09 | INFO | data.py | Loading jsonl file from /home/kzhao/life-after-bert/tests/data/oLMpics_antonym_negation_dev.jsonl


Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]

2022-04-21 11:55:10 | INFO | eval.py | Accuracy on Antonym Negation: 0.498
2022-04-21 11:55:10 | INFO | data.py | Loading jsonl file from /home/kzhao/life-after-bert/tests/data/oLMpics_multihop_composition_dev.jsonl


Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]

2022-04-21 11:55:12 | INFO | eval.py | Accuracy on Multihop Composition: 0.338
2022-04-21 11:55:12 | INFO | data.py | Loading jsonl file from /home/kzhao/life-after-bert/tests/data/oLMpics_size_comparison_dev.jsonl


Evaluating:   0%|          | 0/32 [00:00<?, ?it/s]

2022-04-21 11:55:14 | INFO | eval.py | Accuracy on Size Comparison: 0.506
2022-04-21 11:55:14 | INFO | data.py | Loading jsonl file from /home/kzhao/life-after-bert/tests/data/oLMpics_taxonomy_conjunction_dev.jsonl


Evaluating:   0%|          | 0/38 [00:00<?, ?it/s]

2022-04-21 11:55:16 | INFO | eval.py | Accuracy on Taxonomy Conjunction: 0.3572621035058431


{'Age Comparison': 0.506,
 'Always Never': 0.25357142857142856,
 'Antonym Negation': 0.498,
 'Multihop Composition': 0.338,
 'Size Comparison': 0.506,
 'Taxonomy Conjunction': 0.3572621035058431}