In [2]:
import torch
from transformers import BertTokenizer, AutoConfig, BertForSequenceClassification
from utils.modeling import BertForSpanClassification
from utils.super_glue_data_utils import superglue_convert_examples_to_features, RecordProcessor
from utils.data_utils import SuperGLUEDataset
from transformers.utils import logging

logging.set_verbosity_error()

In [6]:
config = AutoConfig.from_pretrained(pretrained_model_name_or_path='bert-base-uncased', num_labels=2)
config.num_spans = 2
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSpanClassification.from_pretrained(pretrained_model_name_or_path='/home/moskovskiy/workspace/nlp/compression/superglue_models/bert-base-uncased_None_150/wsc', config=config)
model = model.train().cuda()

In [8]:
processor = WscProcessor()
examples = processor.get_dev_examples('data/WSC/')
labels = processor.get_labels()
features = superglue_convert_examples_to_features(examples, tokenizer, task='wsc', max_length=150)
all_guids = torch.tensor([f.guid for f in features], dtype=torch.long)
all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
all_attention_mask = torch.tensor(
    [f.attention_mask for f in features], dtype=torch.long
)
all_token_type_ids = torch.tensor(
    [f.token_type_ids for f in features], dtype=torch.long
)
all_labels = torch.tensor([f.label for f in features], dtype=torch.long)
all_spans = torch.tensor([f.span_locs for f in features])
dataset = SuperGLUEDataset(
            input_ids=all_input_ids,
            attention_masks=all_attention_mask,
            token_type_ids=all_token_type_ids,
            labels=all_labels,
            span_cl=True,
            spans=all_spans,
            guids=all_guids,
        )

In [9]:
results = []
for sample in dataset:
    out = model(**{k: v.unsqueeze(0).cuda() for k, v in sample.items() if k != "guid"})
    results.append(out)

In [16]:
preds = [x[1].argmax(-1).item() for x in results]

In [20]:
labels = [f.label for f in features]

In [22]:
from sklearn.metrics import accuracy_score, f1_score

In [26]:
accuracy_score(labels, preds)

0.5288461538461539

In [27]:
f1_score(labels, preds)

0.6573426573426574

In [34]:
import numpy as np
arr = np.load('preds.txt.npy')

In [47]:
np.argmax(arr, -1)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

**Record Debug**

In [16]:
import torch
from transformers import BertTokenizer, AutoConfig, BertForSequenceClassification
from utils.modeling import BertForSpanClassification
from utils.super_glue_data_utils import superglue_convert_examples_to_features, RecordProcessor
from utils.data_utils import SuperGLUEDataset
from transformers.utils import logging

logging.set_verbosity_error()

In [40]:
config = AutoConfig.from_pretrained(pretrained_model_name_or_path='bert-base-uncased', num_labels=2)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained(pretrained_model_name_or_path='superglue_models/bert-base-uncased-svd-ffn-310/record/checkpoint-49000', config=config)
model = model.eval().cuda(1)

In [41]:
processor = RecordProcessor()
examples = processor.get_dev_examples('data/ReCoRD_small/')
answers = processor.get_answers('data/ReCoRD_small/', 'dev')
labels = processor.get_labels()
features = superglue_convert_examples_to_features(examples, tokenizer, task='wsc', max_length=450)
all_guids = torch.tensor([f.guid for f in features], dtype=torch.long)
all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
all_attention_mask = torch.tensor(
    [f.attention_mask for f in features], dtype=torch.long
)
all_token_type_ids = torch.tensor(
    [f.token_type_ids for f in features], dtype=torch.long
)
all_labels = torch.tensor([f.label for f in features], dtype=torch.long)
# all_spans = torch.tensor([f.span_locs for f in features])
dataset = SuperGLUEDataset(
            input_ids=all_input_ids,
            attention_masks=all_attention_mask,
            token_type_ids=all_token_type_ids,
            labels=all_labels,
            span_cl=False,
            # spans=all_spans,
            guids=all_guids,
        )

In [42]:
examples[0]

InputExample(guid=[0, 0, 0], text_a="Tracy Morgan hasn't appeared on stage since the devastating New Jersey crash that nearly ended his life last summer, but all that will change this fall when he returns to host Saturday Night Live. NBC announced on Twitter Monday that Morgan, an SNL alum with seven seasons as a cast member under his belt, will headline the third episode of Season 41 airing October 17. For Morgan, 46, it will be a second time hosting the long-running variety show, the first since the June 2014 pileup on the New Jersey Turnpike that killed his friend and mentor James 'Jimmy Mack' McNair.\n@highlight\nMorgan, 46, will host third episode of season 41 of SNL airing October 17\n@highlight\nHe tweeted to his fans: 'Stoked to be going home...#SNL'\n@highlight\nFor the SNL alum who had spent seven years as cast member, it will be a second time hosting the show\n@highlight\nMorgan has been sidelined by severe head trauma suffered in deadly June 2014 crash on New Jersey Turnpik

In [43]:
results = []
with torch.no_grad():
    for sample in dataset:
        out = model(**{k: v.unsqueeze(0).cuda(1) for k, v in sample.items() if k != "guid"})
        results.append(out) 

In [51]:
preds = [x.logits.argmax(1).item() for x in results]

In [45]:
set(preds)

{0}

In [46]:
label = [e['labels'].item() for e in dataset]

In [47]:
from sklearn.metrics import accuracy_score, f1_score

In [49]:
print(accuracy_score(label, preds))
print(accuracy_score(label, [1 for _ in range(len(label))]))

0.14252961406190295
0.8574703859380971


In [50]:
f1_score(label, preds)

0.0