In [2]:
import torch
from transformers import BertTokenizer, AutoConfig, BertForSequenceClassification
from utils.modeling import BertForSpanClassification
from utils.super_glue_data_utils import superglue_convert_examples_to_features, RecordProcessor
from utils.data_utils import SuperGLUEDataset
from transformers.utils import logging

logging.set_verbosity_error()

In [6]:
config = AutoConfig.from_pretrained(pretrained_model_name_or_path='bert-base-uncased', num_labels=2)
config.num_spans = 2
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSpanClassification.from_pretrained(pretrained_model_name_or_path='/home/moskovskiy/workspace/nlp/compression/superglue_models/bert-base-uncased_None_150/wsc', config=config)
model = model.train().cuda()

In [8]:
processor = WscProcessor()
examples = processor.get_dev_examples('data/WSC/')
labels = processor.get_labels()
features = superglue_convert_examples_to_features(examples, tokenizer, task='wsc', max_length=150)
all_guids = torch.tensor([f.guid for f in features], dtype=torch.long)
all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
all_attention_mask = torch.tensor(
    [f.attention_mask for f in features], dtype=torch.long
)
all_token_type_ids = torch.tensor(
    [f.token_type_ids for f in features], dtype=torch.long
)
all_labels = torch.tensor([f.label for f in features], dtype=torch.long)
all_spans = torch.tensor([f.span_locs for f in features])
dataset = SuperGLUEDataset(
            input_ids=all_input_ids,
            attention_masks=all_attention_mask,
            token_type_ids=all_token_type_ids,
            labels=all_labels,
            span_cl=True,
            spans=all_spans,
            guids=all_guids,
        )

In [9]:
results = []
for sample in dataset:
    out = model(**{k: v.unsqueeze(0).cuda() for k, v in sample.items() if k != "guid"})
    results.append(out)

In [16]:
preds = [x[1].argmax(-1).item() for x in results]

In [20]:
labels = [f.label for f in features]

In [22]:
from sklearn.metrics import accuracy_score, f1_score

In [26]:
accuracy_score(labels, preds)

0.5288461538461539

In [27]:
f1_score(labels, preds)

0.6573426573426574

In [34]:
import numpy as np
arr = np.load('preds.txt.npy')

In [47]:
np.argmax(arr, -1)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

**Record Debug**

In [1]:
import torch
from transformers import BertTokenizer, AutoConfig, BertForSequenceClassification
from utils.modeling import BertForSpanClassification
from utils.super_glue_data_utils import superglue_convert_examples_to_features, RecordProcessor
from utils.data_utils import SuperGLUEDataset
from transformers.utils import logging

logging.set_verbosity_error()

In [2]:
config = AutoConfig.from_pretrained(pretrained_model_name_or_path='bert-base-uncased', num_labels=2)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained(pretrained_model_name_or_path='superglue_models/bert-base-uncased-svd-ffn-310/record/checkpoint-49000', config=config)
model = model.eval().cuda(0)

In [4]:
processor = RecordProcessor()
examples = processor.get_dev_examples('data/ReCoRD_small/')
answers = processor.get_answers('data/ReCoRD_small/', 'dev')
labels = processor.get_labels()
features = superglue_convert_examples_to_features(examples, tokenizer, task='record', max_length=512)
all_guids = torch.tensor([f.guid for f in features], dtype=torch.long)
all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
all_attention_mask = torch.tensor(
    [f.attention_mask for f in features], dtype=torch.long
)
all_token_type_ids = torch.tensor(
    [f.token_type_ids for f in features], dtype=torch.long
)
all_labels = torch.tensor([f.label for f in features], dtype=torch.long)
# all_spans = torch.tensor([f.span_locs for f in features])
dataset = SuperGLUEDataset(
            input_ids=all_input_ids,
            attention_masks=all_attention_mask,
            token_type_ids=all_token_type_ids,
            labels=all_labels,
            span_cl=False,
            # spans=all_spans,
            guids=all_guids,
        )

In [10]:
examples[0]

InputExample(guid=[0, 0, 0], text_a="Tracy Morgan hasn't appeared on stage since the devastating New Jersey crash that nearly ended his life last summer, but all that will change this fall when he returns to host Saturday Night Live. NBC announced on Twitter Monday that Morgan, an SNL alum with seven seasons as a cast member under his belt, will headline the third episode of Season 41 air", text_b='On October 10, acclaimed comedian and star of the summer box office hit Trainwreck Amy Schumer will make her SNL debut, followed by Tracy Morgan a week later.', label=1)

In [11]:
tokenizer.decode(dataset[0]['input_ids'], skip_special_tokens=False)

"[CLS] tracy morgan hasn't appeared on stage since the devastating new jersey crash that nearly ended his life last summer, but all that will change this fall when he returns to host saturday night live. nbc announced on twitter monday that morgan, an snl alum with seven seasons as a cast member under his belt, will headline the third episode of season 41 air [SEP] on october 10, acclaimed comedian and star of the summer box office hit trainwreck amy schumer will make her snl debut, followed by tracy morgan a week later. [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]

In [5]:
len(examples[0].text_a)

1077

In [43]:
results = []
with torch.no_grad():
    for sample in dataset:
        out = model(**{k: v.unsqueeze(0).cuda(1) for k, v in sample.items() if k != "guid"})
        results.append(out) 

In [51]:
preds = [x.logits.argmax(1).item() for x in results]

In [45]:
set(preds)

{0}

In [46]:
label = [e['labels'].item() for e in dataset]

In [47]:
from sklearn.metrics import accuracy_score, f1_score

In [49]:
print(accuracy_score(label, preds))
print(accuracy_score(label, [1 for _ in range(len(label))]))

0.14252961406190295
0.8574703859380971


In [50]:
f1_score(label, preds)

0.0