In [89]:
from pathlib import Path

import numpy as np
import pandas as pd
from datasets import load_dataset
from transformers import BertTokenizer

In [90]:
# Task names; the results cell uses each one as a directory name and, lower-cased,
# inside the results file name.
GLUE_TASKS = [
    'CoLA',
    'MNLI',
    'MRPC',
    'QNLI',
    'QQP',
    'RTE',
    'SST-2',
    'STS-B',
    'WNLI',
]

# Model / run identifiers; each one is a sub-directory under a task directory and
# later becomes a prediction column name.
MODELS = [
    'bert-base-uncased',
    'Nov09_19-12-14_elm15',
    'Nov09_23-02-38_elm15',
]

In [91]:
# Timestamp that names the evaluation run directory under 'runs'.
target_timestamp = '20201115_1429'
# Root directory of that run; per-task and per-model paths are built from it.
target_path = Path('runs', target_timestamp)

In [92]:
# Print every line of each model's evaluation report, grouped by task.
for task in GLUE_TASKS:
    print(f'{task}:')
    # The directory uses the task name as-is; the file name uses it lower-cased.
    report_name = f'eval_results_{task.lower()}.txt'
    for model in MODELS:
        task_txt = target_path / task / model / report_name
        # Task/model pairs without a report file are skipped without any message.
        if task_txt.exists():
            with task_txt.open('r') as f:
                print(f'  {model}:')
                for line in f:
                    print(f'\t{line.strip()}')

CoLA:
  bert-base-uncased:
	eval_loss = 0.5498700141906738
	eval_mcc = 0.5468753188432375
  Nov09_19-12-14_elm15:
	eval_loss = 0.5325039625167847
	eval_mcc = 0.5858564219548863
  Nov09_23-02-38_elm15:
	eval_loss = 0.5603988766670227
	eval_mcc = 0.5936105573332983
MNLI:
  bert-base-uncased:
	eval_loss = 0.49452319741249084
	eval_mnli/acc = 0.8445236882322975
  Nov09_19-12-14_elm15:
	eval_loss = 0.6853656768798828
	eval_mnli/acc = 0.8371879775853286
  Nov09_23-02-38_elm15:
	eval_loss = 0.6645665168762207
	eval_mnli/acc = 0.8388181355068772
MRPC:
  bert-base-uncased:
	eval_loss = 0.4949655830860138
	eval_acc = 0.8112745098039216
	eval_f1 = 0.8752025931928687
	eval_acc_and_f1 = 0.8432385514983951
  Nov09_19-12-14_elm15:
	eval_loss = 0.487006276845932
	eval_acc = 0.8333333333333334
	eval_f1 = 0.8855218855218856
	eval_acc_and_f1 = 0.8594276094276094
  Nov09_23-02-38_elm15:
	eval_loss = 0.4862278997898102
	eval_acc = 0.8308823529411765
	eval_f1 = 0.8840336134453781
	eval_acc_and_f1 = 0.857457

In [93]:
# Validation split of the GLUE 'wnli' configuration; the following cell reads its
# 'sentence1', 'sentence2' and 'label' columns.
# `load_dataset` is imported in the notebook's top imports cell.
wnli = load_dataset('glue', 'wnli', split='validation')

Reusing dataset glue (/Users/otakumesi/.cache/huggingface/datasets/glue/wnli/1.0.0/7c99657241149a24692c402a5c3f34d4c9f1df5ac2e4c3759fadea38f6cb29c4)


In [94]:
# One row per WNLI validation example: both sentences and the gold label.
df = pd.DataFrame({'sentence1': wnli['sentence1'], 'sentence2': wnli['sentence2'], 'label': wnli['label']})

# Add one prediction column per model, named after the model.
# NOTE(review): the directory is spelled 'wnli' here, while the results cell builds
# directories from the task name as listed in GLUE_TASKS ('WNLI'). Confirm the casing
# on disk before running on a case-sensitive file system.
for model in MODELS:
    task_txt = target_path / 'wnli' / model / 'eval_labels_wnli.txt'
    df_task_results = pd.read_csv(task_txt, sep='\t')
    # Column assignment aligns on the index, so a file with a different number of
    # rows would silently produce NaN or dropped predictions. Fail loudly instead.
    if len(df_task_results) != len(df):
        raise ValueError(
            f'{task_txt} has {len(df_task_results)} rows, '
            f'expected {len(df)} (one per WNLI validation example)'
        )
    df[model] = df_task_results['prediction']

In [95]:
# Widen pandas' display limits so long sentence columns and whole result tables
# are rendered without truncation.
pd.options.display.max_columns = 500
pd.options.display.max_rows = 500
pd.options.display.max_colwidth = 700

In [109]:
# Examples that 'bert-base-uncased' labels correctly but 'Nov09_19-12-14_elm15' does not.
bert_base_is_right = df['bert-base-uncased'] == df['label']
elm15_is_wrong = df['Nov09_19-12-14_elm15'] != df['label']
only_pred_bert_base = df[bert_base_is_right & elm15_is_wrong]
only_pred_bert_base

Unnamed: 0,sentence1,sentence2,label,bert-base-uncased,Nov09_19-12-14_elm15,Nov09_23-02-38_elm15
2,"Beth didn't get angry with Sally, who had cut her off, because she stopped and counted to ten.",Sally stopped and counted to ten.,0,0,1,1
5,"Susan knew that Ann's son had been in a car accident, so she told her about it.",Ann told her about it.,0,0,1,1
8,Look! There is a minnow swimming right below that duck! It had better get away to safety fast!,The duck had better get away to safety fast!,0,0,1,1
10,"When the sponsors of the bill got to the town hall, they were surprised to find that the room was full of opponents. They were very much in the majority.",The sponsors were very much in the majority.,0,0,1,1
14,Madonna fired her trainer because she slept with her boyfriend.,Madonna slept with her boyfriend.,0,0,1,1
15,"If the con artist has succeeded in fooling Sam, he would have gotten a lot of money.",Sam would have gotten a lot of money.,0,0,1,1
17,"Everyone really loved the oatmeal cookies; only a few people liked the chocolate chip cookies. Next time, we should make fewer of them.",We should make fewer of the oatmeal cookies.,0,0,1,1
18,Bob collapsed on the sidewalk. Soon he saw Carl coming to help. He was very ill.,Carl was very ill.,0,0,1,1
21,"Ann asked Mary what time the library closes, because she had forgotten.",Mary had forgotten.,0,0,1,1
22,"George got free tickets to the play, but he gave them to Eric, even though he was particularly eager to see it.",Eric was particularly eager to see it.,0,0,1,1


In [119]:
# Load the pretrained 'bert-base-uncased' tokenizer; the trailing expression displays
# the id it uses for its unknown-token placeholder.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokenizer.unk_token_id

100

In [124]:
# Encode each premise and decode the ids straight back, to show the text as the
# tokenizer represents it (special tokens included).
[
    tokenizer.decode(encoding['input_ids'])
    for encoding in map(tokenizer, only_pred_bert_base['sentence1'])
]

["[CLS] beth didn't get angry with sally, who had cut her off, because she stopped and counted to ten. [SEP]",
 "[CLS] susan knew that ann's son had been in a car accident, so she told her about it. [SEP]",
 '[CLS] look! there is a minnow swimming right below that duck! it had better get away to safety fast! [SEP]',
 '[CLS] when the sponsors of the bill got to the town hall, they were surprised to find that the room was full of opponents. they were very much in the majority. [SEP]',
 '[CLS] madonna fired her trainer because she slept with her boyfriend. [SEP]',
 '[CLS] if the con artist has succeeded in fooling sam, he would have gotten a lot of money. [SEP]',
 '[CLS] everyone really loved the oatmeal cookies ; only a few people liked the chocolate chip cookies. next time, we should make fewer of them. [SEP]',
 '[CLS] bob collapsed on the sidewalk. soon he saw carl coming to help. he was very ill. [SEP]',
 '[CLS] ann asked mary what time the library closes, because she had forgotten. [

In [104]:
# Examples that both 'bert-base-uncased' and 'Nov09_19-12-14_elm15' label correctly.
bert_base_is_right = df['bert-base-uncased'] == df['label']
elm15_is_right = df['Nov09_19-12-14_elm15'] == df['label']
all_pred = df[bert_base_is_right & elm15_is_right]
all_pred

Unnamed: 0,sentence1,sentence2,label,bert-base-uncased,Nov09_19-12-14_elm15,Nov09_23-02-38_elm15
0,The drain is clogged with hair. It has to be cleaned.,The hair has to be cleaned.,0,0,0,0
9,"Bernard, who had not told the government official that he was less than 21 when he filed for a homestead claim, did not consider that he had done anything dishonest. Still, anyone who knew that he was 19 years old could take his claim away from him.",Anyone who knew that he was 19 years old could take his claim away from anyone.,0,0,0,0
12,The large ball crashed right through the table because it was made of styrofoam.,The large ball was made of styrofoam.,0,0,0,0
13,"I tried to paint a picture of an orchard, with lemons in the lemon trees, but they came out looking more like light bulbs.",The lemon trees came out looking more like light bulbs.,0,0,0,0
19,"Mr. Moncrieff visited Chester's luxurious New York apartment, thinking that it belonged to his son Edward. The result was that Mr. Moncrieff has decided to cancel Edward's allowance on the ground that he no longer requires his financial support.",He no longer requires Chester's financial support.,0,0,0,0
20,"Tatyana knew that Grandma always enjoyed serving an abundance of food to her guests. Now Tatyana watched as Grandma gathered Tatyana's small mother into a wide, scrawny embrace and then propelled her to the table, lifting her shawl from her shoulders, seating her in the place of honor, and saying simply: ""There's plenty.""","Grandma gathered Tatyana's small mother into a wide, scrawny embrace and then propelled Tatyana to the table.",0,0,0,0
36,"I tried to paint a picture of an orchard, with lemons in the lemon trees, but they came out looking more like telephone poles.",The lemons came out looking more like telephone poles.,0,0,0,0
37,"Always before, Larry had helped Dad with his work. But he could not help him now, for Dad said that his boss at the railroad company would not want anyone but him to work in the office.",He could not help Larry now.,0,0,0,0
40,The drain is clogged with hair. It has to be removed.,The drain has to be removed.,0,0,0,0
44,I couldn't put the pot on the shelf because it was too high.,The pot was too high.,0,0,0,0


In [123]:
# Re-create the tokenizer in this cell, then run each premise through an
# encode/decode round trip.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
[
    tokenizer.decode(encoding['input_ids'])
    for encoding in map(tokenizer, all_pred['sentence1'])
]

['[CLS] the drain is clogged with hair. it has to be cleaned. [SEP]',
 '[CLS] bernard, who had not told the government official that he was less than 21 when he filed for a homestead claim, did not consider that he had done anything dishonest. still, anyone who knew that he was 19 years old could take his claim away from him. [SEP]',
 '[CLS] the large ball crashed right through the table because it was made of styrofoam. [SEP]',
 '[CLS] i tried to paint a picture of an orchard, with lemons in the lemon trees, but they came out looking more like light bulbs. [SEP]',
 "[CLS] mr. moncrieff visited chester's luxurious new york apartment, thinking that it belonged to his son edward. the result was that mr. moncrieff has decided to cancel edward's allowance on the ground that he no longer requires his financial support. [SEP]",
 '[CLS] tatyana knew that grandma always enjoyed serving an abundance of food to her guests. now tatyana watched as grandma gathered tatyana\'s small mother into a wi

In [106]:
# Examples that neither 'bert-base-uncased' nor 'Nov09_19-12-14_elm15' labels correctly.
bert_base_is_wrong = df['bert-base-uncased'] != df['label']
elm15_is_wrong = df['Nov09_19-12-14_elm15'] != df['label']
no_pred = df[bert_base_is_wrong & elm15_is_wrong]
no_pred

Unnamed: 0,sentence1,sentence2,label,bert-base-uncased,Nov09_19-12-14_elm15,Nov09_23-02-38_elm15
1,Jane knocked on Susan's door but she did not answer.,Susan did not answer.,1,0,0,0
3,No one joins Facebook to be sad and lonely. But a new study from the University of Wisconsin psychologist George Lincoln argues that that's exactly how it makes us feel.,That's exactly how Facebook makes us feel.,1,0,0,0
4,The man couldn't lift his son because he was so heavy.,The son was so heavy.,1,0,0,0
6,"When Tommy dropped his ice cream, Timmy giggled, so father gave him a stern look.",Father gave Timmy a stern look.,1,0,0,0
7,"There is a pillar between me and the stage, and I can't see around it.",I can't see around the pillar.,1,0,0,0
11,I can't cut that tree down with that axe; it is too thick.,The tree is too thick.,1,0,0,0
23,The delivery truck zoomed by the school bus because it was going so slow.,The school bus was going so slow.,1,0,0,0
27,Fred is the only man still alive who remembers my great-grandfather. He is a remarkable man.,Fred is a remarkable man.,1,0,0,0
28,"Always before, Larry had helped Dad with his work. But he could not help him now, for Dad said that his boss at the railroad company would not want anyone but him to work in the office.",Larry could not help him now.,1,0,0,0
30,"They broadcast an announcement, but a subway came into the station and I couldn't hear over it.",I couldn't hear the subway.,1,0,0,0


In [116]:
# Re-create the tokenizer in this cell, then list the individual tokens
# (special tokens included) produced for each premise.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
[
    tokenizer.convert_ids_to_tokens(encoding['input_ids'])
    for encoding in map(tokenizer, no_pred['sentence1'])
]

[['[CLS]',
  'jane',
  'knocked',
  'on',
  'susan',
  "'",
  's',
  'door',
  'but',
  'she',
  'did',
  'not',
  'answer',
  '.',
  '[SEP]'],
 ['[CLS]',
  'no',
  'one',
  'joins',
  'facebook',
  'to',
  'be',
  'sad',
  'and',
  'lonely',
  '.',
  'but',
  'a',
  'new',
  'study',
  'from',
  'the',
  'university',
  'of',
  'wisconsin',
  'psychologist',
  'george',
  'lincoln',
  'argues',
  'that',
  'that',
  "'",
  's',
  'exactly',
  'how',
  'it',
  'makes',
  'us',
  'feel',
  '.',
  '[SEP]'],
 ['[CLS]',
  'the',
  'man',
  'couldn',
  "'",
  't',
  'lift',
  'his',
  'son',
  'because',
  'he',
  'was',
  'so',
  'heavy',
  '.',
  '[SEP]'],
 ['[CLS]',
  'when',
  'tommy',
  'dropped',
  'his',
  'ice',
  'cream',
  ',',
  'timmy',
  'giggled',
  ',',
  'so',
  'father',
  'gave',
  'him',
  'a',
  'stern',
  'look',
  '.',
  '[SEP]'],
 ['[CLS]',
  'there',
  'is',
  'a',
  'pillar',
  'between',
  'me',
  'and',
  'the',
  'stage',
  ',',
  'and',
  'i',
  'can',
  "'",
  

In [111]:
# Examples that 'Nov09_19-12-14_elm15' labels correctly but 'bert-base-uncased' does not.
bert_base_is_wrong = df['bert-base-uncased'] != df['label']
elm15_is_right = df['Nov09_19-12-14_elm15'] == df['label']
only_pred_finetune = df[bert_base_is_wrong & elm15_is_right]
only_pred_finetune

Unnamed: 0,sentence1,sentence2,label,bert-base-uncased,Nov09_19-12-14_elm15,Nov09_23-02-38_elm15
16,"The lawyer asked the witness a question, but he was reluctant to repeat it.",The lawyer was reluctant to repeat it.,1,0,1,1
25,Mark heard Steve's feet going down the ladder. The door of the shop closed after him. He ran to look out the window.,Mark ran to look out the window.,1,0,1,1
29,"George got free tickets to the play, but he gave them to Eric, because he was particularly eager to see it.",Eric was particularly eager to see it.,1,0,1,1
32,"Beth didn't get angry with Sally, who had cut her off, because she stopped and apologized.",Sally stopped and apologized.,1,0,1,1
46,"The cat was lying by the mouse hole waiting for the mouse, but it was too cautious.",The mouse was too cautious.,1,0,1,1


In [112]:
# Re-create the tokenizer in this cell, then run each premise through an
# encode/decode round trip.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
[
    tokenizer.decode(encoding['input_ids'])
    for encoding in map(tokenizer, only_pred_finetune['sentence1'])
]

['[CLS] the lawyer asked the witness a question, but he was reluctant to repeat it. [SEP]',
 "[CLS] mark heard steve's feet going down the ladder. the door of the shop closed after him. he ran to look out the window. [SEP]",
 '[CLS] george got free tickets to the play, but he gave them to eric, because he was particularly eager to see it. [SEP]',
 "[CLS] beth didn't get angry with sally, who had cut her off, because she stopped and apologized. [SEP]",
 '[CLS] the cat was lying by the mouse hole waiting for the mouse, but it was too cautious. [SEP]']

In [115]:
# Probe sentence to see how the tokenizer splits words into '##' sub-word pieces.
probe_ids = tokenizer('otakumesi asks yot to go to shopping.')['input_ids']
tokenizer.convert_ids_to_tokens(probe_ids)

['[CLS]',
 'ot',
 '##ak',
 '##ume',
 '##si',
 'asks',
 'yo',
 '##t',
 'to',
 'go',
 'to',
 'shopping',
 '.',
 '[SEP]']