In [1]:
%%javascript
// Replace the output area's scroll check with one that answers "no" for any
// line count -- presumably so long outputs are rendered in full rather than
// inside a scrolling box.
IPython.OutputArea.prototype._should_scroll = function (lineCount) {
    return false;
};


<IPython.core.display.Javascript object>

In [22]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib import rcParams
from pprint import pprint

In [3]:
%matplotlib inline

In [4]:
# Terminal escape sequences, keyed by a readable name. 'END' is the sequence
# the helper functions in this cell emit after coloured/bold text.
TEXT_COLOUR = dict(
    PURPLE="\033[95m",
    CYAN="\033[96m",
    DARKCYAN="\033[36m",
    BLUE="\033[94m",
    GREEN="\033[92m",
    YELLOW="\033[93m",
    RED="\033[91m",
    BOLD="\033[1m",
    UNDERLINE="\033[4m",
    END="\033[0m",
)

def print_bold(*msgs):
    """Print ``msgs`` between the BOLD and END escape sequences.

    The two escape sequences are written by their own ``print`` calls, so
    each one is followed by a newline.
    """
    bold_on = TEXT_COLOUR['BOLD']
    print(bold_on)
    print(*msgs)
    reset = TEXT_COLOUR['END']
    print(reset)

def print_green(*msgs):
    """Print ``msgs`` between the GREEN and END escape sequences.

    The two escape sequences are written by their own ``print`` calls, so
    each one is followed by a newline.
    """
    green_on = TEXT_COLOUR['GREEN']
    print(green_on)
    print(*msgs)
    reset = TEXT_COLOUR['END']
    print(reset)

def print_error(*msgs):
    """Print ``msgs`` between the RED and END escape sequences.

    The two escape sequences are written by their own ``print`` calls, so
    each one is followed by a newline.
    """
    red_on = TEXT_COLOUR['RED']
    print(red_on)
    print(*msgs)
    reset = TEXT_COLOUR['END']
    print(reset)

def wrap_green(msg):
    """Return ``msg`` with the GREEN sequence prepended and END appended."""
    prefix = TEXT_COLOUR['GREEN']
    suffix = TEXT_COLOUR['END']
    return prefix + msg + suffix

def wrap_red(msg):
    """Return ``msg`` with the RED sequence prepended and END appended."""
    prefix = TEXT_COLOUR['RED']
    suffix = TEXT_COLOUR['END']
    return prefix + msg + suffix

def up_down_str(val):
    """Render ``val`` as a string coloured by its sign.

    Positive values are wrapped with ``wrap_green``, negative values with
    ``wrap_red``; anything else (e.g. zero) is returned as plain ``str(val)``.
    """
    text = str(val)
    if val > 0:
        return wrap_green(text)
    if val < 0:
        return wrap_red(text)
    return text

In [6]:
# Task names, in the order they are processed. find_best() also uses them as
# directory names when building log-file paths.
tasks = "CoLA MNLI MNLI-MM MRPC QNLI QQP RTE SST-2 STS-B WNLI".split()

# Metric names recorded for each task; find_best() uses them as part of the
# log-file name.
metrics = {
    "CoLA": ["mcc"],
    "MNLI": ["acc"],
    "MNLI-MM": ["acc"],
    "MRPC": ["acc", "f1", "acc_and_f1"],
    "QNLI": ["acc"],
    "QQP": ["acc", "f1", "acc_and_f1"],
    "RTE": ["acc"],
    "SST-2": ["acc"],
    "STS-B": ["pearson", "spearmanr", "corr"],
    "WNLI": ["acc"],
}

# Sanity check: show the metric list of every task, one per line.
for task in tasks:
    task_metric_names = metrics[task]
    print(task_metric_names)

['mcc']
['acc']
['acc']
['acc', 'f1', 'acc_and_f1']
['acc']
['acc', 'f1', 'acc_and_f1']
['acc']
['acc']
['pearson', 'spearmanr', 'corr']
['acc']


In [25]:
def find_best(exp, log_root="../logs/baseline", task_names=None, task_metrics=None):
    """Find, per task and metric, the learning rate with the highest mean score.

    For every task and each of its metrics, the files
    ``{log_root}/{exp}/{task}/{i}e-5_{metric}.txt`` are read for ``i`` in 1..5.
    Each non-blank line is split on tabs and its second field is parsed as a
    float; values equal to zero are ignored. The remaining values are averaged
    per file, and the ``i`` with the largest average wins (the lowest ``i`` on
    ties).

    Parameters
    ----------
    exp : str
        Experiment directory name under ``log_root``.
    log_root : str, optional
        Directory that contains the per-experiment log folders.
    task_names : iterable of str, optional
        Tasks to evaluate. Defaults to the notebook-level ``tasks`` list.
    task_metrics : mapping of str to list of str, optional
        Metric names per task. Defaults to the notebook-level ``metrics`` dict.

    Returns
    -------
    dict
        ``{task: {metric: "<i> - <best mean>"}}`` where ``<i>`` is the
        learning-rate multiplier taken from the winning file name.

    Raises
    ------
    ValueError
        If none of the five files for a task/metric holds a non-zero value.
    OSError
        If a log file cannot be opened (propagated from ``open``).
    """
    if task_names is None:
        task_names = tasks
    if task_metrics is None:
        task_metrics = metrics

    results = {}

    for task in task_names:
        task_results = {}
        for metric in task_metrics[task]:
            # Keep the learning-rate index next to each mean: a file without
            # usable values is skipped, so a position in the means list does
            # not necessarily equal (learning-rate index - 1).
            lr_ids = []
            lr_means = []
            for i in range(1, 6):
                path = f"{log_root}/{exp}/{task}/{i}e-5_{metric}.txt"
                # Context manager guarantees the handle is closed.
                with open(path, "r") as f:
                    lines = f.read().splitlines()

                reported = []
                for line in lines:
                    if not line.strip():
                        # Tolerate blank lines instead of failing on them.
                        continue
                    val = float(line.split('\t')[1])
                    if val != 0:
                        reported.append(val)

                if reported:
                    lr_ids.append(i)
                    lr_means.append(np.mean(reported))

            if not lr_means:
                raise ValueError(
                    f"no non-zero values found for task {task!r}, metric {metric!r} "
                    f"in experiment {exp!r}"
                )

            # np.argmax returns the first maximum, i.e. the lowest learning rate on ties.
            best = int(np.argmax(lr_means))
            task_results[metric] = f"{lr_ids[best]} - {lr_means[best]}"
        results[task] = task_results

    return results

In [26]:
# Experiment whose logs are summarised in this cell.
exp = 'bert-base'

pprint(find_best(exp))

# Reference scores keyed by task name.
# NOTE(review): this dict is byte-identical in all four model cells and is not
# read by any visible cell -- confirm which model/paper these numbers belong to.
reported_in_paper = {
    "CoLA": 0.521,
    "MNLI": 0.846,
    "MNLI-MM": 0.834,
    "MRPC": 0.889,
    "QNLI": 0.905,
    "QQP": 0.712,
    "RTE": 0.664,
    "SST-2": 0.935,
    "STS-B": 0.858,
    "WNLI": 0.651,  # temp
}

{'CoLA': {'mcc': '3 - 0.5746376040603758'},
 'MNLI': {'acc': '2 - 0.8426388181355069'},
 'MNLI-MM': {'acc': '2 - 0.8495219690805533'},
 'MRPC': {'acc': '3 - 0.8762254901960784',
          'acc_and_f1': '3 - 0.8945497362052588',
          'f1': '3 - 0.9128739822144393'},
 'QNLI': {'acc': '2 - 0.9127768625297455'},
 'QQP': {'acc': '2 - 0.9086940390798912',
         'acc_and_f1': '2 - 0.8928583307670738',
         'f1': '2 - 0.8770226224542563'},
 'RTE': {'acc': '3 - 0.6750902527075813'},
 'SST-2': {'acc': '2 - 0.9288990825688074'},
 'STS-B': {'corr': '5 - 0.8922247379755471',
           'pearson': '5 - 0.8947432417497347',
           'spearmanr': '5 - 0.8897062342013595'},
 'WNLI': {'acc': '4 - 0.4366197183098592'}}


In [27]:
# Experiment whose logs are summarised in this cell.
exp = 'bert-large'

pprint(find_best(exp))

# Reference scores keyed by task name.
# NOTE(review): this dict is byte-identical in all four model cells and is not
# read by any visible cell -- confirm which model/paper these numbers belong to.
reported_in_paper = {
    "CoLA": 0.521,
    "MNLI": 0.846,
    "MNLI-MM": 0.834,
    "MRPC": 0.889,
    "QNLI": 0.905,
    "QQP": 0.712,
    "RTE": 0.664,
    "SST-2": 0.935,
    "STS-B": 0.858,
    "WNLI": 0.651,  # temp
}

{'CoLA': {'mcc': '3 - 0.6172866282938361'},
 'MNLI': {'acc': '1 - 0.8643402954661232'},
 'MNLI-MM': {'acc': '2 - 0.8646257119609438'},
 'MRPC': {'acc': '2 - 0.8602941176470589',
          'acc_and_f1': '2 - 0.8821671248600621',
          'f1': '2 - 0.9040401320730653'},
 'QNLI': {'acc': '1 - 0.9206479956068094'},
 'QQP': {'acc': '1 - 0.9122062824635172',
         'acc_and_f1': '1 - 0.897048599018555',
         'f1': '1 - 0.8818909155735929'},
 'RTE': {'acc': '3 - 0.7111913357400722'},
 'SST-2': {'acc': '1 - 0.9340596330275229'},
 'STS-B': {'corr': '2 - 0.8996243628641183',
           'pearson': '2 - 0.9009686537670677',
           'spearmanr': '2 - 0.898280071961169'},
 'WNLI': {'acc': '3 - 0.5'}}


In [28]:
# Experiment whose logs are summarised in this cell.
exp = 'roberta-base'

pprint(find_best(exp))

# Reference scores keyed by task name.
# NOTE(review): this dict is byte-identical in all four model cells and is not
# read by any visible cell -- confirm which model/paper these numbers belong to.
reported_in_paper = {
    "CoLA": 0.521,
    "MNLI": 0.846,
    "MNLI-MM": 0.834,
    "MRPC": 0.889,
    "QNLI": 0.905,
    "QQP": 0.712,
    "RTE": 0.664,
    "SST-2": 0.935,
    "STS-B": 0.858,
    "WNLI": 0.651,  # temp
}

{'CoLA': {'mcc': '3 - 0.6161067903576496'},
 'MNLI': {'acc': '1 - 0.8742740703005604'},
 'MNLI-MM': {'acc': '1 - 0.871033360455655'},
 'MRPC': {'acc': '3 - 0.8946078431372549',
          'acc_and_f1': '3 - 0.9091156444815938',
          'f1': '3 - 0.9236234458259325'},
 'QNLI': {'acc': '2 - 0.9256818597840014'},
 'QQP': {'acc': '1 - 0.9167202572347267',
         'acc_and_f1': '1 - 0.9027605381421164',
         'f1': '1 - 0.8888008190495063'},
 'RTE': {'acc': '1 - 0.7689530685920578'},
 'SST-2': {'acc': '1 - 0.9438073394495413'},
 'STS-B': {'corr': '3 - 0.9065130409589317',
           'pearson': '3 - 0.9077812569357945',
           'spearmanr': '4 - 0.9055167415395866'},
 'WNLI': {'acc': '5 - 0.5633802816901409'}}


In [29]:
# Experiment whose logs are summarised in this cell.
exp = 'roberta-large'

pprint(find_best(exp))

# Reference scores keyed by task name.
# NOTE(review): this dict is byte-identical in all four model cells and is not
# read by any visible cell -- confirm which model/paper these numbers belong to.
reported_in_paper = {
    "CoLA": 0.521,
    "MNLI": 0.846,
    "MNLI-MM": 0.834,
    "MRPC": 0.889,
    "QNLI": 0.905,
    "QQP": 0.712,
    "RTE": 0.664,
    "SST-2": 0.935,
    "STS-B": 0.858,
    "WNLI": 0.651,  # temp
}

{'CoLA': {'mcc': '1 - 0.6651627094568674'},
 'MNLI': {'acc': '1 - 0.8992358634742741'},
 'MNLI-MM': {'acc': '1 - 0.8962571196094385'},
 'MRPC': {'acc': '1 - 0.8946078431372549',
          'acc_and_f1': '1 - 0.9093850503164229',
          'f1': '1 - 0.9241622574955909'},
 'QNLI': {'acc': '1 - 0.9461834157056562'},
 'QQP': {'acc': '1 - 0.9190699975265891',
         'acc_and_f1': '1 - 0.9051611620177482',
         'f1': '1 - 0.8912523265089072'},
 'RTE': {'acc': '1 - 0.776173285198556'},
 'SST-2': {'acc': '1 - 0.9518348623853211'},
 'STS-B': {'corr': '1 - 0.9190949589877095',
           'pearson': '1 - 0.9194788577334798',
           'spearmanr': '1 - 0.9187110602419393'},
 'WNLI': {'acc': '2 - 0.5633802816901409'}}
