In [44]:
%%javascript
// Replace the OutputArea `_should_scroll` hook with a version that answers
// `false` no matter how many lines it is given.
// NOTE(review): presumably this keeps long cell outputs fully expanded instead
// of being collapsed into a scrolling box; confirm against the notebook frontend in use.
IPython.OutputArea.prototype._should_scroll = function (lineCount) {
    return false;
};


<IPython.core.display.Javascript object>

In [45]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib import rcParams

In [46]:
%matplotlib inline

In [47]:
# Terminal escape sequences keyed by a readable name. 'END' resets the
# formatting that any of the other entries switched on.
TEXT_COLOUR = dict(
    PURPLE='\033[95m',
    CYAN='\033[96m',
    DARKCYAN='\033[36m',
    BLUE='\033[94m',
    GREEN='\033[92m',
    YELLOW='\033[93m',
    RED='\033[91m',
    BOLD='\033[1m',
    UNDERLINE='\033[4m',
    END='\033[0m',
)

def print_bold(*msgs):
    """Print ``msgs`` space-separated between the BOLD and END escape codes.

    The opening code, the message and the reset code each end up on their own
    output line, exactly as three consecutive ``print`` calls would emit them.
    """
    body = ' '.join(str(part) for part in msgs)
    print(TEXT_COLOUR['BOLD'], body, TEXT_COLOUR['END'], sep='\n')

def print_green(*msgs):
    """Print ``msgs`` space-separated between the GREEN and END escape codes.

    The opening code, the message and the reset code each end up on their own
    output line, exactly as three consecutive ``print`` calls would emit them.
    """
    body = ' '.join(str(part) for part in msgs)
    print(TEXT_COLOUR['GREEN'], body, TEXT_COLOUR['END'], sep='\n')

def print_error(*msgs):
    """Print ``msgs`` space-separated between the RED and END escape codes.

    The opening code, the message and the reset code each end up on their own
    output line, exactly as three consecutive ``print`` calls would emit them.
    """
    body = ' '.join(str(part) for part in msgs)
    print(TEXT_COLOUR['RED'], body, TEXT_COLOUR['END'], sep='\n')

def wrap_green(msg):
    """Return ``msg`` (a str) prefixed with the GREEN code and suffixed with END."""
    pieces = (TEXT_COLOUR['GREEN'], msg, TEXT_COLOUR['END'])
    return ''.join(pieces)

def wrap_red(msg):
    """Return ``msg`` (a str) prefixed with the RED code and suffixed with END."""
    pieces = (TEXT_COLOUR['RED'], msg, TEXT_COLOUR['END'])
    return ''.join(pieces)

def up_down_str(val):
    """Render ``val`` as text, coloured by its sign.

    Positive values are wrapped in green, negative values in red, and
    anything else (e.g. zero) is returned as the plain ``str(val)``.
    """
    text = str(val)
    if val > 0:
        return wrap_green(text)
    if val < 0:
        return wrap_red(text)
    return text

In [48]:
# Experiment name. It selects the sub-directory read under ../exp_results/,
# the sub-directory written under images/, and appears in plot titles.
exp='roberta-base'
# Exclusive upper bound of the layer indices iterated over by the cells below;
# they cover the indices from num_layers/2 up to num_layers - 1.
# NOTE(review): presumably the number of layers of the model named in `exp`; confirm.
num_layers = 12

In [75]:
# Tasks processed by the cells below. A longer list used to be assigned first
# and was immediately overwritten; it is kept here only as a reference:
#   ["CoLA","SST-2","MRPC","STS-B","QQP","MNLI", "MNLI-MM", "QNLI","RTE"]
tasks = ["CoLA","SST-2","MRPC","STS-B", "QNLI","RTE"]


# Metric name(s) per task. Each name is used to build the result file names
# (e.g. "base-<metric>.txt") and as the key under which values are stored.
metrics = {
    "CoLA":["mcc"],
    "MNLI":["acc"],
    "MNLI-MM":["acc"],
    "MRPC":["f1"],
    "QNLI":["acc"],
    "QQP":["f1"],
    "RTE":["acc"],
    "SST-2":["acc"],
    "STS-B":["spearmanr"],
    "WNLI":["acc"]  # temporary entry
}

# Reference score per task. All entries are currently 0.00; draw_graph treats
# a value of 0.0 as "no reference value" and then skips the reference line.
reported_in_paper = {
    "CoLA":0.00,
    "MNLI":0.00,
    "MNLI-MM":0.0,
    "MRPC":0.00,
    "QNLI":0.00,
    "QQP":0.00,
    "RTE":0.00,
    "SST-2":0.00,
    "STS-B":0.00,
    "WNLI":0.00
}

In [76]:

def get_average_val(lines, keep_ratio=0.9):
    """Average the best scores found in a list of tab-separated result lines.

    Every non-blank line is echoed to stdout and is expected to contain at
    least two TAB-separated fields; the second field is parsed as a float.
    Values equal to zero are ignored. Of the remaining values, the largest is
    always kept, and every other value is kept only if it is strictly greater
    than ``keep_ratio`` times the largest. The mean of the kept values is
    returned.

    Args:
        lines: iterable of strings, one result per line.
        keep_ratio: fraction of the best value a score must exceed to be
            included in the average. Defaults to 0.9.

    Returns:
        The mean of the kept values, or 0 when no non-zero value was found.

    Raises:
        IndexError: a non-blank line has no second TAB-separated field.
        ValueError: the second field is not a valid float.
    """
    scores = []
    for line in lines:
        # Blank or whitespace-only lines carry no value; skip them instead of
        # failing on the missing second field.
        if not line.strip():
            continue
        print('\t', line)
        val = float(line.split('\t')[1])
        if val != 0:
            scores.append(val)

    if not scores:
        return 0

    # Descending order so that scores[0] is the best value.
    scores.sort(reverse=True)
    best = scores[0]
    candidates = [best] + [val for val in scores[1:] if val > keep_ratio * best]
    return np.mean(candidates)


In [77]:
# Load the result files of every task/metric pair into `results`.
#
# results[task] maps:
#   'base-<metric>' -> averaged value from base-<metric>.txt (absent if loading failed)
#   '<metric>'      -> list of averaged values: the cumulative-layer files in the
#                      reverse of the order they were read, followed by the
#                      no_layer value as the last element (when it loaded).
results = {}

for task in tasks:
    task_results = {}
    task_metrics = metrics[task]
    for metric in task_metrics:
        result_dir = f"../exp_results/{exp}/{task}"

        # base metrics
        base_path = f"{result_dir}/base-{metric}.txt"
        try:
            print(base_path)
            # The context manager closes the file even if parsing fails.
            with open(base_path, "r") as f:
                lines = f.read().splitlines()
            task_results[f'base-{metric}'] = get_average_val(lines)
        except (OSError, ValueError, IndexError):
            # Missing/unreadable file or malformed line: report it and carry on.
            print("fail")

        # no layer metrics
        fine_tuning_metrics = []
        no_layer_path = f"{result_dir}/no_layer-{metric}.txt"
        try:
            print(no_layer_path)
            with open(no_layer_path, "r") as f:
                lines = f.read().splitlines()
            fine_tuning_metrics.append(get_average_val(lines))
        except (OSError, ValueError, IndexError):
            # NOTE: on failure the list has no no_layer entry, so its last
            # element will then be a cumulative-layer value instead.
            print("fail")

        # fine-tuned metrics
        # File names accumulate layer indices from the highest one downwards,
        # joined by '_' (e.g. "11", then "11_10", ...). A missing file here is
        # not caught and aborts the cell.
        log_file_prefix = ''
        for i in reversed(range(num_layers // 2, num_layers)):
            log_file_prefix += str(i)
            with open(f"{result_dir}/{log_file_prefix}-{metric}.txt", "r") as f:
                lines = f.read().splitlines()
            print(i)
            fine_tuning_metrics.append(get_average_val(lines))

            log_file_prefix += '_'

        task_results[f'{metric}'] = list(reversed(fine_tuning_metrics))

    results[task] = task_results

../exp_results/roberta-base/CoLA/base-mcc.txt
	 1	0.5815775806078913
	 2	0.6056612573992737
	 3	0.6056594364604692
	 4	0.59561622728651
	 5	0.5761637348694234
	 6	0.5981333967857257
../exp_results/roberta-base/CoLA/no_layer-mcc.txt
	 1	0.0
	 2	0.0
	 3	0.0
	 4	0.0
	 5	0.0
	 6	0.0
11
	 1	0.5364506086387147
	 2	0.49380076678433954
	 3	0.5206326433339101
	 4	0.5099519351292859
	 5	0.5073664747016221
	 6	0.5286883616838448
10
	 1	0.544301235611677
	 2	0.544301235611677
	 3	0.5103111740675049
	 4	0.5127811849112192
	 5	0.5142153328951897
	 6	0.5345640705889373
9
	 1	0.5260483181662633
	 2	0.5443521169935567
	 3	0.5312017201039245
	 4	0.5416905121171213
	 5	0.5584013487121077
	 6	0.5521069582827846
8
	 1	0.5630491915710935
	 3	0.5403785768297347
	 4	0.5599279872250126
	 5	0.5245973684146213
	 6	0.562419829892001
7
	 1	0.5832008422729765
	 2	0.5907413763612096
	 3	0.5882977917441249
	 4	0.5981451281520205
	 5	0.5804132033917235
	 6	0.5728724145437311
6
	 1	0.5788207437251082
	 2	0.585467058423

In [78]:
# X-axis tick labels: one per layer index in the upper half of the layer
# range, followed by a final "none" label.
x_axis = [str(layer) for layer in range(int(num_layers/2), num_layers)]
x_axis.append("none")

In [79]:
def draw_graph(task, y_label, paper, base, reported):
    """Plot ``reported`` against ``x_axis``, save the figure as PNG and show it.

    A dashed green horizontal line marks ``base``. When ``paper`` is not 0.0 a
    dashed red horizontal line marks it as well. The y-limits span the
    plotted values and the drawn reference lines, padded on both sides by 20%
    of the range of ``reported``.

    Args:
        task: task name; used in the title and as the output file name.
        y_label: label for the y axis.
        paper: reference value; 0.0 means "no reference value available".
        base: baseline value drawn as the green line and shown in the title.
        reported: sequence of y values, one per entry of the global ``x_axis``.

    Side effects:
        Creates ``images/<exp>/`` if needed and writes ``images/<exp>/<task>``
        in PNG format.
    """
    import os  # local import: only needed to create the output directory

    plt.figure(figsize=(10,6))
    plt.plot(x_axis, reported)

    plt.xlabel("layers")
    plt.ylabel(y_label)

    has_paper = paper != 0.0
    # Horizontal reference levels that must stay inside the visible y-range.
    levels = [base, paper] if has_paper else [base]

    gap = max(reported) - min(reported)
    margin = gap * 0.2
    top = max(max(reported), *levels) + margin
    bottom = min(min(reported), *levels) - margin

    plt.ylim(bottom, top)

    plt.axhline(y=base, linestyle='--', c='green')
    if has_paper:
        plt.axhline(y=paper, linestyle='--', c='red')

    plt.title(f'{exp}-{task} ({round(base,4)})')

    # savefig does not create missing directories, so make sure it exists.
    out_dir = f'images/{exp}'
    os.makedirs(out_dir, exist_ok=True)
    plt.savefig(f'{out_dir}/{task}', format='png', bbox_inches='tight')
    plt.show()

In [80]:
# Print a short summary per task/metric: the base value plus the first and
# last entries of the per-layer list, each as a percentage rounded to 2 places.
for task in tasks:
    task_results = results[task]
    task_metrics = metrics[task]
    for metric in task_metrics:
        reported = task_results[metric]
        base = task_results[f'base-{metric}']
        print_bold(task, metric)
        summary_rows = (
            ("base", base),
            ("50%", reported[0]),
            ("none", reported[-1]),
        )
        for label, value in summary_rows:
            print(f"\t{label} : {round(value * 100, 2)}")
        # Plotting is currently disabled:
        # draw_graph(task, metric, reported_in_paper[task], base, reported)

[1m
CoLA mcc
[0m
	base : 59.38
	50% : 58.45
	none : 0
[1m
SST-2 acc
[0m
	base : 94.31
	50% : 94.0
	none : 80.17
[1m
MRPC f1
[0m
	base : 92.34
	50% : 90.49
	none : 81.22
[1m
STS-B spearmanr
[0m
	base : 90.62
	50% : 88.89
	none : 20.0
[1m
QNLI acc
[0m
	base : 92.75
	50% : 91.71
	none : 65.71
[1m
RTE acc
[0m
	base : 77.51
	50% : 75.45
	none : 57.54
