# Edgeprobe template

This is the main analysis notebook.

In [1]:
import sys, os, re, json
from importlib import reload
import itertools
import collections

import numpy as np
import pandas as pd

import analysis
reload(analysis)

tasks = analysis.TASKS
exp_types = analysis.EXP_TYPES

task_sort_key = analysis.task_sort_key
exp_type_sort_key = analysis.exp_type_sort_key

from scipy.special import logsumexp
from scipy.stats import entropy

def softmax(x, axis=None):
    return np.exp(x - logsumexp(x, axis=axis, keepdims=True))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import bokeh
import bokeh.plotting as bp
bp.output_notebook()

import datetime
import socket
def get_compact_timestamp():
    now = datetime.datetime.now()
    return now.strftime("%Y%m%d.%H%M%S")

def _save_figure_to_bucket(fig, name, title=None, export_format="html"):
    now = get_compact_timestamp()
    fname = f"{name}.{now:s}.{export_format}"
    title = title or name
    if fname.endswith('.png'):
        bokeh.io.export_png(p, os.path.join("/tmp", fname))
    else:
        bp.save(p, os.path.join("/tmp", fname), title=title, 
                resources=bokeh.resources.CDN)
    hostname = socket.gethostname()
    GCP_PROJECT="edge-probing"
    !gsutil cp /tmp/$fname gs://$GCP_PROJECT/$hostname/plots/$fname
    !gsutil acl ch -u AllUsers:R gs://$GCP_PROJECT/$hostname/plots/$fname
    url = f"https://storage.googleapis.com/{GCP_PROJECT}/{hostname}/plots/{fname}"
    print(f"Public URL: {url}")
    return url

In [4]:
ID_COLS = ['run', 'task', 'split']

def agg_label_group(df, task_predicate, label_predicate, group_name):
    agg_map = {k:"sum" for k in df.columns if k.endswith("_count")}
    mask = df['task'].map(task_predicate) & df['label'].map(label_predicate)
    sdf = df[mask].groupby(by=ID_COLS).agg(agg_map).reset_index()
    sdf['label'] = group_name
    return sdf

def agg_stratifier_group(df, stratifier, key_predicate, group_name):
    agg_map = {k:"sum" for k in df.columns if k.endswith("_count")}
    # Use this for short-circuit evaluation, so we don't call key_predicate on invalid keys
    mask = [(s == stratifier and key_predicate(key)) 
            for s, key in zip(df['stratifier'], df['stratum_key'])]
    sdf = df[mask].groupby(by=ID_COLS).agg(agg_map).reset_index()
    sdf['label'] = group_name
    return sdf    

def load_scores_file(filename, tag=None, seed=None):
    df = pd.read_csv(filename, sep="\t", header=0)
    df.drop(['Unnamed: 0'], axis='columns', inplace=True)
    # df['task_raw'] = df['task'].copy()
    df['task'] = df['task'].map(analysis.clean_task_name)
    if not "stratifier" in df.columns:
        df["stratifier"] = None
    if not "stratum_key" in df.columns:
        df["stratum_key"] = 0
    ###
    # Add additional custom aggregations
    _eg = []
    # SRL core, non-core, and cleaned micro F1
    _eg.append(agg_label_group(df, analysis.is_srl_task, analysis.is_core_role, "_core_"))
    _eg.append(agg_label_group(df, analysis.is_srl_task, analysis.is_non_core_role, "_non_core_"))
    _eg.append(agg_label_group(df, analysis.is_srl_task, analysis.is_core_or_noncore, "_clean_micro_"))
    # Constituents: split into POS, nonterminals
    _eg.append(agg_stratifier_group(df, 'info.height', lambda x: int(x) == 1, "_pos_"))
    _eg.append(agg_stratifier_group(df, 'info.height', lambda x: int(x) > 1, "_nonterminal_"))
    # Relations: ignore negative class (no_relation)
    _eg.append(agg_label_group(df, analysis.is_relation_task, analysis.is_positive_relation, "_clean_micro_"))
    df = pd.concat([df] + _eg, ignore_index=True, sort=False)


    df.insert(0, "exp_name", df['run'].map(lambda p: os.path.basename(os.path.dirname(p.strip("/")))))
    """ここが問題"""
    
    df.insert(1, "exp_type", df['exp_name'].map(analysis.get_exp_type))
    df.insert(1, "layer_num", df['exp_name'].map(analysis.get_layer_num))
    if tag is not None:
        df.insert(0, "tag", tag)
    df.insert(1, "seed", seed)
    return df

In [5]:
score_files = []
score_files = [
    ("bert", "),
]
dfs = []
for tag, score_file in score_files:
    df = load_scores_file(score_file, tag=tag)
    dfs.append(df)

df = pd.concat(dfs, ignore_index=True, sort=False)
def _format_display_col(exp_type, layer_num, tag):
    ret = exp_type
    if layer_num:
        ret += f"-{layer_num}"
    if tag:
        ret += f" ({tag})"
    return ret
# df['display_col'] = ["%s (%s)" % et for et in zip(df.exp_type, df.tag)]
df['display_col'] = list(map(_format_display_col, df.exp_type, df.layer_num, df.tag))
print(df['task'].unique())
print(df['exp_type'].unique())

['coref-ontonotes' 'rel-semeval' 'spr2' 'ner-ontonotes' 'dep-ud-ewt'
 'srl-ontonotes' 'nonterminal-ontonotes' 'pos-ontonotes']
['bert-base-uncased-mix' 'visualbert-mix' 'lxmert-mix']


In [6]:
analysis.score_from_confusion_matrix(df)

##
# Set 'score' column for task-appropriate metric
def _get_final_score(row):
    if row['task'] == 'noun-verb':
        return row['accuracy'], row['accuracy_errn95']
    else:
        return row['f1_score'], row['f1_errn95']

df['score'], df['score_errn95'] = zip(*(_get_final_score(row) for i, row in df.iterrows()))

In [7]:
df

Unnamed: 0,tag,seed,exp_name,layer_num,exp_type,run,task,split,label,tn_count,...,accuracy,precision,recall,f1_score,accuracy_errn95,precision_errn95,recall_errn95,f1_errn95,score,score_errn95
0,bert,,bert-base-uncased-mix_12-edges-coref-ontonotes,12,bert-base-uncased-mix,/cl/work4/naoto-sh/exp/bert-base_20221211/bert...,coref-ontonotes,val,0,5267,...,0.957810,0.973700,0.972134,0.972916,0.002428,0.002191,0.002252,0.002221,0.972916,0.002221
1,bert,,bert-base-uncased-mix_12-edges-coref-ontonotes,12,bert-base-uncased-mix,/cl/work4/naoto-sh/exp/bert-base_20221211/bert...,coref-ontonotes,val,1,19957,...,0.957848,0.902330,0.906993,0.904656,0.002427,0.007617,0.007471,0.007543,0.904656,0.007543
2,bert,,bert-base-uncased-mix_12-edges-coref-ontonotes,12,bert-base-uncased-mix,/cl/work4/naoto-sh/exp/bert-base_20221211/bert...,coref-ontonotes,val,_macro_avg_,25224,...,0.957829,0.957881,0.957772,0.957826,0.001717,0.002426,0.002429,0.002428,0.957826,0.002428
3,bert,,bert-base-uncased-mix_12-edges-coref-ontonotes,12,bert-base-uncased-mix,/cl/work4/naoto-sh/exp/bert-base_20221211/bert...,coref-ontonotes,val,_micro_avg_,25224,...,0.957829,0.957881,0.957772,0.957826,0.001717,0.002426,0.002429,0.002428,0.957826,0.002428
4,bert,,bert-base-uncased-mix_12-edges-coref-ontonotes,12,bert-base-uncased-mix,/cl/work4/naoto-sh/exp/bert-base_20221211/bert...,coref-ontonotes,val,_span_distance_0_,2769,...,0.963776,0.964423,0.963079,0.963750,0.004833,0.006781,0.006898,0.006839,0.963750,0.006839
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10525,lxmert,,lxmert-mix_13-edges-pos-ontonotes,13,lxmert-mix,/cl/work4/naoto-sh/exp/lxmert-black-20221229/l...,pos-ontonotes,val,_pos_,13607867,...,0.994668,0.912953,0.822511,0.865375,0.000038,0.001081,0.001391,0.001216,0.865375,0.001216
10526,lxmert,,lxmert-mix_13-edges-nonterminal-ontonotes,13,lxmert-mix,/cl/work4/naoto-sh/exp/lxmert-black-20221229/l...,nonterminal-ontonotes,test,_nonterminal_,5499551,...,0.983895,0.827373,0.653114,0.729988,0.000103,0.001910,0.002137,0.002017,0.729988,0.002017
10527,lxmert,,lxmert-mix_13-edges-nonterminal-ontonotes,13,lxmert-mix,/cl/work4/naoto-sh/exp/lxmert-black-20221229/l...,nonterminal-ontonotes,val,_nonterminal_,7364488,...,0.983562,0.826468,0.641575,0.722379,0.000090,0.001668,0.001861,0.001759,0.722379,0.001759
10528,lxmert,,lxmert-mix_13-edges-rel-semeval,13,lxmert-mix,/cl/work4/naoto-sh/exp/lxmert-black-20221229/l...,rel-semeval,test,_clean_micro_,46381,...,0.971190,0.809869,0.493151,0.613018,0.001483,0.020719,0.020599,0.020659,0.613018,0.020659


## Compute clean metrics for each task

For most tasks this is just the micro or macro average F1, but we need to ignore the 0 label for coref, and drop references and continuations for SRL.

In [8]:
SPLIT = "test"
#SPLIT = "val"
mask = df['split'] == SPLIT
mask &= (df['exp_type'] != "openai")
mask &= df['exp_type'].map(lambda s: '-cased-' not in s)  # skip cased BERT for now
# Skip these tasks
mask &= (df['task'] != "constituent-ontonotes")
mask &= (df['task'] != "ner-tacred")
mask &= (df['task'] != "coref-gap")
mask &= (df['task'] != "coref-gap-ontonotes")
mask &= (df['task'] != "noun-verb")
# mask &= (df['task'] != "rel-tacred")
# mask &= (df['task'] != "rel-semeval")

# Only look at perlayer scores
mask &= df['layer_num'].notnull()

final_scores = []
for task in df['task'].unique():
    task_scores = df[mask & (df['task'] == task)]
    if analysis.is_coref_task(task):
        final_scores.append(task_scores[task_scores['label'] == "1"])
    elif analysis.is_srl_task(task):
#         final_scores.append(task_scores[task_scores['label'] == '_core_'])
#         final_scores.append(task_scores[task_scores['label'] == '_non_core_'])
        # Use clean version, average only over core or noncore roles.
        final_scores.append(task_scores[task_scores['label'] == '_clean_micro_'])
#     elif task == "nonterminal-ontonotes":
#         final_scores.append(task_scores[task_scores['label'] == '_micro_avg_'])
#         final_scores.append(task_scores[task_scores['label'] == '_info.height_2_'])
#         final_scores.append(task_scores[task_scores['label'] == '_info.height_3_'])
#         final_scores.append(task_scores[task_scores['label'] == '_info.height_4_'])
#         final_scores.append(task_scores[task_scores['label'] == '_info.height_5_'])
#         final_scores.append(task_scores[task_scores['label'] == '_info.height_6_'])
#     elif task == "dep-labeling-ewt":
#         final_scores.append(task_scores[task_scores['label'] == '_micro_avg_'])
#         final_scores.append(task_scores[task_scores['label'] == '_span_distance_0_'])
#         final_scores.append(task_scores[task_scores['label'] == '_span_distance_1_'])
#         final_scores.append(task_scores[task_scores['label'] == '_span_distance_2_'])
#         final_scores.append(task_scores[task_scores['label'] == '_span_distance_3_'])
#         final_scores.append(task_scores[task_scores['label'] == '_span_distance_4_'])
#         final_scores.append(task_scores[task_scores['label'] == '_span_distance_5_'])
#         final_scores.append(task_scores[task_scores['label'] == '_span_distance_6_'])
#         final_scores.append(task_scores[task_scores['label'] == '_span_distance_7_'])
#         final_scores.append(task_scores[task_scores['label'] == '_span_distance_8_'])
#         final_scores.append(task_scores[task_scores['label'] == '_span_distance_9_'])
#         final_scores.append(task_scores[task_scores['label'] == '_span_distance_10_'])
    elif analysis.is_relation_task(task):
        # Relation tasks include specific "no_relation" label
        final_scores.append(task_scores[task_scores['label'] == '_clean_micro_'])
    elif task == "noun-verb":
        # Noun-verb reports accuracy on VERB class
        final_scores.append(task_scores[task_scores['label'] == 'VERB'])
    else:
        final_scores.append(task_scores[task_scores['label'] == '_micro_avg_'])
        
fdf = pd.concat(final_scores, axis=0, ignore_index=True, sort=False)
# fdf['task_and_metric'] = ["%s-%s" % tl for tl in zip(fdf.task, fdf.label)]
def format_display_row(task, label, seed):
    ret = f"{task}-{label}"
    if seed:
        ret += f"-{seed:d}"
    return ret

fdf['display_row'] = [format_display_row(*args) for args in zip(fdf.task, fdf.label, fdf.seed)]
print(len(fdf))
fdf.columns.values

72


array(['tag', 'seed', 'exp_name', 'layer_num', 'exp_type', 'run', 'task',
       'split', 'label', 'tn_count', 'fp_count', 'fn_count', 'tp_count',
       'stratifier', 'stratum_key', 'display_col', 'pred_pos_count',
       'true_pos_count', 'total_count', 'accuracy', 'precision', 'recall',
       'f1_score', 'accuracy_errn95', 'precision_errn95', 'recall_errn95',
       'f1_errn95', 'score', 'score_errn95', 'display_row'], dtype=object)

In [9]:
fdf.head()

Unnamed: 0,tag,seed,exp_name,layer_num,exp_type,run,task,split,label,tn_count,...,precision,recall,f1_score,accuracy_errn95,precision_errn95,recall_errn95,f1_errn95,score,score_errn95,display_row
0,bert,,bert-base-uncased-mix_12-edges-coref-ontonotes,12,bert-base-uncased-mix,/cl/work4/naoto-sh/exp/bert-base_20221211/bert...,coref-ontonotes,test,1,21119,...,0.8935,0.915007,0.904125,0.002359,0.007698,0.007042,0.007356,0.904125,0.007356,coref-ontonotes-1
1,bert,,bert-base-uncased-mix_12-edges-coref-ontonotes,12,bert-base-uncased-mix,/cl/work4/naoto-sh/exp/bert-base_20221213/bert...,coref-ontonotes,test,1,21141,...,0.896343,0.911521,0.903868,0.002358,0.007633,0.007172,0.007395,0.903868,0.007395,coref-ontonotes-1
2,bert,,bert-base-uncased-mix_12-edges-coref-ontonotes,12,bert-base-uncased-mix,/cl/work4/naoto-sh/exp/bert-base_20230101/bert...,coref-ontonotes,test,1,21128,...,0.894771,0.914675,0.904613,0.002353,0.007664,0.007055,0.007347,0.904613,0.007347,coref-ontonotes-1
3,visualbert,,visualbert-mix_12-edges-coref-ontonotes,12,visualbert-mix,/cl/work4/naoto-sh/exp/visualbert-black-202212...,coref-ontonotes,test,1,21098,...,0.885839,0.87334,0.879545,0.002606,0.008088,0.008399,0.00824,0.879545,0.00824,coref-ontonotes-1
4,visualbert,,visualbert-mix_12-edges-coref-ontonotes,12,visualbert-mix,/cl/work4/naoto-sh/exp/visualbert-black-202212...,coref-ontonotes,test,1,21041,...,0.878432,0.88164,0.880033,0.002612,0.008237,0.008158,0.008197,0.880033,0.008197,coref-ontonotes-1


In [64]:
f = fdf[['tag', 'task', 'f1_score']]

In [65]:
fdf[['tag', 'task', 'f1_score']]

Unnamed: 0,tag,task,f1_score
0,bert,coref-ontonotes,0.904125
1,bert,coref-ontonotes,0.903868
2,bert,coref-ontonotes,0.904613
3,visualbert,coref-ontonotes,0.879545
4,visualbert,coref-ontonotes,0.880033
...,...,...,...
67,visualbert,pos-ontonotes,0.959346
68,visualbert,pos-ontonotes,0.959346
69,lxmert,pos-ontonotes,0.871027
70,lxmert,pos-ontonotes,0.870858


In [66]:
f = fdf[['tag', 'task', 'f1_score']].values.tolist()

In [67]:
for i in range(len(f)//3):
    score3 = ((f[i*3][2] + f[(i*3)+1][2] + f[(i*3)+2][2])/3) * 100
    print(f[i*3][0], f[i*3][1], score3)

bert coref-ontonotes 90.42023315140474
visualbert coref-ontonotes 88.1247945153516
lxmert coref-ontonotes 78.65486835674656
bert rel-semeval 81.91133690743709
visualbert rel-semeval 80.80935718437043
lxmert rel-semeval 62.33054462846237
bert spr2 83.89876229554943
visualbert spr2 83.49053172880049
lxmert spr2 81.38470081725676
bert ner-ontonotes 96.02122158079293
visualbert ner-ontonotes 94.895559796869
lxmert ner-ontonotes 87.1259222415651
bert dep-ud-ewt 95.01733436551228
visualbert dep-ud-ewt 94.33795324518256
lxmert dep-ud-ewt 86.55265358845908
bert srl-ontonotes 91.08519029242626
visualbert srl-ontonotes 90.08766437602458
lxmert srl-ontonotes 78.69625434925815
bert nonterminal-ontonotes 86.9997542580233
visualbert nonterminal-ontonotes 85.79100721001276
lxmert nonterminal-ontonotes 73.07002646993865
bert pos-ontonotes 96.59220992999332
visualbert pos-ontonotes 95.93463277009704
lxmert pos-ontonotes 87.0914293042798


Pivot DataFrame to present each task on a row, and each experiment on a column.

This form is suitable to copy-paste into a spreadsheet.

In [69]:
# Pivot to wide-form for spreadsheet, and sort in (mostly) stable order.
sheet_df = fdf.pivot(index="display_row", columns="display_col", values="score")
sheet_df = sheet_df.reindex(sorted(sheet_df.columns, 
                                   key=exp_type_sort_key), axis=1)
sheet_df = sheet_df.reindex(sorted(sheet_df.index,
                                   key=task_sort_key), axis=0)

csv_args = dict(float_format="%.4f")
print((100*sheet_df).to_csv(**csv_args))

ValueError: Index contains duplicate entries, cannot reshape

# Plot F1 by layer as a bar plot

In [12]:
#EXPECTED_NUM_LAYERS = {13, 25}
EXPECTED_NUM_LAYERS = {12, 24}
USE_RUNNING_MAX = False
# USE_RUNNING_MAX = True

def _compute_exp_layer(sub_df, min_layer):
    sub_df = sub_df[sub_df['layer_num'].map(int) >= min_layer]
#     ds = sub_df['delta_score'].map(lambda s: max(s, 0))
    ds = sub_df['delta_score']
    num = np.sum(ds * sub_df['layer_num'].map(int))
    denom = np.sum(ds)
    return num / denom

# entropy(scalars[i], qk=masks[i], base=2)
def _compute_kl_unif(sub_df, min_layer):
    sub_df = sub_df[sub_df['layer_num'].map(int) >= min_layer]
    ds = sub_df['delta_score'].map(lambda s: max(s, 0))
    return entropy(ds.values, qk=np.ones_like(ds.values), base=2)

fdf['max_layer_score'] = None
fdf['lex_score'] = None
fdf['prev_layer_score'] = None
fdf['delta_score'] = None
fdf['exp_layer'] = None
gb = fdf.groupby(by=['display_row', 'exp_type', 'tag'])
for key, idxs in gb.groups.items():
    if len(idxs) not in EXPECTED_NUM_LAYERS:
        print(f"Warning: key '{key}' has {len(idxs)} matches (expected {EXPECTED_NUM_LAYERS})")
    sub_df = fdf.loc[idxs]
    layer_nums = sub_df['layer_num'].map(int)
    max_layer = layer_nums.max()
    assert set(layer_nums.map(int)) == set(range(max_layer+1))
    layer_scores = np.zeros(max_layer+1, dtype=np.float32)
    for i, score in zip(layer_nums, sub_df['score']):
        layer_scores[i] = score
    running_max_layer_scores = np.maximum.accumulate(layer_scores)
    fdf.loc[idxs, 'prev_layer_score'] = [layer_scores[i-1] if i > 0 else 0.0 for i in layer_nums]
    fdf.loc[idxs, 'prev_score_max'] = [running_max_layer_scores[i-1] if i > 0 else 0.0 for i in layer_nums]
    if USE_RUNNING_MAX:
        fdf.loc[idxs, 'delta_score'] = running_max_layer_scores - fdf.loc[idxs, 'prev_score_max']
    else:
        fdf.loc[idxs, 'delta_score'] = fdf.loc[idxs, 'score'] - fdf.loc[idxs, 'prev_layer_score']
    fdf.loc[idxs, 'real_delta_score'] = fdf.loc[idxs, 'score'] - fdf.loc[idxs, 'prev_layer_score']
    sub_df = fdf.loc[idxs]
    fdf.loc[idxs, 'exp_layer'] = _compute_exp_layer(sub_df, min_layer=1)
    fdf.loc[idxs, 'kl_unif'] = _compute_kl_unif(sub_df, min_layer=1)
    fdf.loc[idxs, 'max_layer_score'] = sub_df['score'].max()
    fdf.loc[idxs, 'lex_score'] = sub_df[layer_nums == 0]['score'].max()
    fdf.loc[idxs, 'contextual_headroom'] = fdf.loc[idxs, 'max_layer_score'] - fdf.loc[idxs, 'lex_score']
    fdf.loc[idxs, 'headroom_frac'] = fdf.loc[idxs, 'delta_score'] / fdf.loc[idxs, 'contextual_headroom']
    fdf.loc[idxs, 'real_headroom_frac'] = fdf.loc[idxs, 'real_delta_score'] / fdf.loc[idxs, 'contextual_headroom']
fdf



AssertionError: 

In [31]:
palette = bokeh.palettes.Category20c_20

# EXPT_TYPE = "bert-base-uncased-mix"
# EXPT_TYPE = "bert-large-uncased-mix"
EXPT_TYPE = "bert-base-uncased-at"
MAX_LAYER = 24 if "-large-" in EXPT_TYPE else 13
BAR_SCALE = 1.2
COLORS = (palette[0], palette[0]) # blues
COLORS = (palette[12], palette[12]) # purples
NEG_COLORS = (palette[5], palette[4]) # oranges
PLOT_WIDTH = 800

##
# Don't change below here
##
def _make_display_name(task, label):
    if task.startswith("pos-"):
        return "POS"
    elif task.startswith("coref-"):
        return "Coref."
    elif task.startswith("srl"):
        return "SRL"
    elif task.startswith("spr"):
        return "SPR"
    elif task.startswith("rel-"):
        return "Relations"
    elif task.startswith("dep-"):
        return "Deps."
    elif task.startswith("nonterminal-"):
        return "Consts."
    else:
        return analysis.make_display_name(task, label) 

mask = fdf['exp_type'] == EXPT_TYPE
mask &= fdf['layer_num'].notnull()
mask &= fdf['task'] != 'constituent-ontonotes'  # don't use this task
mask &= fdf['task'] != 'ner-tacred'  # don't use this task
mask &= fdf['task'] != 'coref-gap'   # don't use this task
mask &= fdf['task'] != 'coref-gap-ontonotes'  # don't use this task
mask &= fdf['task'] != 'noun-verb'   # don't use this task
mask &= fdf['task'] != 'dpr'   # don't use this task- noisy
## Skip these for now
mask &= fdf['task'] != 'rel-tacred'   # don't use this task
# mask &= fdf['task'] != 'rel-semeval'   # don't use this task
# mask &= fdf['task'] != 'spr1'   # don't use this task- noisy
mask &= fdf['task'] != 'spr2'   # don't use this task- noisy
## TEMPORARY
# mask &= fdf['task'] == "dep-labeling-ewt"   # TEMPORARY

# ELMo models also have 'scalar_mix_0.', which is for pretraining and not used by edge probing.
# mask &= df['scalar_set'].map(lambda s: s.endswith("scalar_mix.") or s.endswith("scalar_mix_1."))
plot_df = fdf[mask].copy()

##
# Make long-form DataFrame
plot_df['_display_name'] = list(map(_make_display_name, plot_df['task'], plot_df['label']))
# plot_df['_display_name'] = plot_df['display_row']

# plot_df['_bar_height'] = plot_df['score'] * BAR_SCALE
# plot_df['_bar_height'] = list(map(lambda low, high, s: BAR_SCALE * (s - low)/max(high - low, 0.025), 
#                                   plot_df['lex_score'], plot_df['max_layer_score'], plot_df['score']))
# plot_df['_bar_height'] = plot_df['real_headroom_frac'].map(lambda s: BAR_SCALE * s)
plot_df['_bar_height'] = (plot_df['score'] - plot_df['lex_score']) / (plot_df['max_layer_score'] - plot_df['lex_score'])

plot_df['_bar_center'] = [(l, h/2-0.5) for l, h in zip(plot_df['_display_name'], plot_df['_bar_height'])]
# plot_df['_bar_center'] = [(l, 0) for l, h in zip(plot_df['_display_name'], plot_df['_bar_height'])]

plot_df['_fill_color'] = [COLORS[0] if h > 0 else NEG_COLORS[0] for h in plot_df['_bar_height']]
plot_df['_line_color'] = [COLORS[1] if h > 0 else NEG_COLORS[1] for h in plot_df['_bar_height']]
plot_df['_bar_height'] = plot_df['_bar_height'].map(np.abs)

plot_df['_formatted_exp_layer'] = plot_df['exp_layer'].map(lambda l: "E[layer] = {:.02f}".format(l))
plot_df['_formatted_score'] = plot_df['score'].map(lambda l: "{:.1f}".format(100*l))

# sorted_rows = sorted(plot_df['display_row'].unique(), key=task_sort_key)
# cats = list(reversed(sorted_rows))
sorted_rows = sorted(pd.Series(list(zip(plot_df['task'], plot_df['label']))).unique(), 
                     key=lambda tl: (task_sort_key(tl[0]), tl[1]))
cats = [_make_display_name(*tl) for tl in reversed(sorted_rows)]

hover = bokeh.models.HoverTool(
  tooltips=[
      ("task", "@_display_name"),
      ("layer", "@layer_num"),
      ("score", "@score{0.00} (Δ @delta_score{0.00})"),
  ],
  renderers=[]
)

x_range = (0.5, MAX_LAYER+0.5)
# PLOT_WIDTH = 700 if MAX_LAYER > 2 else 300
PLOT_HEIGHT = 80 + 80*len(cats)
p = bp.figure(y_range=bokeh.models.FactorRange(*cats, factor_padding=0.20), x_range=x_range,
              plot_width=PLOT_WIDTH, plot_height=PLOT_HEIGHT, tools=[hover, 'save'])

##
# Add background bars
bgbar_color = "#f2f2f2"
p.hbar(y='_display_name', left=x_range[0], right=x_range[1], 
       height=1.0, 
       fill_color=bgbar_color, fill_alpha=0.40, 
#        line_color="#e6e6e6", 
#        line_alpha=0.80,
#        line_color="Gray",
       line_alpha=0.0,
#        line_width=0.5,
       source=plot_df, 
       level='image')
p.hbar(y='_display_name', left=x_range[0], right=x_range[1], 
       height=1.0, 
       fill_color="White", fill_alpha=0.0,
       line_color="#e6e6e6",
       line_alpha=1.0,
       line_width=0.5,
       source=plot_df, 
       level='underlay')

bars = p.rect(x='layer_num', y='_bar_center', width=0.9, height="_bar_height",
       fill_color='_fill_color', line_color='_line_color',
       source=plot_df)
# Add an invisible overlay for easier tooltips on small bars
shadow_bars = p.rect(x='layer_num', y='_bar_center', width=0.9, height=1.0,
       source=plot_df, alpha=0.0)
hover.renderers.extend([bars, shadow_bars])

# Add score labels
y = bokeh.transform.dodge("_display_name", -0.5, range=p.y_range)
score_labels = bokeh.models.annotations.LabelSet(
    y=y, x="layer_num", text="_formatted_score",
    y_offset=5, text_align="center", text_baseline="bottom",
    text_color="White", text_font_size="12pt", 
    source=bokeh.models.ColumnDataSource(plot_df))
p.add_layout(score_labels)

# Add labels with entropy
# score_labels = bokeh.models.annotations.LabelSet(
#     y="_display_name", x=MAX_LAYER, text="_formatted_exp_layer",
#     text_align="right", text_baseline="middle", y_offset=-15,
#     x_offset=8,
#     text_color=COLORS[0], text_font_size="11pt",
#     text_font_style="bold",
#     background_fill_color="White", border_line_color="White", border_line_width=5,
#     source=bokeh.models.ColumnDataSource(plot_df[plot_df['layer_num'].map(int) == 0]))
# p.add_layout(score_labels)

p.xaxis.ticker = bokeh.models.FixedTicker(ticks=np.arange(0, MAX_LAYER+1))
p.xgrid.ticker = p.xaxis[0].ticker

p.ygrid.ticker = bokeh.models.FixedTicker(ticks=np.arange(0, len(cats), 0.5))
    
_FONT_SIZE = "13pt"
p.yaxis.major_label_text_font_size = _FONT_SIZE
p.xaxis.major_label_text_font_size = _FONT_SIZE
p.xaxis.axis_label = "Encoder Layer"
p.xaxis.axis_label_text_font_size = _FONT_SIZE

bp.show(p)



AttributeError: '_Feature' object has no attribute 'LabelSet'

# Extract mixing scalars

In [4]:
def load_scalars_file(filename, tag=None):
    df = pd.read_csv(filename, sep="\t", header=0)
    df.drop(['Unnamed: 0'], axis='columns', inplace=True)
    
    df.insert(0, "exp_name", df['run'].map(lambda p: os.path.basename(os.path.dirname(p.strip("/")))))
    df.insert(1, "exp_type", df['exp_name'].map(analysis.get_exp_type))
    df.insert(2, "task", df['exp_name'].map(lambda name: analysis.clean_task_name(name.split("-edges-")[1])))
    if tag is not None:
        df.insert(0, "tag", tag)
        
    return df

scalar_files = [
    ("bert", "your/result"),
]

dfs = []
for tag, scalar_file in scalar_files:
    dfs.append(load_scalars_file(scalar_file, tag=tag))
scalar_df = pd.concat(dfs, ignore_index=True, sort=False)
scalar_df['display_col'] = ["%s (%s)" % et for et in zip(scalar_df.exp_type, scalar_df.tag)]
# ELMo models also have 'scalar_mix_0.', which is for pretraining and not used by edge probing.
mask = scalar_df['scalar_set'].map(lambda s: s.endswith("scalar_mix.") or s.endswith("scalar_mix_1."))
scalar_df = scalar_df[mask].copy()
print(scalar_df['task'].unique())
print(scalar_df['exp_type'].unique())
print(len(scalar_df))
print("Scalar sets:", scalar_df['scalar_set'].unique())

['coref-ontonotes' 'dep-ud-ewt' 'ner-ontonotes' 'nonterminal-ontonotes'
 'pos-ontonotes' 'rel-semeval' 'spr2' 'srl-ontonotes']
['bert-base-uncased-mix' 'visualbert-mix' 'lxmert-mix']
72
Scalar sets: ['sent_encoder._text_field_embedder.scalar_mix.']


In [5]:
# Count total scalar columns
scalar_columns = collections.OrderedDict(sorted(
    [(int(m.group(1)), m.group(0)) for m in 
     (re.match("^scalar_parameters\.(\d+)$", str(name)) for name in scalar_df.columns)
     if m]
))

# Fill NaN with -inf for scalar columns
for name in scalar_columns.values():
    scalar_df[name].fillna(value=-np.inf, inplace=True)

# Pre-fill number columns
for number in scalar_columns.keys():
    scalar_df[number] = None
scalar_df["weight_entropy"] = None
    
# Softmax over parameters in each row
num_scalars = max(scalar_columns.keys()) + 1
scalars = {}
masks = {}
for i, row in scalar_df.iterrows():
    arr = np.zeros(num_scalars, dtype=np.float32)
    for j, col in scalar_columns.items():
        arr[j] = float(row[col])
        if np.isnan(arr[j]):
            arr[j] = -np.inf
    # Softmax over row
    scalars[i] = softmax(arr)
    masks[i] = np.isfinite(arr)

# Add softmax weights back to DataFrame, with numeric column names.
# This way, we can convert to long-form for easy plotting.
for i in scalar_df.index:
    for j in scalar_columns.keys():
        scalar_df.loc[i, j] = scalars[i][j]
    # Compute entropy
    scalar_df.loc[i, "weight_entropy"] = entropy(scalars[i], base=2)
    scalar_df.loc[i, "weight_kl_unif"] = entropy(scalars[i], qk=masks[i], base=2)
    # Compute expectation
    weighted_layers = scalars[i] * np.arange(len(scalars[i])) * masks[i]
    scalar_df.loc[i, "weight_exp_layer"] = np.sum(weighted_layers)
    scalar_df.loc[i, "weight_exp_layer_oneplus"] = np.sum(weighted_layers[1:]) / np.sum(scalars[i][1:] * masks[i][1:])
scalar_df

Unnamed: 0,tag,exp_name,exp_type,task,gamma,scalar_parameters.0,scalar_parameters.1,scalar_parameters.2,scalar_parameters.3,scalar_parameters.4,...,8,9,10,11,12,13,weight_entropy,weight_kl_unif,weight_exp_layer,weight_exp_layer_oneplus
0,bert,bert-base-uncased-mix_12-edges-coref-ontonotes,bert-base-uncased-mix,coref-ontonotes,1.563524,-0.406576,-0.681076,-0.686471,-0.699170,-0.654445,...,0.137581,0.218226,0.155394,0.094965,0.059027,0.0,3.359579,0.340861,7.742890,8.067494
1,bert,bert-base-uncased-mix_12-edges-dep-ud-ewt,bert-base-uncased-mix,dep-ud-ewt,2.737733,-0.138301,-0.424941,-0.489178,-0.273124,1.247979,...,0.298447,0.050294,0.03529,0.031969,0.027703,0.0,3.166823,0.533617,6.501032,6.725945
2,bert,bert-base-uncased-mix_12-edges-ner-ontonotes,bert-base-uncased-mix,ner-ontonotes,2.165716,0.229233,-0.263455,-0.438551,-0.555840,-0.512907,...,0.143231,0.131433,0.098851,0.085653,0.063275,0.0,3.588624,0.111816,6.626506,7.325894
3,bert,bert-base-uncased-mix_12-edges-nonterminal-ont...,bert-base-uncased-mix,nonterminal-ontonotes,2.182357,-0.271525,-0.617399,-0.457925,0.027574,1.116139,...,0.209141,0.035975,0.021866,0.021651,0.023305,0.0,3.248473,0.451967,6.004874,6.235740
4,bert,bert-base-uncased-mix_12-edges-pos-ontonotes,bert-base-uncased-mix,pos-ontonotes,0.840410,0.168013,0.006455,0.014571,-0.065829,0.202387,...,0.078557,0.065044,0.063946,0.066308,0.067079,0.0,3.689017,0.011423,5.731303,6.294814
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,lxmert,lxmert-mix_13-edges-nonterminal-ontonotes,lxmert-mix,nonterminal-ontonotes,2.100487,0.338398,0.324993,-0.027106,0.354762,0.789473,...,0.059144,0.044665,0.041643,0.036161,0.039249,0.037735,3.684981,0.122374,5.450714,5.937757
68,lxmert,lxmert-mix_13-edges-pos-ontonotes,lxmert-mix,pos-ontonotes,1.311461,1.153207,0.700866,0.257541,0.506387,0.205879,...,0.042948,0.046148,0.039386,0.045729,0.048167,0.044806,3.64296,0.164395,4.795336,5.861110
69,lxmert,lxmert-mix_13-edges-rel-semeval,lxmert-mix,rel-semeval,1.085776,-0.066325,-0.080155,-0.110004,-0.151122,-0.105928,...,0.081962,0.087455,0.078923,0.073916,0.071286,0.071617,3.800218,0.007137,6.737647,7.226316
70,lxmert,lxmert-mix_13-edges-spr2,lxmert-mix,spr2,1.057611,0.060785,0.048797,0.040215,0.029989,0.024329,...,0.072063,0.071047,0.068592,0.06824,0.068151,0.067344,3.806543,0.000812,6.369972,6.888780


In [6]:
sc = scalar_df[['tag','task','weight_exp_layer']].values.tolist()

In [14]:
for i in range(1, (len(sc)//3)+1):
    weight = sc[i-1][2] + sc[(i-1)*2][2] + sc[(i-1)*2][2]
    print(sc[i-1][0], sc[i-1][1],weight/3)

bert coref-ontonotes 7.7428897228091955
bert dep-ud-ewt 6.584681263193488
bert ner-ontonotes 6.029703879108031
bert nonterminal-ontonotes 6.043416224420071
bert pos-ontonotes 7.134943063060443
bert rel-semeval 6.727314803749323
bert spr2 5.807565815746784
bert srl-ontonotes 6.254758919278781


In [7]:
scalar_df.exp_type.unique()

array(['bert-base-uncased-mix', 'visualbert-mix', 'lxmert-mix'],
      dtype=object)

## Make compound plot with F1 scores as well

In [11]:
# https://bokeh.pydata.org/en/latest/docs/reference/palettes.html
palette = bokeh.palettes.Category20c_20

MODEL_NAME = "visualbert"
#MODEL_NAME = "bert-base-uncased"
#MODEL_NAME = "lxmert"
EXPT_TYPES = [f"{MODEL_NAME}-mix"]
SCORE_EXPT_TYPE = f"{MODEL_NAME}-mix"
MAX_LAYER = 24 if '-large-' in MODEL_NAME else 13
WEIGHT_SCALE = 2.7 if '-large-' in MODEL_NAME else 2.1
# SCORE_SCALE = 1.5
# SCORE_SCALE = 2.2
SCORE_SCALE = WEIGHT_SCALE
WEIGHT_COLORS = (palette[0], palette[9]) # blue, green
SCORE_COLORS = (palette[12], palette[12]) # purples
NEG_COLORS = (palette[5], palette[4]) # oranges
# PLOT_WIDTH = 900
PLOT_WIDTH=450
# _PLOT_HEIGHT_FN=lambda num_cats: 80 + 100*num_cats
_PLOT_HEIGHT_FN=lambda num_cats: 750

##
# Don't change below here
##
def _make_display_name(task, label):
    if task.startswith("pos-"):
        return "POS"
    elif task.startswith("coref-"):
        return "Coref."
    elif task.startswith("spr"):
        return "SPR2"
    elif task.startswith("rel-"):
        return "Relations"
    elif task.startswith("dep-"):
        return "Deps."
    elif task.startswith("nonterminal-"):
        return "Consts."
    else:
        return analysis.make_display_name(task, label)        

def _make_mask(df):
    mask = df['exp_type'].map(lambda s: s in EXPT_TYPES)
    mask &= df['task'] != 'constituent-ontonotes'  # don't use this task
    mask &= df['task'] != 'ner-tacred'  # don't use this task
    mask &= df['task'] != 'coref-gap-ontonotes'  # don't use this task
    mask &= df['task'] != 'rel-tacred'  # don't use this task
#     mask &= df['task'] != 'rel-semeval'  # don't use this task
    # Skip Winograd and SPR2 for this
    mask &= df['task'] != 'dpr'
#     mask &= df['task'] != 'spr1'
#     mask &= df['task'] != 'spr2'
    return mask

mask = _make_mask(scalar_df)

weight_df = scalar_df[mask].copy()

##
# Make long-form DataFrame and add plotting values
skip_cols = set(scalar_columns.keys()).union(scalar_columns.values())
id_vars = [c for c in weight_df.columns if c not in skip_cols]
value_vars = scalar_columns.keys()
weight_df = pd.melt(weight_df, id_vars=id_vars, value_vars=value_vars, 
                    var_name="layer_num", value_name="layer_weight")
weight_df['label'] = None

"""
##
# Append the scores DataFrame
mask = _make_mask(fdf)
mask &= fdf['layer_num'].notnull()
mask &= fdf['exp_type'] == SCORE_EXPT_TYPE
# mask &= fdf['layer_num'].astype(float) > 0
score_df = fdf[mask].copy()
# Erase labels, for now
score_df['label'] = None
"""

##
# Plotting code below this line
##

# Row keys
sorted_tasks = sorted(weight_df['task'].unique(), key=task_sort_key)
cats = [_make_display_name(t, None) for t in sorted_tasks]
cats = list(reversed(cats))
PLOT_HEIGHT = _PLOT_HEIGHT_FN(len(cats))

# Row names, matching row keys
"""
score_df['_display_name'] = list(map(_make_display_name, score_df['task'], score_df['label']))
"""

weight_df['_display_name'] = list(map(_make_display_name, weight_df['task'], weight_df['label']))

# Bar heights for weights
weight_df['_bar_height'] = weight_df['layer_weight'] * WEIGHT_SCALE
weight_df['_bar_center'] = weight_df['_display_name']
weight_df['_formatted_entropy'] = weight_df['weight_entropy'].map(lambda h: "H(s) = {:.02f} bits".format(h))
#weight_df['_formatted_kl_unif'] = weight_df['weight_kl_unif'].map(lambda h: "KL(s||uniform) = {:.02f} bits".format(h))
weight_df['_formatted_kl_unif'] = weight_df['weight_kl_unif'].map(lambda h: "K(s) = {:.02f}".format(h))
#weight_df['_formatted_exp_layer'] = weight_df['weight_exp_layer_oneplus'].map(lambda l: "E[k] = {:.02f}".format(l))


"""
# Bar heights for scores (cumulative)
# score_df['_bar_height'] = score_df['real_headroom_frac'] * SCORE_SCALE
score_df['_bar_height'] = score_df['headroom_frac'] * SCORE_SCALE
score_df['_bar_height'] = score_df['_bar_height'].map(lambda h: min(h, 1.0))
# Add offset so bars start at baseline
score_df['_bar_center'] = [(l, h/2-0.5) for l, h in zip(score_df['_display_name'], score_df['_bar_height'])]
# score_df['_bar_center'] = score_df["_display_name"]

score_df['_fill_color'] = [SCORE_COLORS[0] if h > 0 else NEG_COLORS[0] for h in score_df['_bar_height']]
score_df['_line_color'] = [SCORE_COLORS[1] if h > 0 else NEG_COLORS[1] for h in score_df['_bar_height']]
score_df['_bar_height'] = score_df['_bar_height'].map(np.abs)

# score_df['_formatted_exp_layer'] = score_df['exp_layer'].map(lambda l: "E[layer] = {:.02f}".format(l))
score_df['_formatted_exp_layer'] = score_df['exp_layer'].map(lambda l: "{:.02f}".format(l))
score_df['_formatted_kl_unif'] = score_df['kl_unif'].map(lambda h: "K(Δ) = {:.02f}".format(h))
"""


hover_0 = bokeh.models.HoverTool(
  tooltips=[
      ("task", "@_display_name"),
      ("experiment", "@exp_type"),
      ("layer", "@layer_num"),
      ("weight", "@layer_weight{0.0%}"),
  ],
  renderers=[],
)

"""
hover_2 = bokeh.models.HoverTool(
  tooltips=[
      ("task", "@_display_name"),
      ("experiment", "@exp_type"),
      ("layer", "@layer_num"),
      ("score", "@score{0.0%} (Δ @delta_score{0.0%})"),
      ("headroom fraction", "@headroom_frac{0.0%}"),
  ],
  renderers=[],
)
"""

x_range = (-0.5, MAX_LAYER+0.5)
p = bp.figure(y_range=bokeh.models.FactorRange(*cats, factor_padding=0.10), x_range=x_range,
              plot_width=PLOT_WIDTH, plot_height=PLOT_HEIGHT, tools=[hover_0, 'save'])

##
# Add background bars
bgbar_color = "#f2f2f2"
p.hbar(y='_display_name', left=x_range[0], right=x_range[1], 
       height=1.0, 
       fill_color=bgbar_color, fill_alpha=0.40, 
#        line_color="#e6e6e6", 
#        line_alpha=0.80,
#        line_color="Gray",
       line_alpha=0.0,
#        line_width=0.5,
       source=weight_df, 
       level='image')
p.hbar(y='_display_name', left=x_range[0], right=x_range[1], 
       height=1.0, 
       fill_color="White", fill_alpha=0.0,
       line_color="#e6e6e6",
       line_alpha=1.0,
       line_width=0.5,
       source=weight_df, 
       level='underlay')

def _plot_bars(sdf, x_dodge=0, y_dodge=0, **kw):
    y = bokeh.transform.dodge('_bar_center', y_dodge, range=p.y_range)
    x = 'layer_num'
    bars = p.rect(x=x, y=y, width=0.9, height="_bar_height", source=sdf, **kw)
    shadow_bars = p.rect(x=x, y=y, width=0.9, height=0.5, source=sdf, alpha=0.0)
    return bars, shadow_bars

##
# Plot weights and delta scores
    #colar

_WEIGHT_BAR_PARAMS = dict(fill_color=WEIGHT_COLORS[0], line_color=WEIGHT_COLORS[0],
#                           line_width=1.5, fill_alpha=0.1,
                         )


"""                     
_SCORE_BAR_PARAMS = dict(fill_color='_fill_color', line_color='_line_color', 
                         line_width=1.5, fill_alpha=0.1,
                        )

"""


b0, s0 = _plot_bars(weight_df[weight_df.exp_type == EXPT_TYPES[0]], y_dodge=0, 
                hover_fill_color="firebrick", hover_fill_alpha=1.0,
                **_WEIGHT_BAR_PARAMS)
"""

b2, s2 = _plot_bars(score_df[score_df['layer_num'].map(int) > 0], y_dodge=0, 
                hover_fill_color="firebrick", hover_fill_alpha=0.7, **_SCORE_BAR_PARAMS)

"""
hover_0.renderers.extend([b0, s0])

"""
hover_2.renderers.extend([b2, s2])
"""


p.xaxis.ticker = bokeh.models.FixedTicker(ticks=np.arange(0, MAX_LAYER+1))
p.xgrid.ticker = p.xaxis[0].ticker
    
_FONT_SIZE = "13pt"
p.yaxis.major_label_text_font_size = _FONT_SIZE
p.xaxis.major_label_text_font_size = _FONT_SIZE
# p.xaxis.axis_label = "Encoder Layer"
# p.xaxis.axis_label_text_font_size = _FONT_SIZE

# p.yaxis.major_label_orientation = 60 * np.pi / 180
p.yaxis.major_label_orientation = "vertical"
if PLOT_WIDTH < 600 and MAX_LAYER > 12:
    p.xaxis.ticker = bokeh.models.FixedTicker(ticks=np.arange(0, MAX_LAYER+1, 2))

# p.toolbar.autohide = True

# Add labels with entropy
# _label_y = [28, 10]
label_kw = [
    dict(x=x_range[1],      y_offset=18, x_offset=-10, text_baseline="bottom", text_align="right"),
    dict(x=x_range[1]*0.20, y_offset=18, x_offset=0, text_baseline="bottom", text_align="left"),
]
LABEL_COLOR = "#404040"

"""
score_labels = bokeh.models.annotations.LabelSet(
    y="_display_name", text="_formatted_kl_unif",
    text_color=LABEL_COLOR, text_font_size="12pt",
    source=bokeh.models.ColumnDataSource(weight_df[weight_df['layer_num'] == 0]), **label_kw[0])
p.add_layout(score_labels)

score_labels = bokeh.models.annotations.LabelSet(
    y="_display_name", text="_formatted_kl_unif",
    text_color=LABEL_COLOR, text_font_size="12pt",
    source=bokeh.models.ColumnDataSource(score_df[score_df['layer_num'].map(int) == 0]), **label_kw[1])
p.add_layout(score_labels)
"""

# # Add labels with expected layer
# score_labels = bokeh.models.annotations.LabelSet(
#     y="_display_name", 
#     x="weight_exp_layer",
#     x=x_range[1] // 6, 
#     text="_formatted_exp_layer",
#     text_align="left", text_baseline="bottom", y_offset=25, x_offset=0,
#     text_color="#595959", text_font_size="11pt",
#     source=bokeh.models.ColumnDataSource(weight_df[weight_df['layer_num'] == 0]))
# p.add_layout(score_labels)

# p.xgrid.visible = False
p.min_border_left = 0
p.min_border_right = 0
p.min_border_top = 0
p.min_border_bottom = 0
p.toolbar_location = None

bp.show(p)

In [14]:
print(f"Plot y range: weights:{1/WEIGHT_SCALE:.2f}, scores:{1/SCORE_SCALE:.2f}")

Plot y range: weights:0.48, scores:0.48


In [17]:
_save_figure_to_bucket(p, name=f"{MODEL_NAME}.weights_and_scores" + (".running_max" if USE_RUNNING_MAX else ""),
                       title=f"{MODEL_NAME} mixing weights and differential scores" + (" (running_max)" if USE_RUNNING_MAX else ""))

gsutil: Command not found.
gsutil: Command not found.
Public URL: https://storage.googleapis.com/edge-probing/elm14/plots/visualbert.weights_and_scores.20221214.153305.html


'https://storage.googleapis.com/edge-probing/elm14/plots/visualbert.weights_and_scores.20221214.153305.html'

### Make aggregate plot

In [15]:
cats_range = bokeh.models.FactorRange(*cats, factor_padding=0.10, range_padding=0.10, range_padding_units='absolute')
layers_range = x_range = (-0.5, MAX_LAYER+0.5)

# PLOT_WIDTH=450
# PLOT_WIDTH=260
P2_WIDTH = 180
PLOT_WIDTH = 450 - P2_WIDTH
# PLOT_HEIGHT=450
# BAR_HEIGHT=0.425
# BAR_DODGE=BAR_HEIGHT/2
PLOT_HEIGHT=320
BAR_HEIGHT=0.9
BAR_DODGE=0

# WEIGHT_EXP_FIELD = 'weight_exp_layer_oneplus'
WEIGHT_EXP_FIELD = 'weight_exp_layer'
weight_df['_formatted_exp_layer'] = weight_df[WEIGHT_EXP_FIELD].map(lambda l: "{:.02f}".format(l))

##
# Add second plot
p2 = bp.figure(plot_width=PLOT_WIDTH, plot_height=PLOT_HEIGHT, 
               y_range=cats_range,
               x_axis_location="above",
               title="Expected layer & center-of-gravity",
               tools=['save'])
p2.title.align = "center"
p2.toolbar.autohide = True
p2.yaxis.major_tick_line_color = None
# p2.yaxis.major_label_text_font_size = "0pt"
# p2.xaxis.axis_label = "Encoder Layer ℓ"
p2.xaxis.axis_label_text_font_style = "bold"

##
# Add bars for weight cog
mask = weight_df['layer_num'].astype(float) == 1
mask &= weight_df.exp_type == EXPT_TYPES[0]
sdf = weight_df[mask]
y = bokeh.transform.dodge("_display_name", -BAR_DODGE, range=p2.y_range)
p2.hbar(y=y, left=0, right=WEIGHT_EXP_FIELD, height=BAR_HEIGHT,
        fill_color=WEIGHT_COLORS[0], line_color=WEIGHT_COLORS[0],
        line_width=1.5,
        source=sdf)
# Add labels with expected layer
score_labels = bokeh.models.annotations.LabelSet(
    y=y, x=WEIGHT_EXP_FIELD, text="_formatted_exp_layer",
    text_align="right", text_baseline="middle", y_offset=0,
    x_offset=-6,
#     text_color=SCORE_COLORS[0], 
    text_color="White",
    text_font_size="11pt",
    text_font_style="bold",
#     background_fill_color="White", border_line_color="White", border_line_width=5,
    source=bokeh.models.ColumnDataSource(sdf))
p2.add_layout(score_labels)

##
# Add bars for expected layer
sdf = score_df[score_df['layer_num'].astype(float) == 1]
y = bokeh.transform.dodge("_display_name", BAR_DODGE, range=p2.y_range)
p2.hbar(y=y, left=0, right="exp_layer", height=BAR_HEIGHT,
#         fill_color=SCORE_COLORS[0], 
        line_color=SCORE_COLORS[0], 
        fill_color="#D5D2E7",
#         line_color="Black",
#         fill_alpha=0.3,
        source=sdf)
score_labels = bokeh.models.annotations.LabelSet(
    y=y, x="exp_layer", text="_formatted_exp_layer",
    text_align="right", text_baseline="middle", y_offset=0,
    x_offset=-6,
#     text_color=SCORE_COLORS[0], 
#     text_color="White",
    text_color="Black",
    text_font_size="11pt",
    text_font_style="bold",
#     background_fill_color="White",
#     border_line_color="White", border_line_width=5,
    source=bokeh.models.ColumnDataSource(sdf))
p2.add_layout(score_labels)

p2.x_range.start = 0

_FONT_SIZE = "13pt"
# p2.yaxis.major_label_text_font_size = _FONT_SIZE
p2.yaxis.major_label_text_font_size = "0pt"
p2.xaxis.major_label_text_font_size = _FONT_SIZE

p2.min_border_left = 0
p2.min_border_right = 0
p2.min_border_top = 0
p2.min_border_bottom = 0
p2.toolbar_location = None

##
# Side plot with bottom and top layer scores
score_df['_formatted_score'] = score_df['score'].map(lambda h: "{:.1f}".format(100*h))
score_df["_formatted_layer_num"] = score_df["layer_num"].map(lambda l: "ℓ={:d}".format(int(l)))
mask = score_df['layer_num'].map(lambda l: int(l) in {0, MAX_LAYER})
sdf = score_df[mask]

x_range = bokeh.models.FactorRange(*sdf['_formatted_layer_num'].unique())
p4 = bp.figure(x_range=x_range, y_range=p2.y_range, plot_width=P2_WIDTH, plot_height=PLOT_HEIGHT,
               x_axis_location="above", title="F1 Scores")
# Add labels with bottom-layer score

score_labels = bokeh.models.annotations.LabelSet(
    y="_display_name", x='_formatted_layer_num', text="_formatted_score",
    text_align="center", text_baseline="middle",
    source=bokeh.models.ColumnDataSource(sdf)
)
p4.add_layout(score_labels)
# # Add labels with top-layer score
# sdf = score_df[score_df['layer_num'].map(int) == MAX_LAYER]
# score_labels = bokeh.models.annotations.LabelSet(
#     y="_display_name", x='layer_num', text="_formatted_score",
#     text_align="center", text_baseline="middle",
#     source=bokeh.models.ColumnDataSource(sdf)
# )
# p4.add_layout(score_labels)

# p4.yaxis.major_label_text_font_size = "0pt"
p4.yaxis.major_label_text_font_size = _FONT_SIZE
p4.xaxis.major_label_text_font_size = _FONT_SIZE
p4.yaxis.major_tick_line_color = None
p4.yaxis.minor_tick_line_color = None
p4.xaxis.major_tick_line_color = None
p4.xaxis.minor_tick_line_color = None
p4.ygrid.visible = False
p4.xgrid.visible = False
p4.yaxis.axis_line_color = None
p4.xaxis.axis_line_color = None
p4.title.align = "center"
p4.toolbar_location = None
p4.min_border_left = 0
p4.min_border_right = 5  # add a little padding for visual aid
p4.min_border_top = 0
p4.min_border_bottom = 0

bgbar_color = "#f2f2f2"
p4.background_fill_color = bgbar_color
p4.background_fill_alpha = 0.80

# p2.xaxis.bounds = (0, 17)

# p = p2
# p = p4
# p = bokeh.layouts.Row(p2, p4)
p = bokeh.layouts.Row(p4, p2)
bp.show(p)

AttributeError: '_Feature' object has no attribute 'LabelSet'

In [None]:
_save_figure_to_bucket(p, name=f"{MODEL_NAME}.exp_layer" + (".running_max" if USE_RUNNING_MAX else ""),
                       title=f"{MODEL_NAME} expected layer and mixing CoG"  + (" (running_max)" if USE_RUNNING_MAX else ""))



gsutil: Command not found.
gsutil: Command not found.
Public URL: https://storage.googleapis.com/edge-probing/elm14/plots/bert-base-uncased.exp_layer.20221211.141523.html


'https://storage.googleapis.com/edge-probing/elm14/plots/bert-base-uncased.exp_layer.20221211.141523.html'