# Edgeprobe Aggregate Analysis (for ACL camera-ready)

This is the main analysis notebook for [BERT Rediscovers the Classical NLP Pipeline](https://arxiv.org/abs/1905.05950), a.k.a. "the BERT layer paper"; it produces the aggregate plots (Figures 1 and 2).

This notebook is intended to be run on the output of the [`analyze_runs.py`](analyze_runs.py) script; run that on a folder of experiments to produce a `scores.tsv` file that can be loaded here.

In [1]:
import sys, os, re, json
from importlib import reload
import itertools
import collections

import numpy as np
import pandas as pd

import analysis
reload(analysis)

tasks = analysis.TASKS
exp_types = analysis.EXP_TYPES

task_sort_key = analysis.task_sort_key
exp_type_sort_key = analysis.exp_type_sort_key

from scipy.special import logsumexp
from scipy.stats import entropy

def softmax(x, axis=None):
    """Softmax of `x` along `axis` (over all elements when `axis` is None).

    Computed in log space: the log-normalizer from `logsumexp` is subtracted
    before exponentiating, so entries of -inf map to a weight of exactly 0.
    """
    log_normalizer = logsumexp(x, axis=axis, keepdims=True)
    return np.exp(x - log_normalizer)

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [18]:
import bokeh
import bokeh.plotting as bp
bp.output_notebook()

import datetime
import socket
def get_compact_timestamp():
    """Return the current local time as a 'YYYYMMDD.HHMMSS' string."""
    return datetime.datetime.now().strftime("%Y%m%d.%H%M%S")

def _save_figure_to_bucket(fig, name, title=None, export_format="html"):
    now = get_compact_timestamp()
    fname = f"{name}.{now:s}.{export_format}"
    title = title or name
    if fname.endswith('.png'):
        bokeh.io.export_png(p, os.path.join("/tmp", fname))
    else:
        bp.save(p, os.path.join("/tmp", fname), title=title, 
                resources=bokeh.resources.CDN)
    hostname = socket.gethostname()
    GCP_PROJECT="edge-probing"
    !gsutil cp /tmp/$fname gs://$GCP_PROJECT/$hostname/plots/$fname
    !gsutil acl ch -u AllUsers:R gs://$GCP_PROJECT/$hostname/plots/$fname
    url = f"https://storage.googleapis.com/{GCP_PROJECT}/{hostname}/plots/{fname}"
    print(f"Public URL: {url}")
    return url

In [3]:
# Columns that identify one (run, task, split) group of score rows.
ID_COLS = ['run', 'task', 'split']

def agg_label_group(df, task_predicate, label_predicate, group_name):
    """Sum the `*_count` columns over a subset of labels, per run/task/split.

    Rows are kept when `task_predicate(task)` and `label_predicate(label)`
    both hold. The kept rows are grouped by ID_COLS, every column whose name
    ends in "_count" is summed, and the result is tagged with
    `label = group_name`.
    """
    count_cols = [col for col in df.columns if col.endswith("_count")]
    keep = df['task'].map(task_predicate) & df['label'].map(label_predicate)
    grouped = df[keep].groupby(by=ID_COLS)
    summed = grouped.agg(dict.fromkeys(count_cols, "sum")).reset_index()
    summed['label'] = group_name
    return summed

def agg_stratifier_group(df, stratifier, key_predicate, group_name):
    """Sum the `*_count` columns over selected strata, per run/task/split.

    A row is kept when its 'stratifier' equals `stratifier` and
    `key_predicate(stratum_key)` holds. The kept rows are grouped by ID_COLS,
    every column whose name ends in "_count" is summed, and the result is
    tagged with `label = group_name`.
    """
    def _is_selected(row_stratifier, row_key):
        # Test the stratifier first so key_predicate is never called on keys
        # that belong to a different stratifier (they may not be valid for it).
        if row_stratifier != stratifier:
            return False
        return key_predicate(row_key)

    count_cols = [col for col in df.columns if col.endswith("_count")]
    keep = [_is_selected(s, key)
            for s, key in zip(df['stratifier'], df['stratum_key'])]
    summed = df[keep].groupby(by=ID_COLS).agg(dict.fromkeys(count_cols, "sum")).reset_index()
    summed['label'] = group_name
    return summed

def load_scores_file(filename, tag=None, seed=None):
    """Load a `scores.tsv` file and append the custom aggregate rows.

    Args:
        filename: path to a tab-separated scores file with a header row and
            an unnamed index column.
        tag: optional value for a leading 'tag' column; omitted when None.
        seed: value stored in the 'seed' column (may be None).

    Returns:
        A DataFrame holding the original rows plus the aggregate rows built
        below (labels "_core_", "_non_core_", "_clean_micro_", "_pos_",
        "_nonterminal_"), with 'exp_name', 'layer_num' and 'exp_type' columns
        derived from the 'run' path.
    """
    df = pd.read_csv(filename, sep="\t", header=0)
    df = df.drop(columns=['Unnamed: 0'])
    # df['task_raw'] = df['task'].copy()
    df['task'] = df['task'].map(analysis.clean_task_name)
    # Files without stratified scores get placeholder columns so the
    # stratifier aggregations below still run.
    if "stratifier" not in df.columns:
        df["stratifier"] = None
    if "stratum_key" not in df.columns:
        df["stratum_key"] = 0

    ###
    # Additional custom aggregations
    extra_groups = [
        # SRL core, non-core, and cleaned micro F1
        agg_label_group(df, analysis.is_srl_task, analysis.is_core_role, "_core_"),
        agg_label_group(df, analysis.is_srl_task, analysis.is_non_core_role, "_non_core_"),
        agg_label_group(df, analysis.is_srl_task, analysis.is_core_or_noncore, "_clean_micro_"),
        # Constituents: split into POS (height 1) and nonterminals (height > 1)
        agg_stratifier_group(df, 'info.height', lambda x: int(x) == 1, "_pos_"),
        agg_stratifier_group(df, 'info.height', lambda x: int(x) > 1, "_nonterminal_"),
        # Relations: ignore negative class (no_relation)
        agg_label_group(df, analysis.is_relation_task, analysis.is_positive_relation, "_clean_micro_"),
    ]
    df = pd.concat([df, *extra_groups], ignore_index=True, sort=False)

    def _exp_name_from_run(run_path):
        # The experiment name is the parent directory of the run directory.
        return os.path.basename(os.path.dirname(run_path.strip("/")))

    df.insert(0, "exp_name", df['run'].map(_exp_name_from_run))
    df.insert(1, "exp_type", df['exp_name'].map(analysis.get_exp_type))
    df.insert(1, "layer_num", df['exp_name'].map(analysis.get_layer_num))
    if tag is not None:
        df.insert(0, "tag", tag)
    df.insert(1, "seed", seed)
    return df

In [4]:
# (tag, path) pairs of scores.tsv files to load. The first assignment is
# immediately overwritten by the list below.
score_files = []
score_files = [
#     ("base", "/nfs/jsalt/exp/edges-20190211-perlayer/scores.tsv"),
#     ("base", "/nfs/jsalt/exp/edges-20190213-perlayer-rerun/scores.tsv"),
    ("base", "/nfs/jsalt/exp/edges-20190217-mix-pre/scores.tsv"),
    ("base", "/nfs/jsalt/exp/edges-20190218-mix-pre/scores.tsv"),
    ("base", "/nfs/jsalt/exp/edges-20190219-perlayer/scores.tsv"),
]
# Load each file with its tag, then stack everything into one long-form frame.
dfs = []
for tag, score_file in score_files:
    df = load_scores_file(score_file, tag=tag)
    dfs.append(df)

# `df` is rebound here from the last per-file frame to the combined frame.
df = pd.concat(dfs, ignore_index=True, sort=False)
def _format_display_col(exp_type, layer_num, tag):
    ret = exp_type
    if layer_num:
        ret += f"-{layer_num}"
    if tag:
        ret += f" ({tag})"
    return ret
# df['display_col'] = ["%s (%s)" % et for et in zip(df.exp_type, df.tag)]
# One display label per row, built from experiment type, layer number and tag.
df['display_col'] = list(map(_format_display_col, df.exp_type, df.layer_num, df.tag))
# Sanity check: list the tasks and experiment types that were loaded.
print(df['task'].unique())
print(df['exp_type'].unique())

['coref-ontonotes-conll' 'dep-labeling-ewt' 'dpr' 'ner-ontonotes'
 'nonterminal-ontonotes' 'pos-ontonotes' 'rel-semeval' 'rel-tacred' 'spr1'
 'spr2' 'srl-conll2012']
['bert-base-uncased-lex' 'bert-base-uncased-mix'
 'bert-base-uncased-mix-pre' 'bert-large-uncased-lex'
 'bert-large-uncased-mix' 'bert-large-uncased-mix-pre']


In [5]:
# NOTE(review): the return value is discarded, so this presumably adds the
# metric columns read below ('accuracy', 'f1_score', '*_errn95') to `df` in place — confirm.
analysis.score_from_confusion_matrix(df)

##
# Set 'score' column for task-appropriate metric
def _get_final_score(row):
    if row['task'] == 'noun-verb':
        return row['accuracy'], row['accuracy_errn95']
    else:
        return row['f1_score'], row['f1_errn95']

# Evaluate the metric row by row, then unzip the (score, errn95) pairs into two columns.
df['score'], df['score_errn95'] = zip(*(_get_final_score(row) for i, row in df.iterrows()))

## Compute clean metrics for each task

For most tasks this is just the micro or macro average F1, but we need to ignore the 0 label for coref, and drop references and continuations for SRL.

In [6]:
# Data split to report (the alternative is "test").
# SPLIT = "test"
SPLIT = "val"

# Keep rows from the chosen split, dropping the "openai" experiments.
mask = (df['split'] == SPLIT) & (df['exp_type'] != "openai")
mask &= df['exp_type'].map(lambda s: '-cased-' not in s)  # skip cased BERT for now
# Skip these tasks ("rel-tacred" and "rel-semeval" are deliberately kept).
mask &= ~df['task'].isin([
    "constituent-ontonotes",
    "ner-tacred",
    "coref-gap",
    "coref-gap-ontonotes",
    "noun-verb",
])

# Only look at perlayer scores
mask &= df['layer_num'].notnull()

# For each task, keep only the rows for the one label that serves as its
# headline metric, then stack the per-task slices into `fdf`.
final_scores = []
for task in df['task'].unique():
    task_scores = df[mask & (df['task'] == task)]
    if analysis.is_coref_task(task):
        # Coref: score the positive ("1") label only.
        wanted_label = "1"
    elif analysis.is_srl_task(task):
        # Use clean version, average only over core or noncore roles.
        # ('_core_' and '_non_core_' are available as separate labels.)
        wanted_label = '_clean_micro_'
    elif analysis.is_relation_task(task):
        # Relation tasks include specific "no_relation" label
        wanted_label = '_clean_micro_'
    elif task == "noun-verb":
        # Noun-verb reports accuracy on VERB class
        wanted_label = 'VERB'
    else:
        wanted_label = '_micro_avg_'
    # To break a task out by stratum instead, append extra slices here, e.g.
    # labels '_info.height_2_' .. '_info.height_6_' for "nonterminal-ontonotes",
    # or '_span_distance_0_' .. '_span_distance_10_' for "dep-labeling-ewt".
    final_scores.append(task_scores[task_scores['label'] == wanted_label])

fdf = pd.concat(final_scores, axis=0, ignore_index=True, sort=False)
# fdf['task_and_metric'] = ["%s-%s" % tl for tl in zip(fdf.task, fdf.label)]
def format_display_row(task, label, seed):
    """Build a row label '<task>-<label>', plus '-<seed>' when seed is truthy.

    A truthy seed must be an integer, since it is rendered with the 'd' format.
    """
    base = f"{task}-{label}"
    return f"{base}-{seed:d}" if seed else base

# Row label for the pivoted table: task, label and (when set) seed.
fdf['display_row'] = [format_display_row(*args) for args in zip(fdf.task, fdf.label, fdf.seed)]
print(len(fdf))
# Bare expression: renders the frame as the cell output.
fdf

418


Unnamed: 0,tag,seed,exp_name,layer_num,exp_type,label,num_epochs,num_steps,run,task,...,precision,recall,f1_score,accuracy_errn95,precision_errn95,recall_errn95,f1_errn95,score,score_errn95,display_row
0,base,,bert-base-uncased-mix-00-edges-coref-ontonotes...,00,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.844300,0.760248,0.800073,0.003346,0.009828,0.010982,0.010373,0.800073,0.010373,coref-ontonotes-conll-1
1,base,,bert-base-uncased-mix-01-edges-coref-ontonotes...,01,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.848998,0.802790,0.825248,0.003181,0.009471,0.010235,0.009838,0.825248,0.009838,coref-ontonotes-conll-1
2,base,,bert-base-uncased-mix-02-edges-coref-ontonotes...,02,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.845075,0.840854,0.842960,0.003063,0.009331,0.009410,0.009370,0.842960,0.009370,coref-ontonotes-conll-1
3,base,,bert-base-uncased-mix-03-edges-coref-ontonotes...,03,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.860228,0.843782,0.851926,0.002971,0.009006,0.009339,0.009169,0.851926,0.009169,coref-ontonotes-conll-1
4,base,,bert-base-uncased-mix-04-edges-coref-ontonotes...,04,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.873176,0.865656,0.869400,0.002808,0.008597,0.008772,0.008684,0.869400,0.008684,coref-ontonotes-conll-1
5,base,,bert-base-uncased-mix-05-edges-coref-ontonotes...,05,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.879472,0.872201,0.875822,0.002743,0.008410,0.008588,0.008498,0.875822,0.008498,coref-ontonotes-conll-1
6,base,,bert-base-uncased-mix-06-edges-coref-ontonotes...,06,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.887824,0.877885,0.882827,0.002667,0.008163,0.008422,0.008291,0.882827,0.008291,coref-ontonotes-conll-1
7,base,,bert-base-uncased-mix-07-edges-coref-ontonotes...,07,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.898627,0.879435,0.888928,0.002594,0.007848,0.008376,0.008103,0.888928,0.008103,coref-ontonotes-conll-1
8,base,,bert-base-uncased-mix-08-edges-coref-ontonotes...,08,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.903833,0.901653,0.902742,0.002446,0.007593,0.007660,0.007626,0.902742,0.007626,coref-ontonotes-conll-1
9,base,,bert-base-uncased-mix-09-edges-coref-ontonotes...,09,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.905445,0.902170,0.903805,0.002432,0.007540,0.007642,0.007591,0.903805,0.007591,coref-ontonotes-conll-1


Pivot DataFrame to present each task on a row, and each experiment on a column.

This form is suitable to copy-paste into a spreadsheet.

In [7]:
# Wide-form table for the spreadsheet: one row per display_row, one column per
# display_col, sorted in (mostly) stable order.
sheet_df = fdf.pivot(index="display_row", columns="display_col", values="score")
column_order = sorted(sheet_df.columns, key=exp_type_sort_key)
row_order = sorted(sheet_df.index, key=task_sort_key)
sheet_df = sheet_df.reindex(columns=column_order).reindex(index=row_order)

# Scores are printed scaled by 100, with four decimal places.
csv_args = dict(float_format="%.4f")
print((100*sheet_df).to_csv(**csv_args))

display_row,bert-base-uncased-mix-00 (base),bert-base-uncased-mix-01 (base),bert-base-uncased-mix-02 (base),bert-base-uncased-mix-03 (base),bert-base-uncased-mix-04 (base),bert-base-uncased-mix-05 (base),bert-base-uncased-mix-06 (base),bert-base-uncased-mix-07 (base),bert-base-uncased-mix-08 (base),bert-base-uncased-mix-09 (base),bert-base-uncased-mix-10 (base),bert-base-uncased-mix-11 (base),bert-base-uncased-mix-12 (base),bert-large-uncased-mix-00 (base),bert-large-uncased-mix-01 (base),bert-large-uncased-mix-02 (base),bert-large-uncased-mix-03 (base),bert-large-uncased-mix-04 (base),bert-large-uncased-mix-05 (base),bert-large-uncased-mix-06 (base),bert-large-uncased-mix-07 (base),bert-large-uncased-mix-08 (base),bert-large-uncased-mix-09 (base),bert-large-uncased-mix-10 (base),bert-large-uncased-mix-11 (base),bert-large-uncased-mix-12 (base),bert-large-uncased-mix-13 (base),bert-large-uncased-mix-14 (base),bert-large-uncased-mix-15 (base),bert-large-uncased-mix-16 (base),bert-large-

# Plot F1 by layer as a bar plot

In [8]:
# Group sizes that do not trigger the warning in the loop below.
# NOTE(review): presumably 13 = BERT-base and 25 = BERT-large layers incl. layer 0 — confirm.
EXPECTED_NUM_LAYERS = {13, 25}
# When True, 'delta_score' is the gain in the running maximum score rather than
# the raw difference from the previous layer.
USE_RUNNING_MAX = False
# USE_RUNNING_MAX = True

def _compute_exp_layer(sub_df, min_layer):
    sub_df = sub_df[sub_df['layer_num'].map(int) >= min_layer]
#     ds = sub_df['delta_score'].map(lambda s: max(s, 0))
    ds = sub_df['delta_score']
    num = np.sum(ds * sub_df['layer_num'].map(int))
    denom = np.sum(ds)
    return num / denom

# entropy(scalars[i], qk=masks[i], base=2)
def _compute_kl_unif(sub_df, min_layer):
    sub_df = sub_df[sub_df['layer_num'].map(int) >= min_layer]
    ds = sub_df['delta_score'].map(lambda s: max(s, 0))
    return entropy(ds.values, qk=np.ones_like(ds.values), base=2)

# Pre-create some of the derived columns; the loop below fills them per group.
fdf['max_layer_score'] = None
fdf['lex_score'] = None
fdf['prev_layer_score'] = None
fdf['delta_score'] = None
fdf['exp_layer'] = None
# One group per (task/metric row, experiment type, tag); each group should hold
# one row per layer.
gb = fdf.groupby(by=['display_row', 'exp_type', 'tag'])
for key, idxs in gb.groups.items():
    if len(idxs) not in EXPECTED_NUM_LAYERS:
        print(f"Warning: key '{key}' has {len(idxs)} matches (expected {EXPECTED_NUM_LAYERS})")
    sub_df = fdf.loc[idxs]
    layer_nums = sub_df['layer_num'].map(int)
    max_layer = layer_nums.max()
    # Layers must be exactly 0..max_layer, with none missing.
    assert set(layer_nums.map(int)) == set(range(max_layer+1))
    # Scores indexed by layer number.
    # NOTE(review): float32 storage rounds the scores (e.g. 0.800073 -> 0.800072
    # in 'prev_layer_score'); confirm this precision loss is acceptable.
    layer_scores = np.zeros(max_layer+1, dtype=np.float32)
    for i, score in zip(layer_nums, sub_df['score']):
        layer_scores[i] = score
    running_max_layer_scores = np.maximum.accumulate(layer_scores)
    # Layer 0 has no predecessor, so its "previous" score is taken to be 0.
    fdf.loc[idxs, 'prev_layer_score'] = [layer_scores[i-1] if i > 0 else 0.0 for i in layer_nums]
    fdf.loc[idxs, 'prev_score_max'] = [running_max_layer_scores[i-1] if i > 0 else 0.0 for i in layer_nums]
    if USE_RUNNING_MAX:
        # NOTE(review): running_max_layer_scores is ordered by layer number, while
        # the rows at `idxs` are in frame order — this assumes the two agree.
        fdf.loc[idxs, 'delta_score'] = running_max_layer_scores - fdf.loc[idxs, 'prev_score_max']
    else:
        fdf.loc[idxs, 'delta_score'] = fdf.loc[idxs, 'score'] - fdf.loc[idxs, 'prev_layer_score']
    # Raw layer-over-layer difference, independent of USE_RUNNING_MAX.
    fdf.loc[idxs, 'real_delta_score'] = fdf.loc[idxs, 'score'] - fdf.loc[idxs, 'prev_layer_score']
    # Re-read the group so the summary statistics see the new delta columns.
    sub_df = fdf.loc[idxs]
    # min_layer=1 leaves layer 0 out of both statistics (its delta is the full score).
    fdf.loc[idxs, 'exp_layer'] = _compute_exp_layer(sub_df, min_layer=1)
    fdf.loc[idxs, 'kl_unif'] = _compute_kl_unif(sub_df, min_layer=1)
    fdf.loc[idxs, 'max_layer_score'] = sub_df['score'].max()
    # 'lex_score' is the score at layer 0.
    fdf.loc[idxs, 'lex_score'] = sub_df[layer_nums == 0]['score'].max()
    # Headroom: best layer score minus the layer-0 score; the *_frac columns
    # express each layer's delta as a fraction of it.
    fdf.loc[idxs, 'contextual_headroom'] = fdf.loc[idxs, 'max_layer_score'] - fdf.loc[idxs, 'lex_score']
    fdf.loc[idxs, 'headroom_frac'] = fdf.loc[idxs, 'delta_score'] / fdf.loc[idxs, 'contextual_headroom']
    fdf.loc[idxs, 'real_headroom_frac'] = fdf.loc[idxs, 'real_delta_score'] / fdf.loc[idxs, 'contextual_headroom']
fdf

Unnamed: 0,tag,seed,exp_name,layer_num,exp_type,label,num_epochs,num_steps,run,task,...,lex_score,prev_layer_score,delta_score,exp_layer,prev_score_max,real_delta_score,kl_unif,contextual_headroom,headroom_frac,real_headroom_frac
0,base,,bert-base-uncased-mix-00-edges-coref-ontonotes...,00,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.800073,0,0.800073,3.61864,0.000000,0.800073,0.632103,0.105585,7.57755,7.57755
1,base,,bert-base-uncased-mix-01-edges-coref-ontonotes...,01,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.800073,0.800072,0.0251754,3.61864,0.800072,0.0251754,0.632103,0.105585,0.238438,0.238438
2,base,,bert-base-uncased-mix-02-edges-coref-ontonotes...,02,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.800073,0.825248,0.0177116,3.61864,0.825248,0.0177116,0.632103,0.105585,0.167748,0.167748
3,base,,bert-base-uncased-mix-03-edges-coref-ontonotes...,03,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.800073,0.84296,0.0089664,3.61864,0.842960,0.0089664,0.632103,0.105585,0.0849214,0.0849214
4,base,,bert-base-uncased-mix-04-edges-coref-ontonotes...,04,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.800073,0.851926,0.0174738,3.61864,0.851926,0.0174738,0.632103,0.105585,0.165496,0.165496
5,base,,bert-base-uncased-mix-05-edges-coref-ontonotes...,05,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.800073,0.8694,0.00642173,3.61864,0.869400,0.00642173,0.632103,0.105585,0.0608207,0.0608207
6,base,,bert-base-uncased-mix-06-edges-coref-ontonotes...,06,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.800073,0.875822,0.00700518,3.61864,0.875822,0.00700518,0.632103,0.105585,0.0663466,0.0663466
7,base,,bert-base-uncased-mix-07-edges-coref-ontonotes...,07,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.800073,0.882827,0.00610089,3.61864,0.882827,0.00610089,0.632103,0.105585,0.057782,0.057782
8,base,,bert-base-uncased-mix-08-edges-coref-ontonotes...,08,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.800073,0.888928,0.0138143,3.61864,0.888928,0.0138143,0.632103,0.105585,0.130836,0.130836
9,base,,bert-base-uncased-mix-09-edges-coref-ontonotes...,09,bert-base-uncased-mix,1,,,/nfs/jsalt/exp/edges-20190219-perlayer/bert-ba...,coref-ontonotes-conll,...,0.800073,0.902742,0.00106283,3.61864,0.902742,0.00106283,0.632103,0.105585,0.0100661,0.0100661


In [9]:
# Plot configuration for the per-layer bar plot.
palette = bokeh.palettes.Category20c_20

# Experiment type whose per-layer scores are plotted.
# NOTE(review): "bert-base-uncased-at" is not among the exp_types printed when
# the score files were loaded; if it is absent from `fdf` the plot is empty — confirm.
# EXPT_TYPE = "bert-base-uncased-mix"
# EXPT_TYPE = "bert-large-uncased-mix"
EXPT_TYPE = "bert-base-uncased-at"
# Highest layer index shown on the x axis.
MAX_LAYER = 24 if "-large-" in EXPT_TYPE else 12
BAR_SCALE = 1.2
# (fill, line) colors for positive bars; the second assignment overrides the first.
COLORS = (palette[0], palette[0]) # blues
COLORS = (palette[12], palette[12]) # purples
# (fill, line) colors for bars with non-positive height.
NEG_COLORS = (palette[5], palette[4]) # oranges
PLOT_WIDTH = 800

##
# Don't change below here
##
def _make_display_name(task, label):
    if task.startswith("pos-"):
        return "POS"
    elif task.startswith("coref-"):
        return "Coref."
    elif task.startswith("srl"):
        return "SRL"
    elif task.startswith("spr"):
        return "SPR"
    elif task.startswith("rel-"):
        return "Relations"
    elif task.startswith("dep-"):
        return "Deps."
    elif task.startswith("nonterminal-"):
        return "Consts."
    else:
        return analysis.make_display_name(task, label) 

# Per-layer rows of the chosen experiment type, minus the tasks left out of
# this plot ('rel-semeval' and 'spr1' are deliberately kept).
mask = (fdf['exp_type'] == EXPT_TYPE) & fdf['layer_num'].notnull()
mask &= ~fdf['task'].isin([
    # don't use these tasks
    'constituent-ontonotes',
    'ner-tacred',
    'coref-gap',
    'coref-gap-ontonotes',
    'noun-verb',
    'dpr',         # noisy
    ## Skip these for now
    'rel-tacred',
    'spr2',        # noisy
])
## TEMPORARY
# mask &= fdf['task'] == "dep-labeling-ewt"   # TEMPORARY

# ELMo models also have 'scalar_mix_0.', which is for pretraining and not used by edge probing.
# mask &= df['scalar_set'].map(lambda s: s.endswith("scalar_mix.") or s.endswith("scalar_mix_1."))
plot_df = fdf[mask].copy()

##
# Make long-form DataFrame
# Columns prefixed with '_' are plotting inputs consumed by the glyphs below.
plot_df['_display_name'] = list(map(_make_display_name, plot_df['task'], plot_df['label']))
# plot_df['_display_name'] = plot_df['display_row']

# Bar height: the layer's score rescaled so that the layer-0 score maps to 0
# and the best layer score maps to 1.
# NOTE(review): the denominator is 0 when the best score equals the layer-0 score.
# plot_df['_bar_height'] = plot_df['score'] * BAR_SCALE
# plot_df['_bar_height'] = list(map(lambda low, high, s: BAR_SCALE * (s - low)/max(high - low, 0.025), 
#                                   plot_df['lex_score'], plot_df['max_layer_score'], plot_df['score']))
# plot_df['_bar_height'] = plot_df['real_headroom_frac'].map(lambda s: BAR_SCALE * s)
plot_df['_bar_height'] = (plot_df['score'] - plot_df['lex_score']) / (plot_df['max_layer_score'] - plot_df['lex_score'])

# (category, offset) centre for each bar; h/2-0.5 places the bar's lower edge
# at offset -0.5 within its category row.
plot_df['_bar_center'] = [(l, h/2-0.5) for l, h in zip(plot_df['_display_name'], plot_df['_bar_height'])]
# plot_df['_bar_center'] = [(l, 0) for l, h in zip(plot_df['_display_name'], plot_df['_bar_height'])]

# Colors are chosen from the signed height; only then is the height made
# non-negative for drawing.
plot_df['_fill_color'] = [COLORS[0] if h > 0 else NEG_COLORS[0] for h in plot_df['_bar_height']]
plot_df['_line_color'] = [COLORS[1] if h > 0 else NEG_COLORS[1] for h in plot_df['_bar_height']]
plot_df['_bar_height'] = plot_df['_bar_height'].map(np.abs)

# Text for the labels: expected layer, and the score scaled by 100.
plot_df['_formatted_exp_layer'] = plot_df['exp_layer'].map(lambda l: "E[layer] = {:.02f}".format(l))
plot_df['_formatted_score'] = plot_df['score'].map(lambda l: "{:.1f}".format(100*l))

# Row categories: unique (task, label) pairs in task order, reversed before
# being turned into display names.
# sorted_rows = sorted(plot_df['display_row'].unique(), key=task_sort_key)
# cats = list(reversed(sorted_rows))
sorted_rows = sorted(pd.Series(list(zip(plot_df['task'], plot_df['label']))).unique(), 
                     key=lambda tl: (task_sort_key(tl[0]), tl[1]))
cats = [_make_display_name(*tl) for tl in reversed(sorted_rows)]

# Hover tooltip; its renderer list starts empty and is filled in once the bar
# glyphs exist.
hover = bokeh.models.HoverTool(
  tooltips=[
      ("task", "@_display_name"),
      ("layer", "@layer_num"),
      ("score", "@score{0.00} (Δ @delta_score{0.00})"),
  ],
  renderers=[]
)

# The x range starts at 0.5, so bars centred on layer 0 (width 0.9) fall
# outside the visible area.
x_range = (0.5, MAX_LAYER+0.5)
# PLOT_WIDTH = 700 if MAX_LAYER > 2 else 300
# Height grows with the number of category rows.
PLOT_HEIGHT = 80 + 80*len(cats)
p = bp.figure(y_range=bokeh.models.FactorRange(*cats, factor_padding=0.20), x_range=x_range,
              plot_width=PLOT_WIDTH, plot_height=PLOT_HEIGHT, tools=[hover, 'save'])

##
# Add background bars
# Two full-width bars per row: a faint grey fill at the 'image' level, and an
# unfilled outline at the 'underlay' level.
bgbar_color = "#f2f2f2"
p.hbar(y='_display_name', left=x_range[0], right=x_range[1], 
       height=1.0, 
       fill_color=bgbar_color, fill_alpha=0.40, 
#        line_color="#e6e6e6", 
#        line_alpha=0.80,
#        line_color="Gray",
       line_alpha=0.0,
#        line_width=0.5,
       source=plot_df, 
       level='image')
p.hbar(y='_display_name', left=x_range[0], right=x_range[1], 
       height=1.0, 
       fill_color="White", fill_alpha=0.0,
       line_color="#e6e6e6",
       line_alpha=1.0,
       line_width=0.5,
       source=plot_df, 
       level='underlay')

# One rectangle per (task, layer), sized and colored by the '_bar_*' and
# '_*_color' columns of plot_df.
bars = p.rect(x='layer_num', y='_bar_center', width=0.9, height="_bar_height",
       fill_color='_fill_color', line_color='_line_color',
       source=plot_df)
# Add an invisible overlay for easier tooltips on small bars
shadow_bars = p.rect(x='layer_num', y='_bar_center', width=0.9, height=1.0,
       source=plot_df, alpha=0.0)
# Attach the hover tool to both the visible and the invisible bars.
hover.renderers.extend([bars, shadow_bars])

# Add score labels
# Labels are anchored at offset -0.5 within each category row and nudged up
# by y_offset.
y = bokeh.transform.dodge("_display_name", -0.5, range=p.y_range)
score_labels = bokeh.models.annotations.LabelSet(
    y=y, x="layer_num", text="_formatted_score",
    y_offset=5, text_align="center", text_baseline="bottom",
    text_color="White", text_font_size="12pt", 
    source=bokeh.models.ColumnDataSource(plot_df))
p.add_layout(score_labels)

# Add labels with entropy
# (Disabled; the label text would come from '_formatted_exp_layer'.)
# score_labels = bokeh.models.annotations.LabelSet(
#     y="_display_name", x=MAX_LAYER, text="_formatted_exp_layer",
#     text_align="right", text_baseline="middle", y_offset=-15,
#     x_offset=8,
#     text_color=COLORS[0], text_font_size="11pt",
#     text_font_style="bold",
#     background_fill_color="White", border_line_color="White", border_line_width=5,
#     source=bokeh.models.ColumnDataSource(plot_df[plot_df['layer_num'].map(int) == 0]))
# p.add_layout(score_labels)

# One x tick per layer; the vertical grid reuses the same ticker.
p.xaxis.ticker = bokeh.models.FixedTicker(ticks=np.arange(0, MAX_LAYER+1))
p.xgrid.ticker = p.xaxis[0].ticker

# Horizontal grid lines every 0.5 units along the categorical axis.
p.ygrid.ticker = bokeh.models.FixedTicker(ticks=np.arange(0, len(cats), 0.5))
    
# Shared font size for tick labels and the x-axis title.
_FONT_SIZE = "13pt"
p.yaxis.major_label_text_font_size = _FONT_SIZE
p.xaxis.major_label_text_font_size = _FONT_SIZE
p.xaxis.axis_label = "Encoder Layer"
p.xaxis.axis_label_text_font_size = _FONT_SIZE

bp.show(p)

# Extract mixing scalars

In [28]:
def load_scalars_file(filename, tag=None):
    """Load a `scalars.tsv` file and derive experiment metadata from 'run'.

    Args:
        filename: path to a tab-separated scalars file with a header row and
            an unnamed index column.
        tag: optional value for a leading 'tag' column; omitted when None.

    Returns:
        The loaded DataFrame with 'exp_name', 'exp_type' and 'task' columns
        inserted at the front.
    """
    df = pd.read_csv(filename, sep="\t", header=0)
    df = df.drop(columns=['Unnamed: 0'])

    def _exp_name_from_run(run_path):
        # The experiment name is the parent directory of the run directory.
        return os.path.basename(os.path.dirname(run_path.strip("/")))

    def _task_from_exp_name(exp_name):
        # The task is whatever follows "-edges-" in the experiment name.
        return analysis.clean_task_name(exp_name.split("-edges-")[1])

    df.insert(0, "exp_name", df['run'].map(_exp_name_from_run))
    df.insert(1, "exp_type", df['exp_name'].map(analysis.get_exp_type))
    df.insert(2, "task", df['exp_name'].map(_task_from_exp_name))
    if tag is not None:
        df.insert(0, "tag", tag)

    return df

# (tag, path) pairs of scalars.tsv files holding the learned mixing scalars.
scalar_files = [
    ("base", "/nfs/jsalt/exp/edges-20190217-mix-pre/scalars.tsv"),
    ("base", "/nfs/jsalt/exp/edges-20190218-mix-pre/scalars.tsv"),
    # ELMo plot for camera-ready
    ("base", "/nfs/jsalt/home/iftenney/exp/final_20180927/base/scalars.tsv"),
    # Comment this out for re-creating paper plots, otherwise get duplicate rows which
    # screw up the plot formatting.
    # NOTE(review): this entry is currently active, so the duplicate rows warned
    # about above will be present.
    ("base", "/nfs/jsalt/exp/edges-20190205-semeval/scalars.tsv"),
]

# Load every file and stack them into one frame.
dfs = []
for tag, scalar_file in scalar_files:
    dfs.append(load_scalars_file(scalar_file, tag=tag))
scalar_df = pd.concat(dfs, ignore_index=True, sort=False)
scalar_df['display_col'] = ["%s (%s)" % et for et in zip(scalar_df.exp_type, scalar_df.tag)]
# ELMo models also have 'scalar_mix_0.', which is for pretraining and not used by edge probing.
mask = scalar_df['scalar_set'].map(lambda s: s.endswith("scalar_mix.") or s.endswith("scalar_mix_1."))
# .copy() so later column assignments act on an independent frame.
scalar_df = scalar_df[mask].copy()
# Sanity checks on what was loaded.
print(scalar_df['task'].unique())
print(scalar_df['exp_type'].unique())
print(len(scalar_df))
print("Scalar sets:", scalar_df['scalar_set'].unique())

['coref-ontonotes-conll' 'dep-labeling-ewt' 'dpr' 'ner-ontonotes'
 'nonterminal-ontonotes' 'pos-ontonotes' 'rel-semeval' 'rel-tacred' 'spr1'
 'spr2' 'srl-conll2012' 'constituent-ontonotes']
['bert-base-uncased-mix' 'bert-base-uncased-mix-pre'
 'bert-large-uncased-mix' 'bert-large-uncased-mix-pre' 'elmo-full'
 'elmo-ortho']
58
Scalar sets: ['sent_encoder._text_field_embedder.scalar_mix.'
 'sent_encoder._text_field_embedder.token_embedder_elmo._elmo.scalar_mix_1.']


In [29]:
# List the columns; the 'scalar_parameters.<n>' ones hold the per-layer mixing scalars.
print(scalar_df.columns)

Index(['tag', 'exp_name', 'exp_type', 'task', 'checkpoint', 'gamma', 'label',
       'run', 'scalar_parameters.0', 'scalar_parameters.1',
       'scalar_parameters.10', 'scalar_parameters.11', 'scalar_parameters.12',
       'scalar_parameters.2', 'scalar_parameters.3', 'scalar_parameters.4',
       'scalar_parameters.5', 'scalar_parameters.6', 'scalar_parameters.7',
       'scalar_parameters.8', 'scalar_parameters.9', 'scalar_set',
       'scalar_parameters.13', 'scalar_parameters.14', 'scalar_parameters.15',
       'scalar_parameters.16', 'scalar_parameters.17', 'scalar_parameters.18',
       'scalar_parameters.19', 'scalar_parameters.20', 'scalar_parameters.21',
       'scalar_parameters.22', 'scalar_parameters.23', 'scalar_parameters.24',
       'display_col'],
      dtype='object')


In [30]:
# Count total scalar columns: map each scalar index n to its column name
# 'scalar_parameters.<n>', ordered by n.
# The pattern is a raw string so that `\.` and `\d` reach the regex engine
# without relying on Python's deprecated handling of invalid string escapes.
scalar_columns = collections.OrderedDict(sorted(
    [(int(m.group(1)), m.group(0)) for m in 
     (re.match(r"^scalar_parameters\.(\d+)$", str(name)) for name in scalar_df.columns)
     if m]
))

# Fill NaN with -inf for scalar columns, so missing layers get zero weight
# under the softmax. The result is assigned back explicitly: an in-place
# fillna on a selected column may act on a temporary and leave scalar_df
# unchanged.
for name in scalar_columns.values():
    scalar_df[name] = scalar_df[name].fillna(value=-np.inf)

# Pre-fill number columns
# (one integer-named column per scalar index, to hold the softmax weights)
for number in scalar_columns.keys():
    scalar_df[number] = None
scalar_df["weight_entropy"] = None
    
# Softmax over parameters in each row
num_scalars = max(scalar_columns.keys()) + 1
scalars = {}  # row index -> softmax weights over all scalar slots
masks = {}    # row index -> True where the row has a finite scalar
for i, row in scalar_df.iterrows():
    arr = np.zeros(num_scalars, dtype=np.float32)
    for j, col in scalar_columns.items():
        arr[j] = float(row[col])
        # Missing scalars become -inf and so get zero softmax weight.
        if np.isnan(arr[j]):
            arr[j] = -np.inf
    # Softmax over row
    scalars[i] = softmax(arr)
    masks[i] = np.isfinite(arr)

# Add softmax weights back to DataFrame, with numeric column names.
# This way, we can convert to long-form for easy plotting.
for i in scalar_df.index:
    for j in scalar_columns.keys():
        scalar_df.loc[i, j] = scalars[i][j]
    # Compute entropy
    scalar_df.loc[i, "weight_entropy"] = entropy(scalars[i], base=2)
    # KL divergence (bits) from the uniform distribution over the finite slots.
    scalar_df.loc[i, "weight_kl_unif"] = entropy(scalars[i], qk=masks[i], base=2)
    # Compute expectation
    weighted_layers = scalars[i] * np.arange(len(scalars[i])) * masks[i]
    scalar_df.loc[i, "weight_exp_layer"] = np.sum(weighted_layers)
    # Same expectation with slot 0 excluded and the remaining weights renormalized.
    scalar_df.loc[i, "weight_exp_layer_oneplus"] = np.sum(weighted_layers[1:]) / np.sum(scalars[i][1:] * masks[i][1:])

scalar_df.head()

Unnamed: 0,tag,exp_name,exp_type,task,checkpoint,gamma,label,run,scalar_parameters.0,scalar_parameters.1,...,19,20,21,22,23,24,weight_entropy,weight_kl_unif,weight_exp_layer,weight_exp_layer_oneplus
0,base,bert-base-uncased-mix-edges-coref-ontonotes-conll,bert-base-uncased-mix,coref-ontonotes-conll,/model_state_eval_best.th,1.682971,__scalar_mix__,/nfs/jsalt/exp/edges-20190217-mix-pre/bert-bas...,-0.630028,-0.761247,...,0,0,0,0,0,0,3.29132,0.4091179,7.962118,8.21473
1,base,bert-base-uncased-mix-edges-dep-labeling-ewt,bert-base-uncased-mix,dep-labeling-ewt,/model_state_eval_best.th,3.02645,__scalar_mix__,/nfs/jsalt/exp/edges-20190217-mix-pre/bert-bas...,-0.324348,-0.365665,...,0,0,0,0,0,0,3.10252,0.5979175,6.54809,6.716994
2,base,bert-base-uncased-mix-edges-dpr,bert-base-uncased-mix,dpr,/model_state_eval_best.th,0.996561,__scalar_mix__,/nfs/jsalt/exp/edges-20190217-mix-pre/bert-bas...,-0.001596,-0.000428,...,0,0,0,0,0,0,3.70044,6.778836e-07,6.002542,6.501843
3,base,bert-base-uncased-mix-edges-ner-ontonotes,bert-base-uncased-mix,ner-ontonotes,/model_state_eval_best.th,2.208929,__scalar_mix__,/nfs/jsalt/exp/edges-20190217-mix-pre/bert-bas...,-0.221502,-0.218776,...,0,0,0,0,0,0,3.58759,0.1128481,6.862706,7.276716
4,base,bert-base-uncased-mix-edges-nonterminal-ontonotes,bert-base-uncased-mix,nonterminal-ontonotes,/model_state_eval_best.th,2.014895,__scalar_mix__,/nfs/jsalt/exp/edges-20190217-mix-pre/bert-bas...,-0.521184,-0.483713,...,0,0,0,0,0,0,3.27354,0.4269024,6.006378,6.193607


In [41]:
# Experiment types present in the scalar data (the values EXPT_TYPES can select from).
scalar_df.exp_type.unique()

array(['bert-base-uncased-mix', 'bert-base-uncased-mix-pre',
       'bert-large-uncased-mix', 'bert-large-uncased-mix-pre',
       'elmo-full', 'elmo-ortho'], dtype=object)

## Make compound plot with F1 scores as well

In [31]:
# Configuration for the compound plot (mixing weights plus per-layer scores).
# https://bokeh.pydata.org/en/latest/docs/reference/palettes.html
palette = bokeh.palettes.Category20c_20

# Model to plot; the experiment types below are derived from it.
# MODEL_NAME = "bert-base-uncased"
MODEL_NAME = "bert-large-uncased"
# Experiment types whose mixing weights are selected by _make_mask.
EXPT_TYPES = [f"{MODEL_NAME}-mix"]
# Experiment type whose per-layer scores are selected.
SCORE_EXPT_TYPE = f"{MODEL_NAME}-mix"
# NOTE(review): MODEL_NAME ends in "-uncased", so the substring '-large-' (with
# a trailing hyphen) never occurs in it; MAX_LAYER is 12 and WEIGHT_SCALE is 2.1
# even for "bert-large-uncased" — confirm whether 24 / 2.7 was intended.
MAX_LAYER = 24 if '-large-' in MODEL_NAME else 12
WEIGHT_SCALE = 2.7 if '-large-' in MODEL_NAME else 2.1
# SCORE_SCALE = 1.5
# SCORE_SCALE = 2.2
SCORE_SCALE = WEIGHT_SCALE
WEIGHT_COLORS = (palette[0], palette[9]) # blue, green
SCORE_COLORS = (palette[12], palette[12]) # purples
NEG_COLORS = (palette[5], palette[4]) # oranges
# PLOT_WIDTH = 900
PLOT_WIDTH=450
# Plot height as a function of the number of rows; currently a fixed 750.
# _PLOT_HEIGHT_FN=lambda num_cats: 80 + 100*num_cats
_PLOT_HEIGHT_FN=lambda num_cats: 750

##
# Don't change below here
##
def _make_display_name(task, label):
    if task.startswith("pos-"):
        return "POS"
    elif task.startswith("coref-"):
        return "Coref."
    elif task.startswith("spr"):
        return "SPR"
    elif task.startswith("rel-"):
        return "Relations"
    elif task.startswith("dep-"):
        return "Deps."
    elif task.startswith("nonterminal-"):
        return "Consts."
    else:
        return analysis.make_display_name(task, label)        

def _make_mask(df):
    mask = df['exp_type'].map(lambda s: s in EXPT_TYPES)
    mask &= df['task'] != 'constituent-ontonotes'  # don't use this task
    mask &= df['task'] != 'ner-tacred'  # don't use this task
    mask &= df['task'] != 'coref-gap-ontonotes'  # don't use this task
    mask &= df['task'] != 'rel-tacred'  # don't use this task
#     mask &= df['task'] != 'rel-semeval'  # don't use this task
    # Skip Winograd and SPR2 for this
    mask &= df['task'] != 'dpr'
#     mask &= df['task'] != 'spr1'
    mask &= df['task'] != 'spr2'
    return mask

# Wide-form scalar-mix weights, restricted to the plotted experiments/tasks.
mask = _make_mask(scalar_df)
weight_df = scalar_df[mask].copy()

##
# Make long-form DataFrame and add plotting values
# The keys of scalar_columns are the columns that get melted (they become the
# values of 'layer_num'); the columns named by its values are excluded from
# the identifier columns and so do not survive the melt.
skip_cols = set(scalar_columns.keys()).union(scalar_columns.values())
id_vars = [c for c in weight_df.columns if c not in skip_cols]
value_vars = scalar_columns.keys()
weight_df = pd.melt(weight_df, id_vars=id_vars, value_vars=value_vars, 
                    var_name="layer_num", value_name="layer_weight")
weight_df['label'] = None

##
# Build the scores DataFrame (kept as a separate frame, not appended to
# weight_df): rows of fdf for SCORE_EXPT_TYPE that have a layer number.
mask = _make_mask(fdf)
mask &= fdf['layer_num'].notnull()
mask &= fdf['exp_type'] == SCORE_EXPT_TYPE
# mask &= fdf['layer_num'].astype(float) > 0
score_df = fdf[mask].copy()
# Erase labels, for now
score_df['label'] = None

##
# Plotting code below this line
##

# Row keys
# Reversed so the first task in sort order ends up last in the factor list.
sorted_tasks = sorted(weight_df['task'].unique(), key=task_sort_key)
cats = [_make_display_name(t, None) for t in sorted_tasks]
cats = list(reversed(cats))
PLOT_HEIGHT = _PLOT_HEIGHT_FN(len(cats))

# Row names, matching row keys
score_df['_display_name'] = list(map(_make_display_name, score_df['task'], score_df['label']))
weight_df['_display_name'] = list(map(_make_display_name, weight_df['task'], weight_df['label']))

# Bar heights for weights
weight_df['_bar_height'] = weight_df['layer_weight'] * WEIGHT_SCALE
weight_df['_bar_center'] = weight_df['_display_name']
weight_df['_formatted_entropy'] = weight_df['weight_entropy'].map(lambda h: "H(s) = {:.02f} bits".format(h))
# weight_df['_formatted_kl_unif'] = weight_df['weight_kl_unif'].map(lambda h: "KL(s||uniform) = {:.02f} bits".format(h))
weight_df['_formatted_kl_unif'] = weight_df['weight_kl_unif'].map(lambda h: "K(s) = {:.02f}".format(h))
# weight_df['_formatted_exp_layer'] = weight_df['weight_exp_layer_oneplus'].map(lambda l: "E[k] = {:.02f}".format(l))

# Bar heights for scores (cumulative)
# Heights are capped at 1.0 from above only; negative values are kept here
# because their sign selects the colour below.
# score_df['_bar_height'] = score_df['real_headroom_frac'] * SCORE_SCALE
score_df['_bar_height'] = score_df['headroom_frac'] * SCORE_SCALE
score_df['_bar_height'] = score_df['_bar_height'].map(lambda h: min(h, 1.0))
# Add offset so bars start at baseline
# (category, offset) pairs; must be computed from the signed height, i.e.
# before the abs() below.
score_df['_bar_center'] = [(l, h/2-0.5) for l, h in zip(score_df['_display_name'], score_df['_bar_height'])]
# score_df['_bar_center'] = score_df["_display_name"]

# Positive bars use SCORE_COLORS, non-positive ones NEG_COLORS; only then is
# the height made unsigned for drawing.
score_df['_fill_color'] = [SCORE_COLORS[0] if h > 0 else NEG_COLORS[0] for h in score_df['_bar_height']]
score_df['_line_color'] = [SCORE_COLORS[1] if h > 0 else NEG_COLORS[1] for h in score_df['_bar_height']]
score_df['_bar_height'] = score_df['_bar_height'].map(np.abs)

# Pre-formatted strings for the text labels.
# score_df['_formatted_exp_layer'] = score_df['exp_layer'].map(lambda l: "E[layer] = {:.02f}".format(l))
score_df['_formatted_exp_layer'] = score_df['exp_layer'].map(lambda l: "{:.02f}".format(l))
score_df['_formatted_kl_unif'] = score_df['kl_unif'].map(lambda h: "K(Δ) = {:.02f}".format(h))

# Hover tooltips: one tool for the weight bars, one for the score bars, so
# each can show its own fields. Both start with no renderers; the bars are
# attached once they have been drawn.
hover_0 = bokeh.models.HoverTool(
  tooltips=[
      ("task", "@_display_name"),
      ("experiment", "@exp_type"),
      ("layer", "@layer_num"),
      ("weight", "@layer_weight{0.0%}"),
  ],
  renderers=[],
)
hover_2 = bokeh.models.HoverTool(
  tooltips=[
      ("task", "@_display_name"),
      ("experiment", "@exp_type"),
      ("layer", "@layer_num"),
      ("score", "@score{0.0%} (Δ @delta_score{0.0%})"),
      ("headroom fraction", "@headroom_frac{0.0%}"),
  ],
  renderers=[],
)

# x: layer index with half a unit of margin on each side; y: one categorical
# row per task display name.
x_range = (-0.5, MAX_LAYER+0.5)
p = bp.figure(y_range=bokeh.models.FactorRange(*cats, factor_padding=0.10), x_range=x_range,
              plot_width=PLOT_WIDTH, plot_height=PLOT_HEIGHT, tools=[hover_0, hover_2, 'save'])

##
# Add background bars
# Two passes over the full x range: a light fill with no outline, then an
# outline with no fill. source=weight_df, so one bar is emitted per row of
# weight_df (several overlapping bars per task).
bgbar_color = "#f2f2f2"
p.hbar(y='_display_name', left=x_range[0], right=x_range[1], 
       height=1.0, 
       fill_color=bgbar_color, fill_alpha=0.40, 
#        line_color="#e6e6e6", 
#        line_alpha=0.80,
#        line_color="Gray",
       line_alpha=0.0,
#        line_width=0.5,
       source=weight_df, 
       level='image')
p.hbar(y='_display_name', left=x_range[0], right=x_range[1], 
       height=1.0, 
       fill_color="White", fill_alpha=0.0,
       line_color="#e6e6e6",
       line_alpha=1.0,
       line_width=0.5,
       source=weight_df, 
       level='underlay')

def _plot_bars(sdf, x_dodge=0, y_dodge=0, **kw):
    """Draw one rectangle per row of `sdf` on the notebook-level figure `p`.

    Rectangles are centred at ('layer_num', '_bar_center' shifted by
    `y_dodge`) and are 0.9 wide. Two glyph renderers are added:
    the visible bars, whose height comes from the '_bar_height' column and
    which receive the extra styling in `kw`, and fully transparent bars of
    fixed height 0.5 at the same positions.

    `x_dodge` is accepted but currently not used.

    Returns:
      (visible_bars, transparent_bars) glyph renderers.
    """
    shared = dict(
        x='layer_num',
        y=bokeh.transform.dodge('_bar_center', y_dodge, range=p.y_range),
        width=0.9,
        source=sdf,
    )
    visible = p.rect(height="_bar_height", **shared, **kw)
    transparent = p.rect(height=0.5, alpha=0.0, **shared)
    return visible, transparent

##
# Plot weights and delta scores
# Weight bars use one fixed colour; score bars read their colours from the
# per-row '_fill_color' / '_line_color' columns.
_WEIGHT_BAR_PARAMS = dict(fill_color=WEIGHT_COLORS[0], line_color=WEIGHT_COLORS[0],
#                           line_width=1.5, fill_alpha=0.1,
                         )
_SCORE_BAR_PARAMS = dict(fill_color='_fill_color', line_color='_line_color', 
                         line_width=1.5, fill_alpha=0.1,
                        )
b0, s0 = _plot_bars(weight_df[weight_df.exp_type == EXPT_TYPES[0]], y_dodge=0, 
                hover_fill_color="firebrick", hover_fill_alpha=1.0,
                **_WEIGHT_BAR_PARAMS)
# Score bars are drawn for layers > 0 only; the layer-0 rows are used for the
# text labels instead.
b2, s2 = _plot_bars(score_df[score_df['layer_num'].map(int) > 0], y_dodge=0, 
                hover_fill_color="firebrick", hover_fill_alpha=0.7, **_SCORE_BAR_PARAMS)
# Attach both the visible and the transparent bars to the matching hover tool.
hover_0.renderers.extend([b0, s0])
hover_2.renderers.extend([b2, s2])

# One tick (and one grid line) per layer. The grid keeps this ticker even if
# the axis ticker is replaced further down.
p.xaxis.ticker = bokeh.models.FixedTicker(ticks=np.arange(0, MAX_LAYER+1))
p.xgrid.ticker = p.xaxis[0].ticker

_FONT_SIZE = "13pt"
p.yaxis.major_label_text_font_size = _FONT_SIZE
p.xaxis.major_label_text_font_size = _FONT_SIZE
# p.xaxis.axis_label = "Encoder Layer"
# p.xaxis.axis_label_text_font_size = _FONT_SIZE

# p.yaxis.major_label_orientation = 60 * np.pi / 180
p.yaxis.major_label_orientation = "vertical"
# Narrow plot with many layers: label only every other layer on the axis.
if PLOT_WIDTH < 600 and MAX_LAYER > 12:
    p.xaxis.ticker = bokeh.models.FixedTicker(ticks=np.arange(0, MAX_LAYER+1, 2))

# p.toolbar.autohide = True

# Add per-task text labels showing the '_formatted_kl_unif' strings
# ("K(s) = ..." for the weights, "K(Δ) = ..." for the scores).
# _label_y = [28, 10]
# label_kw[0]: right-aligned at the right edge of the x range (weights);
# label_kw[1]: left-aligned at 20% of the right edge (scores).
label_kw = [
    dict(x=x_range[1],      y_offset=18, x_offset=-10, text_baseline="bottom", text_align="right"),
    dict(x=x_range[1]*0.20, y_offset=18, x_offset=0, text_baseline="bottom", text_align="left"),
]
LABEL_COLOR = "#404040"
# The label text is the same on every layer row of a task, so only the
# layer-0 rows are used as the label source.
score_labels = bokeh.models.annotations.LabelSet(
    y="_display_name", text="_formatted_kl_unif",
    text_color=LABEL_COLOR, text_font_size="12pt",
    source=bokeh.models.ColumnDataSource(weight_df[weight_df['layer_num'] == 0]), **label_kw[0])
p.add_layout(score_labels)
score_labels = bokeh.models.annotations.LabelSet(
    y="_display_name", text="_formatted_kl_unif",
    text_color=LABEL_COLOR, text_font_size="12pt",
    source=bokeh.models.ColumnDataSource(score_df[score_df['layer_num'].map(int) == 0]), **label_kw[1])
p.add_layout(score_labels)
# # Add labels with expected layer
# score_labels = bokeh.models.annotations.LabelSet(
#     y="_display_name", 
# #     x="weight_exp_layer",
#     x=x_range[1] // 6, 
#     text="_formatted_exp_layer",
#     text_align="left", text_baseline="bottom", y_offset=25, x_offset=0,
#     text_color="#595959", text_font_size="11pt",
#     source=bokeh.models.ColumnDataSource(weight_df[weight_df['layer_num'] == 0]))
# p.add_layout(score_labels)

# Remove the minimum borders and hide the toolbar.
# p.xgrid.visible = False
p.min_border_left = 0
p.min_border_right = 0
p.min_border_top = 0
p.min_border_bottom = 0
p.toolbar_location = None

bp.show(p)



In [32]:
# Value represented by a bar of height 1.0: bar heights are value * scale, so
# this is 1/scale for the weights and for the scores respectively.
print(f"Plot y range: weights:{1/WEIGHT_SCALE:.2f}, scores:{1/SCORE_SCALE:.2f}")

Plot y range: weights:0.37, scores:0.37


In [33]:
# Export the compound figure and upload it; returns the public URL.
# USE_RUNNING_MAX (set elsewhere in the notebook) only affects the file name
# and title here.
# NOTE: as defined at the top of this notebook, _save_figure_to_bucket saves
# the notebook-level `p` rather than its `fig` argument, so run this directly
# after the cell that built `p`.
_save_figure_to_bucket(p, name=f"{MODEL_NAME}.weights_and_scores" + (".running_max" if USE_RUNNING_MAX else ""),
                       title=f"{MODEL_NAME} mixing weights and differential scores" + (" (running_max)" if USE_RUNNING_MAX else ""))

Copying file:///tmp/bert-large-uncased.weights_and_scores.20190603.211908.html [Content-Type=text/html]...
/ [1 files][730.9 KiB/730.9 KiB]                                                
Operation completed over 1 objects/730.9 KiB.                                    
Updated ACL on gs://edge-probing/iftenney/plots/bert-large-uncased.weights_and_scores.20190603.211908.html
Public URL: https://storage.googleapis.com/edge-probing/iftenney/plots/bert-large-uncased.weights_and_scores.20190603.211908.html


'https://storage.googleapis.com/edge-probing/iftenney/plots/bert-large-uncased.weights_and_scores.20190603.211908.html'

### Make aggregate plot

In [34]:
# Shared categorical y range (one row per task) for both panels of the
# aggregate figure. Reuses `cats`, `weight_df` and MAX_LAYER from the
# compound-plot cell.
cats_range = bokeh.models.FactorRange(*cats, factor_padding=0.10, range_padding=0.10, range_padding_units='absolute')
# layers_range is not referenced again in this cell; x_range is reassigned
# further down for the score panel.
layers_range = x_range = (-0.5, MAX_LAYER+0.5)

# Total width is 450: P2_WIDTH is the width of the F1-score panel (p4) and
# the remainder goes to the bar panel (p2). This overwrites the notebook-level
# PLOT_WIDTH / PLOT_HEIGHT.
# PLOT_WIDTH=450
# PLOT_WIDTH=260
P2_WIDTH = 180
PLOT_WIDTH = 450 - P2_WIDTH
# PLOT_HEIGHT=450
# BAR_HEIGHT=0.425
# BAR_DODGE=BAR_HEIGHT/2
PLOT_HEIGHT=320
# With BAR_DODGE = 0 the two bar sets are drawn on top of each other.
BAR_HEIGHT=0.9
BAR_DODGE=0

# Column of weight_df holding the per-task value plotted as the weight bar.
# WEIGHT_EXP_FIELD = 'weight_exp_layer_oneplus'
WEIGHT_EXP_FIELD = 'weight_exp_layer'
weight_df['_formatted_exp_layer'] = weight_df[WEIGHT_EXP_FIELD].map(lambda l: "{:.02f}".format(l))

##
# Add second plot
p2 = bp.figure(plot_width=PLOT_WIDTH, plot_height=PLOT_HEIGHT, 
               y_range=cats_range,
               x_axis_location="above",
               title="Expected layer & center-of-gravity",
               tools=['save'])
p2.title.align = "center"
p2.toolbar.autohide = True
p2.yaxis.major_tick_line_color = None
# p2.yaxis.major_label_text_font_size = "0pt"
# p2.xaxis.axis_label = "Encoder Layer ℓ"
p2.xaxis.axis_label_text_font_style = "bold"

##
# Add bars for weight cog
# Select a single row per task (the layer-1 rows of the first experiment
# type) so each bar and label is drawn once.
# NOTE(review): assumes WEIGHT_EXP_FIELD is constant across a task's layer
# rows — confirm.
mask = weight_df['layer_num'].astype(float) == 1
mask &= weight_df.exp_type == EXPT_TYPES[0]
sdf = weight_df[mask]
y = bokeh.transform.dodge("_display_name", -BAR_DODGE, range=p2.y_range)
p2.hbar(y=y, left=0, right=WEIGHT_EXP_FIELD, height=BAR_HEIGHT,
        fill_color=WEIGHT_COLORS[0], line_color=WEIGHT_COLORS[0],
        line_width=1.5,
        source=sdf)
# Add labels with expected layer
# White bold text, right-aligned 6px inside the end of each bar.
score_labels = bokeh.models.annotations.LabelSet(
    y=y, x=WEIGHT_EXP_FIELD, text="_formatted_exp_layer",
    text_align="right", text_baseline="middle", y_offset=0,
    x_offset=-6,
#     text_color=SCORE_COLORS[0], 
    text_color="White",
    text_font_size="11pt",
    text_font_style="bold",
#     background_fill_color="White", border_line_color="White", border_line_width=5,
    source=bokeh.models.ColumnDataSource(sdf))
p2.add_layout(score_labels)

##
# Add bars for expected layer
# Same idea as for the weights: take the layer-1 rows of score_df so there is
# one bar per task, with 'exp_layer' as the bar length.
sdf = score_df[score_df['layer_num'].astype(float) == 1]
y = bokeh.transform.dodge("_display_name", BAR_DODGE, range=p2.y_range)
p2.hbar(y=y, left=0, right="exp_layer", height=BAR_HEIGHT,
#         fill_color=SCORE_COLORS[0], 
        line_color=SCORE_COLORS[0], 
        fill_color="#D5D2E7",
#         line_color="Black",
#         fill_alpha=0.3,
        source=sdf)
# Black bold text, right-aligned 6px inside the end of each bar.
score_labels = bokeh.models.annotations.LabelSet(
    y=y, x="exp_layer", text="_formatted_exp_layer",
    text_align="right", text_baseline="middle", y_offset=0,
    x_offset=-6,
#     text_color=SCORE_COLORS[0], 
#     text_color="White",
    text_color="Black",
    text_font_size="11pt",
    text_font_style="bold",
#     background_fill_color="White",
#     border_line_color="White", border_line_width=5,
    source=bokeh.models.ColumnDataSource(sdf))
p2.add_layout(score_labels)

# Bars start exactly at the left edge of the panel.
p2.x_range.start = 0

# y labels are hidden on this panel (font size "0pt").
_FONT_SIZE = "13pt"
# p2.yaxis.major_label_text_font_size = _FONT_SIZE
p2.yaxis.major_label_text_font_size = "0pt"
p2.xaxis.major_label_text_font_size = _FONT_SIZE

p2.min_border_left = 0
p2.min_border_right = 0
p2.min_border_top = 0
p2.min_border_bottom = 0
p2.toolbar_location = None

##
# Side plot with bottom and top layer scores
# Adds two helper columns to score_df: the score as a percentage with one
# decimal, and the layer as an "ℓ=<n>" column header.
score_df['_formatted_score'] = score_df['score'].map(lambda h: "{:.1f}".format(100*h))
score_df["_formatted_layer_num"] = score_df["layer_num"].map(lambda l: "ℓ={:d}".format(int(l)))
mask = score_df['layer_num'].map(lambda l: int(l) in {0, MAX_LAYER})
sdf = score_df[mask]

# Categorical x axis with one column per selected layer; y range is shared
# with p2 so the rows line up. Note this rebinds the notebook-level x_range.
x_range = bokeh.models.FactorRange(*sdf['_formatted_layer_num'].unique())
p4 = bp.figure(x_range=x_range, y_range=p2.y_range, plot_width=P2_WIDTH, plot_height=PLOT_HEIGHT,
               x_axis_location="above", title="F1 Scores")
# Add text labels with the bottom-layer and top-layer scores (sdf holds both)

score_labels = bokeh.models.annotations.LabelSet(
    y="_display_name", x='_formatted_layer_num', text="_formatted_score",
    text_align="center", text_baseline="middle",
    source=bokeh.models.ColumnDataSource(sdf)
)
p4.add_layout(score_labels)
# # Add labels with top-layer score
# sdf = score_df[score_df['layer_num'].map(int) == MAX_LAYER]
# score_labels = bokeh.models.annotations.LabelSet(
#     y="_display_name", x='layer_num', text="_formatted_score",
#     text_align="center", text_baseline="middle",
#     source=bokeh.models.ColumnDataSource(sdf)
# )
# p4.add_layout(score_labels)

# This panel carries the task names; ticks, grid lines and axis lines are
# all removed so only the text remains.
# p4.yaxis.major_label_text_font_size = "0pt"
p4.yaxis.major_label_text_font_size = _FONT_SIZE
p4.xaxis.major_label_text_font_size = _FONT_SIZE
p4.yaxis.major_tick_line_color = None
p4.yaxis.minor_tick_line_color = None
p4.xaxis.major_tick_line_color = None
p4.xaxis.minor_tick_line_color = None
p4.ygrid.visible = False
p4.xgrid.visible = False
p4.yaxis.axis_line_color = None
p4.xaxis.axis_line_color = None
p4.title.align = "center"
p4.toolbar_location = None
p4.min_border_left = 0
p4.min_border_right = 5  # add a little padding for visual aid
p4.min_border_top = 0
p4.min_border_bottom = 0

bgbar_color = "#f2f2f2"
p4.background_fill_color = bgbar_color
p4.background_fill_alpha = 0.80

# p2.xaxis.bounds = (0, 17)

# Final layout: score panel on the left, bar panel on the right. `p` is
# rebound to the layout, replacing the compound figure from the earlier cell.
# p = p2
# p = p4
# p = bokeh.layouts.Row(p2, p4)
p = bokeh.layouts.Row(p4, p2)
bp.show(p)

In [35]:
# Export the aggregate (F1 scores + expected layer) layout and upload it;
# returns the public URL. USE_RUNNING_MAX (set elsewhere in the notebook) only
# affects the file name and title here.
# NOTE: as defined at the top of this notebook, _save_figure_to_bucket saves
# the notebook-level `p` rather than its `fig` argument, so run this directly
# after the cell that built `p`.
_save_figure_to_bucket(p, name=f"{MODEL_NAME}.exp_layer" + (".running_max" if USE_RUNNING_MAX else ""),
                       title=f"{MODEL_NAME} expected layer and mixing CoG"  + (" (running_max)" if USE_RUNNING_MAX else ""))

Copying file:///tmp/bert-large-uncased.exp_layer.20190603.211912.html [Content-Type=text/html]...
/ [1 files][ 52.9 KiB/ 52.9 KiB]                                                
Operation completed over 1 objects/52.9 KiB.                                     
Updated ACL on gs://edge-probing/iftenney/plots/bert-large-uncased.exp_layer.20190603.211912.html
Public URL: https://storage.googleapis.com/edge-probing/iftenney/plots/bert-large-uncased.exp_layer.20190603.211912.html


'https://storage.googleapis.com/edge-probing/iftenney/plots/bert-large-uncased.exp_layer.20190603.211912.html'

## Old plotting code, look at labels or strata within a task

In [36]:
# Print the expected layer for dependency labeling (BERT-large scalar mix) as
# CSV, taken from the layer-0 rows of fdf.
cols = ['task', 'exp_type', 'label', 'exp_layer']
mask = (
    (fdf['layer_num'].astype(float) == 0)
    & (fdf['task'] == 'dep-labeling-ewt')
    & (fdf['exp_type'] == 'bert-large-uncased-mix')
)
print(fdf.loc[mask, cols].to_csv(index=False, **csv_args))

task,exp_type,label,exp_layer
dep-labeling-ewt,bert-large-uncased-mix,_micro_avg_,5.691932390197615



In [37]:
# Print the expected layer for the nonterminal (constituent) task, BERT-large
# scalar mix, as CSV, taken from the layer-0 rows of fdf.
cols = ['task', 'exp_type', 'label', 'exp_layer']
mask = (
    (fdf['layer_num'].astype(float) == 0)
    & (fdf['task'] == 'nonterminal-ontonotes')
    & (fdf['exp_type'] == 'bert-large-uncased-mix')
)
print(fdf.loc[mask, cols].to_csv(index=False, **csv_args))

task,exp_type,label,exp_layer
nonterminal-ontonotes,bert-large-uncased-mix,_micro_avg_,3.792980575634427



## Plot scalar mix for ELMo

In [43]:
# Inspect the long-form weights frame.
# NOTE(review): the saved output below shows 'elmo-full' rows, which are only
# produced by the ELMo cell further down, so this cell was evidently executed
# after it. On a fresh top-to-bottom run it displays the weight_df built by
# the compound-plot cell instead.
weight_df

Unnamed: 0,tag,exp_name,exp_type,task,checkpoint,gamma,label,run,scalar_set,display_col,...,weight_kl_unif,weight_exp_layer,weight_exp_layer_oneplus,layer_num,layer_weight,_display_name,_bar_height,_bar_center,_formatted_entropy,_formatted_kl_unif
0,base,elmo-full-edges-coref-ontonotes-conll,elmo-full,coref-ontonotes-conll,/model_state_eval_best.th,2.094788,,elmo-full-edges-coref-ontonotes-conll/run,sent_encoder._text_field_embedder.token_embedd...,elmo-full (base),...,0.099761,0.873651,1.279741,0,0.317322,Coref.,0.666376,Coref.,H(s) = 1.49 bits,K(s) = 0.10
1,base,elmo-full-edges-dep-labeling-ewt,elmo-full,dep-labeling-ewt,/model_state_eval_best.th,3.233000,,elmo-full-edges-dep-labeling-ewt/run,sent_encoder._text_field_embedder.token_embedd...,elmo-full (base),...,0.821292,1.001022,1.082230,0,0.0750376,Deps.,0.157579,Deps.,H(s) = 0.76 bits,K(s) = 0.82
2,base,elmo-full-edges-ner-ontonotes,elmo-full,ner-ontonotes,/model_state_eval_best.th,2.308605,,elmo-full-edges-ner-ontonotes/run,sent_encoder._text_field_embedder.token_embedd...,elmo-full (base),...,0.035871,0.966025,1.375614,0,0.29775,Entities,0.625275,Entities,H(s) = 1.55 bits,K(s) = 0.04
3,base,elmo-full-edges-nonterminal-ontonotes,elmo-full,nonterminal-ontonotes,/model_state_eval_best.th,2.295377,,elmo-full-edges-nonterminal-ontonotes/run,sent_encoder._text_field_embedder.token_embedd...,elmo-full (base),...,0.770121,0.996855,1.089024,0,0.0846346,Consts.,0.177733,Consts.,H(s) = 0.81 bits,K(s) = 0.77
4,base,elmo-full-edges-pos-ontonotes,elmo-full,pos-ontonotes,/model_state_eval_best.th,0.768965,,elmo-full-edges-pos-ontonotes/run,sent_encoder._text_field_embedder.token_embedd...,elmo-full (base),...,0.137134,0.807429,1.237478,0,0.347521,POS,0.729793,POS,H(s) = 1.45 bits,K(s) = 0.14
5,base,elmo-full-edges-spr1,elmo-full,spr1,/model_state_eval_best.th,1.035713,,elmo-full-edges-spr1/run,sent_encoder._text_field_embedder.token_embedd...,elmo-full (base),...,0.003384,1.018538,1.473512,0,0.308769,SPR,0.648414,SPR,H(s) = 1.58 bits,K(s) = 0.00
6,base,elmo-full-edges-srl-conll2012,elmo-full,srl-conll2012,/model_state_eval_best.th,2.064842,,elmo-full-edges-srl-conll2012/run,sent_encoder._text_field_embedder.token_embedd...,elmo-full (base),...,0.475175,0.968179,1.140540,0,0.151122,SRL,0.317356,SRL,H(s) = 1.11 bits,K(s) = 0.48
7,base,elmo-full-edges-rel-semeval,elmo-full,rel-semeval,/model_state_eval_best.th,1.068652,,/nfs/jsalt/exp/edges-20190205-semeval/elmo-ful...,sent_encoder._text_field_embedder.token_embedd...,elmo-full (base),...,0.006973,1.068583,1.499896,0,0.287562,Relations,0.60388,Relations,H(s) = 1.58 bits,K(s) = 0.01
8,base,elmo-full-edges-coref-ontonotes-conll,elmo-full,coref-ontonotes-conll,/model_state_eval_best.th,2.094788,,elmo-full-edges-coref-ontonotes-conll/run,sent_encoder._text_field_embedder.token_embedd...,elmo-full (base),...,0.099761,0.873651,1.279741,1,0.491705,Coref.,1.03258,Coref.,H(s) = 1.49 bits,K(s) = 0.10
9,base,elmo-full-edges-dep-labeling-ewt,elmo-full,dep-labeling-ewt,/model_state_eval_best.th,3.233000,,elmo-full-edges-dep-labeling-ewt/run,sent_encoder._text_field_embedder.token_embedd...,elmo-full (base),...,0.821292,1.001022,1.082230,1,0.848903,Deps.,1.7827,Deps.,H(s) = 0.76 bits,K(s) = 0.82


In [44]:
# Distinct layer indices present in the long-form weights frame.
weight_df['layer_num'].unique()

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
       19, 20, 21, 22, 23, 24], dtype=object)

In [49]:
# Configuration for the ELMo scalar-mix weights figure.
# https://bokeh.pydata.org/en/latest/docs/reference/palettes.html
palette = bokeh.palettes.Category20c_20

# MODEL_NAME = "bert-base-uncased"
# MODEL_NAME = "bert-large-uncased"
# EXPT_TYPES = [f"{MODEL_NAME}-mix"]
MODEL_NAME = "elmo-full"
EXPT_TYPES = ["elmo-full"]  # exp_type values kept by _make_mask
MAX_LAYER = 2  # x axis covers layers 0..2
WEIGHT_SCALE = 1.0  # bar height equals the raw layer weight
# NOTE(review): SCORE_EXPT_TYPE is NOT set in this cell (the assignment below
# is commented out), yet the score-selection code later in this cell still
# reads it, so it keeps whatever an earlier cell assigned. With the BERT value
# left by the compound-plot cell no 'elmo-full' row can match, which leaves
# score_df empty and only the weight bars drawn. That is presumably intended,
# given the figure is saved as "<model>.weights" — confirm and make it explicit.
# SCORE_EXPT_TYPE = f"{MODEL_NAME}-mix"
# MAX_LAYER = 24 if '-large-' in MODEL_NAME else 12
# WEIGHT_SCALE = 2.7 if '-large-' in MODEL_NAME else 2.1
# SCORE_SCALE = 1.5
# SCORE_SCALE = 2.2
SCORE_SCALE = WEIGHT_SCALE
# (fill, line) colour pairs. Only index 0 of WEIGHT_COLORS is used by the
# plotting code in this cell.
WEIGHT_COLORS = (palette[0], palette[9]) # blue, green
SCORE_COLORS = (palette[12], palette[12]) # purples
NEG_COLORS = (palette[5], palette[4]) # oranges
# PLOT_WIDTH = 900
# PLOT_WIDTH=450
PLOT_WIDTH=200
# Plot height as a function of the number of task rows; currently constant.
# _PLOT_HEIGHT_FN=lambda num_cats: 80 + 100*num_cats
_PLOT_HEIGHT_FN=lambda num_cats: 750

##
# Don't change below here
##
def _make_display_name(task, label):
    if task.startswith("pos-"):
        return "POS"
    elif task.startswith("coref-"):
        return "Coref."
    elif task.startswith("spr"):
        return "SPR"
    elif task.startswith("rel-"):
        return "Relations"
    elif task.startswith("dep-"):
        return "Deps."
    elif task.startswith("nonterminal-"):
        return "Consts."
    else:
        return analysis.make_display_name(task, label)        

def _make_mask(df):
    mask = df['exp_type'].map(lambda s: s in EXPT_TYPES)
    mask &= df['task'] != 'constituent-ontonotes'  # don't use this task
    mask &= df['task'] != 'ner-tacred'  # don't use this task
    mask &= df['task'] != 'coref-gap-ontonotes'  # don't use this task
    mask &= df['task'] != 'rel-tacred'  # don't use this task
#     mask &= df['task'] != 'rel-semeval'  # don't use this task
    # Skip Winograd and SPR2 for this
    mask &= df['task'] != 'dpr'
#     mask &= df['task'] != 'spr1'
    mask &= df['task'] != 'spr2'
    return mask

# Wide-form scalar-mix weights, restricted to the plotted experiments/tasks.
mask = _make_mask(scalar_df)
weight_df = scalar_df[mask].copy()

##
# Make long-form DataFrame and add plotting values
# The keys of scalar_columns are the columns that get melted (they become the
# values of 'layer_num'); the columns named by its values are excluded from
# the identifier columns and so do not survive the melt.
skip_cols = set(scalar_columns.keys()).union(scalar_columns.values())
id_vars = [c for c in weight_df.columns if c not in skip_cols]
value_vars = scalar_columns.keys()
weight_df = pd.melt(weight_df, id_vars=id_vars, value_vars=value_vars, 
                    var_name="layer_num", value_name="layer_weight")
weight_df['label'] = None

##
# Build the scores DataFrame (kept as a separate frame, not appended to
# weight_df).
# NOTE(review): SCORE_EXPT_TYPE is not assigned in this cell; it is read from
# whatever an earlier cell left behind. If that value is not in EXPT_TYPES the
# two exp_type conditions cannot both hold and score_df is empty — confirm
# that this is intended.
mask = _make_mask(fdf)
mask &= fdf['layer_num'].notnull()
mask &= fdf['exp_type'] == SCORE_EXPT_TYPE
# mask &= fdf['layer_num'].astype(float) > 0
score_df = fdf[mask].copy()
# Erase labels, for now
score_df['label'] = None

##
# Plotting code below this line
##

# Row keys
# Reversed so the first task in sort order ends up last in the factor list.
sorted_tasks = sorted(weight_df['task'].unique(), key=task_sort_key)
cats = [_make_display_name(t, None) for t in sorted_tasks]
cats = list(reversed(cats))
PLOT_HEIGHT = _PLOT_HEIGHT_FN(len(cats))

# Row names, matching row keys
score_df['_display_name'] = list(map(_make_display_name, score_df['task'], score_df['label']))
weight_df['_display_name'] = list(map(_make_display_name, weight_df['task'], weight_df['label']))

# Bar heights for weights
weight_df['_bar_height'] = weight_df['layer_weight'] * WEIGHT_SCALE
weight_df['_bar_center'] = weight_df['_display_name']
weight_df['_formatted_entropy'] = weight_df['weight_entropy'].map(lambda h: "H(s) = {:.02f} bits".format(h))
# weight_df['_formatted_kl_unif'] = weight_df['weight_kl_unif'].map(lambda h: "KL(s||uniform) = {:.02f} bits".format(h))
weight_df['_formatted_kl_unif'] = weight_df['weight_kl_unif'].map(lambda h: "K(s) = {:.02f}".format(h))
# weight_df['_formatted_exp_layer'] = weight_df['weight_exp_layer_oneplus'].map(lambda l: "E[k] = {:.02f}".format(l))

# Bar heights for scores (cumulative)
# Heights are capped at 1.0 from above only; negative values are kept here
# because their sign selects the colour below.
# score_df['_bar_height'] = score_df['real_headroom_frac'] * SCORE_SCALE
score_df['_bar_height'] = score_df['headroom_frac'] * SCORE_SCALE
score_df['_bar_height'] = score_df['_bar_height'].map(lambda h: min(h, 1.0))
# Add offset so bars start at baseline
# (category, offset) pairs; must be computed from the signed height, i.e.
# before the abs() below.
score_df['_bar_center'] = [(l, h/2-0.5) for l, h in zip(score_df['_display_name'], score_df['_bar_height'])]
# score_df['_bar_center'] = score_df["_display_name"]

# Positive bars use SCORE_COLORS, non-positive ones NEG_COLORS; only then is
# the height made unsigned for drawing.
score_df['_fill_color'] = [SCORE_COLORS[0] if h > 0 else NEG_COLORS[0] for h in score_df['_bar_height']]
score_df['_line_color'] = [SCORE_COLORS[1] if h > 0 else NEG_COLORS[1] for h in score_df['_bar_height']]
score_df['_bar_height'] = score_df['_bar_height'].map(np.abs)

# Pre-formatted strings for the text labels.
# score_df['_formatted_exp_layer'] = score_df['exp_layer'].map(lambda l: "E[layer] = {:.02f}".format(l))
score_df['_formatted_exp_layer'] = score_df['exp_layer'].map(lambda l: "{:.02f}".format(l))
score_df['_formatted_kl_unif'] = score_df['kl_unif'].map(lambda h: "K(Δ) = {:.02f}".format(h))

# Hover tooltips: one tool for the weight bars, one for the score bars, so
# each can show its own fields. Both start with no renderers; the bars are
# attached once they have been drawn.
hover_0 = bokeh.models.HoverTool(
  tooltips=[
      ("task", "@_display_name"),
      ("experiment", "@exp_type"),
      ("layer", "@layer_num"),
      ("weight", "@layer_weight{0.0%}"),
  ],
  renderers=[],
)
hover_2 = bokeh.models.HoverTool(
  tooltips=[
      ("task", "@_display_name"),
      ("experiment", "@exp_type"),
      ("layer", "@layer_num"),
      ("score", "@score{0.0%} (Δ @delta_score{0.0%})"),
      ("headroom fraction", "@headroom_frac{0.0%}"),
  ],
  renderers=[],
)

# x: layer index with half a unit of margin on each side; y: one categorical
# row per task display name.
x_range = (-0.5, MAX_LAYER+0.5)
p = bp.figure(y_range=bokeh.models.FactorRange(*cats, factor_padding=0.10), x_range=x_range,
              plot_width=PLOT_WIDTH, plot_height=PLOT_HEIGHT, tools=[hover_0, hover_2, 'save'])

##
# Add background bars
# Two passes over the full x range: a light fill with no outline, then an
# outline with no fill. source=weight_df, so one bar is emitted per row of
# weight_df (several overlapping bars per task).
bgbar_color = "#f2f2f2"
p.hbar(y='_display_name', left=x_range[0], right=x_range[1], 
       height=1.0, 
       fill_color=bgbar_color, fill_alpha=0.40, 
#        line_color="#e6e6e6", 
#        line_alpha=0.80,
#        line_color="Gray",
       line_alpha=0.0,
#        line_width=0.5,
       source=weight_df, 
       level='image')
p.hbar(y='_display_name', left=x_range[0], right=x_range[1], 
       height=1.0, 
       fill_color="White", fill_alpha=0.0,
       line_color="#e6e6e6",
       line_alpha=1.0,
       line_width=0.5,
       source=weight_df, 
       level='underlay')

def _plot_bars(sdf, x_dodge=0, y_dodge=0, **kw):
    """Draw one rectangle per row of `sdf` on the notebook-level figure `p`.

    Rectangles are centred at ('layer_num', '_bar_center' shifted by
    `y_dodge`) and are 0.9 wide. Two glyph renderers are added:
    the visible bars, whose height comes from the '_bar_height' column and
    which receive the extra styling in `kw`, and fully transparent bars of
    fixed height 0.5 at the same positions.

    `x_dodge` is accepted but currently not used.

    Returns:
      (visible_bars, transparent_bars) glyph renderers.
    """
    shared = dict(
        x='layer_num',
        y=bokeh.transform.dodge('_bar_center', y_dodge, range=p.y_range),
        width=0.9,
        source=sdf,
    )
    visible = p.rect(height="_bar_height", **shared, **kw)
    transparent = p.rect(height=0.5, alpha=0.0, **shared)
    return visible, transparent

##
# Plot weights and delta scores
# Weight bars use one fixed colour; score bars read their colours from the
# per-row '_fill_color' / '_line_color' columns.
_WEIGHT_BAR_PARAMS = dict(fill_color=WEIGHT_COLORS[0], line_color=WEIGHT_COLORS[0],
#                           line_width=1.5, fill_alpha=0.1,
                         )
_SCORE_BAR_PARAMS = dict(fill_color='_fill_color', line_color='_line_color', 
                         line_width=1.5, fill_alpha=0.1,
                        )
b0, s0 = _plot_bars(weight_df[weight_df.exp_type == EXPT_TYPES[0]], y_dodge=0, 
                hover_fill_color="firebrick", hover_fill_alpha=1.0,
                **_WEIGHT_BAR_PARAMS)
# Score bars are drawn for layers > 0 only; the layer-0 rows are used for the
# text labels instead.
b2, s2 = _plot_bars(score_df[score_df['layer_num'].map(int) > 0], y_dodge=0, 
                hover_fill_color="firebrick", hover_fill_alpha=0.7, **_SCORE_BAR_PARAMS)
# Attach both the visible and the transparent bars to the matching hover tool.
hover_0.renderers.extend([b0, s0])
hover_2.renderers.extend([b2, s2])

# One tick (and one grid line) per layer. The grid keeps this ticker even if
# the axis ticker is replaced further down.
p.xaxis.ticker = bokeh.models.FixedTicker(ticks=np.arange(0, MAX_LAYER+1))
p.xgrid.ticker = p.xaxis[0].ticker

_FONT_SIZE = "13pt"
p.yaxis.major_label_text_font_size = _FONT_SIZE
p.xaxis.major_label_text_font_size = _FONT_SIZE
# p.xaxis.axis_label = "Encoder Layer"
# p.xaxis.axis_label_text_font_size = _FONT_SIZE

# p.yaxis.major_label_orientation = 60 * np.pi / 180
p.yaxis.major_label_orientation = "vertical"
# Narrow plot with many layers: label only every other layer on the axis.
# (Not taken with the MAX_LAYER = 2 set at the top of this cell.)
if PLOT_WIDTH < 600 and MAX_LAYER > 12:
    p.xaxis.ticker = bokeh.models.FixedTicker(ticks=np.arange(0, MAX_LAYER+1, 2))

# p.toolbar.autohide = True

# Add per-task text labels showing the '_formatted_kl_unif' strings
# ("K(s) = ..." for the weights, "K(Δ) = ..." for the scores).
# _label_y = [28, 10]
# label_kw[0]: right-aligned at the right edge of the x range (weights);
# label_kw[1]: left-aligned at 20% of the right edge (scores).
label_kw = [
    dict(x=x_range[1],      y_offset=18, x_offset=-10, text_baseline="bottom", text_align="right"),
    dict(x=x_range[1]*0.20, y_offset=18, x_offset=0, text_baseline="bottom", text_align="left"),
]
LABEL_COLOR = "#404040"
# The label text is the same on every layer row of a task, so only the
# layer-0 rows are used as the label source.
score_labels = bokeh.models.annotations.LabelSet(
    y="_display_name", text="_formatted_kl_unif",
    text_color=LABEL_COLOR, text_font_size="12pt",
    source=bokeh.models.ColumnDataSource(weight_df[weight_df['layer_num'] == 0]), **label_kw[0])
p.add_layout(score_labels)
score_labels = bokeh.models.annotations.LabelSet(
    y="_display_name", text="_formatted_kl_unif",
    text_color=LABEL_COLOR, text_font_size="12pt",
    source=bokeh.models.ColumnDataSource(score_df[score_df['layer_num'].map(int) == 0]), **label_kw[1])
p.add_layout(score_labels)
# # Add labels with expected layer
# score_labels = bokeh.models.annotations.LabelSet(
#     y="_display_name", 
# #     x="weight_exp_layer",
#     x=x_range[1] // 6, 
#     text="_formatted_exp_layer",
#     text_align="left", text_baseline="bottom", y_offset=25, x_offset=0,
#     text_color="#595959", text_font_size="11pt",
#     source=bokeh.models.ColumnDataSource(weight_df[weight_df['layer_num'] == 0]))
# p.add_layout(score_labels)

# Remove the minimum borders and hide the toolbar.
# p.xgrid.visible = False
p.min_border_left = 0
p.min_border_right = 0
p.min_border_top = 0
p.min_border_bottom = 0
p.toolbar_location = None

bp.show(p)



In [51]:
# Export the ELMo mixing-weights figure and upload it; returns the public URL.
# NOTE: as defined at the top of this notebook, _save_figure_to_bucket saves
# the notebook-level `p` rather than its `fig` argument, so run this directly
# after the cell that built `p`.
_save_figure_to_bucket(p, name=f"{MODEL_NAME}.weights",
                       title=f"{MODEL_NAME} mixing weights")

Copying file:///tmp/elmo-full.weights.20190603.212822.html [Content-Type=text/html]...
/ [1 files][315.4 KiB/315.4 KiB]                                                
Operation completed over 1 objects/315.4 KiB.                                    
Updated ACL on gs://edge-probing/iftenney/plots/elmo-full.weights.20190603.212822.html
Public URL: https://storage.googleapis.com/edge-probing/iftenney/plots/elmo-full.weights.20190603.212822.html


'https://storage.googleapis.com/edge-probing/iftenney/plots/elmo-full.weights.20190603.212822.html'