# Edge Probing Per-Label Analysis

In [1]:
import sys, os, re, json
import itertools
import collections
from importlib import reload
import pandas as pd
import numpy as np
from sklearn import metrics

In [2]:
import datetime
def get_compact_timestamp():
    """Return the current local time as a compact ``YYYYMMDD.HHMMSS`` string."""
    return datetime.datetime.now().strftime("%Y%m%d.%H%M%S")

In [3]:
import bokeh
import bokeh.plotting as bp
# Configure Bokeh so that subsequent show() calls render inside the notebook.
bokeh.io.output_notebook()

The latest experiment runs are in the directory listed below:

In [4]:
ls /nfs/jsalt/home/iftenney/exp/edges-20180727/

[0m[01;34melmo-chars-edges-constituent-ptb[0m/  [01;34melmo-full-edges-constituent-ptb[0m/
[01;34melmo-chars-edges-coref-ontonotes[0m/  [01;34melmo-full-edges-coref-ontonotes[0m/
[01;34melmo-chars-edges-dep-labeling[0m/     [01;34melmo-full-edges-dep-labeling[0m/
[01;34melmo-chars-edges-dpr[0m/              [01;34melmo-full-edges-dpr[0m/
[01;34melmo-chars-edges-ner-conll2003[0m/    [01;34melmo-full-edges-ner-conll2003[0m/
[01;34melmo-chars-edges-spr2[0m/             [01;34melmo-full-edges-spr2[0m/
[01;34melmo-chars-edges-srl-conll2005[0m/    [01;34melmo-full-edges-srl-conll2005[0m/


In [5]:
# Root directory that holds one sub-directory per experiment run.
top_expt_dir = "/nfs/jsalt/home/iftenney/exp/edges-20180727/"
# os.listdir returns entry names in arbitrary order; nothing here sorts them.
all_expt_dirs = os.listdir(top_expt_dir)
all_expt_dirs  # display the listing as the cell output

['elmo-full-edges-constituent-ptb',
 'elmo-full-edges-ner-conll2003',
 'elmo-full-edges-dep-labeling',
 'elmo-chars-edges-ner-conll2003',
 'elmo-chars-edges-constituent-ptb',
 'elmo-chars-edges-spr2',
 'elmo-chars-edges-dep-labeling',
 'elmo-full-edges-srl-conll2005',
 'elmo-chars-edges-dpr',
 'elmo-full-edges-spr2',
 'elmo-chars-edges-srl-conll2005',
 'elmo-chars-edges-coref-ontonotes',
 'elmo-full-edges-dpr',
 'elmo-full-edges-coref-ontonotes']

In [6]:
# Experiment directories are named "elmo-<variant>-<task>"; removing the
# "elmo-<variant>-" prefix and de-duplicating leaves one entry per task.
task_names = set(re.sub(r"elmo-\w+-", "", dirname) for dirname in all_expt_dirs)
task_names

{'edges-constituent-ptb',
 'edges-coref-ontonotes',
 'edges-dep-labeling',
 'edges-dpr',
 'edges-ner-conll2003',
 'edges-spr2',
 'edges-srl-conll2005'}

In [8]:
import analysis
reload(analysis)

def load_pair_preds(task_name, split_name="run"):
    """Load predictions for one task from both ELMo experiment variants.

    Each variant is read from ``<top_expt_dir>/elmo-<variant>-<task_name>/run``
    through ``analysis.Predictions.from_run``.

    Args:
        task_name: task identifier, e.g. ``'edges-srl-conll2005'``.
        split_name: split to load; forwarded unchanged to ``from_run``.
            NOTE(review): the default "run" equals the run-directory name
            rather than a split such as 'val' -- confirm it is intentional.

    Returns:
        A 2-tuple of ``from_run`` results: the ``elmo-chars`` run first,
        then the ``elmo-full`` run.
    """
    def _load_variant(prefix):
        # Run directory for this variant of the task.
        run_dir = os.path.join(top_expt_dir, prefix + task_name, "run")
        return analysis.Predictions.from_run(run_dir, task_name, split_name)

    return (_load_variant("elmo-chars-"), _load_variant("elmo-full-"))

# Load the 'val' split predictions of both variants for the SRL task.
task_name = "edges-srl-conll2005"
pair = load_pair_preds(task_name, split_name="val")
# Touch target_df_wide on each side to force the load up front.
for _preds in pair:
    _preds.target_df_wide
print("Done!")

Loading vocabulary from /nfs/jsalt/home/iftenney/exp/edges-20180727/elmo-chars-edges-srl-conll2005/vocab
Loading predictions from /nfs/jsalt/home/iftenney/exp/edges-20180727/elmo-chars-edges-srl-conll2005/run/edges-srl-conll2005_val.json
Loading vocabulary from /nfs/jsalt/home/iftenney/exp/edges-20180727/elmo-full-edges-srl-conll2005/vocab
Loading predictions from /nfs/jsalt/home/iftenney/exp/edges-20180727/elmo-full-edges-srl-conll2005/run/edges-srl-conll2005_val.json
Generating wide-form target DataFrame. May be slow... Done!
Generating wide-form target DataFrame. May be slow... Done!
Done!


In [11]:
reload(analysis)  # pick up any edits made to the analysis module

# The filter is chosen once, based on the task selected when this cell runs.
if task_name.startswith("edges-srl"):
    def label_filter(label):
        """Reject reference (R-) and continuation (C-) labels, which are mostly noise."""
        return not label.startswith(("R-", "C-"))
else:
    def label_filter(label):
        """Accept every label."""
        return True

comp = analysis.Comparison(*pair, label_filter=label_filter)

# Field the plot is sorted by; alternatives: "abs_diff_f1", "true_count".
SORT_FIELD = "label"
p = comp.plot_scores(
    task_name,
    metric="f1",
    sort_field=SORT_FIELD,
    row_height=350,
    # Ascending order only when sorting by label.
    sort_ascending=(SORT_FIELD == "label"),
)

# Save the chart as an HTML file under /tmp, copy it to a cloud storage
# bucket, make it world-readable, and print the resulting public URL.
import socket
now = get_compact_timestamp()
# File name embeds the task, the sort field and the timestamp.
fname = f"chart.{task_name}.{SORT_FIELD}.{now:s}.html"
# The host name becomes a path component inside the bucket.
hostname = socket.gethostname()
title = f"{task_name} sorted by '{SORT_FIELD}'"
bp.save(p, os.path.join("/tmp", fname), title=title, resources=bokeh.resources.CDN)
# IPython shell escapes: $fname and $hostname expand from the Python variables above.
!gsutil cp /tmp/$fname gs://jsalt-scratch/$hostname/plots/$fname
# Grant read access to all users so the URL printed below is reachable.
!gsutil acl ch -u AllUsers:R gs://jsalt-scratch/$hostname/plots/$fname
print(f"Public URL: https://storage.googleapis.com/jsalt-scratch/{hostname}/plots/{fname}")
# Also display the chart in the notebook.
bp.show(p)

Scoring base run...
Scoring expt run...


  'precision', 'predicted', average, warn_for)


Done scoring!
Copying file:///tmp/chart.edges-srl-conll2005.label.20180731.204139.html [Content-Type=text/html]...
/ [1 files][ 34.0 KiB/ 34.0 KiB]                                                
Operation completed over 1 objects/34.0 KiB.                                     
Updated ACL on gs://jsalt-scratch/iftenney/plots/chart.edges-srl-conll2005.label.20180731.204139.html
Public URL: https://storage.googleapis.com/jsalt-scratch/iftenney/plots/chart.edges-srl-conll2005.label.20180731.204139.html
