Permalink
Browse files

Update the DRAGNN (#1191)

* Release DRAGNN

* Update CoNLL evaluation table & evaluator.py

* Update documentation & tutorial for DRAGNN

* Update the DRAGNN

* Tutorial link
  • Loading branch information...
1 parent 51fcc99 commit e8464d33d363c6f8e90a29c08e5af11ef01be7a1 @bogatyy bogatyy committed with calberti Mar 17, 2017
Showing with 86,578 additions and 4,360 deletions.
  1. +3 −2 syntaxnet/Dockerfile
  2. +18 −8 syntaxnet/README.md
  3. BIN syntaxnet/beam_search_training.png
  4. +66 −0 syntaxnet/docker-devel/Dockerfile.min
  5. +64 −0 syntaxnet/docker-devel/README.txt
  6. +1 −0 syntaxnet/docker-devel/build_devel.sh
  7. +4 −10 syntaxnet/dragnn/protos/spec.proto
  8. +1 −0 syntaxnet/dragnn/python/BUILD
  9. +29 −6 syntaxnet/dragnn/python/visualization.py
  10. +16 −1 syntaxnet/dragnn/python/visualization_test.py
  11. +19 −0 syntaxnet/dragnn/viz/dragnn_layout.js
  12. +1,545 −0 syntaxnet/dragnn/viz/dragnn_tutorial_2.html
  13. +2,261 −0 syntaxnet/dragnn/viz/example_with_lookahead.html
  14. +13 −7 syntaxnet/dragnn/viz/visualize.js
  15. BIN syntaxnet/dragnn/viz/viz.min.js.gz
  16. +51 −0 syntaxnet/examples/dragnn/BUILD
  17. +1 −0 syntaxnet/examples/dragnn/data/en/category-map
  18. +100 −0 syntaxnet/examples/dragnn/data/en/char-map
  19. +11,990 −0 syntaxnet/examples/dragnn/data/en/char-ngram-map
  20. BIN syntaxnet/examples/dragnn/data/{es → en}/checkpoint
  21. +50 −0 syntaxnet/examples/dragnn/data/en/label-map
  22. +15,388 −0 syntaxnet/examples/dragnn/data/en/lcword-map
  23. 0 syntaxnet/examples/dragnn/data/{es → en}/parser_spec.textproto
  24. BIN syntaxnet/examples/dragnn/data/en/prefix-table
  25. +18 −0 syntaxnet/examples/dragnn/data/en/segmenter/category-map
  26. +100 −0 syntaxnet/examples/dragnn/data/en/segmenter/char-map
  27. +2,018 −0 syntaxnet/examples/dragnn/data/en/segmenter/char-ngram-map
  28. BIN syntaxnet/examples/dragnn/data/en/segmenter/checkpoint.data-00000-of-00001
  29. BIN syntaxnet/examples/dragnn/data/en/segmenter/checkpoint.index
  30. BIN syntaxnet/examples/dragnn/data/{es → en}/segmenter/checkpoint.meta
  31. +50 −0 syntaxnet/examples/dragnn/data/en/segmenter/label-map
  32. +15,388 −0 syntaxnet/examples/dragnn/data/en/segmenter/lcword-map
  33. BIN syntaxnet/examples/dragnn/data/en/segmenter/prefix-table
  34. 0 syntaxnet/examples/dragnn/data/{es → en}/segmenter/spec.textproto
  35. BIN syntaxnet/examples/dragnn/data/en/segmenter/suffix-table
  36. +51 −0 syntaxnet/examples/dragnn/data/en/segmenter/tag-map
  37. +50 −0 syntaxnet/examples/dragnn/data/en/segmenter/tag-to-category
  38. +18,257 −0 syntaxnet/examples/dragnn/data/en/segmenter/word-map
  39. BIN syntaxnet/examples/dragnn/data/en/suffix-table
  40. +157 −0 syntaxnet/examples/dragnn/data/en/tag-map
  41. +156 −0 syntaxnet/examples/dragnn/data/en/tag-to-category
  42. +18,257 −0 syntaxnet/examples/dragnn/data/en/word-map
  43. +0 −292 syntaxnet/examples/dragnn/data/es/char-map
  44. 0 syntaxnet/examples/dragnn/data/{ → es}/es-universal-dev.conll
  45. 0 syntaxnet/examples/dragnn/data/{ → es}/es-universal-train.conll
  46. +0 −33 syntaxnet/examples/dragnn/data/es/label-map
  47. +0 −292 syntaxnet/examples/dragnn/data/es/segmenter/char-map
  48. +0 −2,622 syntaxnet/examples/dragnn/data/es/segmenter/char-ngram-map
  49. BIN syntaxnet/examples/dragnn/data/es/segmenter/checkpoint.data-00000-of-00001
  50. BIN syntaxnet/examples/dragnn/data/es/segmenter/checkpoint.index
  51. +0 −33 syntaxnet/examples/dragnn/data/es/segmenter/label-map
  52. +0 −5 syntaxnet/examples/dragnn/data/es/segmenter/tag-map
  53. +0 −5 syntaxnet/examples/dragnn/data/es/segmenter/tag-to-category
  54. +0 −397 syntaxnet/examples/dragnn/data/es/tag-map
  55. +0 −396 syntaxnet/examples/dragnn/data/es/tag-to-category
  56. BIN syntaxnet/examples/dragnn/data/mini-english.checkpoint
  57. BIN syntaxnet/examples/dragnn/data/mini-english.checkpoint.meta
  58. +28 −26 syntaxnet/examples/dragnn/interactive_text_analyzer.ipynb
  59. +27 −0 syntaxnet/examples/dragnn/test_run_all_tutorials.sh
  60. +53 −45 syntaxnet/examples/dragnn/{basic_parser_tutorial.ipynb → trainer_tutorial.ipynb}
  61. +77 −0 syntaxnet/examples/dragnn/tutorial_1.py
  62. +104 −0 syntaxnet/examples/dragnn/tutorial_2.py
  63. +90 −0 syntaxnet/examples/dragnn/tutorial_data/sentence.prototext
  64. BIN syntaxnet/ff_nn_schematic.png
  65. +1 −2 syntaxnet/g3doc/CLOUD.md
  66. +74 −72 syntaxnet/g3doc/conll2017/README.md
  67. +2 −2 syntaxnet/g3doc/conll2017/cooking.md
  68. BIN syntaxnet/looping-parser.gif
  69. BIN syntaxnet/sawman.png
  70. 0 syntaxnet/syntaxnet/demo.sh
  71. +0 −104 syntaxnet/universal.md
View
@@ -1,5 +1,5 @@
# Java baseimage, for Bazel.
-FROM java:8
+FROM openjdk:8
ENV SYNTAXNETDIR=/opt/tensorflow PATH=$PATH:/root/bin
@@ -50,6 +50,8 @@ RUN python -m pip install \
&& python -m pip install pygraphviz \
--install-option="--include-path=/usr/include/graphviz" \
--install-option="--library-path=/usr/lib/graphviz/" \
+ && python -m jupyter_core.command nbextension enable \
+ --py --sys-prefix widgetsnbextension \
&& rm -rf /root/.cache/pip /tmp/pip*
# Installs the latest version of Bazel.
@@ -86,6 +88,5 @@ EXPOSE 8888
# This does not need to be compiled, only copied.
COPY examples $SYNTAXNETDIR/syntaxnet/examples
# Todo: Move this earlier in the file (don't want to invalidate caches for now).
-RUN jupyter nbextension enable --py --sys-prefix widgetsnbextension
CMD /bin/bash -c "bazel-bin/dragnn/tools/oss_notebook_launcher notebook --debug --notebook-dir=/opt/tensorflow/syntaxnet/examples"
View
@@ -20,12 +20,16 @@ This repository is largely divided into two sub-packages:
1. **DRAGNN:
[code](https://github.com/tensorflow/models/tree/master/syntaxnet/dragnn),
- [documentation](g3doc/DRAGNN.md)** implements Dynamic Recurrent Acyclic
- Graphical Neural Networks (DRAGNN), a framework for building multi-task,
- fully dynamic constructed computation graphs. Practically, we use DRAGNN to
- extend our prior work from [Andor et al.
+ [documentation](g3doc/DRAGNN.md),
+ [paper](https://arxiv.org/pdf/1703.04474.pdf)** implements Dynamic Recurrent
+ Acyclic Graphical Neural Networks (DRAGNN), a framework for building
+ multi-task, fully dynamically constructed computation graphs. Practically, we
+ use DRAGNN to extend our prior work from [Andor et al.
(2016)](http://arxiv.org/abs/1603.06042) with end-to-end, deep recurrent
- models and to provide a much easier to use interface to SyntaxNet.
+ models and to provide a much easier to use interface to SyntaxNet. *DRAGNN
+ is designed first and foremost as a Python library, and therefore much
+ easier to use than the original SyntaxNet implementation.*
+
1. **SyntaxNet:
[code](https://github.com/tensorflow/models/tree/master/syntaxnet/syntaxnet),
[documentation](g3doc/syntaxnet-tutorial.md)** is a transition-based
@@ -42,7 +46,7 @@ There are three ways to use SyntaxNet:
SyntaxNet/DRAGNN baseline for the CoNLL2017 Shared Task, and running the
ParseySaurus models.
* You can use DRAGNN to train your NLP models for other tasks and datasets. See
- "Getting started with DRAGNN below."
+ "Getting started with DRAGNN" below.
* You can continue to use the Parsey McParseface family of pre-trained
SyntaxNet models. See "Pre-trained NLP models" below.
@@ -117,9 +121,13 @@ We have a few guides on this README, as well as more extensive
![DRAGNN](g3doc/unrolled-dragnn.png)
-An easy and visual way to get started with DRAGNN is to run [our Jupyter
-Notebook](examples/dragnn/basic_parser_tutorial.ipynb). Our tutorial
+An easy and visual way to get started with DRAGNN is to run our Jupyter
+notebooks for [interactive
+debugging](examples/dragnn/interactive_text_analyzer.ipynb) and [training a new
+model](examples/dragnn/trainer_tutorial.ipynb). Our tutorial
[here](g3doc/CLOUD.md) explains how to start it up from the Docker container.
+Once you have DRAGNN installed and running, try out the
+[ParseySaurus](g3doc/conll2017) models.
### Using the Pre-trained NLP models
@@ -285,6 +293,7 @@ Original authors of the code in this package include (in alphabetical order):
* Aliaksei Severyn
* Andy Golding
* Bernd Bohnet
+* Chayut Thanapirom
* Chris Alberti
* Daniel Andor
* David Weiss
@@ -294,6 +303,7 @@ Original authors of the code in this package include (in alphabetical order):
* Ji Ma
* Keith Hall
* Kuzman Ganchev
+* Lingpeng Kong
* Livio Baldini Soares
* Mark Omernick
* Michael Collins
Deleted file not rendered
@@ -0,0 +1,66 @@
+# You need to build wheels before building this image. Please consult
+# docker-devel/README.txt.
+
+# This is the base of the openjdk image.
+#
+# It might be more efficient to use a minimal distribution, like Alpine. But
+# the upside of this being popular is that people might already have it.
+FROM buildpack-deps:jessie-curl
+
+ENV SYNTAXNETDIR=/opt/tensorflow PATH=$PATH:/root/bin
+
+RUN apt-get update \
+ && apt-get install -y \
+ file \
+ git \
+ graphviz \
+ libcurl3 \
+ libfreetype6 \
+ libgraphviz-dev \
+ liblapack3 \
+ libopenblas-base \
+ libpng12-0 \
+ libxft2 \
+ python-dev \
+ python-mock \
+ python-pip \
+ python2.7 \
+ zlib1g-dev \
+ && apt-get clean \
+ && (rm -f /var/cache/apt/archives/*.deb \
+ /var/cache/apt/archives/partial/*.deb /var/cache/apt/*.bin || true)
+
+# Install common Python dependencies. Similar to above, remove caches
+# afterwards to help keep Docker images smaller.
+RUN pip install --ignore-installed pip \
+ && python -m pip install numpy \
+ && rm -rf /root/.cache/pip /tmp/pip*
+RUN python -m pip install \
+ asciitree \
+ ipykernel \
+ jupyter \
+ matplotlib \
+ pandas \
+ protobuf \
+ scipy \
+ sklearn \
+ && python -m ipykernel.kernelspec \
+ && python -m pip install pygraphviz \
+ --install-option="--include-path=/usr/include/graphviz" \
+ --install-option="--library-path=/usr/lib/graphviz/" \
+ && rm -rf /root/.cache/pip /tmp/pip*
+
+COPY syntaxnet_with_tensorflow-0.2-cp27-none-linux_x86_64.whl $SYNTAXNETDIR/
+RUN python -m pip install \
+ $SYNTAXNETDIR/syntaxnet_with_tensorflow-0.2-cp27-none-linux_x86_64.whl \
+ && rm -rf /root/.cache/pip /tmp/pip*
+
+# This makes the IP exposed actually "*"; we'll do host restrictions by passing
+# a hostname to the `docker run` command.
+COPY tensorflow/tensorflow/tools/docker/jupyter_notebook_config.py /root/.jupyter/
+EXPOSE 8888
+
+# This does not need to be compiled, only copied.
+COPY examples $SYNTAXNETDIR/syntaxnet/examples
+# For some reason, this works if we run it in a bash shell :/ :/ :/
+CMD /bin/bash -c "python -m jupyter_core.command notebook --debug --notebook-dir=/opt/tensorflow/syntaxnet/examples"
@@ -0,0 +1,64 @@
+Docker is used for packaging SyntaxNet. There are three primary things we
+build with Docker:
+
+1. A development image, which contains all source built with Bazel.
+2. Python/pip wheels, built by running a command in the development container.
+3. A minified image, which only has the compiled version of TensorFlow and
+ SyntaxNet, by installing the wheel built by the above step.
+
+
+Important info (please read)
+------------------------------
+
+One thing to be wary of is that YOU CAN LOSE DATA IF YOU DEVELOP IN A DOCKER
+CONTAINER. Please be very careful to mount data you care about to Docker
+volumes, or use a volume mount so that it's mapped to your host filesystem.
+
+Another note, especially relevant to training models, is that Docker sends the
+whole source tree to the Docker daemon every time you try to build an image.
+This can take some time if you have large temporary model files lying around.
+You can exclude your model files by editing .dockerignore, or just don't store
+them in the base directory.
+
+
+Step 1: Building the development image
+------------------------------
+
+Simply run `docker build -t dragnn-oss .` in the base directory. Make sure you
+have all the source checked out correctly, including git submodules.
+
+
+Step 2: Building wheels
+------------------------------
+
+Please run,
+
+ bash ./docker-devel/build_wheels.sh
+
+This actually builds the image from Step 1 as well.
+
+
+Step 3: Building the minified image
+------------------------------
+
+First, ensure you have the file
+
+ syntaxnet_with_tensorflow-0.2-cp27-none-linux_x86_64.whl
+
+in your working directory, from step 2. Then run,
+
+ docker build -t dragnn-oss:latest-minimal -f docker-devel/Dockerfile.min .
+
+If the filename changes (e.g. you are on a different architecture), just update
+Dockerfile.min.
+
+
+Developing in Docker
+------------------------------
+
+We recommend developing in Docker by using the `./docker-devel/build_devel.sh`
+script; it will set up a few volume mounts and port mappings automatically.
+You may want to add more port mappings on your own. If you want to drop into a
+shell instead of launching the notebook, simply run,
+
+ ./docker-devel/build_devel.sh /bin/bash
@@ -23,5 +23,6 @@ syntaxnet_base="/opt/tensorflow/syntaxnet"
docker run --rm -ti \
-v "${root_path}"/syntaxnet:"${syntaxnet_base}"/syntaxnet \
-v "${root_path}"/dragnn:"${syntaxnet_base}"/dragnn \
+ -v "${root_path}"/examples:"${syntaxnet_base}"/examples \
-p 127.0.0.1:8888:8888 \
dragnn-oss "$@"
@@ -13,14 +13,10 @@ package syntaxnet.dragnn;
message MasterSpec {
repeated ComponentSpec component = 1;
- // DEPRECATED: Use the "batch_size" param of DragnnTensorFlowTrainer instead.
- optional int32 deprecated_batch_size = 2 [default = 1, deprecated = true];
-
- // DEPRECATED: Use ComponentSpec.*_beam_size instead.
- optional int32 deprecated_beam_size = 3 [default = 1, deprecated = true];
-
// Whether to extract debug traces.
optional bool debug_tracing = 4 [default = false];
+
+ reserved 2, 3, 5;
}
// Complete specification for a single task.
@@ -221,10 +217,6 @@ message GridPoint {
// problems for updates at the start of training.
optional double gradient_clip_norm = 11 [default = 0.0];
- // DEPRECATED: Use TrainTarget instead.
- repeated double component_weights = 5;
- repeated bool unroll_using_oracle = 6;
-
// A spec for using multiple optimization methods.
message CompositeOptimizerSpec {
// First optimizer.
@@ -254,6 +246,8 @@ message GridPoint {
// should be restricted. If left empty, no filtering will take
// place. Typically a single component.
optional string self_norm_components_filter = 21;
+
+ reserved 5, 6;
}
// Training target to be built into the graph.
@@ -154,6 +154,7 @@ py_test(
srcs = ["visualization_test.py"],
deps = [
":visualization",
+ "//dragnn/protos:spec_py_pb2",
"//dragnn/protos:trace_py_pb2",
"@org_tensorflow//tensorflow:tensorflow_py",
],
@@ -54,6 +54,15 @@ def parse_trace_json(trace):
return as_json
+def _optional_master_spec_json(master_spec):
+ """Helper function to return 'null' or a master spec JSON string."""
+ if master_spec is None:
+ return 'null'
+ else:
+ return json_format.MessageToJson(
+ master_spec, preserving_proto_field_name=True)
+
+
def _container_div(height='700px', contents=''):
elt_id = str(uuid.uuid4())
html = """
@@ -64,7 +73,11 @@ def _container_div(height='700px', contents=''):
return elt_id, html
-def trace_html(trace, convert_to_unicode=True, height='700px', script=None):
+def trace_html(trace,
+ convert_to_unicode=True,
+ height='700px',
+ script=None,
+ master_spec=None):
"""Generates HTML that will render a master trace.
This will result in a self-contained "div" element.
@@ -76,6 +89,8 @@ def trace_html(trace, convert_to_unicode=True, height='700px', script=None):
often pass the output of this function to IPython.display.HTML.
height: CSS string representing the height of the element, default '700px'.
script: Visualization script contents, if the defaults are unacceptable.
+ master_spec: Master spec proto (parsed), which can improve the layout. May
+ be required in future versions.
Returns:
unicode or str with HTML contents.
@@ -89,10 +104,14 @@ def trace_html(trace, convert_to_unicode=True, height='700px', script=None):
{div_html}
<script type='text/javascript'>
{script}
- visualizeToDiv({json}, "{elt_id}");
+ visualizeToDiv({json}, "{elt_id}", {master_spec_json});
</script>
""".format(
- script=script, json=json_trace, elt_id=elt_id, div_html=div_html)
+ script=script,
+ json=json_trace,
+ master_spec_json=_optional_master_spec_json(master_spec),
+ elt_id=elt_id,
+ div_html=div_html)
return unicode(as_str, 'utf-8') if convert_to_unicode else as_str
@@ -174,11 +193,13 @@ def initial_html(self, height='700px', script=None, init_message=None):
script=script, div_html=div_html)
return unicode(html, 'utf-8') # IPython expects unicode.
- def show_trace(self, trace):
+ def show_trace(self, trace, master_spec=None):
"""Returns a JS script HTML fragment, which will populate the container.
Args:
trace: binary-encoded MasterTrace string.
+ master_spec: Master spec proto (parsed), which can improve the layout. May
+ be required in future versions.
Returns:
unicode with HTML contents.
@@ -187,8 +208,10 @@ def show_trace(self, trace):
<meta charset="utf-8"/>
<script type='text/javascript'>
document.getElementById("{elt_id}").innerHTML = ""; // Clear previous.
- visualizeToDiv({json}, "{elt_id}");
+ visualizeToDiv({json}, "{elt_id}", {master_spec_json});
</script>
""".format(
- json=parse_trace_json(trace), elt_id=self.elt_id)
+ json=parse_trace_json(trace),
+ master_spec_json=_optional_master_spec_json(master_spec),
+ elt_id=self.elt_id)
return unicode(html, 'utf-8') # IPython expects unicode.
@@ -1,10 +1,12 @@
+# -*- coding: utf-8 -*-
"""Tests for dragnn.python.visualization."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.platform import googletest
+from dragnn.protos import spec_pb2
from dragnn.protos import trace_pb2
from dragnn.python import visualization
@@ -15,10 +17,16 @@ def _get_trace_proto_string():
step_trace=[
trace_pb2.ComponentStepTrace(fixed_feature_trace=[]),
],
- name='test_component',)
+ # Google Translate says this is "component" in Chinese. (To test UTF-8).
+ name='零件',)
return trace.SerializeToString()
+def _get_master_spec():
+ return spec_pb2.MasterSpec(
+ component=[spec_pb2.ComponentSpec(name='jalapeño')])
+
+
class VisualizationTest(googletest.TestCase):
def testCanFindScript(self):
@@ -37,6 +45,13 @@ def testInteractiveVisualization(self):
widget.initial_html()
widget.show_trace(_get_trace_proto_string())
+ def testMasterSpecJson(self):
+ visualization.trace_html(
+ _get_trace_proto_string(), master_spec=_get_master_spec())
+ widget = visualization.InteractiveVisualization()
+ widget.initial_html()
+ widget.show_trace(_get_trace_proto_string(), master_spec=_get_master_spec())
+
if __name__ == '__main__':
googletest.main()
Oops, something went wrong.

0 comments on commit e8464d3

Please sign in to comment.