tensorflow · calberti · Nov 14, 2017 · Nov 13, 2017 · Nov 13, 2017 · Nov 13, 2017
@@ -1,5 +1,4 @@
-# Java baseimage, for Bazel.
-FROM openjdk:8
+FROM ubuntu:16.10
 
 ENV SYNTAXNETDIR=/opt/tensorflow PATH=$PATH:/root/bin
 
@@ -21,13 +20,15 @@ RUN mkdir -p $SYNTAXNETDIR \
           libopenblas-dev \
           libpng-dev \
           libxft-dev \
-          patch \
+          openjdk-8-jdk \
           python-dev \
           python-mock \
           python-pip \
           python2.7 \
           swig \
+          unzip \
           vim \
+          wget \
           zlib1g-dev \
     && apt-get clean \
     && (rm -f /var/cache/apt/archives/*.deb \
@@ -55,7 +56,7 @@ RUN python -m pip install \
           --py --sys-prefix widgetsnbextension \
     && rm -rf /root/.cache/pip /tmp/pip*
 
-# Installs the latest version of Bazel.
+# Installs Bazel.
 RUN wget --quiet https://github.com/bazelbuild/bazel/releases/download/0.5.4/bazel-0.5.4-installer-linux-x86_64.sh \
     && chmod +x bazel-0.5.4-installer-linux-x86_64.sh \
     && ./bazel-0.5.4-installer-linux-x86_64.sh \
@@ -65,13 +66,11 @@ COPY WORKSPACE $SYNTAXNETDIR/syntaxnet/WORKSPACE
 COPY tools/bazel.rc $SYNTAXNETDIR/syntaxnet/tools/bazel.rc
 COPY tensorflow $SYNTAXNETDIR/syntaxnet/tensorflow
 
-# Workaround solving the PYTHON_BIN_PATH not found problem
-ENV PYTHON_BIN_PATH=/usr/bin/python
 # Compile common TensorFlow targets, which don't depend on DRAGNN / SyntaxNet
 # source. This makes it more convenient to re-compile DRAGNN / SyntaxNet for
 # development (though not as convenient as the docker-devel scripts).
 RUN cd $SYNTAXNETDIR/syntaxnet/tensorflow \
-    && ./configure CPU \
+    && tensorflow/tools/ci_build/builds/configured CPU \
     && cd $SYNTAXNETDIR/syntaxnet \
     && bazel build -c opt @org_tensorflow//tensorflow:tensorflow_py
 
@@ -92,4 +91,4 @@ EXPOSE 8888
 COPY examples $SYNTAXNETDIR/syntaxnet/examples
 # Todo: Move this earlier in the file (don't want to invalidate caches for now).
 
-CMD /bin/bash -c "bazel-bin/dragnn/tools/oss_notebook_launcher notebook --debug --notebook-dir=/opt/tensorflow/syntaxnet/examples --allow-root"
+CMD /bin/bash -c "bazel-bin/dragnn/tools/oss_notebook_launcher notebook --debug --notebook-dir=/opt/tensorflow/syntaxnet/examples"
@@ -23,8 +23,8 @@ This repository is largely divided into two sub-packages:
     [documentation](g3doc/DRAGNN.md),
     [paper](https://arxiv.org/pdf/1703.04474.pdf)** implements Dynamic Recurrent
     Acyclic Graphical Neural Networks (DRAGNN), a framework for building
-    multi-task, fully dynamically constructed computation graphs. Practically, we
-    use DRAGNN to extend our prior work from [Andor et al.
+    multi-task, fully dynamically constructed computation graphs. Practically,
+    we use DRAGNN to extend our prior work from [Andor et al.
     (2016)](http://arxiv.org/abs/1603.06042) with end-to-end, deep recurrent
     models and to provide a much easier to use interface to SyntaxNet. *DRAGNN
     is designed first and foremost as a Python library, and therefore much
@@ -54,20 +54,47 @@ There are three ways to use SyntaxNet:
 
 ### Docker installation
 
+_This process takes ~10 minutes._
+
 The simplest way to get started with DRAGNN is by loading our Docker container.
 [Here](g3doc/CLOUD.md) is a tutorial for running the DRAGNN container on
 [GCP](https://cloud.google.com) (just as applicable to your own computer).
 
+### Ubuntu 16.10+ binary installation
+
+_This process takes ~5 minutes, but is only compatible with Linux using GNU
+libstdc++ (GLIBCXX) 3.4.22 and above (e.g. Ubuntu 16.10)._
+
+Binary wheel packages are provided for TensorFlow and SyntaxNet. If you do not
+need to write new binary TensorFlow ops, these should suffice.
+
+*   `apt-get install -y graphviz libgraphviz-dev libopenblas-base libpng16-16
+    libxft2 python-pip python-mock`
+*   `pip install pygraphviz
+    --install-option="--include-path=/usr/include/graphviz"
+    --install-option="--library-path=/usr/lib/graphviz/"`
+*   `pip install 'ipython<6.0' protobuf numpy scipy jupyter
+    syntaxnet-with-tensorflow`
+*   `python -m jupyter_core.command nbextension enable --py --sys-prefix
+    widgetsnbextension`
+
+You can test that binary modules can be successfully imported by running,
+
+*   `python -c 'import dragnn.python.load_dragnn_cc_impl,
+    syntaxnet.load_parser_ops'`
+
 ### Manual installation
 
+_This process takes 1-2 hours._
+
 Running and training SyntaxNet/DRAGNN models requires building this package from
 source. You'll need to install:
 
 *   python 2.7:
     *   Python 3 support is not available yet
-*   bazel:
+*   bazel 0.5.4:
     *   Follow the instructions [here](http://bazel.build/docs/install.html)
-    *   Alternately, Download bazel <.deb> from
+    *   Alternatively, download the bazel 0.5.4 <.deb> from
         [https://github.com/bazelbuild/bazel/releases](https://github.com/bazelbuild/bazel/releases)
         for your system configuration.
     *   Install it using the command: sudo dpkg -i <.deb file>
@@ -103,9 +130,12 @@ following commands:
   bazel test --linkopt=-headerpad_max_install_names \
     dragnn/... syntaxnet/... util/utf8/...
 ```
+
 Bazel should complete reporting all tests passed.
 
-Now you can install the SyntaxNet and DRAGNN Python modules with the following commands:
+Now you can install the SyntaxNet and DRAGNN Python modules with the following
+commands:
+
 ```shell
   mkdir /tmp/syntaxnet_pkg
   bazel-bin/dragnn/tools/build_pip_package --output-dir=/tmp/syntaxnet_pkg
@@ -116,8 +146,6 @@ Now you can install the SyntaxNet and DRAGNN Python modules with the following c
 To build SyntaxNet with GPU support please refer to the instructions in
 [issues/248](https://github.com/tensorflow/models/issues/248).
 
-
-
 **Note:** If you are running Docker on OSX, make sure that you have enough
 memory allocated for your Docker VM.
 

@@ -0,0 +1,11 @@
+FROM dragnn-oss-test-base:latest
+
+RUN rm -rf \
+  $SYNTAXNETDIR/syntaxnet/dragnn \
+  $SYNTAXNETDIR/syntaxnet/syntaxnet \
+  $SYNTAXNETDIR/syntaxnet/third_party \
+  $SYNTAXNETDIR/syntaxnet/util/utf8
+COPY dragnn $SYNTAXNETDIR/syntaxnet/dragnn
+COPY syntaxnet $SYNTAXNETDIR/syntaxnet/syntaxnet
+COPY third_party $SYNTAXNETDIR/syntaxnet/third_party
+COPY util/utf8 $SYNTAXNETDIR/syntaxnet/util/utf8
@@ -0,0 +1,91 @@
+FROM ubuntu:16.10
+
+ENV SYNTAXNETDIR=/opt/tensorflow PATH=$PATH:/root/bin
+
+# Install system packages. This doesn't include everything the TensorFlow
+# dockerfile specifies, so if anything goes awry, maybe install more packages
+# from there. Also, running apt-get clean before further commands will make the
+# Docker images smaller.
+RUN mkdir -p $SYNTAXNETDIR \
+    && cd $SYNTAXNETDIR \
+    && apt-get update \
+    && apt-get install -y \
+          file \
+          git \
+          graphviz \
+          libcurl3-dev \
+          libfreetype6-dev \
+          libgraphviz-dev \
+          liblapack-dev \
+          libopenblas-dev \
+          libpng-dev \
+          libxft-dev \
+          openjdk-8-jdk \
+          python-dev \
+          python-mock \
+          python-pip \
+          python2.7 \
+          swig \
+          unzip \
+          vim \
+          wget \
+          zlib1g-dev \
+    && apt-get clean \
+    && (rm -f /var/cache/apt/archives/*.deb \
+        /var/cache/apt/archives/partial/*.deb /var/cache/apt/*.bin || true)
+
+# Install common Python dependencies. Similar to above, remove caches
+# afterwards to help keep Docker images smaller.
+RUN pip install --ignore-installed pip \
+    && python -m pip install numpy \
+    && rm -rf /root/.cache/pip /tmp/pip*
+RUN python -m pip install \
+          asciitree \
+          ipykernel \
+          jupyter \
+          matplotlib \
+          pandas \
+          protobuf \
+          scipy \
+          sklearn \
+    && python -m ipykernel.kernelspec \
+    && python -m pip install pygraphviz \
+          --install-option="--include-path=/usr/include/graphviz" \
+          --install-option="--library-path=/usr/lib/graphviz/" \
+    && python -m jupyter_core.command nbextension enable \
+          --py --sys-prefix widgetsnbextension \
+    && rm -rf /root/.cache/pip /tmp/pip*
+
+# Installs Bazel 0.5.3. NOTE(review): the other Dockerfile and README use 0.5.4 -- confirm.
+RUN wget --quiet https://github.com/bazelbuild/bazel/releases/download/0.5.3/bazel-0.5.3-installer-linux-x86_64.sh \
+    && chmod +x bazel-0.5.3-installer-linux-x86_64.sh \
+    && JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ ./bazel-0.5.3-installer-linux-x86_64.sh \
+    && rm ./bazel-0.5.3-installer-linux-x86_64.sh
+
+COPY WORKSPACE $SYNTAXNETDIR/syntaxnet/WORKSPACE
+COPY tools/bazel.rc $SYNTAXNETDIR/syntaxnet/tools/bazel.rc
+
+# Clone TensorFlow r1.3, configure it for CPU, and pre-build the common
+# TensorFlow Python target, which doesn't depend on DRAGNN / SyntaxNet source.
+# This makes it more convenient to re-compile DRAGNN / SyntaxNet afterwards.
+RUN cd $SYNTAXNETDIR/syntaxnet \
+    && git clone --branch r1.3 --recurse-submodules https://github.com/tensorflow/tensorflow \
+    && cd tensorflow \
+    # Delete the workspace.bzl line naming a bad protobuf archive URL, which
+    # otherwise causes the TensorFlow install to fail.
+    && sed -i '\@https://github.com/google/protobuf/archive/0b059a3d8a8f8aa40dde7bea55edca4ec5dfea66.tar.gz@d' tensorflow/workspace.bzl \
+    && tensorflow/tools/ci_build/builds/configured CPU \
+    && cd $SYNTAXNETDIR/syntaxnet \
+    && bazel build -c opt @org_tensorflow//tensorflow:tensorflow_py
+
+# Just copy the code and run tests. The build and test flags differ enough that
+# doing a normal build of TensorFlow targets doesn't save much test time.
+WORKDIR $SYNTAXNETDIR/syntaxnet
+COPY dragnn $SYNTAXNETDIR/syntaxnet/dragnn
+COPY syntaxnet $SYNTAXNETDIR/syntaxnet/syntaxnet
+COPY third_party $SYNTAXNETDIR/syntaxnet/third_party
+COPY util/utf8 $SYNTAXNETDIR/syntaxnet/util/utf8
+
+# Doesn't matter if the tests pass or not, since we're going to re-copy over the
+# code.
+RUN bazel test -c opt ... || true
@@ -1,11 +1,9 @@
 # You need to build wheels before building this image. Please consult
 # docker-devel/README.txt.
-
-# This is the base of the openjdk image.
 #
 # It might be more efficient to use a minimal distribution, like Alpine. But
 # the upside of this being popular is that people might already have it.
-FROM buildpack-deps:jessie-curl
+FROM ubuntu:16.10
 
 ENV SYNTAXNETDIR=/opt/tensorflow PATH=$PATH:/root/bin
 
@@ -19,7 +17,7 @@ RUN apt-get update \
           libgraphviz-dev \
           liblapack3 \
           libopenblas-base \
-          libpng12-0 \
+          libpng16-16 \
           libxft2 \
           python-dev \
           python-mock \
@@ -48,11 +46,13 @@ RUN python -m pip install \
     && python -m pip install pygraphviz \
           --install-option="--include-path=/usr/include/graphviz" \
           --install-option="--library-path=/usr/lib/graphviz/" \
+    && python -m jupyter_core.command nbextension enable \
+          --py --sys-prefix widgetsnbextension \
     && rm -rf /root/.cache/pip /tmp/pip*
 
-COPY syntaxnet_with_tensorflow-0.2-cp27-none-linux_x86_64.whl $SYNTAXNETDIR/
+COPY syntaxnet_with_tensorflow-0.2-cp27-cp27mu-linux_x86_64.whl $SYNTAXNETDIR/
 RUN python -m pip install \
-        $SYNTAXNETDIR/syntaxnet_with_tensorflow-0.2-cp27-none-linux_x86_64.whl \
+        $SYNTAXNETDIR/syntaxnet_with_tensorflow-0.2-cp27-cp27mu-linux_x86_64.whl \
     && rm -rf /root/.cache/pip /tmp/pip*
 
 # This makes the IP exposed actually "*"; we'll do host restrictions by passing
@@ -63,4 +63,4 @@ EXPOSE 8888
 # This does not need to be compiled, only copied.
 COPY examples $SYNTAXNETDIR/syntaxnet/examples
 # For some reason, this works if we run it in a bash shell :/ :/ :/
-CMD /bin/bash -c "python -m jupyter_core.command notebook --debug --notebook-dir=/opt/tensorflow/syntaxnet/examples"
+CMD /bin/bash -c "python -m jupyter_core.command notebook --debug --notebook-dir=/opt/tensorflow/syntaxnet/examples --allow-root"
@@ -43,11 +43,11 @@ Step 3: Building the development image
 
 First, ensure you have the file
 
-  syntaxnet_with_tensorflow-0.2-cp27-none-linux_x86_64.whl
+  syntaxnet_with_tensorflow-0.2-cp27-cp27mu-linux_x86_64.whl
 
 in your working directory, from step 2. Then run,
 
-  docker build -t dragnn-oss:latest-minimal -f docker-devel/Dockerfile.min
+  docker build -t dragnn-oss:latest-minimal -f docker-devel/Dockerfile.min .
 
 If the filename changes (e.g. you are on a different architecture), just update
 Dockerfile.min.

@@ -10,7 +10,6 @@ cc_library(
         "//dragnn/core:component_registry",
         "//dragnn/core/interfaces:component",
         "//dragnn/core/interfaces:transition_state",
-        "//dragnn/io:sentence_input_batch",
         "//dragnn/protos:data_proto",
         "//syntaxnet:base",
     ],

@@ -16,7 +16,6 @@
 #include "dragnn/core/component_registry.h"
 #include "dragnn/core/interfaces/component.h"
 #include "dragnn/core/interfaces/transition_state.h"
-#include "dragnn/io/sentence_input_batch.h"
 #include "dragnn/protos/data.pb.h"
 #include "syntaxnet/base.h"
 
@@ -25,7 +24,8 @@ namespace dragnn {
 namespace {
 
 // A component that does not create its own transition states; instead, it
-// simply forwards the states of the previous component.  Does not support all
+// simply forwards the states of the previous component.  Requires that some
+// previous component has converted the input batch.  Does not support all
 // methods.  Intended for "compute-only" bulk components that only use linked
 // features, which use only a small subset of DRAGNN functionality.
 class StatelessComponent : public Component {
@@ -38,8 +38,7 @@ class StatelessComponent : public Component {
   void InitializeData(
       const std::vector<std::vector<const TransitionState *>> &parent_states,
       int max_beam_size, InputBatchCache *input_data) override {
-    // Must use SentenceInputBatch to match SyntaxNetComponent.
-    batch_size_ = input_data->GetAs<SentenceInputBatch>()->data()->size();
+    batch_size_ = input_data->Size();
     beam_size_ = max_beam_size;
     parent_states_ = parent_states;
 
@@ -84,31 +83,34 @@ class StatelessComponent : public Component {
     LOG(FATAL) << "[" << name_ << "] Method not supported";
     return nullptr;
   }
-  void AdvanceFromPrediction(const float transition_matrix[],
-                             int matrix_length) override {
-    LOG(FATAL) << "[" << name_ << "] Method not supported";
+  bool AdvanceFromPrediction(const float *transition_matrix, int num_items,
+                             int num_actions) override {
+    LOG(FATAL) << "[" << name_ << "] AdvanceFromPrediction not supported";
   }
   void AdvanceFromOracle() override {
-    LOG(FATAL) << "[" << name_ << "] Method not supported";
+    LOG(FATAL) << "[" << name_ << "] AdvanceFromOracle not supported";
   }
   std::vector<std::vector<int>> GetOracleLabels() const override {
     LOG(FATAL) << "[" << name_ << "] Method not supported";
-    return {};
   }
   int GetFixedFeatures(std::function<int32 *(int)> allocate_indices,
                        std::function<int64 *(int)> allocate_ids,
                        std::function<float *(int)> allocate_weights,
                        int channel_id) const override {
     LOG(FATAL) << "[" << name_ << "] Method not supported";
-    return 0;
   }
   int BulkGetFixedFeatures(const BulkFeatureExtractor &extractor) override {
     LOG(FATAL) << "[" << name_ << "] Method not supported";
-    return 0;
   }
+  void BulkEmbedFixedFeatures(
+      int batch_size_padding, int num_steps_padding, int output_array_size,
+      const vector<const float *> &per_channel_embeddings,
+      float *embedding_output) override {
+    LOG(FATAL) << "[" << name_ << "] Method not supported";
+  }
+
   std::vector<LinkFeatures> GetRawLinkFeatures(int channel_id) const override {
     LOG(FATAL) << "[" << name_ << "] Method not supported";
-    return {};
   }
   void AddTranslatedLinkFeaturesToTrace(
       const std::vector<LinkFeatures> &features, int channel_id) override {