Class config (#21)

* move configuration to classes * Code style and formatting changes. * update documentation
merantix · Jul 7, 2017 · 1d396da · 1d396da
1 parent 4878f96
commit 1d396da
Show file tree

Hide file tree

Showing 32 changed files with 785 additions and 875 deletions.
diff --git a/docs/models.rst b/docs/models.rst
@@ -12,19 +12,18 @@ We include three `examples`_ for you to try: a model trained on the `MNIST`_ dat
 
 If you built your model with Keras using a `Sequential`_ model, you should be more or less good to go.  If you used Tensorflow, you'll need to manually specify the entry and exit points [#]_.
 
-You can specify the backend (Tensorflow or Keras) using the ``PICASSO_BACKEND_ML`` setting. The allowed values are ``tensorflow`` or ``keras`` (see :doc:`settings`).
-
 Your model data
 ===============
 
-You can specify the data directory with the ``PICASSO_DATA_DIR`` setting. This directory should contain the Keras or Tensorflow checkpoint files.  If multiple checkpoints are found, the latest one will be used (see example `Keras model code`_).
+You can specify the data directory with the ``MODEL_LOAD_ARGS.data_dir`` setting (see :doc:`settings`). This directory should contain the Keras or Tensorflow checkpoint files.  If multiple checkpoints are found, the latest one will be used (see example `Keras model code`_).
 
 Utility functions
 =================
 
 In addition to the graph and weight information of the model itself, you'll need to define a few functions to help the visualization interact with user input, and interpret raw output from your computational graph.  These are arbitrary python functions, and their locations can be specified in the :doc:`settings`.
 
-We'll draw from the `Keras MNIST example`_ for this guide.
+We'll draw from the `Keras MNIST example`_ for this guide.  All custom models
+should inherit from the relevant model: either ``KerasModel`` or ``TensorflowModel``.
 
 Preprocessor
 ------------
@@ -33,39 +32,29 @@ The preprocessor takes images uploaded to the webapp and converts them into arra
 
 .. code-block:: python3
 
-   MNIST_DIM = (28, 28)
-
-   def preprocess(targets):
-       image_arrays = []
-       for target in targets:
-           im = target.convert('L')
-           im = im.resize(MNIST_DIM, Image.ANTIALIAS)
-           arr = np.array(im)
-           image_arrays.append(arr)
-
-       all_targets = np.array(image_arrays)
-       return all_targets.reshape(len(all_targets),
-                                  MNIST_DIM[0],
-                                  MNIST_DIM[1], 1).astype('float32') / 255
-
-Specifically, we have to convert an arbitrary input color image to a float array of the input size specified with ``MNIST_DIM``.
+   import numpy as np
+   from PIL import Image
+   
+   from picasso.models.keras import KerasModel
 
-Postprocessor
--------------
+   MNIST_DIM = (28, 28)
 
-For some visualizations, it's useful to convert a flat representation back into an array with the same shape as the original image.
+   class KerasMNISTModel(KerasModel):
 
-.. code-block:: python3
+       def preprocess(self, raw_inputs):
+           image_arrays = []
+           for target in raw_inputs:
+               im = target.convert('L')
+               im = im.resize(MNIST_DIM, Image.ANTIALIAS)
+               arr = np.array(im)
+               image_arrays.append(arr)
 
-   def postprocess(output_arr):
-       images = []
-       for row in output_arr:
-           im_array = row.reshape(MNIST_DIM)
-           images.append(im_array)
+           all_targets = np.array(image_arrays)
+           return all_targets.reshape(len(all_targets),
+                                      MNIST_DIM[0],
+                                      MNIST_DIM[1], 1).astype('float32') / 255
 
-       return images
-
-This therefore takes an arbitrary array (with the same number of total entries as the image array) and reshapes it back.
+Specifically, we have to convert an arbitrary input color image to a float array of the input size specified with ``MNIST_DIM``.
 
 Class Decoder
 -------------
@@ -74,24 +63,27 @@ Class probabilities are usually returned in an array.  For any visualization whe
 
 .. code-block:: python3
 
-   def prob_decode(probability_array, top=5):
-       results = []
-       for row in probability_array:
-           entries = []
-           for i, prob in enumerate(row):
-               entries.append({'index': i,
-                               'name': str(i),
-                               'prob': prob})
+   class KerasMNISTModel(KerasModel):
 
-           entries = sorted(entries,
-                            key=itemgetter('prob'),
-                            reverse=True)[:top]
+       ...
+       
+       def decode_prob(self, class_probabilities):
+           results = []
+           for row in class_probabilities:
+               entries = []
+               for i, prob in enumerate(row):
+                   entries.append({'index': i,
+                                   'name': str(i),
+                                   'prob': prob})
 
-           for entry in entries:
-               entry['prob'] = '{:.3f}'.format(entry['prob'])
-           results.append(entries)
+               entries = sorted(entries,
+                                key=itemgetter('prob'),
+                                reverse=True)[:self.top_probs]
 
-       return results
+               for entry in entries:
+                   entry['prob'] = '{:.3f}'.format(entry['prob'])
+               results.append(entries)
+           return results
 
 ``results`` is then a list of dicts in the format ``[{'index': class_index, 'name': class_name, 'prob': class_probability}, ...]``. In the case of the MNIST dataset, the index is the same as the class name (digits 0-9).
 
@@ -103,9 +95,9 @@ Class probabilities are usually returned in an array.  For any visualization whe
 
 .. _Sequential: https://keras.io/models/sequential/
 
-.. _Keras model code: https://github.com/merantix/picasso/blob/master/picasso/ml_frameworks/keras/model.py
+.. _Keras model code: https://github.com/merantix/picasso/blob/master/picasso/models/keras.py
 
-.. _Keras MNIST example: https://github.com/merantix/picasso/blob/master/picasso/examples/keras/util.py
+.. _Keras MNIST example: https://github.com/merantix/picasso/blob/master/picasso/examples/keras/model.py
 
 .. _PIL Image: http://pillow.readthedocs.io/en/latest/reference/Image.html
 

diff --git a/docs/settings.rst b/docs/settings.rst
@@ -21,29 +21,18 @@ Tells the app to use this configuration instead of the default one.  Inside
    
    base_dir = os.path.split(os.path.abspath(__file__))[0]
    
-   BACKEND_ML = 'tensorflow'
-   BACKEND_PREPROCESSOR_NAME = 'util'
-   BACKEND_PREPROCESSOR_PATH = os.path.join(base_dir, 'util.py')
-   BACKEND_POSTPROCESSOR_NAME = 'postprocess'
-   BACKEND_POSTPROCESSOR_PATH = os.path.join(base_dir, 'util.py')
-   BACKEND_PROB_DECODER_NAME = 'prob_decode'
-   BACKEND_PROB_DECODER_PATH = os.path.join(base_dir, 'util.py')
-   DATA_DIR = os.path.join(base_dir, 'data-volume')
-
-Any lowercase line is ignored for the purposes of determining a setting.  These
-can also be set via environment variables, but you must append the app name.
-For instance ``BACKEND_ML = 'tensorflow'`` would become ``export
-PICASSO_BACKEND_ML=tensorflow``.
-
-For explanations of each setting, see :mod:`picasso.settings`.  Any
-additional settings starting with `BACKEND_` will be sent to the model backend
-as a keyword argument.  The input and output tensor names can be passed to the
-Tensorflow backend in this way:
-
-.. code-block:: python3
-
-   ...
-   BACKEND_TF_PREDICT_VAR='Softmax:0'
-   BACKEND_TF_INPUT_VAR='convolution2d_input_1:0'
+   MODEL_CLS_PATH = os.path.join(base_dir, 'model.py')
+   MODEL_CLS_NAME = 'TensorflowMNISTModel'
+   MODEL_LOAD_ARGS = {
+       'data_dir': os.path.join(base_dir, 'data-volume'),
+       'tf_input_var': 'convolution2d_input_1:0',
+       'tf_predict_var': 'Softmax:0',
+   }
+
+Only uppercase names are treated as settings; lowercase variables such as
+``base_dir`` are ignored.  ``MODEL_LOAD_ARGS`` will pass the arguments along
+to the model's ``load`` function.
+
+For explanations of each setting, see :mod:`picasso.config`.  
 
 .. _managed by Flask: http://flask.pocoo.org/docs/latest/config/
diff --git a/docs/visualizations.rst b/docs/visualizations.rst
@@ -13,14 +13,12 @@ For our example, ``FunViz``, we'll need ``picasso/visualizations/fun_viz.py``:
 
 .. code-block:: python3
 
-   from picasso.visualizations import BaseVisualization
+   from picasso.visualizations.base import BaseVisualization
 
 
    class FunViz(BaseVisualization):
 
-       def __init__(self, model):
-           self.description = 'A fun visualization!'
-           self.model = model
+       DESCRIPTION = 'A fun visualization!'
 
        def make_visualization(self, inputs, output_dir, settings=None):
            pass
@@ -34,7 +32,7 @@ and ``picasso/templates/FunViz.html``:
    your visualization html goes here
    {% endblock %}
 
-Some explanation for the ``FunViz`` class in ``fun_viz.py``: All visualizations should inherit from :class:`~picasso.visualizations.__init__.BaseVisualization` (see `code <BaseVisualization>`_).  You must implement the ``__init__`` method, and it should accept one argument, ``model``. ``model`` will be an instance of a child class of `Model`_, which provides an interface to the machine learning backend.  You can also add a description which will display on the landing page.
+Some explanation for the ``FunViz`` class in ``fun_viz.py``: All visualizations should inherit from :class:`~picasso.visualizations.base.BaseVisualization`.  You can also add a description which will display on the landing page.
 
 Some explanation for ``FunViz.html``: The web app uses `Flask`_, which uses `Jinja2`_ templating.  This explains the funny ``{% %}`` delimiters.   The ``{% extends "result.html" %}`` just tells your page to inherit from a boilerplate.  All your html should sit within the ``vis`` block.
 
@@ -53,16 +51,14 @@ Add visualization logic
 Our visualization should actually do something.  It's just going to compute the class probabilities and pass them back along to the web app. So we'll add:
 
 .. code-block:: python3
-   :emphasize-lines: 11-21
+   :emphasize-lines: 9-21
 
-   from picasso.visualizations import BaseVisualization
+   from picasso.visualizations.base import BaseVisualization
 
 
    class FunViz(BaseVisualization):
 
-       def __init__(self, model):
-           self.description = 'A fun visualization!'
-           self.model = model
+       DESCRIPTION = 'A fun visualization!'
 
        def make_visualization(self, inputs, output_dir, settings=None):
            pre_processed_arrays = self.model.preprocess([example['data']
@@ -311,20 +307,19 @@ Similarly, there is an ``outputs/`` folder (not shown in this example).  Its pat
 Add some settings
 =================
 
-Maybe we'd like the user to be able to limit the number of classes shown.  We can easily do this by adding a ``settings`` property to the ``FunViz`` class.
+Maybe we'd like the user to be able to limit the number of classes shown.  We can easily do this by adding an ``ALLOWED_SETTINGS`` property to the ``FunViz`` class.
 
 .. code-block:: python3
-   :emphasize-lines: 5, 21
+   :emphasize-lines: 6, 20
 
    from picasso.visualizations.base import BaseVisualization
 
 
    class FunViz(BaseVisualization):
-       settings = {'Display': ['1', '2', '3']}
 
-       def __init__(self, model):
-           self.description = 'A fun visualization!'
-           self.model = model
+       ALLOWED_SETTINGS = {'Display': ['1', '2', '3']}
+
+       DESCRIPTION = 'A fun visualization!'
 
        def make_visualization(self, inputs, output_dir, settings=None):
            pre_processed_arrays = self.model.preprocess([example['data']
@@ -391,10 +386,6 @@ For more complex visualizations, see the examples in `the visualizations module`
 
 .. _template: https://github.com/merantix/picasso/blob/master/picasso/templates/ClassProbabilities.html
 
-.. _BaseVisualization: https://github.com/merantix/picasso/blob/master/picasso/visualizations/__init__.py 
-
-.. _Model: https://github.com/merantix/picasso/blob/master/picasso/ml_frameworks/model.py
-
 .. _Flask: http://flask.pocoo.org/
 
 .. _Jinja2: http://jinja.pocoo.org/docs/

diff --git a/picasso/__init__.py b/picasso/__init__.py
@@ -13,9 +13,28 @@
     raise SystemError('Python 3.5+ required, found {}'.format(sys.version))
 
 app = Flask(__name__)
-app.config.from_object('picasso.settings.Default')
+app.config.from_object('picasso.config.Default')
 
 if os.getenv('PICASSO_SETTINGS'):
     app.config.from_envvar('PICASSO_SETTINGS')
 
+# Setting names from the pre-v0.2.0 configuration scheme.  Finding any of
+# them in the loaded config means the user's settings file has not been
+# migrated to the class-based model configuration.
+deprecated_settings = ['BACKEND_PREPROCESSOR_NAME',
+                       'BACKEND_PREPROCESSOR_PATH',
+                       'BACKEND_POSTPROCESSOR_NAME',
+                       'BACKEND_POSTPROCESSOR_PATH',
+                       'BACKEND_PROB_DECODER_NAME',
+                       'BACKEND_PROB_DECODER_PATH',
+                       'DATA_DIR']
+
+# Refuse to start with an old-style configuration: this runs at import time,
+# right after the config has been loaded.  A generator lets ``any`` stop at
+# the first deprecated name instead of building a full list first.
+if any(name in app.config for name in deprecated_settings):
+    raise ValueError('It looks like you\'re using a deprecated'
+                     ' setting.  The settings and utility functions'
+                     ' have been changed as of version v0.2.0 (and '
+                     'you\'re using {}). Changing to the updated '
+                     ' settings is trivial: see '
+                     'https://picasso.readthedocs.io/en/latest/models.html'
+                     ' and '
+                     'https://picasso.readthedocs.io/en/latest/settings.html'
+                     .format(__version__))
+
 import picasso.picasso
diff --git a/picasso/config.py b/picasso/config.py
@@ -0,0 +1,30 @@
+import os
+
+base_dir = os.path.dirname(__file__)  # only for default config
+
+
+class Default:
+    """Default settings for the Flask app.
+
+    The Flask app uses these settings if no custom settings are defined.  You
+    can define custom settings by creating a Python module, defining global
+    variables in that module, and setting the environment variable
+    `PICASSO_SETTINGS` to the path to that module.
+
+    If `PICASSO_SETTINGS` is not set, or if any particular setting is not
+    defined in the indicated module, then the Flask app uses these default
+    settings.
+
+    The defaults below point at the Keras MNIST example that lives in
+    ``examples/keras`` next to this file.
+
+    """
+    # :obj:`str`: filepath of the module containing the model to run
+    MODEL_CLS_PATH = os.path.join(
+        base_dir, 'examples', 'keras', 'model.py')
+
+    # :obj:`str`: name of model class
+    MODEL_CLS_NAME = 'KerasMNISTModel'
+
+    # :obj:`dict`: dictionary of args to pass to the `load` method of the
+    # model instance.  `data_dir` is the directory holding the example's
+    # model data.
+    MODEL_LOAD_ARGS = {
+        'data_dir': os.path.join(base_dir, 'examples', 'keras', 'data-volume'),
+    }
diff --git a/picasso/examples/keras-vgg16/config.py b/picasso/examples/keras-vgg16/config.py
@@ -1,12 +1,15 @@
+# Note: By default, Flask doesn't know that this file exists.  If you want
+# Flask to load the settings you specify here, you must set the environment
+# variable `PICASSO_SETTINGS` to point to this file.  E.g.:
+#
+#   export PICASSO_SETTINGS=/path/to/examples/keras-vgg16/config.py
+#
 import os
 
 base_dir = os.path.dirname(os.path.abspath(__file__))
 
-BACKEND_ML = 'keras'
-BACKEND_PREPROCESSOR_NAME = 'preprocess'
-BACKEND_PREPROCESSOR_PATH = os.path.join(base_dir, 'util.py')
-BACKEND_POSTPROCESSOR_NAME = 'postprocess'
-BACKEND_POSTPROCESSOR_PATH = os.path.join(base_dir, 'util.py')
-BACKEND_PROB_DECODER_NAME = 'prob_decode'
-BACKEND_PROB_DECODER_PATH = os.path.join(base_dir, 'util.py')
-DATA_DIR = os.path.join(base_dir, 'data-volume')
+MODEL_CLS_PATH = os.path.join(base_dir, 'model.py')
+MODEL_CLS_NAME = 'KerasVGG16Model'
+MODEL_LOAD_ARGS = {
+    'data_dir': os.path.join(base_dir, 'data-volume'),
+}
diff --git a/picasso/examples/keras-vgg16/model.py b/picasso/examples/keras-vgg16/model.py
@@ -0,0 +1,48 @@
+from keras.applications import imagenet_utils
+import numpy as np
+from PIL import Image
+
+from picasso.models.keras import KerasModel
+
+VGG16_DIM = (224, 224, 3)
+
+
+class KerasVGG16Model(KerasModel):
+    """VGG16 example model: converts images in and class probabilities out."""
+
+    def preprocess(self, raw_inputs):
+        """Convert images into a preprocessed float32 batch.
+
+        Each image is resized to the first two entries of ``VGG16_DIM``,
+        converted to RGB and cast to float32; the stacked batch is then
+        passed through ``imagenet_utils.preprocess_input``.
+
+        Args:
+            raw_inputs (list of Images): a list of PIL Image objects
+        Returns:
+            array (float32): num images * height * width * num channels
+        """
+        image_arrays = []
+        for raw_im in raw_inputs:
+            # LANCZOS is the same filter that used to be exposed as
+            # ANTIALIAS; the old alias was removed in Pillow 10.
+            im = raw_im.resize(VGG16_DIM[:2], Image.LANCZOS)
+            im = im.convert('RGB')
+            arr = np.array(im).astype('float32')
+            image_arrays.append(arr)
+
+        all_raw_inputs = np.array(image_arrays)
+        return imagenet_utils.preprocess_input(all_raw_inputs)
+
+    def decode_prob(self, class_probabilities):
+        """Turn class probabilities into labelled top predictions.
+
+        Args:
+            class_probabilities: probabilities in the form accepted by
+                ``imagenet_utils.decode_predictions``
+        Returns:
+            list of lists of dicts, one inner list per decoded row, holding
+            the top ``self.top_probs`` entries.  Each dict has the keys
+            ``'code'``, ``'name'``, ``'prob'`` (a string with three
+            decimals) and ``'index'`` (an int).
+        """
+        decoded = imagenet_utils.decode_predictions(class_probabilities,
+                                                    top=self.top_probs)
+        results = [
+            [{'code': entry[0],
+              'name': entry[1],
+              'prob': '{:.3f}'.format(entry[2])}
+             for entry in row]
+            for row in decoded
+        ]
+
+        # Only code, name and probability are read from the decoded rows, so
+        # the class index is looked up in keras' class table.  The table is
+        # read after decode_predictions has run (presumably that call is what
+        # populates it -- confirm against the installed keras version).
+        # Build the reverse mapping once instead of scanning the table for
+        # every entry; setdefault keeps the first key for a label, matching
+        # what a list.index() scan would return.
+        key_by_label = {}
+        for key, label in imagenet_utils.CLASS_INDEX.items():
+            key_by_label.setdefault(tuple(label), key)
+
+        for result in results:
+            for entry in result:
+                entry['index'] = int(
+                    key_by_label[(entry['code'], entry['name'])])
+        return results