Skip to content

Commit

Permalink
enable keras, tensorflow
Browse files Browse the repository at this point in the history
update requirements for docker installation and livetest

rebasing on master

fix dependencies
  • Loading branch information
miraculixx committed Aug 17, 2019
1 parent 224f668 commit 747fb7e
Show file tree
Hide file tree
Showing 89 changed files with 17,977 additions and 6,304 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Expand Up @@ -10,5 +10,5 @@ RUN conda install -y --file /app/conda-requirements.txt && \
RUN pip install --no-cache-dir -q -r /app/requirements.txt
RUN mkdir -p ~/.jupyter && \
cp /app/omegaml/notebook/jupyter/*py ~/.jupyter && \
cd /app && pip install .
cd /app && pip install .[all]
CMD ["jupyter", "--config-dir", "/app/.jupyter"]
5 changes: 2 additions & 3 deletions Makefile
Expand Up @@ -13,11 +13,11 @@ livetest: dist
scripts/livetest.sh --local --build

devtest:
scripts/devtest.sh
scripts/devtest.sh --headless

image:
: "run docker build"
docker build -t omegaml/omegaml:$(VERSION) .
scripts/livetest.sh --build

release-test: dist
: "twine upload to pypi test"
Expand All @@ -42,7 +42,6 @@ release-docker: dist
docker push omegaml/omegaml:${VERSION}
docker push omegaml/omegaml:latest


thirdparty:
: "create THIRDPARTY & THIRDPARTY-LICENSES"
pip-licenses > THIRDPARTY
Expand Down
8 changes: 4 additions & 4 deletions README.rst
Expand Up @@ -6,11 +6,11 @@ omega|ml takes just a single line of code to
* implement data pipelines quickly, without memory limitation, all from a Pandas-like API
* serve models and data from an easy to use REST API

In addition you can
Further, omega|ml is the fastest way to

* train models on the integrated compute cluster (or any other cluster using custom backends)
* collaborate on data science projects easily (using Jupyter Notebook)
* deploy beautiful dashboards right from your Jupyter Notebook (using dashserve)
* scale model training on the included scalable pure-Python compute cluster, on Spark or any other cloud
* collaborate on data science projects easily, sharing Jupyter Notebooks
* deploy beautiful dashboards right from your Jupyter Notebook, using dashserve

Documentation: https://omegaml.github.io/omegaml/

Expand Down
18,665 changes: 12,659 additions & 6,006 deletions THIRDPARTY-LICENSES

Large diffs are not rendered by default.

168 changes: 157 additions & 11 deletions conda-requirements.txt
@@ -1,11 +1,157 @@
# conda-requirements.txt
pandas=0.24.1
scikit-learn=0.20.3
jupyter=1.0.0
pytables=3.4.4
nose=1.3.7
nosexcover=1.0.11
distributed=1.26.0
dask=1.1.4
notebook=5.7.6
tornado=6.0.1
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: linux-64
_libgcc_mutex=0.1=main
_tflow_select=2.3.0=mkl
absl-py=0.7.1=py36_0
astor=0.7.1=py36_0
attrs=19.1.0=py36_1
backcall=0.1.0=py36_0
blas=1.0=mkl
bleach=3.1.0=py36_0
blosc=1.16.3=hd408876_0
bokeh=1.2.0=py36_0
bzip2=1.0.6=h14c3975_5
c-ares=1.15.0=h7b6447c_1
ca-certificates=2019.5.15=1
certifi=2019.6.16=py36_1
click=7.0=py36_0
cloudpickle=1.2.1=py_0
coverage=4.5.3=py36h7b6447c_0
cycler=0.10.0=py36_0
cytoolz=0.9.0.1=py36h14c3975_1
dask=1.1.4=py36_1
dask-core=1.1.4=py36_1
dbus=1.13.6=h746ee38_0
decorator=4.4.0=py36_1
defusedxml=0.6.0=py_0
distributed=1.26.0=py36_1
entrypoints=0.3=py36_0
expat=2.2.6=he6710b0_0
fontconfig=2.13.0=h9420a91_0
freetype=2.9.1=h8a8886c_1
gast=0.2.2=py36_0
glib=2.56.2=hd408876_0
gmp=6.1.2=h6c8ec71_1
google-pasta=0.1.7=py_0
grpcio=1.16.1=py36hf8bcb03_1
gst-plugins-base=1.14.0=hbbd80ab_1
gstreamer=1.14.0=hb453b48_1
h5py=2.9.0=py36h7918eee_0
hdf5=1.10.4=hb1b8bf9_0
heapdict=1.0.0=py36_2
icu=58.2=h9c2bf20_1
intel-openmp=2019.4=243
ipykernel=5.1.1=py36h39e3cac_0
ipython=7.6.0=py36h39e3cac_0
ipython_genutils=0.2.0=py36_0
ipywidgets=7.4.2=py36_0
jedi=0.13.3=py36_0
jinja2=2.10.1=py36_0
jpeg=9b=h024ee3a_2
jsonschema=3.0.1=py36_0
jupyter=1.0.0=py36_7
jupyter_client=5.2.4=py36_0
jupyter_console=6.0.0=py36_0
jupyter_core=4.5.0=py_0
keras=2.2.4=0
keras-applications=1.0.8=py_0
keras-base=2.2.4=py36_0
keras-preprocessing=1.1.0=py_1
kiwisolver=1.1.0=py36he6710b0_0
libedit=3.1.20181209=hc058e9b_0
libffi=3.2.1=hd88cf55_4
libgcc-ng=9.1.0=hdf63c60_0
libgfortran-ng=7.3.0=hdf63c60_0
libpng=1.6.37=hbc83047_0
libprotobuf=3.8.0=hd408876_0
libsodium=1.0.16=h1bed415_0
libstdcxx-ng=9.1.0=hdf63c60_0
libtiff=4.0.10=h2733197_2
libuuid=1.0.3=h1bed415_2
libxcb=1.13=h1bed415_1
libxml2=2.9.9=hea5a465_1
locket=0.2.0=py36_1
lz4-c=1.8.1.2=h14c3975_0
lzo=2.10=h49e0be7_2
markdown=3.1.1=py36_0
markupsafe=1.1.1=py36h7b6447c_0
matplotlib=3.1.0=py36h5429711_0
mistune=0.8.4=py36h7b6447c_0
mkl=2019.4=243
mkl_fft=1.0.12=py36ha843d7b_0
mkl_random=1.0.2=py36hd81dba3_0
msgpack-python=0.6.1=py36hfd86e86_1
nbconvert=5.5.0=py_0
nbformat=4.4.0=py36_0
ncurses=6.1=he6710b0_1
nose=1.3.7=py36_2
nosexcover=1.0.11=py36_1
notebook=5.7.6=py36_0
numexpr=2.6.9=py36h9e4a6bb_0
numpy=1.16.4=py36h7e9f1db_0
numpy-base=1.16.4=py36hde5b4d6_0
olefile=0.46=py36_0
openssl=1.1.1c=h7b6447c_1
packaging=19.0=py36_0
pandas=0.24.1=py36he6710b0_0
pandoc=2.2.3.2=0
pandocfilters=1.4.2=py36_1
parso=0.5.0=py_0
partd=1.0.0=py_0
pcre=8.43=he6710b0_0
pexpect=4.7.0=py36_0
pickleshare=0.7.5=py36_0
pillow=6.0.0=py36h34e0f95_0
pip=19.1.1=py36_0
prometheus_client=0.7.1=py_0
prompt_toolkit=2.0.9=py36_0
protobuf=3.8.0=py36he6710b0_0
psutil=5.6.3=py36h7b6447c_0
ptyprocess=0.6.0=py36_0
pygments=2.4.2=py_0
pyparsing=2.4.0=py_0
pyqt=5.9.2=py36h05f1152_2
pyrsistent=0.14.11=py36h7b6447c_0
pytables=3.4.4=py36h71ec239_0
python=3.6.8=h0371630_0
python-dateutil=2.8.0=py36_0
pytz=2019.1=py_0
pyyaml=5.1.1=py36h7b6447c_0
pyzmq=18.0.0=py36he6710b0_0
qt=5.9.7=h5867ecd_1
qtconsole=4.5.1=py_0
readline=7.0=h7b6447c_5
scikit-learn=0.20.3=py36hd81dba3_0
scipy=1.2.1=py36h7c811a0_0
send2trash=1.5.0=py36_0
setuptools=41.0.1=py36_0
sip=4.19.8=py36hf484d3e_0
six=1.12.0=py36_0
snappy=1.1.7=hbae5bb6_3
sortedcontainers=2.1.0=py36_0
sqlite=3.28.0=h7b6447c_0
tblib=1.4.0=py_0
tensorboard=1.14.0=py36hf484d3e_0
tensorflow=1.14.0=mkl_py36h2526735_0
tensorflow-base=1.14.0=mkl_py36h7ce6ba3_0
tensorflow-estimator=1.14.0=py_0
termcolor=1.1.0=py36_1
terminado=0.8.2=py36_0
testpath=0.4.2=py36_0
tk=8.6.8=hbc83047_0
toolz=0.9.0=py36_0
tornado=6.0.1=py36h7b6447c_0
traitlets=4.3.2=py36_0
wcwidth=0.1.7=py36_0
webencodings=0.5.1=py36_1
werkzeug=0.15.4=py_0
wheel=0.33.4=py36_0
widgetsnbextension=3.4.2=py36_0
wrapt=1.11.2=py36h7b6447c_0
xz=5.2.4=h14c3975_4
yaml=0.1.7=had09818_2
zeromq=4.3.1=he6710b0_3
zict=1.0.0=py_0
zlib=1.2.11=h7b6447c_3
zstd=1.3.7=h0b5b093_0
2 changes: 1 addition & 1 deletion docker-compose-dev.yml
Expand Up @@ -5,7 +5,7 @@ services:
ports:
- "27017:27017"
- "28017:28017"
command: ["--auth"]
command: ["--auth", "--oplogSize", "100"]
rabbitmq:
image: rabbitmq
ports:
Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yml
Expand Up @@ -30,7 +30,7 @@ services:
- OMEGA_BROKER=amqp://rabbitmq:5672//
- C_FORCE_ROOT=yes
rabbitmq:
image: rabbitmq
image: rabbitmq:3.7.17
hostname: rabbitmq
mongodb:
image: mongo:3.6.8-stretch
Expand Down
1 change: 1 addition & 0 deletions docs/requirements.txt
Expand Up @@ -3,3 +3,4 @@ sphinx-autobuild==0.6.0

sphinx-rtd-theme==0.2.4
sphinx-autobuild==0.6.0
nbsphinx==0.4.2
4 changes: 3 additions & 1 deletion docs/source/conf.py
Expand Up @@ -35,6 +35,7 @@
'sphinx.ext.mathjax',
'sphinx.ext.ifconfig',
'sphinx.ext.autosummary',
'nbsphinx',
#'sphinx.ext.githubpages',
]

Expand Down Expand Up @@ -96,7 +97,8 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This patterns also effect to html_static_path and html_extra_path
exclude_patterns = []
exclude_patterns = ['_build', '**.ipynb_checkpoints']
nbsphinx_execute = 'never'

# The reST default role (used for this markup: `text`) to use for all
# documents.
Expand Down
2 changes: 1 addition & 1 deletion docs/source/devguide/mixins.rst
Expand Up @@ -79,7 +79,7 @@ convenience.
class CrossValidationMixin(object):
def cross_validate(modelName, Xname, Yname, *args, **kwargs):
# get the cross validation task
task = self.runtime.task('custom.tasks.cross_validate')
task = self.task('custom.tasks.cross_validate')
return task.delay(modelName, Xname, Yname, *args, **kwargs)
Expand Down
12 changes: 12 additions & 0 deletions docs/source/guide/keras.rst
@@ -0,0 +1,12 @@
Keras
+++++

The Keras backend implements the `.fit()` method with the following Keras-specific extensions:

* :code:`validation_data=` can refer to a tuple of (testX, testY) dataset names instead of actual
data values, similar to X, Y. This will load the validation dataset before :code:`model.fit()`.

* :code:`Metadata.attributes.history` stores the history.history object returned by
  Keras's model.fit() method — a dictionary of all metrics with one entry per epoch.


20 changes: 15 additions & 5 deletions docs/source/guide/modelstore.rst
@@ -1,16 +1,20 @@
Working with Machine Learning Models
====================================

omega|ml currently implements two backends to store models. More backends can
be implemented using the model backend-API.
omega|ml currently implements the following machine learning frameworks out of the box. More backends are planned.
Any backend can be implemented using the backend API.

* scikit-learn models
* Apache Spark models
* scikit-learn
* Keras
* Tensorflow (tf.keras, tf.estimator, tf.data, tf.SavedModel)
* Apache Spark MLLib

Note that support for Keras, Tensorflow and Apache Spark is experimental at this time.

Storing models
--------------

Storing models (and Pipeline) is as straight forward as storing Pandas DataFrames and Series.
Storing models and pipelines is as straightforward as storing Pandas DataFrames and Series.
Simply create the model, then use :code:`om.models.put()` to store:

.. code::
Expand Down Expand Up @@ -170,6 +174,12 @@ runtime supports the following methods on a model:
* :code:`partial_fit`
* :code:`transform`
* :code:`score`
* :code:`gridsearch`

For details refer to the API reference.

Specific frameworks
-------------------

.. include:: keras.rst
.. include:: tensorflow.rst

0 comments on commit 747fb7e

Please sign in to comment.