Skip to content

Commit

Permalink
enable keras, tensorflow
Browse files Browse the repository at this point in the history
update requirements for docker installation and livetest

rebasing on master

fix dependencies
  • Loading branch information
miraculixx committed Aug 17, 2019
1 parent 224f668 commit 747fb7e
Show file tree
Hide file tree
Showing 89 changed files with 17,977 additions and 6,304 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Expand Up @@ -10,5 +10,5 @@ RUN conda install -y --file /app/conda-requirements.txt && \
RUN pip install --no-cache-dir -q -r /app/requirements.txt
RUN mkdir -p ~/.jupyter && \
cp /app/omegaml/notebook/jupyter/*py ~/.jupyter && \
cd /app && pip install .
cd /app && pip install .[all]
CMD ["jupyter", "--config-dir", "/app/.jupyter"]
5 changes: 2 additions & 3 deletions Makefile
Expand Up @@ -13,11 +13,11 @@ livetest: dist
scripts/livetest.sh --local --build

devtest:
scripts/devtest.sh
scripts/devtest.sh --headless

image:
: "run docker build"
docker build -t omegaml/omegaml:$(VERSION) .
scripts/livetest.sh --build

release-test: dist
: "twine upload to pypi test"
Expand All @@ -42,7 +42,6 @@ release-docker: dist
docker push omegaml/omegaml:${VERSION}
docker push omegaml/omegaml:latest


thirdparty:
: "create THIRDPARTY & THIRDPARTY-LICENSES"
pip-licenses > THIRDPARTY
Expand Down
8 changes: 4 additions & 4 deletions README.rst
Expand Up @@ -6,11 +6,11 @@ omega|ml takes just a single line of code to
* implement data pipelines quickly, without memory limitation, all from a Pandas-like API
* serve models and data from an easy to use REST API

In addition you can
Further, omega|ml is the fastest way to

* train models on the integrated compute cluster (or any other cluster using custom backends)
* collaborate on data science projects easily (using Jupyter Notebook)
* deploy beautiful dashboards right from your Jupyter Notebook (using dashserve)
* scale model training on the included scalable pure-Python compute cluster, on Spark or any other cloud
* collaborate on data science projects easily, sharing Jupyter Notebooks
* deploy beautiful dashboards right from your Jupyter Notebook, using dashserve

Documentation: https://omegaml.github.io/omegaml/

Expand Down
18,665 changes: 12,659 additions & 6,006 deletions THIRDPARTY-LICENSES

Large diffs are not rendered by default.

168 changes: 157 additions & 11 deletions conda-requirements.txt
@@ -1,11 +1,157 @@
# conda-requirements.txt
pandas=0.24.1
scikit-learn=0.20.3
jupyter=1.0.0
pytables=3.4.4
nose=1.3.7
nosexcover=1.0.11
distributed=1.26.0
dask=1.1.4
notebook=5.7.6
tornado=6.0.1
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: linux-64
_libgcc_mutex=0.1=main
_tflow_select=2.3.0=mkl
absl-py=0.7.1=py36_0
astor=0.7.1=py36_0
attrs=19.1.0=py36_1
backcall=0.1.0=py36_0
blas=1.0=mkl
bleach=3.1.0=py36_0
blosc=1.16.3=hd408876_0
bokeh=1.2.0=py36_0
bzip2=1.0.6=h14c3975_5
c-ares=1.15.0=h7b6447c_1
ca-certificates=2019.5.15=1
certifi=2019.6.16=py36_1
click=7.0=py36_0
cloudpickle=1.2.1=py_0
coverage=4.5.3=py36h7b6447c_0
cycler=0.10.0=py36_0
cytoolz=0.9.0.1=py36h14c3975_1
dask=1.1.4=py36_1
dask-core=1.1.4=py36_1
dbus=1.13.6=h746ee38_0
decorator=4.4.0=py36_1
defusedxml=0.6.0=py_0
distributed=1.26.0=py36_1
entrypoints=0.3=py36_0
expat=2.2.6=he6710b0_0
fontconfig=2.13.0=h9420a91_0
freetype=2.9.1=h8a8886c_1
gast=0.2.2=py36_0
glib=2.56.2=hd408876_0
gmp=6.1.2=h6c8ec71_1
google-pasta=0.1.7=py_0
grpcio=1.16.1=py36hf8bcb03_1
gst-plugins-base=1.14.0=hbbd80ab_1
gstreamer=1.14.0=hb453b48_1
h5py=2.9.0=py36h7918eee_0
hdf5=1.10.4=hb1b8bf9_0
heapdict=1.0.0=py36_2
icu=58.2=h9c2bf20_1
intel-openmp=2019.4=243
ipykernel=5.1.1=py36h39e3cac_0
ipython=7.6.0=py36h39e3cac_0
ipython_genutils=0.2.0=py36_0
ipywidgets=7.4.2=py36_0
jedi=0.13.3=py36_0
jinja2=2.10.1=py36_0
jpeg=9b=h024ee3a_2
jsonschema=3.0.1=py36_0
jupyter=1.0.0=py36_7
jupyter_client=5.2.4=py36_0
jupyter_console=6.0.0=py36_0
jupyter_core=4.5.0=py_0
keras=2.2.4=0
keras-applications=1.0.8=py_0
keras-base=2.2.4=py36_0
keras-preprocessing=1.1.0=py_1
kiwisolver=1.1.0=py36he6710b0_0
libedit=3.1.20181209=hc058e9b_0
libffi=3.2.1=hd88cf55_4
libgcc-ng=9.1.0=hdf63c60_0
libgfortran-ng=7.3.0=hdf63c60_0
libpng=1.6.37=hbc83047_0
libprotobuf=3.8.0=hd408876_0
libsodium=1.0.16=h1bed415_0
libstdcxx-ng=9.1.0=hdf63c60_0
libtiff=4.0.10=h2733197_2
libuuid=1.0.3=h1bed415_2
libxcb=1.13=h1bed415_1
libxml2=2.9.9=hea5a465_1
locket=0.2.0=py36_1
lz4-c=1.8.1.2=h14c3975_0
lzo=2.10=h49e0be7_2
markdown=3.1.1=py36_0
markupsafe=1.1.1=py36h7b6447c_0
matplotlib=3.1.0=py36h5429711_0
mistune=0.8.4=py36h7b6447c_0
mkl=2019.4=243
mkl_fft=1.0.12=py36ha843d7b_0
mkl_random=1.0.2=py36hd81dba3_0
msgpack-python=0.6.1=py36hfd86e86_1
nbconvert=5.5.0=py_0
nbformat=4.4.0=py36_0
ncurses=6.1=he6710b0_1
nose=1.3.7=py36_2
nosexcover=1.0.11=py36_1
notebook=5.7.6=py36_0
numexpr=2.6.9=py36h9e4a6bb_0
numpy=1.16.4=py36h7e9f1db_0
numpy-base=1.16.4=py36hde5b4d6_0
olefile=0.46=py36_0
openssl=1.1.1c=h7b6447c_1
packaging=19.0=py36_0
pandas=0.24.1=py36he6710b0_0
pandoc=2.2.3.2=0
pandocfilters=1.4.2=py36_1
parso=0.5.0=py_0
partd=1.0.0=py_0
pcre=8.43=he6710b0_0
pexpect=4.7.0=py36_0
pickleshare=0.7.5=py36_0
pillow=6.0.0=py36h34e0f95_0
pip=19.1.1=py36_0
prometheus_client=0.7.1=py_0
prompt_toolkit=2.0.9=py36_0
protobuf=3.8.0=py36he6710b0_0
psutil=5.6.3=py36h7b6447c_0
ptyprocess=0.6.0=py36_0
pygments=2.4.2=py_0
pyparsing=2.4.0=py_0
pyqt=5.9.2=py36h05f1152_2
pyrsistent=0.14.11=py36h7b6447c_0
pytables=3.4.4=py36h71ec239_0
python=3.6.8=h0371630_0
python-dateutil=2.8.0=py36_0
pytz=2019.1=py_0
pyyaml=5.1.1=py36h7b6447c_0
pyzmq=18.0.0=py36he6710b0_0
qt=5.9.7=h5867ecd_1
qtconsole=4.5.1=py_0
readline=7.0=h7b6447c_5
scikit-learn=0.20.3=py36hd81dba3_0
scipy=1.2.1=py36h7c811a0_0
send2trash=1.5.0=py36_0
setuptools=41.0.1=py36_0
sip=4.19.8=py36hf484d3e_0
six=1.12.0=py36_0
snappy=1.1.7=hbae5bb6_3
sortedcontainers=2.1.0=py36_0
sqlite=3.28.0=h7b6447c_0
tblib=1.4.0=py_0
tensorboard=1.14.0=py36hf484d3e_0
tensorflow=1.14.0=mkl_py36h2526735_0
tensorflow-base=1.14.0=mkl_py36h7ce6ba3_0
tensorflow-estimator=1.14.0=py_0
termcolor=1.1.0=py36_1
terminado=0.8.2=py36_0
testpath=0.4.2=py36_0
tk=8.6.8=hbc83047_0
toolz=0.9.0=py36_0
tornado=6.0.1=py36h7b6447c_0
traitlets=4.3.2=py36_0
wcwidth=0.1.7=py36_0
webencodings=0.5.1=py36_1
werkzeug=0.15.4=py_0
wheel=0.33.4=py36_0
widgetsnbextension=3.4.2=py36_0
wrapt=1.11.2=py36h7b6447c_0
xz=5.2.4=h14c3975_4
yaml=0.1.7=had09818_2
zeromq=4.3.1=he6710b0_3
zict=1.0.0=py_0
zlib=1.2.11=h7b6447c_3
zstd=1.3.7=h0b5b093_0
2 changes: 1 addition & 1 deletion docker-compose-dev.yml
Expand Up @@ -5,7 +5,7 @@ services:
ports:
- "27017:27017"
- "28017:28017"
command: ["--auth"]
command: ["--auth", "--oplogSize", "100"]
rabbitmq:
image: rabbitmq
ports:
Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yml
Expand Up @@ -30,7 +30,7 @@ services:
- OMEGA_BROKER=amqp://rabbitmq:5672//
- C_FORCE_ROOT=yes
rabbitmq:
image: rabbitmq
image: rabbitmq:3.7.17
hostname: rabbitmq
mongodb:
image: mongo:3.6.8-stretch
Expand Down
1 change: 1 addition & 0 deletions docs/requirements.txt
Expand Up @@ -3,3 +3,4 @@ sphinx-autobuild==0.6.0

sphinx-rtd-theme==0.2.4
sphinx-autobuild==0.6.0
nbsphinx==0.4.2
4 changes: 3 additions & 1 deletion docs/source/conf.py
Expand Up @@ -35,6 +35,7 @@
'sphinx.ext.mathjax',
'sphinx.ext.ifconfig',
'sphinx.ext.autosummary',
'nbsphinx',
#'sphinx.ext.githubpages',
]

Expand Down Expand Up @@ -96,7 +97,8 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This patterns also effect to html_static_path and html_extra_path
exclude_patterns = []
exclude_patterns = ['_build', '**.ipynb_checkpoints']
nbsphinx_execute = 'never'

# The reST default role (used for this markup: `text`) to use for all
# documents.
Expand Down
2 changes: 1 addition & 1 deletion docs/source/devguide/mixins.rst
Expand Up @@ -79,7 +79,7 @@ convenience.
class CrossValidationMixin(object):
def cross_validate(modelName, Xname, Yname, *args, **kwargs):
# get the cross validation task
task = self.runtime.task('custom.tasks.cross_validate')
task = self.task('custom.tasks.cross_validate')
return task.delay(modelName, Xname, Yname, *args, **kwargs)
Expand Down
12 changes: 12 additions & 0 deletions docs/source/guide/keras.rst
@@ -0,0 +1,12 @@
Keras
+++++

The Keras backend implements the `.fit()` method with the following Keras-specific extensions:

* :code:`validation_data=` can refer to a tuple of (testX, testY) dataset names instead of actual
data values, similar to X, Y. This will load the validation dataset before :code:`model.fit()`.

* :code:`Metadata.attributes.history` stores the history.history object returned by
  Keras's model.fit() method — a dictionary of all metrics with one entry per epoch.


20 changes: 15 additions & 5 deletions docs/source/guide/modelstore.rst
@@ -1,16 +1,20 @@
Working with Machine Learning Models
====================================

omega|ml currently implements two backends to store models. More backends can
be implemented using the model backend-API.
omega|ml currently implements the following machine learning frameworks out of the box. More backends are planned.
Any backend can be implemented using the backend API.

* scikit-learn models
* Apache Spark models
* scikit-learn
* Keras
* Tensorflow (tf.keras, tf.estimator, tf.data, tf.SavedModel)
* Apache Spark MLLib

Note that support for Keras, Tensorflow and Apache Spark is experimental at this time.

Storing models
--------------

Storing models (and Pipeline) is as straight forward as storing Pandas DataFrames and Series.
Storing models and pipelines is as straightforward as storing Pandas DataFrames and Series.
Simply create the model, then use :code:`om.models.put()` to store:

.. code::
Expand Down Expand Up @@ -170,6 +174,12 @@ runtime supports the following methods on a model:
* :code:`partial_fit`
* :code:`transform`
* :code:`score`
* :code:`gridsearch`

For details refer to the API reference.

Specific frameworks
-------------------

.. include:: keras.rst
.. include:: tensorflow.rst

0 comments on commit 747fb7e

Please sign in to comment.