Merge pull request #91 from maxpumperla/spark2
Project overhaul
maxpumperla committed Aug 15, 2018
2 parents 7562adb + 24b544a commit 9eb3252
Showing 50 changed files with 1,864 additions and 838 deletions.
9 changes: 9 additions & 0 deletions .gitignore
@@ -60,3 +60,12 @@ examples/.ipynb_checkpoints
 examples/metastore_db
 
 examples/*.csv
+
+.idea/
+
+.pytest_cache
+
+venv/
+
+train.csv
+test.csv
16 changes: 4 additions & 12 deletions .travis.yml
@@ -3,7 +3,7 @@ dist: trusty
 language: python
 python:
 - "2.7"
-# - "3.4" # Note that hyperopt currently seems to have issues with 3.4
+#- "3.4" # Note that hyperopt currently seems to have issues with 3.4
 install:
 # code below is taken from http://conda.pydata.org/docs/travis.html
 # We do this conditionally because it saves us some downloading if the
@@ -19,22 +19,14 @@ install:
 - conda config --set always_yes yes --set changeps1 no
 - conda update -q conda
 - conda info -a
-- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy matplotlib pandas pytest h5py flask
+- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy pytest h5py flask
 - source activate test-environment
 - pip install pytest-cov python-coveralls
-- pip install git+git://github.com/Theano/Theano.git
-- pip install keras
 - python setup.py install
+- pip install -r requirements.txt
+
-# Install Spark
-- wget http://apache.mirrors.tds.net/spark/spark-1.5.2/spark-1.5.2-bin-hadoop2.6.tgz -P $HOME
-- tar zxvf $HOME/spark-* -C $HOME
-- export SPARK_HOME=$HOME/spark-1.5.2-bin-hadoop2.6
-- export PATH=$PATH:$SPARK_HOME/bin
-
-# Just run an example for now
 script:
 - python -c "import keras.backend"
-- spark-submit --driver-memory 2G $PWD/examples/mnist_mlp_spark.py
 - py.test tests/
 after_success:
 - coveralls
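
The updated CI steps can be reproduced locally. This is a minimal sketch, assuming a working conda installation and the repository root as the working directory; every command is taken from the config above:

    # recreate the Travis test environment (mirrors .travis.yml)
    conda create -q -n test-environment python=2.7 numpy scipy pytest h5py flask
    source activate test-environment
    pip install pytest-cov python-coveralls
    python setup.py install
    pip install -r requirements.txt

    # run the same checks as CI
    python -c "import keras.backend"
    py.test tests/
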
112 changes: 65 additions & 47 deletions Dockerfile
@@ -1,55 +1,73 @@
-# Extension of the Jupyter Notebooks
-# Distributed under the terms of the Modified BSD / MIT License.
-FROM jupyter/scipy-notebook
+ARG cuda_version=9.0
+ARG cudnn_version=7
+FROM nvidia/cuda:${cuda_version}-cudnn${cudnn_version}-devel
 
-MAINTAINER Elephas Project
+# Install system packages
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      bzip2 \
+      g++ \
+      git \
+      graphviz \
+      libgl1-mesa-glx \
+      libhdf5-dev \
+      openmpi-bin \
+      wget && \
+    rm -rf /var/lib/apt/lists/*
 
-USER root
+# Install conda
+ENV CONDA_DIR /opt/conda
+ENV PATH $CONDA_DIR/bin:$PATH
 
-# Spark dependencies
-ENV APACHE_SPARK_VERSION 2.0.1
-ENV PYJ_VERSION py4j-0.10.1-src.zip
-RUN apt-get -y update && \
-    apt-get install -y --no-install-recommends openjdk-7-jre-headless && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-RUN cd /tmp && \
-    wget -q http://d3kbcqa49mib13.cloudfront.net/spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz && \
-    tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz -C /usr/local && \
-    rm spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6.tgz
-RUN cd /usr/local && ln -s spark-${APACHE_SPARK_VERSION}-bin-hadoop2.6 spark
-
-# Mesos dependencies
-# Currently, Mesos is not available from Debian Jessie.
-# So, we are installing it from Debian Wheezy. Once it
-# becomes available for Debian Jessie, we should switch
-# over to using that instead.
-RUN apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF && \
-    DISTRO=debian && \
-    CODENAME=wheezy && \
-    echo "deb http://repos.mesosphere.io/${DISTRO} ${CODENAME} main" > /etc/apt/sources.list.d/mesosphere.list && \
-    apt-get -y update && \
-    apt-get --no-install-recommends -y --force-yes install mesos=0.22.1-1.0.debian78 && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
+RUN wget --quiet --no-check-certificate https://repo.continuum.io/miniconda/Miniconda3-4.2.12-Linux-x86_64.sh && \
+    echo "c59b3dd3cad550ac7596e0d599b91e75d88826db132e4146030ef471bb434e9a *Miniconda3-4.2.12-Linux-x86_64.sh" | sha256sum -c - && \
+    /bin/bash /Miniconda3-4.2.12-Linux-x86_64.sh -f -b -p $CONDA_DIR && \
+    rm Miniconda3-4.2.12-Linux-x86_64.sh && \
+    echo export PATH=$CONDA_DIR/bin:'$PATH' > /etc/profile.d/conda.sh
 
-# additional libraries for Keras and Elephas
-# RUN apt-get --no-install-recommends -y --force-yes install liblapack-dev libblas-dev gfortran
+# Install Python packages and keras
+ENV NB_USER keras
+ENV NB_UID 1000
 
-# Spark and Mesos config
-ENV SPARK_HOME /usr/local/spark
-ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/$PYJ_VERSION
-ENV MESOS_NATIVE_LIBRARY /usr/local/lib/libmesos.so
-ENV SPARK_OPTS --driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info
+RUN useradd -m -s /bin/bash -N -u $NB_UID $NB_USER && \
+    chown $NB_USER $CONDA_DIR -R && \
+    mkdir -p /src && \
+    chown $NB_USER /src
 
 USER $NB_USER
 
-# Install Python 3 Tensorflow
-RUN conda install --quiet --yes 'tensorflow=0.9.0'
-# Keras
-RUN conda install --channel https://conda.anaconda.org/KEHANG --quiet --yes 'keras=1.0.8'
-# Use the latest version of hyperopt (Python 3.5 compatibility)
-RUN pip install https://github.com/hyperopt/hyperopt/archive/master.zip
-# Elephas for distributed Spark
-RUN pip install elephas
-RUN pip install py4j
+ARG python_version=2.7
 
+RUN conda install -y python=${python_version} && \
+    pip install --upgrade pip && \
+    pip install \
+      sklearn_pandas \
+      tensorflow-gpu && \
+    conda install \
+      bcolz \
+      h5py \
+      matplotlib \
+      mkl \
+      nose \
+      notebook \
+      Pillow \
+      pandas \
+      pygpu \
+      pyyaml \
+      scikit-learn \
+      six && \
+    conda clean -yt
+
+ENV PYTHONPATH='/src/:$PYTHONPATH'
+
+RUN mkdir -p app
+WORKDIR /app
+COPY ./requirements.txt /app
+
+# Install requirements
+RUN pip install -r ./requirements.txt
+RUN pip install git+https://github.com/hyperopt/hyperopt.git
+
+EXPOSE 8888
+
+CMD jupyter notebook --port=8888 --ip=0.0.0.0
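
To try the new image, a build-and-run sketch follows; it is not part of the commit. The tag elephas is illustrative, and a GPU host with nvidia-docker2 is assumed, since the base image is nvidia/cuda:

    # build from the repository root (tag name is illustrative)
    docker build -t elephas .

    # run with the NVIDIA runtime and publish the notebook port exposed above
    docker run --runtime=nvidia -p 8888:8888 elephas
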
