Skip to content

Commit

Permalink
Modify NB Dockerfile and start scripts to detect PVC mount for /home/…
Browse files Browse the repository at this point in the history
…jovyan
  • Loading branch information
Peter MacKinnon committed May 14, 2018
1 parent bf6fb9f commit 7d6136b
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 25 deletions.
2 changes: 1 addition & 1 deletion bootstrap/cmd/bootstrap/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ func Run(opt *options.ServerOption) error {

pvcMount := ""
if hasDefault {
pvcMount = "/home/jovyan/work"
pvcMount = "/home/jovyan"
}

err = actions.RunParamSet(map[string]interface{}{
Expand Down
43 changes: 23 additions & 20 deletions components/tensorflow-notebook-image/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ ENV DEBIAN_FRONTEND noninteractive
ENV NB_USER jovyan
ENV NB_UID 1000
ENV HOME /home/$NB_USER
ENV CONDA_DIR=$HOME/.conda
ENV CONDA_DIR=/opt/conda
ENV PATH $CONDA_DIR/bin:$PATH

# Use bash instead of sh
Expand Down Expand Up @@ -50,8 +50,11 @@ ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8

# Create jovyan user with UID=1000 and in the 'users' group
RUN useradd -m -s /bin/bash -N -u $NB_UID $NB_USER && \
chown -R ${NB_USER}:users /usr/local/bin
# but allow for non-initial launches of the notebook to have
# $HOME provided by the contents of a PV
RUN useradd -M -s /bin/bash -N -u $NB_UID $NB_USER && \
chown -R ${NB_USER}:users /usr/local/bin && \
mkdir -p $HOME

RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
echo "deb https://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" > /etc/apt/sources.list.d/google-cloud-sdk.list && \
Expand All @@ -62,16 +65,6 @@ RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
gcloud config set component_manager/disable_update_check true && \
gcloud config set metrics/environment github_docker_image

RUN chown -R ${NB_USER}:users /home/${NB_USER}

# Run everything below this as $NB_USER
USER $NB_USER

WORKDIR $HOME

# Setup work directory for backward-compatibility
RUN mkdir /home/$NB_USER/work

# Install Tini - used as entrypoint for container
RUN cd /tmp && \
wget --quiet https://github.com/krallin/tini/releases/download/v0.10.0/tini && \
Expand Down Expand Up @@ -126,23 +119,33 @@ RUN conda create -n py2 python=2 && \
pip install --no-cache-dir tensorflow-model-analysis && \
jupyter nbextension install --py --symlink tensorflow_model_analysis --user && \
jupyter nbextension enable --py tensorflow_model_analysis --user; \
fi

# Install jupyterlab-manager
RUN conda install --quiet --yes \
fi \
&& \
# Install jupyterlab-manager
conda install --quiet --yes \
# nodejs required for jupyterlab-manager
nodejs && \
jupyter labextension install @jupyter-widgets/jupyterlab-manager
jupyter labextension install @jupyter-widgets/jupyterlab-manager && \
# Do chown in this layer for significant size savings
chown -R ${NB_USER}:users $HOME && \
chown -R ${NB_USER}:users $CONDA_DIR

# Install common packages from requirements.txt for both python2 and python3
# NB: the COPY chown can't expand a bash variable for NB_USER
COPY --chown=jovyan:users requirements.txt $HOME/requirements.txt
COPY --chown=jovyan:users jupyter_notebook_config.py $HOME/.jupyter/
RUN pip --no-cache-dir install -r $HOME/requirements.txt && \
source activate py2 && \
pip --no-cache-dir install -r $HOME/requirements.txt

# Tar and delete staged content
WORKDIR $HOME
RUN tar -cf /tmp/${NB_USER}.tar -C $HOME . && \
chown ${NB_USER}:users /tmp/${NB_USER}.tar && \
rm -fr $(ls -A $HOME)

# Copy over init scripts
COPY --chown=jovyan:users start-singleuser.sh start-notebook.sh start.sh /usr/local/bin/
COPY --chown=jovyan:users jupyter_notebook_config.py $HOME/.jupyter/
COPY --chown=jovyan:users start-singleuser.sh start-notebook.sh start.sh pvc-check.sh /usr/local/bin/
RUN chmod a+rx /usr/local/bin/*

# Configure container startup
Expand Down
40 changes: 40 additions & 0 deletions components/tensorflow-notebook-image/pvc-check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash

# Copyright 2016 The Kubeflow Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Initialise $HOME on first launch, or refresh Python packages on later ones.
#
# This script is meant to be sourced by the notebook start script, so any
# change to shell state made here (cd, conda activation) carries over into
# the caller. Keep such changes deliberate.
#
# Reads: $NB_USER and $HOME from the environment.

# Tarball of the staged home directory, built into the notebook image.
SRC_TAR="/tmp/$NB_USER.tar"
WORK_DIR="$HOME/work"

echo "checking if $HOME volume needs init..."

if [ -n "$(ls -A "$HOME")" ]; then
  # assume we are working with an existing volume via a PVC
  echo "...$HOME already has content. Reinstalling packages..."
  cd "$HOME"
  pip install -q -r "$HOME/requirements.txt"
  # Activate py2 inside a subshell: because this file is sourced, a plain
  # `source activate py2` would leave the py2 environment active in the
  # shell that goes on to launch the notebook server.
  ( source activate py2 && pip install -q -r "$HOME/requirements.txt" )
else
  # clean volume, needs init
  echo "...creating $WORK_DIR"
  mkdir -p "$WORK_DIR"

  echo "...load initial content into $HOME..."
  # --no-overwrite-dir keeps the metadata of directories that already exist
  # (such as the mount point itself) instead of taking it from the archive.
  tar --no-overwrite-dir -xf "$SRC_TAR" -C "$HOME" .

  # Fix ownership of everything inside $HOME but not $HOME itself. Using
  # find yields absolute paths, so this no longer depends on the current
  # directory and copes with names containing whitespace.
  find "$HOME" -mindepth 1 -maxdepth 1 -exec chown -R "$NB_USER:users" {} +
fi

echo "...done"
3 changes: 3 additions & 0 deletions components/tensorflow-notebook-image/start-singleuser.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,7 @@ if [ ! -z "$JPY_HUB_API_URL" ]; then
NOTEBOOK_ARGS="--hub-api-url=$JPY_HUB_API_URL $NOTEBOOK_ARGS"
fi

# check to see if a PV has been mounted
. /usr/local/bin/pvc-check.sh

. /usr/local/bin/start.sh jupyterhub-singleuser $NOTEBOOK_ARGS $@
6 changes: 5 additions & 1 deletion kubeflow/core/kubeform_spawner.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,18 @@ def extra_resource_limits(self):
c.KubeSpawner.start_timeout = 60 * 30
# Increase timeout to 5 minutes to avoid HTTP 500 errors on JupyterHub
c.KubeSpawner.http_timeout = 60 * 5

# Volume setup
c.KubeSpawner.singleuser_uid = 1000
c.KubeSpawner.singleuser_fs_gid = 100
c.KubeSpawner.singleuser_working_dir = '/home/jovyan'
volumes = []
volume_mounts = []
###################################################
# Persistent volume options
###################################################
# Using persistent storage requires a default storage class.
# TODO(jlewi): Verify this works on minikube.
# TODO(jlewi): Should we set c.KubeSpawner.singleuser_fs_gid = 1000
# see https://github.com/kubeflow/kubeflow/pull/22#issuecomment-350500944
pvc_mount = os.environ.get('NOTEBOOK_PVC_MOUNT')
if pvc_mount and pvc_mount != 'null':
Expand Down
2 changes: 1 addition & 1 deletion kubeflow/core/tests/jupyterhub_test.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ local params = {
jupyterHubAuthenticator:: null,
jupyterHubServiceType:: "ClusterIP",
jupyterHubImage: "gcr.io/kubeflow/jupyterhub-k8s:1.0.1",
jupyterNotebookPVCMount: "/home/jovyan/work",
jupyterNotebookPVCMount: "/home/jovyan",
cloud: null,
};

Expand Down
4 changes: 2 additions & 2 deletions user_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,10 @@ kubectl get storageclass
parameter to create a volume that will be mounted within the notebook

```
ks param set kubeflow-core jupyterNotebookPVCMount /home/jovyan/work
ks param set kubeflow-core jupyterNotebookPVCMount /home/jovyan
```

* Here we mount the volume at `/home/jovyan/work` because the notebook
* Here we mount the volume at `/home/jovyan` because the notebook
always executes as user jovyan
* The selected directory will be stored on whatever storage is the default
for the cluster (typically some form of persistent disk)
Expand Down

0 comments on commit 7d6136b

Please sign in to comment.