From efdeeeffb92c82aa3e241a2122386ab362c34db2 Mon Sep 17 00:00:00 2001 From: HongwuLin Date: Tue, 23 Jun 2020 23:52:30 +0800 Subject: [PATCH 1/4] Minimize sqlflow:mysql image --- docker/dev/find_fastest_resources.sh | 22 ++++++++-- docker/mysql/Dockerfile | 13 +++--- docker/mysql/install-mysql-server.bash | 32 -------------- docker/mysql/start.bash | 61 +++++++++++++++++--------- 4 files changed, 67 insertions(+), 61 deletions(-) delete mode 100755 docker/mysql/install-mysql-server.bash diff --git a/docker/dev/find_fastest_resources.sh b/docker/dev/find_fastest_resources.sh index 4d87597276..eeb6247289 100755 --- a/docker/dev/find_fastest_resources.sh +++ b/docker/dev/find_fastest_resources.sh @@ -1,4 +1,5 @@ #!/bin/bash + # Copyright 2020 The SQLFlow Authors. All rights reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -36,7 +37,7 @@ function get_domain_from_url() { # Find the fastest URL. The parameter consists of URLS separated by whitespace. function find_fastest_url() { - local speed=99999.9 + local speed=99999 # shellcheck disable=SC2068 for i in $@; do local domain @@ -48,10 +49,11 @@ function find_fastest_url() { cur_speed=$(ping -c 4 -W 2 "$domain" | tail -1 \ | grep "/avg/" | awk '{print $4}'\ | cut -d '/' -f 2) - cur_speed=${cur_speed:-99999.9} + cur_speed=${cur_speed:-99999} + cur_speed=${cur_speed/.*/} # c.f. https://stackoverflow.com/a/31087503/724872 - if (( $(echo "$cur_speed < $speed" | bc -l) )); then + if [[ $cur_speed -lt $speed ]]; then local best_domain="$i" speed="$cur_speed" fi @@ -245,3 +247,17 @@ function choose_fastest_pip_source() { find_fastest_pip_mirror > "$HOME"/.pip/pip.conf } +function choose_fastest_alpine_source() { + default="http://dl-cdn.alpinelinux.org" + read -r -d '\t' urls </dev/null + VOLUME /var/lib/mysql ARG MYSQL_PORT="3306" diff --git a/docker/mysql/install-mysql-server.bash b/docker/mysql/install-mysql-server.bash deleted file mode 100755 index d16b234666..0000000000 --- a/docker/mysql/install-mysql-server.bash +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -# Copyright 2020 The SQLFlow Authors. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -e - -source find_fastest_resources.sh -choose_fastest_apt_source - -echo "Install MySQL server without a password prompt ..." -echo 'mysql-server mysql-server/root_password password root' | \ - debconf-set-selections -echo 'mysql-server mysql-server/root_password_again password root' | \ - debconf-set-selections -apt-get -qq update > /dev/null -apt-get -qq install -y mysql-server > /dev/null -mkdir -p /var/run/mysqld -mkdir -p /var/lib/mysql -chown mysql:mysql /var/run/mysqld -chown mysql:mysql /var/lib/mysql -mkdir -p /docker-entrypoint-initdb.d diff --git a/docker/mysql/start.bash b/docker/mysql/start.bash index e776effd06..49a646508f 100755 --- a/docker/mysql/start.bash +++ b/docker/mysql/start.bash @@ -1,4 +1,5 @@ -#!/bin/bash +#!/bin/sh + # Copyright 2020 The SQLFlow Authors. All rights reserved. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,14 +15,38 @@ set -e + +echo "Init mysqld if needed ..." +if [[ -d "/docker-entrypoint-initdb.d" ]]; then + echo "Skip" +else + mkdir -p /var/run/mysqld + mkdir -p /var/lib/mysql + chown mysql:mysql /var/run/mysqld + chown mysql:mysql /var/lib/mysql + mkdir -p /docker-entrypoint-initdb.d + + mysql_install_db --user=mysql --datadir=/var/lib/mysql >dev/null + mysqld --user=mysql --bootstrap --verbose=0 \ + --skip-name-resolve --skip-networking=0 >/dev/null </dev/null 2>&1 & +sleep 2 echo "Sleep until MySQL server is ready ..." @@ -31,26 +56,16 @@ until mysql -u root -proot \ --port "$MYSQL_PORT" \ -e ";" ; do sleep 1 - read -r -p "Can't connect, retrying..." + echo "Can't connect, retrying..." done -# Grant all privileges to all the remote hosts so that the sqlflow -# server can be scaled to more than one replicas. -# -# NOTE: should notice this authorization on the production -# environment, it's not safe. -mysql -uroot -proot \ - -e "GRANT ALL PRIVILEGES ON *.* TO 'root'@'' IDENTIFIED BY 'root' WITH GRANT OPTION;" - - -# FIXME(typhoonzero): should let docker-entrypoint.sh do this work +echo "Populate datasets ..." for f in /datasets/*; do - echo "Populate datasets $f ..." - mysql -uroot -proot \ - --host "$MYSQL_HOST" --port "$MYSQL_PORT" \ - < "$f" + echo "$f" + mysql -uroot -proot < "$f" done +echo "Done." # If we run the contaienr with -v host_dir:/work, then the following @@ -59,4 +74,10 @@ done # file using the trick https://unix.stackexchange.com/a/185370/325629. mkdir -p /work && touch /work/mysql-inited -sleep infinity + +# c.f. https://stackoverflow.com/questions/2935183/bash-infinite-sleep-infinite-blocking for BusyBox +echo "Serving ..." +while true; + do sleep 1d; +done + From 58c8edcf5722be0a41bf73af2bd3530233d41740 Mon Sep 17 00:00:00 2001 From: HongwuLin Date: Wed, 24 Jun 2020 11:08:25 +0800 Subject: [PATCH 2/4] fix ci --- pkg/workflow/argo/fetch_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/workflow/argo/fetch_test.go b/pkg/workflow/argo/fetch_test.go index 0f39bb9c0b..01e5709037 100644 --- a/pkg/workflow/argo/fetch_test.go +++ b/pkg/workflow/argo/fetch_test.go @@ -215,9 +215,9 @@ func TestFetch(t *testing.T) { a.Contains(concatedLogs, "SQLFlow Step: [3/3] Status: Succeeded") // confirm columns and rows of sql: SELECT 1; a.Equal([]string{"1"}, columns) - v := &wrappers.Int64Value{} + v := &wrappers.Int32Value{} a.NoError(ptypes.UnmarshalAny(rows[0][0], v)) - a.Equal(v.GetValue(), int64(1)) + a.Equal(v.GetValue(), int32(1)) } func waitUntilPodRunning(podID string) error { From 27350d377fbed17404e2cc55fc07d55365275951 Mon Sep 17 00:00:00 2001 From: HongwuLin Date: Sun, 28 Jun 2020 22:20:33 +0800 Subject: [PATCH 3/4] minimize jupyter image size --- docker/jupyter/Dockerfile | 33 +++++++------------ ...nstall-jupyter.bash => install-jupyter.sh} | 16 +++++---- 2 files changed, 22 insertions(+), 27 deletions(-) rename docker/jupyter/{install-jupyter.bash => install-jupyter.sh} (75%) diff --git a/docker/jupyter/Dockerfile b/docker/jupyter/Dockerfile index 5beec32469..faf0c6b528 100644 --- a/docker/jupyter/Dockerfile +++ b/docker/jupyter/Dockerfile @@ -1,32 +1,23 @@ # This Dockerfile containers Jupyter Notebook server with many # SQLFlow tutorials and SQLFlow magic command. -FROM jupyter/base-notebook +FROM alpine:3.12 -# using root user to avoid permission deni -USER root -# Choose fastest mirrors for apt-get and pip -COPY docker/dev/find_fastest_resources.sh /usr/local/bin/find_fastest_resources.sh -RUN /bin/bash -c 'source find_fastest_resources.sh \ - && echo "Choose the fastest APT source ..." \ - && choose_fastest_apt_source \ - && echo "Choose the fastest PIP source ..." \ - && choose_fastest_pip_source' +COPY docker/dev/find_fastest_resources.sh /usr/local/bin/ +RUN /bin/sh -c "source find_fastest_resources.sh && \ + choose_fastest_alpine_source && \ + choose_fastest_pip_source" -# Install dependencies. -COPY docker/jupyter/js /jupyter/js - -RUN apt-get -qq update - -COPY docker/jupyter/install-jupyter.bash /jupyter -RUN /jupyter/install-jupyter.bash - -# switch back to the default user to avoid accidental container runs as root, -# this env comes from base Dockerfile: https://github.com/jupyter/docker-stacks/blob/master/base-notebook/Dockerfile#L13 -USER $NB_UID +RUN apk add --no-cache python3 py3-pip py3-pyzmq py3-grpcio # Install IPythono Notebook tutorials +COPY /docker/jupyter/js /jupyter/js/ COPY build/tutorial /workspace +COPY docker/jupyter/install-jupyter.sh /jupyter/install-jupyter.sh +RUN /bin/sh /jupyter/install-jupyter.sh + +# Cleanup +RUN apk del --purge py3-pip # The following SQLFlow gRPC server endpoint implies the server runs in a container, # and if container has the option --net=container:sqlflow_server_container, SQLFlow magic diff --git a/docker/jupyter/install-jupyter.bash b/docker/jupyter/install-jupyter.sh similarity index 75% rename from docker/jupyter/install-jupyter.bash rename to docker/jupyter/install-jupyter.sh index 31a8a37a15..4a11eb54bd 100755 --- a/docker/jupyter/install-jupyter.bash +++ b/docker/jupyter/install-jupyter.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh # Copyright 2020 The SQLFlow Authors. All rights reserved. # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,12 +15,16 @@ set -e -# This file depends on install-python.bash. # install jupyterhub Python package so that this image can be used as jupyterhub # singleuser notebook server, ref: https://github.com/jupyterhub/jupyterhub/tree/master/singleuser -pip install --quiet \ - jupyterhub==1.1.0 \ - sqlflow==0.10.0 # sqlflow is the Python client of SQLFlow server. +# Install pandas pre-compiled apk, we do not want to build this python package locally because it relies on gcc and other build tools, which make the image very large +wget -q http://cdn.sqlflow.tech/alpine/py3-pandas-1.0.3-r0.apk +wget -q -P /etc/apk/keys/ http://cdn.sqlflow.tech/alpine/sqlflow-5ef80180.rsa.pub +apk add py3-pandas-1.0.3-r0.apk && rm py3-pandas-1.0.3-r0.apk + +pip -q install \ + notebook \ + sqlflow==0.10.0 # Load SQLFlow's Jupyter magic command # automatically. c.f. https://stackoverflow.com/a/32683001. @@ -33,7 +37,7 @@ mkdir -p /workspace >> "$IPYTHON_STARTUP"/00-first.py # Enable highlighting, see https://stackoverflow.com/questions/43641362 -NOTEBOOK_DIR=$(python -c "print(__import__('notebook').__path__[0])") +NOTEBOOK_DIR=$(python3 -c "print(__import__('notebook').__path__[0])") CODE_MIRROR_MODE_PATH=$NOTEBOOK_DIR/static/components/codemirror/mode mkdir -p "$HOME"/.jupyter/custom/ mkdir -p "$CODE_MIRROR_MODE_PATH"/sqlflow From fda415a446b155bf82502474b6f99824a2a8e415 Mon Sep 17 00:00:00 2001 From: HongwuLin Date: Sun, 28 Jun 2020 22:23:18 +0800 Subject: [PATCH 4/4] modify doc --- docker/jupyter/install-jupyter.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/jupyter/install-jupyter.sh b/docker/jupyter/install-jupyter.sh index 4a11eb54bd..f1e69e5fe6 100755 --- a/docker/jupyter/install-jupyter.sh +++ b/docker/jupyter/install-jupyter.sh @@ -17,7 +17,9 @@ set -e # install jupyterhub Python package so that this image can be used as jupyterhub # singleuser notebook server, ref: https://github.com/jupyterhub/jupyterhub/tree/master/singleuser -# Install pandas pre-compiled apk, we do not want to build this python package locally because it relies on gcc and other build tools, which make the image very large +# Install pandas pre-compiled apk, we do not want to build +# this python package locally because it relies on gcc and +# other build tools, which make the image very large wget -q http://cdn.sqlflow.tech/alpine/py3-pandas-1.0.3-r0.apk wget -q -P /etc/apk/keys/ http://cdn.sqlflow.tech/alpine/sqlflow-5ef80180.rsa.pub apk add py3-pandas-1.0.3-r0.apk && rm py3-pandas-1.0.3-r0.apk