Skip to content

Commit

Permalink
Create a multistage docker image from a debian-bookworm base (#3520)
Browse files Browse the repository at this point in the history
Rewrite Teraslice Dockerfile to be multistage and use node image

This PR makes the following changes:

- Rewrite docker file:
  - Discontinue use of `base-docker-image`
  - Create a multi-stage build:
    - base stage:
      - build image from `debian-bookworm`
- combine functionality from the `base-docker-image` with the yarn build
step to create `/app` directory
    - second stage:
      - build image from `debian-bookworm-slim`
      - copy `/app` artifact from base stage, minimizing dependencies
- Copy the following scripts from `base-docker-image` repo to
`teraslice/scripts`:
  - `docker-pkg-fix.js` (this doesn't seem to be used anywhere)
  - `wait-for-it.sh`

Ultimately the goal is to not have to manage our own base image.

ref: #3518

---------

Co-authored-by: Austin Godber <godber@terascope.io>
  • Loading branch information
busma13 and godber authored Jan 11, 2024
1 parent 0f0adb2 commit 9a44c8e
Show file tree
Hide file tree
Showing 5 changed files with 264 additions and 8 deletions.
69 changes: 63 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,42 @@
# NODE_VERSION is set by default in the config.ts, the following value will only
# be used if you build images by default with docker build
ARG NODE_VERSION=18.18.2
FROM terascope/node-base:${NODE_VERSION}
FROM node:${NODE_VERSION}-bookworm as base

ENV NPM_CONFIG_LOGLEVEL error
# Do not use SASL authentication with kafka
ENV WITH_SASL 0

RUN node --version
RUN yarn --version
RUN npm --version

RUN mkdir -p /app/source

# Install bunyan
RUN yarn global add \
--ignore-optional \
--no-progress \
--no-emoji \
--no-cache \
bunyan

# Install any built-in connectors in /app/
# use npm because there isn't a package.json
WORKDIR /app

# Create a throwaway package.json in /app, then install the built-in kafka
# connector next to it and drop the npm cache to keep the layer small.
# NOTE: the shell form of RUN is executed by /bin/sh, not bash, so the
# bash-only `&>` redirect must not be used here: a POSIX sh parses
# `cmd &> file` as "run cmd in the background, then truncate file", which
# neither silences `npm init` nor waits for it before `npm install` starts.
RUN npm init --yes > /dev/null 2>&1 \
    && npm install \
    --build \
    --no-package-lock \
    --no-optional \
    'terafoundation_kafka_connector@~0.11.1' \
    && npm cache clean --force

WORKDIR /app/source

# verify node-rdkafka is installed right
RUN node --print --eval "require('node-rdkafka')"

ENV NODE_ENV production

Expand All @@ -24,19 +59,41 @@ RUN yarn --prod=false --frozen-lockfile \
--ignore-scripts \
&& yarn cache clean


COPY service.js /app/source/

# verify node-rdkafka is installed right
RUN node -e "require('node-rdkafka')"

# verify teraslice is installed right
RUN node -e "require('teraslice')"
FROM node:${NODE_VERSION}-bookworm-slim

# Affects garbage collection. This default gets overwritten by the memory setting in kubernetes
ENV NODE_OPTIONS "--max-old-space-size=2048"
ENV NODE_ENV production

EXPOSE 5678

# set up the volumes
VOLUME /app/config /app/logs /app/assets
ENV TERAFOUNDATION_CONFIG /app/config/teraslice.yaml

# Use tini to handle sigterm and zombie processes
ENTRYPOINT ["/usr/bin/tini", "--"]

CMD ["node", "service.js"]

RUN apt-get update && \
apt-get install -y libcurl4 tini && \
apt-get autoremove -y && \
apt-get clean -y && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# this can most likely be removed. Looks to be related to node10->12 transition.
COPY scripts/docker-pkg-fix.js /usr/local/bin/docker-pkg-fix
COPY scripts/wait-for-it.sh /usr/local/bin/wait-for-it
COPY --from=base /app /app

WORKDIR /app/source

# verify node-rdkafka is installed right
RUN node -e "require('node-rdkafka')"

# verify teraslice is installed right
RUN node -e "require('teraslice')"
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "teraslice-workspace",
"displayName": "Teraslice",
"version": "0.90.0",
"version": "0.91.0",
"private": true,
"homepage": "https://github.com/terascope/teraslice",
"bugs": {
Expand Down
2 changes: 1 addition & 1 deletion packages/teraslice/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "teraslice",
"displayName": "Teraslice",
"version": "0.90.0",
"version": "0.91.0",
"description": "Distributed computing platform for processing JSON data",
"homepage": "https://github.com/terascope/teraslice#readme",
"bugs": {
Expand Down
26 changes: 26 additions & 0 deletions scripts/docker-pkg-fix.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env node

'use strict';

/*
 * Temporarily strips the `workspaces` field from /app/source/package.json.
 *
 *   docker-pkg-fix pre   moves the real package.json to /tmp and writes a copy
 *                        without `workspaces` in its place
 *   docker-pkg-fix post  deletes that copy and moves the real file back
 *
 * Any other argument prints an error and exits with status 1.
 *
 * We are unsure why this is needed and it can probably be deleted; it looks
 * like something left over from the migration from Node 10 to Node 12.
 */

const fs = require('fs');

const pkgJSONPath = '/app/source/package.json';
const tmpPkgJSONPath = '/tmp/package.json';

const [, , mode] = process.argv;

// Parsed before the mode is inspected, so a missing or malformed package.json
// throws before anything on disk is touched (this also applies to bad modes).
const pkgJSON = JSON.parse(fs.readFileSync(pkgJSONPath));

switch (mode) {
    case 'pre':
        // stash the original, then write the workspace-less variant
        fs.renameSync(pkgJSONPath, tmpPkgJSONPath);
        delete pkgJSON.workspaces;
        fs.writeFileSync(pkgJSONPath, JSON.stringify(pkgJSON, null, 4));
        break;
    case 'post':
        // discard the variant and restore the stashed original
        fs.unlinkSync(pkgJSONPath);
        fs.renameSync(tmpPkgJSONPath, pkgJSONPath);
        break;
    default:
        console.error('Expected first arg to be either "pre" or "post"');
        process.exit(1);
}
173 changes: 173 additions & 0 deletions scripts/wait-for-it.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
#!/usr/bin/env bash
# Use this script to test if a given TCP host/port are available

# Name this script was invoked as, with any leading directories removed.
# Used as the prefix of status messages and in the usage text.
# Parameter expansion is used instead of `basename $0` so that a script path
# containing whitespace is handled correctly and no extra process is spawned.
cmdname=${0##*/}

# Write the given arguments to stderr as one status line.
# Prints nothing (and returns 0) when the global QUIET equals 1.
echoerr() {
  [[ $QUIET -ne 1 ]] || return 0
  echo "$@" >&2
}

# Print the command-line help on stderr and terminate the script with
# exit status 1. The synopsis line is prefixed with the global cmdname.
usage() {
  cat >&2 <<USAGE
Usage:
$cmdname host:port [-s] [-t timeout] [-- command args]
-h HOST | --host=HOST Host or IP under test
-p PORT | --port=PORT TCP port under test
Alternatively, you specify the host and port as host:port
-s | --strict Only execute subcommand if the test succeeds
-q | --quiet Don't output any status messages
-t TIMEOUT | --timeout=TIMEOUT
Timeout in seconds, zero for no timeout
-- COMMAND ARGS Execute command with args after the test finishes
USAGE
  exit 1
}

#######################################
# Poll $HOST:$PORT once per second until a TCP connection succeeds.
# This function never gives up on its own; any time limit is imposed from
# outside (see the `timeout` invocation in wait_for_wrapper).
# Globals:
#   HOST, PORT  (read) target under test
#   TIMEOUT     (read) only used to choose the announcement message
#   ISBUSY      (read) 1 = probe with `nc -z`, otherwise use bash's /dev/tcp
#   cmdname     (read) message prefix
# Outputs:
#   Status messages via echoerr.
# Returns:
#   0 once the port is reachable.
#######################################
wait_for() {
  local start_ts end_ts result

  if [[ $TIMEOUT -gt 0 ]]; then
    echoerr "$cmdname: waiting $TIMEOUT seconds for $HOST:$PORT"
  else
    echoerr "$cmdname: waiting for $HOST:$PORT without a timeout"
  fi

  start_ts=$(date +%s)
  while :; do
    if [[ $ISBUSY -eq 1 ]]; then
      # HOST/PORT are quoted so they reach nc as exactly one argument each,
      # without word splitting or pathname expansion.
      nc -z "$HOST" "$PORT"
      result=$?
    else
      # Opening /dev/tcp/<host>/<port> makes bash attempt a TCP connection;
      # the subshell keeps the descriptor from leaking into this shell.
      (echo >"/dev/tcp/$HOST/$PORT") >/dev/null 2>&1
      result=$?
    fi
    if [[ $result -eq 0 ]]; then
      end_ts=$(date +%s)
      echoerr "$cmdname: $HOST:$PORT is available after $((end_ts - start_ts)) seconds"
      break
    fi
    sleep 1
  done
  return "$result"
}

#######################################
# Re-run this script in --child mode under `timeout`, so the polling loop is
# aborted after $TIMEOUT seconds, and wait for it to finish.
# Globals:
#   TIMEOUT, HOST, PORT, QUIET  (read) forwarded to the child
#   BUSYTIMEFLAG                (read) optional flag placed before the duration
#   cmdname                     (read) message prefix
#   PID                         (written) PID of the background job
#   RESULT                      (written) exit status of the background job
# Outputs:
#   A timeout message via echoerr when the child exits non-zero.
# Returns:
#   The exit status of the `timeout` invocation.
#######################################
wait_for_wrapper() {
  # The command line is assembled as an array so that every value is passed
  # as exactly one argument, whatever characters it contains.
  local -a child_cmd=(timeout)
  if [[ -n $BUSYTIMEFLAG ]]; then
    child_cmd+=("$BUSYTIMEFLAG")
  fi
  child_cmd+=("$TIMEOUT" "$0")
  if [[ $QUIET -eq 1 ]]; then
    child_cmd+=(--quiet)
  fi
  child_cmd+=(--child "--host=$HOST" "--port=$PORT" "--timeout=$TIMEOUT")

  # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692
  "${child_cmd[@]}" &
  PID=$!
  trap "kill -INT -$PID" INT
  wait "$PID"
  RESULT=$?
  if [[ $RESULT -ne 0 ]]; then
    echoerr "$cmdname: timeout occurred after waiting $TIMEOUT seconds for $HOST:$PORT"
  fi
  return "$RESULT"
}

# process arguments
#######################################
# Parse the command line into the globals used by the rest of the script.
# Globals:
#   HOST, PORT, TIMEOUT  (written) from host:port, -h/-p/-t or --host=/--port=/--timeout=
#   QUIET, STRICT, CHILD (written) set to 1 by -q/--quiet, -s/--strict, --child
#   CLI                  (written) array holding everything after `--`
# Arguments:
#   The script's command-line arguments.
# Notes:
#   - The option arms are tested before the generic host:port arm so that an
#     option value containing a colon (for example --host=::1, which the
#     wrapper forwards to the --child invocation) is not mistaken for a
#     host:port pair.
#   - host:port is split with parameter expansion, so the argument is never
#     subject to word splitting or pathname expansion.
#   - -h/-p/-t followed by an empty or missing value stop parsing.
#   - --help and unknown arguments end in usage, which exits the script.
#######################################
parse_args() {
  local rest
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --child)
        CHILD=1
        shift 1
        ;;
      -q | --quiet)
        QUIET=1
        shift 1
        ;;
      -s | --strict)
        STRICT=1
        shift 1
        ;;
      -h)
        HOST="$2"
        if [[ $HOST == "" ]]; then break; fi
        shift 2
        ;;
      --host=*)
        HOST="${1#*=}"
        shift 1
        ;;
      -p)
        PORT="$2"
        if [[ $PORT == "" ]]; then break; fi
        shift 2
        ;;
      --port=*)
        PORT="${1#*=}"
        shift 1
        ;;
      -t)
        TIMEOUT="$2"
        if [[ $TIMEOUT == "" ]]; then break; fi
        shift 2
        ;;
      --timeout=*)
        TIMEOUT="${1#*=}"
        shift 1
        ;;
      --)
        shift
        CLI=("$@")
        break
        ;;
      --help)
        usage
        ;;
      *:*)
        # host:port — text before the first colon is the host, text between
        # the first and second colon (or the end) is the port.
        HOST=${1%%:*}
        rest=${1#*:}
        PORT=${rest%%:*}
        shift 1
        ;;
      *)
        echoerr "Unknown argument: $1"
        usage
        ;;
    esac
  done
}

parse_args "$@"

# Both a host and a port are mandatory; without them report the problem and
# show the usage text (usage terminates the script).
if [[ -z $HOST || -z $PORT ]]; then
  echoerr "Error: you need to provide a host and port to test."
  usage
fi

# Fill in defaults for every setting that is still unset or empty:
# a 15 second timeout, non-strict, parent (non-child) mode, not quiet.
: "${TIMEOUT:=15}"
: "${STRICT:=0}"
: "${CHILD:=0}"
: "${QUIET:=0}"

# Check whether `timeout` is provided by busybox: resolve the command to its
# real path (following symlinks) and look for "busybox" in that path.
# `command -v` is the shell builtin lookup, so this does not depend on the
# external `which` utility; the nested substitution is quoted so a path with
# whitespace stays a single argument.
# ISBUSY also selects the probing method used by wait_for.
TIMEOUT_PATH=$(realpath "$(command -v timeout)")
if [[ $TIMEOUT_PATH == *busybox* ]]; then
  ISBUSY=1
  # We can comment this out because -t doesn't exist
  # in our version of busybox.
  # BUSYTIMEFLAG="-t"
else
  ISBUSY=0
  BUSYTIMEFLAG=""
fi

# Child mode (the script was re-invoked with --child): poll directly and exit
# with the poll result, so nothing below this block runs in the child.
if [[ $CHILD -gt 0 ]]; then
  wait_for
  RESULT=$?
  exit $RESULT
fi

# Parent mode: a positive timeout goes through the `timeout` wrapper, a zero
# timeout polls in-process without any limit. RESULT holds the outcome.
if [[ $TIMEOUT -gt 0 ]]; then
  wait_for_wrapper
else
  wait_for
fi
RESULT=$?

# Without a subcommand after `--`, the probe result is the script's exit status.
if [[ $CLI == "" ]]; then
  exit $RESULT
fi

# In strict mode a failed probe prevents the subcommand from running.
if [[ $RESULT -ne 0 && $STRICT -eq 1 ]]; then
  echoerr "$cmdname: strict mode, refusing to execute subprocess"
  exit $RESULT
fi

# Replace this shell with the requested command.
exec "${CLI[@]}"

0 comments on commit 9a44c8e

Please sign in to comment.