diff --git a/bin/bootstrap.py b/bin/bootstrap.py index 692e8e538..c92da7ad9 100755 --- a/bin/bootstrap.py +++ b/bin/bootstrap.py @@ -5,20 +5,13 @@ import os import sys import json -import shutil -import hashlib import logging -import zipfile import argparse import datetime import requests -import requests_toolbelt - -from api import validators -from api import tempdir as tempfile logging.basicConfig( - format='%(asctime)s %(name)16.16s %(filename)24.24s %(lineno)5d:%(levelname)4.4s %(message)s', + format='%(asctime)s %(levelname)8.8s %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=logging.DEBUG, ) @@ -27,144 +20,45 @@ logging.getLogger('requests').setLevel(logging.WARNING) # silence Requests library -if 'SCITRAN_CORE_DRONE_SECRET' not in os.environ: - log.error('SCITRAN_CORE_DRONE_SECRET not configured') - sys.exit(1) - -if 'SCITRAN_RUNTIME_HOST' not in os.environ or 'SCITRAN_RUNTIME_PORT' not in os.environ: - log.error('SCITRAN_RUNTIME_HOST or SCITRAN_RUNTIME_PORT not configured') - sys.exit(1) -else: - API_URL = 'https://%s:%s/api' % (os.environ['SCITRAN_RUNTIME_HOST'], os.environ['SCITRAN_RUNTIME_PORT']) - -if 'SCITRAN_PERSISTENT_PATH' in os.environ and 'SCITRAN_PERSISTENT_DATA_PATH' not in os.environ: - os.environ['SCITRAN_PERSISTENT_DATA_PATH'] = os.path.join(os.environ['SCITRAN_PERSISTENT_PATH'], 'data') - -HTTP_HEADERS = {'X-SciTran-Auth': os.environ['SCITRAN_CORE_DRONE_SECRET'], 'User-Agent': 'SciTran Drone Bootstrapper'} - - -def metadata_encoder(o): - if isinstance(o, datetime.datetime): - if o.tzinfo is None: - o = pytz.timezone('UTC').localize(o) - return o.isoformat() - elif isinstance(o, datetime.tzinfo): - return o.zone - raise TypeError(repr(o) + ' is not JSON serializable') - - -def create_archive(content, arcname, metadata, outdir=None, filenames=None): - path = (os.path.join(outdir, arcname) if outdir else content) + '.zip' - with zipfile.ZipFile(path, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) as zf: - zf.comment = json.dumps(metadata, 
default=metadata_encoder) - zf.write(content, arcname) - for fn in filenames or os.listdir(content): - zf.write(os.path.join(content, fn), os.path.join(arcname, fn)) - return path - - -def users(args): +def users(filepath, api_url, http_headers, insecure): now = datetime.datetime.utcnow() - with open(args.json) as json_dump: - input_data = json.load(json_dump) + with open(filepath) as fd: + input_data = json.load(fd) with requests.Session() as rs: log.info('bootstrapping users...') - rs.verify = not args.insecure - rs.headers = HTTP_HEADERS + rs.verify = not insecure + rs.headers = http_headers for u in input_data.get('users', []): log.info(' ' + u['_id']) - rs.post(API_URL + '/users', json=u) + rs.post(api_url + '/users', json=u) log.info('bootstrapping groups...') - site_id = rs.get(API_URL + '/config').json()['site']['id'] + site_id = rs.get(api_url + '/config').json()['site']['id'] for g in input_data.get('groups', []): log.info(' ' + g['_id']) roles = g.pop('roles') - rs.post(API_URL + '/groups' , json=g) + rs.post(api_url + '/groups' , json=g) for r in roles: r.setdefault('site', site_id) - rs.post(API_URL + '/groups/' + g['_id'] + '/roles' , json=r) + rs.post(api_url + '/groups/' + g['_id'] + '/roles' , json=r) log.info('bootstrapping complete') -users_desc = """ -example: -./bin/bootstrap.py users users_and_groups.json -""" - -def data(args): - log.info('Inspecting %s' % args.path) - files = [] - schema_validator = validators.payload_from_schema_file('uploader.json') - with requests.Session() as rs: - rs.verify = not args.insecure - rs.headers = HTTP_HEADERS - for dirpath, dirnames, filenames in os.walk(args.path): - dirnames[:] = [dn for dn in dirnames if not dn.startswith('.')] # use slice assignment to influence walk - if not dirnames and filenames: - for metadata_file in filenames: - if metadata_file.lower() == 'metadata.json': - filenames.remove(metadata_file) - break - else: - metadata_file = None - if not metadata_file: - log.warning('Skipping %s: No 
metadata found' % dirpath) - continue - with open(os.path.join(dirpath, metadata_file)) as fd: - try: - metadata = json.load(fd) - except ValueError: - log.warning('Skipping %s: Unparsable metadata' % dirpath) - continue - with tempfile.TemporaryDirectory() as tempdir: - log.info('Packaging %s' % dirpath) - filepath = create_archive(dirpath, os.path.basename(dirpath), metadata, tempdir, filenames) - filename = os.path.basename(filepath) - metadata.setdefault('acquisition', {}).setdefault('files', [{}])[0]['name'] = filename - log.info('Validating %s' % filename) - try: - schema_validator(metadata, 'POST') - except validators.InputValidationException: - log.warning('Skipping %s: Invalid metadata' % dirpath) - continue - log.info('Uploading %s' % filename) - rs.post(API_URL + '/groups', json={'_id': metadata['group']['_id']}).json() - with open(filepath, 'rb') as fd: - metadata_json = json.dumps(metadata, default=metadata_encoder) - mpe = requests_toolbelt.multipart.encoder.MultipartEncoder(fields={'metadata': metadata_json, 'file': (filename, fd)}) - rs.post(API_URL + '/uploader', data=mpe, headers={'Content-Type': mpe.content_type}) - -data_desc = """ -example: -./bin/bootstrap.py data /tmp/data -""" - - -parser = argparse.ArgumentParser() -subparsers = parser.add_subparsers(help='operation to perform') - -users_parser = subparsers.add_parser( - name='users', - help='bootstrap users and groups', - description=users_desc, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) -users_parser.add_argument('json', help='JSON file containing users and groups') -users_parser.set_defaults(func=users) - -data_parser = subparsers.add_parser( - name='data', - help='bootstrap files in a dicrectory tree', - description=data_desc, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) -data_parser.add_argument('path', help='filesystem path to data') -data_parser.set_defaults(func=data) - -parser.add_argument('-i', '--insecure', action='store_true', help='do not verify 
SSL connections') -args = parser.parse_args() +ap = argparse.ArgumentParser() +ap.description = 'Bootstrap SciTran users and groups' +ap.add_argument('url', help='API URL') +ap.add_argument('json', help='JSON file containing users and groups') +ap.add_argument('--insecure', action='store_true', help='do not verify SSL connections') +ap.add_argument('--secret', help='shared API secret') +args = ap.parse_args() if args.insecure: requests.packages.urllib3.disable_warnings() -args.func(args) +http_headers = { + 'User-Agent': 'SciTran Drone Bootstrapper', +} +if args.secret: + http_headers['X-SciTran-Auth'] = args.secret +# TODO: extend this to support oauth tokens + +users(args.json, args.url, http_headers, args.insecure) diff --git a/bin/run.sh b/bin/run.sh index bd07e8f3b..616a1252d 100755 --- a/bin/run.sh +++ b/bin/run.sh @@ -25,7 +25,6 @@ fi SCITRAN_RUNTIME_HOST=${SCITRAN_RUNTIME_HOST:-"127.0.0.1"} SCITRAN_RUNTIME_PORT=${SCITRAN_RUNTIME_PORT:-"8080"} SCITRAN_RUNTIME_PATH=${SCITRAN_RUNTIME_PATH:-"./runtime"} -SCITRAN_RUNTIME_SSL_PEM=${SCITRAN_RUNTIME_SSL_PEM:-""} SCITRAN_RUNTIME_BOOTSTRAP=${SCITRAN_RUNTIME_BOOTSTRAP:-"bootstrap.json"} SCITRAN_PERSISTENT_PATH=${SCITRAN_PERSISTENT_PATH:-"./persistent"} SCITRAN_PERSISTENT_DATA_PATH=${SCITRAN_PERSISTENT_DATA_PATH:-"$SCITRAN_PERSISTENT_PATH/data"} @@ -155,10 +154,15 @@ trap "{ sleep 1 +# Set API URL +[ -z "$SCITRAN_RUNTIME_SSL_PEM" ] && API_URL="http" || API_URL="https" +API_URL="$API_URL://$SCITRAN_RUNTIME_HOST:$SCITRAN_RUNTIME_PORT/api" + + # Boostrap users and groups if [ $BOOTSTRAP_USERS -eq 1 ]; then echo "Bootstrapping users" - bin/bootstrap.py -i users "$SCITRAN_RUNTIME_BOOTSTRAP" + bin/bootstrap.py --insecure --secret "$SCITRAN_CORE_DRONE_SECRET" $API_URL "$SCITRAN_RUNTIME_BOOTSTRAP" echo "Bootstrapped users" else echo "Database exists at $SCITRAN_PERSISTENT_PATH/db. Not bootstrapping users." 
@@ -174,11 +178,12 @@ else echo "Updating testdata in $SCITRAN_PERSISTENT_PATH/testdata" git -C $SCITRAN_PERSISTENT_PATH/testdata pull fi + if [ -f "$SCITRAN_PERSISTENT_DATA_PATH/.bootstrapped" ]; then echo "Persistence store exists at $SCITRAN_PERSISTENT_PATH/data. Not bootstrapping data. Remove to re-bootstrap." else echo "Bootstrapping testdata" - bin/bootstrap.py -i data $SCITRAN_PERSISTENT_PATH/testdata + folder_reaper --insecure --secret "$SCITRAN_CORE_DRONE_SECRET" $API_URL "$SCITRAN_PERSISTENT_PATH/testdata" echo "Bootstrapped testdata" touch "$SCITRAN_PERSISTENT_DATA_PATH/.bootstrapped" fi diff --git a/docker/Dockerfile b/docker/Dockerfile index 113fde8df..bdd29e645 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -18,6 +18,7 @@ RUN apt-get update \ python-pip \ libffi-dev \ libssl-dev \ + git \ && rm -rf /var/lib/apt/lists/* \ && pip install -U pip diff --git a/docker/README.md b/docker/README.md index 13bffa9d8..cf88d9839 100644 --- a/docker/README.md +++ b/docker/README.md @@ -22,6 +22,7 @@ preserving their contents across container instances. --name scitran-core \ -e "SCITRAN_PERSISTENT_DB_URI=mongodb://some-mongo:27017/scitran" \ -e "SCITRAN_CORE_INSECURE=true" \ + -e "SCITRAN_CORE_DRONE_SECRET=change-me" \ -v $(pwd)/persistent/data:/var/scitran/data \ -v $(pwd):/var/scitran/code/api \ --link some-mongo \ @@ -30,26 +31,34 @@ preserving their contents across container instances. 
uwsgi \ --ini /var/scitran/config/uwsgi-config.ini \ --http 0.0.0.0:8080 \ + --http-keepalive \ --python-autoreload 1 # Bootstrap Account Example: docker run \ - -e "SCITRAN_PERSISTENT_DB_URI=mongodb://some-mongo:27017/scitran" \ - --link some-mongo \ + -e "SCITRAN_RUNTIME_HOST=scitran-core" \ + -e "SCITRAN_RUNTIME_PORT=8080" \ + -e "SCITRAN_RUNTIME_PROTOCOL=http" \ + -e "SCITRAN_CORE_DRONE_SECRET=change-me" \ + --link scitran-core \ --rm \ - -v /dev/bali.prod/docker/uwsgi/bootstrap-dev.json:/accounts.json \ - scitran/core \ + -v /dev/bali.prod/docker/uwsgi/bootstrap.json:/accounts.json \ + scitran-core \ /var/scitran/code/api/docker/bootstrap-accounts.sh \ /accounts.json + # Bootstrap Data Example: docker run \ - -e "SCITRAN_PERSISTENT_DB_URI=mongodb://some-mongo:27017/scitran" \ - --link some-mongo \ + -e "SCITRAN_RUNTIME_HOST=scitran-core" \ + -e "SCITRAN_RUNTIME_PORT=8080" \ + -e "SCITRAN_RUNTIME_PROTOCOL=http" \ + -e "SCITRAN_CORE_DRONE_SECRET=change-me" \ + --link scitran-core \ --volumes-from scitran-core \ --rm \ - scitran/core \ + scitran-core \ /var/scitran/code/api/docker/bootstrap-data.sh ``` @@ -77,5 +86,6 @@ docker run \ uwsgi \ --ini /var/scitran/config/uwsgi-config.ini \ --http 0.0.0.0:8080 \ + --http-keepalive \ --python-autoreload 1 ``` diff --git a/docker/bootstrap-accounts.sh b/docker/bootstrap-accounts.sh index 65c5f5ab2..876eb3726 100755 --- a/docker/bootstrap-accounts.sh +++ b/docker/bootstrap-accounts.sh @@ -21,8 +21,11 @@ cd /var/scitran/code/api export PYTHONPATH=. 
+# Set API URL +API_URL="$SCITRAN_RUNTIME_PROTOCOL://$SCITRAN_RUNTIME_HOST:$SCITRAN_RUNTIME_PORT/api" + # Bootstrap Users -./bin/bootstrap.py -i users ${bootstrap_user_file} +./bin/bootstrap.py --insecure --secret "${SCITRAN_CORE_DRONE_SECRET}" "${API_URL}" "${bootstrap_user_file}" ) diff --git a/docker/bootstrap-data.sh b/docker/bootstrap-data.sh index af1c39b55..6c2963f39 100755 --- a/docker/bootstrap-data.sh +++ b/docker/bootstrap-data.sh @@ -21,7 +21,11 @@ GET_LATEST_DATA=${1:-N} # # When changing scitran/testdata, merge that change to master first, # then reference that resulting commit hash here. -bootstrap_data_label=9362b768d54caf6e5cd35f00498208c3b2bff77d +bootstrap_data_label=ff987283d43db2849d943adbdd4f0d0cdecb6d44 + + +# Same as bootstrap_data_label above, except for scitran/reaper. +bootstrap_reaper_label=30215c66a33b18685e1608dbe952e78c370d8765 # Move to API folder for relative path assumptions later on @@ -44,11 +48,18 @@ TESTDATA_DIR=$SCITRAN_PERSISTENT_PATH/testdata if [ ! -d "$TESTDATA_DIR" ] || [ ! -d "$TESTDATA_DIR/download" ] || [ ! -f "$TESTDATA_DIR/.testdata_version" ]; then echo "Downloading testdata to $TESTDATA_DIR" + + # Remove old contents, as they may not be forward compatible. + rm -rf "$TESTDATA_DIR" mkdir -p "$TESTDATA_DIR/download" curl -L $TESTDATA_URL | tar xz -C "$TESTDATA_DIR/download" --strip-components 1 else if [ "$TESTDATA_VERSION" != "$(cat $TESTDATA_DIR/.testdata_version)" ]; then echo "Testdata out of date; downloading" + + # Remove old contents, as they may not be forward compatible. 
+ rm -rf "$TESTDATA_DIR" + mkdir -p "$TESTDATA_DIR/download" curl -L $TESTDATA_URL | tar xz -C "$TESTDATA_DIR/download" --strip-components 1 else echo "Testdata up to date" @@ -56,8 +67,14 @@ else fi builtin echo "$TESTDATA_VERSION" > "$TESTDATA_DIR/.testdata_version" +# pull reaper module +pip install "git+https://github.com/scitran/reaper.git@${bootstrap_reaper_label}" + + +# Set API URL +API_URL="$SCITRAN_RUNTIME_PROTOCOL://$SCITRAN_RUNTIME_HOST:$SCITRAN_RUNTIME_PORT/api" ## load the test data in -./bin/bootstrap.py -i data $TESTDATA_DIR/download +folder_reaper --insecure --secret "${SCITRAN_CORE_DRONE_SECRET}" "${API_URL}" "$TESTDATA_DIR/download" ) diff --git a/docker/uwsgi-entrypoint.sh b/docker/uwsgi-entrypoint.sh index dc3e5e104..d0cc6fe8a 100755 --- a/docker/uwsgi-entrypoint.sh +++ b/docker/uwsgi-entrypoint.sh @@ -32,6 +32,11 @@ if [ "${1:0:1}" = '-' ]; then set -- uwsgi "$@" fi +# run $PRE_RUNAS_CMD as root if provided. Useful for things like JIT pip installs. +if [ ! -z "${PRE_RUNAS_CMD}" ]; then + ${PRE_RUNAS_CMD} +fi + if [ "$1" = 'uwsgi' ]; then exec gosu ${RUNAS_USER} "$@" fi diff --git a/requirements_dev.txt b/requirements_dev.txt index fda2f85d0..edd554315 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -7,3 +7,5 @@ pylint==1.5.3 pytest==2.8.5 pytest-cov==2.2.0 pytest-watch==3.8.0 + +git+https://github.com/scitran/reaper.git diff --git a/sample.config b/sample.config index f61bea7a3..60d9df8e6 100644 --- a/sample.config +++ b/sample.config @@ -2,6 +2,7 @@ #SCITRAN_RUNTIME_HOST="127.0.0.1" #SCITRAN_RUNTIME_PORT="8080" +#SCITRAN_RUNTIME_PROTOCOL="https" #SCITRAN_RUNTIME_PATH="./runtime" #SCITRAN_RUNTIME_SSL_PEM="*" #SCITRAN_RUNTIME_BOOTSTRAP="bootstrap.json"