Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 25 additions & 131 deletions bin/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,13 @@
import os
import sys
import json
import shutil
import hashlib
import logging
import zipfile
import argparse
import datetime
import requests
import requests_toolbelt

from api import validators
from api import tempdir as tempfile

logging.basicConfig(
format='%(asctime)s %(name)16.16s %(filename)24.24s %(lineno)5d:%(levelname)4.4s %(message)s',
format='%(asctime)s %(levelname)8.8s %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
level=logging.DEBUG,
)
Expand All @@ -27,144 +20,45 @@
logging.getLogger('requests').setLevel(logging.WARNING) # silence Requests library


if 'SCITRAN_CORE_DRONE_SECRET' not in os.environ:
log.error('SCITRAN_CORE_DRONE_SECRET not configured')
sys.exit(1)

if 'SCITRAN_RUNTIME_HOST' not in os.environ or 'SCITRAN_RUNTIME_PORT' not in os.environ:
log.error('SCITRAN_RUNTIME_HOST or SCITRAN_RUNTIME_PORT not configured')
sys.exit(1)
else:
API_URL = 'https://%s:%s/api' % (os.environ['SCITRAN_RUNTIME_HOST'], os.environ['SCITRAN_RUNTIME_PORT'])

if 'SCITRAN_PERSISTENT_PATH' in os.environ and 'SCITRAN_PERSISTENT_DATA_PATH' not in os.environ:
os.environ['SCITRAN_PERSISTENT_DATA_PATH'] = os.path.join(os.environ['SCITRAN_PERSISTENT_PATH'], 'data')

HTTP_HEADERS = {'X-SciTran-Auth': os.environ['SCITRAN_CORE_DRONE_SECRET'], 'User-Agent': 'SciTran Drone Bootstrapper'}


def metadata_encoder(o):
if isinstance(o, datetime.datetime):
if o.tzinfo is None:
o = pytz.timezone('UTC').localize(o)
return o.isoformat()
elif isinstance(o, datetime.tzinfo):
return o.zone
raise TypeError(repr(o) + ' is not JSON serializable')


def create_archive(content, arcname, metadata, outdir=None, filenames=None):
path = (os.path.join(outdir, arcname) if outdir else content) + '.zip'
with zipfile.ZipFile(path, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) as zf:
zf.comment = json.dumps(metadata, default=metadata_encoder)
zf.write(content, arcname)
for fn in filenames or os.listdir(content):
zf.write(os.path.join(content, fn), os.path.join(arcname, fn))
return path


def users(args):
def users(filepath, api_url, http_headers, insecure):
now = datetime.datetime.utcnow()
with open(args.json) as json_dump:
input_data = json.load(json_dump)
with open(filepath) as fd:
input_data = json.load(fd)
with requests.Session() as rs:
log.info('bootstrapping users...')
rs.verify = not args.insecure
rs.headers = HTTP_HEADERS
rs.verify = not insecure
rs.headers = http_headers
for u in input_data.get('users', []):
log.info(' ' + u['_id'])
rs.post(API_URL + '/users', json=u)
rs.post(api_url + '/users', json=u)
log.info('bootstrapping groups...')
site_id = rs.get(API_URL + '/config').json()['site']['id']
site_id = rs.get(api_url + '/config').json()['site']['id']
for g in input_data.get('groups', []):
log.info(' ' + g['_id'])
roles = g.pop('roles')
rs.post(API_URL + '/groups' , json=g)
rs.post(api_url + '/groups' , json=g)
for r in roles:
r.setdefault('site', site_id)
rs.post(API_URL + '/groups/' + g['_id'] + '/roles' , json=r)
rs.post(api_url + '/groups/' + g['_id'] + '/roles' , json=r)
log.info('bootstrapping complete')

users_desc = """
example:
./bin/bootstrap.py users users_and_groups.json
"""


def data(args):
log.info('Inspecting %s' % args.path)
files = []
schema_validator = validators.payload_from_schema_file('uploader.json')
with requests.Session() as rs:
rs.verify = not args.insecure
rs.headers = HTTP_HEADERS
for dirpath, dirnames, filenames in os.walk(args.path):
dirnames[:] = [dn for dn in dirnames if not dn.startswith('.')] # use slice assignment to influence walk
if not dirnames and filenames:
for metadata_file in filenames:
if metadata_file.lower() == 'metadata.json':
filenames.remove(metadata_file)
break
else:
metadata_file = None
if not metadata_file:
log.warning('Skipping %s: No metadata found' % dirpath)
continue
with open(os.path.join(dirpath, metadata_file)) as fd:
try:
metadata = json.load(fd)
except ValueError:
log.warning('Skipping %s: Unparsable metadata' % dirpath)
continue
with tempfile.TemporaryDirectory() as tempdir:
log.info('Packaging %s' % dirpath)
filepath = create_archive(dirpath, os.path.basename(dirpath), metadata, tempdir, filenames)
filename = os.path.basename(filepath)
metadata.setdefault('acquisition', {}).setdefault('files', [{}])[0]['name'] = filename
log.info('Validating %s' % filename)
try:
schema_validator(metadata, 'POST')
except validators.InputValidationException:
log.warning('Skipping %s: Invalid metadata' % dirpath)
continue
log.info('Uploading %s' % filename)
rs.post(API_URL + '/groups', json={'_id': metadata['group']['_id']}).json()
with open(filepath, 'rb') as fd:
metadata_json = json.dumps(metadata, default=metadata_encoder)
mpe = requests_toolbelt.multipart.encoder.MultipartEncoder(fields={'metadata': metadata_json, 'file': (filename, fd)})
rs.post(API_URL + '/uploader', data=mpe, headers={'Content-Type': mpe.content_type})

data_desc = """
example:
./bin/bootstrap.py data /tmp/data
"""


parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(help='operation to perform')

users_parser = subparsers.add_parser(
name='users',
help='bootstrap users and groups',
description=users_desc,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
users_parser.add_argument('json', help='JSON file containing users and groups')
users_parser.set_defaults(func=users)

data_parser = subparsers.add_parser(
name='data',
help='bootstrap files in a dicrectory tree',
description=data_desc,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
data_parser.add_argument('path', help='filesystem path to data')
data_parser.set_defaults(func=data)

parser.add_argument('-i', '--insecure', action='store_true', help='do not verify SSL connections')
args = parser.parse_args()
ap = argparse.ArgumentParser()
ap.description = 'Bootstrap SciTran users and groups'
ap.add_argument('url', help='API URL')
ap.add_argument('json', help='JSON file containing users and groups')
ap.add_argument('--insecure', action='store_true', help='do not verify SSL connections')
ap.add_argument('--secret', help='shared API secret')
args = ap.parse_args()

if args.insecure:
requests.packages.urllib3.disable_warnings()

args.func(args)
http_headers = {
'User-Agent': 'SciTran Drone Bootstrapper',
}
if args.secret:
http_headers['X-SciTran-Auth'] = args.secret
# TODO: extend this to support oauth tokens

users(args.json, args.url, http_headers, args.insecure)
11 changes: 8 additions & 3 deletions bin/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ fi
SCITRAN_RUNTIME_HOST=${SCITRAN_RUNTIME_HOST:-"127.0.0.1"}
SCITRAN_RUNTIME_PORT=${SCITRAN_RUNTIME_PORT:-"8080"}
SCITRAN_RUNTIME_PATH=${SCITRAN_RUNTIME_PATH:-"./runtime"}
SCITRAN_RUNTIME_SSL_PEM=${SCITRAN_RUNTIME_SSL_PEM:-""}
SCITRAN_RUNTIME_BOOTSTRAP=${SCITRAN_RUNTIME_BOOTSTRAP:-"bootstrap.json"}
SCITRAN_PERSISTENT_PATH=${SCITRAN_PERSISTENT_PATH:-"./persistent"}
SCITRAN_PERSISTENT_DATA_PATH=${SCITRAN_PERSISTENT_DATA_PATH:-"$SCITRAN_PERSISTENT_PATH/data"}
Expand Down Expand Up @@ -155,10 +154,15 @@ trap "{
sleep 1


# Set API URL
[ -z "$SCITRAN_RUNTIME_SSL_PEM" ] && API_URL="http" || API_URL="https"
API_URL="$API_URL://$SCITRAN_RUNTIME_HOST:$SCITRAN_RUNTIME_PORT/api"


# Bootstrap users and groups
if [ $BOOTSTRAP_USERS -eq 1 ]; then
echo "Bootstrapping users"
bin/bootstrap.py -i users "$SCITRAN_RUNTIME_BOOTSTRAP"
bin/bootstrap.py --insecure --secret "$SCITRAN_CORE_DRONE_SECRET" $API_URL "$SCITRAN_RUNTIME_BOOTSTRAP"
echo "Bootstrapped users"
else
echo "Database exists at $SCITRAN_PERSISTENT_PATH/db. Not bootstrapping users."
Expand All @@ -174,11 +178,12 @@ else
echo "Updating testdata in $SCITRAN_PERSISTENT_PATH/testdata"
git -C $SCITRAN_PERSISTENT_PATH/testdata pull
fi

if [ -f "$SCITRAN_PERSISTENT_DATA_PATH/.bootstrapped" ]; then
echo "Persistence store exists at $SCITRAN_PERSISTENT_PATH/data. Not bootstrapping data. Remove to re-bootstrap."
else
echo "Bootstrapping testdata"
bin/bootstrap.py -i data $SCITRAN_PERSISTENT_PATH/testdata
folder_reaper --insecure --secret "$SCITRAN_CORE_DRONE_SECRET" $API_URL "$SCITRAN_PERSISTENT_PATH/testdata"
echo "Bootstrapped testdata"
touch "$SCITRAN_PERSISTENT_DATA_PATH/.bootstrapped"
fi
Expand Down
1 change: 1 addition & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ RUN apt-get update \
python-pip \
libffi-dev \
libssl-dev \
git \
&& rm -rf /var/lib/apt/lists/* \
&& pip install -U pip

Expand Down
24 changes: 17 additions & 7 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ preserving their contents across container instances.
--name scitran-core \
-e "SCITRAN_PERSISTENT_DB_URI=mongodb://some-mongo:27017/scitran" \
-e "SCITRAN_CORE_INSECURE=true" \
-e "SCITRAN_CORE_DRONE_SECRET=change-me" \
-v $(pwd)/persistent/data:/var/scitran/data \
-v $(pwd):/var/scitran/code/api \
--link some-mongo \
Expand All @@ -30,26 +31,34 @@ preserving their contents across container instances.
uwsgi \
--ini /var/scitran/config/uwsgi-config.ini \
--http 0.0.0.0:8080 \
--http-keepalive \
--python-autoreload 1


# Bootstrap Account Example:
docker run \
-e "SCITRAN_PERSISTENT_DB_URI=mongodb://some-mongo:27017/scitran" \
--link some-mongo \
-e "SCITRAN_RUNTIME_HOST=scitran-core" \
-e "SCITRAN_RUNTIME_PORT=8080" \
-e "SCITRAN_RUNTIME_PROTOCOL=http" \
-e "SCITRAN_CORE_DRONE_SECRET=change-me" \
--link scitran-core \
--rm \
-v /dev/bali.prod/docker/uwsgi/bootstrap-dev.json:/accounts.json \
scitran/core \
-v /dev/bali.prod/docker/uwsgi/bootstrap.json:/accounts.json \
scitran-core \
/var/scitran/code/api/docker/bootstrap-accounts.sh \
/accounts.json


# Bootstrap Data Example:
docker run \
-e "SCITRAN_PERSISTENT_DB_URI=mongodb://some-mongo:27017/scitran" \
--link some-mongo \
-e "SCITRAN_RUNTIME_HOST=scitran-core" \
-e "SCITRAN_RUNTIME_PORT=8080" \
-e "SCITRAN_RUNTIME_PROTOCOL=http" \
-e "SCITRAN_CORE_DRONE_SECRET=change-me" \
--link scitran-core \
--volumes-from scitran-core \
--rm \
scitran/core \
scitran-core \
/var/scitran/code/api/docker/bootstrap-data.sh
```

Expand Down Expand Up @@ -77,5 +86,6 @@ docker run \
uwsgi \
--ini /var/scitran/config/uwsgi-config.ini \
--http 0.0.0.0:8080 \
--http-keepalive \
--python-autoreload 1
```
5 changes: 4 additions & 1 deletion docker/bootstrap-accounts.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,11 @@ cd /var/scitran/code/api
export PYTHONPATH=.


# Set API URL
API_URL="$SCITRAN_RUNTIME_PROTOCOL://$SCITRAN_RUNTIME_HOST:$SCITRAN_RUNTIME_PORT/api"

# Bootstrap Users
./bin/bootstrap.py -i users ${bootstrap_user_file}
./bin/bootstrap.py --insecure --secret "${SCITRAN_CORE_DRONE_SECRET}" "${API_URL}" "${bootstrap_user_file}"


)
21 changes: 19 additions & 2 deletions docker/bootstrap-data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ GET_LATEST_DATA=${1:-N}
#
# When changing scitran/testdata, merge that change to master first,
# then reference that resulting commit hash here.
bootstrap_data_label=9362b768d54caf6e5cd35f00498208c3b2bff77d
bootstrap_data_label=ff987283d43db2849d943adbdd4f0d0cdecb6d44


# Same as bootstrap_data_label above, except for scitran/reaper.
bootstrap_reaper_label=30215c66a33b18685e1608dbe952e78c370d8765


# Move to API folder for relative path assumptions later on
Expand All @@ -44,20 +48,33 @@ TESTDATA_DIR=$SCITRAN_PERSISTENT_PATH/testdata

if [ ! -d "$TESTDATA_DIR" ] || [ ! -d "$TESTDATA_DIR/download" ] || [ ! -f "$TESTDATA_DIR/.testdata_version" ]; then
echo "Downloading testdata to $TESTDATA_DIR"

# Remove old contents, as they may not be forward compatible.
rm -rf "$TESTDATA_DIR"
mkdir -p "$TESTDATA_DIR/download"
curl -L $TESTDATA_URL | tar xz -C "$TESTDATA_DIR/download" --strip-components 1
else
if [ "$TESTDATA_VERSION" != "$(cat $TESTDATA_DIR/.testdata_version)" ]; then
echo "Testdata out of date; downloading"

# Remove old contents, as they may not be forward compatible.
rm -rf "$TESTDATA_DIR"
mkdir -p "$TESTDATA_DIR/download"
curl -L $TESTDATA_URL | tar xz -C "$TESTDATA_DIR/download" --strip-components 1
else
echo "Testdata up to date"
fi
fi
builtin echo "$TESTDATA_VERSION" > "$TESTDATA_DIR/.testdata_version"

# pull reaper module
pip install "git+https://github.com/scitran/reaper.git@${bootstrap_reaper_label}"


# Set API URL
API_URL="$SCITRAN_RUNTIME_PROTOCOL://$SCITRAN_RUNTIME_HOST:$SCITRAN_RUNTIME_PORT/api"

## load the test data in
./bin/bootstrap.py -i data $TESTDATA_DIR/download
folder_reaper --insecure --secret "${SCITRAN_CORE_DRONE_SECRET}" "${API_URL}" "$TESTDATA_DIR/download"

)
5 changes: 5 additions & 0 deletions docker/uwsgi-entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ if [ "${1:0:1}" = '-' ]; then
set -- uwsgi "$@"
fi

# Run $PRE_RUNAS_CMD as root if provided. Useful for things like JIT pip installs.
if [ ! -z "${PRE_RUNAS_CMD}" ]; then
${PRE_RUNAS_CMD}
fi

if [ "$1" = 'uwsgi' ]; then
exec gosu ${RUNAS_USER} "$@"
fi
Expand Down
2 changes: 2 additions & 0 deletions requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ pylint==1.5.3
pytest==2.8.5
pytest-cov==2.2.0
pytest-watch==3.8.0

git+https://github.com/scitran/reaper.git
1 change: 1 addition & 0 deletions sample.config
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#SCITRAN_RUNTIME_HOST="127.0.0.1"
#SCITRAN_RUNTIME_PORT="8080"
#SCITRAN_RUNTIME_PROTOCOL="https"
#SCITRAN_RUNTIME_PATH="./runtime"
#SCITRAN_RUNTIME_SSL_PEM="*"
#SCITRAN_RUNTIME_BOOTSTRAP="bootstrap.json"
Expand Down