diff --git a/api/api.py b/api/api.py index 11589df1e..9f029a00e 100644 --- a/api/api.py +++ b/api/api.py @@ -17,6 +17,7 @@ from handlers import containerhandler from handlers import collectionshandler from handlers import searchhandler +from handlers import schemahandler log = config.log @@ -56,12 +57,14 @@ def get_js(self): # any character allowed except '/'' 'tag_re': '[^/]{3,24}', # filename regex - # length between 3 and 60 characters # any character allowed except '/' 'filename_re': '[^/]+', # note id regex # hexadecimal string exactly of length 24 - 'note_id_re': '[0-9a-f]{24}' + 'note_id_re': '[0-9a-f]{24}', + # schema regex + # example: schema_path/schema.json + 'schema_re': '[^/.]{3,60}/[^/.]{3,60}\.json' } def _format(route): @@ -134,7 +137,8 @@ def _format(route): webapp2.Route(_format(r'/api///'), containerhandler.ContainerHandler, name='cont_sublist_groups', handler_method='get_all', methods=['GET']), webapp2.Route(_format(r'/api///'), containerhandler.ContainerHandler, name='cont_sublist', handler_method='get_all', methods=['GET']), webapp2.Route(_format(r'/api/search'), searchhandler.SearchHandler, name='es_proxy', methods=['GET']), - webapp2.Route(_format(r'/api/search/'), searchhandler.SearchHandler, name='es_proxy', methods=['GET']), + webapp2.Route(_format(r'/api/search/'), searchhandler.SearchHandler, name='es_proxy_1', methods=['GET']), + webapp2.Route(_format(r'/api/schemas/'), schemahandler.SchemaHandler, name='schemas', methods=['GET']), ] diff --git a/api/config.py b/api/config.py index 6ca252b22..160d7fda0 100644 --- a/api/config.py +++ b/api/config.py @@ -1,5 +1,6 @@ import os import copy +import glob import logging import pymongo import datetime @@ -30,7 +31,7 @@ 'site': { 'id': 'local', 'name': 'Local', - 'url': 'https://localhost/api', + 'api_url': 'https://localhost/api', 'central_url': 'https://sdmc.scitran.io/api', 'registered': False, 'ssl_cert': None, @@ -46,8 +47,9 @@ 'db_connect_timeout': '2000', 'db_server_selection_timeout': 
# Validate, at import time, that the schema directories contain exactly the
# JSON schema files the API expects.
schema_path = __config['persistent']['schema_path']

expected_mongo_schemas = set([
    'acquisition.json',
    'collection.json',
    'container.json',
    'file.json',
    'group.json',
    'note.json',
    'permission.json',
    'project.json',
    'session.json',
    'subject.json',
    'user.json',
    'avatars.json',
    'tag.json'
])
expected_input_schemas = set([
    'acquisition.json',
    'collection.json',
    'container.json',
    'file.json',
    'group.json',
    'note.json',
    'packfile.json',
    'permission.json',
    'project.json',
    'session.json',
    'subject.json',
    'user.json',
    'avatars.json',
    'download.json',
    'tag.json',
    'enginemetadata.json',
    'uploader.json',
    'reaper.json'
])


def _readable_schema_names(schema_dir):
    """Return the basenames of the ``*.json`` files found in *schema_dir*.

    Every file is opened once, so a schema that exists but cannot be read
    raises IOError here rather than later.
    """
    names = set()
    for schema_filepath in glob.glob(os.path.join(schema_dir, '*.json')):
        # Plain 'r' instead of 'rU': the content is never read, and the 'U'
        # flag is deprecated (removed in Python 3.11).
        with open(schema_filepath, 'r'):
            pass
        names.add(os.path.basename(schema_filepath))
    return names


def _check_schema_list(found, expected):
    """Raise AssertionError when *found* and *expected* differ.

    Raised explicitly (not via ``assert``) so the check still runs under
    ``python -O``; the exception type and message are unchanged.
    """
    if found != expected:
        raise AssertionError('{} is different from {}'.format(found, expected))


# check that the lists of schemas are correct
mongo_schemas = _readable_schema_names(os.path.join(schema_path, 'mongo'))
_check_schema_list(mongo_schemas, expected_mongo_schemas)

input_schemas = _readable_schema_names(os.path.join(schema_path, 'input'))
_check_schema_list(input_schemas, expected_input_schemas)
'modified': now, 'name': 'Unknown', 'roles': []}}, upsert=True) - db.sites.replace_one({'_id': __config['site']['id']}, {'name': __config['site']['name'], 'site_url': __config['site']['url']}, upsert=True) + db.sites.replace_one({'_id': __config['site']['id']}, {'name': __config['site']['name'], 'site_url': __config['site']['api_url']}, upsert=True) def get_config(): diff --git a/api/download.py b/api/download.py index 6dcb77500..f6c44a6b2 100644 --- a/api/download.py +++ b/api/download.py @@ -198,7 +198,8 @@ def download(self): config.db.projects.update_one({'_id': project_id}, {'$inc': {'counter': 1}}) else: req_spec = self.request.json_body - validator = validators.payload_from_schema_file(self, 'download.json') + payload_schema_uri = util.schema_uri('input', 'download.json') + validator = validators.from_schema_path(payload_schema_uri) validator(req_spec, 'POST') log.debug(json.dumps(req_spec, sort_keys=True, indent=4, separators=(',', ': '))) return self._preflight_archivestream(req_spec) diff --git a/api/handlers/containerhandler.py b/api/handlers/containerhandler.py index 5112daeea..3c08b02d5 100644 --- a/api/handlers/containerhandler.py +++ b/api/handlers/containerhandler.py @@ -306,8 +306,10 @@ def get_groups_with_project(self): def _get_validators(self): - mongo_validator = validators.mongo_from_schema_file(self.config.get('storage_schema_file')) - payload_validator = validators.payload_from_schema_file(self.config.get('payload_schema_file')) + mongo_schema_uri = util.schema_uri('mongo', self.config.get('storage_schema_file')) + mongo_validator = validators.decorator_from_schema_path(mongo_schema_uri) + payload_schema_uri = util.schema_uri('input', self.config.get('payload_schema_file')) + payload_validator = validators.from_schema_path(payload_schema_uri) return mongo_validator, payload_validator def _get_parent_container(self, payload): @@ -329,7 +331,6 @@ def _get_parent_container(self, payload): log.debug(parent_container) return parent_container, 
parent_id_property - def _get_container(self, _id): try: container = self.storage.get_container(_id) diff --git a/api/handlers/grouphandler.py b/api/handlers/grouphandler.py index 62344c1c9..1e66f8666 100644 --- a/api/handlers/grouphandler.py +++ b/api/handlers/grouphandler.py @@ -1,6 +1,7 @@ import datetime from .. import base +from .. import util from .. import config from .. import debuginfo from .. import validators @@ -58,8 +59,10 @@ def put(self, _id): self.abort(404, 'no such Group: ' + _id) permchecker = groupauth.default(self, group) payload = self.request.json_body - mongo_validator = validators.mongo_from_schema_file('group.json') - payload_validator = validators.payload_from_schema_file('group.json') + mongo_schema_uri = util.schema_uri('mongo', 'group.json') + mongo_validator = validators.decorator_from_schema_path(mongo_schema_uri) + payload_schema_uri = util.schema_uri('input', 'group.json') + payload_validator = validators.from_schema_path(payload_schema_uri) payload_validator(payload, 'PUT') result = mongo_validator(permchecker(self.storage.exec_op))('PUT', _id=_id, payload=payload) if result.modified_count == 1: @@ -71,8 +74,10 @@ def post(self): self._init_storage() permchecker = groupauth.default(self, None) payload = self.request.json_body - mongo_validator = validators.mongo_from_schema_file('group.json') - payload_validator = validators.payload_from_schema_file('group.json') + mongo_schema_uri = util.schema_uri('mongo', 'group.json') + mongo_validator = validators.decorator_from_schema_path(mongo_schema_uri) + payload_schema_uri = util.schema_uri('input', 'group.json') + payload_validator = validators.from_schema_path(payload_schema_uri) payload_validator(payload, 'POST') payload['created'] = payload['modified'] = datetime.datetime.utcnow() payload['roles'] = [{'_id': self.uid, 'access': 'admin', 'site': self.user_site}] if self.uid else [] diff --git a/api/handlers/listhandler.py b/api/handlers/listhandler.py index 9ac1181c2..de1ad2d5c 
class SchemaHandler(base.RequestHandler):
    """Serve the JSON schema files stored under the configured schema path."""

    def __init__(self, request=None, response=None):
        super(SchemaHandler, self).__init__(request, response)

    def get(self, schema, **kwargs):
        """Return the parsed JSON content of the requested schema file.

        :param schema: path of the schema relative to the configured
            ``persistent.schema_path`` (e.g. ``input/user.json``);
            presumably captured from the ``/api/schemas/`` route.
        :param kwargs: extra route arguments; unused.
        :return: the decoded JSON document.

        Aborts with 404 when the path resolves outside the schema directory
        or when the file cannot be opened.
        """
        schema_root = config.get_item('persistent', 'schema_path')
        schema_path = os.path.join(schema_root, schema)

        # `schema` comes from the request URL: refuse anything that resolves
        # outside the schema directory, whatever the route pattern allows.
        root_abs = os.path.abspath(schema_root)
        if not os.path.abspath(schema_path).startswith(root_abs + os.sep):
            self.abort(404, 'no such schema: ' + schema)
            return None

        try:
            # The original mode 'ru' is not a valid mode string: Python 3
            # raises ValueError, which the IOError handler below would miss.
            with open(schema_path, 'r') as f:
                return json.load(f)
        except IOError as e:
            self.abort(404, str(e))
import validators from ..auth import userauth, always_ok, ROLES @@ -64,8 +65,10 @@ def put(self, _id): user = self._get_user(_id) permchecker = userauth.default(self, user) payload = self.request.json_body - mongo_validator = validators.mongo_from_schema_file('user.json') - payload_validator = validators.payload_from_schema_file('user.json') + mongo_schema_uri = util.schema_uri('mongo', 'user.json') + mongo_validator = validators.decorator_from_schema_path(mongo_schema_uri) + payload_schema_uri = util.schema_uri('input', 'user.json') + payload_validator = validators.from_schema_path(payload_schema_uri) payload_validator(payload, 'PUT') payload['modified'] = datetime.datetime.utcnow() result = mongo_validator(permchecker(self.storage.exec_op))('PUT', _id=_id, payload=payload) @@ -78,8 +81,10 @@ def post(self): self._init_storage() permchecker = userauth.default(self) payload = self.request.json_body - mongo_validator = validators.mongo_from_schema_file('user.json') - payload_validator = validators.payload_from_schema_file('user.json') + mongo_schema_uri = util.schema_uri('mongo', 'user.json') + mongo_validator = validators.decorator_from_schema_path(mongo_schema_uri) + payload_schema_uri = util.schema_uri('input', 'user.json') + payload_validator = validators.from_schema_path(payload_schema_uri) payload_validator(payload, 'POST') payload['created'] = payload['modified'] = datetime.datetime.utcnow() payload['root'] = payload.get('root', False) diff --git a/api/root.py b/api/root.py index c98063e51..4b83c0cfa 100644 --- a/api/root.py +++ b/api/root.py @@ -2,6 +2,9 @@ import markdown from . import base +from . 
import config + +log = config.log class Root(base.RequestHandler): @@ -80,4 +83,4 @@ def get(self): self.response.write('\n') self.response.write(markdown.markdown(resources, ['extra'])) self.response.write('\n') - self.response.write('\n') \ No newline at end of file + self.response.write('\n') diff --git a/api/upload.py b/api/upload.py index f52cfc04a..284c4f921 100644 --- a/api/upload.py +++ b/api/upload.py @@ -170,7 +170,8 @@ def uploader(self): self.abort(400, str(e)) if not file_store.metadata: self.abort(400, 'metadata is missing') - metadata_validator = validators.payload_from_schema_file('uploader.json') + payload_schema_uri = util.schema_uri('input', 'uploader.json') + metadata_validator = validators.from_schema_path(payload_schema_uri) metadata_validator(file_store.metadata, 'POST') try: target_containers = reaperutil.create_root_to_leaf_hierarchy(file_store.metadata, file_store.files) @@ -216,7 +217,8 @@ def engine(self): self.abort(400, str(e)) if not file_store.metadata: self.abort(400, 'metadata is missing') - metadata_validator = validators.payload_from_schema_file('enginemetadata.json') + payload_schema_uri = util.schema_uri('input', 'enginemetadata.json') + metadata_validator = validators.from_schema_path(payload_schema_uri) metadata_validator(file_store.metadata, 'POST') file_infos = file_store.metadata['acquisition'].pop('files', []) now = datetime.datetime.utcnow() diff --git a/api/util.py b/api/util.py index f553ba1f9..bb37383c7 100644 --- a/api/util.py +++ b/api/util.py @@ -8,6 +8,7 @@ import tempdir as tempfile import enum as baseEnum +from . 
def schema_uri(type_, schema_name):
    """Build the URL under which a schema is served by this API.

    :param type_: schema family, e.g. ``'mongo'`` or ``'input'``.
    :param schema_name: schema file name, e.g. ``'user.json'``, or ``None``.
    :return: ``<site.api_url>/schemas/<type_>/<schema_name>``, or ``None``
        when *schema_name* is ``None``.

    Callers pass ``config.get(...)`` results that may be ``None``, and the
    validator factories treat a ``None`` URL as "no validation". Returning
    ``None`` here keeps that path working; joining would raise TypeError.
    """
    if schema_name is None:
        return None
    return '/'.join([
        config.get_item('site', 'api_url'),
        'schemas',
        type_, schema_name
    ])
class RefResolver(jsonschema.RefResolver):
    """``jsonschema.RefResolver`` that fetches http(s) schemas via requests."""

    def resolve_remote(self, uri):
        """Fetch and decode the JSON document at *uri*.

        Overrides the default ``resolve_remote`` to allow testing when
        there is no SSL certificate.

        :param uri: absolute URI of the schema to retrieve.
        :return: the decoded JSON document; it is also stored in
            ``self.store`` when ``self.cache_remote`` is set.
        :raises requests.HTTPError: when the server answers with an error
            status (presumably wrapped by jsonschema into a
            RefResolutionError -- TODO confirm).
        """
        scheme = urlsplit(uri).scheme

        if scheme in self.handlers:
            result = self.handlers[scheme](uri)
        elif (
            scheme in [u"http", u"https"] and
            getattr(requests.Response, "json", None) is not None
        ):
            # SECURITY NOTE(review): verify=False disables TLS certificate
            # checks for every schema download, not only in tests.
            response = requests.get(uri, verify=False)
            # Without this, the JSON body of an error response (e.g. a 404)
            # would be cached and silently used as the schema.
            response.raise_for_status()
            # Requests has support for detecting the correct encoding of
            # json over http; `json` is a method in recent versions and a
            # property in very old ones.
            if callable(requests.Response.json):
                result = response.json()
            else:
                result = response.json
        else:
            # Otherwise, pass off to urllib and assume utf-8
            result = json.loads(urlopen(uri).read().decode("utf-8"))

        if self.cache_remote:
            self.store[uri] = result
        return result


# We store the resolvers for each base_uri we use, so that we reuse the
# schemas cached by the resolvers.
resolvers = {}


def _resolve_schema(schema_url):
    """Resolve *schema_url* and return ``(schema, resolver)``.

    The URL is split at its last ``/`` into a base URI (trailing slash
    included) and a schema name. One resolver is kept per base URI.

    :raises ValueError: when *schema_url* contains no ``/`` (the original
        regex-based split failed with an opaque AttributeError).
    """
    head, sep, schema_name = schema_url.rpartition('/')
    if not sep:
        raise ValueError('invalid schema url: {}'.format(schema_url))
    base_uri = head + sep
    if base_uri not in resolvers:
        resolvers[base_uri] = RefResolver(base_uri, None)
    resolver = resolvers[base_uri]
    return resolver.resolve(schema_name)[1], resolver
return g -def key_check(schema_file): +def key_check(schema_url): """ for sublists of mongo container there is no automatic key check when creating, updating or deleting an object. We are adding a custom array field to the json schemas ("key_fields"). @@ -146,9 +127,9 @@ def key_check(schema_file): 2. a GET will retrieve a single item 3. a DELETE (most importantly) will delete a single item """ - if schema_file is None: + if schema_url is None: return no_op - schema = resolver_mongo.resolve(schema_file)[1] + schema, _ = _resolve_schema(schema_url) log.debug(schema) if schema.get('key_fields') is None: return no_op diff --git a/bin/run.sh b/bin/run.sh index 616a1252d..6a302fd4c 100755 --- a/bin/run.sh +++ b/bin/run.sh @@ -32,6 +32,9 @@ SCITRAN_PERSISTENT_DB_PATH=${SCITRAN_PERSISTENT_DB_PATH:-"$SCITRAN_PERSISTENT_PA SCITRAN_PERSISTENT_DB_PORT=${SCITRAN_PERSISTENT_DB_PORT:-"9001"} SCITRAN_PERSISTENT_DB_URI=${SCITRAN_PERSISTENT_DB_URI:-"mongodb://localhost:$SCITRAN_PERSISTENT_DB_PORT/scitran"} +[ -z "$SCITRAN_RUNTIME_SSL_PEM" ] && SCITRAN_SITE_API_URL="http" || SCITRAN_SITE_API_URL="https" +SCITRAN_SITE_API_URL="$SCITRAN_SITE_API_URL://$SCITRAN_RUNTIME_HOST:$SCITRAN_RUNTIME_PORT/api" + set +o allexport @@ -154,15 +157,13 @@ trap "{ sleep 1 -# Set API URL -[ -z "$SCITRAN_RUNTIME_SSL_PEM" ] && API_URL="http" || API_URL="https" -API_URL="$API_URL://$SCITRAN_RUNTIME_HOST:$SCITRAN_RUNTIME_PORT/api" + # Boostrap users and groups if [ $BOOTSTRAP_USERS -eq 1 ]; then echo "Bootstrapping users" - bin/bootstrap.py --insecure --secret "$SCITRAN_CORE_DRONE_SECRET" $API_URL "$SCITRAN_RUNTIME_BOOTSTRAP" + bin/bootstrap.py --insecure --secret "$SCITRAN_CORE_DRONE_SECRET" $SCITRAN_SITE_API_URL "$SCITRAN_RUNTIME_BOOTSTRAP" echo "Bootstrapped users" else echo "Database exists at $SCITRAN_PERSISTENT_PATH/db. Not bootstrapping users." @@ -183,7 +184,7 @@ if [ -f "$SCITRAN_PERSISTENT_DATA_PATH/.bootstrapped" ]; then echo "Persistence store exists at $SCITRAN_PERSISTENT_PATH/data. 
Not bootstrapping data. Remove to re-bootstrap." else echo "Bootstrapping testdata" - folder_reaper --insecure --secret "$SCITRAN_CORE_DRONE_SECRET" $API_URL "$SCITRAN_PERSISTENT_PATH/testdata" + folder_reaper --insecure --secret "$SCITRAN_CORE_DRONE_SECRET" $SCITRAN_SITE_API_URL "$SCITRAN_PERSISTENT_PATH/testdata" echo "Bootstrapped testdata" touch "$SCITRAN_PERSISTENT_DATA_PATH/.bootstrapped" fi diff --git a/docker/README.md b/docker/README.md index 5099a7037..989fefbc0 100644 --- a/docker/README.md +++ b/docker/README.md @@ -23,6 +23,7 @@ preserving their contents across container instances. -e "SCITRAN_PERSISTENT_DB_URI=mongodb://some-mongo:27017/scitran" \ -e "SCITRAN_CORE_INSECURE=true" \ -e "SCITRAN_CORE_DRONE_SECRET=change-me" \ + -e "SCITRAN_SITE_API_URL=http://localhost:8080/api" \ -v $(pwd)/persistent/data:/var/scitran/data \ -v $(pwd):/var/scitran/code/api \ --link some-mongo \ diff --git a/sample.config b/sample.config index 60d9df8e6..6d8cab619 100644 --- a/sample.config +++ b/sample.config @@ -16,6 +16,7 @@ #SCITRAN_SITE_ID="" #SCITRAN_SITE_NAME="" #SCITRAN_SITE_URL="" +#SCITRAN_SITE_API_URL="" #SCITRAN_SITE_CENTRAL_URL="" #SCITRAN_SITE_REGISTERED="" #SCITRAN_SITE_SSL_CERT=""