From 45cc620bdc4ec73b73fc7a8e077751e939efe909 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Se=CC=81bastien=20De=CC=81le=CC=80ze?= Date: Tue, 24 Aug 2021 17:10:29 +0200 Subject: [PATCH] documents: `masked` property enhancement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Allows documents to be displayed only to allowed IP addresses. This is done by changing the `masked` property from a boolean to an enum. * Configures a facet for filtering documents by the `masked` property. * Closes #632. Co-Authored-by: Sébastien Délèze --- sonar/config.py | 4 ++ .../documents/document-v1.0.0_src.json | 23 ++++++++++- .../v7/documents/document-v1.0.0.json | 5 ++- sonar/modules/documents/marshmallow/json.py | 2 +- sonar/modules/documents/query.py | 34 +++++++++++++++- sonar/modules/documents/receivers.py | 10 ++++- sonar/modules/documents/views.py | 39 +++++++++++++++--- sonar/modules/utils.py | 40 ++++++++++++++++++- tests/api/documents/test_documents_query.py | 27 +++++++++++-- tests/ui/test_utils.py | 16 ++++++++ 10 files changed, 183 insertions(+), 17 deletions(-) diff --git a/sonar/config.py b/sonar/config.py index f496b9b4..941fd59b 100644 --- a/sonar/config.py +++ b/sonar/config.py @@ -520,6 +520,8 @@ def _(x): RECORDS_REST_FACETS = { 'documents': dict(aggs=dict( + masked=dict(terms=dict(field='masked', + size=DEFAULT_AGGREGATION_SIZE)), subdivision=dict(terms=dict(field='subdivisions.pid', size=DEFAULT_AGGREGATION_SIZE)), organisation=dict(terms=dict(field='organisation.pid', @@ -549,6 +551,8 @@ def _(x): customField3=dict(terms=dict(field='customField3.raw', size=DEFAULT_AGGREGATION_SIZE))), filters={ + 'masked': + and_term_filter('masked'), 'subdivision': and_term_filter('subdivisions.pid'), 'organisation': diff --git a/sonar/modules/documents/jsonschemas/documents/document-v1.0.0_src.json b/sonar/modules/documents/jsonschemas/documents/document-v1.0.0_src.json index 781e81c0..605768e9 100644 --- 
a/sonar/modules/documents/jsonschemas/documents/document-v1.0.0_src.json +++ b/sonar/modules/documents/jsonschemas/documents/document-v1.0.0_src.json @@ -1835,10 +1835,29 @@ }, "masked": { "title": "Masked", - "type": "boolean", + "type": "string", + "enum": [ + "not_masked", + "masked_for_all", + "masked_for_external_ips" + ], "description": "A masked document is visible in the professional interface, but not in the public interface.", - "default": false, + "default": "not_masked", "form": { + "options": [ + { + "label": "Not masked", + "value": "not_masked" + }, + { + "label": "Masked for all", + "value": "masked_for_all" + }, + { + "label": "Masked for external IP addresses", + "value": "masked_for_external_ips" + } + ], "expressionProperties": { "templateOptions.required": "true" } diff --git a/sonar/modules/documents/mappings/v7/documents/document-v1.0.0.json b/sonar/modules/documents/mappings/v7/documents/document-v1.0.0.json index 35dfb875..738637b6 100644 --- a/sonar/modules/documents/mappings/v7/documents/document-v1.0.0.json +++ b/sonar/modules/documents/mappings/v7/documents/document-v1.0.0.json @@ -104,6 +104,9 @@ }, "name": { "type": "text" + }, + "ips": { + "type": "keyword" } } }, @@ -584,7 +587,7 @@ } }, "masked": { - "type": "boolean" + "type": "keyword" }, "subdivisions": { "type": "object", diff --git a/sonar/modules/documents/marshmallow/json.py b/sonar/modules/documents/marshmallow/json.py index 96ce2605..0a6f2266 100644 --- a/sonar/modules/documents/marshmallow/json.py +++ b/sonar/modules/documents/marshmallow/json.py @@ -105,7 +105,7 @@ class DocumentMetadataSchemaV1(StrictKeysMixin): customField1 = fields.List(fields.String(validate=validate.Length(min=1))) customField2 = fields.List(fields.String(validate=validate.Length(min=1))) customField3 = fields.List(fields.String(validate=validate.Length(min=1))) - masked = fields.Boolean() + masked = SanitizedUnicode() _bucket = SanitizedUnicode() _files = Nested(FileSchemaV1, many=True) _oai = 
fields.Dict() diff --git a/sonar/modules/documents/query.py b/sonar/modules/documents/query.py index e9969b6d..dea1e902 100644 --- a/sonar/modules/documents/query.py +++ b/sonar/modules/documents/query.py @@ -26,6 +26,7 @@ from sonar.modules.query import default_search_factory, \ get_operator_and_query_type from sonar.modules.users.api import current_user_record +from sonar.modules.utils import get_current_ip FIELDS = [ '_bucket', '_files.*', 'pid', 'organisation.*', 'title.*^3', @@ -82,7 +83,38 @@ def search_factory(self, search, query_parser=None): # Public search if view: # Don't display masked records - search = search.filter('bool', must_not={'term': {'masked': True}}) + search = search.filter('bool', + should=[{ + 'bool': { + 'must_not': [{ + 'exists': { + 'field': 'masked' + } + }] + } + }, { + 'bool': { + 'filter': [{ + 'term': { + 'masked': 'not_masked' + } + }] + } + }, { + 'bool': { + 'must': [{ + 'term': { + 'masked': + 'masked_for_external_ips' + } + }, { + 'term': { + 'organisation.ips': + get_current_ip() + } + }] + } + }]) # Filter record by organisation view. if view != current_app.config.get('SONAR_APP_DEFAULT_ORGANISATION'): diff --git a/sonar/modules/documents/receivers.py b/sonar/modules/documents/receivers.py index a3f1572a..ebc4e0ef 100644 --- a/sonar/modules/documents/receivers.py +++ b/sonar/modules/documents/receivers.py @@ -29,7 +29,7 @@ from sonar.modules.api import SonarRecord from sonar.modules.documents.api import DocumentRecord from sonar.modules.documents.loaders.schemas.factory import LoaderSchemaFactory -from sonar.modules.utils import chunks +from sonar.modules.utils import chunks, get_ips_list from sonar.webdav import HegClient from .api import DocumentRecord @@ -109,6 +109,14 @@ def enrich_document_data(sender=None, # Check if record is open access. 
json['isOpenAccess'] = record.is_open_access() + # Compile allowed IPs in document + if json.get('organisation'): + if json['organisation'][0].get('allowedIps'): + json['organisation'][0]['ips'] = get_ips_list( + json['organisation'][0]['allowedIps'].split('\n')) + else: + json['organisation'][0]['ips'] = [] + # No files are present in record if not record.files: return diff --git a/sonar/modules/documents/views.py b/sonar/modules/documents/views.py index ab3db62b..67aba9b6 100644 --- a/sonar/modules/documents/views.py +++ b/sonar/modules/documents/views.py @@ -21,7 +21,7 @@ import json -from flask import Blueprint, current_app, render_template, request +from flask import Blueprint, abort, current_app, render_template, request from flask_babelex import gettext as _ from invenio_i18n.ext import current_i18n from invenio_records_ui.signals import record_viewed @@ -30,7 +30,8 @@ from sonar.modules.documents.utils import has_external_urls_for_files, \ populate_files_properties from sonar.modules.utils import format_date, \ - get_bibliographic_code_from_language, get_language_value + get_bibliographic_code_from_language, get_current_ip, get_language_value, \ + is_ip_in_list from .utils import publication_statement_text @@ -70,6 +71,28 @@ def detail(pid, record, template=None, **kwargs): :param \*\*kwargs: Additional view arguments based on URL rule. :returns: The rendered template. """ + + def is_masked(record): + """Check if record is masked. 
+ + :param record: Record object + :returns: True if record is masked + :rtype: boolean + """ + if not record.get('masked'): + return False + + if record['masked'] == 'masked_for_all': + return True + + if record['masked'] == 'masked_for_external_ips' and record.get( + 'organisation') and not is_ip_in_list( + get_current_ip(), record['organisation'][0].get( + 'allowedIps', '').split('\n')): + return True + + return False + # Add restriction, link and thumbnail to files if record.get('_files'): # Check if organisation's record forces to point file to an external @@ -93,6 +116,10 @@ def detail(pid, record, template=None, **kwargs): # Resolve $ref properties record = record.replace_refs() + # Record is masked + if is_masked(record): + abort(404) + # Send signal when record is viewed record_viewed.send( current_app._get_current_object(), @@ -174,12 +201,12 @@ def part_of_format(part_of): label=_('vol.'), value=part_of['numberingVolume'])) if 'numberingIssue' in part_of: - items.append('{label} {value}'.format( - label=_('no.'), value=part_of['numberingIssue'])) + items.append('{label} {value}'.format(label=_('no.'), + value=part_of['numberingIssue'])) if 'numberingPages' in part_of: - items.append('{label} {value}'.format( - label=_('p.'), value=part_of['numberingPages'])) + items.append('{label} {value}'.format(label=_('p.'), + value=part_of['numberingPages'])) return ', '.join(items) diff --git a/sonar/modules/utils.py b/sonar/modules/utils.py index 8216146e..4bf7d3bf 100644 --- a/sonar/modules/utils.py +++ b/sonar/modules/utils.py @@ -20,7 +20,7 @@ import datetime import re -from flask import current_app, g +from flask import current_app, g, request from invenio_i18n.ext import current_i18n from invenio_mail.api import TemplatedMessage from netaddr import IPAddress, IPGlob, IPNetwork, IPSet @@ -278,3 +278,41 @@ def get_bibliographic_code_from_language(language_code): return key raise Exception(f'Language code not found for "{language_code}"') + + +def 
get_current_ip(): + """Get current IP address. + + :returns: Current IP address. + :rtype: str + """ + ip_address = request.environ.get('X-Forwarded-For', request.remote_addr) + # Take only the first IP, as X-Forwarded-For gives the real IP + the + # proxy IP. + return ip_address.split(', ')[0] + + +def get_ips_list(ranges): + """Get the IP addresses list from a list of ranges. + + :param list ranges: List of ranges. + :returns: List of IP addresses. + :rtype: list + """ + ip_set = IPSet() + + for ip_range in ranges: + try: + # It's a glob + if '*' in ip_range or '-' in ip_range: + ip_set.add(IPGlob(ip_range)) + # It's a network + elif '/' in ip_range: + ip_set.add(IPNetwork(ip_range)) + # Simple IP + else: + ip_set.add(IPAddress(ip_range)) + except Exception: + pass + + return [str(ip) for ip in ip_set] diff --git a/tests/api/documents/test_documents_query.py b/tests/api/documents/test_documents_query.py index 0203e04b..2ec93c71 100644 --- a/tests/api/documents/test_documents_query.py +++ b/tests/api/documents/test_documents_query.py @@ -38,7 +38,7 @@ def test_collection_query(db, client, document, collection, es_clear): assert not res.json['aggregations'].get('collection') -def test_masked_document(db, client, document, es_clear): +def test_masked_document(db, client, organisation, document, es_clear): """Test masked document.""" # Not masked (property not exists) res = client.get(url_for('invenio_records_rest.doc_list', view='global')) @@ -46,7 +46,7 @@ def test_masked_document(db, client, document, es_clear): assert res.json['hits']['total']['value'] == 1 # Not masked - document['masked'] = False + document['masked'] = 'not_masked' document.commit() document.reindex() db.session.commit() @@ -54,11 +54,30 @@ def test_masked_document(db, client, document, es_clear): assert res.status_code == 200 assert res.json['hits']['total']['value'] == 1 - # Masked - document['masked'] = True + # Masked for all + document['masked'] = 'masked_for_all' document.commit() 
document.reindex() db.session.commit() res = client.get(url_for('invenio_records_rest.doc_list', view='global')) assert res.status_code == 200 assert res.json['hits']['total']['value'] == 0 + + # Masked for external IPs, IP is not allowed + document['masked'] = 'masked_for_external_ips' + document.commit() + document.reindex() + db.session.commit() + res = client.get(url_for('invenio_records_rest.doc_list', view='global')) + assert res.status_code == 200 + assert res.json['hits']['total']['value'] == 0 + + # Masked for external IPs, IP is allowed + organisation['allowedIps'] = '127.0.0.1' + organisation.commit() + db.session.commit() + organisation.reindex() + document.reindex() + res = client.get(url_for('invenio_records_rest.doc_list', view='global')) + assert res.status_code == 200 + assert res.json['hits']['total']['value'] == 1 diff --git a/tests/ui/test_utils.py b/tests/ui/test_utils.py index b3ddb36d..aa9bfd78 100644 --- a/tests/ui/test_utils.py +++ b/tests/ui/test_utils.py @@ -100,6 +100,7 @@ def test_get_view_code(app, organisation): with app.test_request_context('/notexists'): assert get_view_code() == 'global' + def test_format_date(): """Test date formatting.""" # Just year @@ -219,3 +220,18 @@ def test_get_language_value(app): # Non existing locale assert get_language_value(values, 'de') == 'Value ENG' + + +def test_get_current_ip(app): + """Test get current ip.""" + with app.test_request_context( + environ_base={'X-Forwarded-For': '127.0.0.1'}): + assert get_current_ip() == '127.0.0.1' + + +def test_get_ips_list(): + """Test get IP list.""" + ranges = ['127.0.0.1', '192.168.1.3-5', '12.13.14.15/32'] + assert get_ips_list(ranges) == [ + '12.13.14.15', '127.0.0.1', '192.168.1.3', '192.168.1.4', '192.168.1.5' + ]