From f94f94cb9416cacbd4528eca7e67c06c001457ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Se=CC=81bastien=20De=CC=81le=CC=80ze?= Date: Fri, 30 Jul 2021 11:08:00 +0200 Subject: [PATCH] api: search swisscovery records MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Adds an endpoint to search for swisscovery records. * Adds a loader to transform swisscovery record to document record. * Adds a serializer to transform document record to deposit record. * Closes #610. Co-Authored-by: Sébastien Délèze --- setup.py | 3 +- sonar/config_sonar.py | 3 + sonar/modules/deposits/api.py | 48 +- .../deposits/deposit-v1.0.0_src.json | 179 +++ .../mappings/v7/deposits/deposit-v1.0.0.json | 64 + .../deposits/serializers/schemas/document.py | 174 +++ sonar/modules/documents/dojson/overdo.py | 33 + .../documents/dojson/rerodoc/overdo.py | 33 - .../modules/documents/dojson/sru/__init__.py | 18 + sonar/modules/documents/dojson/sru/model.py | 793 +++++++++++ .../modules/documents/loaders/schemas/sru.py | 50 + sonar/modules/documents/marshmallow/json.py | 1 + sonar/modules/permissions.py | 16 + sonar/modules/swisscovery/__init__.py | 19 + sonar/modules/swisscovery/rest.py | 73 + .../api/swisscovery/test_swisscovery_rest.py | 186 +++ tests/conftest.py | 65 +- tests/ui/deposits/test_deposits_api.py | 29 +- .../test_deposits_documents_schema.py | 485 +++++++ tests/ui/documents/schemas/test_sru_schema.py | 1216 +++++++++++++++++ 20 files changed, 3444 insertions(+), 44 deletions(-) create mode 100644 sonar/modules/deposits/serializers/schemas/document.py create mode 100644 sonar/modules/documents/dojson/sru/__init__.py create mode 100644 sonar/modules/documents/dojson/sru/model.py create mode 100644 sonar/modules/documents/loaders/schemas/sru.py create mode 100644 sonar/modules/swisscovery/__init__.py create mode 100644 sonar/modules/swisscovery/rest.py create mode 100644 tests/api/swisscovery/test_swisscovery_rest.py create mode 100644 
tests/ui/deposits/test_deposits_documents_schema.py create mode 100644 tests/ui/documents/schemas/test_sru_schema.py diff --git a/setup.py b/setup.py index 4fa45b60..afa14c35 100644 --- a/setup.py +++ b/setup.py @@ -91,7 +91,8 @@ 'monitoring = sonar.monitoring.views:blueprint', 'translations = sonar.translations.rest:blueprint', 'suggestions = sonar.suggestions.rest:blueprint', - 'validation = sonar.modules.validation.views:blueprint' + 'validation = sonar.modules.validation.views:blueprint', + 'swisscovery = sonar.modules.swisscovery.rest:blueprint' ], 'invenio_assets.webpack': [ 'sonar_theme = sonar.theme.webpack:theme' diff --git a/sonar/config_sonar.py b/sonar/config_sonar.py index 075f1286..d0a5f585 100644 --- a/sonar/config_sonar.py +++ b/sonar/config_sonar.py @@ -97,3 +97,6 @@ """ARK scheme.""" # SONAR_APP_ARK_SHOULDER = 'ffk3' """ARK Shoulder, can be multiple for a given organisation.""" + +SONAR_APP_SWISSCOVERY_SEARCH_URL = 'https://swisscovery.slsp.ch/view/sru/41SLSP_NETWORK' +SONAR_APP_SWISSCOVERY_SEARCH_VERSION = '1.1' diff --git a/sonar/modules/deposits/api.py b/sonar/modules/deposits/api.py index 96e4d812..d85c9394 100644 --- a/sonar/modules/deposits/api.py +++ b/sonar/modules/deposits/api.py @@ -180,7 +180,10 @@ def create_document(self): # Add a statement for date metadata['provisionActivity'][0]['statement'].append({ 'label': [{ - 'value': self['metadata']['documentDate'] + 'value': + self['metadata']['statementDate'] + if self['metadata'].get('statementDate') else + self['metadata']['documentDate'] }], 'type': 'Date' @@ -188,8 +191,11 @@ def create_document(self): # Published in if self['metadata'].get('publication'): + year = self['metadata']['publication']['year'] if self['metadata'][ + 'publication'].get( + 'year') else self['metadata']['documentDate'] part_of = { - 'numberingYear': self['metadata']['documentDate'], + 'numberingYear': year, 'document': { 'title': self['metadata']['publication']['publishedIn'] } @@ -216,6 +222,10 @@ def 
create_document(self): 'statement': self['metadata']['publication']['publisher'] } + if self['metadata']['publication'].get('identifiedBy'): + part_of['document']['identifiedBy'] = self['metadata'][ + 'publication']['identifiedBy'] + metadata['partOf'] = [part_of] # Other electronic versions @@ -302,6 +312,40 @@ def create_document(self): if identifiers: metadata['identifiedBy'] = identifiers + # Content note + if self['metadata'].get('contentNote'): + metadata['contentNote'] = self['metadata']['contentNote'] + + # Extent + if self['metadata'].get('extent'): + metadata['extent'] = self['metadata']['extent'] + + # Additional materials + if self['metadata'].get('additionalMaterials'): + metadata['additionalMaterials'] = self['metadata'][ + 'additionalMaterials'] + + # Formats + if self['metadata'].get('formats'): + metadata['formats'] = self['metadata']['formats'] + + # Other material characteristics + if self['metadata'].get('otherMaterialCharacteristics'): + metadata['otherMaterialCharacteristics'] = self['metadata'][ + 'otherMaterialCharacteristics'] + + # Edition statement + if self['metadata'].get('editionStatement'): + metadata['editionStatement'] = self['metadata']['editionStatement'] + + # Notes + if self['metadata'].get('notes'): + metadata['notes'] = self['metadata']['notes'] + + # Series + if self['metadata'].get('series'): + metadata['series'] = self['metadata']['series'] + # Contributors contributors = [] for contributor in self.get('contributors', []): diff --git a/sonar/modules/deposits/jsonschemas/deposits/deposit-v1.0.0_src.json b/sonar/modules/deposits/jsonschemas/deposits/deposit-v1.0.0_src.json index 0d89ce69..e20464fe 100644 --- a/sonar/modules/deposits/jsonschemas/deposits/deposit-v1.0.0_src.json +++ b/sonar/modules/deposits/jsonschemas/deposits/deposit-v1.0.0_src.json @@ -353,6 +353,12 @@ } } }, + "statementDate": { + "title": "Statement date", + "type": "string", + "minLength": 1 + }, + "identifiedBy": { "title": "Identifiers", "type": 
"array", @@ -608,6 +614,14 @@ } } }, + "year": { + "title": "Year", + "type": "string", + "minLength": 1, + "form": { + "hide": true + } + }, "editors": { "title": "Authors / Editors", "type": "array", @@ -623,6 +637,59 @@ "title": "Publisher", "type": "string", "minLength": 1 + }, + "identifiedBy": { + "title": "Identifiers", + "type": "array", + "minItems": 1, + "items": { + "title": "Identifier", + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "title": "Type", + "type": "string", + "enum": [ + "bf:AudioIssueNumber", + "bf:Doi", + "bf:Ean", + "bf:Gtin14Number", + "bf:Identifier", + "bf:Isan", + "bf:Isbn", + "bf:Ismn", + "bf:Isrc", + "bf:Issn", + "bf:Local", + "bf:IssnL", + "bf:MatrixNumber", + "bf:MusicDistributorNumber", + "bf:MusicPlate", + "bf:MusicPublisherNumber", + "bf:PublisherNumber", + "bf:Upc", + "bf:Urn", + "bf:VideoRecordingNumber", + "uri", + "bf:ReportNumber", + "bf:Strn" + ] + }, + "value": { + "title": "Value", + "type": "string", + "minLength": 1 + } + }, + "required": [ + "type", + "value" + ] + }, + "form": { + "hide": true + } } }, "form": { @@ -915,6 +982,118 @@ ] } } + }, + "contentNote": { + "title": "Content notes", + "type": "array", + "minItems": 1, + "items": { + "title": "Content note", + "type": "string", + "minLength": 1 + } + }, + "extent": { + "title": "Extent", + "description": "Extent of the resource, i.e. the number of pages or volumes.", + "type": "string", + "minLength": 1 + }, + "additionalMaterials": { + "title": "Additional materials", + "description": "Accompanying material of the resource, i.e. maps.", + "type": "string", + "minLength": 1 + }, + "formats": { + "title": "Formats", + "description": "Format of the resource, i.e. 
dimensions in cm.", + "type": "array", + "minItems": 1, + "items": { + "title": "Format", + "type": "string", + "minLength": 1 + } + }, + "otherMaterialCharacteristics": { + "title": "Other Material Characteristics", + "description": "Other Material Characteristics, i.e. illustrations, black and white, or coloured.", + "type": "string", + "minLength": 1 + }, + "editionStatement": { + "title": "Edition", + "type": "object", + "additionalProperties": false, + "properties": { + "editionDesignation": { + "title": "Designation", + "type": "object", + "properties": { + "value": { + "title": "Value", + "type": "string", + "minLength": 1 + } + }, + "required": [ + "value" + ] + }, + "responsibility": { + "title": "Responsibility", + "type": "object", + "properties": { + "value": { + "title": "Value", + "type": "string", + "minLength": 1 + } + }, + "required": [ + "value" + ] + } + }, + "required": [ + "editionDesignation" + ] + }, + "notes": { + "title": "Notes", + "type": "array", + "minItems": 1, + "items": { + "title": "Note", + "type": "string", + "minLength": 1 + } + }, + "series": { + "title": "Series", + "description": "Series the resource belongs to.", + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "name": { + "title": "Title", + "type": "string", + "minLength": 1 + }, + "number": { + "title": "Numbering", + "type": "string", + "minLength": 1 + } + }, + "required": [ + "name" + ] + } } } }, diff --git a/sonar/modules/deposits/mappings/v7/deposits/deposit-v1.0.0.json b/sonar/modules/deposits/mappings/v7/deposits/deposit-v1.0.0.json index 6bf10b92..e72def9a 100644 --- a/sonar/modules/deposits/mappings/v7/deposits/deposit-v1.0.0.json +++ b/sonar/modules/deposits/mappings/v7/deposits/deposit-v1.0.0.json @@ -128,6 +128,9 @@ "type": "date", "format": "yyyy-MM-dd||yyyy" }, + "statementDate": { + "type": "text" + }, "publication": { "type": "object", "properties": { @@ -151,6 +154,17 @@ }, 
"publisher": { "type": "text" + }, + "identifiedBy": { + "type": "object", + "properties": { + "type": { + "type": "keyword" + }, + "value": { + "type": "keyword" + } + } } } }, @@ -234,6 +248,56 @@ "format": "yyyy-MM-dd||yyyy" } } + }, + "contentNote": { + "type": "text" + }, + "extent": { + "type": "text" + }, + "additionalMaterials": { + "type": "text" + }, + "formats": { + "type": "text" + }, + "otherMaterialCharacteristics": { + "type": "text" + }, + "editionStatement": { + "type": "object", + "properties": { + "editionDesignation": { + "type": "object", + "properties": { + "value": { + "type": "text" + } + } + }, + "responsibility": { + "type": "object", + "properties": { + "value": { + "type": "text" + } + } + } + } + }, + "notes": { + "type": "text" + }, + "series": { + "type": "object", + "properties": { + "name": { + "type": "text" + }, + "number": { + "type": "text" + } + } } } }, diff --git a/sonar/modules/deposits/serializers/schemas/document.py b/sonar/modules/deposits/serializers/schemas/document.py new file mode 100644 index 00000000..b098be6b --- /dev/null +++ b/sonar/modules/deposits/serializers/schemas/document.py @@ -0,0 +1,174 @@ +# -*- coding: utf-8 -*- +# +# Swiss Open Access Repository +# Copyright (C) 2021 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +"""Document serializer.""" + +from marshmallow import Schema, fields, post_dump, pre_dump + + +class RemoveEmptyValuesMixin(): + """Mixin for removing empty values from schema.""" + + @post_dump + def remove_empty_values(self, data, **kwargs): + """Remove empty values before dumping data.""" + return {key: value for key, value in data.items() if value} + + +class DocumentMetadataSchema(Schema, RemoveEmptyValuesMixin): + """Serialize deposit metadata.""" + + title = fields.Method('get_title') + subtitle = fields.Method('get_subtitle') + identifiedBy = fields.List(fields.Dict()) + language = fields.Method('get_language') + abstracts = fields.Method('get_abstracts') + documentType = fields.Str() + contentNote = fields.List(fields.Str()) + extent = fields.Str() + dissertation = fields.Dict() + additionalMaterials = fields.Str() + formats = fields.List(fields.Str()) + otherMaterialCharacteristics = fields.Str() + editionStatement = fields.Dict() + documentDate = fields.Method('get_date') + statementDate = fields.Method('get_statement_date') + publicationPlace = fields.Method('get_publication_place') + publisher = fields.Method('get_publisher') + notes = fields.List(fields.Str()) + series = fields.List(fields.Dict()) + publication = fields.Method('get_publication') + + def get_title(self, obj): + """Get title.""" + if not obj.get('title'): + return None + + return obj['title'][0]['mainTitle'][0]['value'] + + def get_subtitle(self, obj): + """Get subtitle.""" + if not obj.get('title') or not obj['title'][0].get('subtitle'): + return None + + return obj['title'][0]['subtitle'][0]['value'] + + def get_language(self, obj): + """Get language.""" + if not obj.get('language'): + return None + + return obj['language'][0]['value'] + + def get_abstracts(self, obj): + """Get abstracts.""" + if not obj.get('abstracts'): + return None + + return [{ + 'language': item['language'], + 'abstract': item['value'] + } for item in obj['abstracts']] + + def get_date(self, obj): + """Get 
date.""" + for provision_activity in obj.get('provisionActivity', []): + if provision_activity.get('startDate'): + return provision_activity['startDate'] + + return None + + def get_statement_date(self, obj): + """Get statement date.""" + for provision_activity in obj.get('provisionActivity', []): + for statement in provision_activity.get('statement', []): + if statement['type'] == 'Date': + return statement['label']['value'] + + return None + + def get_publication_place(self, obj): + """Get publication place.""" + for provision_activity in obj.get('provisionActivity', []): + for statement in provision_activity.get('statement', []): + if statement['type'] == 'bf:Place': + return statement['label']['value'] + + return None + + def get_publisher(self, obj): + """Get publisher.""" + for provision_activity in obj.get('provisionActivity', []): + for statement in provision_activity.get('statement', []): + if statement['type'] == 'bf:Agent': + return statement['label']['value'] + + return None + + def get_publication(self, obj): + """Get publication.""" + for part_of in obj.get('partOf', []): + data = { + 'publishedIn': part_of['document']['title'] + } + + if part_of.get('numberingYear'): + data['year'] = part_of['numberingYear'] + + if part_of.get('numberingVolume'): + data['volume'] = part_of['numberingVolume'] + + if part_of.get('numberingIssue'): + data['number'] = part_of['numberingIssue'] + + if part_of.get('numberingPages'): + data['pages'] = part_of['numberingPages'] + + if part_of.get('document', {}).get('contribution'): + data['editors'] = part_of['document']['contribution'] + + if part_of.get('document', {}).get('identifiedBy'): + data['identifiedBy'] = part_of['document']['identifiedBy'] + + return data + + return None + + + +class DocumentSchema(Schema, RemoveEmptyValuesMixin): + """Serialize deposit from document schema.""" + + metadata = fields.Nested(DocumentMetadataSchema) + contributors = fields.Method('get_contributors') + + @pre_dump + def 
init_data(self, item, **kwargs): + """Initialize data before processing.""" + contribution = item.pop('contribution', None) + item = {'metadata': item, 'contribution': contribution} + return item + + def get_contributors(self, obj): + """Get contributors.""" + if not obj.get('contribution'): + return None + + return [{ + 'name': item['agent']['preferred_name'], + 'role': item['role'][0] + } for item in obj['contribution']] diff --git a/sonar/modules/documents/dojson/overdo.py b/sonar/modules/documents/dojson/overdo.py index a9dfe6c8..1f3d386c 100644 --- a/sonar/modules/documents/dojson/overdo.py +++ b/sonar/modules/documents/dojson/overdo.py @@ -17,6 +17,8 @@ """Base overdo class for DOJSON transformation.""" +import re + from dojson import Overdo as BaseOverdo @@ -36,6 +38,37 @@ def not_repetitive(value, subfield, default=None): return data + @staticmethod + def extract_date(date=None): + """Try to extract date of birth and date of death from field. + + :param date: String, date to parse + :returns: Tuple containing date of birth and date of death + """ + if not date: + return (None, None) + + # Match a full date + match = re.search(r'^([0-9]{4}-[0-9]{2}-[0-9]{2})$', date) + if match: + return (match.group(1), None) + + match = re.search(r'^([0-9]{2}-[0-9]{2}-[0-9]{4})$', date) + if match: + return (match.group(1), None) + + # Match these value: "1980-2010" + match = re.search(r'^([0-9]{4})-([0-9]{4})$', date) + if match: + return (match.group(1), match.group(2)) + + # Match these value: "1980-" or "1980" + match = re.search(r'^([0-9]{4})-?', date) + if match: + return (match.group(1), None) + + raise Exception('Date "{date}" is not recognized'.format(date=date)) + def do(self, blob, ignore_missing=True, exception_handlers=None): """Store blob values and do transformation.""" self.blob_record = blob diff --git a/sonar/modules/documents/dojson/rerodoc/overdo.py b/sonar/modules/documents/dojson/rerodoc/overdo.py index 6917308f..b95978dd 100644 --- 
a/sonar/modules/documents/dojson/rerodoc/overdo.py +++ b/sonar/modules/documents/dojson/rerodoc/overdo.py @@ -17,8 +17,6 @@ """Overdo specialized class for RERODOC DOJSON transformation.""" -import re - from flask import current_app from sonar.modules.documents.dojson.overdo import Overdo as BaseOverdo @@ -54,37 +52,6 @@ def create_organisation(organisation_key): dbcommit=True) organisation.reindex() - @staticmethod - def extract_date(date=None): - """Try to extract date of birth and date of death from field. - - :param date: String, date to parse - :returns: Tuple containing date of birth and date of death - """ - if not date: - return (None, None) - - # Match a full date - match = re.search(r'^([0-9]{4}-[0-9]{2}-[0-9]{2})$', date) - if match: - return (match.group(1), None) - - match = re.search(r'^([0-9]{2}-[0-9]{2}-[0-9]{4})$', date) - if match: - return (match.group(1), None) - - # Match these value: "1980-2010" - match = re.search(r'^([0-9]{4})-([0-9]{4})$', date) - if match: - return (match.group(1), match.group(2)) - - # Match these value: "1980-" or "1980" - match = re.search(r'^([0-9]{4})-?', date) - if match: - return (match.group(1), None) - - raise Exception('Date "{date}" is not recognized'.format(date=date)) - def do(self, blob, ignore_missing=True, exception_handlers=None): """Do transformation.""" result = super(Overdo, self).do(blob, diff --git a/sonar/modules/documents/dojson/sru/__init__.py b/sonar/modules/documents/dojson/sru/__init__.py new file mode 100644 index 00000000..167f6c0f --- /dev/null +++ b/sonar/modules/documents/dojson/sru/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Swiss Open Access Repository +# Copyright (C) 2021 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""DOJSON SRU transformation.""" diff --git a/sonar/modules/documents/dojson/sru/model.py b/sonar/modules/documents/dojson/sru/model.py new file mode 100644 index 00000000..557e4091 --- /dev/null +++ b/sonar/modules/documents/dojson/sru/model.py @@ -0,0 +1,793 @@ +# -*- coding: utf-8 -*- +# +# Swiss Open Access Repository +# Copyright (C) 2021 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +"""DOJSON transformation for SRU.""" + +import re + +from dojson import utils + +from sonar.modules.documents.dojson.overdo import Overdo + +overdo = Overdo() + +CONTRIBUTOR_ROLES_MAPPING = { + 'aut': 'cre', + 'cmp': 'cre', + 'pht': 'cre', + 'ape': 'cre', + 'aqt': 'cre', + 'arc': 'cre', + 'art': 'cre', + 'aus': 'cre', + 'chr': 'cre', + 'cll': 'cre', + 'com': 'cre', + 'drt': 'cre', + 'dsr': 'cre', + 'enj': 'cre', + 'fmk': 'cre', + 'inv': 'cre', + 'ive': 'cre', + 'ivr': 'cre', + 'lbt': 'cre', + 'lsa': 'cre', + 'lyr': 'cre', + 'pra': 'cre', + 'prg': 'cre', + 'rsp': 'cre', + 'scl': 'cre', + 'hnr': 'cre', + 'apl': 'cre', + 'cng': 'cre', + 'cou': 'cre', + 'csl': 'cre', + 'dfd': 'cre', + 'dgg': 'cre', + 'dte': 'cre', + 'fmd': 'cre', + 'fmp': 'cre', + 'his': 'cre', + 'jud': 'cre', + 'jug': 'cre', + 'med': 'cre', + 'orm': 'cre', + 'prn': 'cre', + 'pro': 'cre', + 'ptf': 'cre', + 'rcp': 'cre', + 'rpc': 'cre', + 'spn': 'cre', + 'tlp': 'cre', + 'cre': 'cre', + 'dub': 'cre', + 'mus': 'cre', + 'ctb': 'ctb', + 'ill': 'ctb', + 'prf': 'ctb', + 'trl': 'ctb', + 'abr': 'ctb', + 'act': 'ctb', + 'adi': 'ctb', + 'adp': 'ctb', + 'aft': 'ctb', + 'anm': 'ctb', + 'ann': 'ctb', + 'arr': 'ctb', + 'ato': 'ctb', + 'clr': 'ctb', + 'cnd': 'ctb', + 'ctg': 'ctb', + 'auc': 'ctb', + 'aui': 'ctb', + 'bkd': 'ctb', + 'bnd': 'ctb', + 'brd': 'ctb', + 'brl': 'ctb', + 'bsl': 'ctb', + 'cas': 'ctb', + 'clt': 'ctb', + 'cwt': 'ctb', + 'cmm': 'ctb', + 'cns': 'ctb', + 'col': 'ctb', + 'cor': 'ctb', + 'crt': 'ctb', + 'cst': 'ctb', + 'ctr': 'ctb', + 'cur': 'ctb', + 'dnc': 'ctb', + 'dnr': 'ctb', + 'dpt': 'ctb', + 'drm': 'ctb', + 'dst': 'ctb', + 'dto': 'ctb', + 'edm': 'ctb', + 'egr': 'ctb', + 'etr': 'ctb', + 'exp': 'ctb', + 'fac': 'ctb', + 'fds': 'ctb', + 'fmo': 'ctb', + 'hst': 'ctb', + 'ilu': 'ctb', + 'ins': 'ctb', + 'dgs': 'dgs', + 'edt': 'edt', + 'isb': 'edt', + 'pbd': 'edt', + 'mfr': 'prt', + 'prt': 'prt', + 'itr': 'ctb', + 'lgd': 'ctb', + 'ltg': 'ctb', + 'mod': 'ctb', + 'msd': 'ctb', + 'mtk': 'ctb', + 'nrt': 
'ctb', + 'osp': 'ctb', + 'oth': 'ctb', + 'own': 'ctb', + 'pan': 'ctb', + 'pat': 'ctb', + 'pbl': 'ctb', + 'plt': 'ctb', + 'ppm': 'ctb', + 'ppt': 'ctb', + 'pre': 'ctb', + 'prm': 'ctb', + 'prs': 'ctb', + 'rcd': 'ctb', + 'rce': 'ctb', + 'rdd': 'ctb', + 'res': 'ctb', + 'rsr': 'ctb', + 'sds': 'ctb', + 'sgd': 'ctb', + 'sll': 'ctb', + 'sng': 'ctb', + 'spk': 'ctb', + 'srv': 'ctb', + 'stl': 'ctb', + 'tch': 'ctb', + 'tld': 'ctb', + 'trc': 'ctb', + 'vac': 'ctb', + 'vdg': 'ctb', + 'wac': 'ctb', + 'wal': 'ctb', + 'wat': 'ctb', + 'win': 'ctb', + 'wpr': 'ctb', + 'wst': 'ctb' +} + + +@overdo.over('identifiedBy', '001') +@utils.ignore_value +def marc21_to_identified_by_from_001(self, key, value): + """Get identifier from field 001.""" + identified_by = self.get('identifiedBy', []) + + identified_by.append({ + 'type': 'bf:Local', + 'source': 'swisscovery', + 'value': value + }) + + return identified_by + + +@overdo.over('identifiedBy', '^020..') +@utils.ignore_value +def marc21_to_identified_by_from_020(self, key, value): + """Get identifier from field 020.""" + if not value.get('a'): + return None + + identified_by = self.get('identifiedBy', []) + identified_by.append({'type': 'bf:Isbn', 'value': value.get('a')}) + + self['identifiedBy'] = identified_by + + return None + + +@overdo.over('identifiedBy', '^022..') +@utils.ignore_value +def marc21_to_identified_by_from_022(self, key, value): + """Get identifier from field 022.""" + if not value.get('a') and not value.get('l'): + return None + + identified_by = self.get('identifiedBy', []) + + if value.get('a'): + identified_by.append({'type': 'bf:Issn', 'value': value.get('a')}) + + if value.get('l'): + identified_by.append({'type': 'bf:IssnL', 'value': value.get('l')}) + + self['identifiedBy'] = identified_by + + return None + + +@overdo.over('identifiedBy', '^024..') +@utils.for_each_value +@utils.ignore_value +def marc21_to_identified_by_from_024(self, key, value): + """Get identifier from field 024.""" + if not value.get('a') or 
not value.get('2'): + return None + + type = 'bf:Local' + + if value.get('2') == 'doi': + type = 'bf:Doi' + + if value.get('2') == 'urn': + type = 'bf:Urn' + + if value.get('2') == 'uri': + type = 'uri' + + document_type = {'type': type, 'value': value.get('a')} + + if type == 'bf:Local': + document_type['source'] = value.get('2') + + identified_by = self.get('identifiedBy', []) + identified_by.append(document_type) + + self['identifiedBy'] = identified_by + + return None + + +@overdo.over('identifiedBy', '^027..') +@utils.for_each_value +@utils.ignore_value +def marc21_to_identified_by_from_027(self, key, value): + """Get identifier from field 027.""" + if not value.get('a'): + return None + + identified_by = self.get('identifiedBy', []) + identified_by.append({'type': 'bf:Strn', 'value': value.get('a')}) + + self['identifiedBy'] = identified_by + + return None + + +@overdo.over('identifiedBy', '^088..') +@utils.for_each_value +@utils.ignore_value +def marc21_to_identified_by_from_088(self, key, value): + """Get identifier from field 088.""" + if not value.get('a'): + return None + + identified_by = self.get('identifiedBy', []) + identified_by.append({'type': 'bf:ReportNumber', 'value': value.get('a')}) + + self['identifiedBy'] = identified_by + + return None + + +@overdo.over('language', '^008') +@utils.ignore_value +def marc21_to_language_and_provision_activity_from_008(self, key, value): + """Get language from field 008.""" + # Language + language = self.get('language', []) + language.append({'type': 'bf:Language', 'value': value[-5:-2]}) + self['language'] = language + + # Provision activity + provision_activity = self.get('provisionActivity', []) + if not provision_activity: + provision_activity.append({}) + + provision_activity[0]['type'] = 'bf:Publication' + provision_activity[0]['startDate'] = value[7:11] + + end_date = value[11:15] + if re.match(r'^[0-9]{4}$', end_date): + provision_activity[0]['endDate'] = end_date + + self['provisionActivity'] = 
provision_activity + + return None + + +@overdo.over('title', '^245..') +@utils.for_each_value +@utils.ignore_value +def marc21_to_title_from_245(self, key, value): + """Get title from field 245.""" + main_title = value.get('a') + language = self['language'][0]['value'] if self.get('language') else 'eng' + subtitle = value.get('b') + + if not main_title: + return None + + title = { + 'type': 'bf:Title', + 'mainTitle': [{ + 'value': main_title.rstrip(':'), + 'language': language + }] + } + + if subtitle: + title['subtitle'] = [{'value': subtitle, 'language': language}] + + return title + + +@overdo.over('abstracts', '^520..') +@utils.for_each_value +@utils.ignore_value +def marc21_to_abstracts_from_520(self, key, value): + """Get abstracts from field 520.""" + if not value.get('a'): + return None + + return { + 'value': value.get('a'), + 'language': + self['language'][0]['value'] if self.get('language') else 'eng' + } + + +@overdo.over('contentNote', '^505..') +@utils.for_each_value +@utils.ignore_value +def marc21_to_content_note_from_505(self, key, value): + """Get content note from field 505.""" + if not value.get('a'): + return None + + return value['a'] + + +@overdo.over('contribution', '^(100|700|710|711)..') +@utils.for_each_value +@utils.ignore_value +def marc21_to_contribution_from_100_700(self, key, value): + """Get contribution from fields 100, 700, 710 and 711.""" + if not value.get('a'): + return None + + is_100_or_700 = key.startswith('100') or key.startswith('700') + + separator = ' ' if is_100_or_700 else '. 
' + + name = value.get('a') + if value.get('b'): + name = name + separator + separator.join(utils.force_list(value['b'])) + + contribution = self.get('contribution', []) + + type = 'bf:Person' + + if key == '710__': + type = 'bf:Organization' + + if key == '711__': + type = 'bf:Meeting' + + role = 'cre' if is_100_or_700 else 'ctb' + if value.get('4'): + for item in utils.force_list(value['4']): + if item in CONTRIBUTOR_ROLES_MAPPING: + role = CONTRIBUTOR_ROLES_MAPPING[item] + + data = {'agent': {'type': type, 'preferred_name': name}, 'role': [role]} + + if is_100_or_700 and value.get('d'): + date_of_birth, date_of_death = overdo.extract_date(value['d'][:9]) + + if date_of_birth: + data['agent']['date_of_birth'] = date_of_birth + + if date_of_death: + data['agent']['date_of_death'] = date_of_death + + if key == '711__': + if value.get('c'): + data['agent']['place'] = value['c'] + + if value.get('d'): + data['agent']['date'] = value['d'] + + if value.get('n'): + data['agent']['number'] = value['n'] + + contribution.append(data) + + self['contribution'] = contribution + + return None + + +@overdo.over('extent', '^300..') +@utils.ignore_value +def marc21_to_extent_from_300(self, key, value): + """Get extent from field 300.""" + # Extent + if value.get('a'): + self['extent'] = value['a'] + + # Other material characteristics + if value.get('b'): + self['otherMaterialCharacteristics'] = value['b'] + + # Formats + if value.get('c'): + self['formats'] = [value['c']] + + # Additional materials + if value.get('e'): + self['additionalMaterials'] = value['e'] + + return None + + +@overdo.over('dissertation', '^502..') +@utils.ignore_value +def marc21_to_dissertation_from_502(self, key, value): + """Get dissertation from field 502.""" + if not value.get('a') and not value.get('b'): + return None + + degree = [] + if value.get('a'): + degree.append(value['a']) + if value.get('b'): + degree.append(value['b']) + + data = {'degree': '. 
'.join(degree)} + + if value.get('c'): + data['grantingInstitution'] = value['c'] + + if value.get('d'): + try: + data['date'] = overdo.extract_date(value['d'])[0] + except Exception: + pass + + return data + + +@overdo.over('editionStatement', '^250..') +@utils.ignore_value +def marc21_to_edition_statement_from_250(self, key, value): + """Get edition statement from field 250.""" + if not value.get('a'): + return None + + data = {'editionDesignation': {'value': value['a']}} + + if value.get('b'): + data['responsibility'] = {'value': value['b']} + + return data + + +@overdo.over('documentType', 'leader') +@utils.ignore_value +def marc21_to_document_type_from_leader(self, key, value): + """Get document type from leader.""" + leader_06 = value[6] + + # Still image + if leader_06 == 'k': + return 'coar:c_ecc8' + + # Musical notation + if leader_06 in ['c', 'd']: + return 'coar:c_18cw' + + # Cartographic material + if leader_06 in ['e', 'f']: + return 'coar:c_12cc' + + # Moving image + if leader_06 == 'g': + return 'coar:c_8a7e' + + # Sound + if leader_06 in ['i', 'j']: + return 'coar:c_18cc' + + # Dataset + if leader_06 == 'm': + return 'coar:c_ddb1' + + if leader_06 == 'a': + leader_07 = value[7] + + # Contribution to journal + if leader_07 == 'b': + return 'coar:c_3e5a' + + # Book part + if leader_07 == 'a': + return 'coar:c_3248' + + # Periodical + if leader_07 == 's': + return 'coar:c_2659' + + field_502 = overdo.blob_record.get('502__') + if field_502: + # Bachelor thesis + if 'bachelor' in field_502.get( + 'a', '') or 'bachelor' in field_502.get('b', ''): + return 'coar:c_7a1f' + + # Master thesis + if 'master' in field_502.get('a', '') or 'master' in field_502.get( + 'b', ''): + return 'coar:c_bdcc' + + # Doctoral thesis + if 'dissertation' in field_502.get( + 'a', '') or 'dissertation' in field_502.get( + 'b', '') or 'thèse' in field_502.get( + 'a', '') or 'thèse' in field_502.get('b', ''): + return 'coar:c_db06' + + # Thesis + return 'coar:c_46ec' + + # Book + 
if leader_07 == 'm': + return 'coar:c_2f33' + + # Other + return 'coar:c_1843' + + +@overdo.over('provisionActivity', '^264.1') +@utils.ignore_value +def marc21_to_provision_activity_from_264_1(self, key, value): + """Get provision activity from field 264.""" + provision_activity = self.get('provisionActivity', []) + if not provision_activity: + provision_activity.append({'type': 'bf:Publication', 'statement': []}) + + if not provision_activity[0].get('statement'): + provision_activity[0]['statement'] = [] + + for place in utils.force_list(value.get('a', [])): + provision_activity[0]['statement'].append({ + 'type': 'bf:Place', + 'label': { + 'value': place + } + }) + + for agent in utils.force_list(value.get('b', [])): + provision_activity[0]['statement'].append({ + 'type': 'bf:Agent', + 'label': { + 'value': agent + } + }) + + if value.get('c'): + provision_activity[0]['statement'].append({ + 'type': 'Date', + 'label': { + 'value': value['c'] + } + }) + + self['provisionActivity'] = provision_activity + + return None + + +@overdo.over('provisionActivity', '^264.(1|3)') +@utils.ignore_value +def marc21_to_provision_activity_from_264_3(self, key, value): + """Get provision activity from field 264.""" + provision_activity = self.get('provisionActivity', []) + + manufacture = {'type': 'bf:Manufacture', 'statement': []} + + for place in utils.force_list(value.get('a', [])): + manufacture['statement'].append({ + 'type': 'bf:Place', + 'label': { + 'value': place + } + }) + + for agent in utils.force_list(value.get('b', [])): + manufacture['statement'].append({ + 'type': 'bf:Agent', + 'label': { + 'value': agent + } + }) + + if value.get('c'): + manufacture['statement'].append({ + 'type': 'Date', + 'label': { + 'value': value['c'] + } + }) + + provision_activity.append(manufacture) + self['provisionActivity'] = provision_activity + + return None + + +@overdo.over('notes', '^(500|504|508|510|511|530|545|555)..') +@utils.for_each_value +@utils.ignore_value +def 
marc21_to_notes(self, key, value): + """Get notes from several fields.""" + if not value.get('a'): + return None + + return value['a'] + + +@overdo.over('series', '^490..') +@utils.for_each_value +@utils.ignore_value +def marc21_to_series_from_490(self, key, value): + """Get series from field 490.""" + if not value.get('a'): + return None + + serie = {'name': value['a']} + + if value.get('v'): + serie['number'] = value['v'] + + return serie + + +@overdo.over('partOf773', '^773..') +@utils.for_each_value +@utils.ignore_value +def marc21_to_partof_from_773(self, key, value): + """Get partOf from field 773.""" + if not value.get('t'): + return None + + part_of = self.get('partOf', []) + + data = {'document': {'title': value['t']}} + + # Contribution + contributions = [] + for contribution in utils.force_list(value.get('a', [])): + contributions.append(contribution) + if contributions: + data['document']['contribution'] = contributions + + # Identifiers + identifiers = [] + if value.get('x'): + identifiers.append({'type': 'bf:Issn', 'value': value['x']}) + if value.get('z'): + identifiers.append({'type': 'bf:Isbn', 'value': value['z']}) + if identifiers: + data['document']['identifiedBy'] = identifiers + + # Numbering + for numbering in utils.force_list(value.get('g', [])): + # Pages + matches = re.match(r'^.*S\.\s([0-9\-]+).*$', numbering) + if matches: + data['numberingPages'] = matches.group(1) + + # Year + matches = re.match(r'^yr:([0-9]{4})$', numbering) + if matches: + data['numberingYear'] = matches.group(1) + matches = re.match(r'^.*\(([0-9]{4})\).*$', numbering) + if matches: + data['numberingYear'] = matches.group(1) + + # Volume + matches = re.match(r'^.*Vol\.\s([0-9]+).*$', numbering, re.IGNORECASE) + if matches: + data['numberingVolume'] = matches.group(1) + + # Issue + matches = re.match(r'^no:([0-9]+)$', numbering, re.IGNORECASE) + if matches: + data['numberingIssue'] = matches.group(1) + matches = re.match(r'^.*No\s([0-9]+).*$', numbering, 
re.IGNORECASE) + if matches: + data['numberingIssue'] = matches.group(1) + matches = re.match(r'^.*Nr\.\s([0-9]+).*$', numbering, re.IGNORECASE) + if matches: + data['numberingIssue'] = matches.group(1) + + part_of.append(data) + self['partOf'] = part_of + + return None + + +@overdo.over('partOf800830', '^800|830..') +@utils.for_each_value +@utils.ignore_value +def marc21_to_partof_from_800(self, key, value): + """Get partOf from field 800.""" + # Title + title = None + if key.startswith('800'): + title = value.get('t') + else: + title = [] + if value.get('a'): + title.append(value['a']) + + if value.get('p'): + title.append(value['p']) + + if title: + title = '. '.join(title) + + if not title: + return None + + part_of = self.get('partOf', []) + + data = {'document': {'title': title}} + + # Contribution + if key.startswith('800'): + contributions = [] + for contribution in utils.force_list(value.get('a', [])): + contributions.append(contribution) + if contributions: + data['document']['contribution'] = contributions + + # Identifiers + identifiers = [] + if value.get('x'): + identifiers.append({'type': 'bf:Issn', 'value': value['x']}) + if value.get('z'): + identifiers.append({'type': 'bf:Isbn', 'value': value['z']}) + if identifiers: + data['document']['identifiedBy'] = identifiers + + # Numbering volume + if value.get('v'): + data['numberingVolume'] = value['v'] + + part_of.append(data) + self['partOf'] = part_of + + return None diff --git a/sonar/modules/documents/loaders/schemas/sru.py b/sonar/modules/documents/loaders/schemas/sru.py new file mode 100644 index 00000000..ca1189db --- /dev/null +++ b/sonar/modules/documents/loaders/schemas/sru.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- +# +# Swiss Open Access Repository +# Copyright (C) 2021 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. 
class SRUSchema(Marc21Schema):
    """SRU marshmallow schema.

    Declares the document properties exposed when dumping a record; the
    conversion itself is delegated to the SRU dojson model (``overdo``) in
    the ``pre_dump`` hook below.
    """

    # Properties kept in the dumped document.
    identifiedBy = fields.List(fields.Dict())
    language = fields.List(fields.Dict())
    title = fields.List(fields.Dict())
    abstracts = fields.List(fields.Dict())
    contentNote = fields.List(fields.Str())
    contribution = fields.List(fields.Dict())
    extent = fields.Str()
    dissertation = fields.Dict()
    additionalMaterials = fields.Str()
    formats = fields.List(fields.Str())
    otherMaterialCharacteristics = fields.Str()
    editionStatement = fields.Dict()
    documentType = fields.Str()
    provisionActivity = fields.List(fields.Dict())
    notes = fields.List(fields.Str())
    series = fields.List(fields.Dict())
    partOf = fields.List(fields.Dict())

    @pre_dump
    def process(self, obj, **kwargs):
        """All the process is done by overdo.

        :param obj: Object handed to ``dump()``, passed unchanged to
            ``overdo.do`` (presumably the MARCXML of one record; confirm
            against ``Marc21Schema`` and the dojson model).
        :returns: The result of ``overdo.do(obj)``, which is then
            serialized with the fields declared above.
        """
        return overdo.do(obj)
def is_user_logged_and_submitter(func):
    """Restrict a view to authenticated users having submitter access.

    The wrapped view aborts with 401 when the current user is not
    authenticated and with 403 when the user is authenticated but
    ``has_submitter_access()`` is false; otherwise the view is called with
    its original arguments and its result is returned.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        if not current_user.is_authenticated:
            abort(401)
        elif has_submitter_access():
            return func(*args, **kwargs)

        # Authenticated, but not a submitter.
        abort(403)

    return wrapper
@blueprint.route('/', methods=['GET'])
@is_user_logged_and_submitter
def get_record():
    """Search swisscovery (SRU) and return the first matching record.

    Query string parameters:

    * ``query``: term to search for (mandatory).
    * ``type``: SRU index to search in, ``all_for_ui`` by default.
    * ``format``: ``document`` (default) returns the document record,
      ``deposit`` returns it serialized for the deposit form.

    :returns: The record as JSON with status 200 (``{}`` when nothing is
        found), ``{}`` with status 400 when a parameter is missing or the
        index name is invalid, and ``{}`` with status 502 when swisscovery
        cannot be reached or sends an unusable response.
    """
    from xml.parsers.expat import ExpatError

    search_type = request.args.get('type', 'all_for_ui')
    query = request.args.get('query')
    # Not called `format`, which would shadow the builtin.
    output_format = request.args.get('format', 'document')

    if not search_type or not query:
        return jsonify({}), 400

    # The index name is inserted unquoted in the CQL query, so only plain
    # identifiers (letters, digits, "_" and ".") are accepted.
    if not all(char.isalnum() or char in '._' for char in search_type):
        return jsonify({}), 400

    # Backslashes and double quotes must be escaped inside a quoted CQL
    # term, otherwise the user input could close the quotes early.
    term = query.replace('\\', '\\\\').replace('"', '\\"')

    params = {
        'operation': 'searchRetrieve',
        'version':
        current_app.config.get('SONAR_APP_SWISSCOVERY_SEARCH_VERSION'),
        'recordSchema': 'marcxml',
        'maximumRecords': '1',
        'startRecord': '1',
        'query': f'({search_type}="{term}")'
    }

    try:
        # Without a timeout a stalled remote server would block the worker
        # indefinitely.
        response = requests.get(
            current_app.config.get('SONAR_APP_SWISSCOVERY_SEARCH_URL'),
            params=params,
            timeout=30)
        response.raise_for_status()
        result = xmltodict.parse(response.text)
    except (requests.RequestException, ExpatError):
        current_app.logger.exception('Swisscovery search failed.')
        return jsonify({}), 502

    search_response = result.get('sru:searchRetrieveResponse') or {}
    records = search_response.get('sru:records') or {}
    record = records.get('sru:record')

    # xmltodict returns a list when the element is repeated; only one
    # record is requested, but do not rely on the server honouring it.
    if isinstance(record, list):
        record = record[0] if record else None

    if not record or not record.get('sru:recordData'):
        return jsonify({}), 200

    # Get only relevant XML part.
    marcxml = xmltodict.unparse(record['sru:recordData'],
                                full_document=False)

    document = SRUSchema().dump(marcxml)

    # Serialize for deposit.
    if output_format == 'deposit':
        document = DepositDocumentSchema().dump(document)

    return jsonify(document)
def test_get_record(client, user, submitter):
    """Check access control, parameter validation and both output formats.

    The two last requests look up a known swisscovery record by its MMS ID
    and compare the whole serialized document, then the deposit version of
    it.
    """
    mms_id = '991087591959705501'
    author = 'Garay Vargas, Javier Leonardo'
    main_title = '¿Política exterior o política de cooperación?'
    subtitle = 'una aproximación constructivista al estudio de la política exterior colombiana'
    publisher = 'Universidad Externado de Colombia'
    identifiers = [{
        'source': 'swisscovery',
        'type': 'bf:Local',
        'value': mms_id
    }, {
        'type': 'bf:Isbn',
        'value': '9789587105322'
    }, {
        'type': 'bf:Isbn',
        'value': '958710532X'
    }]
    notes = ['Bibliografía: p. 163-174']
    series = [{'name': 'Serie pretextos', 'number': 'No. 3'}]

    expected_document = {
        'contribution': [{
            'agent': {
                'preferred_name': author,
                'type': 'bf:Person'
            },
            'role': ['cre']
        }],
        'documentType': 'coar:c_2f33',
        'extent': '174 p.',
        'identifiedBy': identifiers,
        'language': [{
            'type': 'bf:Language',
            'value': 'spa'
        }],
        'title': [{
            'mainTitle': [{
                'language': 'spa',
                'value': main_title
            }],
            'subtitle': [{
                'language': 'spa',
                'value': subtitle
            }],
            'type': 'bf:Title'
        }],
        'provisionActivity': [{
            'startDate': '2010',
            'statement': [{
                'label': {
                    'value': 'Bogotá'
                },
                'type': 'bf:Place'
            }, {
                'label': {
                    'value': publisher
                },
                'type': 'bf:Agent'
            }, {
                'label': {
                    'value': '2010'
                },
                'type': 'Date'
            }],
            'type': 'bf:Publication'
        }],
        'notes': notes,
        'series': series,
        'partOf': [{
            'document': {
                'title': 'Serie pretextos'
            },
            'numberingVolume': '38'
        }]
    }

    expected_deposit = {
        'contributors': [{
            'name': author,
            'role': 'cre'
        }],
        'metadata': {
            'identifiedBy': identifiers,
            'language': 'spa',
            'title': main_title,
            'subtitle': subtitle,
            'documentDate': '2010',
            'statementDate': '2010',
            'documentType': 'coar:c_2f33',
            'publicationPlace': 'Bogotá',
            'publisher': publisher,
            'extent': '174 p.',
            'notes': notes,
            'series': series,
            'publication': {
                'publishedIn': 'Serie pretextos',
                'volume': '38'
            }
        }
    }

    def search(**params):
        """Call the endpoint with the given query string parameters."""
        return client.get(url_for('swisscovery.get_record', **params))

    url = url_for('swisscovery.get_record')

    # Not logged
    assert client.get(url).status_code == 401

    # Not authorized
    login_user_via_session(client, email=user['email'])
    assert client.get(url).status_code == 403

    # Bad parameter
    login_user_via_session(client, email=submitter['email'])
    assert client.get(url).status_code == 400

    # No record found
    login_user_via_session(client, email=submitter['email'])
    res = search(query='NON-EXISTING', type='mms_id')
    assert res.status_code == 200
    assert res.json == {}

    # Document serialized
    login_user_via_session(client, email=submitter['email'])
    res = search(query=mms_id, type='mms_id')
    assert res.status_code == 200
    assert res.json == expected_document

    # Deposit serialized
    login_user_via_session(client, email=submitter['email'])
    res = search(query=mms_id, type='mms_id', format='deposit')
    assert res.status_code == 200
    assert res.json == expected_deposit
+ } + }, + 'notes': ['Note 1', 'Note 2'], + 'series': [{ + 'name': 'Serie 1', + 'number': '12' + }, { + 'name': 'Serie 2' + }], + 'partOf': [{ + 'document': { + 'contribution': ['Renato, Ferrari', 'Albano, Mesta'], + 'title': + 'Journal du dimanche', + 'identifiedBy': [{ + 'type': 'bf:Isbn', + 'value': '958710532X' + }, { + 'type': 'bf:Issn', + 'value': '958710532X' + }] + }, + 'numberingPages': '135-139', + 'numberingYear': '2020', + 'numberingVolume': '6', + 'numberingIssue': '12' }] }, 'diffusion': { diff --git a/tests/ui/deposits/test_deposits_api.py b/tests/ui/deposits/test_deposits_api.py index 2369227a..8387c56e 100644 --- a/tests/ui/deposits/test_deposits_api.py +++ b/tests/ui/deposits/test_deposits_api.py @@ -89,7 +89,7 @@ def test_create_document(app, db, project, client, deposit, submitter, 'type': 'bf:Agent' }, { 'label': [{ - 'value': '2020' + 'value': '2019' }], 'type': 'Date' }] @@ -98,11 +98,19 @@ def test_create_document(app, db, project, client, deposit, submitter, 'numberingYear': '2020', 'numberingPages': '1-12', 'document': { - 'title': 'Journal', + 'title': + 'Journal', 'contribution': ['Denson, Edward', 'Worth, James'], 'publication': { 'statement': 'Publisher' - } + }, + 'identifiedBy': [{ + 'type': 'bf:Isbn', + 'value': 'ISBN' + }, { + 'type': 'bf:Issn', + 'value': 'ISSN' + }] }, 'numberingVolume': '12', 'numberingIssue': '2' @@ -172,6 +180,21 @@ def test_create_document(app, db, project, client, deposit, submitter, 'value': '10.1038/nphys1170' }] + assert document['contentNote'] == ['Note 1', 'Note 2'] + assert document['extent'] == 'Extent value' + assert document['additionalMaterials'] == 'Additional materials' + assert document['formats'] == ['Format 1', 'Format 2'] + assert document[ + 'otherMaterialCharacteristics'] == 'Other material characteristics' + assert document['editionStatement'] == { + 'editionDesignation': { + 'value': '1st edition' + }, + 'responsibility': { + 'value': 'Resp.' 
+ } + } + assert len(document['projects']) == 2 assert document.files['main.pdf']['access'] == 'coar:c_f1cf' diff --git a/tests/ui/deposits/test_deposits_documents_schema.py b/tests/ui/deposits/test_deposits_documents_schema.py new file mode 100644 index 00000000..20ef3e0e --- /dev/null +++ b/tests/ui/deposits/test_deposits_documents_schema.py @@ -0,0 +1,485 @@ +# -*- coding: utf-8 -*- +# +# Swiss Open Access Repository +# Copyright (C) 2021 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""Test deposits documents schema serializer.""" + +from sonar.modules.deposits.serializers.schemas.document import \ + DocumentSchema as DepositDocumentSchema + + +def test_title(): + """Test title.""" + # No title + document = {} + assert DepositDocumentSchema().dump(document) == {} + + document = { + 'title': [{ + 'mainTitle': [{ + 'language': + 'ger', + 'value': + '¿Política exterior o política de cooperación?' 
+ }], + 'subtitle': [{ + 'language': + 'ger', + 'value': + 'una aproximación constructivista al estudio de la política exterior colombiana' + }], + 'type': + 'bf:Title' + }] + } + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'title': + '¿Política exterior o política de cooperación?', + 'subtitle': + 'una aproximación constructivista al estudio de la política exterior colombiana' + } + } + + +def test_identified_by(): + """Test identified by.""" + document = { + 'identifiedBy': [{ + 'type': 'bf:Doi', + 'value': '10/12345' + }, { + 'type': 'bf:Isbn', + 'value': '987654' + }, { + 'type': 'bf:Issn', + 'value': '123456' + }, { + 'type': 'bf:IssnL', + 'value': '567890' + }, { + 'type': 'bf:Urn', + 'value': 'urn-value' + }, { + 'type': 'uri', + 'value': 'https://uri.com' + }] + } + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'identifiedBy': [{ + 'type': 'bf:Doi', + 'value': '10/12345' + }, { + 'type': 'bf:Isbn', + 'value': '987654' + }, { + 'type': 'bf:Issn', + 'value': '123456' + }, { + 'type': 'bf:IssnL', + 'value': '567890' + }, { + 'type': 'bf:Urn', + 'value': 'urn-value' + }, { + 'type': 'uri', + 'value': 'https://uri.com' + }] + } + } + + +def test_language(): + """Test language.""" + # No language + document = {} + assert DepositDocumentSchema().dump(document) == {} + + document = { + 'language': [{ + 'type': 'bf:Language', + 'value': 'fre' + }, { + 'type': 'bf:Language', + 'value': 'ger' + }] + } + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'language': 'fre' + } + } + + +def test_abstracts(): + """Test abstracts.""" + # No abstracts + assert DepositDocumentSchema().dump({}) == {} + + document = { + 'abstracts': [{ + 'language': 'fre', + 'value': 'Abstract FRE' + }, { + 'language': 'eng', + 'value': 'Abstract ENG' + }] + } + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'abstracts': [{ + 'language': 'fre', + 'abstract': 'Abstract FRE' + }, { + 'language': 'eng', + 
'abstract': 'Abstract ENG' + }] + } + } + + +def test_contribution(): + """Test contribution.""" + # No contribution + assert DepositDocumentSchema().dump({}) == {} + + document = { + 'contribution': [{ + 'agent': { + 'type': 'bf:Person', + 'preferred_name': 'Thilmany, Christian. Herrmann', + 'date_of_birth': '1710', + 'date_of_death': '1767' + }, + 'role': ['cre'] + }] + } + assert DepositDocumentSchema().dump(document) == { + 'contributors': [{ + 'name': 'Thilmany, Christian. Herrmann', + 'role': 'cre' + }] + } + + +def test_document_type(): + """Test document type.""" + # No document type + document = {} + assert DepositDocumentSchema().dump(document) == {} + + document = {'documentType': 'coar:c_2f33'} + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'documentType': 'coar:c_2f33' + } + } + + +def test_date(): + """Test date.""" + # No provision activity + document = {} + assert DepositDocumentSchema().dump(document) == {} + + # No start date + document = {'provisionActivity': [{}]} + assert DepositDocumentSchema().dump(document) == {} + + document = { + 'provisionActivity': [{ + 'type': 'bf:Publication', + 'startDate': '2012' + }] + } + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'documentDate': '2012' + } + } + + +def test_content_note(): + """Test content note.""" + document = {'contentNote': ['Note 1', 'Note 2']} + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'contentNote': ['Note 1', 'Note 2'] + } + } + + +def test_extent(): + """Test extent.""" + document = {'extent': '1 Bd.'} + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'extent': '1 Bd.' + } + } + + +def test_dissertation(): + """Test dissertation.""" + document = { + 'dissertation': { + 'degree': 'Diss. Claremont. Complément', + 'grantingInstitution': 'Granting', + 'date': '2019' + } + } + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'dissertation': { + 'degree': 'Diss. Claremont. 
Complément', + 'grantingInstitution': 'Granting', + 'date': '2019' + } + } + } + + +def test_additional_materials(): + """Test additional materials.""" + document = {'additionalMaterials': '30 pl.'} + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'additionalMaterials': '30 pl.' + } + } + + +def test_formats(): + """Test formats.""" + document = { + 'otherMaterialCharacteristics': 'Other material characteristics' + } + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'otherMaterialCharacteristics': 'Other material characteristics' + } + } + + +def test_other_material_characteristics(): + """Test other material characteristics.""" + document = {'formats': ['24 cm']} + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'formats': ['24 cm'] + } + } + + +def test_edition_statement(): + """Test edition statement.""" + document = { + 'editionStatement': { + 'editionDesignation': { + 'value': '1st edition' + }, + 'responsibility': { + 'value': 'Resp.' + } + } + } + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'editionStatement': { + 'editionDesignation': { + 'value': '1st edition' + }, + 'responsibility': { + 'value': 'Resp.' 
+ } + } + } + } + + +def test_publication_place(): + """Test publication place.""" + # No provision activity + document = {} + assert DepositDocumentSchema().dump(document) == {} + + # No statement + document = {'provisionActivity': [{}]} + assert DepositDocumentSchema().dump(document) == {} + + document = { + 'provisionActivity': [{ + 'statement': [{ + 'type': 'bf:Place', + 'label': { + 'value': 'Place 1' + } + }, { + 'type': 'bf:Place', + 'label': { + 'value': 'Place 2' + } + }] + }] + } + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'publicationPlace': 'Place 1' + } + } + + +def test_publisher(): + """Test publisher.""" + # No provision activity + document = {} + assert DepositDocumentSchema().dump(document) == {} + + # No statement + document = {'provisionActivity': [{}]} + assert DepositDocumentSchema().dump(document) == {} + + document = { + 'provisionActivity': [{ + 'statement': [{ + 'type': 'bf:Agent', + 'label': { + 'value': 'Agent 1' + } + }, { + 'type': 'bf:Agent', + 'label': { + 'value': 'Agent 2' + } + }] + }] + } + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'publisher': 'Agent 1' + } + } + + +def test_notes(): + """Test notes.""" + # No note + document = {} + assert DepositDocumentSchema().dump(document) == {} + + document = {'notes': ['Note 1', 'Note 2']} + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'notes': ['Note 1', 'Note 2'] + } + } + + +def test_series(): + """Test series.""" + # No serie + document = {} + assert DepositDocumentSchema().dump(document) == {} + + document = { + 'series': [{ + 'name': 'Serie 1', + 'number': '12' + }, { + 'name': 'Serie 2' + }] + } + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'series': [{ + 'name': 'Serie 1', + 'number': '12' + }, { + 'name': 'Serie 2' + }] + } + } + + +def test_part_of(): + """Test part of.""" + # No part of + document = {} + assert DepositDocumentSchema().dump(document) == {} + + document = { + 
'partOf': [{ + 'document': { + 'title': + 'Document title 1', + 'contribution': ['Contributor 1', 'Contributor 2'], + 'identifiedBy': [{ + 'type': 'bf:Issn', + 'value': 'ISSN' + }, { + 'type': 'bf:Isbn', + 'value': 'ISBN' + }] + }, + 'numberingVolume': '22', + 'numberingIssue': '4', + 'numberingPages': '485-512', + 'numberingYear': '2004' + }, { + 'document': { + 'title': 'Document title 2' + }, + 'numberingVolume': '22', + 'numberingIssue': '4', + 'numberingYear': '2004' + }, { + 'document': { + 'title': 'Document title 3', + }, + 'numberingPages': '243-263' + }, { + 'document': { + 'title': 'Document title 4' + }, + 'numberingIssue': '16', + 'numberingYear': '2011' + }] + } + assert DepositDocumentSchema().dump(document) == { + 'metadata': { + 'publication': { + 'publishedIn': + 'Document title 1', + 'volume': + '22', + 'number': + '4', + 'pages': + '485-512', + 'year': + '2004', + 'editors': ['Contributor 1', 'Contributor 2'], + 'identifiedBy': [{ + 'type': 'bf:Issn', + 'value': 'ISSN' + }, { + 'type': 'bf:Isbn', + 'value': 'ISBN' + }] + } + } + } diff --git a/tests/ui/documents/schemas/test_sru_schema.py b/tests/ui/documents/schemas/test_sru_schema.py new file mode 100644 index 00000000..509b8b55 --- /dev/null +++ b/tests/ui/documents/schemas/test_sru_schema.py @@ -0,0 +1,1216 @@ +# -*- coding: utf-8 -*- +# +# Swiss Open Access Repository +# Copyright (C) 2021 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +"""Test SRU dojson transformation.""" + +from __future__ import absolute_import, print_function + +from sonar.modules.documents.loaders.schemas.sru import SRUSchema + + +def test_title(): + """Test title.""" + + # No 245 + xml = """ + + """ + assert SRUSchema().dump(xml) == {} + + # No 245$a + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # OK + xml = """ + + + Title + + + """ + assert SRUSchema().dump(xml) == { + 'title': [{ + 'mainTitle': [{ + 'language': 'eng', + 'value': 'Title' + }], + 'type': 'bf:Title' + }] + } + + # With language + xml = """ + + 201011s1980 xxk||||| |||| 00| ||ger d + + Title + + + """ + assert SRUSchema().dump(xml) == { + 'language': [{ + 'type': 'bf:Language', + 'value': 'ger' + }], + 'provisionActivity': [{ + 'startDate': '1980', + 'type': 'bf:Publication' + }], + 'title': [{ + 'mainTitle': [{ + 'language': 'ger', + 'value': 'Title' + }], + 'type': 'bf:Title' + }] + } + + # With subtitle + xml = """ + + + Title + Subtitle + + + """ + assert SRUSchema().dump(xml) == { + 'title': [{ + 'mainTitle': [{ + 'language': 'eng', + 'value': 'Title' + }], + 'subtitle': [{ + 'language': 'eng', + 'value': 'Subtitle' + }], + 'type': 'bf:Title' + }] + } + + +def test_language(): + """Test language.""" + + # No 008 + xml = """ + + """ + assert SRUSchema().dump(xml) == {} + + # OK + xml = """ + + 201011s1980 xxk||||| |||| 00| ||ger d + + """ + assert SRUSchema().dump(xml) == { + 'language': [{ + 'type': 'bf:Language', + 'value': 'ger' + }], + 'provisionActivity': [{ + 'startDate': '1980', + 'type': 'bf:Publication' + }] + } + + +def test_identified_by(): + """Test identified by.""" + # No 001 + xml = """ + + """ + assert SRUSchema().dump(xml) == {} + + # OK + xml = """ + + 1111 + + """ + assert SRUSchema().dump(xml) == { + 'identifiedBy': [{ + 'type': 'bf:Local', + 'value': '1111', + 'source': 'swisscovery' + }] + } + + # ISBN, but no $a + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # ISBN, OK + xml = """ + + + 
ISBN NUMBER + + + """ + assert SRUSchema().dump(xml) == { + 'identifiedBy': [{ + 'type': 'bf:Isbn', + 'value': 'ISBN NUMBER' + }] + } + + # ISSN, but no $a and no $l + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # ISSN, OK + xml = """ + + + ISSN NUMBER + ISSNL NUMBER + + + """ + assert SRUSchema().dump(xml) == { + 'identifiedBy': [{ + 'type': 'bf:Issn', + 'value': 'ISSN NUMBER' + }, { + 'type': 'bf:IssnL', + 'value': 'ISSNL NUMBER' + }] + } + + # 024, but no $a + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # 024, OK + xml = """ + + + DOI + doi + + + URN + urn + + + URI + uri + + + OTHER + other + + + """ + assert SRUSchema().dump(xml) == { + 'identifiedBy': [{ + 'type': 'bf:Doi', + 'value': 'DOI' + }, { + 'type': 'bf:Urn', + 'value': 'URN' + }, { + 'type': 'uri', + 'value': 'URI' + }, { + 'type': 'bf:Local', + 'value': 'OTHER', + 'source': 'other' + }] + } + + # 027, but no $a + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # 027, OK + xml = """ + + + Identifier + + + """ + assert SRUSchema().dump(xml) == { + 'identifiedBy': [{ + 'type': 'bf:Strn', + 'value': 'Identifier' + }] + } + + # 088, but no $a + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # 088, OK + xml = """ + + + Identifier + + + """ + assert SRUSchema().dump(xml) == { + 'identifiedBy': [{ + 'type': 'bf:ReportNumber', + 'value': 'Identifier' + }] + } + + +def test_abstracts(): + """Test abstracts.""" + # No 520 + xml = """ + + """ + assert SRUSchema().dump(xml) == {} + + # 520, but no $a + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # OK, default language + xml = """ + + + Record summary + + + """ + assert SRUSchema().dump(xml) == { + 'abstracts': [{ + 'value': 'Record summary', + 'language': 'eng' + }] + } + + +def test_content_notes(): + """Test content notes.""" + # No 505 + xml = """ + + """ + assert SRUSchema().dump(xml) == {} + + # 505, but no $a + xml = """ + + + + + """ + assert 
SRUSchema().dump(xml) == {} + + # OK + xml = """ + + + Note 1 + + + Note 2 + + + """ + assert SRUSchema().dump(xml) == {'contentNote': ['Note 1', 'Note 2']} + + +def test_contribution(): + """Test contribution.""" + # 100, but no $a + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # OK, field 100 + xml = """ + + + Thilmany, Christian. + Herrmann + 1710-1767., + dsr + http://id.loc.gov/voc/relators/dsr + + + """ + assert SRUSchema().dump(xml) == { + 'contribution': [{ + 'agent': { + 'type': 'bf:Person', + 'preferred_name': 'Thilmany, Christian. Herrmann', + 'date_of_birth': '1710', + 'date_of_death': '1767' + }, + 'role': ['cre'] + }] + } + + # OK, field 700 + xml = """ + + + Thilmany, Christian. + Herrmann + 1710-1767., + + + """ + assert SRUSchema().dump(xml) == { + 'contribution': [{ + 'agent': { + 'type': 'bf:Person', + 'preferred_name': 'Thilmany, Christian. Herrmann', + 'date_of_birth': '1710', + 'date_of_death': '1767' + }, + 'role': ['cre'] + }] + } + + # Field 710 + xml = """ + + + Commission européenne + Direction générale Emploi + Another b + + + """ + assert SRUSchema().dump(xml) == { + 'contribution': [{ + 'agent': { + 'type': + 'bf:Organization', + 'preferred_name': + 'Commission européenne. Direction générale Emploi. Another b' + }, + 'role': ['ctb'] + }] + } + + # Field 711 + xml = """ + + + Forage and Grassland Conference + Sub + Hamburg + 2011-02-02 + 1 + + + """ + assert SRUSchema().dump(xml) == { + 'contribution': [{ + 'agent': { + 'type': 'bf:Meeting', + 'preferred_name': 'Forage and Grassland Conference. Sub', + 'place': 'Hamburg', + 'date': '2011-02-02', + 'number': '1' + }, + 'role': ['ctb'] + }] + } + + +def test_extent(): + """Test extent.""" + # 300, but no $a + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # OK + xml = """ + + + 1 Bd. 
+ + + """ + assert SRUSchema().dump(xml) == {'extent': '1 Bd.'} + + +def test_dissertation(): + """Test dissertation.""" + # 502, but no $a + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # OK + xml = """ + + + Diss. Claremont + Complément + Granting + 2019 + + + """ + assert SRUSchema().dump(xml) == { + 'dissertation': { + 'degree': 'Diss. Claremont. Complément', + 'grantingInstitution': 'Granting', + 'date': '2019' + } + } + + # Wrong date + xml = """ + + + Diss. Claremont + Complément + Granting + wrong + + + """ + assert SRUSchema().dump(xml) == { + 'dissertation': { + 'degree': 'Diss. Claremont. Complément', + 'grantingInstitution': 'Granting' + } + } + + +def test_additional_materials(): + """Test additional materials.""" + # 300, but no $e + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # OK + xml = """ + + + 1 Bd. + 30 pl. + + + """ + assert SRUSchema().dump(xml) == { + 'extent': '1 Bd.', + 'additionalMaterials': '30 pl.' + } + + +def test_formats(): + """Test formats.""" + # 300, but no $c + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # OK + xml = """ + + + 1 Bd. + 24 cm + + + """ + assert SRUSchema().dump(xml) == {'extent': '1 Bd.', 'formats': ['24 cm']} + + +def test_other_material_characteristics(): + """Test other material characteristics.""" + # 300, but no $b + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # OK + xml = """ + + + 1 Bd. + Other material characteristics + + + """ + assert SRUSchema().dump(xml) == { + 'extent': '1 Bd.', + 'otherMaterialCharacteristics': 'Other material characteristics' + } + + +def test_edition_statement(): + """Test edition statement.""" + # 250, but no $a + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # OK + xml = """ + + + 1st edition + Resp. + + + """ + assert SRUSchema().dump(xml) == { + 'editionStatement': { + 'editionDesignation': { + 'value': '1st edition' + }, + 'responsibility': { + 'value': 'Resp.' 
+ } + } + } + + +def test_document_type(): + """Test document type.""" + # Still image + xml = """ + + 02935nkm a2200253 c 4500 + + """ + assert SRUSchema().dump(xml) == {'documentType': 'coar:c_ecc8'} + + # Musical notation + xml = """ + + 02935ncm a2200253 c 4500 + + """ + assert SRUSchema().dump(xml) == {'documentType': 'coar:c_18cw'} + + # Cartographic material + xml = """ + + 02935nfm a2200253 c 4500 + + """ + assert SRUSchema().dump(xml) == {'documentType': 'coar:c_12cc'} + + # Moving image + xml = """ + + 02935ngm a2200253 c 4500 + + """ + assert SRUSchema().dump(xml) == {'documentType': 'coar:c_8a7e'} + + # Sound + xml = """ + + 02935njm a2200253 c 4500 + + """ + assert SRUSchema().dump(xml) == {'documentType': 'coar:c_18cc'} + + # Dataset + xml = """ + + 02935nmm a2200253 c 4500 + + """ + assert SRUSchema().dump(xml) == {'documentType': 'coar:c_ddb1'} + + # Contribution to journal + xml = """ + + 02935nab a2200253 c 4500 + + """ + assert SRUSchema().dump(xml) == {'documentType': 'coar:c_3e5a'} + + # Book part + xml = """ + + 02935naa a2200253 c 4500 + + """ + assert SRUSchema().dump(xml) == {'documentType': 'coar:c_3248'} + + # Periodical + xml = """ + + 02935nas a2200253 c 4500 + + """ + assert SRUSchema().dump(xml) == {'documentType': 'coar:c_2659'} + + # Bachelor thesis + xml = """ + + 02935nam a2200253 c 4500 + + bachelor thesis + + + """ + assert SRUSchema().dump(xml) == { + 'documentType': 'coar:c_7a1f', + 'dissertation': { + 'degree': 'bachelor thesis' + } + } + + # Master thesis + xml = """ + + 02935nam a2200253 c 4500 + + master thesis + + + """ + assert SRUSchema().dump(xml) == { + 'documentType': 'coar:c_bdcc', + 'dissertation': { + 'degree': 'master thesis' + } + } + + # Doctoral thesis + xml = """ + + 02935nam a2200253 c 4500 + + thèse + + + """ + assert SRUSchema().dump(xml) == { + 'documentType': 'coar:c_db06', + 'dissertation': { + 'degree': 'thèse' + } + } + + # Thesis + xml = """ + + 02935nam a2200253 c 4500 + + + + """ + assert 
SRUSchema().dump(xml) == {'documentType': 'coar:c_46ec'} + + # Book + xml = """ + + 02935nam a2200253 c 4500 + + """ + assert SRUSchema().dump(xml) == {'documentType': 'coar:c_2f33'} + + # Other + xml = """ + + 02935nzz a2200253 c 4500 + + """ + assert SRUSchema().dump(xml) == {'documentType': 'coar:c_1843'} + + +def test_provision_activity(): + """Test provision activity.""" + xml = """ + + 201011s19801990xxk||||| |||| 00| ||ger d + + """ + assert SRUSchema().dump(xml) == { + 'language': [{ + 'type': 'bf:Language', + 'value': 'ger' + }], + 'provisionActivity': [{ + 'type': 'bf:Publication', + 'startDate': '1980', + 'endDate': '1990' + }] + } + + # 264 + xml = """ + + + Place 1 + Place 2 + Agent 1 + Agent 2 + 2019 + + + Place 3 + Place 4 + Agent 3 + Agent 4 + 2020 + + + """ + assert SRUSchema().dump(xml) == { + 'provisionActivity': [{ + 'type': + 'bf:Publication', + 'statement': [{ + 'type': 'bf:Place', + 'label': { + 'value': 'Place 1' + } + }, { + 'type': 'bf:Place', + 'label': { + 'value': 'Place 2' + } + }, { + 'type': 'bf:Agent', + 'label': { + 'value': 'Agent 1' + } + }, { + 'type': 'bf:Agent', + 'label': { + 'value': 'Agent 2' + } + }, { + 'type': 'Date', + 'label': { + 'value': '2019' + } + }], + }, { + 'type': + 'bf:Manufacture', + 'statement': [{ + 'type': 'bf:Place', + 'label': { + 'value': 'Place 3' + } + }, { + 'type': 'bf:Place', + 'label': { + 'value': 'Place 4' + } + }, { + 'type': 'bf:Agent', + 'label': { + 'value': 'Agent 3' + } + }, { + 'type': 'bf:Agent', + 'label': { + 'value': 'Agent 4' + } + }, { + 'type': 'Date', + 'label': { + 'value': '2020' + } + }], + }] + } + + +def test_notes(): + """Test notes.""" + # no $a + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # OK + xml = """ + + + Note 1 + + + Note 2 + + + Note 3 + + + Note 4 + + + Note 5 + + + Note 6 + + + Note 7 + + + Note 8 + + + """ + assert SRUSchema().dump(xml) == { + 'notes': [ + 'Note 1', 'Note 2', 'Note 3', 'Note 4', 'Note 5', 'Note 6', + 'Note 7', 'Note 8' + ] 
+ } + + +def test_series(): + """Test series.""" + # no $a + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # OK + xml = """ + + + Serie 1 + 12 + + + Serie 2 + + + """ + assert SRUSchema().dump(xml) == { + 'series': [{ + 'name': 'Serie 1', + 'number': '12' + }, { + 'name': 'Serie 2' + }] + } + + +def test_part_of(): + """Test part of.""" + # no $t + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # OK + xml = """ + + + Contributor 1 + Contributor 2 + Document title 1 + Vol. 22 (2004), Nr. 4, S. 485-512 + + + Document title 2 + vol. 22 (2004), no 4 + ISSN + ISBN + + + Document title 3 + S. 243-263 + + + Document title 4 + yr:2011 + no:16 + + + """ + assert SRUSchema().dump(xml) == { + 'partOf': [{ + 'document': { + 'title': 'Document title 1', + 'contribution': ['Contributor 1', 'Contributor 2'] + }, + 'numberingVolume': '22', + 'numberingIssue': '4', + 'numberingPages': '485-512', + 'numberingYear': '2004' + }, { + 'document': { + 'title': + 'Document title 2', + 'identifiedBy': [{ + 'type': 'bf:Issn', + 'value': 'ISSN' + }, { + 'type': 'bf:Isbn', + 'value': 'ISBN' + }] + }, + 'numberingVolume': '22', + 'numberingIssue': '4', + 'numberingYear': '2004' + }, { + 'document': { + 'title': 'Document title 3', + }, + 'numberingPages': '243-263' + }, { + 'document': { + 'title': 'Document title 4' + }, + 'numberingIssue': '16', + 'numberingYear': '2011' + }] + } + + +def test_part_of_800(): + """Test part of.""" + # no $t + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # OK + xml = """ + + + Contributor 1 + Contributor 2 + Document title 1 + 1234 + ISSN + ISBN + + + Document title 2 + + + """ + assert SRUSchema().dump(xml) == { + 'partOf': [{ + 'document': { + 'title': + 'Document title 1', + 'contribution': ['Contributor 1', 'Contributor 2'], + 'identifiedBy': [{ + 'type': 'bf:Issn', + 'value': 'ISSN' + }, { + 'type': 'bf:Isbn', + 'value': 'ISBN' + }] + }, + 'numberingVolume': '1234' + }, { + 'document': { + 'title': 
'Document title 2' + } + }] + } + + +def test_part_of_830(): + """Test part of.""" + # no $a + xml = """ + + + + + """ + assert SRUSchema().dump(xml) == {} + + # OK + xml = """ + + + Document title 1 + Some subtitle + 1234 + ISSN + ISBN + + + Document title 2 + + + """ + assert SRUSchema().dump(xml) == { + 'partOf': [{ + 'document': { + 'title': + 'Document title 1. Some subtitle', + 'identifiedBy': [{ + 'type': 'bf:Issn', + 'value': 'ISSN' + }, { + 'type': 'bf:Isbn', + 'value': 'ISBN' + }] + }, + 'numberingVolume': '1234' + }, { + 'document': { + 'title': 'Document title 2' + } + }] + } + + +def test_part_of_all(): + """Test multiple partOf.""" + xml = """ + + + Contributor 1 + Contributor 2 + Document title 1 + Vol. 22 (2004), Nr. 4, S. 485-512 + + + Contributor 1 + Contributor 2 + Document title 2 + 1234 + ISSN + ISBN + + + Document title 3 + 1234 + + + """ + assert SRUSchema().dump(xml) == { + 'partOf': [{ + 'document': { + 'title': 'Document title 1', + 'contribution': ['Contributor 1', 'Contributor 2'] + }, + 'numberingVolume': '22', + 'numberingIssue': '4', + 'numberingPages': '485-512', + 'numberingYear': '2004' + }, { + 'document': { + 'title': + 'Document title 2', + 'contribution': ['Contributor 1', 'Contributor 2'], + 'identifiedBy': [{ + 'type': 'bf:Issn', + 'value': 'ISSN' + }, { + 'type': 'bf:Isbn', + 'value': 'ISBN' + }] + }, + 'numberingVolume': '1234' + }, { + 'document': { + 'title': 'Document title 3' + }, + 'numberingVolume': '1234' + }] + }