From 4ee54181315305f69426fe46aa2088c369c33d97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Se=CC=81bastien=20De=CC=81le=CC=80ze?=
 <sebastien.deleze@rero.ch>
Date: Mon, 29 Mar 2021 16:42:50 +0200
Subject: [PATCH] documents: harvest records from IRs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Harvests records from ArODES.
* Harvests records from Zora.
* Harvests records from edoc.
* Closes #487.

Co-Authored-by: Sébastien Délèze <sebastien.deleze@rero.ch>
---
 data/oai_sources.json                         |  24 +
 .../documents/dojson/arodes/__init__.py       |  18 +
 .../modules/documents/dojson/arodes/model.py  | 343 ++++++
 .../modules/documents/dojson/zora/__init__.py |  18 +
 sonar/modules/documents/dojson/zora/model.py  | 328 ++++++
 .../documents/loaders/schemas/arodes.py       |  45 +
 sonar/modules/documents/loaders/schemas/dc.py | 241 +++++
 .../modules/documents/loaders/schemas/edoc.py |  24 +
 .../documents/loaders/schemas/factory.py      |   8 +-
 .../modules/documents/loaders/schemas/zora.py |  43 +
 sonar/modules/documents/receivers.py          |   6 +-
 .../documents/loaders/test_arodes_loader.py   | 981 ++++++++++++++++++
 .../documents/loaders/test_edoc_loader.py     | 542 ++++++++++
 .../documents/loaders/test_zora_loader.py     | 712 +++++++++++++
 14 files changed, 3330 insertions(+), 3 deletions(-)
 create mode 100644 sonar/modules/documents/dojson/arodes/__init__.py
 create mode 100644 sonar/modules/documents/dojson/arodes/model.py
 create mode 100644 sonar/modules/documents/dojson/zora/__init__.py
 create mode 100644 sonar/modules/documents/dojson/zora/model.py
 create mode 100644 sonar/modules/documents/loaders/schemas/arodes.py
 create mode 100644 sonar/modules/documents/loaders/schemas/dc.py
 create mode 100644 sonar/modules/documents/loaders/schemas/edoc.py
 create mode 100644 sonar/modules/documents/loaders/schemas/zora.py
 create mode 100644 tests/unit/documents/loaders/test_arodes_loader.py
 create mode 100644 tests/unit/documents/loaders/test_edoc_loader.py
 create mode 100644 tests/unit/documents/loaders/test_zora_loader.py

diff --git a/data/oai_sources.json b/data/oai_sources.json
index 228fa7372..4744c5013 100644
--- a/data/oai_sources.json
+++ b/data/oai_sources.json
@@ -22,5 +22,29 @@
     "metadataprefix": "oai_openaire",
     "comment": "",
     "setspecs": ""
+  },
+  {
+    "key": "arodes",
+    "name": "ArODES",
+    "url": "https://hesso.tind.io/oai2d",
+    "metadataprefix": "marcxml",
+    "comment": "",
+    "setspecs": ""
+  },
+  {
+    "key": "zora",
+    "name": "Zora",
+    "url": "https://www.zora.uzh.ch/cgi/oai2",
+    "metadataprefix": "marc21",
+    "comment": "",
+    "setspecs": ""
+  },
+  {
+    "key": "edoc",
+    "name": "edoc",
+    "url": "https://edoc.unibas.ch/cgi/oai2",
+    "metadataprefix": "oai_dc",
+    "comment": "",
+    "setspecs": ""
   }
 ]
diff --git a/sonar/modules/documents/dojson/arodes/__init__.py b/sonar/modules/documents/dojson/arodes/__init__.py
new file mode 100644
index 000000000..c55a39c2e
--- /dev/null
+++ b/sonar/modules/documents/dojson/arodes/__init__.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2021 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""DOJSON transformation for ArODES."""
diff --git a/sonar/modules/documents/dojson/arodes/model.py b/sonar/modules/documents/dojson/arodes/model.py
new file mode 100644
index 000000000..7bc89b158
--- /dev/null
+++ b/sonar/modules/documents/dojson/arodes/model.py
@@ -0,0 +1,343 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2021 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""DOJSON transformation for ArODES."""
+
+import re
+
+from dojson import utils
+
+from sonar.modules.documents.dojson.overdo import Overdo
+
+overdo = Overdo()
+
+TYPE_MAPPINGS = {
+    'livre': 'coar:c_2f33',
+    'chapitre': 'coar:c_3248',
+    'conference': 'coar:c_5794',
+    'scientifique': 'coar:c_6501',
+    'professionnel': 'coar:c_3e5a',
+    'rapport': 'coar:c_18ws',
+    'THESES': 'coar:c_db06',
+    'other': 'coar:c_1843'
+}
+
+OA_STATUS = ['green', 'gold', 'hybrid', 'bronze', 'closed']
+
+
+@overdo.over('identifiedBy', '001')
+@utils.ignore_value
+def identified_by_from_001(self, key, value):
+    """Get identifier from field 001."""
+    identified_by = self.get('identifiedBy', [])
+
+    identified_by.append({
+        'type': 'bf:Local',
+        'source': 'ArODES',
+        'value': value
+    })
+
+    return identified_by
+
+
+@overdo.over('identifiedBy', '^0247.')
+@utils.ignore_value
+def identified_by_from_024(self, key, value):
+    """Get identifier from field 024."""
+    identified_by = self.get('identifiedBy', [])
+
+    if not value.get('a') or not value.get('2') in ['DOI', 'PMID']:
+        return None
+
+    if value.get('2') == 'DOI':
+        identified_by.append({'type': 'bf:Doi', 'value': value.get('a')})
+
+    return identified_by
+
+
+@overdo.over('title', '^245..')
+@utils.for_each_value
+@utils.ignore_value
+def title_from_245(self, key, value):
+    """Get title from field 245."""
+    main_title = value.get('a', 'No title found')
+    subtitle = value.get('b')
+    language = value.get('9', 'eng')
+
+    title = {
+        'type': 'bf:Title',
+        'mainTitle': [{
+            'value': main_title,
+            'language': language
+        }]
+    }
+
+    if subtitle:
+        title['subtitle'] = [{'value': subtitle, 'language': language}]
+
+    return title
+
+
+@overdo.over('documentType', '^980')
+@utils.ignore_value
+def document_type_from_980(self, key, value):
+    """Get document type from 980 field."""
+    document_type = value.get('a', None)
+
+    if self.get('documentType') or not document_type:
+        return None
+
+    if document_type not in TYPE_MAPPINGS:
+        document_type = 'other'
+
+    return TYPE_MAPPINGS[document_type]
+
+
+@overdo.over('language', '^041')
+@utils.for_each_value
+@utils.ignore_value
+def language_from_041(self, key, value):
+    """Get languages."""
+    if not value.get('a'):
+        return None
+
+    language = self.get('language', [])
+
+    codes = utils.force_list(value.get('a'))
+
+    for code in codes:
+        language.append({'type': 'bf:Language', 'value': code})
+
+    self['language'] = language
+
+    return None
+
+
+@overdo.over('abstracts', '^520..')
+@utils.for_each_value
+@utils.ignore_value
+def abstract_from_520(self, key, value):
+    """Get abstract."""
+    abstract = value.get('a')
+    language = value.get('9', 'eng')
+
+    if not abstract:
+        return None
+
+    abstracts_data = self.get('abstracts', [])
+    abstracts_data.append({'value': abstract, 'language': language})
+
+    self['abstracts'] = abstracts_data
+
+    return None
+
+
+@overdo.over('oa_status', '^906..')
+@utils.ignore_value
+def oa_status_from_906(self, key, value):
+    """Get open access status from field 906."""
+    oa_status = value.get('a', 'none').lower()
+
+    if not oa_status or oa_status not in OA_STATUS:
+        return None
+
+    return oa_status
+
+
+@overdo.over('date', '^269..')
+@utils.ignore_value
+def date_from_269(self, key, value):
+    """Get date from field 269."""
+    # No date, skipping
+    if not value.get('a'):
+        return None
+
+    # Assign start date
+    match = re.search(r'^[0-9]{4}-[0-9]{2}$', value.get('a'))
+
+    # Date does not match "YYYY-MM"
+    if not match:
+        return None
+
+    add_provision_activity_start_date(self, value.get('a') + '-01')
+
+    return None
+
+
+@overdo.over('date', '^260..')
+@utils.ignore_value
+def date_from_260(self, key, value):
+    """Get date from field 260."""
+    # No date, skipping
+    if not value.get('c'):
+        return None
+
+    # Assign start date
+    match = re.search(r'^[0-9]{4}-[0-9]{2}$', value.get('c'))
+
+    # Date does not match "YYYY-MM"
+    if not match:
+        return None
+
+    add_provision_activity_start_date(self, value.get('c') + '-01')
+
+    return None
+
+
+@overdo.over('subjects', '^653..')
+@utils.for_each_value
+@utils.ignore_value
+def subjects_from_653(self, key, value):
+    """Get subjects from field 653."""
+    subject = value.get('a')
+    language = value.get('9', 'eng')
+
+    if not subject:
+        return None
+
+    subject_data = get_subject_for_language(self, language)
+    subject_data['label']['value'].append(subject)
+
+    return None
+
+
+@overdo.over('dissertation', '^502..')
+@utils.ignore_value
+def dissertation_from_field_502(self, key, value):
+    """Extract dissertation degree."""
+    if not value.get('b'):
+        return None
+
+    return {'degree': value.get('b')}
+
+
+@overdo.over('partOf', '^773..')
+@utils.ignore_value
+def host_document_from_field_773(self, key, value):
+    """Host document."""
+    if not value.get('t'):
+        return None
+
+    part_of = {'document': {'title': value.get('t')}}
+
+    if not value.get('g'):
+        if self.get('provisionActivity'):
+            match = re.search(r'^(\d{4})',
+                              self['provisionActivity'][0]['startDate'])
+            part_of['numberingYear'] = match.group(1)
+    else:
+        # Year
+        match = re.search(r'^(\d{4})', value.get('g'))
+        if match:
+            part_of['numberingYear'] = match.group(1)
+
+        # Volume
+        match = re.search(r'vol\.\s(\d+)', value.get('g'))
+        if match:
+            part_of['numberingVolume'] = match.group(1)
+
+        # Issue
+        match = re.search(r'no\.\s(\d+)', value.get('g'))
+        if match:
+            part_of['numberingIssue'] = match.group(1)
+
+        # Pages
+        match = re.search(r'pp\.\s([0-9\-–]+)', value.get('g'))
+        if match:
+            part_of['numberingPages'] = match.group(1)
+
+    if not part_of.get('numberingYear'):
+        return None
+
+    return [part_of]
+
+
+@overdo.over('contribution', '^700..')
+@utils.for_each_value
+@utils.ignore_value
+def contribution_from_700(self, key, value):
+    """Get contribution."""
+    name = value.get('a')
+    affiliation = value.get('u')
+
+    if not name:
+        return None
+
+    contribution = {
+        'agent': {
+            'type': 'bf:Person',
+            'preferred_name': name
+        },
+        'role': ['ctb']
+    }
+
+    if affiliation:
+        contribution['affiliation'] = affiliation
+
+    return contribution
+
+
+def add_provision_activity_start_date(data, date):
+    """Add start date for provision activity.
+
+    :param data: Data dictionary.
+    :param date: Date to add.
+    """
+    provisition_activity = data.get('provisionActivity', [])
+
+    def get_publication():
+        """Get stored publication."""
+        for key, item in enumerate(provisition_activity):
+            if item['type'] == 'bf:Publication':
+                return provisition_activity.pop(key)
+
+        return {'type': 'bf:Publication', 'startDate': None}
+
+    publication = get_publication()
+
+    publication['startDate'] = date
+
+    # Inject publication into provision activity
+    provisition_activity.append(publication)
+
+    # Re-assign provisionActivity
+    data['provisionActivity'] = provisition_activity
+
+
+def get_subject_for_language(data, language):
+    """Return the subject item corresponding to language.
+
+    :param dict data: Overdo data
+    :param str language: Language code
+    :returns: Subject object
+    :rtype: Dict
+    """
+    if not data.get('subjects'):
+        data['subjects'] = []
+
+    subjects = [
+        subject for subject in data.get('subjects', [])
+        if subject['label']['language'] == language
+    ]
+
+    # Create an empty subject
+    if not subjects:
+        subject = {'label': {'language': language, 'value': []}}
+        data['subjects'].append(subject)
+        return subject
+
+    return subjects[0]
diff --git a/sonar/modules/documents/dojson/zora/__init__.py b/sonar/modules/documents/dojson/zora/__init__.py
new file mode 100644
index 000000000..1e768442e
--- /dev/null
+++ b/sonar/modules/documents/dojson/zora/__init__.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2021 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""DOJSON transformation for ZORA."""
diff --git a/sonar/modules/documents/dojson/zora/model.py b/sonar/modules/documents/dojson/zora/model.py
new file mode 100644
index 000000000..082a61753
--- /dev/null
+++ b/sonar/modules/documents/dojson/zora/model.py
@@ -0,0 +1,328 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2021 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""DOJSON transformation for ZORA."""
+
+import re
+
+from dojson import utils
+
+from sonar.modules.documents.dojson.overdo import Overdo
+
+overdo = Overdo()
+
+
+@overdo.over('identifiedBy', '001')
+@utils.ignore_value
+def identified_by_from_001(self, key, value):
+    """Get identifier from field 001."""
+    identified_by = self.get('identifiedBy', [])
+
+    identified_by.append({
+        'type': 'bf:Local',
+        'source': 'ZORA',
+        'value': value
+    })
+
+    return identified_by
+
+
+@overdo.over('identifiedBy', '^0247.')
+@utils.ignore_value
+def identified_by_from_024(self, key, value):
+    """Get identifier from field 024."""
+    identified_by = self.get('identifiedBy', [])
+
+    if not value.get('a'):
+        return None
+
+    if value.get('2') == 'doi':
+        identified_by.append({'type': 'bf:Doi', 'value': value.get('a')})
+    elif value.get('2') == 'pmid':
+        identified_by.append({
+            'type': 'bf:Local',
+            'value': value.get('a'),
+            'source': 'PMID'
+        })
+    else:
+        identified_by.append({
+            'type': 'bf:Identifier',
+            'value': value.get('a')
+        })
+
+    return identified_by
+
+
+@overdo.over('title', '^245..')
+@utils.for_each_value
+@utils.ignore_value
+def title_from_245(self, key, value):
+    """Get title from field 245."""
+    main_title = value.get('a', 'No title found')
+    subtitle = value.get('b')
+    language = value.get('9', 'eng')
+
+    title = {
+        'type': 'bf:Title',
+        'mainTitle': [{
+            'value': main_title,
+            'language': language
+        }]
+    }
+
+    if subtitle:
+        title['subtitle'] = [{'value': subtitle, 'language': language}]
+
+    return title
+
+
+@overdo.over('documentType', '^655')
+@utils.ignore_value
+def document_type_from_655(self, key, value):
+    """Get document type from 655 field."""
+    type = value.get('2')
+    value = value.get('a')
+
+    if self.get('documentType') or not value or not type:
+        return None
+
+    record = overdo.blob_record
+
+    # Book
+    if type == 'local' and value == 'Herausgegebenes wissenschaftliches Werk':
+        return 'coar:c_2f33'
+
+    if type == 'local' and value == 'Monografie':
+        return 'coar:c_2f33'
+
+    # Book part
+    if type == 'local' and value == 'Buchkapitel':
+        return 'coar:c_3248'
+
+    # Conference paper
+    if type == 'local' and value == 'Konferenzbeitrag':
+        return 'coar:c_5794'
+
+    # Journal article
+    if type == 'local' and value == 'Artikel':
+        return 'coar:c_6501'
+
+    # Newspaper article
+    if type == 'local' and value == 'Zeitungsartikel':
+        return 'coar:c_998f'
+
+    # Research report
+    if type == 'gnd-content' and value == 'Forschungsbericht':
+        return 'coar:c_18ws'
+
+    # Doctoral thesis
+    if type == 'gnd-content' and value == 'Hochschulschrift' and record.get(
+            '502__', {}).get('b') == 'Dissertation':
+        return 'coar:c_db06'
+
+    # Master thesis
+    if type == 'gnd-content' and value == 'Hochschulschrift' and record.get(
+            '502__', {}).get('b') == 'Masterarbeit':
+        return 'coar:c_bdcc'
+
+    # Habilitation thesis
+    if type == 'gnd-content' and value == 'Hochschulschrift' and record.get(
+            '502__', {}).get('b') == 'Habilitation':
+        return 'habilitation_thesis'
+
+    # Working paper
+    if type == 'local' and value == 'Working Paper':
+        return 'coar:c_8042'
+
+    return 'coar:c_1843'
+
+
+@overdo.over('language', '^041')
+@utils.for_each_value
+@utils.ignore_value
+def language_from_041(self, key, value):
+    """Get languages."""
+    if not value.get('a'):
+        return None
+
+    language = self.get('language', [])
+
+    codes = utils.force_list(value.get('a'))
+
+    for code in codes:
+        language.append({'type': 'bf:Language', 'value': code})
+
+    self['language'] = language
+
+    return None
+
+
+@overdo.over('abstracts', '^520..')
+@utils.for_each_value
+@utils.ignore_value
+def abstract_from_520(self, key, value):
+    """Get abstract."""
+    abstract = value.get('a')
+    language = value.get('9', 'eng')
+
+    if not abstract:
+        return None
+
+    abstracts_data = self.get('abstracts', [])
+    abstracts_data.append({'value': abstract, 'language': language})
+
+    self['abstracts'] = abstracts_data
+
+    return None
+
+
+@overdo.over('date', '^264..')
+@utils.ignore_value
+def date_from_264(self, key, value):
+    """Get date from field 264."""
+    # No date, skipping
+    if not value.get('c'):
+        return None
+
+    # Assign start date
+    match = re.search(r'^[0-9]{4}$', value.get('c'))
+
+    # Date does not match "YYYY"
+    if not match:
+        return None
+
+    add_provision_activity_start_date(self, value.get('c'))
+
+    return None
+
+
+@overdo.over('dissertation', '^502..')
+@utils.ignore_value
+def dissertation_from_field_502(self, key, value):
+    """Extract dissertation degree."""
+    if not value.get('b'):
+        return None
+
+    dissertation = {'degree': value.get('b')}
+
+    if value.get('c'):
+        dissertation['grantingInstitution'] = value.get('c')
+
+    if value.get('d'):
+        dissertation['date'] = value.get('d')
+
+    return dissertation
+
+
+@overdo.over('partOf', '^773..')
+@utils.ignore_value
+def host_document_from_field_773(self, key, value):
+    """Host document."""
+    if not value.get('t'):
+        return None
+
+    part_of = {'document': {'title': value.get('t')}}
+
+    if not value.get('g'):
+        if self.get('provisionActivity'):
+            match = re.search(r'^(\d{4})',
+                              self['provisionActivity'][0]['startDate'])
+            part_of['numberingYear'] = match.group(1)
+    else:
+        # Year
+        match = re.search(r'\((\d{4})\)$', value.get('g'))
+        if match:
+            part_of['numberingYear'] = match.group(1)
+
+        # Volume
+        match = re.search(r'Bd\.\s(\d+)', value.get('g'))
+        if match:
+            part_of['numberingVolume'] = match.group(1)
+
+        # Issue
+        match = re.search(r'Nr\.\s(\d+)', value.get('g'))
+        if match:
+            part_of['numberingIssue'] = match.group(1)
+
+        # Pages
+        match = re.search(r'S\.\s(.+)\s\(', value.get('g'))
+        if match:
+            part_of['numberingPages'] = match.group(1)
+
+    if not part_of.get('numberingYear'):
+        return None
+
+    return [part_of]
+
+
+@overdo.over('contribution', '^[17]00..')
+@utils.ignore_value
+def contribution_from_field_100_700(self, key, value):
+    """Extract contribution from field 100 or 700."""
+    if not value.get('a'):
+        return None
+
+    contribution = self.get('contribution', [])
+
+    data = {
+        'agent': {
+            'type': 'bf:Person',
+            'preferred_name': value.get('a')
+        },
+        'role': ['cre' if value.get('4') == 'aut' else value.get('4')]
+    }
+
+    if value.get('0'):
+        match = re.search(r'^\(orcid\)(.*)$', value.get('0'))
+        if match:
+            data['agent']['identifiedBy'] = {
+                'type': 'bf:Local',
+                'source': 'ORCID',
+                'value': match.group(1)
+            }
+
+    contribution.append(data)
+    self['contribution'] = contribution
+
+    return None
+
+
+def add_provision_activity_start_date(data, date):
+    """Add start date for provision activity.
+
+    :param data: Data dictionary.
+    :param date: Date to add.
+    """
+    provisition_activity = data.get('provisionActivity', [])
+
+    def get_publication():
+        """Get stored publication."""
+        for key, item in enumerate(provisition_activity):
+            if item['type'] == 'bf:Publication':
+                return provisition_activity.pop(key)
+
+        return {'type': 'bf:Publication', 'startDate': None}
+
+    publication = get_publication()
+
+    publication['startDate'] = date
+
+    # Inject publication into provision activity
+    provisition_activity.append(publication)
+
+    # Re-assign provisionActivity
+    data['provisionActivity'] = provisition_activity
diff --git a/sonar/modules/documents/loaders/schemas/arodes.py b/sonar/modules/documents/loaders/schemas/arodes.py
new file mode 100644
index 000000000..5964fa6d3
--- /dev/null
+++ b/sonar/modules/documents/loaders/schemas/arodes.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2021 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Arodes schema."""
+
+from marshmallow import fields, pre_dump
+
+from sonar.modules.documents.dojson.arodes.model import overdo
+
+from .marc21 import Marc21Schema
+
+
+class ArodesSchema(Marc21Schema):
+    """Arodes schema."""
+
+    identifiedBy = fields.List(fields.Dict())
+    title = fields.List(fields.Dict())
+    documentType = fields.Str()
+    language = fields.List(fields.Dict())
+    abstracts = fields.List(fields.Dict())
+    oa_status = fields.Str()
+    provisionActivity = fields.List(fields.Dict())
+    subjects = fields.List(fields.Dict())
+    dissertation = fields.Dict()
+    partOf = fields.List(fields.Dict())
+    contribution = fields.List(fields.Dict())
+
+    @pre_dump
+    def process(self, obj, **kwargs):
+        """All the process is done by overdo."""
+        return overdo.do(obj)
diff --git a/sonar/modules/documents/loaders/schemas/dc.py b/sonar/modules/documents/loaders/schemas/dc.py
new file mode 100644
index 000000000..9eda82c20
--- /dev/null
+++ b/sonar/modules/documents/loaders/schemas/dc.py
@@ -0,0 +1,241 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2021 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Dublin core schema."""
+
+import re
+
+import xmltodict
+from marshmallow import Schema, fields, post_dump, pre_dump
+
+from sonar.modules.pdf_extractor.utils import force_list
+
+TYPE_MAPPINGS = {
+    'Book': 'coar:c_2f33',
+    'Book Section': 'coar:c_3248',
+    'Conference': 'coar:c_c94f',
+    'Workshop Item': 'coar:c_c94f',
+    'Research Data': 'coar:c_ddb1',
+    'Article': 'coar:c_6501',
+    'Newspaper': 'coar:c_998f',
+    'Magazine Article': 'coar:c_998f',
+    'Audiovisual Material & Event': 'non_textual_object',
+    'Preprint': 'coar:c_816b',
+    'Thesis': 'coar:c_db06',
+    'Working Paper': 'coar:c_8042',
+    'Other': 'coar:c_1843'
+}
+
+
+class DublinCoreSchema(Schema):
+    """Dublin Core marshmallow schema."""
+
+    identifiedBy = fields.Method('get_identifiers')
+    language = fields.Method('get_language')
+    title = fields.Method('get_title')
+    provisionActivity = fields.Method('get_provision_activity')
+    documentType = fields.Method('get_document_type')
+    abstracts = fields.Method('get_abstracts')
+    subjects = fields.Method('get_subjects')
+    contribution = fields.Method('get_contribution')
+
+    def dump(self, obj):
+        """Serialize an object to native Python data types.
+
+        :param obj: The object to serialize.
+        :returns: Serialized data
+        """
+        result = xmltodict.parse(obj)
+
+        if not result.get('record', {}).get('metadata', {}).get('oai_dc:dc'):
+            return None
+
+        record = result['record']['metadata']['oai_dc:dc']
+        record['id'] = result['record']['header']['identifier']
+
+        return super().dump(record)
+
+    @pre_dump
+    def store_language(self, item, **kwargs):
+        """Store language."""
+        item['languages'] = []
+
+        for language in force_list(item.get('dc:language', [])):
+            if language == 'deu':
+                language = 'ger'
+
+            if language == 'fra':
+                language = 'fre'
+
+            item['languages'].append(language)
+
+        if not item['languages']:
+            item['languages'] = ['eng']
+
+        return item
+
+    @post_dump
+    def remove_empty_values(self, data, **kwargs):
+        """Remove empty values before dumping data."""
+        return {key: value for key, value in data.items() if value}
+
+    def get_identifiers(self, obj):
+        """Get identifiers."""
+        identifiers = [{
+            'type': 'bf:Local',
+            'source': 'edoc',
+            'value': obj['id']
+        }]
+
+        if not obj.get('dc:identifier'):
+            return identifiers
+
+        for identifier in force_list(obj['dc:identifier']):
+            # DOI
+            match = re.search(r'^info:doi\/(.+)$', identifier)
+            if match:
+                identifiers.append({'type': 'bf:Doi', 'value': match.group(1)})
+                continue
+
+            # PMID
+            match = re.search(r'^info:pmid\/(.+)$', identifier)
+            if match:
+                identifiers.append({
+                    'type': 'bf:Local',
+                    'value': match.group(1),
+                    'source': 'PMID'
+                })
+                continue
+
+            # URN
+            match = re.search(r'^urn:(.+)$', identifier)
+            if match:
+                identifiers.append({'type': 'bf:Urn', 'value': match.group(1)})
+                continue
+
+            # Other identifier
+            identifiers.append({'type': 'bf:Identifier', 'value': identifier})
+
+        return identifiers
+
+    def get_language(self, obj):
+        """Get language."""
+        return [{
+            'type': 'bf:Language',
+            'value': item
+        } for item in obj['languages']]
+
+    def get_title(self, obj):
+        """Get title."""
+        title = 'Default title'
+        subtitle = None
+
+        if obj.get('dc:title'):
+            # Title + subtitle
+            match = re.search(r'^(.+)\s:\s(.+)$', obj['dc:title'])
+            if match:
+                title = match.group(1)
+                subtitle = match.group(2)
+            else:
+                title = obj.get('dc:title')
+
+        title = {
+            'type': 'bf:Title',
+            'mainTitle': [{
+                'value': title,
+                'language': obj['languages'][0]
+            }]
+        }
+
+        if subtitle:
+            title['subtitle'] = [{
+                'value': subtitle,
+                'language': obj['languages'][0]
+            }]
+
+        return [title]
+
+    def get_provision_activity(self, obj):
+        """Get provision activity."""
+        if not obj.get('dc:date'):
+            return None
+
+        match = re.search(r'^[0-9]{4}$', obj['dc:date'])
+
+        if not match:
+            return None
+
+        return [{'type': 'bf:Publication', 'startDate': obj['dc:date']}]
+
+    def get_document_type(self, obj):
+        """Get document type."""
+        for type in force_list(obj.get('dc:type', [])):
+            if TYPE_MAPPINGS.get(type):
+                return TYPE_MAPPINGS[type]
+
+        return TYPE_MAPPINGS['Other']
+
+    def get_abstracts(self, obj):
+        """Get abstracts."""
+        if not obj.get('dc:description'):
+            return None
+
+        return [{
+            'language': obj['languages'][0],
+            'value': obj['dc:description']
+        }]
+
+    def get_subjects(self, obj):
+        """Get subjects."""
+        if not obj.get('dc:subject'):
+            return []
+
+        subjects = []
+
+        for subject in force_list(obj.get('dc:subject', [])):
+            subjects.append(subject)
+
+        return [{
+            'label': {
+                'language': obj['languages'][0],
+                'value': subjects
+            }
+        }]
+
+    def get_contribution(self, obj):
+        """Get contribution."""
+        contributors = []
+
+        for creator in force_list(obj.get('dc:creator', [])):
+            contributors.append({
+                'agent': {
+                    'type': 'bf:Person',
+                    'preferred_name': creator
+                },
+                'role': ['cre']
+            })
+
+        for contributor in force_list(obj.get('dc:contributor', [])):
+            contributors.append({
+                'agent': {
+                    'type': 'bf:Person',
+                    'preferred_name': contributor
+                },
+                'role': ['ctb']
+            })
+
+        return contributors
diff --git a/sonar/modules/documents/loaders/schemas/edoc.py b/sonar/modules/documents/loaders/schemas/edoc.py
new file mode 100644
index 000000000..f54413d24
--- /dev/null
+++ b/sonar/modules/documents/loaders/schemas/edoc.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2021 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Edoc schema."""
+
+from .dc import DublinCoreSchema
+
+
+class EdocSchema(DublinCoreSchema):
+    """Edoc marshmallow schema, reusing the Dublin Core mapping as is."""
diff --git a/sonar/modules/documents/loaders/schemas/factory.py b/sonar/modules/documents/loaders/schemas/factory.py
index b9289484c..083b48d4b 100644
--- a/sonar/modules/documents/loaders/schemas/factory.py
+++ b/sonar/modules/documents/loaders/schemas/factory.py
@@ -18,8 +18,11 @@
 """Factory for creating a loader schema."""
 
 from .archive_ouverte_unige import ArchiveOuverteUnigeSchema
+from .arodes import ArodesSchema
 from .boris import BorisSchema
+from .edoc import EdocSchema
 from .rerodoc import RerodocSchema
+from .zora import ZoraSchema
 
 
 class LoaderSchemaFactory():
@@ -28,7 +31,10 @@ class LoaderSchemaFactory():
     schemas = {
         'rerodoc': RerodocSchema,
         'archive_ouverte_unige': ArchiveOuverteUnigeSchema,
-        'boris': BorisSchema
+        'boris': BorisSchema,
+        'arodes': ArodesSchema,
+        'zora': ZoraSchema,
+        'edoc': EdocSchema
     }
 
     @staticmethod
diff --git a/sonar/modules/documents/loaders/schemas/zora.py b/sonar/modules/documents/loaders/schemas/zora.py
new file mode 100644
index 000000000..f48abfcb1
--- /dev/null
+++ b/sonar/modules/documents/loaders/schemas/zora.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2021 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""ZORA schema."""
+
+from marshmallow import fields, pre_dump
+
+from sonar.modules.documents.dojson.zora.model import overdo
+
+from .marc21 import Marc21Schema
+
+
+class ZoraSchema(Marc21Schema):
+    """Marshmallow schema dumping ZORA records, based on the MARC21 schema."""
+
+    identifiedBy = fields.List(fields.Dict())
+    title = fields.List(fields.Dict())
+    documentType = fields.Str()
+    language = fields.List(fields.Dict())
+    abstracts = fields.List(fields.Dict())
+    provisionActivity = fields.List(fields.Dict())
+    dissertation = fields.Dict()
+    partOf = fields.List(fields.Dict())
+    contribution = fields.List(fields.Dict())
+
+    @pre_dump
+    def process(self, obj, **kwargs):
+        """Delegate the whole transformation to the ZORA dojson `overdo`."""
+        return overdo.do(obj)
diff --git a/sonar/modules/documents/receivers.py b/sonar/modules/documents/receivers.py
index a3f1572ad..8c689a889 100644
--- a/sonar/modules/documents/receivers.py
+++ b/sonar/modules/documents/receivers.py
@@ -73,8 +73,10 @@ def transform_harvested_records(sender=None, records=None, **kwargs):
         # Convert from Marc XML to JSON
         data = loader_schema.dump(str(harvested_record))
 
-        # Add transformed data to list
-        records.append(data)
+        # Avoid to import deleted records
+        if data and data.get('title'):
+            # Add transformed data to list
+            records.append(data)
 
     # Chunk record list and send celery task
     for chunk in list(chunks(records, CHUNK_SIZE)):
diff --git a/tests/unit/documents/loaders/test_arodes_loader.py b/tests/unit/documents/loaders/test_arodes_loader.py
new file mode 100644
index 000000000..1aea5e943
--- /dev/null
+++ b/tests/unit/documents/loaders/test_arodes_loader.py
@@ -0,0 +1,981 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2021 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Test ArODES record loader."""
+
+import pytest
+
+from sonar.modules.documents.loaders.schemas.arodes import ArodesSchema
+
+
+def test_title():
+    """Test title built from 245$a (main title) and 245$b (subtitle)."""
+    xml = """
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record></marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ArodesSchema().dump(xml) == {}
+
+    xml = """
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="245" ind1=" " ind2=" ">
+                    <marc:subfield code="a">Art and design as linked data :</marc:subfield>
+                    <marc:subfield code="b">the LODZ project (Linked Open Data Zurich)</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ArodesSchema().dump(xml) == {
+        'title': [{
+            'mainTitle': [{
+                'language': 'eng',
+                'value': 'Art and design as linked data :'
+            }],
+            'subtitle': [{
+                'language': 'eng',
+                'value': 'the LODZ project (Linked Open Data Zurich)'
+            }],
+            'type':
+            'bf:Title'
+        }]
+    }
+
+
+def test_identifiers():
+    """Test identifiers from 001 (local ID) and 024 (DOI with a value)."""
+    xml = """
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:controlfield tag="001">1972</marc:controlfield>
+                <marc:datafield tag="024" ind1="7" ind2=" ">
+                    <marc:subfield code="2">DOI</marc:subfield>
+                    <marc:subfield code="a">10.15291/libellarium.v9i2.256</marc:subfield>
+                </marc:datafield>
+                <marc:datafield tag="024" ind1="7" ind2=" ">
+                    <marc:subfield code="2">DOI</marc:subfield>
+                </marc:datafield>
+                <marc:datafield tag="024" ind1="7" ind2=" ">
+                    <marc:subfield code="2">UNKNOWN</marc:subfield>
+                    <marc:subfield code="a">1111</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ArodesSchema().dump(xml) == {
+        'identifiedBy': [
+            {
+                'source': 'ArODES',
+                'type': 'bf:Local',
+                'value': '1972'
+            },
+            {
+                'type': 'bf:Doi',
+                'value': '10.15291/libellarium.v9i2.256'
+            },
+        ]
+    }
+
+
+@pytest.mark.parametrize('document_type,result',
+                         [(None, None), ('other', 'coar:c_1843'),
+                          ('livre', 'coar:c_2f33'),
+                          ('chapitre', 'coar:c_3248'),
+                          ('conference', 'coar:c_5794'),
+                          ('scientifique', 'coar:c_6501'),
+                          ('professionnel', 'coar:c_3e5a'),
+                          ('rapport', 'coar:c_18ws'),
+                          ('THESES', 'coar:c_db06'),
+                          ('non-existing', 'coar:c_1843')])
+def test_document_type(document_type, result):
+    """Test document type mapped from 980$a; `None` covers the missing cases."""
+    if not document_type:
+        # No 980
+        xml = """
+        <record>
+            <metadata>
+                <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+                    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+                    <marc:record>
+                    </marc:record>
+                </marc:collection>
+            </metadata>
+        </record>
+        """
+        assert ArodesSchema().dump(xml) == {}
+
+        # No 980$a
+        xml = """
+        <record>
+            <metadata>
+                <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+                    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+                    <marc:record>
+                        <marc:datafield tag="980" ind1=" " ind2=" ">
+                        </marc:datafield>
+                    </marc:record>
+                </marc:collection>
+            </metadata>
+        </record>
+        """
+        assert ArodesSchema().dump(xml) == {}
+
+        return
+
+    xml = f"""
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="980" ind1=" " ind2=" ">
+                    <marc:subfield code="a">{document_type}</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ArodesSchema().dump(xml) == {'documentType': result}
+
+
+def test_language():
+    """Test languages read from 041$a, in repeated fields or subfields."""
+    # No 041
+    xml = """
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record></marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ArodesSchema().dump(xml) == {}
+
+    # No 041$a
+    xml = """
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="041" ind1=" " ind2=" ">
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ArodesSchema().dump(xml) == {}
+
+    # One language
+    xml = """
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="041" ind1=" " ind2=" ">
+                    <marc:subfield code="a">eng</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ArodesSchema().dump(xml) == {
+        'language': [{
+            'type': 'bf:Language',
+            'value': 'eng'
+        }]
+    }
+
+    # Multiple 041
+    xml = """
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="041" ind1=" " ind2=" ">
+                    <marc:subfield code="a">eng</marc:subfield>
+                </marc:datafield>
+                <marc:datafield tag="041" ind1=" " ind2=" ">
+                    <marc:subfield code="a">fre</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ArodesSchema().dump(xml) == {
+        'language': [{
+            'type': 'bf:Language',
+            'value': 'eng'
+        }, {
+            'type': 'bf:Language',
+            'value': 'fre'
+        }]
+    }
+
+    # Multiple 041$a
+    xml = """
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="041" ind1=" " ind2=" ">
+                    <marc:subfield code="a">eng</marc:subfield>
+                    <marc:subfield code="a">fre</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ArodesSchema().dump(xml) == {
+        'language': [{
+            'type': 'bf:Language',
+            'value': 'eng'
+        }, {
+            'type': 'bf:Language',
+            'value': 'fre'
+        }]
+    }
+
+
+def test_abstracts():
+    """Test abstracts from 520$a, with language in 520$9 (default `eng`)."""
+    # No 520
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record></marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # No 520$a
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="520" ind1=" " ind2=" ">
+                    <marc:subfield code="9">fre</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # No language --> default language `eng`
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="520" ind1=" " ind2=" ">
+                    <marc:subfield code="a">La Convention relative</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {
+        'abstracts': [{
+            'language': 'eng',
+            'value': 'La Convention relative'
+        }]
+    }
+
+    # One abstract
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="520" ind1=" " ind2=" ">
+                    <marc:subfield code="9">fre</marc:subfield>
+                    <marc:subfield code="a">La Convention relative</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {
+        'abstracts': [{
+            'language': 'fre',
+            'value': 'La Convention relative'
+        }]
+    }
+
+    # Multiple abstracts
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="520" ind1=" " ind2=" ">
+                    <marc:subfield code="9">fre</marc:subfield>
+                    <marc:subfield code="a">La Convention relative</marc:subfield>
+                </marc:datafield>
+                <marc:datafield tag="520" ind1=" " ind2=" ">
+                    <marc:subfield code="9">eng</marc:subfield>
+                    <marc:subfield code="a">The Convention</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {
+        'abstracts': [{
+            'language': 'fre',
+            'value': 'La Convention relative'
+        }, {
+            'language': 'eng',
+            'value': 'The Convention'
+        }]
+    }
+
+
+def test_oa_status():
+    """Test OA status from 906$a (`NONE` is ignored, `GOLD` gives `gold`)."""
+    # No 906
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record></marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # No 906$a
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="906" ind1=" " ind2=" "></marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # Value NONE is not exported
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="906" ind1=" " ind2=" ">
+                    <marc:subfield code="a">NONE</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # Valid value, exported in lower case
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="906" ind1=" " ind2=" ">
+                    <marc:subfield code="a">GOLD</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {'oa_status': 'gold'}
+
+
+def test_date():
+    """Test publication date from 269$a, falling back to 260$c."""
+    # No 269$a, no 260$c
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # 269, but no $a
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="269" ind1=" " ind2=" ">
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # 269$a, but wrong format.
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="269" ind1=" " ind2=" ">
+                    <marc:subfield code="a">wrong</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # 269$a OK
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="269" ind1=" " ind2=" ">
+                    <marc:subfield code="a">2019-01</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {
+        'provisionActivity': [{
+            'startDate': '2019-01-01',
+            'type': 'bf:Publication'
+        }]
+    }
+
+    # 260, but no $c
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="260" ind1=" " ind2=" ">
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # 260$c, but wrong format.
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="260" ind1=" " ind2=" ">
+                    <marc:subfield code="c">wrong</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # 260$c OK
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="260" ind1=" " ind2=" ">
+                    <marc:subfield code="c">2019-01</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {
+        'provisionActivity': [{
+            'startDate': '2019-01-01',
+            'type': 'bf:Publication'
+        }]
+    }
+
+    # 269$a and 260$c, 269 has priority
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="260" ind1=" " ind2=" ">
+                    <marc:subfield code="c">2020-01</marc:subfield>
+                </marc:datafield>
+                <marc:datafield tag="269" ind1=" " ind2=" ">
+                    <marc:subfield code="a">2019-01</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {
+        'provisionActivity': [{
+            'startDate': '2019-01-01',
+            'type': 'bf:Publication'
+        }]
+    }
+
+
+def test_subjects():
+    """Test subjects from 653$a, grouped by the language given in 653$9."""
+    # No 653
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # 653, but no $a
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="653" ind1=" " ind2=" ">
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # OK, but no language --> default language `eng`
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="653" ind1=" " ind2=" ">
+                    <marc:subfield code="a">subject 1</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {
+        'subjects': [{
+            'label': {
+                'language': 'eng',
+                'value': ['subject 1']
+            }
+        }]
+    }
+
+    # One subject with an explicit language
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="653" ind1=" " ind2=" ">
+                    <marc:subfield code="a">sujet 1</marc:subfield>
+                    <marc:subfield code="9">fre</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {
+        'subjects': [{
+            'label': {
+                'language': 'fre',
+                'value': ['sujet 1']
+            }
+        }]
+    }
+
+    # Multiple subjects, grouped by language
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="653" ind1=" " ind2=" ">
+                    <marc:subfield code="a">sujet 1</marc:subfield>
+                    <marc:subfield code="9">fre</marc:subfield>
+                </marc:datafield>
+                <marc:datafield tag="653" ind1=" " ind2=" ">
+                    <marc:subfield code="a">sujet 2</marc:subfield>
+                    <marc:subfield code="9">fre</marc:subfield>
+                </marc:datafield>
+                <marc:datafield tag="653" ind1=" " ind2=" ">
+                    <marc:subfield code="a">subject 1</marc:subfield>
+                    <marc:subfield code="9">eng</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {
+        'subjects': [{
+            'label': {
+                'language': 'fre',
+                'value': ['sujet 1', 'sujet 2']
+            }
+        }, {
+            'label': {
+                'language': 'eng',
+                'value': ['subject 1']
+            }
+        }]
+    }
+
+
+def test_dissertation():
+    """Test dissertation degree read from 502$b."""
+    # 502$b OK
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="502" ind1=" " ind2=" ">
+                    <marc:subfield code="b">Dissertation degree</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {
+        'dissertation': {
+            'degree': 'Dissertation degree'
+        }
+    }
+
+    # No 502
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # 502, but no $b
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="502" ind1=" " ind2=" ">
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+
+def test_host_document():
+    """Test host document from 773$t, with numbering read from 773$g."""
+    # No 773
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # No 773$t
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="773" ind1=" " ind2=" "></marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # No $g and no provision activity start date
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="773" ind1=" " ind2=" ">
+                    <marc:subfield code="t">Host document</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # No $g, numbering year taken from the provision activity start date
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="269" ind1=" " ind2=" ">
+                    <marc:subfield code="a">2019-01</marc:subfield>
+                </marc:datafield>
+                <marc:datafield tag="773" ind1=" " ind2=" ">
+                    <marc:subfield code="t">Host document</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {
+        'partOf': [{
+            'document': {
+                'title': 'Host document'
+            },
+            'numberingYear': '2019'
+        }],
+        'provisionActivity': [{
+            'startDate': '2019-01-01',
+            'type': 'bf:Publication'
+        }]
+    }
+
+    # $g parsed into year, volume, issue and pages
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="773" ind1=" " ind2=" ">
+                    <marc:subfield code="t">Host document</marc:subfield>
+                    <marc:subfield code="g">2015, vol. 37, no. 2, pp. 49-58</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {
+        'partOf': [{
+            'document': {
+                'title': 'Host document'
+            },
+            'numberingYear': '2015',
+            'numberingVolume': '37',
+            'numberingIssue': '2',
+            'numberingPages': '49-58'
+        }]
+    }
+
+
+def test_contribution():
+    """Test contributions from 700$a, with affiliation read from 700$u."""
+    # No 700
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # No 700$a
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="700" ind1=" " ind2=" "></marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {}
+
+    # One contributor
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="700" ind1=" " ind2=" ">
+                    <marc:subfield code="a">John Doe</marc:subfield>
+                    <marc:subfield code="u">RERO</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {
+        'contribution': [{
+            'agent': {
+                'preferred_name': 'John Doe',
+                'type': 'bf:Person'
+            },
+            'role': ['ctb'],
+            'affiliation': 'RERO'
+        }]
+    }
+
+    # Multiple contributors
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="700" ind1=" " ind2=" ">
+                    <marc:subfield code="a">John Doe</marc:subfield>
+                    <marc:subfield code="u">RERO</marc:subfield>
+                </marc:datafield>
+                <marc:datafield tag="700" ind1=" " ind2=" ">
+                    <marc:subfield code="a">Marc Landers</marc:subfield>
+                    <marc:subfield code="u">HES-SO Valais</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ArodesSchema().dump(xml) == {
+        'contribution': [{
+            'agent': {
+                'preferred_name': 'John Doe',
+                'type': 'bf:Person'
+            },
+            'role': ['ctb'],
+            'affiliation': 'RERO'
+        }, {
+            'agent': {
+                'preferred_name': 'Marc Landers',
+                'type': 'bf:Person'
+            },
+            'role': ['ctb'],
+            'affiliation': 'HES-SO Valais'
+        }]
+    }
diff --git a/tests/unit/documents/loaders/test_edoc_loader.py b/tests/unit/documents/loaders/test_edoc_loader.py
new file mode 100644
index 000000000..a90aef338
--- /dev/null
+++ b/tests/unit/documents/loaders/test_edoc_loader.py
@@ -0,0 +1,542 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2021 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Test edoc record loader."""
+
+import pytest
+
+from sonar.modules.documents.loaders.schemas.edoc import EdocSchema
+
+
+def test_no_record_metadata():
+    """Test that a record with only a header dumps to a falsy result."""
+    xml = """
+<record>
+    <header>
+        <identifier>oai:edoc.unibas.ch:4</identifier>
+    </header>
+</record>
+    """
+    assert not EdocSchema().dump(xml)
+
+
+def test_language():
+    """Test language: default value, one and multiple dc:language."""
+    # No dc:language --> default ("eng")
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:title>Title</dc:title>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['language'] == [{
+        'type': 'bf:Language',
+        'value': 'eng'
+    }]
+
+    # One language ("deu" is expected to be dumped as "ger")
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:language>deu</dc:language>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['language'] == [{
+        'type': 'bf:Language',
+        'value': 'ger'
+    }]
+
+    # Multiple languages, dumped in document order
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:language>deu</dc:language>
+            <dc:language>fra</dc:language>
+            <dc:language>eng</dc:language>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['language'] == [{
+        'type': 'bf:Language',
+        'value': 'ger'
+    }, {
+        'type': 'bf:Language',
+        'value': 'fre'
+    }, {
+        'type': 'bf:Language',
+        'value': 'eng'
+    }]
+
+
+def test_identifiers():
+    """Test identifiers: header identifier alone, then with dc:identifier."""
+    # No dc:identifier, only the header identifier
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:title>Title</dc:title>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['identifiedBy'] == [{
+        'type': 'bf:Local',
+        'source': 'edoc',
+        'value': '123456'
+    }]
+
+    # Header identifier plus plain, DOI, PMID and URN dc:identifier values
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:identifier>specific-id</dc:identifier>
+            <dc:identifier>info:doi/10.5451/unibas-001565177</dc:identifier>
+            <dc:identifier>info:pmid/1111</dc:identifier>
+            <dc:identifier>urn:urn:nbn:ch:bel-bau-diss47638</dc:identifier>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['identifiedBy'] == [{
+        'source': 'edoc',
+        'type': 'bf:Local',
+        'value': '123456'
+    }, {
+        'type': 'bf:Identifier',
+        'value': 'specific-id'
+    }, {
+        'type':
+        'bf:Doi',
+        'value':
+        '10.5451/unibas-001565177'
+    }, {
+        'source': 'PMID',
+        'type': 'bf:Local',
+        'value': '1111'
+    }, {
+        'type':
+        'bf:Urn',
+        'value':
+        'urn:nbn:ch:bel-bau-diss47638'
+    }]
+
+
+def test_title():
+    """Test title: default title, main title only, title with subtitle."""
+    # No dc:title --> default one
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:description>Description</dc:description>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['title'] == [{
+        'type':
+        'bf:Title',
+        'mainTitle': [{
+            'value': 'Default title',
+            'language': 'eng'
+        }]
+    }]
+
+    # Only title, no subtitle key expected
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:title>Title</dc:title>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['title'] == [{
+        'type':
+        'bf:Title',
+        'mainTitle': [{
+            'value': 'Title',
+            'language': 'eng'
+        }]
+    }]
+
+    # Title + subtitle given in one dc:title ("Title : Subtitle")
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:title>Title : Subtitle</dc:title>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['title'] == [{
+        'type':
+        'bf:Title',
+        'mainTitle': [{
+            'value': 'Title',
+            'language': 'eng'
+        }],
+        'subtitle': [{
+            'value': 'Subtitle',
+            'language': 'eng'
+        }]
+    }]
+
+
+def test_provision_activity():
+    """Test provision activity built from dc:date."""
+    # No dc:date --> no provision activity
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:description>Description</dc:description>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert 'provisionActivity' not in EdocSchema().dump(xml)
+
+    # Wrong date format --> no provision activity
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:date>wrong</dc:date>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert 'provisionActivity' not in EdocSchema().dump(xml)
+
+    # Valid year --> publication with start date
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:date>2019</dc:date>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['provisionActivity'] == [{
+        'type': 'bf:Publication',
+        'startDate': '2019'
+    }]
+
+
+def test_document_type():
+    """Test document type extracted from dc:type."""
+    # No document type --> other
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:title>Title</dc:title>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['documentType'] == 'coar:c_1843'
+
+    # Multiple, takes only the first
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:type>Thesis</dc:type>
+            <dc:type>NonPeerReviewed</dc:type>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['documentType'] == 'coar:c_db06'
+
+    # Non-existing type --> other
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:type>Unknown</dc:type>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['documentType'] == 'coar:c_1843'
+
+
+@pytest.mark.parametrize(
+    'document_type,result',
+    [('Book', 'coar:c_2f33'), ('Book Section', 'coar:c_3248'),
+     ('Conference', 'coar:c_c94f'), ('Workshop Item', 'coar:c_c94f'),
+     ('Research Data', 'coar:c_ddb1'), ('Article', 'coar:c_6501'),
+     ('Newspaper', 'coar:c_998f'), ('Magazine Article', 'coar:c_998f'),
+     ('Audiovisual Material &amp; Event', 'non_textual_object'),
+     ('Preprint', 'coar:c_816b'), ('Thesis', 'coar:c_db06'),
+     ('Working Paper', 'coar:c_8042'), ('Other', 'coar:c_1843')])
+def test_document_type_mappings(document_type, result):
+    """Test that each dc:type value is dumped as the expected document type."""
+    xml = f"""
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:type>{document_type}</dc:type>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['documentType'] == result
+
+
+def test_abstracts():
+    """Test abstracts built from dc:description."""
+    # No dc:description --> no abstract
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:title>Title</dc:title>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert 'abstracts' not in EdocSchema().dump(xml)
+
+    # One abstract, dumped with the default language
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:description>Description</dc:description>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['abstracts'] == [{
+        'language': 'eng',
+        'value': 'Description'
+    }]
+
+
+def test_subjects():
+    """Test subjects built from dc:subject."""
+    # No dc:subject --> no subjects
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:title>Title</dc:title>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert 'subjects' not in EdocSchema().dump(xml)
+
+    # One subject
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:subject>Subject 1</dc:subject>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['subjects'] == [{
+        'label': {
+            'language': 'eng',
+            'value': ['Subject 1']
+        }
+    }]
+
+    # Multiple subjects, grouped in a single label entry
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:subject>Subject 1</dc:subject>
+            <dc:subject>Subject 2</dc:subject>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['subjects'] == [{
+        'label': {
+            'language': 'eng',
+            'value': ['Subject 1', 'Subject 2']
+        }
+    }]
+
+
+def test_contribution():
+    """Test contribution built from dc:creator and dc:contributor."""
+    # No contribution
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:title>Title</dc:title>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert 'contribution' not in EdocSchema().dump(xml)
+
+    # OK, one creator, multiple contributors (creators are listed first)
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:creator>Creator</dc:creator>
+            <dc:contributor>Contributor 1</dc:contributor>
+            <dc:contributor>Contributor 2</dc:contributor>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['contribution'] == [{
+        'agent': {
+            'type': 'bf:Person',
+            'preferred_name': 'Creator'
+        },
+        'role': ['cre']
+    }, {
+        'agent': {
+            'type': 'bf:Person',
+            'preferred_name': 'Contributor 1'
+        },
+        'role': ['ctb']
+    }, {
+        'agent': {
+            'type': 'bf:Person',
+            'preferred_name': 'Contributor 2'
+        },
+        'role': ['ctb']
+    }]
+
+    # OK, multiple creators, one contributor
+    xml = """
+<record>
+    <header>
+        <identifier>123456</identifier>
+    </header>
+    <metadata>
+        <oai_dc:dc>
+            <dc:creator>Creator 1</dc:creator>
+            <dc:creator>Creator 2</dc:creator>
+            <dc:contributor>Contributor</dc:contributor>
+        </oai_dc:dc>
+    </metadata>
+</record>
+    """
+    assert EdocSchema().dump(xml)['contribution'] == [{
+        'agent': {
+            'type': 'bf:Person',
+            'preferred_name': 'Creator 1'
+        },
+        'role': ['cre']
+    }, {
+        'agent': {
+            'type': 'bf:Person',
+            'preferred_name': 'Creator 2'
+        },
+        'role': ['cre']
+    }, {
+        'agent': {
+            'type': 'bf:Person',
+            'preferred_name': 'Contributor'
+        },
+        'role': ['ctb']
+    }]
diff --git a/tests/unit/documents/loaders/test_zora_loader.py b/tests/unit/documents/loaders/test_zora_loader.py
new file mode 100644
index 000000000..65f662732
--- /dev/null
+++ b/tests/unit/documents/loaders/test_zora_loader.py
@@ -0,0 +1,712 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2021 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Test ZORA record loader."""
+
+import pytest
+
+from sonar.modules.documents.loaders.schemas.zora import ZoraSchema
+
+
+def test_title():
+    """Test title: empty record, then 245$a (main title) and 245$b (subtitle)."""
+    xml = """
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record></marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ZoraSchema().dump(xml) == {}
+
+    xml = """
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="245" ind1=" " ind2=" ">
+                    <marc:subfield code="a">Art and design as linked data :</marc:subfield>
+                    <marc:subfield code="b">the LODZ project (Linked Open Data Zurich)</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ZoraSchema().dump(xml) == {
+        'title': [{
+            'mainTitle': [{
+                'language': 'eng',
+                'value': 'Art and design as linked data :'
+            }],
+            'subtitle': [{
+                'language': 'eng',
+                'value': 'the LODZ project (Linked Open Data Zurich)'
+            }],
+            'type':
+            'bf:Title'
+        }]
+    }
+
+
+def test_identifiers():
+    """Test identifiers from 001 and 024 (DOI, 024 without $a, PMID, unknown)."""
+    xml = """
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:controlfield tag="001">1972</marc:controlfield>
+                <marc:datafield tag="024" ind1="7" ind2=" ">
+                    <marc:subfield code="2">doi</marc:subfield>
+                    <marc:subfield code="a">10.15291/libellarium.v9i2.256</marc:subfield>
+                </marc:datafield>
+                <marc:datafield tag="024" ind1="7" ind2=" ">
+                    <marc:subfield code="2">doi</marc:subfield>
+                </marc:datafield>
+                <marc:datafield tag="024" ind1="7" ind2=" ">
+                    <marc:subfield code="2">pmid</marc:subfield>
+                    <marc:subfield code="a">2222</marc:subfield>
+                </marc:datafield>
+                <marc:datafield tag="024" ind1="7" ind2=" ">
+                    <marc:subfield code="2">UNKNOWN</marc:subfield>
+                    <marc:subfield code="a">1111</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ZoraSchema().dump(xml) == {
+        'identifiedBy': [{
+            'source': 'ZORA',
+            'type': 'bf:Local',
+            'value': '1972'
+        }, {
+            'type': 'bf:Doi',
+            'value': '10.15291/libellarium.v9i2.256'
+        }, {
+            'type': 'bf:Local',
+            'value': '2222',
+            'source': 'PMID'
+        }, {
+            'type': 'bf:Identifier',
+            'value': '1111'
+        }]
+    }
+
+
+@pytest.mark.parametrize('source, value, result, dissertation', [
+    (None, None, None, None),
+    ('local', 'Herausgegebenes wissenschaftliches Werk', 'coar:c_2f33', None),
+    ('local', 'Monografie', 'coar:c_2f33', None),
+    ('local', 'Buchkapitel', 'coar:c_3248', None),
+    ('local', 'Konferenzbeitrag', 'coar:c_5794', None),
+    ('local', 'Artikel', 'coar:c_6501', None),
+    ('local', 'Zeitungsartikel', 'coar:c_998f', None),
+    ('gnd-content', 'Forschungsbericht', 'coar:c_18ws', None),
+    ('gnd-content', 'Hochschulschrift', 'coar:c_db06', 'Dissertation'),
+    ('gnd-content', 'Hochschulschrift', 'coar:c_bdcc', 'Masterarbeit'),
+    ('gnd-content', 'Hochschulschrift', 'habilitation_thesis', 'Habilitation'),
+    ('local', 'Working Paper', 'coar:c_8042', None),
+    ('local', 'non-existing', 'coar:c_1843', None)
+])
+def test_document_type(source, value, result, dissertation):
+    """Test document type from 655 ($a value, $2 source) and 502$b degree."""
+    if not source:
+        # No 655 --> nothing dumped
+        xml = """
+        <record>
+            <metadata>
+                <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+                    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+                    <marc:record>
+                    </marc:record>
+                </marc:collection>
+            </metadata>
+        </record>
+        """
+        assert ZoraSchema().dump(xml) == {}
+
+        # No 655$a --> nothing dumped
+        xml = """
+        <record>
+            <metadata>
+                <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+                    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+                    <marc:record>
+                        <marc:datafield tag="655" ind1=" " ind2=" ">
+                        </marc:datafield>
+                    </marc:record>
+                </marc:collection>
+            </metadata>
+        </record>
+        """
+        assert ZoraSchema().dump(xml) == {}
+
+        # No 655$2 --> nothing dumped
+        xml = """
+        <record>
+            <metadata>
+                <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+                    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+                    <marc:record>
+                        <marc:datafield tag="655" ind1=" " ind2=" ">
+                            <marc:subfield code="a">Doc type</marc:subfield>
+                        </marc:datafield>
+                    </marc:record>
+                </marc:collection>
+            </metadata>
+        </record>
+        """
+        assert ZoraSchema().dump(xml) == {}
+
+        return
+
+    xml = f"""
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="502" ind1=" " ind2=" ">
+                    <marc:subfield code="b">{dissertation}</marc:subfield>
+                </marc:datafield>
+                <marc:datafield tag="655" ind1=" " ind2=" ">
+                    <marc:subfield code="a">{value}</marc:subfield>
+                    <marc:subfield code="2">{source}</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ZoraSchema().dump(xml)['documentType'] == result
+
+
+def test_language():
+    """Test language extracted from 041$a."""
+    # No 041 --> nothing dumped
+    xml = """
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record></marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ZoraSchema().dump(xml) == {}
+
+    # No 041$a --> nothing dumped
+    xml = """
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="041" ind1=" " ind2=" ">
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ZoraSchema().dump(xml) == {}
+
+    # One language
+    xml = """
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="041" ind1=" " ind2=" ">
+                    <marc:subfield code="a">eng</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ZoraSchema().dump(xml) == {
+        'language': [{
+            'type': 'bf:Language',
+            'value': 'eng'
+        }]
+    }
+
+    # Multiple 041 fields, one $a each
+    xml = """
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="041" ind1=" " ind2=" ">
+                    <marc:subfield code="a">eng</marc:subfield>
+                </marc:datafield>
+                <marc:datafield tag="041" ind1=" " ind2=" ">
+                    <marc:subfield code="a">fre</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ZoraSchema().dump(xml) == {
+        'language': [{
+            'type': 'bf:Language',
+            'value': 'eng'
+        }, {
+            'type': 'bf:Language',
+            'value': 'fre'
+        }]
+    }
+
+    # One 041 field with multiple $a
+    xml = """
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="041" ind1=" " ind2=" ">
+                    <marc:subfield code="a">eng</marc:subfield>
+                    <marc:subfield code="a">fre</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    """
+    assert ZoraSchema().dump(xml) == {
+        'language': [{
+            'type': 'bf:Language',
+            'value': 'eng'
+        }, {
+            'type': 'bf:Language',
+            'value': 'fre'
+        }]
+    }
+
+
+def test_abstracts():
+    """Test abstracts extracted from 520 ($a text, $9 language)."""
+    # No 520 --> nothing dumped
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record></marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ZoraSchema().dump(xml) == {}
+
+    # No 520$a --> nothing dumped
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="520" ind1=" " ind2=" ">
+                    <marc:subfield code="9">fre</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ZoraSchema().dump(xml) == {}
+
+    # No language ($9) --> default language
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="520" ind1=" " ind2=" ">
+                    <marc:subfield code="a">La Convention relative</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ZoraSchema().dump(xml) == {
+        'abstracts': [{
+            'language': 'eng',
+            'value': 'La Convention relative'
+        }]
+    }
+
+    # One abstract with its language
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="520" ind1=" " ind2=" ">
+                    <marc:subfield code="9">fre</marc:subfield>
+                    <marc:subfield code="a">La Convention relative</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ZoraSchema().dump(xml) == {
+        'abstracts': [{
+            'language': 'fre',
+            'value': 'La Convention relative'
+        }]
+    }
+
+    # Multiple abstracts, one per 520 field
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="520" ind1=" " ind2=" ">
+                    <marc:subfield code="9">fre</marc:subfield>
+                    <marc:subfield code="a">La Convention relative</marc:subfield>
+                </marc:datafield>
+                <marc:datafield tag="520" ind1=" " ind2=" ">
+                    <marc:subfield code="9">eng</marc:subfield>
+                    <marc:subfield code="a">The Convention</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ZoraSchema().dump(xml) == {
+        'abstracts': [{
+            'language': 'fre',
+            'value': 'La Convention relative'
+        }, {
+            'language': 'eng',
+            'value': 'The Convention'
+        }]
+    }
+
+
+def test_date():
+    """Test publication date extracted from 264$c."""
+    # No 264$c --> nothing dumped
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ZoraSchema().dump(xml) == {}
+
+    # 264$c, but wrong format --> nothing dumped
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="264" ind1=" " ind2=" ">
+                    <marc:subfield code="c">wrong</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ZoraSchema().dump(xml) == {}
+
+    # 264$c OK --> publication with start date
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="264" ind1=" " ind2=" ">
+                    <marc:subfield code="c">2019</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ZoraSchema().dump(xml) == {
+        'provisionActivity': [{
+            'startDate': '2019',
+            'type': 'bf:Publication'
+        }]
+    }
+
+
+def test_dissertation():
+    """Test dissertation extracted from 502 ($b degree, $c institution, $d date)."""
+    # OK, all subfields present
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="502" ind1=" " ind2=" ">
+                    <marc:subfield code="b">Dissertation degree</marc:subfield>
+                    <marc:subfield code="c">Universität Zürich</marc:subfield>
+                    <marc:subfield code="d">2007</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ZoraSchema().dump(xml) == {
+        'dissertation': {
+            'degree': 'Dissertation degree',
+            'grantingInstitution': 'Universität Zürich',
+            'date': '2007'
+        }
+    }
+
+    # No 502 --> nothing dumped
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ZoraSchema().dump(xml) == {}
+
+    # 502, but no $b --> nothing dumped
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="502" ind1=" " ind2=" ">
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ZoraSchema().dump(xml) == {}
+
+
+def test_host_document():
+    """Test host document extracted from 773 ($t title, $g numbering)."""
+    # No 773 --> nothing dumped
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ZoraSchema().dump(xml) == {}
+
+    # No 773$t --> nothing dumped
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="773" ind1=" " ind2=" "></marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ZoraSchema().dump(xml) == {}
+
+    # No $g, no provision activity start date --> nothing dumped
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="773" ind1=" " ind2=" ">
+                    <marc:subfield code="t">Host document</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ZoraSchema().dump(xml) == {}
+
+    # No $g, with provision activity start date (264$c) used as year
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="264" ind1=" " ind2=" ">
+                    <marc:subfield code="c">2019</marc:subfield>
+                </marc:datafield>
+                <marc:datafield tag="773" ind1=" " ind2=" ">
+                    <marc:subfield code="t">Host document</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ZoraSchema().dump(xml) == {
+        'partOf': [{
+            'document': {
+                'title': 'Host document'
+            },
+            'numberingYear': '2019'
+        }],
+        'provisionActivity': [{
+            'startDate': '2019',
+            'type': 'bf:Publication'
+        }]
+    }
+
+    # OK, year, volume, issue and pages taken from $g
+    xml = '''
+<record>
+    <metadata>
+        <marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
+            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+            <marc:record>
+                <marc:datafield tag="773" ind1=" " ind2=" ">
+                    <marc:subfield code="t">Host document</marc:subfield>
+                    <marc:subfield code="g">Bd. 16, Nr. 3, S. 411-413 (2002)</marc:subfield>
+                </marc:datafield>
+            </marc:record>
+        </marc:collection>
+    </metadata>
+</record>
+    '''
+    assert ZoraSchema().dump(xml) == {
+        'partOf': [{
+            'document': {
+                'title': 'Host document'
+            },
+            'numberingYear': '2002',
+            'numberingVolume': '16',
+            'numberingIssue': '3',
+            'numberingPages': '411-413'
+        }]
+    }
+
+
+def test_contribution_from_field_100():
+    """Test extracting a contribution from MARC field 100."""
+    # Full 100 field: "(orcid)" $0 is kept as identifier, role is 'cre'
+    xml = """
+    <record>
+        <datafield tag="100" ind1=" " ind2=" ">
+            <subfield code="a">Romagnani, Andrea</subfield>
+            <subfield code="e">VerfasserIn</subfield>
+            <subfield code="4">aut</subfield>
+            <subfield code="0">(orcid)0000-0003-3669-3497</subfield>
+        </datafield>
+    </record>
+    """
+    data = ZoraSchema().dump(xml)
+    assert data.get('contribution') == [{
+        'agent': {
+            'type': 'bf:Person',
+            'preferred_name': 'Romagnani, Andrea',
+            'identifiedBy': {
+                'type': 'bf:Local',
+                'source': 'ORCID',
+                'value': '0000-0003-3669-3497'
+            }
+        },
+        'role': ['cre']
+    }]
+
+    # No $a: no contribution is dumped
+    xml = """
+    <record>
+        <datafield tag="100" ind1=" " ind2=" ">
+        </datafield>
+    </record>
+    """
+    data = ZoraSchema().dump(xml)
+    assert not data.get('contribution')
+
+
+def test_contribution_from_field_700():
+    """Test extracting a contribution from MARC field 700."""
+    # $0 is not an ORCID ("non-orcid"): agent has no identifiedBy key
+    xml = """
+    <record>
+        <datafield tag="700" ind1=" " ind2=" ">
+            <subfield code="a">Romagnani, Andrea</subfield>
+            <subfield code="e">AkademischeR BetreuerIn</subfield>
+            <subfield code="4">dgs</subfield>
+            <subfield code="0">non-orcid</subfield>
+        </datafield>
+    </record>
+    """
+    data = ZoraSchema().dump(xml)
+    assert data.get('contribution') == [{
+        'agent': {
+            'type': 'bf:Person',
+            'preferred_name': 'Romagnani, Andrea'
+        },
+        'role': ['dgs']
+    }]
+
+    # No $a: no contribution is dumped
+    xml = """
+    <record>
+        <datafield tag="700" ind1=" " ind2=" ">
+        </datafield>
+    </record>
+    """
+    data = ZoraSchema().dump(xml)
+    assert not data.get('contribution')