-
Notifications
You must be signed in to change notification settings - Fork 3
/
DNBXmlFilter.inc.php
461 lines (411 loc) · 19 KB
/
DNBXmlFilter.inc.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
<?php
/**
* @file plugins/importexport/dnb/filter/DNBXmlFilter.inc.php
*
* Copyright (c) 2021 Center for Digital Systems (CeDiS), Universitätsbibliothek Freie Universität Berlin
* Distributed under the GNU GPL v3. For full terms see the plugin file LICENSE.
* Author: Bozana Bokan, Ronald Steffen
*
* @class DNBXmlFilter
* @ingroup plugins_importexport_dnb
*
* @brief Class that converts an Article to a DNB XML document.
*/
import('lib.pkp.plugins.importexport.native.filter.NativeExportFilter');
// Codes passed as the ErrorException code by DNBXmlFilter so callers can tell the failures apart:
// a subfield value contains a character rejected by DNBXmlFilter::createSubfieldNode()
define('XML_NON_VALID_CHARCTERS_EXCEPTION', 100);
// none of the submission's contributors passed DNBXmlFilter::_filterAuthors()
define('FIRST_AUTHOR_NOT_REGISTERED_EXCEPTION', 102);
// a URN is stored on the submission (article) itself, DNBXmlFilter::process() aborts in that case
define('URN_SET_EXCEPTION', 101);
// Message of the exception thrown with the code URN_SET_EXCEPTION
define('MESSAGE_URN_SET','An URN has been set.'); // @RS refine
// Fixed text written to MARC field 300 subfield e when the submission data 'hasSupplementary' is set
define('DNB_MSG_SUPPLEMENTARY','Begleitmaterial');
// Fixed text written to MARC field 500 subfield a when the submission data 'supplementaryNotAssignable' is set
define('DNB_MSG_SUPPLEMENTARY_AMBIGUOUS','Artikel in verschiedenen Dokumentversionen mit Begleitmaterial veröffentlicht');
class DNBXmlFilter extends NativeExportFilter {
/**
* Constructor
*
* Sets the display name of this filter to 'DNB XML export' and then hands the
* filter group over to the parent constructor.
* @param $filterGroup FilterGroup
*/
function __construct($filterGroup) {
// the display name is set before the parent constructor runs; keep this order
$this->setDisplayName('DNB XML export');
parent::__construct($filterGroup);
}
//
// Implement template methods from PersistableFilter
//
/**
 * Report the dotted class path under which this filter is known.
 *
 * @copydoc PersistableFilter::getClassName()
 * @return string
 */
function getClassName() {
	$className = 'plugins.importexport.dnb.filter.DNBXmlFilter';
	return $className;
}
//
// Implement template methods from Filter
//
/**
 * Convert a single article galley into a DNB MARC21 XML document.
 *
 * Loads the submission and the issue the galley belongs to (via the plugin
 * cache where possible) and writes one MARC record whose fields are appended
 * in ascending MARC field number order.
 *
 * @see Filter::process()
 * @param $pubObject ArticleGalley
 * @return DOMDocument
 * @throws ErrorException With code URN_SET_EXCEPTION if a URN is stored on the
 *  submission, with code FIRST_AUTHOR_NOT_REGISTERED_EXCEPTION if no contributor
 *  passes _filterAuthors(), with code XML_NON_VALID_CHARCTERS_EXCEPTION if
 *  createSubfieldNode() rejects a value, and without a specific code if the
 *  submission or its issue cannot be loaded.
 */
function &process(&$pubObject) {
	// Create the XML document
	$doc = new DOMDocument('1.0', 'utf-8');
	$doc->preserveWhiteSpace = false;
	$doc->formatOutput = true;

	// prepare basic application objects required later
	$deployment = $this->getDeployment();
	$journal = $deployment->getContext();
	$plugin = $deployment->getPlugin();
	$cache = $plugin->getCache();
	$request = Application::get()->getRequest();

	// Get all required data objects
	$issue = $submission = $galleyFile = null;
	$galley = $pubObject;
	$galleyLocale = $galley->getLocale();
	$submissionId = $galley->getData('submissionId');
	if ($cache->isCached('articles', $submissionId)) {
		$submission = $cache->get('articles', $submissionId);
	} else {
		$submission = Services::get('submission')->get($submissionId);
		if ($submission) $cache->add($submission, null);
	}
	if (!$submission) {
		// fail with a catchable exception instead of a fatal "call on null" below
		throw new ErrorException('DNBXmlFilter Error: submission ' . $submissionId . ' could not be loaded.');
	}

	$issueDao = DAORegistry::getDAO('IssueDAO'); /* @var $issueDao IssueDAO */
	$submissionIssue = $issueDao->getBySubmissionId($submission->getId());
	if (!$submissionIssue) {
		throw new ErrorException('DNBXmlFilter Error: no issue found for submission ' . $submissionId . '.');
	}
	$issueId = $submissionIssue->getId();
	if ($cache->isCached('issues', $issueId)) {
		$issue = $cache->get('issues', $issueId);
	} else {
		$issue = $issueDao->getById($issueId, $journal->getId());
		if ($issue) $cache->add($issue, null);
	}
	if (!$issue) {
		throw new ErrorException('DNBXmlFilter Error: issue ' . $issueId . ' could not be loaded.');
	}

	// abort export in case any URN is set on the submission/article level, this is a special case that has to be discussed with DNB and implemented differently in each case
	$submissionURN = $submission->getStoredPubId('other::urnDNB');
	if (empty($submissionURN)) $submissionURN = $submission->getStoredPubId('other::urn');
	if (!empty($submissionURN)) {
		throw new ErrorException(MESSAGE_URN_SET, URN_SET_EXCEPTION);
	}

	// Data we will need later
	$language = AppLocale::get3LetterIsoFromLocale($galleyLocale);
	$datePublished = $submission->getDatePublished();
	if (!$datePublished) $datePublished = $issue->getDatePublished();
	assert(!empty($datePublished));
	$publishedTimestamp = strtotime($datePublished);
	$yearYYYY = date('Y', $publishedTimestamp);
	$yearYY = date('y', $publishedTimestamp);
	$month = date('m', $publishedTimestamp);
	$day = date('d', $publishedTimestamp);

	$contributors = $submission->getAuthors();
	// extract submission authors
	$authors = array_filter($contributors, array($this, '_filterAuthors'));
	// initialise explicitly: without any author the variable would otherwise be undefined below
	$firstAuthor = null;
	if (is_array($authors) && !empty($authors)) {
		// get and remove first author from the array
		// so the array can be used later in the field 700 1 _
		$firstAuthor = array_shift($authors);
	}
	if (!$firstAuthor) {
		throw new ErrorException("DNBXmlFilter Error: ", FIRST_AUTHOR_NOT_REGISTERED_EXCEPTION);
	}
	// extract submission translators
	$translators = array_filter($contributors, array($this, '_filterTranslators'));

	// is open access
	$openAccess = false;
	$publishingMode = $journal->getSetting('publishingMode');
	if ($publishingMode == PUBLISHING_MODE_OPEN) {
		$openAccess = true;
	} else if ($publishingMode == PUBLISHING_MODE_SUBSCRIPTION) {
		$issueAccessStatus = $issue->getAccessStatus();
		if ($issueAccessStatus == 0 || $issueAccessStatus == ISSUE_ACCESS_OPEN) {
			$openAccess = true;
		} else if ($issueAccessStatus == ISSUE_ACCESS_SUBSCRIPTION) {
			if ($submission->getAccessStatus() == ARTICLE_ACCESS_OPEN) {
				$openAccess = true;
			}
		}
	}
	$archiveAccess = $plugin->getSetting($journal->getId(), 'archiveAccess');
	assert($openAccess || $archiveAccess);

	// Create the root node
	$rootNode = $this->createRootNode($doc);
	$doc->appendChild($rootNode);

	// record node
	$namespace = $deployment->getNamespace();
	$recordNode = $doc->createElementNS($namespace, 'record');
	$rootNode->appendChild($recordNode);

	// leader
	$recordNode->appendChild($node = $doc->createElementNS($namespace, 'leader', '00000naa a2200000 u 4500'));

	// now follow all fields ordered by MARC field number
	// control fields: 001, 007 and 008
	$recordNode->appendChild($node = $doc->createElementNS($namespace, 'controlfield', $galley->getId()));
	$node->setAttribute('tag', '001');
	$recordNode->appendChild($node = $doc->createElementNS($namespace, 'controlfield', ' cr |||||||||||'));
	$node->setAttribute('tag', '007');
	$recordNode->appendChild($node = $doc->createElementNS($namespace, 'controlfield', $yearYY.$month.$day.'s'.$yearYYYY.'||||xx#|||| ||||| ||||| '.$language.'||'));
	$node->setAttribute('tag', '008');

	// urn
	$urn = $galley->getStoredPubId('other::urnDNB');
	if (empty($urn)) $urn = $galley->getStoredPubId('other::urn');
	if (!empty($urn)) {
		$urnDatafield024 = $this->createDatafieldNode($doc, $recordNode, '024', '7', ' ');
		$this->createSubfieldNode($doc, $urnDatafield024, 'a', $urn);
		$this->createSubfieldNode($doc, $urnDatafield024, '2', 'urn');
	}

	// DOI
	// according to the latest arrangement with DNB both, article and galley DOIs will be submitted to the DNB
	$galleyDOI = $galley->getStoredPubId('doi');
	if (!empty($galleyDOI)) {
		$doiDatafield024 = $this->createDatafieldNode($doc, $recordNode, '024', '7', ' ');
		$this->createSubfieldNode($doc, $doiDatafield024, 'a', $galleyDOI);
		$this->createSubfieldNode($doc, $doiDatafield024, '2', 'doi');
	}
	$submissionDOI = $submission->getStoredPubId('doi');
	if (!empty($submissionDOI)) {
		$doiDatafield024 = $this->createDatafieldNode($doc, $recordNode, '024', '7', ' ');
		$this->createSubfieldNode($doc, $doiDatafield024, 'a', $submissionDOI);
		$this->createSubfieldNode($doc, $doiDatafield024, '2', 'doi');
	}

	// plugin version
	$datafield040 = $this->createDatafieldNode($doc, $recordNode, '040', ' ', ' ');
	$versionDao = DAORegistry::getDAO('VersionDAO'); /* @var $versionDao VersionDAO */
	$version = $versionDao->getCurrentVersion('plugins.importexport', $plugin->getPluginSettingsPrefix(), true);
	$this->createSubfieldNode($doc, $datafield040, 'a', "OJS DNB-Export-Plugin Version ".$version->getVersionString());

	// language
	$datafield041 = $this->createDatafieldNode($doc, $recordNode, '041', ' ', ' ');
	$this->createSubfieldNode($doc, $datafield041, 'a', $language);

	// access to the archived article
	$datafield093 = $this->createDatafieldNode($doc, $recordNode, '093', ' ', ' ');
	if ($openAccess) {
		$this->createSubfieldNode($doc, $datafield093, 'b', 'b');
	} else {
		$this->createSubfieldNode($doc, $datafield093, 'b', $archiveAccess);
	}

	// first author
	$this->_createContributorDatafield($doc, $recordNode, '100', $firstAuthor, 'aut');

	// title
	$title = $submission->getTitle($galleyLocale);
	if (empty($title)) $title = $submission->getTitle($submission->getLocale());
	assert(!empty($title));
	//remove line breaks in case DNB doesn't like them (they are allowed in XML 1.0 spec)
	$title = preg_replace("#[\s\n\r]+#",' ',$title);
	$datafield245 = $this->createDatafieldNode($doc, $recordNode, '245', '0', '0');
	$this->createSubfieldNode($doc, $datafield245, 'a', $title);

	// subtitle
	$subTitle = $submission->getSubtitle($galleyLocale);
	if (empty($subTitle)) $subTitle = $submission->getSubtitle($submission->getLocale());
	if (!empty($subTitle)) {
		//remove line breaks in case DNB doesn't like them (they are allowed in XML 1.0 spec)
		$subTitle = preg_replace("#[\s\n\r]+#",' ',$subTitle);
		$this->createSubfieldNode($doc, $datafield245, 'b', $subTitle);
	}

	// date published
	$datafield264 = $this->createDatafieldNode($doc, $recordNode, '264', ' ', '1');
	$this->createSubfieldNode($doc, $datafield264, 'c', $yearYYYY);

	// this package will be delivered including supplementary material
	if ($submission->getData('hasSupplementary')) {
		// !!! Do not change this message without consultation of the DNB !!!
		$datafield300 = $this->createDatafieldNode($doc, $recordNode, '300', ' ', ' ');
		$this->createSubfieldNode($doc, $datafield300, 'e', DNB_MSG_SUPPLEMENTARY);
	}

	// article level URN (only if galley level URN does not exist)
	// NOTE(review): the export is aborted above whenever a submission level URN is stored,
	// so this branch looks unreachable at the moment; it is kept in case the abort is lifted.
	if (empty($urn)) {
		$submissionURN = $submission->getStoredPubId('other::urnDNB');
		if (empty($submissionURN)) $submissionURN = $submission->getStoredPubId('other::urn');
		if (!empty($submissionURN)) {
			$urnDatafield500 = $this->createDatafieldNode($doc, $recordNode, '500', ' ', ' ');
			$this->createSubfieldNode($doc, $urnDatafield500, 'a', 'URN: ' . $submissionURN);
		}
	}

	// additional info field in case supplementary galleys cannot be unambiguously assigned to the main document galleys
	if ($submission->getData('supplementaryNotAssignable')) {
		// !!! Do not change this message without consultation of the DNB !!!
		$supplementaryDatafield500 = $this->createDatafieldNode($doc, $recordNode, '500', ' ', ' ');
		$this->createSubfieldNode($doc, $supplementaryDatafield500, 'a', DNB_MSG_SUPPLEMENTARY_AMBIGUOUS);
	}

	// abstract
	$abstract = $submission->getAbstract($galleyLocale);
	if (empty($abstract)) $abstract = $submission->getAbstract($submission->getLocale());
	if (!empty($abstract)) {
		$abstract = trim(PKPString::html2text($abstract));
		//remove line breaks in case DNB doesn't like them (they are allowed in XML 1.0 spec)
		$abstract = preg_replace("#[\s\n\r]+#",' ',$abstract);
		// Count characters, not bytes: the truncation below is character based, and a
		// byte based test would append '...' to multi-byte abstracts that are not shortened at all.
		if (mb_strlen($abstract, "UTF-8") > 999) {
			$abstract = mb_substr($abstract, 0, 996,"UTF-8");
			$abstract .= '...';
		}
		$abstractURL = $request->url($journal->getPath(), 'article', 'view', array($submissionId));
		$datafield520 = $this->createDatafieldNode($doc, $recordNode, '520', '3', ' ');
		$this->createSubfieldNode($doc, $datafield520, 'a', $abstract);
		$this->createSubfieldNode($doc, $datafield520, 'u', $abstractURL);
	}

	// license URL
	$licenseURL = $submission->getLicenseURL();
	if (empty($licenseURL)) {
		// copyright notice
		$copyrightNotice = $journal->getSetting('copyrightNotice', $galleyLocale);
		if (empty($copyrightNotice)) $copyrightNotice = $journal->getSetting('copyrightNotice', $journal->getPrimaryLocale());
		if (!empty($copyrightNotice)) {
			// link to the article view page where the copyright notice can be found
			$licenseURL = $request->url($journal->getPath(), 'article', 'view', array($submission->getId()));
		}
	}
	if (!empty($licenseURL)) {
		$datafield540 = $this->createDatafieldNode($doc, $recordNode, '540', ' ', ' ');
		$this->createSubfieldNode($doc, $datafield540, 'u', $licenseURL);
	}

	// keywords
	$submissionKeywordDao = DAORegistry::getDAO('SubmissionKeywordDAO'); /* @var $submissionKeywordDao SubmissionKeywordDAO */
	$controlledVocabulary = $submissionKeywordDao->getKeywords($submission->getCurrentPublication()->getId(), array($galleyLocale));
	if (!empty($controlledVocabulary[$galleyLocale])) {
		$datafield653 = $this->createDatafieldNode($doc, $recordNode, '653', ' ', ' ');
		foreach ($controlledVocabulary[$galleyLocale] as $controlledVocabularyItem) {
			$this->createSubfieldNode($doc, $datafield653, 'a', $controlledVocabularyItem);
		}
	}

	// other authors
	foreach ((array) $authors as $author) {
		$this->_createContributorDatafield($doc, $recordNode, '700', $author, 'aut');
	}

	// translators
	foreach ((array) $translators as $translator) {
		$this->_createContributorDatafield($doc, $recordNode, '700', $translator, 'trl');
	}

	// issue data
	// at least the year has to be provided
	// 17.2.2022
	// - provide issue year if available, if not year of publication date of the issue
	// - remove day and month
	$volume = $issue->getShowVolume()?$issue->getVolume():null;
	$number = $issue->getShowNumber()?$issue->getNumber():null;
	$year = $issue->getShowYear()?$issue->getYear():null;
	$issueDatafield773 = $this->createDatafieldNode($doc, $recordNode, '773', '1', ' ');
	if (!empty($volume)) $this->createSubfieldNode($doc, $issueDatafield773, 'g', 'volume:'.$volume);
	if (!empty($number)) $this->createSubfieldNode($doc, $issueDatafield773, 'g', 'number:'.$number);
	if (empty($year)) {
		$year = date('Y', strtotime($issue->getDatePublished()));
	}
	$this->createSubfieldNode($doc, $issueDatafield773, 'g', 'year:'.$year);
	$this->createSubfieldNode($doc, $issueDatafield773, '7', 'nnas');

	// journal data
	// there has to be an ISSN
	$issn = $journal->getData('onlineIssn');
	if (empty($issn)) $issn = $journal->getData('printIssn');
	assert(!empty($issn));
	$journalDatafield773 = $this->createDatafieldNode($doc, $recordNode, '773', '1', '8');
	$this->createSubfieldNode($doc, $journalDatafield773, 'x', $issn);

	// file data
	$galleyURL = $request->url($journal->getPath(), 'article', 'view', array($submissionId, $galley->getId()));
	$datafield856 = $this->createDatafieldNode($doc, $recordNode, '856', '4', ' ');
	$this->createSubfieldNode($doc, $datafield856, 'u', $galleyURL);
	$this->createSubfieldNode($doc, $datafield856, 'q', $this->_getGalleyFileType($galley));
	$galleyFile = $galley->getFile();
	if (isset($galleyFile)) {
		// galley is a local file
		$fileSize = Services::get('file')->fs->getSize($galleyFile->getData('path'));
	} else {
		// galley is a remote URL and we stored the filesize before
		$fileSize = $galley->getData('fileSize');
	}
	if ($fileSize > 0) $this->createSubfieldNode($doc, $datafield856, 's', Services::get('file')->getNiceFileSize($fileSize));
	if ($openAccess) $this->createSubfieldNode($doc, $datafield856, 'z', 'Open Access');

	return $doc;
}

/**
 * Append the MARC datafield describing one contributor (author or translator).
 *
 * Writes subfield a (full name), subfield 0 with the ORCID iD if the contributor
 * has an 'orcidAccessToken', and subfield 4 with the given relator code.
 * @param $doc DOMDocument
 * @param $recordNode DOMElement
 * @param $tag string MARC field number, '100' or '700'
 * @param $contributor Author
 * @param $relatorCode string value of subfield 4, 'aut' or 'trl'
 * @return DOMElement the created datafield node
 */
function _createContributorDatafield($doc, $recordNode, $tag, $contributor, $relatorCode) {
	$datafield = $this->createDatafieldNode($doc, $recordNode, $tag, '1', ' ');
	$this->createSubfieldNode($doc, $datafield, 'a', $contributor->getFullName(false,true));
	if (!empty($contributor->getData('orcidAccessToken'))) {
		$this->createSubfieldNode($doc, $datafield, '0', '(orcid)'.basename($contributor->getOrcid()));
	}
	$this->createSubfieldNode($doc, $datafield, '4', $relatorCode);
	return $datafield;
}
/**
 * Check if the contributor is an author registered with the journal.
 *
 * A contributor counts as author when the 'nameLocaleKey' of its user group is
 * 'default.groups.name.author'. A contributor without a user group is not an author.
 * @param $contributor Author
 * @return boolean
 */
function _filterAuthors($contributor) {
	$userGroup = $contributor->getUserGroup();
	// guard against a missing user group instead of failing with a call on null
	if (!$userGroup) return false;
	return $userGroup->getData('nameLocaleKey') === 'default.groups.name.author';
}
/**
 * Check if the contributor is a translator registered with the journal.
 *
 * A contributor counts as translator when the 'nameLocaleKey' of its user group is
 * 'default.groups.name.translator'. A contributor without a user group is not a translator.
 * @param $contributor Author
 * @return boolean
 */
function _filterTranslators($contributor) {
	$userGroup = $contributor->getUserGroup();
	// guard against a missing user group instead of failing with a call on null
	if (!$userGroup) return false;
	return $userGroup->getData('nameLocaleKey') === 'default.groups.name.translator';
}
/**
 * Build the root element of the export document.
 *
 * The element name and namespace come from the deployment; the xsi namespace
 * declaration and the xsi:schemaLocation attribute are set on it. The node is
 * only created here, attaching it to the document is left to the caller.
 * @param $doc DOMDocument
 * @return DOMElement
 */
function createRootNode($doc) {
	$deployment = $this->getDeployment();
	$namespace = $deployment->getNamespace();

	$root = $doc->createElementNS($namespace, $deployment->getRootElementName());
	$root->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:xsi', $deployment->getXmlSchemaInstance());

	$schemaLocation = $namespace . ' ' . $deployment->getSchemaFilename();
	$root->setAttribute('xsi:schemaLocation', $schemaLocation);

	return $root;
}
/**
 * Create a MARC 'datafield' element and append it to the record.
 *
 * @param $doc DOMDocument
 * @param $recordNode DOMElement node the new datafield is appended to
 * @param $tag string value of the 'tag' attribute
 * @param $ind1 string value of the 'ind1' attribute
 * @param $ind2 string value of the 'ind2' attribute
 * @return DOMElement the appended datafield node
 */
function createDatafieldNode($doc, $recordNode, $tag, $ind1, $ind2) {
	$namespace = $this->getDeployment()->getNamespace();
	$datafield = $doc->createElementNS($namespace, 'datafield');

	// attributes are written in the order tag, ind1, ind2
	$attributes = array('tag' => $tag, 'ind1' => $ind1, 'ind2' => $ind2);
	foreach ($attributes as $attributeName => $attributeValue) {
		$datafield->setAttribute($attributeName, $attributeValue);
	}

	$recordNode->appendChild($datafield);
	return $datafield;
}
/**
 * Generate the subfield node and append it to the given datafield.
 *
 * The value is validated against the Char production of the XML 1.0
 * specification (https://www.w3.org/TR/2006/REC-xml-20060816/Overview.html#NT-Char)
 * before it is written. Offending characters are reported, not removed, because
 * the user has to be aware of the issue.
 * @param $doc DOMDocument
 * @param $datafieldNode DOMElement
 * @param $code string 'code' attribute
 * @param $value string Element text value
 * @throws ErrorException With code XML_NON_VALID_CHARCTERS_EXCEPTION if the value
 *  contains a character not allowed in XML 1.0 or is not valid UTF-8.
 */
function createSubfieldNode($doc, $datafieldNode, $code, $value) {
	$deployment = $this->getDeployment();
	$node = $doc->createElementNS($deployment->getNamespace(), 'subfield');
	$text = (string) $value;
	$tag = $datafieldNode->getAttribute('tag');

	//use for debugging:
	//if ($tag == '520') {$text = $text . mb_chr(0,'utf-8').chr(11) . $text;}

	// Match on code points (u modifier) so that the complete Char production is covered:
	// #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF].
	// A byte based test cannot detect U+FFFE/U+FFFF or malformed UTF-8 sequences.
	$matches = array();
	$res = preg_match('/[^\x{9}\x{A}\x{D}\x{20}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/u', $text, $matches, PREG_OFFSET_CAPTURE);
	if ($res === false) {
		// with the u modifier the match fails when the subject is not valid UTF-8
		throw new ErrorException("Invalid UTF-8 byte sequence found in MARC21 datafield node ".$tag." code ".$code, XML_NON_VALID_CHARCTERS_EXCEPTION);
	}
	if ($res === 1) {
		// libxml will strip input at the first occurrence of a non-allowed character, subsequent characters will be lost
		// we don't remove these characters automatically because user has to be aware of the issue
		// the reported position is the byte offset of the first offending character
		throw new ErrorException("Character code ".mb_ord($matches[0][0], 'UTF-8')." found at position ".$matches[0][1]." in MARC21 datafield node ".$tag." code ".$code, XML_NON_VALID_CHARCTERS_EXCEPTION);
	}

	$node->appendChild($doc->createTextNode($text));
	$datafieldNode->appendChild($node);
	$node->setAttribute('code', $code);
}
/**
 * Determine the file type label written to MARC field 856 subfield q.
 *
 * PDF galleys yield 'pdf'. Otherwise the galley's file type is mapped:
 * 'application/epub+zip' to 'epub', 'text/plain' to 'txt' and 'text/html' to
 * 'html'. For any other type the galley's stored 'fileType' data is returned.
 * @param $galley ArticleGalley
 * @return string
 */
function _getGalleyFileType($galley) {
	if ($galley->isPdfGalley()) {
		return 'pdf';
	}

	$mimeType = $galley->getFileType();
	if ($mimeType == 'application/epub+zip') {
		return 'epub';
	}
	if ($mimeType == 'text/plain') {
		return 'txt';
	}
	if ($mimeType == 'text/html') {
		return 'html';
	}

	// unknown type: fall back to the value stored with the galley
	return $galley->getData('fileType');
}
}
?>