From 60818d4d689aedf4aea507267aac198e0494f6a1 Mon Sep 17 00:00:00 2001 From: Tom Reijnders Date: Tue, 27 Jun 2023 10:40:49 +0200 Subject: [PATCH] Fix oai-pmh harvesting for really deleted projects --- oai-pmh/management/categories.php | 22 +++++- oai-pmh/management/educational.php | 20 +++++ oai-pmh/management/reset.php | 118 +++++++++++++++++++++++++++++ oai-pmh/management/rights.php | 20 +++++ oai-pmh/oai.php | 63 ++++++++++++++- oai-pmh/oaiclient.php | 18 +++++ oai-pmh/oaiexception.php | 18 +++++ oai-pmh/oaiserver.php | 29 +++++-- oai-pmh/oaixml.php | 18 +++++ oai-pmh/test.php | 18 +++++ oai-pmh/xerteobjects.php | 1 - 11 files changed, 335 insertions(+), 10 deletions(-) create mode 100644 oai-pmh/management/reset.php diff --git a/oai-pmh/management/categories.php b/oai-pmh/management/categories.php index c8e955542f..abf5e2534e 100644 --- a/oai-pmh/management/categories.php +++ b/oai-pmh/management/categories.php @@ -1,9 +1,29 @@ 1) { clearCategoryTable(); diff --git a/oai-pmh/management/educational.php b/oai-pmh/management/educational.php index 15596ea9a5..90c5e19803 100644 --- a/oai-pmh/management/educational.php +++ b/oai-pmh/management/educational.php @@ -1,4 +1,24 @@ users_file_area_full = 'P:\\public_html\\xotoai-pmh\\USER-FILES\\'; + +if (!file_exists("../oai_config.php")) +{ + die("oai-pmh is not available"); +} + +require_once('../oai_config.php'); +require_once(__DIR__ . 
'/../xerteobjects.php'); + + +function getAllTemplates() +{ + global $xerte_toolkits_site; + $prefix = $xerte_toolkits_site->database_table_prefix; + + $q = "select td.template_id, + otd.template_framework, + otd.template_name as template_type, + otd.display_name as type_display_name, + td.template_name, + td.creator_id as owner_userid, + ld.username as owner_username, + concat(ld.firstname,' ',ld.surname) as owner, + td.date_created, + td.date_modified, + td.date_accessed, + td.number_of_uses, + td.access_to_whom, + td.extra_flags, + td.tsugi_published as lti_enabled, + td.tsugi_xapi_enabled as xapi_enabled + from {$prefix}templatedetails as td, + {$prefix}originaltemplatesdetails as otd, + {$prefix}logindetails as ld + where td.template_type_id=otd.template_type_id and td.creator_id=ld.login_id and td.access_to_whom = 'Public'"; + + $templates = db_query($q); + + return $templates; +} + +function getPublishStatus() +{ + global $xerte_toolkits_site; + + $q = "select * from {$xerte_toolkits_site->database_table_prefix}oai_publish "; + $publish_status = db_query($q); + + return $publish_status; +} + +function getMetaData($templates, $publish_status) +{ + $published = array(); + foreach($templates as $template) { + $template_id = $template['template_id']; + $template_creator = $template['owner_username']; + $template_type = $template['template_type']; + + $meta = get_meta_data($template_id, $template_creator, $template_type); + + $meta->oai_published = $meta->oaiPmhAgree && $meta->domain != 'unknown' && $meta->level != 'unknown'; + + if ($meta->oai_published) { + $meta->creator_id = $template['owner_userid']; + $meta->date_modified = $template['date_modified']; + $meta->template_id = $template_id; + $published[] = $meta; + } + } + return $published; +} + +$templates = getAllTemplates(); +$publish_status = getPublishStatus(); +$published = getMetaData($templates, $publish_status); + + +//Build the new contents of the oai_publish table + +$q = "truncate table 
{$xerte_toolkits_site->database_table_prefix}oai_publish"; +db_query($q); + +$params = array(); +$q = "insert into {$xerte_toolkits_site->database_table_prefix}oai_publish (template_id, login_id, user_type, status, timestamp) values "; +foreach($published as $meta) { + $q .= "(?,?,'creator','published',?),"; + $params[] = $meta->template_id; + $params[] = $meta->creator_id; + $params[] = $meta->date_modified; +} +$q = rtrim($q, ','); +db_query($q, $params); diff --git a/oai-pmh/management/rights.php b/oai-pmh/management/rights.php index 8ca0e11584..956801c0f2 100644 --- a/oai-pmh/management/rights.php +++ b/oai-pmh/management/rights.php @@ -1,4 +1,24 @@ templates = $tmpTemplates; //$response->count = count($tmpTemplates); - + // Add the templates that have been really deleted as well + $deleted = getDeletedTemplates($metadataPrefix,$from,$until); + foreach($deleted as $d) + { + $record = array('identifier' => ($xerte_toolkits_site->site_url . $d['template_id']), + 'datestamp' => date($d['timestamp']), + 'modified' => date($d['timestamp']), + 'deleted' => true); + $tmpRecords[] = $record; + } return $tmpRecords; }; +function getDeletedTemplates($metadataPrefix,$from,$until) +{ + // Get the ids of all the records that have been deleted but have been published before + global $xerte_toolkits_site; + $prefix = $xerte_toolkits_site->database_table_prefix; + + // Get all the unique ids of the templates that have been deleted from oai_publish and that do not exist anymore in templatedetails + if ($until != null && $until != "") + { + $q = "select template_id, timestamp from {$prefix}oai_publish op + where op.status = 'deleted' + and op.template_id not in (select template_id from {$prefix}templatedetails td where op.template_id = td.template_id) + and audith_id IN (SELECT max(audith_id) from {$xerte_toolkits_site->database_table_prefix}oai_publish op2 where status='deleted' and timestamp < ? 
group by op2.template_id)"; + $params = array($until); + } + else { + $q = "select template_id, timestamp from {$prefix}oai_publish op + where op.status = 'deleted' + and op.template_id not in (select template_id from {$prefix}templatedetails td where op.template_id = td.template_id) + and audith_id IN (SELECT max(audith_id) from {$xerte_toolkits_site->database_table_prefix}oai_publish op2 where status='deleted' group by op2.template_id)"; + $params = array(); + } + if ($from != null && $from != "") + { + $q = $q . " and op.timestamp >= ?"; + $params[] = $from; + } + $deleted_templates = db_query($q, $params); + return $deleted_templates; +} function makeRecordFromTemplate($metadataPrefix,$template, $metadata){ global $xerte_toolkits_site; if($metadataPrefix == "lom_ims") { //get first publish time. - $q = "select timestamp from {$xerte_toolkits_site->database_table_prefix}oai_publish where template_id = ? and status = ? group by timestamp limit 1"; + $q = "select timestamp from {$xerte_toolkits_site->database_table_prefix}oai_publish where template_id = ? and status = ? 
order by timestamp asc limit 1"; $params = array($template['template_id'], "published"); $first_publish_time = db_query_one($q, $params); @@ -323,7 +380,7 @@ function makeRecordFromTemplate($metadataPrefix,$template, $metadata){ 'lifecycle' => array( 'author' => $metadata->author, 'publisher' => $metadata->publisher, - 'publishdate' => $first_publish_time["timestamp"], + 'publishdate' => date($first_publish_time["timestamp"]), ), 'rights' => array( 'rights' => $metadata->rights, diff --git a/oai-pmh/oaiclient.php b/oai-pmh/oaiclient.php index 0377b709ba..8d9f7efa9d 100644 --- a/oai-pmh/oaiclient.php +++ b/oai-pmh/oaiclient.php @@ -1,4 +1,22 @@ response->addChild($centity_node, 'vcard', $vcard); - $publish_date = $record['metadata']['lifecycle']['publishdate']; + $publish_date = $this->formatDatestamp($record['metadata']['lifecycle']['publishdate']); $date_node = $this->response->addChild($contribute_node, 'date'); - $this->response->addChild($date_node, 'datetime', ($publish_date . "T00:00:00+00:00")); + $this->response->addChild($date_node, 'datetime', ($publish_date)); $description_node = $this->response->addChild($date_node, 'description'); $langstring_node = $this->response->addChild($description_node, 'langstring', "The date the object was published."); $langstring_node->setAttribute("xml:lang", $language); @@ -676,12 +694,13 @@ private function readResumptionToken($resumptionToken) } /** - * All datestamps used in this system are GMT even - * return value from database has no TZ information + * All datestamps used in this system are local time, even though the + * value returned from the database has no time zone information. + * MAKE SURE date.timezone is correct in php.ini */ private function formatDatestamp($datestamp) { - return date("Y-m-d\TH:i:s\Z", strtotime($datestamp)); + return gmdate("Y-m-d\TH:i:s\Z", strtotime($datestamp)); } /** diff --git a/oai-pmh/oaixml.php b/oai-pmh/oaixml.php index e773a8b395..e704bda647 100644 --- a/oai-pmh/oaixml.php +++ b/oai-pmh/oaixml.php @@ -1,4 
+1,22 @@ 1) { $xmlfile = $argv[1]; diff --git a/oai-pmh/xerteobjects.php b/oai-pmh/xerteobjects.php index 8a7daf3a27..b4dbfe0344 100644 --- a/oai-pmh/xerteobjects.php +++ b/oai-pmh/xerteobjects.php @@ -1,5 +1,4 @@