From ef490c7273d34e462cfb56c812f9bd1219c98ba3 Mon Sep 17 00:00:00 2001 From: alexdraconian <78018187+alexdraconian@users.noreply.github.com> Date: Sun, 2 Jan 2022 11:14:24 +0900 Subject: [PATCH 1/5] Use indexer to find all links --- helper.php | 194 ++++++++++------------------------------------------- 1 file changed, 34 insertions(+), 160 deletions(-) diff --git a/helper.php b/helper.php index 33b9161..dfa59b8 100644 --- a/helper.php +++ b/helper.php @@ -14,198 +14,71 @@ if(!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN',DOKU_INC.'lib/plugins/'); require_once(DOKU_INC.'inc/search.php'); +require_once(DOKU_INC.'inc/search/Indexer.php'); class helper_plugin_orphanswanted extends DokuWiki_Plugin { - function orph_callback_search_wanted(&$data, $base, $file, $type, $lvl, $opts) { - - if($type == 'd') { - return true; // recurse all directories, but we don't store namespaces - } - - if(!preg_match("/.*\.txt$/", $file)) { - // Ignore everything but TXT - return true; - } - - // search the body of the file for links - // dae mod - // orph_Check_InternalLinks(&$data,$base,$file,$type,$lvl,$opts); - $this->orph_Check_InternalLinks($data,$base,$file,$type,$lvl,$opts); - - $eventData = array( - 'data' => &$data, - 'file' => $file - ); - trigger_event('PLUGIN_ORPHANS_WANTED_PROCESS_PAGE', $eventData); - - // get id of this file - $id = pathID($file); - - // make sure ID is lowercase - $id = utf8_strtolower($id); - - //check ACL - if(auth_quickaclcheck($id) < AUTH_READ) { - return false; - } - - // try to avoid making duplicate entries for forms and pages - $item = &$data["$id"]; - - if(isset($item)) { - // This item already has a member in the array - // Note that the file search found it - $item['exists'] = true; - } else { - // Create a new entry - $data["$id"]=array('exists' => true, 'links' => 0); - } - return true; - } - - function orph_handle_link(&$data, $link) { - global $conf; - - if(isset($data[$link])) { - // This item already has a member in the array - // Note that 
the file search found it - $data[$link]['links'] ++ ; // count the link - } else { - // Create a new entry - $data[$link] = array( - 'exists' => false, // Only found a link, not the file - 'links' => 1 - ); - // echo " \n"; - } - - if ($conf['allowdebug']) echo "

-- New count for link " . $link . ": " . $data[$link]['links'] . "

\n"; - } - + // three choices + // $params_array used to extract excluded namespaces for report + // orphans = orph_report_table($data, true, false, $params_array); + // wanted = orph_report_table($data, false, true), $params_array; + // valid = orph_report_table($data, true, true, $params_array); /** - * Search for internal wiki links in page $file + * Find all page list with wiki's internal indexer. */ - function orph_Check_InternalLinks( &$data, $base, $file, $type, $lvl, $opts ) { - global $conf; - - if (!defined('LINK_PATTERN')) define('LINK_PATTERN', '%\[\[([^\]|#]*)(#[^\]|]*)?\|?([^\]]*)]]%'); - - if(!preg_match("/.*\.txt$/", $file)) { - return; - } - - $currentID = pathID($file); - $currentNS = getNS($currentID); - - if($conf['allowdebug']) echo sprintf("

%s: %s

\n", $file, $currentID); - - // echo " \n"; - $body = @file_get_contents($conf['datadir'] . $file); - - // ignores entries in blocks that ignore links - foreach( array( - '@.*?<\/nowiki>@su', - '@%%.*?%%@su', - '@.*?@su', - '@.*?@su', - '@.*?@su', - '@.*?@su', - '@^( {2,}|\t)[^\*\- ].*?$@mu', - '@]*?>.*?<\/code>@su', - '@]*?>.*?<\/file>@su' - ) - as $ignored ) - { - $body = preg_replace($ignored, '', $body); - } - - $links = array(); - preg_match_all( LINK_PATTERN, $body, $links ); - - foreach($links[1] as $link) { - if($conf['allowdebug']) echo sprintf("--- Checking %s
\n", $link); - - if( (0 < strlen(ltrim($link))) - and ! preg_match('/^[a-zA-Z0-9\.]+>{1}.*$/u',$link) // Interwiki - and ! preg_match('/^\\\\\\\\[\w.:?\-;,]+?\\\\/u',$link) // Windows Share - and ! preg_match('#^([a-z0-9\-\.+]+?)://#i',$link) // external link (accepts all protocols) - and ! preg_match('<'.PREG_PATTERN_VALID_EMAIL.'>',$link) // E-Mail (pattern above is defined in inc/mail.php) - and ! preg_match('!^#.+!',$link) // inside page link (html anchor) - ) { - # remove parameters - $link = preg_replace('/\?.*/', '', $link); - - $pageExists = false; - resolve_pageid($currentNS, $link, $pageExists ); - if ($conf['allowdebug']) echo sprintf("---- link='%s' %s ", $link, $pageExists?'EXISTS':'MISS'); - - if(((strlen(ltrim($link)) > 0) // there IS an id? - and !auth_quickaclcheck($link) < AUTH_READ)) { - // should be visible to user - //echo " \n"; - - if($conf['allowdebug']) echo ' A_LINK' ; - - $link= utf8_strtolower( $link ); - $this->orph_handle_link($data, $link); - } - else - { - if($conf['allowdebug']) echo ' EMPTY_OR_FORBIDDEN' ; + function _get_page_data() { + $all_pages = idx_get_indexer()->getPages(); + $pages = array(); + foreach($all_pages as $pageid) { + $pages[$pageid] = array("exists"=>1, "links"=>0); + } + + foreach($all_pages as $pageid) { + + $relation_data = p_get_metadata($pageid)['relation']['references']; + if (!is_null($relation_data)) { + foreach($relation_data as $name => $exist) { + $pages[$name]['exist'] = $exist; + $pages[$name]['links'] += 1; } - } // link is not empty and is a local link? - else { - if($conf['allowdebug']) echo ' NOT_INTERNAL'; } + } - if($conf['allowdebug']) echo "
\n"; - } // end of foreach link + return $pages; } - // three choices - // $params_array used to extract excluded namespaces for report - // orphans = orph_report_table($data, true, false, $params_array); - // wanted = orph_report_table($data, false, true), $params_array; - // valid = orph_report_table($data, true, true, $params_array); - function orphan_pages($params_array) { - global $conf, $ID; + $data = $this->_get_page_data(); + $result = ''; - $data = array(); - search($data,$conf['datadir'], array($this, 'orph_callback_search_wanted'), array('ns' => getNS($ID))); $result .= $this->orph_report_table($data, true, false, $params_array, 'orphan'); return $result; } function wanted_pages($params_array) { - global $conf, $ID; + $data = $this->_get_page_data(); + $result = ''; - $data = array(); - search($data,$conf['datadir'], array($this, 'orph_callback_search_wanted'), array('ns' => getNS($ID))); - $result .= $this->orph_report_table($data, false, true, $params_array, 'wanted'); + $result .= $this->orph_report_table($data, false, true, $params_array, 'wanted'); return $result; } function valid_pages($params_array) { - global $conf, $ID; + $data = $this->_get_page_data(); + $result = ''; - $data = array(); - search($data,$conf['datadir'], array($this, 'orph_callback_search_wanted'), array('ns' => getNS($ID))); - $result .= $this->orph_report_table($data, true, true, $params_array, 'valid'); + $result .= $this->orph_report_table($data, false, true, $params_array, 'wanted'); return $result; } function all_pages($params_array) { - global $conf, $ID; - $result = ''; - $data = array(); - search($data,$conf['datadir'], array($this, 'orph_callback_search_wanted') , array('ns' => getNS($ID))); + $data = $this->_get_page_data(); + $result = ''; $result .= "

Orphans

"; $result .= $this->orph_report_table($data, true, false, $params_array, 'orphan'); $result .= "

Wanted

"; @@ -251,6 +124,7 @@ function orph_report_table($data, $page_exists, $has_links, $params_array, $call } foreach($data as $id=>$item) { + if( ! (($item['exists'] == $page_exists) and (($item['links'] <> 0)== $has_links)) ) continue ; // $id is a string, looks like this: page, namespace:page, or namespace::page From 50b49d40869797cc5c0af24f184d75bc2f455b87 Mon Sep 17 00:00:00 2001 From: alexdraconian <78018187+alexdraconian@users.noreply.github.com> Date: Sun, 2 Jan 2022 16:34:14 +0900 Subject: [PATCH 2/5] error fix --- helper.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helper.php b/helper.php index dfa59b8..0b72ed9 100644 --- a/helper.php +++ b/helper.php @@ -70,7 +70,7 @@ function valid_pages($params_array) { $data = $this->_get_page_data(); $result = ''; - $result .= $this->orph_report_table($data, false, true, $params_array, 'wanted'); + $result .= $this->orph_report_table($data, false, true, $params_array, 'valid'); return $result; } From d209e0918f02ca3723b6679b4f4cfcb21d0dbcca Mon Sep 17 00:00:00 2001 From: alexdraconian <78018187+alexdraconian@users.noreply.github.com> Date: Sun, 2 Jan 2022 16:35:54 +0900 Subject: [PATCH 3/5] error fix --- helper.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helper.php b/helper.php index 0b72ed9..a87ef68 100644 --- a/helper.php +++ b/helper.php @@ -70,7 +70,7 @@ function valid_pages($params_array) { $data = $this->_get_page_data(); $result = ''; - $result .= $this->orph_report_table($data, false, true, $params_array, 'valid'); + $result .= $this->orph_report_table($data, true, true, $params_array, 'valid'); return $result; } From 5775dcdabaefa3ba77bdeb3d441462d0b9a3aa78 Mon Sep 17 00:00:00 2001 From: alexdraconian <78018187+alexdraconian@users.noreply.github.com> Date: Mon, 3 Jan 2022 23:18:08 +0900 Subject: [PATCH 4/5] Fix bug with non-exist page Fix the bug that occurs when the indexer returns a non-existent page. 
--- helper.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/helper.php b/helper.php index a87ef68..de8ef30 100644 --- a/helper.php +++ b/helper.php @@ -31,11 +31,13 @@ function _get_page_data() { $all_pages = idx_get_indexer()->getPages(); $pages = array(); foreach($all_pages as $pageid) { - $pages[$pageid] = array("exists"=>1, "links"=>0); + $pages[$pageid] = array("exists"=>page_exists($pageid), "links"=>0); } foreach($all_pages as $pageid) { + if (!page_exists($pageid)) continue; + $relation_data = p_get_metadata($pageid)['relation']['references']; if (!is_null($relation_data)) { foreach($relation_data as $name => $exist) { From f4ee5bc6a033e881093bdf88ef30612948791aa5 Mon Sep 17 00:00:00 2001 From: alexdraconian <78018187+alexdraconian@users.noreply.github.com> Date: Sat, 22 Jan 2022 23:37:04 +0900 Subject: [PATCH 5/5] Delete mis-inserted line --- helper.php | 1 - 1 file changed, 1 deletion(-) diff --git a/helper.php b/helper.php index de8ef30..50b2307 100644 --- a/helper.php +++ b/helper.php @@ -14,7 +14,6 @@ if(!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN',DOKU_INC.'lib/plugins/'); require_once(DOKU_INC.'inc/search.php'); -require_once(DOKU_INC.'inc/search/Indexer.php'); class helper_plugin_orphanswanted extends DokuWiki_Plugin {