From ef490c7273d34e462cfb56c812f9bd1219c98ba3 Mon Sep 17 00:00:00 2001 From: alexdraconian <78018187+alexdraconian@users.noreply.github.com> Date: Sun, 2 Jan 2022 11:14:24 +0900 Subject: [PATCH 1/5] Use indexer to find all links --- helper.php | 194 ++++++++++------------------------------------------- 1 file changed, 34 insertions(+), 160 deletions(-) diff --git a/helper.php b/helper.php index 33b9161..dfa59b8 100644 --- a/helper.php +++ b/helper.php @@ -14,198 +14,71 @@ if(!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN',DOKU_INC.'lib/plugins/'); require_once(DOKU_INC.'inc/search.php'); +require_once(DOKU_INC.'inc/search/Indexer.php'); class helper_plugin_orphanswanted extends DokuWiki_Plugin { - function orph_callback_search_wanted(&$data, $base, $file, $type, $lvl, $opts) { - - if($type == 'd') { - return true; // recurse all directories, but we don't store namespaces - } - - if(!preg_match("/.*\.txt$/", $file)) { - // Ignore everything but TXT - return true; - } - - // search the body of the file for links - // dae mod - // orph_Check_InternalLinks(&$data,$base,$file,$type,$lvl,$opts); - $this->orph_Check_InternalLinks($data,$base,$file,$type,$lvl,$opts); - - $eventData = array( - 'data' => &$data, - 'file' => $file - ); - trigger_event('PLUGIN_ORPHANS_WANTED_PROCESS_PAGE', $eventData); - - // get id of this file - $id = pathID($file); - - // make sure ID is lowercase - $id = utf8_strtolower($id); - - //check ACL - if(auth_quickaclcheck($id) < AUTH_READ) { - return false; - } - - // try to avoid making duplicate entries for forms and pages - $item = &$data["$id"]; - - if(isset($item)) { - // This item already has a member in the array - // Note that the file search found it - $item['exists'] = true; - } else { - // Create a new entry - $data["$id"]=array('exists' => true, 'links' => 0); - } - return true; - } - - function orph_handle_link(&$data, $link) { - global $conf; - - if(isset($data[$link])) { - // This item already has a member in the array - // Note that 
the file search found it - $data[$link]['links'] ++ ; // count the link - } else { - // Create a new entry - $data[$link] = array( - 'exists' => false, // Only found a link, not the file - 'links' => 1 - ); - // echo " \n"; - } - - if ($conf['allowdebug']) echo "
-- New count for link " . $link . ": " . $data[$link]['links'] . "
\n"; - } - + // three choices + // $params_array used to extract excluded namespaces for report + // orphans = orph_report_table($data, true, false, $params_array); + // wanted = orph_report_table($data, false, true), $params_array; + // valid = orph_report_table($data, true, true, $params_array); /** - * Search for internal wiki links in page $file + * Find all page list with wiki's internal indexer. */ - function orph_Check_InternalLinks( &$data, $base, $file, $type, $lvl, $opts ) { - global $conf; - - if (!defined('LINK_PATTERN')) define('LINK_PATTERN', '%\[\[([^\]|#]*)(#[^\]|]*)?\|?([^\]]*)]]%'); - - if(!preg_match("/.*\.txt$/", $file)) { - return; - } - - $currentID = pathID($file); - $currentNS = getNS($currentID); - - if($conf['allowdebug']) echo sprintf("%s: %s
\n", $file, $currentID); - - // echo " \n"; - $body = @file_get_contents($conf['datadir'] . $file); - - // ignores entries in blocks that ignore links - foreach( array( - '@]*?>.*?<\/code>@su',
- '@]*?>.*?<\/file>@su'
- )
- as $ignored )
- {
- $body = preg_replace($ignored, '', $body);
- }
-
- $links = array();
- preg_match_all( LINK_PATTERN, $body, $links );
-
- foreach($links[1] as $link) {
- if($conf['allowdebug']) echo sprintf("--- Checking %s
\n", $link);
-
- if( (0 < strlen(ltrim($link)))
- and ! preg_match('/^[a-zA-Z0-9\.]+>{1}.*$/u',$link) // Interwiki
- and ! preg_match('/^\\\\\\\\[\w.:?\-;,]+?\\\\/u',$link) // Windows Share
- and ! preg_match('#^([a-z0-9\-\.+]+?)://#i',$link) // external link (accepts all protocols)
- and ! preg_match('<'.PREG_PATTERN_VALID_EMAIL.'>',$link) // E-Mail (pattern above is defined in inc/mail.php)
- and ! preg_match('!^#.+!',$link) // inside page link (html anchor)
- ) {
- # remove parameters
- $link = preg_replace('/\?.*/', '', $link);
-
- $pageExists = false;
- resolve_pageid($currentNS, $link, $pageExists );
- if ($conf['allowdebug']) echo sprintf("---- link='%s' %s ", $link, $pageExists?'EXISTS':'MISS');
-
- if(((strlen(ltrim($link)) > 0) // there IS an id?
- and !auth_quickaclcheck($link) < AUTH_READ)) {
- // should be visible to user
- //echo " \n";
-
- if($conf['allowdebug']) echo ' A_LINK' ;
-
- $link= utf8_strtolower( $link );
- $this->orph_handle_link($data, $link);
- }
- else
- {
- if($conf['allowdebug']) echo ' EMPTY_OR_FORBIDDEN' ;
+ function _get_page_data() {
+ $all_pages = idx_get_indexer()->getPages();
+ $pages = array();
+ foreach($all_pages as $pageid) {
+ $pages[$pageid] = array("exists"=>1, "links"=>0);
+ }
+
+ foreach($all_pages as $pageid) {
+
+ $relation_data = p_get_metadata($pageid)['relation']['references'];
+ if (!is_null($relation_data)) {
+ foreach($relation_data as $name => $exist) {
+ $pages[$name]['exist'] = $exist;
+ $pages[$name]['links'] += 1;
}
- } // link is not empty and is a local link?
- else {
- if($conf['allowdebug']) echo ' NOT_INTERNAL';
}
+ }
- if($conf['allowdebug']) echo "
\n";
- } // end of foreach link
+ return $pages;
}
- // three choices
- // $params_array used to extract excluded namespaces for report
- // orphans = orph_report_table($data, true, false, $params_array);
- // wanted = orph_report_table($data, false, true), $params_array;
- // valid = orph_report_table($data, true, true, $params_array);
-
function orphan_pages($params_array) {
- global $conf, $ID;
+ $data = $this->_get_page_data();
+
$result = '';
- $data = array();
- search($data,$conf['datadir'], array($this, 'orph_callback_search_wanted'), array('ns' => getNS($ID)));
$result .= $this->orph_report_table($data, true, false, $params_array, 'orphan');
return $result;
}
function wanted_pages($params_array) {
- global $conf, $ID;
+ $data = $this->_get_page_data();
+
$result = '';
- $data = array();
- search($data,$conf['datadir'], array($this, 'orph_callback_search_wanted'), array('ns' => getNS($ID)));
- $result .= $this->orph_report_table($data, false, true, $params_array, 'wanted');
+ $result .= $this->orph_report_table($data, false, true, $params_array, 'wanted');
return $result;
}
function valid_pages($params_array) {
- global $conf, $ID;
+ $data = $this->_get_page_data();
+
$result = '';
- $data = array();
- search($data,$conf['datadir'], array($this, 'orph_callback_search_wanted'), array('ns' => getNS($ID)));
- $result .= $this->orph_report_table($data, true, true, $params_array, 'valid');
+ $result .= $this->orph_report_table($data, false, true, $params_array, 'wanted');
return $result;
}
function all_pages($params_array) {
- global $conf, $ID;
- $result = '';
- $data = array();
- search($data,$conf['datadir'], array($this, 'orph_callback_search_wanted') , array('ns' => getNS($ID)));
+ $data = $this->_get_page_data();
+ $result = '';
$result .= "Orphans
";
$result .= $this->orph_report_table($data, true, false, $params_array, 'orphan');
$result .= "
Wanted
";
@@ -251,6 +124,7 @@ function orph_report_table($data, $page_exists, $has_links, $params_array, $call
}
foreach($data as $id=>$item) {
+
if( ! (($item['exists'] == $page_exists) and (($item['links'] <> 0)== $has_links)) ) continue ;
// $id is a string, looks like this: page, namespace:page, or namespace::page
From 50b49d40869797cc5c0af24f184d75bc2f455b87 Mon Sep 17 00:00:00 2001
From: alexdraconian <78018187+alexdraconian@users.noreply.github.com>
Date: Sun, 2 Jan 2022 16:34:14 +0900
Subject: [PATCH 2/5] error fix: pass the 'valid' report label in valid_pages
---
helper.php | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/helper.php b/helper.php
index dfa59b8..0b72ed9 100644
--- a/helper.php
+++ b/helper.php
@@ -70,7 +70,7 @@ function valid_pages($params_array) {
$data = $this->_get_page_data();
$result = '';
- $result .= $this->orph_report_table($data, false, true, $params_array, 'wanted');
+ $result .= $this->orph_report_table($data, false, true, $params_array, 'valid');
return $result;
}
From d209e0918f02ca3723b6679b4f4cfcb21d0dbcca Mon Sep 17 00:00:00 2001
From: alexdraconian <78018187+alexdraconian@users.noreply.github.com>
Date: Sun, 2 Jan 2022 16:35:54 +0900
Subject: [PATCH 3/5] error fix: require existing pages in valid_pages report
---
helper.php | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/helper.php b/helper.php
index 0b72ed9..a87ef68 100644
--- a/helper.php
+++ b/helper.php
@@ -70,7 +70,7 @@ function valid_pages($params_array) {
$data = $this->_get_page_data();
$result = '';
- $result .= $this->orph_report_table($data, false, true, $params_array, 'valid');
+ $result .= $this->orph_report_table($data, true, true, $params_array, 'valid');
return $result;
}
From 5775dcdabaefa3ba77bdeb3d441462d0b9a3aa78 Mon Sep 17 00:00:00 2001
From: alexdraconian <78018187+alexdraconian@users.noreply.github.com>
Date: Mon, 3 Jan 2022 23:18:08 +0900
Subject: [PATCH 4/5] Fix bug with non-existent page
Fix the bug that occurs when the indexer returns a non-existent page.
---
helper.php | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/helper.php b/helper.php
index a87ef68..de8ef30 100644
--- a/helper.php
+++ b/helper.php
@@ -31,11 +31,13 @@ function _get_page_data() {
$all_pages = idx_get_indexer()->getPages();
$pages = array();
foreach($all_pages as $pageid) {
- $pages[$pageid] = array("exists"=>1, "links"=>0);
+ $pages[$pageid] = array("exists"=>page_exists($pageid), "links"=>0);
}
foreach($all_pages as $pageid) {
+ if (!page_exists($pageid)) continue;
+
$relation_data = p_get_metadata($pageid)['relation']['references'];
if (!is_null($relation_data)) {
foreach($relation_data as $name => $exist) {
From f4ee5bc6a033e881093bdf88ef30612948791aa5 Mon Sep 17 00:00:00 2001
From: alexdraconian <78018187+alexdraconian@users.noreply.github.com>
Date: Sat, 22 Jan 2022 23:37:04 +0900
Subject: [PATCH 5/5] Delete mis-inserted line
---
helper.php | 1 -
1 file changed, 1 deletion(-)
diff --git a/helper.php b/helper.php
index de8ef30..50b2307 100644
--- a/helper.php
+++ b/helper.php
@@ -14,7 +14,6 @@
if(!defined('DOKU_PLUGIN')) define('DOKU_PLUGIN',DOKU_INC.'lib/plugins/');
require_once(DOKU_INC.'inc/search.php');
-require_once(DOKU_INC.'inc/search/Indexer.php');
class helper_plugin_orphanswanted extends DokuWiki_Plugin {