Skip to content

Commit

Permalink
Merge pull request #44 from alexdraconian/master
Browse files Browse the repository at this point in the history
Use indexer to find all links
  • Loading branch information
jaller94 committed May 30, 2023
2 parents 625b950 + f4ee5bc commit de66ba1
Showing 1 changed file with 35 additions and 160 deletions.
195 changes: 35 additions & 160 deletions helper.php
Expand Up @@ -17,195 +17,69 @@

class helper_plugin_orphanswanted extends DokuWiki_Plugin {

/**
 * Callback handed to search(): processes one entry found under the data directory.
 *
 * For every .txt file it scans the page body for internal links, fires the
 * PLUGIN_ORPHANS_WANTED_PROCESS_PAGE event and then marks the page as existing
 * in the page table.
 *
 * @param array  $data  page table, modified in place; keyed by lower-cased page id,
 *                      each entry holding 'exists' and 'links'
 * @param string $base  passed through unchanged to orph_Check_InternalLinks()
 * @param string $file  path of the entry; converted to a page id with pathID()
 * @param string $type  'd' marks a directory
 * @param int    $lvl   passed through unchanged to orph_Check_InternalLinks()
 * @param array  $opts  passed through unchanged to orph_Check_InternalLinks()
 * @return bool false when the current user lacks AUTH_READ on the page, true otherwise
 */
function orph_callback_search_wanted(&$data, $base, $file, $type, $lvl, $opts) {
    // Directories are only recursed into (namespaces are not stored) and
    // anything that is not a TXT file is ignored.
    if ($type == 'd' || !preg_match("/.*\.txt$/", $file)) {
        return true;
    }

    // Count the links found in the body of this page.
    $this->orph_Check_InternalLinks($data, $base, $file, $type, $lvl, $opts);

    // Let other plugins work on the page; the table is shared by reference
    // so that handlers can modify it.
    $eventData = array(
        'data' => &$data,
        'file' => $file
    );
    trigger_event('PLUGIN_ORPHANS_WANTED_PROCESS_PAGE', $eventData);

    // Page id of this file, always lower case.
    $pageId = utf8_strtolower(pathID($file));

    // Pages the user may not read are not recorded as existing.
    if (auth_quickaclcheck($pageId) < AUTH_READ) {
        return false;
    }

    if (isset($data[$pageId])) {
        // A link to this page was seen earlier: keep its counter and
        // note that the file search found the page itself.
        $data[$pageId]['exists'] = true;
        return true;
    }

    // First time this id shows up: existing page without links so far.
    $data[$pageId] = array('exists' => true, 'links' => 0);
    return true;
}

/**
 * Record one occurrence of a link target in the page table.
 *
 * A target that already has an entry gets its 'links' counter incremented;
 * an unknown target is added as a page that does not exist and has one link.
 * When $conf['allowdebug'] is set, the new count is echoed as HTML.
 *
 * @param array  $data page table, modified in place; keyed by page id, each
 *                     entry holding 'exists' and 'links'
 * @param string $link id of the link target
 * @return void
 */
function orph_handle_link(&$data, $link) {
    global $conf;

    if (isset($data[$link])) {
        // Known id: just count the link
        $data[$link]['links']++;
    } else {
        // Only a link was found so far, not the file
        $data[$link] = array(
            'exists' => false,
            'links' => 1
        );
    }

    // !empty() avoids a warning when the setting is absent
    if (!empty($conf['allowdebug'])) {
        // The link text originates from page content, so escape it before
        // putting it into the HTML debug output
        echo "<p>-- New count for link <b>" . htmlspecialchars($link, ENT_QUOTES, 'UTF-8') . "</b>: " . $data[$link]['links'] . "</p>\n";
    }
}

// three choices
// $params_array used to extract excluded namespaces for report
// orphans = orph_report_table($data, true, false, $params_array);
// wanted = orph_report_table($data, false, true, $params_array);
// valid = orph_report_table($data, true, true, $params_array);

/**
* Search for internal wiki links in page $file
* Find all page list with wiki's internal indexer.
*/
function orph_Check_InternalLinks( &$data, $base, $file, $type, $lvl, $opts ) {
global $conf;

if (!defined('LINK_PATTERN')) define('LINK_PATTERN', '%\[\[([^\]|#]*)(#[^\]|]*)?\|?([^\]]*)]]%');

if(!preg_match("/.*\.txt$/", $file)) {
return;
}

$currentID = pathID($file);
$currentNS = getNS($currentID);

if($conf['allowdebug']) echo sprintf("<p><b>%s</b>: %s</p>\n", $file, $currentID);

// echo " <!-- checking file: $file -->\n";
$body = @file_get_contents($conf['datadir'] . $file);

// ignores entries in blocks that ignore links
foreach( array(
'@<nowiki>.*?<\/nowiki>@su',
'@%%.*?%%@su',
'@<php>.*?</php>@su',
'@<PHP>.*?</PHP>@su',
'@<html>.*?</html>@su',
'@<HTML>.*?</HTML>@su',
'@^( {2,}|\t)[^\*\- ].*?$@mu',
'@<code[^>]*?>.*?<\/code>@su',
'@<file[^>]*?>.*?<\/file>@su'
)
as $ignored )
{
$body = preg_replace($ignored, '', $body);
}

$links = array();
preg_match_all( LINK_PATTERN, $body, $links );

foreach($links[1] as $link) {
if($conf['allowdebug']) echo sprintf("--- Checking %s<br />\n", $link);

if( (0 < strlen(ltrim($link)))
and ! preg_match('/^[a-zA-Z0-9\.]+>{1}.*$/u',$link) // Interwiki
and ! preg_match('/^\\\\\\\\[\w.:?\-;,]+?\\\\/u',$link) // Windows Share
and ! preg_match('#^([a-z0-9\-\.+]+?)://#i',$link) // external link (accepts all protocols)
and ! preg_match('<'.PREG_PATTERN_VALID_EMAIL.'>',$link) // E-Mail (pattern above is defined in inc/mail.php)
and ! preg_match('!^#.+!',$link) // inside page link (html anchor)
) {
# remove parameters
$link = preg_replace('/\?.*/', '', $link);

$pageExists = false;
resolve_pageid($currentNS, $link, $pageExists );
if ($conf['allowdebug']) echo sprintf("---- link='%s' %s ", $link, $pageExists?'EXISTS':'MISS');

if(((strlen(ltrim($link)) > 0) // there IS an id?
and !auth_quickaclcheck($link) < AUTH_READ)) {
// should be visible to user
//echo " <!-- adding $link -->\n";

if($conf['allowdebug']) echo ' A_LINK' ;

$link= utf8_strtolower( $link );
$this->orph_handle_link($data, $link);
}
else
{
if($conf['allowdebug']) echo ' EMPTY_OR_FORBIDDEN' ;
/**
 * Build the page table from the wiki's index and the stored link metadata.
 *
 * Every page id returned by the indexer gets an entry. For each of those pages
 * that exists, its 'relation references' metadata is read and the 'links'
 * counter of every referenced page is incremented.
 *
 * @return array page id => array('exists' => bool, 'links' => int)
 */
function _get_page_data() {
    $all_pages = idx_get_indexer()->getPages();

    $pages = array();
    foreach ($all_pages as $pageid) {
        $pages[$pageid] = array('exists' => page_exists($pageid), 'links' => 0);
    }

    foreach ($all_pages as $pageid) {
        // Reuse the existence check from the first pass; only existing
        // pages are read for outgoing links.
        if (!$pages[$pageid]['exists']) continue;

        // Asking for the sub key directly yields null instead of warnings
        // when a page has no relation metadata.
        $references = p_get_metadata($pageid, 'relation references');
        if (!is_array($references)) continue;

        foreach ($references as $name => $exist) {
            if (!isset($pages[$name])) {
                // Link target that is not part of the index: create a complete
                // entry under the 'exists' key the report reads, using the
                // existence flag stored with the reference.
                $pages[$name] = array('exists' => (bool) $exist, 'links' => 0);
            }
            $pages[$name]['links'] += 1;
        }
    }

    return $pages;
}

// three choices
// $params_array used to extract excluded namespaces for report
// orphans = orph_report_table($data, true, false, $params_array);
// wanted = orph_report_table($data, false, true, $params_array);
// valid = orph_report_table($data, true, true, $params_array);

/**
 * Render the report of orphaned pages: entries that exist but have no links.
 *
 * The page table comes from _get_page_data() only; it is no longer replaced
 * by a second scan of the data directory.
 *
 * @param array $params_array handed to orph_report_table() (per the note in
 *                            this file, used for the excluded namespaces)
 * @return string output of orph_report_table()
 */
function orphan_pages($params_array) {
    $data = $this->_get_page_data();

    $result = '';
    // page_exists = true, has_links = false
    $result .= $this->orph_report_table($data, true, false, $params_array, 'orphan');

    return $result;
}

/**
 * Render the report of wanted pages: entries that have links but do not exist.
 *
 * The page table comes from _get_page_data() only, and the report table is
 * emitted once rather than twice.
 *
 * @param array $params_array handed to orph_report_table() (per the note in
 *                            this file, used for the excluded namespaces)
 * @return string output of orph_report_table()
 */
function wanted_pages($params_array) {
    $data = $this->_get_page_data();

    $result = '';
    // page_exists = false, has_links = true
    $result .= $this->orph_report_table($data, false, true, $params_array, 'wanted');

    return $result;
}

/**
 * Render the report of valid pages: entries that exist and have links.
 *
 * The page table comes from _get_page_data() only, and the report table is
 * emitted once rather than twice.
 *
 * @param array $params_array handed to orph_report_table() (per the note in
 *                            this file, used for the excluded namespaces)
 * @return string output of orph_report_table()
 */
function valid_pages($params_array) {
    $data = $this->_get_page_data();

    $result = '';
    // page_exists = true, has_links = true
    $result .= $this->orph_report_table($data, true, true, $params_array, 'valid');

    return $result;
}

function all_pages($params_array) {
global $conf, $ID;
$result = '';
$data = array();
search($data,$conf['datadir'], array($this, 'orph_callback_search_wanted') , array('ns' => getNS($ID)));
$data = $this->_get_page_data();

$result = '';
$result .= "</p><p>Orphans</p><p>";
$result .= $this->orph_report_table($data, true, false, $params_array, 'orphan');
$result .= "</p><p>Wanted</p><p>";
Expand Down Expand Up @@ -251,6 +125,7 @@ function orph_report_table($data, $page_exists, $has_links, $params_array, $call
}

foreach($data as $id=>$item) {

if( ! (($item['exists'] == $page_exists) and (($item['links'] <> 0)== $has_links)) ) continue ;

// $id is a string, looks like this: page, namespace:page, or namespace:<subspaces>:page
Expand Down

0 comments on commit de66ba1

Please sign in to comment.