text www.moodle.org</p> text' => 'URL:
text www.moodle.org</p> text'
+ 'URL: http://moodle.org/s/i=1&j=2.' => 'URL: http://moodle.org/s/i=1&j=2.',
+ 'URL: www.moodle.org/s/i=1&j=2.' => 'URL: www.moodle.org/s/i=1&j=2.',
+ 'URL: http://moodle.org)
' => 'URL: http://moodle.org)
',
+ 'URL:
text www.moodle.org</p> text' => 'URL:
text www.moodle.org</p> text', + 'URL: www.moodle.org?u=1.23' => 'URL: www.moodle.org?u=1.23', + 'URL: www.moodle.org?u=test+param&' => 'URL: www.moodle.org?u=test+param&', + 'URL: www.moodle.org?param=:)' => 'URL: www.moodle.org?param=:)', + 'URL: http://moodle.org www.moodle.org' + => 'URL: http://moodle.org www.moodle.org', + 'URL: http://moodle.org www.moodle.org http://moodle.org' + => 'URL: http://moodle.org www.moodle.org http://moodle.org', + 'http://subdomain.moodle.org - URL' => 'http://subdomain.moodle.org - URL', + 'http://subdomain.subdomain.moodle.org - URL' => 'http://subdomain.subdomain.moodle.org - URL', + 'This contains http, http:// and www but no actual links.'=>'This contains http, http:// and www but no actual links.', + 'This is a story about moodle.coming to a cinema near you.'=>'This is a story about moodle.coming to a cinema near you.', + 'http://en.wikipedia.org/wiki/Slash_%28punctuation%29'=>'http://en.wikipedia.org/wiki/Slash_%28punctuation%29', + 'http://en.wikipedia.org/wiki/Slash_(punctuation)'=>'http://en.wikipedia.org/wiki/Slash_(punctuation)', + 'http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29 - URL' => 'http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29 - URL', + 'http://en.wikipedia.org/wiki/(#Parentheses_.28_.29 - URL' => 'http://en.wikipedia.org/wiki/(#Parentheses_.28_.29 - URL', + 'http://Iñtërnâtiônàlizætiøn.com?ô=nëø'=>'http://Iñtërnâtiônàlizætiøn.com?ô=nëø', + 'www.Iñtërnâtiônàlizætiøn.com?ô=nëø'=>'www.Iñtërnâtiônàlizætiøn.com?ô=nëø', + 'moodle.org' => 'moodle.org',//too hard to identify without additional regexs ); - foreach ($texts as $text => $correctresult) { - $failedmsg = "Testing text: \"$text\": %s"; + foreach ($texts as $text => $correctresult) { + if(mb_detect_encoding($text)=='UTF-8') { + $text_for_msg = utf8_decode($text); + } + else { + $text_for_msg = $text; + } + //urldecode text or things like %28 cause sprintf's, looking for %s's, to throw an exception + $msg = "Testing text: ".urldecode($text_for_msg).": %s"; + + convert_urls_into_links($text); + + //these decode's make all the strings non-garbled. The tests pass without them. + if(mb_detect_encoding($text)=='UTF-8') { + $text = utf8_decode($text); + } + if(mb_detect_encoding($correctresult)=='UTF-8') { + $correctresult = utf8_decode($correctresult); + } + $this->assertEqual($text, $correctresult, $msg); + } + + $reps = 1000; + + $time_start = microtime(true); + for($i=0;$i<$reps;$i++) + { + $text = $this->get_test_text(); convert_urls_into_links($text); - $this->assertEqual($text, $correctresult, $failedmsg); } + $time_end = microtime(true); + $new_time = $time_end - $time_start; + + $time_start = microtime(true); + for($i=0;$i<$reps;$i++) + { + $text = $this->get_test_text(); + $this->old_convert_urls_into_links($text); + } + $time_end = microtime(true); + $old_time = $time_end - $time_start; + + $fast_enough = false; + if( $new_time < $old_time ) { + $fast_enough = true; + } + + $this->assertEqual($fast_enough, true, 'Timing test:'); } } diff --git a/lib/weblib.php b/lib/weblib.php index 9a5d1afa6d3ff..1fa89684b1ae1 100644 --- a/lib/weblib.php +++ b/lib/weblib.php @@ -1720,13 +1720,37 @@ function html_to_text($html) { * @param string $text Passed in by reference. The string to be searched for urls. */ function convert_urls_into_links(&$text) { -/// Make lone URLs into links. eg http://moodle.com/ - $text = preg_replace("~([[:space:]]|^|\(|\[)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])~i", - '$1$2://$3$4', $text); - -/// eg www.moodle.com - $text = preg_replace("~([[:space:]]|^|\(|\[)www\.([^[:space:]]*)([[:alnum:]#?/&=])~i", - '$1www.$2$3', $text); +$filterignoretagsopen = array(']+?>'); + $filterignoretagsclose = array(''); + filter_save_ignore_tags($text,$filterignoretagsopen,$filterignoretagsclose,$ignoretags); + + // Check if we support unicode modifiers in regular expressions. Cache it. + // TODO: this check should be a environment requirement in Moodle 2.0, as far as unicode + // chars are going to arrive to URLs officially really soon (2010?) + // Original RFC regex from: http://www.bytemycode.com/snippets/snippet/796/ + // Various ideas from: http://alanstorm.com/url_regex_explained + // Unicode check, negative assertion and other bits from Moodle. + static $unicoderegexp; + if (!isset($unicoderegexp)) { + $unicoderegexp = @preg_match('/\pL/u', 'a'); // This will fail silenty, returning false, + } + + if ($unicoderegexp) { //We can use unicode modifiers + $text = preg_replace('#(((http(s?))://)(((([\pLl0-9]([\pLl0-9]|-)*[\pLl0-9]|[\pLl0-9])\.)+([\pLl]([\pLl0-9]|-)*[\pLl0-9]|[\pLl]))|(([0-9]{1,3}\.){3}[0-9]{1,3}))(:[\pL0-9]*)?(/([\pLl0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-fA-F0-9]{2})*)*(\?[\pLl0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(\#[\pLl0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(?\\1', $text); + $text = preg_replace('#((www\.([\pLl0-9]([\pLl0-9]|-)*[\pLl0-9]|[\pLl0-9])\.)+([\pLl]([\pLl0-9]|-)*[\pLl0-9]|[\pLl])(:[\pL0-9]*)?(/([\pLl0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-fA-F0-9]{2})*)*(\?[\pLl0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(\#[\pLl0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(?\\1', $text); + } else { //We cannot use unicode modifiers + $text = preg_replace('#(((http(s?))://)(((([a-z0-9]([a-z0-9]|-)*[a-z0-9]|[a-z0-9])\.)+([a-z]([a-z0-9]|-)*[a-z0-9]|[a-z]))|(([0-9]{1,3}\.){3}[0-9]{1,3}))(:[a-zA-Z0-9]*)?(/([a-z0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-f0-9]{2})*)*(\?[a-z0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(\#[a-z0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(?\\1', $text); + $text = preg_replace('#((www\.([a-z0-9]([a-z0-9]|-)*[a-z0-9]|[a-z0-9])\.)+([a-z]([a-z0-9]|-)*[a-z0-9]|[a-z])(:[a-zA-Z0-9]*)?(/([a-z0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-f0-9]{2})*)*(\?[a-z0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(\#[a-z0-9\.!$&\'\(\)*+,;=_~:@/?-]*)?(?\\1', $text); + } + + if (!empty($ignoretags)) { + $ignoretags = array_reverse($ignoretags); /// Reversed so "progressive" str_replace() will solve some nesting problems. + $text = str_replace(array_keys($ignoretags),$ignoretags,$text); + } } /**