Permalink
Browse files

Updated conformance tests; no protocol-less urls.

  • Loading branch information...
Nick Pope
Nick Pope committed Jan 5, 2011
1 parent 6bc4a76 commit 06aa16352cbeb4695f5d624b79a0c5591c205d03
Showing with 5 additions and 25 deletions.
  1. +2 −3 lib/Twitter/Autolink.php
  2. +1 −13 lib/Twitter/Extractor.php
  3. +1 −8 lib/Twitter/Regex.php
  4. +1 −1 tests/data/twitter-text-conformance
View
@@ -413,9 +413,8 @@ protected function _addLinksToHashtags($matches) {
protected function _addLinksToURLs($matches) {
list($all, $before, $url, $protocol, $domain, $path, $query) = array_pad($matches, 7, '');
$url = htmlspecialchars($url, ENT_QUOTES, 'UTF-8', false);
- if (!$protocol && !preg_match(self::REGEX_PROBABLE_TLD, $domain)) return $all;
- $href = ((!$protocol || strtolower($protocol) === 'www.') ? 'http://'.$url : $url);
- return $before . $this->wrap($href, $this->class_url, $url);
+ if (!$protocol) return $all;
+ return $before . $this->wrap($url, $this->class_url, $url);
}
/**
View
@@ -87,19 +87,7 @@ public function extractHashtags() {
public function extractURLs() {
preg_match_all(self::$REGEX_VALID_URL, $this->tweet, $matches);
list($all, $before, $url, $protocol, $domain, $path, $query) = array_pad($matches, 7, '');
- $i = count($url)-1;
- for (; $i >= 0; $i--) {
- if (!preg_match('!https?://!', $protocol[$i])) {
- # Note: $protocol can contain 'www.' if no protocol exists!
- if (preg_match(self::REGEX_PROBABLE_TLD, $domain[$i]) || strtolower($protocol[$i]) === 'www.') {
- $url[$i] = 'http://'.(strtolower($protocol[$i]) === 'www.' ? $protocol[$i] : '').$domain[$i];
- } else {
- unset($url[$i]);
- }
- }
- }
- # Renumber the array:
- return array_values($url);
+ return $url;
}
/**
View
@@ -45,13 +45,6 @@
*/
const REGEX_URL_DOMAIN = '(?:[^\\p{P}\\p{Lo}\\s][\\.-](?=[^\\p{P}\\p{Lo}\\s])|[^\\p{P}\\p{Lo}\\s])+\\.[a-z]{2,}(?::[0-9]+)?';
- /**
- * Expression to match a handful of probable TLDs for protocol-less URLs.
- *
- * @var string
- */
- const REGEX_PROBABLE_TLD = '/\\.(?:com|net|org|gov|edu)$/iu';
-
/**
* Expression to match characters that may come in the URL path.
*
@@ -160,7 +153,7 @@ protected function __construct($tweet) {
self::$REGEX_VALID_URL = '/(?:' # $1 Complete match (preg_match already matches everything.)
. '('.self::REGEX_URL_CHARS_BEFORE.')' # $2 Preceding character
. '(' # $3 Complete URL
- . '((?:https?:\\/\\/|www\\.)?)' # $4 Protocol (or www)
+ . '(https?:\\/\\/)' # $4 Protocol (http:// or https://; required)
. '('.self::REGEX_URL_DOMAIN.')' # $5 Domain(s) (and port)
. '(\\/'.self::REGEX_URL_CHARS_PATH.'*' # $6 URL Path
. self::REGEX_URL_CHARS_PATH_END.'?)?'
Submodule twitter-text-conformance updated 4 files
+38 −4 README
+24 −24 autolink.yml
+4 −4 extract.yml
+5 −0 hit_highlighting.yml

0 comments on commit 06aa163

Please sign in to comment.