Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Updated conformance tests; protocol-less URLs are no longer matched.

  • Loading branch information...
commit 06aa16352cbeb4695f5d624b79a0c5591c205d03 1 parent 6bc4a76
Nick Pope authored
View
5 lib/Twitter/Autolink.php
@@ -413,9 +413,8 @@ protected function _addLinksToHashtags($matches) {
protected function _addLinksToURLs($matches) {
list($all, $before, $url, $protocol, $domain, $path, $query) = array_pad($matches, 7, '');
$url = htmlspecialchars($url, ENT_QUOTES, 'UTF-8', false);
- if (!$protocol && !preg_match(self::REGEX_PROBABLE_TLD, $domain)) return $all;
- $href = ((!$protocol || strtolower($protocol) === 'www.') ? 'http://'.$url : $url);
- return $before . $this->wrap($href, $this->class_url, $url);
+ if (!$protocol) return $all;
+ return $before . $this->wrap($url, $this->class_url, $url);
}
/**
View
14 lib/Twitter/Extractor.php
@@ -87,19 +87,7 @@ public function extractHashtags() {
public function extractURLs() {
preg_match_all(self::$REGEX_VALID_URL, $this->tweet, $matches);
list($all, $before, $url, $protocol, $domain, $path, $query) = array_pad($matches, 7, '');
- $i = count($url)-1;
- for (; $i >= 0; $i--) {
- if (!preg_match('!https?://!', $protocol[$i])) {
- # Note: $protocol can contain 'www.' if no protocol exists!
- if (preg_match(self::REGEX_PROBABLE_TLD, $domain[$i]) || strtolower($protocol[$i]) === 'www.') {
- $url[$i] = 'http://'.(strtolower($protocol[$i]) === 'www.' ? $protocol[$i] : '').$domain[$i];
- } else {
- unset($url[$i]);
- }
- }
- }
- # Renumber the array:
- return array_values($url);
+ return $url;
}
/**
View
9 lib/Twitter/Regex.php
@@ -46,13 +46,6 @@
const REGEX_URL_DOMAIN = '(?:[^\\p{P}\\p{Lo}\\s][\\.-](?=[^\\p{P}\\p{Lo}\\s])|[^\\p{P}\\p{Lo}\\s])+\\.[a-z]{2,}(?::[0-9]+)?';
/**
- * Expression to match handful of probable TLDs for protocol-less URLS.
- *
- * @var string
- */
- const REGEX_PROBABLE_TLD = '/\\.(?:com|net|org|gov|edu)$/iu';
-
- /**
* Expression to match characters that may come in the URL path.
*
* @var string
@@ -160,7 +153,7 @@ protected function __construct($tweet) {
self::$REGEX_VALID_URL = '/(?:' # $1 Complete match (preg_match already matches everything.)
. '('.self::REGEX_URL_CHARS_BEFORE.')' # $2 Preceding character
. '(' # $3 Complete URL
- . '((?:https?:\\/\\/|www\\.)?)' # $4 Protocol (or www)
+ . '(https?:\\/\\/)' # $4 Protocol (http:// or https://, now required)
. '('.self::REGEX_URL_DOMAIN.')' # $5 Domain(s) (and port)
. '(\\/'.self::REGEX_URL_CHARS_PATH.'*' # $6 URL Path
. self::REGEX_URL_CHARS_PATH_END.'?)?'
2  tests/data/twitter-text-conformance
@@ -1 +1 @@
-Subproject commit 75b0baf5282c07d42e1dbb2f203bb0dfc86f1e48
+Subproject commit 6e365f0e3a889490455e222a427fed06b255d3ad
Please sign in to comment.
Something went wrong with that request. Please try again.