Skip to content

Commit

Permalink
Merge pull request #6965 from mautic-inc/staging.plaintext-urls-with-…
Browse files Browse the repository at this point in the history
…query-parameters

Fixed issue where query parameters were not included in plaintext URLs
  • Loading branch information
Woeler committed Dec 5, 2018
2 parents b13c075 + b64f473 commit 9b46a66
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 17 deletions.
31 changes: 29 additions & 2 deletions app/bundles/CoreBundle/Helper/UrlHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -166,17 +166,20 @@ public static function getUrlsFromPlaintext($text, array $contactUrlFields = [])
}
}

$regex = '_(?:(?:https?|ftp)://)(?:\S+(?::\S*)?@)?(?:(?!10(?:\.\d{1,3}){3})(?!127(?:\.\d{1,3}){3})(?!169\.254(?:\.\d{1,3}){2})(?!192\.168(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)(?:\.(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)*(?:\.(?:[a-z\x{00a1}-\x{ffff}]{2,})))(?::\d{2,5})?(?:/[^\s`!()\[\]{};:\'".,<>?«»“”‘’]*)?_ius';
$regex = '_(?:(?:https?|ftp)://)(?:\S+(?::\S*)?@)?(?:(?!10(?:\.\d{1,3}){3})(?!127(?:\.\d{1,3}){3})(?!169\.254(?:\.\d{1,3}){2})(?!192\.168(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)(?:\.(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)*(?:\.(?:[a-z\x{00a1}-\x{ffff}]{2,})))(?::\d{2,5})?(?:/[^\s]*)?_ius';
if (!preg_match_all($regex, $text, $matches)) {
return $urls;
}

$urls = array_merge($urls, $matches[0]);

foreach ($urls as $key => $url) {
// Remove dangling punctuation
$urls[$key] = $url = self::removeTrailingNonAlphaNumeric($url);

// We don't want to match URLs in token default values
// like {contactfield=website|http://ignore.this.url}
if (preg_match_all("#{(.*?)\|$url}#", $text, $matches)) {
if (preg_match_all("#{(.*?)\|".preg_quote($url).'}#', $text, $matches)) {
unset($urls[$key]);

// We know this is a URL due to the default so let's include it as a trackable
Expand Down Expand Up @@ -259,4 +262,28 @@ private static function sanitizeUrlQuery($url)

return $url;
}

/**
 * Strips trailing characters from a string until it ends with an ASCII
 * letter, a digit, a forward slash or a closing brace that is not dangling.
 *
 * A trailing "}" is treated as dangling (and removed) when the string
 * contains a "}" that is not preceded by any "{", e.g. the URL captured
 * from "{https://example.org/with/braces}". A trailing "}" that closes a
 * brace opened earlier in the string, e.g. "?foo={contactfield=bar}", is kept.
 *
 * If the string contains no acceptable trailing character at all, an empty
 * string is returned.
 *
 * @param string $string
 *
 * @return string
 */
private static function removeTrailingNonAlphaNumeric($string)
{
    // Iterating (rather than recursing once per stripped character) with an
    // explicit empty-string stop condition guarantees termination: input
    // such as '' or '!!!' can never satisfy the "valid ending" pattern and
    // would otherwise be stripped forever.
    while ($string !== '') {
        // Special handling of closing bracket
        $hasDanglingBrace = substr($string, -1) === '}'
            && preg_match('/^[^{\r\n]*\}.*?$/', $string);

        // Ensure only alphanumeric allowed
        $hasValidEnding = preg_match("/^.*?[a-zA-Z0-9}\/]$/i", $string);

        if (!$hasDanglingBrace && $hasValidEnding) {
            break;
        }

        // substr() returns false instead of '' for some inputs on PHP < 8;
        // normalise so the loop condition above stays reliable.
        $string = (string) substr($string, 0, -1);
    }

    return $string;
}
}
41 changes: 26 additions & 15 deletions app/bundles/CoreBundle/Tests/unit/Helper/UrlHelperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ public function testGetUrlsFromPlaintextWith2Urls()
public function testGetUrlsFromPlaintextWithSymbols()
{
$this->assertEquals(
['https://example.org/with/square/brackets',
[
'https://example.org/with/square/brackets',
'https://example.org/square/brackets/with/slash/and/comma/',
'https://example.org/with/parentheses',
'https://example.org/with/braces',
Expand All @@ -120,20 +121,30 @@ public function testGetUrlsFromPlaintextWithSymbols()
'https://example.org/with/double-quotes',
'https://example.org/with/exclamation',
'https://example.org/with/quotation',
],
UrlHelper::getUrlsFromPlaintext('This text contains URL with the square brackets [https://example.org/with/square/brackets]
also the square brackets with a slash and a comma [https://example.org/square/brackets/with/slash/and/comma/],
or parentheses (https://example.org/with/parentheses),
or braces {https://example.org/with/braces}
or greater than symbol <https://example.org/with/greater-than-symbol>
even with just a comma: https://example.org/with/comma,
or with a dot: https://example.org/with/dot.
https://example.org/with/colon: It is cool!
This website https://example.org/with/semi-colon; Very awesome!
A single example \'https://example.org/with/simple-quotes\'
A double example "https://example.org/with/double-quotes"
Thanks for this https://example.org/with/exclamation!
Someone said “https://example.org/with/quotation”')
'https://example.org/with/query?utm_campaign=hello',
'https://example.org/with/tokenized-query?foo={contactfield=bar}&bar=foo',
'https://example.org/with/just-tokenized-query?foo={contactfield=bar}',
],
UrlHelper::getUrlsFromPlaintext(
<<<STRING
This text contains URL with the square brackets [https://example.org/with/square/brackets]
also the square brackets with a slash and a comma [https://example.org/square/brackets/with/slash/and/comma/],
or parentheses (https://example.org/with/parentheses),
or braces {https://example.org/with/braces}
or greater than symbol <https://example.org/with/greater-than-symbol>
even with just a comma: https://example.org/with/comma,
or with a dot: https://example.org/with/dot.
https://example.org/with/colon: It is cool!
This website https://example.org/with/semi-colon; Very awesome!
A single example 'https://example.org/with/simple-quotes'
A double example "https://example.org/with/double-quotes"
Thanks for this https://example.org/with/exclamation!
Someone said “https://example.org/with/quotation”
Checkout my UTM tags https://example.org/with/query?utm_campaign=hello.
Hey what about https://example.org/with/tokenized-query?foo={contactfield=bar}&bar=foo.
What happens with this https://example.org/with/just-tokenized-query?foo={contactfield=bar}?
STRING
)
);
}
}

0 comments on commit 9b46a66

Please sign in to comment.