Skip to content

Commit

Permalink
Mark long hrefs or those with invalid characters as sneaky
Browse files Browse the repository at this point in the history
  • Loading branch information
EvanHahn-Signal authored and josh-signal committed Oct 12, 2020
1 parent 0d83076 commit f21dad1
Show file tree
Hide file tree
Showing 2 changed files with 230 additions and 95 deletions.
103 changes: 72 additions & 31 deletions js/modules/link_previews.js
@@ -1,6 +1,6 @@
/* global URL */

const { isNumber, compact, isEmpty } = require('lodash');
const { isNumber, compact, isEmpty, range } = require('lodash');
const nodeUrl = require('url');
const LinkifyIt = require('linkify-it');

Expand All @@ -14,14 +14,17 @@ module.exports = {
isStickerPack,
};

function isLinkSafeToPreview(link) {
let url;
function maybeParseHref(href) {
try {
url = new URL(link);
return new URL(href);
} catch (err) {
return false;
return null;
}
return url.protocol === 'https:' && !isLinkSneaky(link);
}

/**
 * Decides whether a link preview may be generated for `href`.
 *
 * A link qualifies only when it parses as a URL, uses the `https:` protocol,
 * and is not flagged by `isLinkSneaky`.
 *
 * @param {string} href - The raw link text to check.
 * @returns {boolean} `true` when the link is safe to preview, otherwise `false`.
 */
function isLinkSafeToPreview(href) {
  const parsed = maybeParseHref(href);

  // Unparseable links are never previewed.
  if (!parsed) {
    return false;
  }

  // Only secure links are eligible; anything else is rejected before the
  // sneakiness check runs.
  if (parsed.protocol !== 'https:') {
    return false;
  }

  return !isLinkSneaky(href);
}

function isStickerPack(link) {
Expand Down Expand Up @@ -52,35 +55,66 @@ function findLinks(text, caretLocation) {
);
}

function hasAuth(url) {
try {
const urlObject = new URL(url);
return Boolean(urlObject.username);
} catch (e) {
return null;
}
/**
 * Extracts the hostname from a link.
 *
 * @param {string} href - The raw link text.
 * @returns {string|null} The parsed URL's `hostname`, or `null` when `href`
 *   cannot be parsed as a URL.
 */
function getDomain(href) {
  const parsed = maybeParseHref(href);
  if (!parsed) {
    return null;
  }
  return parsed.hostname;
}

function getDomain(url) {
try {
const urlObject = new URL(url);
return urlObject.hostname;
} catch (error) {
return null;
// Characters accepted as valid in a URI: the percent sign plus the
// "gen-delims", "sub-delims" and "unreserved" groups.
// See <https://tools.ietf.org/html/rfc3986>.
const VALID_URI_CHARACTERS = new Set([
  '%',
  // "gen-delims"
  ...':/?#[]@',
  // "sub-delims"
  ..."!$&'()*+,;=",
  // unreserved: uppercase letters, lowercase letters, digits, then marks
  ...'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
  ...'abcdefghijklmnopqrstuvwxyz',
  ...'0123456789',
  ...'-._~',
]);

// Matches, globally, every character in the range U+0020 through U+007F.
const ASCII_PATTERN = new RegExp('[\\u0020-\\u007F]', 'g');

// Longest href (4096 characters) that isLinkSneaky accepts without
// automatically flagging the link as sneaky.
const MAX_HREF_LENGTH = 2 ** 12;

function isLinkSneaky(href) {
// This helps users avoid extremely long links (which could be hiding something
// sketchy) and also sidesteps the performance implications of extremely long hrefs.
if (href.length > MAX_HREF_LENGTH) {
return true;
}
}

const ASCII_PATTERN = new RegExp('[\\u0020-\\u007F]', 'g');
const url = maybeParseHref(href);

// If we can't parse it, it's sneaky.
if (!url) {
return true;
}

function isLinkSneaky(link) {
// Any links which contain auth are considered sneaky
if (hasAuth(link)) {
if (url.username) {
return true;
}

const domain = getDomain(link);
// If the domain is falsy, something fishy is going on
if (!domain) {
if (!url.hostname) {
return true;
}

Expand All @@ -89,25 +123,25 @@ function isLinkSneaky(link) {
// maximum of 2048. (This also uses the string's `.length` property,
// which isn't exactly the same thing as the number of octets.)
// [0]: https://tools.ietf.org/html/rfc1034
if (domain.length > 2048) {
if (url.hostname.length > 2048) {
return true;
}

// Domains cannot contain encoded characters
if (domain.includes('%')) {
if (url.hostname.includes('%')) {
return true;
}

// There must be at least 2 domain labels, and none of them can be empty.
const labels = domain.split('.');
const labels = url.hostname.split('.');
if (labels.length < 2 || labels.some(isEmpty)) {
return true;
}

// This is necessary because getDomain returns domains in punycode form.
const unicodeDomain = nodeUrl.domainToUnicode
? nodeUrl.domainToUnicode(domain)
: domain;
? nodeUrl.domainToUnicode(url.hostname)
: url.hostname;

const withoutPeriods = unicodeDomain.replace(/\./g, '');

Expand All @@ -119,5 +153,12 @@ function isLinkSneaky(link) {
return true;
}

return false;
// We can't use `url.pathname` (and so on) because it automatically encodes strings.
// For example, it turns `/aquí` into `/aqu%C3%AD`.
const startOfPathAndHash = href.indexOf('/', url.protocol.length + 4);
const pathAndHash =
startOfPathAndHash === -1 ? '' : href.substr(startOfPathAndHash);
return [...pathAndHash].some(
character => !VALID_URI_CHARACTERS.has(character)
);
}

0 comments on commit f21dad1

Please sign in to comment.