Skip to content

Commit

Permalink
Adds options for constraints on dupe slash removal related protocols
Browse files Browse the repository at this point in the history
* Adds two new options, for min/max length of what the dupe slash removal regex recognizes as a protocol.
* Updates tests to exercise the new options
* Updates readme to document the new options
  • Loading branch information
gcox committed Jul 30, 2020
1 parent 329e3d9 commit a2f6d42
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 3 deletions.
13 changes: 12 additions & 1 deletion index.js
Expand Up @@ -72,6 +72,8 @@ const normalizeUrl = (urlString, options) => {
removeTrailingSlash: true,
removeDirectoryIndex: false,
sortQueryParameters: true,
embeddedProtocolMinLength: 2,
embeddedProtocolMaxLength: 50,
...options
};

Expand All @@ -96,6 +98,14 @@ const normalizeUrl = (urlString, options) => {
throw new Error('The `forceHttp` and `forceHttps` options cannot be used together');
}

if (options.embeddedProtocolMinLength < 1) {
throw new Error('The `embeddedProtocolMinLength` option must be greater than 0');
}

if (options.embeddedProtocolMaxLength < options.embeddedProtocolMinLength) {
throw new Error('The `embeddedProtocolMaxLength` option cannot be less than the `embeddedProtocolMinLength`');
}

if (options.forceHttp && urlObj.protocol === 'https:') {
urlObj.protocol = 'http:';
}
Expand All @@ -117,7 +127,8 @@ const normalizeUrl = (urlString, options) => {

// Remove duplicate slashes if not preceded by a protocol
if (urlObj.pathname) {
urlObj.pathname = urlObj.pathname.replace(/(?:(?<=[a-z\d]{31,}:)|(?<![a-z\d]{2,}:))\/{2,}/g, '/');
const regex = new RegExp(`(?:(?<=[a-z\\d]{${options.embeddedProtocolMaxLength + 1},}:)|(?<![a-z\\d]{${options.embeddedProtocolMinLength},}:))\\/{2,}`, 'gi');
urlObj.pathname = urlObj.pathname.replace(regex, '/');
}

// Decode URI octets
Expand Down
31 changes: 31 additions & 0 deletions readme.md
Expand Up @@ -212,6 +212,37 @@ normalizeUrl('www.sindresorhus.com?b=two&a=one&c=three', {
//=> 'http://sindresorhus.com/?b=two&a=one&c=three'
```

##### embeddedProtocolMinLength

Type: `number`\
Default: `2`

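Duplicate slashes in the path are collapsed into a single slash unless they are immediately preceded by an embedded protocol (for example, the `s3:` in `/s3://bucket`) whose length is greater than or equal to `embeddedProtocolMinLength` and less than or equal to `embeddedProtocolMaxLength`.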

**Note:** Must be at least 1.

```js
normalizeUrl('www.sindresorhus.com//foo/bar://sindresorhus.com/ab://b.com', {
embeddedProtocolMinLength: 3
});
//=> 'http://sindresorhus.com/foo/bar://sindresorhus.com/ab:/b.com'
```

##### embeddedProtocolMaxLength

Type: `number`\
Default: `50`

**Note:** Cannot be less than `embeddedProtocolMinLength`.

Duplicate slashes in the path are collapsed into a single slash unless they are immediately preceded by an embedded protocol whose length is greater than or equal to `embeddedProtocolMinLength` and less than or equal to `embeddedProtocolMaxLength`.

```js
normalizeUrl('www.sindresorhus.com//foo/bar://sindresorhus.com/abcdef://b.com', {
embeddedProtocolMaxLength: 5
});
//=> 'http://sindresorhus.com/foo/bar://sindresorhus.com/abcdef:/b.com'
```

## Related

- [compare-urls](https://github.com/sindresorhus/compare-urls) - Compare URLs by first normalizing them
Expand Down
25 changes: 23 additions & 2 deletions test.js
Expand Up @@ -197,6 +197,18 @@ test('invalid urls', t => {
}, 'Invalid URL: /relative/path/');
});

// An `embeddedProtocolMinLength` below 1 must be rejected with a descriptive error.
test('embeddedProtocolMinLength out of bounds', t => {
	const normalizeWithZeroMinLength = () => {
		normalizeUrl('https://www.sindresorhus.com', {embeddedProtocolMinLength: 0});
	};

	t.throws(normalizeWithZeroMinLength, 'The `embeddedProtocolMinLength` option must be greater than 0');
});

// An `embeddedProtocolMaxLength` smaller than `embeddedProtocolMinLength` must be rejected.
test('embeddedProtocolMaxLength out of bounds', t => {
	const invertedBounds = {embeddedProtocolMinLength: 5, embeddedProtocolMaxLength: 4};
	const normalizeWithInvertedBounds = () => {
		normalizeUrl('https://www.sindresorhus.com', invertedBounds);
	};

	t.throws(normalizeWithInvertedBounds, 'The `embeddedProtocolMaxLength` option cannot be less than the `embeddedProtocolMinLength`');
});

test('remove duplicate pathname slashes', t => {
t.is(normalizeUrl('http://sindresorhus.com////foo/bar'), 'http://sindresorhus.com/foo/bar');
t.is(normalizeUrl('http://sindresorhus.com////foo////bar'), 'http://sindresorhus.com/foo/bar');
Expand All @@ -205,15 +217,24 @@ test('remove duplicate pathname slashes', t => {
t.is(normalizeUrl('http://sindresorhus.com///foo'), 'http://sindresorhus.com/foo');
t.is(normalizeUrl('http://sindresorhus.com:5000//foo'), 'http://sindresorhus.com:5000/foo');
t.is(normalizeUrl('http://sindresorhus.com//foo'), 'http://sindresorhus.com/foo');

// Using default embeddedProtocolMin/MaxLength (2/50) options
t.is(normalizeUrl('http://sindresorhus.com/s3://sindresorhus.com'), 'http://sindresorhus.com/s3://sindresorhus.com');
t.is(normalizeUrl('http://sindresorhus.com/s3://sindresorhus.com//foo'), 'http://sindresorhus.com/s3://sindresorhus.com/foo');
t.is(normalizeUrl('http://sindresorhus.com//foo/s3://sindresorhus.com'), 'http://sindresorhus.com/foo/s3://sindresorhus.com');
t.is(normalizeUrl('http://sindresorhus.com/git://sindresorhus.com'), 'http://sindresorhus.com/git://sindresorhus.com');
t.is(normalizeUrl('http://sindresorhus.com/git://sindresorhus.com//foo'), 'http://sindresorhus.com/git://sindresorhus.com/foo');
t.is(normalizeUrl('http://sindresorhus.com//foo/git://sindresorhus.com//foo'), 'http://sindresorhus.com/foo/git://sindresorhus.com/foo');
t.is(normalizeUrl('http://sindresorhus.com/a://sindresorhus.com//foo'), 'http://sindresorhus.com/a:/sindresorhus.com/foo');
t.is(normalizeUrl('http://sindresorhus.com/alongprotocolwithin30charlimit://sindresorhus.com//foo'), 'http://sindresorhus.com/alongprotocolwithin30charlimit://sindresorhus.com/foo');
t.is(normalizeUrl('http://sindresorhus.com/alongprotocolexceeds30charlimit://sindresorhus.com//foo'), 'http://sindresorhus.com/alongprotocolexceeds30charlimit:/sindresorhus.com/foo');
t.is(normalizeUrl('http://sindresorhus.com/alongprotocolwithin50charlimitxxxxxxxxxxxxxxxxxxxx://sindresorhus.com//foo'), 'http://sindresorhus.com/alongprotocolwithin50charlimitxxxxxxxxxxxxxxxxxxxx://sindresorhus.com/foo');
t.is(normalizeUrl('http://sindresorhus.com/alongprotocolexceeds50charlimitxxxxxxxxxxxxxxxxxxxxx://sindresorhus.com//foo'), 'http://sindresorhus.com/alongprotocolexceeds50charlimitxxxxxxxxxxxxxxxxxxxxx:/sindresorhus.com/foo');

// Using custom embeddedProtocolMin/MaxLength (4/10) options
const options = {embeddedProtocolMinLength: 4, embeddedProtocolMaxLength: 10};
t.is(normalizeUrl('http://sindresorhus.com/abc://sindresorhus.com//foo', options), 'http://sindresorhus.com/abc:/sindresorhus.com/foo');
t.is(normalizeUrl('http://sindresorhus.com/abcd://sindresorhus.com//foo', options), 'http://sindresorhus.com/abcd://sindresorhus.com/foo');
t.is(normalizeUrl('http://sindresorhus.com/abcdefghij://sindresorhus.com//foo', options), 'http://sindresorhus.com/abcdefghij://sindresorhus.com/foo');
t.is(normalizeUrl('http://sindresorhus.com/abcdefghijk://sindresorhus.com//foo', options), 'http://sindresorhus.com/abcdefghijk:/sindresorhus.com/foo');
});

test('data URL', t => {
Expand Down

0 comments on commit a2f6d42

Please sign in to comment.