From c51236372f5070a01f76db0620b3fbcbe3ceb3c9 Mon Sep 17 00:00:00 2001
From: Luke Karrys
Date: Wed, 26 Oct 2022 01:06:56 -0700
Subject: [PATCH] feat: add parseUrl method for only parsing

---
 lib/index.js      | 134 ++++++----------------------------------------
 lib/parse-url.js  |  79 +++++++++++++++++++++++++++
 lib/protocols.js  |  13 +++++
 test/parse-url.js |  10 ++++
 4 files changed, 118 insertions(+), 118 deletions(-)
 create mode 100644 lib/parse-url.js
 create mode 100644 lib/protocols.js
 create mode 100644 test/parse-url.js

diff --git a/lib/index.js b/lib/index.js
index d5d63c6..50f3501 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -1,32 +1,11 @@
 'use strict'
-const url = require('url')
 const gitHosts = require('./git-host-info.js')
 const GitHost = module.exports = require('./git-host.js')
 const LRU = require('lru-cache')
+const parseUrl = require('./parse-url.js')
+const protocols = require('./protocols')(gitHosts.byShortcut)
 const cache = new LRU({ max: 1000 })

-const protocolToRepresentationMap = {
-  'git+ssh:': 'sshurl',
-  'git+https:': 'https',
-  'ssh:': 'sshurl',
-  'git:': 'git',
-}
-
-function protocolToRepresentation (protocol) {
-  return protocolToRepresentationMap[protocol] || protocol.slice(0, -1)
-}
-
-const authProtocols = {
-  'git:': true,
-  'https:': true,
-  'git+https:': true,
-  'http:': true,
-  'git+http:': true,
-}
-
-const knownProtocols = Object.keys(gitHosts.byShortcut)
-  .concat(['http:', 'https:', 'git:', 'git+ssh:', 'git+https:', 'ssh:'])
-
 module.exports.fromUrl = function (giturl, opts) {
   if (typeof giturl !== 'string') {
     return
@@ -41,22 +20,23 @@ module.exports.fromUrl = function (giturl, opts) {
   return cache.get(key)
 }

+module.exports.parseUrl = parseUrl
+
 function fromUrl (giturl, opts) {
   if (!giturl) {
     return
   }

-  const correctedUrl = isGitHubShorthand(giturl) ? 'github:' + giturl : correctProtocol(giturl)
-  const parsed = parseGitUrl(correctedUrl)
+  const correctedUrl = isGitHubShorthand(giturl) ? `github:${giturl}` : giturl
+  const parsed = parseUrl(correctedUrl, protocols)
   if (!parsed) {
-    return parsed
+    return
   }

   const gitHostShortcut = gitHosts.byShortcut[parsed.protocol]
-  const gitHostDomain =
-    gitHosts.byDomain[parsed.hostname.startsWith('www.') ?
-      parsed.hostname.slice(4) :
-      parsed.hostname]
+  const gitHostDomain = gitHosts.byDomain[parsed.hostname.startsWith('www.')
+    ? parsed.hostname.slice(4)
+    : parsed.hostname]
   const gitHostName = gitHostShortcut || gitHostDomain
   if (!gitHostName) {
     return
@@ -64,7 +44,10 @@ function fromUrl (giturl, opts) {

   const gitHostInfo = gitHosts[gitHostShortcut || gitHostDomain]
   let auth = null
-  if (authProtocols[parsed.protocol] && (parsed.username || parsed.password)) {
+  if (protocols[parsed.protocol] &&
+    protocols[parsed.protocol].auth &&
+    (parsed.username || parsed.password)
+  ) {
     auth = `${parsed.username}${parsed.password ? ':' + parsed.password : ''}`
   }

@@ -116,7 +99,8 @@ function fromUrl (giturl, opts) {
       user = segments.user && decodeURIComponent(segments.user)
       project = decodeURIComponent(segments.project)
       committish = decodeURIComponent(segments.committish)
-      defaultRepresentation = protocolToRepresentation(parsed.protocol)
+      defaultRepresentation = (protocols[parsed.protocol] && protocols[parsed.protocol].name)
+        || parsed.protocol.slice(0, -1)
     }
   } catch (err) {
     /* istanbul ignore else */
@@ -130,31 +114,6 @@ function fromUrl (giturl, opts) {
   return new GitHost(gitHostName, user, auth, project, committish, defaultRepresentation, opts)
 }

-// accepts input like git:github.com:user/repo and inserts the // after the first :
-const correctProtocol = (arg) => {
-  const firstColon = arg.indexOf(':')
-  const proto = arg.slice(0, firstColon + 1)
-  if (knownProtocols.includes(proto)) {
-    return arg
-  }
-
-  const firstAt = arg.indexOf('@')
-  if (firstAt > -1) {
-    if (firstAt > firstColon) {
-      return `git+ssh://${arg}`
-    } else {
-      return arg
-    }
-  }
-
-  const doubleSlash = arg.indexOf('//')
-  if (doubleSlash === firstColon + 1) {
-    return arg
-  }
-
-  return arg.slice(0, firstColon + 1) + '//' + arg.slice(firstColon + 1)
-}
-
 // look for github shorthand inputs, such as npm/cli
 const isGitHubShorthand = (arg) => {
   // it cannot contain whitespace before the first #
@@ -185,64 +144,3 @@ const isGitHubShorthand = (arg) => {
     doesNotStartWithDot && atOnlyAfterHash && colonOnlyAfterHash &&
     secondSlashOnlyAfterHash
 }
-
-// attempt to correct an scp style url so that it will parse with `new URL()`
-const correctUrl = (giturl) => {
-  const firstAt = giturl.indexOf('@')
-  const lastHash = giturl.lastIndexOf('#')
-  let firstColon = giturl.indexOf(':')
-  let lastColon = giturl.lastIndexOf(':', lastHash > -1 ? lastHash : Infinity)
-
-  let corrected
-  if (lastColon > firstAt) {
-    // the last : comes after the first @ (or there is no @)
-    // like it would in:
-    // proto://hostname.com:user/repo
-    // username@hostname.com:user/repo
-    // :password@hostname.com:user/repo
-    // username:password@hostname.com:user/repo
-    // proto://username@hostname.com:user/repo
-    // proto://:password@hostname.com:user/repo
-    // proto://username:password@hostname.com:user/repo
-    // then we replace the last : with a / to create a valid path
-    corrected = giturl.slice(0, lastColon) + '/' + giturl.slice(lastColon + 1)
-    // // and we find our new : positions
-    firstColon = corrected.indexOf(':')
-    lastColon = corrected.lastIndexOf(':')
-  }
-
-  if (firstColon === -1 && giturl.indexOf('//') === -1) {
-    // we have no : at all
-    // as it would be in:
-    // username@hostname.com/user/repo
-    // then we prepend a protocol
-    corrected = `git+ssh://${corrected}`
-  }
-
-  return corrected
-}
-
-// try to parse the url as its given to us, if that throws
-// then we try to clean the url and parse that result instead
-// THIS FUNCTION SHOULD NEVER THROW
-const parseGitUrl = (giturl) => {
-  let result
-  try {
-    result = new url.URL(giturl)
-  } catch {
-    // this fn should never throw
-  }
-
-  if (result) {
-    return result
-  }
-
-  const correctedUrl = correctUrl(giturl)
-  try {
-    result = new url.URL(correctedUrl)
-  } catch {
-    // this fn should never throw
-  }
-
-  return result
-}
diff --git a/lib/parse-url.js b/lib/parse-url.js
new file mode 100644
index 0000000..5f5ac4d
--- /dev/null
+++ b/lib/parse-url.js
@@ -0,0 +1,79 @@
+const url = require('url')
+const getProtocols = require('./protocols.js')
+
+const lastIndexOfBefore = (str, char, beforeChar) => {
+  const startPosition = str.indexOf(beforeChar)
+  return str.lastIndexOf(char, startPosition > -1 ? startPosition : Infinity)
+}
+
+const safeUrl = (u) => {
+  try {
+    return new url.URL(u)
+  } catch {
+    // this fn should never throw
+  }
+}
+
+// accepts input like git:github.com:user/repo and inserts the // after the first :
+const correctProtocol = (arg, protocols) => {
+  const firstColon = arg.indexOf(':')
+  const proto = arg.slice(0, firstColon + 1)
+  if (Object.prototype.hasOwnProperty.call(protocols, proto)) {
+    return arg
+  }
+
+  const firstAt = arg.indexOf('@')
+  if (firstAt > -1) {
+    if (firstAt > firstColon) {
+      return `git+ssh://${arg}`
+    } else {
+      return arg
+    }
+  }
+
+  const doubleSlash = arg.indexOf('//')
+  if (doubleSlash === firstColon + 1) {
+    return arg
+  }
+
+  return `${arg.slice(0, firstColon + 1)}//${arg.slice(firstColon + 1)}`
+}
+
+// attempt to correct an scp style url so that it will parse with `new URL()`
+const correctUrl = (giturl) => {
+  // ignore any @ that comes after the first hash since the hash denotes the start
+  // of a committish which can contain @ characters
+  const firstAt = lastIndexOfBefore(giturl, '@', '#')
+  // ignore colons that come after the hash since that could include colons such as:
+  // git@github.com:user/package-2#semver:^1.0.0
+  const lastColonBeforeHash = lastIndexOfBefore(giturl, ':', '#')
+
+  if (lastColonBeforeHash > firstAt) {
+    // the last : before the hash comes after the last @ before the hash (or there is no @)
+    // like it would in:
+    // proto://hostname.com:user/repo
+    // username@hostname.com:user/repo
+    // :password@hostname.com:user/repo
+    // username:password@hostname.com:user/repo
+    // proto://username@hostname.com:user/repo
+    // proto://:password@hostname.com:user/repo
+    // proto://username:password@hostname.com:user/repo
+    // then we replace the last : with a / to create a valid path
+    giturl = giturl.slice(0, lastColonBeforeHash) + '/' + giturl.slice(lastColonBeforeHash + 1)
+  }
+
+  if (lastIndexOfBefore(giturl, ':', '#') === -1 && giturl.indexOf('//') === -1) {
+    // we have no : before the hash and no // anywhere
+    // as it would be in:
+    // username@hostname.com/user/repo
+    // then we prepend a protocol
+    giturl = `git+ssh://${giturl}`
+  }
+
+  return giturl
+}
+
+module.exports = (giturl, protocols = getProtocols()) => {
+  const withProtocol = correctProtocol(giturl, protocols)
+  return safeUrl(withProtocol) || safeUrl(correctUrl(withProtocol))
+}
diff --git a/lib/protocols.js b/lib/protocols.js
new file mode 100644
index 0000000..df6aea1
--- /dev/null
+++ b/lib/protocols.js
@@ -0,0 +1,13 @@
+module.exports = (byShortcut = {}) => ({
+  'git+ssh:': { name: 'sshurl' },
+  'ssh:': { name: 'sshurl' },
+  'git+https:': { name: 'https', auth: true },
+  'git:': { auth: true },
+  'http:': { auth: true },
+  'https:': { auth: true },
+  'git+http:': { auth: true },
+  ...Object.keys(byShortcut).reduce((acc, key) => {
+    acc[key] = { name: byShortcut[key] }
+    return acc
+  }, {}),
+})
diff --git a/test/parse-url.js b/test/parse-url.js
new file mode 100644
index 0000000..aab57a2
--- /dev/null
+++ b/test/parse-url.js
@@ -0,0 +1,10 @@
+const t = require('tap')
+const HostedGit = require('..')
+const parseUrl = require('../lib/parse-url.js')
+
+t.test('can parse git+ssh url by default', async t => {
+  // https://github.com/npm/cli/issues/5278
+  const u = 'git+ssh://git@abc:frontend/utils.git#6d45447e0c5eb6cd2e3edf05a8c5a9bb81950c79'
+  t.ok(parseUrl(u))
+  t.ok(HostedGit.parseUrl(u))
+})