Skip to content

Commit

Permalink
feat: add parseUrl method for only parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
lukekarrys committed Oct 26, 2022
1 parent 4a97380 commit c512363
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 118 deletions.
134 changes: 16 additions & 118 deletions lib/index.js
@@ -1,32 +1,11 @@
'use strict'
const url = require('url')
const gitHosts = require('./git-host-info.js')
const GitHost = module.exports = require('./git-host.js')
const LRU = require('lru-cache')
const parseUrl = require('./parse-url.js')
const protocols = require('./protocols')(gitHosts.byShortcut)
const cache = new LRU({ max: 1000 })

const protocolToRepresentationMap = {
'git+ssh:': 'sshurl',
'git+https:': 'https',
'ssh:': 'sshurl',
'git:': 'git',
}

function protocolToRepresentation (protocol) {
return protocolToRepresentationMap[protocol] || protocol.slice(0, -1)
}

const authProtocols = {
'git:': true,
'https:': true,
'git+https:': true,
'http:': true,
'git+http:': true,
}

const knownProtocols = Object.keys(gitHosts.byShortcut)
.concat(['http:', 'https:', 'git:', 'git+ssh:', 'git+https:', 'ssh:'])

module.exports.fromUrl = function (giturl, opts) {
if (typeof giturl !== 'string') {
return
Expand All @@ -41,30 +20,34 @@ module.exports.fromUrl = function (giturl, opts) {
return cache.get(key)
}

module.exports.parseUrl = parseUrl

function fromUrl (giturl, opts) {
if (!giturl) {
return
}

const correctedUrl = isGitHubShorthand(giturl) ? 'github:' + giturl : correctProtocol(giturl)
const parsed = parseGitUrl(correctedUrl)
const correctedUrl = isGitHubShorthand(giturl) ? `github:${giturl}` : giturl
const parsed = parseUrl(correctedUrl, protocols)
if (!parsed) {
return parsed
return
}

const gitHostShortcut = gitHosts.byShortcut[parsed.protocol]
const gitHostDomain =
gitHosts.byDomain[parsed.hostname.startsWith('www.') ?
parsed.hostname.slice(4) :
parsed.hostname]
const gitHostDomain = gitHosts.byDomain[parsed.hostname.startsWith('www.')
? parsed.hostname.slice(4)
: parsed.hostname]
const gitHostName = gitHostShortcut || gitHostDomain
if (!gitHostName) {
return
}

const gitHostInfo = gitHosts[gitHostShortcut || gitHostDomain]
let auth = null
if (authProtocols[parsed.protocol] && (parsed.username || parsed.password)) {
if (protocols[parsed.protocol] &&
protocols[parsed.protocol].auth &&
(parsed.username || parsed.password)
) {
auth = `${parsed.username}${parsed.password ? ':' + parsed.password : ''}`
}

Expand Down Expand Up @@ -116,7 +99,8 @@ function fromUrl (giturl, opts) {
user = segments.user && decodeURIComponent(segments.user)
project = decodeURIComponent(segments.project)
committish = decodeURIComponent(segments.committish)
defaultRepresentation = protocolToRepresentation(parsed.protocol)
defaultRepresentation = (protocols[parsed.protocol] && protocols[parsed.protocol].name)
|| parsed.protocol.slice(0, -1)
}
} catch (err) {
/* istanbul ignore else */
Expand All @@ -130,31 +114,6 @@ function fromUrl (giturl, opts) {
return new GitHost(gitHostName, user, auth, project, committish, defaultRepresentation, opts)
}

// accepts input like git:github.com:user/repo and inserts the // after the first :
const correctProtocol = (arg) => {
const firstColon = arg.indexOf(':')
const proto = arg.slice(0, firstColon + 1)
if (knownProtocols.includes(proto)) {
return arg
}

const firstAt = arg.indexOf('@')
if (firstAt > -1) {
if (firstAt > firstColon) {
return `git+ssh://${arg}`
} else {
return arg
}
}

const doubleSlash = arg.indexOf('//')
if (doubleSlash === firstColon + 1) {
return arg
}

return arg.slice(0, firstColon + 1) + '//' + arg.slice(firstColon + 1)
}

// look for github shorthand inputs, such as npm/cli
const isGitHubShorthand = (arg) => {
// it cannot contain whitespace before the first #
Expand Down Expand Up @@ -185,64 +144,3 @@ const isGitHubShorthand = (arg) => {
doesNotStartWithDot && atOnlyAfterHash && colonOnlyAfterHash &&
secondSlashOnlyAfterHash
}

// attempt to correct an scp style url so that it will parse with `new URL()`
const correctUrl = (giturl) => {
const firstAt = giturl.indexOf('@')
const lastHash = giturl.lastIndexOf('#')
let firstColon = giturl.indexOf(':')
let lastColon = giturl.lastIndexOf(':', lastHash > -1 ? lastHash : Infinity)

let corrected
if (lastColon > firstAt) {
// the last : comes after the first @ (or there is no @)
// like it would in:
// proto://hostname.com:user/repo
// username@hostname.com:user/repo
// :password@hostname.com:user/repo
// username:password@hostname.com:user/repo
// proto://username@hostname.com:user/repo
// proto://:password@hostname.com:user/repo
// proto://username:password@hostname.com:user/repo
// then we replace the last : with a / to create a valid path
corrected = giturl.slice(0, lastColon) + '/' + giturl.slice(lastColon + 1)
// // and we find our new : positions
firstColon = corrected.indexOf(':')
lastColon = corrected.lastIndexOf(':')
}

if (firstColon === -1 && giturl.indexOf('//') === -1) {
// we have no : at all
// as it would be in:
// username@hostname.com/user/repo
// then we prepend a protocol
corrected = `git+ssh://${corrected}`
}

return corrected
}

// try to parse the url as its given to us, if that throws
// then we try to clean the url and parse that result instead
// THIS FUNCTION SHOULD NEVER THROW
const parseGitUrl = (giturl) => {
let result
try {
result = new url.URL(giturl)
} catch {
// this fn should never throw
}

if (result) {
return result
}

const correctedUrl = correctUrl(giturl)
try {
result = new url.URL(correctedUrl)
} catch {
// this fn should never throw
}

return result
}
79 changes: 79 additions & 0 deletions lib/parse-url.js
@@ -0,0 +1,79 @@
const url = require('url')
const getProtocols = require('./protocols.js')

const lastIndexOfBefore = (str, char, beforeChar) => {
const startPosition = str.indexOf(beforeChar)
return str.lastIndexOf(char, startPosition > -1 ? startPosition : Infinity)
}

const safeUrl = (u) => {
try {
return new url.URL(u)
} catch {
// this fn should never throw
}
}

// accepts input like git:github.com:user/repo and inserts the // after the first :
const correctProtocol = (arg, protocols) => {
const firstColon = arg.indexOf(':')
const proto = arg.slice(0, firstColon + 1)
if (Object.prototype.hasOwnProperty.call(protocols, proto)) {
return arg
}

const firstAt = arg.indexOf('@')
if (firstAt > -1) {
if (firstAt > firstColon) {
return `git+ssh://${arg}`
} else {
return arg
}
}

const doubleSlash = arg.indexOf('//')
if (doubleSlash === firstColon + 1) {
return arg
}

return `${arg.slice(0, firstColon + 1)}//${arg.slice(firstColon + 1)}`
}

// attempt to correct an scp style url so that it will parse with `new URL()`
const correctUrl = (giturl) => {
// ignore @ that come after the first hash since the denotes the start
// of a committish which can contain @ characters
const firstAt = lastIndexOfBefore(giturl, '@', '#')
// ignore colons that come after the hash since that could include colons such as:
// git@github.com:user/package-2#semver:^1.0.0
const lastColonBeforeHash = lastIndexOfBefore(giturl, ':', '#')

if (lastColonBeforeHash > firstAt) {
// the last : comes after the first @ (or there is no @)
// like it would in:
// proto://hostname.com:user/repo
// username@hostname.com:user/repo
// :password@hostname.com:user/repo
// username:password@hostname.com:user/repo
// proto://username@hostname.com:user/repo
// proto://:password@hostname.com:user/repo
// proto://username:password@hostname.com:user/repo
// then we replace the last : with a / to create a valid path
giturl = giturl.slice(0, lastColonBeforeHash) + '/' + giturl.slice(lastColonBeforeHash + 1)
}

if (lastIndexOfBefore(giturl, ':', '#') === -1 && giturl.indexOf('//') === -1) {
// we have no : at all
// as it would be in:
// username@hostname.com/user/repo
// then we prepend a protocol
giturl = `git+ssh://${giturl}`
}

return giturl
}

module.exports = (giturl, protocols = getProtocols()) => {
const withProtocol = correctProtocol(giturl, protocols)
return safeUrl(withProtocol) || safeUrl(correctUrl(withProtocol))
}
13 changes: 13 additions & 0 deletions lib/protocols.js
@@ -0,0 +1,13 @@
module.exports = (byShortcut = {}) => ({
'git+ssh:': { name: 'sshurl' },
'ssh:': { name: 'sshurl' },
'git+https:': { name: 'https', auth: true },
'git:': { auth: true },
'http:': { auth: true },
'https:': { auth: true },
'git+http:': { auth: true },
...Object.keys(byShortcut).reduce((acc, key) => {
acc[key] = { name: byShortcut[key] }
return acc
}, {}),
})
10 changes: 10 additions & 0 deletions test/parse-url.js
@@ -0,0 +1,10 @@
const t = require('tap')
const HostedGit = require('..')
const parseUrl = require('../lib/parse-url.js')

t.test('can parse git+ssh url by default', async t => {
// https://github.com/npm/cli/issues/5278
const u = 'git+ssh://git@abc:frontend/utils.git#6d45447e0c5eb6cd2e3edf05a8c5a9bb81950c79'
t.ok(parseUrl(u))
t.ok(HostedGit.parseUrl(u))
})

0 comments on commit c512363

Please sign in to comment.