Skip to content

Commit

Permalink
Fix #399 Allow colons in paths again (#418)
Browse files Browse the repository at this point in the history
  • Loading branch information
theon committed Feb 6, 2022
1 parent 65e374c commit f1702e9
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 18 deletions.
1 change: 0 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ val sharedSettings = Seq(
semanticdbEnabled := true,
semanticdbVersion := scalafixSemanticdb.revision,
Test / parallelExecution := false,
scalafmtOnCompile := true,
coverageExcludedPackages := "(io.lemonlabs.uri.inet.Trie.*|io.lemonlabs.uri.inet.PublicSuffixes.*|io.lemonlabs.uri.inet.PublicSuffixTrie.*|io.lemonlabs.uri.inet.PunycodeSupport.*)"
)

Expand Down
11 changes: 11 additions & 0 deletions shared/src/main/scala/io/lemonlabs/uri/Path.scala
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ sealed trait UrlPath extends Path {
def toAbsolute: AbsolutePath
def toAbsoluteOrEmpty: AbsoluteOrEmptyPath

/** Whether this path is a rootless path that has at least one part.
  *
  * NOTE(review): "rootless" presumably means "does not begin with a slash" — confirm against
  * the concrete path implementations.
  */
def nonEmptyRootless: Boolean

def withConfig(config: UriConfig): Self

def addPart[P: PathPart](part: P): UrlPath =
Expand Down Expand Up @@ -220,6 +222,9 @@ case object EmptyPath extends AbsoluteOrEmptyPath {
/** Always `true` for `EmptyPath`. */
def isEmpty: Boolean = true

/** Always `false` for `EmptyPath`. */
def nonEmptyRootless: Boolean = false

/** Returns this same object; the supplied `config` is not used.
  *
  * @param config ignored by this implementation
  * @return this `EmptyPath` instance
  */
def withConfig(config: UriConfig): EmptyPath.type = this

Expand Down Expand Up @@ -267,6 +272,9 @@ final case class RootlessPath(parts: Vector[String])(implicit val config: UriCon
/** `true` when this rootless path has no parts at all. */
def isEmpty: Boolean = parts.isEmpty

/** `true` when this rootless path has at least one part. */
def nonEmptyRootless: Boolean = parts.nonEmpty

/** `true` when the final part is the empty string; `false` when there are no parts. */
override def isSlashTerminated: Boolean =
  parts.lastOption.exists(_ == "")
}

Expand Down Expand Up @@ -298,6 +306,9 @@ final case class AbsolutePath(parts: Vector[String])(implicit val config: UriCon
/** Always `false` for `AbsolutePath`, regardless of its parts. */
def isEmpty: Boolean = false

/** Always `false` for `AbsolutePath`, regardless of its parts. */
def nonEmptyRootless: Boolean = false

/** Renders this path as the inherited rendering prefixed with a single "/".
  *
  * @param c configuration forwarded unchanged to the inherited implementation
  * @return "/" followed by the inherited string form
  */
override private[uri] def toStringWithConfig(c: UriConfig): String = {
  val rendered = super.toStringWithConfig(c)
  s"/$rendered"
}

Expand Down
43 changes: 27 additions & 16 deletions shared/src/main/scala/io/lemonlabs/uri/parsing/UrlParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -124,29 +124,24 @@ class UrlParser(val input: String)(implicit conf: UriConfig = UriConfig.default)
port <- _port.?
} yield extractAuthority(t._1, t._2, port)

def _path_segment: Parser[String] =
until(charIn("/?#")).string.map(extractPathPart)
def _path_segment: Parser0[String] =
until0(charIn("/?#")).string.map(extractPathPart)

/** A sequence of path parts that MUST start with a slash
*
* If a URI contains an authority component, then the path component must either be empty
* or begin with a slash ("/") character.
*/
def _path_for_authority: Parser0[AbsoluteOrEmptyPath] =
char('/') *> until0(charIn("#?[]") | Parser.end).map { path =>
extractAbsOrEmptyPath(path.split("/", -1).toList)
} |
(charIn("#?").peek | Parser.end).as(EmptyPath)
(char('/') *> _path_segment).rep0.map { parts =>
extractAbsOrEmptyPath(parts)
}

/** A sequence of path parts optionally starting with a slash
*/
def _path: Parser0[UrlPath] =
until0(charIn("#?[]:") | Parser.end).map { path =>
val segments = path.split("/", -1)
if (path.nonEmpty && path.charAt(0) == '/')
extractRelPath("/", segments.drop(1).toVector)
else
extractRelPath("", segments.toVector)
(char('/').? ~ rep0sep0(_path_segment, separator = char('/'))).map { case (maybeSlash, parts) =>
extractRelPath(maybeSlash, parts)
}

def _query_param: Parser[(String, Some[String])] =
Expand Down Expand Up @@ -193,6 +188,9 @@ class UrlParser(val input: String)(implicit conf: UriConfig = UriConfig.default)
for {
scheme <- _scheme
_ <- char(':')
// If a URI does not contain an authority component,
// then the path cannot begin with two slash characters ("//")
_ <- not(string("//"))
path <- _path
maybe_query_string <- _maybe_query_string
maybe_fragment <- _fragment.?
Expand All @@ -217,6 +215,9 @@ class UrlParser(val input: String)(implicit conf: UriConfig = UriConfig.default)
def _data_url_base64: Parser[DataUrl] =
for {
_ <- Parser.string("data:")
// If a URI does not contain an authority component,
// then the path cannot begin with two slash characters ("//")
_ <- not(string("//"))
media_type <- _media_type
_ <- Parser.string(";base64,")
data <- Parser.until(Parser.end)
Expand All @@ -225,6 +226,9 @@ class UrlParser(val input: String)(implicit conf: UriConfig = UriConfig.default)
def _data_url_percent_encoded: Parser[DataUrl] =
for {
_ <- Parser.string("data:")
// If a URI does not contain an authority component,
// then the path cannot begin with two slash characters ("//")
_ <- not(string("//"))
media_type <- _media_type
_ <- Parser.char(';').?
_ <- Parser.char(',')
Expand All @@ -245,7 +249,14 @@ class UrlParser(val input: String)(implicit conf: UriConfig = UriConfig.default)

/** Parses a relative URL: a path, followed by whatever `_maybe_query_string` accepts and an
  * optional fragment, combined via `extractRelativeUrl`.
  *
  * Two guards are applied around the path:
  *  - the remaining input must not start with "//";
  *  - a non-empty rootless path whose first part contains ':' makes the parser fail.
  */
def _rel_url: Parser0[RelativeUrl] =
for {
// If a URI does not contain an authority component,
// then the path cannot begin with two slash characters ("//").
// `not` is a negative check: it must succeed before the path is parsed.
_ <- not(string("//"))
path <- _path
// In addition, a URI reference (RFC 3986, Section 4.1) may be a relative-path reference, in which
// case the first path segment cannot contain a colon (":") character.
// NOTE(review): this inspects `path.parts` rather than the raw input. If the parts are already
// percent-decoded by the time they reach here, an encoded colon ("%3A") in the first segment
// would be rejected as well — confirm against `_path`.
colonInFirstSegment = path.nonEmptyRootless && path.parts.headOption.exists(_.contains(':'))
// Fail the whole parser when the guard trips; otherwise continue without consuming input.
_ <- if (colonInFirstSegment) Parser.fail else Parser.unit
maybe_query_string <- _maybe_query_string
maybe_fragment <- _fragment.?
} yield extractRelativeUrl(path, maybe_query_string, maybe_fragment)
Expand Down Expand Up @@ -334,15 +345,15 @@ class UrlParser(val input: String)(implicit conf: UriConfig = UriConfig.default)

val extractAbsOrEmptyPath = (pp: immutable.Seq[String]) =>
if (pp.isEmpty) EmptyPath
else AbsolutePath(pp.toVector.map(extractPathPart))
else AbsolutePath(pp.toVector)

val extractRelPath = (maybeSlash: String, pp: immutable.Seq[String]) =>
val extractRelPath = (maybeSlash: Option[Unit], pp: immutable.Seq[String]) =>
if (maybeSlash.nonEmpty)
AbsolutePath(pp.toVector.map(extractPathPart))
AbsolutePath(pp.toVector)
else if (pp == Seq(""))
UrlPath.empty
else
RootlessPath(pp.toVector.map(extractPathPart))
RootlessPath(pp.toVector)

/** Pairs a media-type parameter name with its value as a tuple. */
val extractMediaTypeParam = (key: String, value: String) => (key, value)

Expand Down
12 changes: 12 additions & 0 deletions shared/src/test/scala/io/lemonlabs/uri/GithubIssuesTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,16 @@ class GithubIssuesTests extends AnyFlatSpec with Matchers with OptionValues {
// Regression test for issue #368: an empty input string must parse to the empty query string.
"Github Issue #368" should "allow parsing of an empty query" in {
  val parsed = QueryString.parseTry("")

  parsed should equal(Success(QueryString.empty))
}

// Regression test for issue #399: a colon inside a segment of a slash-prefixed path must parse.
"Github Issue #399" should "allow parsing paths with colons" in {
  val expectedPath = AbsolutePath.fromParts("this:1", "does", "not")
  val expectedUrl = RelativeUrl(expectedPath, QueryString.empty, None)

  Url.parseTry("/this:1/does/not") should equal(Success(expectedUrl))
}

// Square brackets inside a segment of a slash-prefixed path must parse.
it should "allow parsing paths with square brackets" in {
  val expectedPath = AbsolutePath.fromParts("this[1]", "does", "not")
  val expectedUrl = RelativeUrl(expectedPath, QueryString.empty, None)

  Url.parseTry("/this[1]/does/not") should equal(Success(expectedUrl))
}
}
2 changes: 1 addition & 1 deletion shared/src/test/scala/io/lemonlabs/uri/ParsingTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ class ParsingTests extends AnyFlatSpec with Matchers {
val e = the[UriParsingException] thrownBy Url.parse(nineSegIp)

// todo: Improve error messages to be closer to the parboiled2 message below
e.getMessage should equal("Invalid URL could not be parsed. Error(7,NonEmptyList(EndOfString(7,31)))")
e.getMessage should equal("Invalid URL could not be parsed. Error(31,NonEmptyList(Fail(31)))")
// e.getMessage should equal(
// """Invalid URL could not be parsed. Invalid input ']', expected HexDigit or ':' (line 1, column 26):
// |http://[1:2:3:4:5:6:7:8:9]:9000
Expand Down

0 comments on commit f1702e9

Please sign in to comment.