Skip to content

Commit

Permalink
Merge pull request apple#446 from hamishknight/chunk-loader-5.7
Browse files Browse the repository at this point in the history
  • Loading branch information
hamishknight committed Jun 1, 2022
2 parents 8f0160b + 571c34c commit dd81d61
Show file tree
Hide file tree
Showing 7 changed files with 1,106 additions and 347 deletions.
35 changes: 31 additions & 4 deletions Sources/_RegexParser/Regex/AST/Atom.swift
Original file line number Diff line number Diff line change
Expand Up @@ -441,28 +441,55 @@ extension AST.Atom.CharacterProperty {

/// Character age, as per UnicodeScalar.Properties.age.
case age(major: Int, minor: Int)


/// A block property.
case block(Unicode.Block)

case posix(Unicode.POSIXProperty)

/// Some special properties implemented by PCRE and Oniguruma.
case pcreSpecial(PCRESpecialCategory)
case onigurumaSpecial(OnigurumaSpecialProperty)


/// Some special properties implemented by Java.
case javaSpecial(JavaSpecial)

/// A kind of case mapping: lowercase, uppercase, or titlecase.
public enum MapKind: Hashable {
  case lowercase, uppercase, titlecase
}
}

// TODO: Should this be separated out, folded into another type, or have its cases splatted into the parent enum?
/// Special property categories implemented by PCRE.
///
/// Each case's raw value is the short name that identifies the category
/// (for example `Xan` for `alphanumeric`).
public enum PCRESpecialCategory: String, Hashable {
  case alphanumeric = "Xan",
       posixSpace = "Xps",
       perlSpace = "Xsp",
       universallyNamed = "Xuc",
       perlWord = "Xwd"
}

/// Java-specific character properties.
///
/// Each case mirrors a method on `java.lang.Character`: take the raw value
/// and replace its leading `java` with `is` to get the method name
/// (e.g. `javaAlphabetic` corresponds to `isAlphabetic`).
public enum JavaSpecial: String, Hashable, CaseIterable {
  // Declaration order is preserved, since it determines the order of `allCases`.
  case alphabetic = "javaAlphabetic",
       defined = "javaDefined",
       digit = "javaDigit",
       identifierIgnorable = "javaIdentifierIgnorable",
       ideographic = "javaIdeographic",
       isoControl = "javaISOControl",
       // The doubled "Java" in the next two raw values is not a typo; those
       // are the actual property names.
       javaIdentifierPart = "javaJavaIdentifierPart",
       javaIdentifierStart = "javaJavaIdentifierStart",
       javaLetter = "javaLetter",
       javaLetterOrDigit = "javaLetterOrDigit",
       lowerCase = "javaLowerCase",
       mirrored = "javaMirrored",
       spaceChar = "javaSpaceChar",
       titleCase = "javaTitleCase",
       unicodeIdentifierPart = "javaUnicodeIdentifierPart",
       unicodeIdentifierStart = "javaUnicodeIdentifierStart",
       upperCase = "javaUpperCase",
       whitespace = "javaWhitespace"
}
}

extension AST.Atom {
Expand Down
361 changes: 355 additions & 6 deletions Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions Sources/_RegexParser/Regex/Parse/Diagnostics.swift
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ enum ParseError: Error, Hashable {
case unknownProperty(key: String?, value: String)
case unrecognizedScript(String)
case unrecognizedCategory(String)
case unrecognizedBlock(String)
case invalidAge(String)
case invalidNumericValue(String)
case unrecognizedNumericType(String)
Expand Down Expand Up @@ -195,6 +196,8 @@ extension ParseError: CustomStringConvertible {
return "unrecognized script '\(value)'"
case .unrecognizedCategory(let value):
return "unrecognized category '\(value)'"
case .unrecognizedBlock(let value):
return "unrecognized block '\(value)'"
case .unrecognizedNumericType(let value):
return "unrecognized numeric type '\(value)'"
case .invalidAge(let value):
Expand Down
4 changes: 3 additions & 1 deletion Sources/_RegexParser/Regex/Parse/Sema.swift
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,10 @@ extension RegexValidator {
break
case .pcreSpecial:
throw error(.unsupported("PCRE property"), at: loc)
case .onigurumaSpecial:
case .block:
throw error(.unsupported("Unicode block property"), at: loc)
case .javaSpecial:
throw error(.unsupported("Java property"), at: loc)
}
}

Expand Down

0 comments on commit dd81d61

Please sign in to comment.