From 2441c77bc4037fde5e14f510d6feb7ec46782e55 Mon Sep 17 00:00:00 2001 From: Denys Shabalin Date: Thu, 18 Oct 2018 14:21:09 +0200 Subject: [PATCH 1/2] Add rsc benchmark --- build.sbt | 5 + confs/baseline/build.sbt | 6 + confs/baseline/compile | 1 + confs/baseline/plugins.sbt | 1 + confs/baseline/run | 1 + confs/current/build.sbt | 6 + confs/current/compile | 1 + confs/current/plugins.sbt | 1 + confs/current/run | 1 + confs/scala-native-0.3.7/run | 2 +- confs/scala-native-0.3.8/build.sbt | 5 + confs/scala-native-0.3.8/compile | 1 + confs/scala-native-0.3.8/plugins.sbt | 1 + confs/scala-native-0.3.8/run | 1 + input/rsc.RscBenchmark | 18 + input/rsc/CharClass.scala | 419 ++ input/rsc/CharGroup.scala | 128 + input/rsc/Compiler.scala | 263 + input/rsc/Inst.scala | 151 + input/rsc/Machine.scala | 395 ++ input/rsc/MachineInput.scala | 244 + input/rsc/Matcher.scala | 436 ++ input/rsc/Parser.scala | 1743 ++++++ input/rsc/Pattern.scala | 223 + input/rsc/PatternSyntaxException.scala | 37 + input/rsc/Prog.scala | 174 + input/rsc/RE2.scala | 812 +++ input/rsc/Regexp.scala | 309 + input/rsc/Simplify.scala | 194 + input/rsc/Stdlib.scala | 182 + input/rsc/Unicode.scala | 242 + input/rsc/UnicodeTables.scala | 5051 +++++++++++++++++ input/rsc/Utils.scala | 207 + output/rsc.RscBenchmark | 1 + project/plugins.sbt | 2 +- scripts/run.py | 41 +- scripts/summary.py | 4 +- src/main/scala/rsc/Compiler.scala | 162 + src/main/scala/rsc/RscBenchmark.scala | 9 + src/main/scala/rsc/cli/Main.scala | 25 + src/main/scala/rsc/lexis/Inputs.scala | 76 + src/main/scala/rsc/lexis/Keywords.scala | 63 + src/main/scala/rsc/lexis/Names.scala | 47 + src/main/scala/rsc/lexis/Offsets.scala | 10 + src/main/scala/rsc/lexis/Positions.scala | 31 + src/main/scala/rsc/lexis/Tokens.scala | 94 + src/main/scala/rsc/lexis/package.scala | 5 + src/main/scala/rsc/parse/Bounds.scala | 55 + src/main/scala/rsc/parse/Contexts.scala | 85 + src/main/scala/rsc/parse/Defns.scala | 124 + src/main/scala/rsc/parse/Groups.scala | 68 + 
src/main/scala/rsc/parse/Helpers.scala | 101 + src/main/scala/rsc/parse/Imports.scala | 85 + src/main/scala/rsc/parse/Lits.scala | 41 + src/main/scala/rsc/parse/Messages.scala | 60 + src/main/scala/rsc/parse/Mods.scala | 177 + src/main/scala/rsc/parse/Newlines.scala | 139 + src/main/scala/rsc/parse/Params.scala | 107 + src/main/scala/rsc/parse/Parser.scala | 35 + src/main/scala/rsc/parse/Paths.scala | 161 + src/main/scala/rsc/parse/Pats.scala | 163 + src/main/scala/rsc/parse/Sources.scala | 97 + src/main/scala/rsc/parse/Templates.scala | 151 + src/main/scala/rsc/parse/Terms.scala | 397 ++ src/main/scala/rsc/parse/Tpts.scala | 201 + src/main/scala/rsc/pretty/Escape.scala | 26 + src/main/scala/rsc/pretty/Ops.scala | 21 + src/main/scala/rsc/pretty/Pretty.scala | 25 + src/main/scala/rsc/pretty/PrettyAtom.scala | 45 + .../scala/rsc/pretty/PrettyCompiler.scala | 25 + src/main/scala/rsc/pretty/PrettyEnv.scala | 16 + src/main/scala/rsc/pretty/PrettyInput.scala | 27 + src/main/scala/rsc/pretty/PrettyMessage.scala | 45 + src/main/scala/rsc/pretty/PrettyName.scala | 22 + src/main/scala/rsc/pretty/PrettyOutline.scala | 38 + .../scala/rsc/pretty/PrettyPosition.scala | 33 + .../scala/rsc/pretty/PrettyResolution.scala | 25 + src/main/scala/rsc/pretty/PrettyScope.scala | 43 + .../scala/rsc/pretty/PrettySeverity.scala | 15 + src/main/scala/rsc/pretty/PrettyStatus.scala | 28 + src/main/scala/rsc/pretty/PrettySymtab.scala | 37 + src/main/scala/rsc/pretty/PrettyTodo.scala | 71 + src/main/scala/rsc/pretty/PrettyToken.scala | 96 + src/main/scala/rsc/pretty/PrettyTree.scala | 15 + src/main/scala/rsc/pretty/PrettyType.scala | 38 + src/main/scala/rsc/pretty/Printer.scala | 138 + src/main/scala/rsc/pretty/ProductRepl.scala | 59 + src/main/scala/rsc/pretty/Repl.scala | 137 + src/main/scala/rsc/pretty/Str.scala | 110 + src/main/scala/rsc/pretty/TreeStr.scala | 304 + src/main/scala/rsc/pretty/package.scala | 7 + .../scala/rsc/report/ConsoleReporter.scala | 25 + 
src/main/scala/rsc/report/Messages.scala | 295 + src/main/scala/rsc/report/Points.scala | 19 + src/main/scala/rsc/report/Reporter.scala | 8 + src/main/scala/rsc/report/Severity.scala | 18 + src/main/scala/rsc/report/StoreReporter.scala | 24 + src/main/scala/rsc/report/package.scala | 5 + src/main/scala/rsc/scan/Characters.scala | 31 + src/main/scala/rsc/scan/History.scala | 19 + src/main/scala/rsc/scan/Messages.scala | 25 + src/main/scala/rsc/scan/Scanner.scala | 466 ++ src/main/scala/rsc/scan/Snapshot.scala | 12 + src/main/scala/rsc/semantics/Names.scala | 26 + src/main/scala/rsc/semantics/Symbols.scala | 14 + src/main/scala/rsc/semantics/Types.scala | 20 + src/main/scala/rsc/semantics/package.scala | 5 + src/main/scala/rsc/settings/Settings.scala | 49 + src/main/scala/rsc/syntax/Trees.scala | 363 ++ src/main/scala/rsc/typecheck/Atoms.scala | 51 + src/main/scala/rsc/typecheck/Envs.scala | 166 + src/main/scala/rsc/typecheck/Linker.scala | 63 + src/main/scala/rsc/typecheck/Outliner.scala | 81 + .../scala/rsc/typecheck/Resolutions.scala | 17 + src/main/scala/rsc/typecheck/Scheduler.scala | 254 + src/main/scala/rsc/typecheck/Scoper.scala | 163 + src/main/scala/rsc/typecheck/Scopes.scala | 388 ++ src/main/scala/rsc/typecheck/Statuses.scala | 27 + src/main/scala/rsc/typecheck/Symtab.scala | 68 + src/main/scala/rsc/typecheck/Todo.scala | 22 + .../scala/rsc/typecheck/Typechecker.scala | 594 ++ src/main/scala/rsc/typecheck/package.scala | 5 + src/main/scala/rsc/util/CharUtil.scala | 46 + src/main/scala/rsc/util/CrashException.scala | 40 + src/main/scala/rsc/util/ErrorUtil.scala | 40 + src/main/scala/rsc/util/PrettyUtil.scala | 26 + src/main/scala/rsc/util/StringUtil.scala | 14 + src/main/scala/rsc/util/TreeUtil.scala | 17 + src/main/scala/rsc/util/package.scala | 10 + 129 files changed, 19017 insertions(+), 23 deletions(-) create mode 100644 confs/baseline/build.sbt create mode 100644 confs/baseline/compile create mode 100644 confs/baseline/plugins.sbt create mode 100644 
confs/baseline/run create mode 100644 confs/current/build.sbt create mode 100644 confs/current/compile create mode 100644 confs/current/plugins.sbt create mode 100644 confs/current/run create mode 100644 confs/scala-native-0.3.8/build.sbt create mode 100644 confs/scala-native-0.3.8/compile create mode 100644 confs/scala-native-0.3.8/plugins.sbt create mode 100644 confs/scala-native-0.3.8/run create mode 100644 input/rsc.RscBenchmark create mode 100644 input/rsc/CharClass.scala create mode 100644 input/rsc/CharGroup.scala create mode 100644 input/rsc/Compiler.scala create mode 100644 input/rsc/Inst.scala create mode 100644 input/rsc/Machine.scala create mode 100644 input/rsc/MachineInput.scala create mode 100644 input/rsc/Matcher.scala create mode 100644 input/rsc/Parser.scala create mode 100644 input/rsc/Pattern.scala create mode 100644 input/rsc/PatternSyntaxException.scala create mode 100644 input/rsc/Prog.scala create mode 100644 input/rsc/RE2.scala create mode 100644 input/rsc/Regexp.scala create mode 100644 input/rsc/Simplify.scala create mode 100644 input/rsc/Stdlib.scala create mode 100644 input/rsc/Unicode.scala create mode 100644 input/rsc/UnicodeTables.scala create mode 100644 input/rsc/Utils.scala create mode 100644 output/rsc.RscBenchmark create mode 100644 src/main/scala/rsc/Compiler.scala create mode 100644 src/main/scala/rsc/RscBenchmark.scala create mode 100644 src/main/scala/rsc/cli/Main.scala create mode 100644 src/main/scala/rsc/lexis/Inputs.scala create mode 100644 src/main/scala/rsc/lexis/Keywords.scala create mode 100644 src/main/scala/rsc/lexis/Names.scala create mode 100644 src/main/scala/rsc/lexis/Offsets.scala create mode 100644 src/main/scala/rsc/lexis/Positions.scala create mode 100644 src/main/scala/rsc/lexis/Tokens.scala create mode 100644 src/main/scala/rsc/lexis/package.scala create mode 100644 src/main/scala/rsc/parse/Bounds.scala create mode 100644 src/main/scala/rsc/parse/Contexts.scala create mode 100644 
src/main/scala/rsc/parse/Defns.scala create mode 100644 src/main/scala/rsc/parse/Groups.scala create mode 100644 src/main/scala/rsc/parse/Helpers.scala create mode 100644 src/main/scala/rsc/parse/Imports.scala create mode 100644 src/main/scala/rsc/parse/Lits.scala create mode 100644 src/main/scala/rsc/parse/Messages.scala create mode 100644 src/main/scala/rsc/parse/Mods.scala create mode 100644 src/main/scala/rsc/parse/Newlines.scala create mode 100644 src/main/scala/rsc/parse/Params.scala create mode 100644 src/main/scala/rsc/parse/Parser.scala create mode 100644 src/main/scala/rsc/parse/Paths.scala create mode 100644 src/main/scala/rsc/parse/Pats.scala create mode 100644 src/main/scala/rsc/parse/Sources.scala create mode 100644 src/main/scala/rsc/parse/Templates.scala create mode 100644 src/main/scala/rsc/parse/Terms.scala create mode 100644 src/main/scala/rsc/parse/Tpts.scala create mode 100644 src/main/scala/rsc/pretty/Escape.scala create mode 100644 src/main/scala/rsc/pretty/Ops.scala create mode 100644 src/main/scala/rsc/pretty/Pretty.scala create mode 100644 src/main/scala/rsc/pretty/PrettyAtom.scala create mode 100644 src/main/scala/rsc/pretty/PrettyCompiler.scala create mode 100644 src/main/scala/rsc/pretty/PrettyEnv.scala create mode 100644 src/main/scala/rsc/pretty/PrettyInput.scala create mode 100644 src/main/scala/rsc/pretty/PrettyMessage.scala create mode 100644 src/main/scala/rsc/pretty/PrettyName.scala create mode 100644 src/main/scala/rsc/pretty/PrettyOutline.scala create mode 100644 src/main/scala/rsc/pretty/PrettyPosition.scala create mode 100644 src/main/scala/rsc/pretty/PrettyResolution.scala create mode 100644 src/main/scala/rsc/pretty/PrettyScope.scala create mode 100644 src/main/scala/rsc/pretty/PrettySeverity.scala create mode 100644 src/main/scala/rsc/pretty/PrettyStatus.scala create mode 100644 src/main/scala/rsc/pretty/PrettySymtab.scala create mode 100644 src/main/scala/rsc/pretty/PrettyTodo.scala create mode 100644 
src/main/scala/rsc/pretty/PrettyToken.scala create mode 100644 src/main/scala/rsc/pretty/PrettyTree.scala create mode 100644 src/main/scala/rsc/pretty/PrettyType.scala create mode 100644 src/main/scala/rsc/pretty/Printer.scala create mode 100644 src/main/scala/rsc/pretty/ProductRepl.scala create mode 100644 src/main/scala/rsc/pretty/Repl.scala create mode 100644 src/main/scala/rsc/pretty/Str.scala create mode 100644 src/main/scala/rsc/pretty/TreeStr.scala create mode 100644 src/main/scala/rsc/pretty/package.scala create mode 100644 src/main/scala/rsc/report/ConsoleReporter.scala create mode 100644 src/main/scala/rsc/report/Messages.scala create mode 100644 src/main/scala/rsc/report/Points.scala create mode 100644 src/main/scala/rsc/report/Reporter.scala create mode 100644 src/main/scala/rsc/report/Severity.scala create mode 100644 src/main/scala/rsc/report/StoreReporter.scala create mode 100644 src/main/scala/rsc/report/package.scala create mode 100644 src/main/scala/rsc/scan/Characters.scala create mode 100644 src/main/scala/rsc/scan/History.scala create mode 100644 src/main/scala/rsc/scan/Messages.scala create mode 100644 src/main/scala/rsc/scan/Scanner.scala create mode 100644 src/main/scala/rsc/scan/Snapshot.scala create mode 100644 src/main/scala/rsc/semantics/Names.scala create mode 100644 src/main/scala/rsc/semantics/Symbols.scala create mode 100644 src/main/scala/rsc/semantics/Types.scala create mode 100644 src/main/scala/rsc/semantics/package.scala create mode 100644 src/main/scala/rsc/settings/Settings.scala create mode 100644 src/main/scala/rsc/syntax/Trees.scala create mode 100644 src/main/scala/rsc/typecheck/Atoms.scala create mode 100644 src/main/scala/rsc/typecheck/Envs.scala create mode 100644 src/main/scala/rsc/typecheck/Linker.scala create mode 100644 src/main/scala/rsc/typecheck/Outliner.scala create mode 100644 src/main/scala/rsc/typecheck/Resolutions.scala create mode 100644 src/main/scala/rsc/typecheck/Scheduler.scala create mode 100644 
src/main/scala/rsc/typecheck/Scoper.scala create mode 100644 src/main/scala/rsc/typecheck/Scopes.scala create mode 100644 src/main/scala/rsc/typecheck/Statuses.scala create mode 100644 src/main/scala/rsc/typecheck/Symtab.scala create mode 100644 src/main/scala/rsc/typecheck/Todo.scala create mode 100644 src/main/scala/rsc/typecheck/Typechecker.scala create mode 100644 src/main/scala/rsc/typecheck/package.scala create mode 100644 src/main/scala/rsc/util/CharUtil.scala create mode 100644 src/main/scala/rsc/util/CrashException.scala create mode 100644 src/main/scala/rsc/util/ErrorUtil.scala create mode 100644 src/main/scala/rsc/util/PrettyUtil.scala create mode 100644 src/main/scala/rsc/util/StringUtil.scala create mode 100644 src/main/scala/rsc/util/TreeUtil.scala create mode 100644 src/main/scala/rsc/util/package.scala diff --git a/build.sbt b/build.sbt index 9874e27..ae87f31 100644 --- a/build.sbt +++ b/build.sbt @@ -1 +1,6 @@ scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" +nativeLTO := "thin" diff --git a/confs/baseline/build.sbt b/confs/baseline/build.sbt new file mode 100644 index 0000000..ae87f31 --- /dev/null +++ b/confs/baseline/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" +nativeLTO := "thin" diff --git a/confs/baseline/compile b/confs/baseline/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/baseline/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/baseline/plugins.sbt b/confs/baseline/plugins.sbt new file mode 100644 index 0000000..2a63bf0 --- /dev/null +++ b/confs/baseline/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.4.0-SNAPSHOT") diff --git a/confs/baseline/run b/confs/baseline/run new file mode 100644 index 0000000..0ea88ec --- /dev/null +++ b/confs/baseline/run @@ -0,0 +1 @@ 
+target/scala-2.11/benchs-out diff --git a/confs/current/build.sbt b/confs/current/build.sbt new file mode 100644 index 0000000..ae87f31 --- /dev/null +++ b/confs/current/build.sbt @@ -0,0 +1,6 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" +nativeLTO := "thin" diff --git a/confs/current/compile b/confs/current/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/current/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/current/plugins.sbt b/confs/current/plugins.sbt new file mode 100644 index 0000000..2a63bf0 --- /dev/null +++ b/confs/current/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.4.0-SNAPSHOT") diff --git a/confs/current/run b/confs/current/run new file mode 100644 index 0000000..0ea88ec --- /dev/null +++ b/confs/current/run @@ -0,0 +1 @@ +target/scala-2.11/benchs-out diff --git a/confs/scala-native-0.3.7/run b/confs/scala-native-0.3.7/run index ae89e34..0ea88ec 100644 --- a/confs/scala-native-0.3.7/run +++ b/confs/scala-native-0.3.7/run @@ -1 +1 @@ -target/scala-2.11/scala-native-benchmarks-out +target/scala-2.11/benchs-out diff --git a/confs/scala-native-0.3.8/build.sbt b/confs/scala-native-0.3.8/build.sbt new file mode 100644 index 0000000..b4a5690 --- /dev/null +++ b/confs/scala-native-0.3.8/build.sbt @@ -0,0 +1,5 @@ +scalaVersion := "2.11.12" +enablePlugins(ScalaNativePlugin) +nativeLinkStubs := true +nativeGC := "immix" +nativeMode := "release" diff --git a/confs/scala-native-0.3.8/compile b/confs/scala-native-0.3.8/compile new file mode 100644 index 0000000..2f3f09f --- /dev/null +++ b/confs/scala-native-0.3.8/compile @@ -0,0 +1 @@ +nativeLink diff --git a/confs/scala-native-0.3.8/plugins.sbt b/confs/scala-native-0.3.8/plugins.sbt new file mode 100644 index 0000000..2d38aa0 --- /dev/null +++ b/confs/scala-native-0.3.8/plugins.sbt @@ -0,0 +1 @@ +addSbtPlugin("org.scala-native" % "sbt-scala-native" 
% "0.3.8") diff --git a/confs/scala-native-0.3.8/run b/confs/scala-native-0.3.8/run new file mode 100644 index 0000000..0ea88ec --- /dev/null +++ b/confs/scala-native-0.3.8/run @@ -0,0 +1 @@ +target/scala-2.11/benchs-out diff --git a/input/rsc.RscBenchmark b/input/rsc.RscBenchmark new file mode 100644 index 0000000..196f50a --- /dev/null +++ b/input/rsc.RscBenchmark @@ -0,0 +1,18 @@ +input/rsc/Pattern.scala +input/rsc/Simplify.scala +input/rsc/Matcher.scala +input/rsc/CharGroup.scala +input/rsc/PatternSyntaxException.scala +input/rsc/MachineInput.scala +input/rsc/RE2.scala +input/rsc/Inst.scala +input/rsc/Stdlib.scala +input/rsc/Compiler.scala +input/rsc/Machine.scala +input/rsc/Parser.scala +input/rsc/Unicode.scala +input/rsc/UnicodeTables.scala +input/rsc/Regexp.scala +input/rsc/Prog.scala +input/rsc/CharClass.scala +input/rsc/Utils.scala diff --git a/input/rsc/CharClass.scala b/input/rsc/CharClass.scala new file mode 100644 index 0000000..7202896 --- /dev/null +++ b/input/rsc/CharClass.scala @@ -0,0 +1,419 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Original Go source here: +// http://code.google.com/p/go/source/browse/src/pkg/regexp/syntax/parse.go + +package com.twitter.re2s + +import com.twitter.re2s.CharClass._ + +/** + * A "builder"-style helper class for manipulating character classes + * represented as an array of pairs of runes [lo, hi], each denoting an + * inclusive interval. + * + * All methods mutate the internal state and return {@code this}, allowing + * operations to be chained. + */ +// inclusive ranges, pairs of [lo,hi]. r.length is even. +class CharClass(private var r: Array[Int]) { + // prefix of |r| that is defined. Even. + private var len: Int = r.length + + // After a call to ensureCapacity(), |r.length| is at least |newLen|. 
+  private def ensureCapacity(_newLen: Int): Unit = {
+    var newLen: Int = _newLen
+    if (r.length < newLen) {
+      // Grow to at least twice the defined length |len| (no extra growth when len == 0).
+      if (newLen < len * 2) {
+        newLen = len * 2
+      }
+      val r2: Array[Int] = new Array[Int](newLen)
+      System.arraycopy(r, 0, r2, 0, len) // only the defined prefix is carried over
+      r = r2
+    }
+  }
+
+  // toArray() returns the defined prefix of this class as an int array: |r|
+  // itself (no copy) when it is completely filled, otherwise a copy of length
+  // |len|. Later CharClass operations may mutate it, so call this last.
+  def toArray(): Array[Int] = {
+    if (this.len == r.length) {
+      r
+    } else {
+      val r2: Array[Int] = new Array[Int](len)
+      System.arraycopy(r, 0, r2, 0, len)
+      r2
+    }
+  }
+
+  // cleanClass() sorts the ranges (pairs of elements) of this CharClass and
+  // merges overlapping/abutting ones in place; fewer than two ranges is a no-op.
+  def cleanClass(): CharClass = {
+    if (len < 4) {
+      return this
+    }
+
+    // Sort by lo increasing, hi decreasing to break ties.
+    qsortIntPair(r, 0, len - 2)
+
+    // Merge abutting, overlapping.
+    var w: Int = 2 // write index
+    var i: Int = 2
+    while (i < len) {
+      val lo: Int = r(i)
+      val hi: Int = r(i + 1)
+      if (lo <= r(w - 1) + 1) {
+        // merge with previous range
+        if (hi > r(w - 1)) {
+          r(w - 1) = hi
+        }
+      } else {
+        // new disjoint range
+        r(w) = lo
+        r(w + 1) = hi
+        w += 2
+      }
+      i += 2
+    }
+    len = w
+
+    this
+  }
+
+  // appendLiteral() appends the literal |x|, with its case folds when |flags| has RE2.FOLD_CASE set.
+  def appendLiteral(x: Int, flags: Int): CharClass = {
+    if ((flags & RE2.FOLD_CASE) != 0) {
+      appendFoldedRange(x, x)
+    } else {
+      appendRange(x, x)
+    }
+  }
+
+  // appendRange() appends the range [lo-hi] (inclusive) to this CharClass.
+  def appendRange(lo: Int, hi: Int): CharClass = {
+    // Expand last range or next to last range if it overlaps or abuts.
+    // Checking two ranges helps when appending case-folded
+    // alphabets, so that one range can be expanding A-Z and the
+    // other expanding a-z.
+    if (len > 0) {
+      var i: Int = 2
+      while (i <= 4) {
+        if (len >= i) {
+          val rlo: Int = r(len - i)
+          val rhi: Int = r(len - i + 1)
+          if (lo <= rhi + 1 && rlo <= hi + 1) {
+            if (lo < rlo) {
+              r(len - i) = lo
+            }
+            if (hi > rhi) {
+              r(len - i + 1) = hi
+            }
+            return this
+          }
+        }
+        i += 2
+      }
+    }
+
+    // Can't coalesce; append. Expand capacity by doubling as needed.
+    ensureCapacity(len + 2)
+    r(len) = lo
+    len += 1
+    r(len) = hi
+    len += 1
+
+    this
+  }
+
+  // appendFoldedRange() appends the range [lo-hi] and its case
+  // folding-equivalent runes to this CharClass.
+  def appendFoldedRange(_lo: Int, _hi: Int): CharClass = {
+    var lo: Int = _lo
+    var hi: Int = _hi
+
+    // Optimizations.
+    if (lo <= Unicode.MIN_FOLD && hi >= Unicode.MAX_FOLD) {
+      // Range is full: folding can't add more.
+      return appendRange(lo, hi)
+    }
+    if (hi < Unicode.MIN_FOLD || lo > Unicode.MAX_FOLD) {
+      // Range is outside folding possibilities.
+      return appendRange(lo, hi)
+    }
+    if (lo < Unicode.MIN_FOLD) {
+      // [lo, minFold-1] needs no folding.
+      appendRange(lo, Unicode.MIN_FOLD - 1)
+      lo = Unicode.MIN_FOLD
+    }
+    if (hi > Unicode.MAX_FOLD) {
+      // [maxFold+1, hi] needs no folding.
+      appendRange(Unicode.MAX_FOLD + 1, hi)
+      hi = Unicode.MAX_FOLD
+    }
+
+    // Brute force. Depend on appendRange to coalesce ranges on the fly.
+    var c: Int = lo
+    while (c <= hi) {
+      appendRange(c, c)
+      var f: Int = Unicode.simpleFold(c)
+      while (f != c) {
+        appendRange(f, f)
+        f = Unicode.simpleFold(f)
+      }
+      c += 1
+    }
+
+    this
+  }
+
+  // appendClass() appends the class |x| (a flat array of [lo, hi] pairs) to
+  // this CharClass. It assumes |x| is clean. Does not mutate |x|.
+  def appendClass(x: Array[Int]): CharClass = {
+    var i: Int = 0
+    while (i < x.length) {
+      appendRange(x(i), x(i + 1))
+      i += 2
+    }
+
+    this
+  }
+
+  // appendFoldedClass() appends the case folding of the class |x| to this
+  // CharClass. Does not mutate |x|.
+  def appendFoldedClass(x: Array[Int]): CharClass = {
+    var i: Int = 0
+    while (i < x.length) {
+      appendFoldedRange(x(i), x(i + 1))
+      i += 2
+    }
+
+    this
+  }
+
+  // appendNegatedClass() appends the negation of the class |x| to this
+  // CharClass. It assumes |x| is clean. Does not mutate |x|.
+  def appendNegatedClass(x: Array[Int]): CharClass = {
+    var nextLo: Int = 0
+    var i: Int = 0
+    while (i < x.length) {
+      val lo: Int = x(i)
+      val hi: Int = x(i + 1)
+      if (nextLo <= lo - 1) {
+        appendRange(nextLo, lo - 1)
+      }
+      nextLo = hi + 1
+      i += 2
+    }
+    if (nextLo <= Unicode.MAX_RUNE) {
+      appendRange(nextLo, Unicode.MAX_RUNE)
+    }
+
+    this
+  }
+
+  // appendTable() appends the Unicode range table |table|, an array of
+  // (lo, hi, stride) triples, to this CharClass. Does not mutate |table|.
+  def appendTable(table: Array[Array[Int]]): CharClass = {
+    var i: Int = 0
+    while (i < table.length) { // strict bound: table(table.length) is out of range
+      val triple: Array[Int] = table(i)
+      val lo: Int = triple(0)
+      val hi: Int = triple(1)
+      val stride: Int = triple(2)
+      if (stride == 1) {
+        appendRange(lo, hi)
+      } else {
+        var c: Int = lo
+        while (c <= hi) {
+          appendRange(c, c)
+          c += stride
+        }
+      }
+      i += 1
+    }
+
+    this
+  }
+
+  // appendNegatedTable() appends the negation of range table |table| (an array
+  // of (lo, hi, stride) triples) to this CharClass. Does not mutate |table|.
+  def appendNegatedTable(table: Array[Array[Int]]): CharClass = {
+    var nextLo: Int = 0 // lo end of next class to add
+    var i: Int = 0
+    while (i < table.length) { // strict bound, so the tail range below is reached
+      val triple: Array[Int] = table(i)
+      val lo: Int = triple(0)
+      val hi: Int = triple(1)
+      val stride: Int = triple(2)
+      if (stride == 1) {
+        if (nextLo <= lo - 1) {
+          appendRange(nextLo, lo - 1)
+        }
+        nextLo = hi + 1
+      } else {
+        var c: Int = lo
+        while (c <= hi) {
+          if (nextLo <= c - 1) {
+            appendRange(nextLo, c - 1)
+          }
+          nextLo = c + 1
+          c += stride
+        }
+      }
+      i += 1
+    }
+    if (nextLo <= Unicode.MAX_RUNE) {
+      appendRange(nextLo, Unicode.MAX_RUNE)
+    }
+
+    this
+  }
+
+  // appendTableWithSign() calls append{,Negated}Table depending on sign.
+  // Does not mutate |table|.
+  def appendTableWithSign(table: Array[Array[Int]], sign: Int): CharClass = {
+    if (sign < 0) {
+      appendNegatedTable(table)
+    } else {
+      appendTable(table)
+    }
+  }
+
+  // negateClass() negates this CharClass in place; it must already be clean.
+  def negateClass(): CharClass = {
+    var nextLo: Int = 0 // lo end of next class to add
+    var w: Int = 0 // write index
+    var i: Int = 0
+    while (i < len) {
+      val lo: Int = r(i)
+      val hi: Int = r(i + 1)
+      if (nextLo <= lo - 1) {
+        r(w) = nextLo
+        r(w + 1) = lo - 1
+        w += 2
+      }
+      nextLo = hi + 1
+      i += 2
+    }
+    len = w
+
+    if (nextLo <= Unicode.MAX_RUNE) {
+      // It's possible for the negation to have one more
+      // range - this one - than the original class, so use append.
+      ensureCapacity(len + 2)
+      r(len) = nextLo
+      len += 1
+      r(len) = Unicode.MAX_RUNE
+      len += 1
+    }
+
+    this
+  }
+
+  // appendClassWithSign() calls appendNegatedClass() if sign is negative,
+  // otherwise appendClass(). Does not mutate |x|.
+  def appendClassWithSign(x: Array[Int], sign: Int): CharClass = {
+    if (sign < 0) {
+      appendNegatedClass(x)
+    } else {
+      appendClass(x)
+    }
+  }
+
+  // appendGroup() appends CharGroup |g| to this CharClass, folding iff
+  // |foldCase|. Does not mutate |g|.
+  def appendGroup(g: CharGroup, foldCase: Boolean): CharClass = {
+    var cls: Array[Int] = g.cls
+    if (foldCase) {
+      cls = new CharClass(Utils.EMPTY_INTS).appendFoldedClass(cls).cleanClass().toArray()
+    }
+
+    appendClassWithSign(cls, g.sign)
+  }
+
+  override def toString(): String =
+    charClassToString(r, len)
+}
+
+object CharClass {
+
+  // cmp() returns the ordering of the pair (a(i), a(i+1)) relative to
+  // (pivotFrom, pivotTo), where the first component of the pair (lo) is
+  // ordered naturally and the second component (hi) is in reverse order.
+  private def cmp(array: Array[Int],
+                  i: Int,
+                  pivotFrom: Int,
+                  pivotTo: Int): Int = {
+    val cmp: Int = array(i) - pivotFrom
+
+    if (cmp != 0) cmp else pivotTo - array(i + 1)
+  }
+
+  // qsortIntPair() quicksorts, in place, the pairs of ints in |array| according to cmp().
+  // Precondition: |left| and |right| are even; each is the lo slot of a pair.
+  private def qsortIntPair(array: Array[Int], left: Int, right: Int): Unit = {
+    val pivotIndex: Int = ((left + right) / 2) & ~1
+    val pivotFrom: Int = array(pivotIndex)
+    val pivotTo: Int = array(pivotIndex + 1)
+    var i: Int = left
+    var j: Int = right
+
+    while (i <= j) {
+      while (i < right && cmp(array, i, pivotFrom, pivotTo) < 0) {
+        i += 2
+      }
+      while (j > left && cmp(array, j, pivotFrom, pivotTo) > 0) {
+        j -= 2
+      }
+      if (i <= j) {
+        if (i != j) {
+          var temp: Int = array(i)
+          array(i) = array(j)
+          array(j) = temp
+          temp = array(i + 1)
+          array(i + 1) = array(j + 1)
+          array(j + 1) = temp
+        }
+        i += 2
+        j -= 2
+      }
+    }
+    if (left < j) {
+      qsortIntPair(array, left, j)
+    }
+    if (i < right) {
+      qsortIntPair(array, i, right)
+    }
+  }
+
+  // Renders the first |len| entries of |r| as "[0xLO-0xHI 0xN ...]". Exposed, since useful for debugging CharGroups too.
+  def charClassToString(r: Array[Int], len: Int): String = {
+    val b: java.lang.StringBuilder = new java.lang.StringBuilder()
+    b.append('[')
+    var i: Int = 0
+    while (i < len) {
+      if (i > 0) {
+        b.append(' ')
+      }
+      val lo: Int = r(i)
+      val hi: Int = r(i + 1)
+      // Avoid String.format (not available on GWT).
+      // Cf. https://code.google.com/p/google-web-toolkit/issues/detail?id=3945
+      if (lo == hi) {
+        b.append("0x")
+        b.append(Integer.toHexString(lo))
+      } else {
+        b.append("0x")
+        b.append(Integer.toHexString(lo))
+        b.append("-0x")
+        b.append(Integer.toHexString(hi))
+      }
+      i += 2
+    }
+    b.append(']')
+    b.toString()
+  }
+}
diff --git a/input/rsc/CharGroup.scala b/input/rsc/CharGroup.scala
new file mode 100644
index 0000000..37d11ae
--- /dev/null
+++ b/input/rsc/CharGroup.scala
@@ -0,0 +1,128 @@
+// GENERATED BY make_perl_groups.pl DO NOT EDIT.
+// make_perl_groups.pl >perl_groups.go
+package com.twitter.re2s
+
+import java.util.HashMap
+
+class CharGroup(val sign: Int, val cls: Array[Int]) // sign: +1 = the class, -1 = its negation; cls: [lo, hi] pairs
+
+object CharGroup {
+
+  private val code1: Array[Int] = Array[Int]( /* \d: '0'-'9' */
+                                             0x30,
+                                             0x39)
+
+  private val code2: Array[Int] = Array[Int](
+    /* \s: tab-LF, FF-CR, space */
+    0x9, 0xa, 0xc, 0xd, 0x20, 0x20)
+
+  private val code3: Array[Int] = Array[Int](
+    /* \w: '0'-'9', 'A'-'Z', '_', 'a'-'z' */
+    0x30, 0x39, 0x41, 0x5a, 0x5f, 0x5f, 0x61, 0x7a)
+
+  val PERL_GROUPS: HashMap[String, CharGroup] =
+    new HashMap[String, CharGroup]()
+
+  PERL_GROUPS.put("\\d", new CharGroup(+1, code1))
+  PERL_GROUPS.put("\\D", new CharGroup(-1, code1))
+  PERL_GROUPS.put("\\s", new CharGroup(+1, code2))
+  PERL_GROUPS.put("\\S", new CharGroup(-1, code2))
+  PERL_GROUPS.put("\\w", new CharGroup(+1, code3))
+  PERL_GROUPS.put("\\W", new CharGroup(-1, code3))
+
+  private val code4: Array[Int] = Array[Int](
+    /* [:alnum:]: '0'-'9', 'A'-'Z', 'a'-'z' */
+    0x30, 0x39, 0x41, 0x5a, 0x61, 0x7a)
+
+  private val code5: Array[Int] = Array[Int]( /* [:alpha:]: 'A'-'Z', 'a'-'z' */
+                                             0x41,
+                                             0x5a,
+                                             0x61,
+                                             0x7a)
+
+  private val code6: Array[Int] = Array[Int]( /* [:ascii:]: 0x00-0x7f */
+                                             0x0,
+                                             0x7f)
+
+  private val code7: Array[Int] = Array[Int]( /* [:blank:]: tab, space */
+                                             0x9,
+                                             0x9,
+                                             0x20,
+                                             0x20)
+
+  private val code8: Array[Int] = Array[Int]( /* [:cntrl:]: 0x00-0x1f, 0x7f */
+                                             0x0,
+                                             0x1f,
+                                             0x7f,
+                                             0x7f)
+
+  private val code9: Array[Int] = Array[Int]( /* [:digit:]: '0'-'9' */
+                                             0x30,
+                                             0x39)
+
+  private val code10: Array[Int] = Array[Int]( /* [:graph:]: '!'-'~' */
+                                              0x21,
+                                              0x7e)
+
+  private val code11: Array[Int] = Array[Int]( /* [:lower:]: 'a'-'z' */
+                                              0x61,
+                                              0x7a)
+
+  private val code12: Array[Int] = Array[Int]( /* [:print:]: ' '-'~' */
+                                              0x20,
+                                              0x7e)
+
+  private val code13: Array[Int] = Array[Int](
+    /* [:punct:]: '!'-'/', ':'-'@', '['-'`', '{'-'~' */
+    0x21, 0x2f, 0x3a, 0x40, 0x5b, 0x60, 0x7b, 0x7e)
+
+  private val code14: Array[Int] = Array[Int]( /* [:space:]: tab-CR, space */
+                                              0x9,
+                                              0xd,
+                                              0x20,
+                                              0x20)
+
+  private val code15: Array[Int] = Array[Int]( /* [:upper:]: 'A'-'Z' */
+                                              0x41,
+                                              0x5a)
+
+  private val code16: Array[Int] = Array[Int](
+    /* [:word:]: '0'-'9', 'A'-'Z', '_', 'a'-'z' */
+    0x30, 0x39, 0x41, 0x5a, 0x5f, 0x5f, 0x61, 0x7a)
+
+  private val code17: Array[Int] = Array[Int](
+    /* [:xdigit:]: '0'-'9', 'A'-'F', 'a'-'f' */
+    0x30, 0x39, 0x41, 0x46, 0x61, 0x66)
+
+  val POSIX_GROUPS: HashMap[String, CharGroup] =
+    new HashMap[String, CharGroup]()
+
+  POSIX_GROUPS.put("[:alnum:]", new CharGroup(+1, code4))
+  POSIX_GROUPS.put("[:^alnum:]", new CharGroup(-1, code4))
+  POSIX_GROUPS.put("[:alpha:]", new CharGroup(+1, code5))
+  POSIX_GROUPS.put("[:^alpha:]", new CharGroup(-1, code5))
+  POSIX_GROUPS.put("[:ascii:]", new CharGroup(+1, code6))
+  POSIX_GROUPS.put("[:^ascii:]", new CharGroup(-1, code6))
+  POSIX_GROUPS.put("[:blank:]", new CharGroup(+1, code7))
+  POSIX_GROUPS.put("[:^blank:]", new CharGroup(-1, code7))
+  POSIX_GROUPS.put("[:cntrl:]", new CharGroup(+1, code8))
+  POSIX_GROUPS.put("[:^cntrl:]", new CharGroup(-1, code8))
+  POSIX_GROUPS.put("[:digit:]", new CharGroup(+1, code9))
+  POSIX_GROUPS.put("[:^digit:]", new CharGroup(-1, code9))
+  POSIX_GROUPS.put("[:graph:]", new CharGroup(+1, code10))
+  POSIX_GROUPS.put("[:^graph:]", new CharGroup(-1, code10))
+  POSIX_GROUPS.put("[:lower:]", new CharGroup(+1, code11))
+  POSIX_GROUPS.put("[:^lower:]", new CharGroup(-1, code11))
+  POSIX_GROUPS.put("[:print:]", new CharGroup(+1, code12))
+  POSIX_GROUPS.put("[:^print:]", new CharGroup(-1, code12))
+  POSIX_GROUPS.put("[:punct:]", new CharGroup(+1, code13))
+  POSIX_GROUPS.put("[:^punct:]", new CharGroup(-1, code13))
+  POSIX_GROUPS.put("[:space:]", new CharGroup(+1, code14))
+  POSIX_GROUPS.put("[:^space:]", new CharGroup(-1, code14))
+  POSIX_GROUPS.put("[:upper:]", new CharGroup(+1, code15))
+  POSIX_GROUPS.put("[:^upper:]", new CharGroup(-1, code15))
+  POSIX_GROUPS.put("[:word:]", new CharGroup(+1, code16))
+  POSIX_GROUPS.put("[:^word:]", new CharGroup(-1, code16))
+  POSIX_GROUPS.put("[:xdigit:]", new CharGroup(+1, code17))
+  POSIX_GROUPS.put("[:^xdigit:]", new CharGroup(-1, code17))
+}
diff --git a/input/rsc/Compiler.scala b/input/rsc/Compiler.scala
new file mode 100644
index
0000000..134c675
--- /dev/null
+++ b/input/rsc/Compiler.scala
@@ -0,0 +1,263 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Original Go source here:
+// http://code.google.com/p/go/source/browse/src/pkg/regexp/syntax/compile.go
+
+package com.twitter.re2s
+
+import com.twitter.re2s.Compiler._
+import com.twitter.re2s.Inst.{Op => IOP}
+import com.twitter.re2s.Regexp.{Op => ROP}
+
+/**
+ * Compiler from {@code Regexp} (RE2 abstract syntax) to {@code RE2}
+ * (compiled regular expression).
+ *
+ * The only entry point is {@link #compileRegexp}.
+ */
+class Compiler private () {
+  private val prog: Prog = new Prog() // Program being built
+  newInst(IOP.FAIL) // first instruction added, so index 0 is the FAIL instruction
+
+  private def newInst(op: IOP): Frag = { // adds |op| to prog; returns a Frag at the new instruction's index
+    prog.addInst(op)
+    new Frag(prog.numInst() - 1, 0)
+  }
+
+  // Returns a no-op fragment. Sometimes unavoidable.
+  private def nop(): Frag = {
+    val f: Compiler.Frag = newInst(IOP.NOP)
+    f.out = f.i << 1
+    f
+  }
+
+  private def fail(): Compiler.Frag = new Frag(0, 0) // fragment at index 0; cat()/alt() treat i == 0 as failure
+
+  // Returns a fragment holding a single CAPTURE instruction whose arg is |arg|,
+  // raising prog.numCap to at least arg + 1.
+  private def cap(arg: Int): Frag = {
+    val f: Compiler.Frag = newInst(IOP.CAPTURE)
+    f.out = f.i << 1
+    prog.getInst(f.i).arg = arg
+    if (prog.numCap < arg + 1) {
+      prog.numCap = arg + 1
+    }
+    f
+  }
+
+  // Given fragments for a and b, returns a fragment for the concatenation ab.
+  private def cat(f1: Frag, f2: Frag): Frag = {
+    // concat of failure is failure
+    if (f1.i == 0 || f2.i == 0) {
+      return fail()
+    }
+    prog.patch(f1.out, f2.i)
+    new Frag(f1.i, f2.out)
+  }
+
+  // Given fragments for a and b, returns fragment for a|b.
+ private def alt(f1: Frag, f2: Frag): Frag = { + // alt of failure is other + if (f1.i == 0) { + return f2 + } + if (f2.i == 0) { + return f1 + } + val f: Compiler.Frag = newInst(IOP.ALT) + val i: Inst = prog.getInst(f.i) + i.out = f1.i + i.arg = f2.i + f.out = prog.append(f1.out, f2.out) + f + } + + // Given a fragment for a, returns a fragment for a? or a?? (if nongreedy) + private def quest(f1: Frag, nongreedy: Boolean): Frag = { + val f: Compiler.Frag = newInst(IOP.ALT) + val i: Inst = prog.getInst(f.i) + if (nongreedy) { + i.arg = f1.i + f.out = f.i << 1 + } else { + i.out = f1.i + f.out = f.i << 1 | 1 + } + f.out = prog.append(f.out, f1.out) + f + } + + // Given a fragment a, returns a fragment for a* or a*? (if nongreedy) + private def star(f1: Frag, nongreedy: Boolean): Frag = { + val f: Compiler.Frag = newInst(IOP.ALT) + val i: Inst = prog.getInst(f.i) + if (nongreedy) { + i.arg = f1.i + f.out = f.i << 1 + } else { + i.out = f1.i + f.out = f.i << 1 | 1 + } + prog.patch(f1.out, f.i) + f + } + + // Given a fragment for a, returns a fragment for a+ or a+? (if nongreedy) + private def plus(f1: Frag, nongreedy: Boolean): Frag = + new Frag(f1.i, star(f1, nongreedy).out) + + // op is a bitmask of EMPTY_* flags. + private def empty(op: Int): Frag = { + val f: Compiler.Frag = newInst(IOP.EMPTY_WIDTH) + prog.getInst(f.i).arg = op + f.out = f.i << 1 + f + } + + // flags : parser flags + private def rune(runes: Array[Int], _flags: Int): Frag = { + var flags: Int = _flags + val f: Compiler.Frag = newInst(IOP.RUNE) + val i: Inst = prog.getInst(f.i) + i.runes = runes + flags &= RE2.FOLD_CASE // only relevant flag is FoldCase + if (runes.length != 1 || Unicode.simpleFold(runes(0)) == runes(0)) { + flags &= ~RE2.FOLD_CASE // and sometimes not even that + } + i.arg = flags + f.out = f.i << 1 + // Special cases for exec machine. 
+ if ((flags & RE2.FOLD_CASE) == 0 && runes.length == 1 || + runes.length == 2 && + runes(0) == runes(1)) { + i.op = IOP.RUNE1 + } else if (runes.length == 2 && + runes(0) == 0 && + runes(1) == Unicode.MAX_RUNE) { + i.op = IOP.RUNE_ANY + } else if (runes.length == 4 && + runes(0) == 0 && + runes(1) == '\n' - 1 && + runes(2) == '\n' + 1 && + runes(3) == Unicode.MAX_RUNE) { + i.op = IOP.RUNE_ANY_NOT_NL + } + f + } + + private def compile(re: Regexp): Frag = { + re.op match { + case ROP.NO_MATCH => + fail() + case ROP.EMPTY_MATCH => + nop() + case ROP.LITERAL => + if (re.runes.length == 0) { + nop() + } else { + val runes: Array[Int] = re.runes + var f: Frag = null + var i: Int = 0 + while (i < runes.length) { + val r: Int = runes(i) + val f1: Compiler.Frag = rune(Array[Int](r), re.flags) + f = if (f == null) f1 else cat(f, f1) + i += 1 + } + f + } + case ROP.CHAR_CLASS => + rune(re.runes, re.flags) + case ROP.ANY_CHAR_NOT_NL => + rune(ANY_RUNE_NOT_NL, 0) + case ROP.ANY_CHAR => + rune(ANY_RUNE, 0) + case ROP.BEGIN_LINE => + empty(Utils.EMPTY_BEGIN_LINE) + case ROP.END_LINE => + empty(Utils.EMPTY_END_LINE) + case ROP.BEGIN_TEXT => + empty(Utils.EMPTY_BEGIN_TEXT) + case ROP.END_TEXT => + empty(Utils.EMPTY_END_TEXT) + case ROP.WORD_BOUNDARY => + empty(Utils.EMPTY_WORD_BOUNDARY) + case ROP.NO_WORD_BOUNDARY => + empty(Utils.EMPTY_NO_WORD_BOUNDARY) + case ROP.CAPTURE => + val bra: Compiler.Frag = cap(re.cap << 1) + val sub: Compiler.Frag = compile(re.subs(0)) + val ket: Compiler.Frag = cap(re.cap << 1 | 1) + cat(cat(bra, sub), ket) + case ROP.STAR => + star(compile(re.subs(0)), (re.flags & RE2.NON_GREEDY) != 0) + case ROP.PLUS => + plus(compile(re.subs(0)), (re.flags & RE2.NON_GREEDY) != 0) + case ROP.QUEST => + quest(compile(re.subs(0)), (re.flags & RE2.NON_GREEDY) != 0) + case ROP.CONCAT => + if (re.subs.length == 0) { + nop() + } else { + val subs: Array[Regexp] = re.subs + var f: Frag = null + var i: Int = 0 + while (i < subs.length) { + val sub: Regexp = subs(i) + val 
f1: Compiler.Frag = compile(sub) + f = if (f == null) f1 else cat(f, f1) + i += 1 + } + f + } + case ROP.ALTERNATE => + if (re.subs.length == 0) { + nop() + } else { + val subs: Array[Regexp] = re.subs + var f: Frag = null + var i: Int = 0 + while (i < subs.length) { + val sub: Regexp = subs(i) + val f1: Compiler.Frag = compile(sub) + f = if (f == null) f1 else alt(f, f1) + i += 1 + } + f + } + case _ => + throw new IllegalStateException("regexp: unhandled case in compile") + } + } +} + +object Compiler { + + /** + * A fragment of a compiled regular expression program. + * @see http://swtch.com/~rsc/regexp/regexp1.html + */ + private class Frag( + val i: Int, // an instruction address (pc). + var out: Int // a patch list see explanation in Prog.java + ) + + def compileRegexp(re: Regexp): Prog = { + val c: Compiler = new Compiler() + val f: Compiler.Frag = c.compile(re) + c.prog.patch(f.out, c.newInst(IOP.MATCH).i) + c.prog.start = f.i + c.prog + } + + private val ANY_RUNE_NOT_NL: Array[Int] = Array[Int]( + 0, + '\n' - 1, + '\n' + 1, + Unicode.MAX_RUNE + ) + + private val ANY_RUNE: Array[Int] = Array[Int](0, Unicode.MAX_RUNE) +} diff --git a/input/rsc/Inst.scala b/input/rsc/Inst.scala new file mode 100644 index 0000000..cda4fe8 --- /dev/null +++ b/input/rsc/Inst.scala @@ -0,0 +1,151 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Original Go source here: +// http://code.google.com/p/go/source/browse/src/pkg/regexp/syntax/prog.go + +package com.twitter.re2s + +import com.twitter.re2s.Inst._ +import com.twitter.re2s.Inst.Op._ + +/** + * A single instruction in the regular expression virtual machine. 
+ * @see http://swtch.com/~rsc/regexp/regexp2.html + */ +class Inst(var op: Inst.Op) { + var out: Int = 0 // all but MATCH, FAIL + var arg: Int = 0 // ALT, ALT_MATCH, CAPTURE, EMPTY_WIDTH + var runes: Array[Int] = null // length==1 => exact match + // otherwise a list of [lo,hi] pairs. hi is *inclusive*. + + // op() returns i.Op but merges all the rune special cases into RUNE + // Beware "op" is a public field. + def runeOp(): Op = { + op match { + case RUNE1 | RUNE_ANY | RUNE_ANY_NOT_NL => + Op.RUNE + case _ => + op + } + } + + // MatchRune returns true if the instruction matches (and consumes) r. + // It should only be called when op == InstRune. + def matchRune(r: Int): Boolean = { + // Special case: single-rune slice is from literal string, not char + // class. + if (runes.length == 1) { + val r0: Int = runes(0) + if (r == r0) { + return true + } + if ((arg & RE2.FOLD_CASE) != 0) { + var r1: Int = Unicode.simpleFold(r0) + while (r1 != r0) { + if (r == r1) { + return true + } + r1 = Unicode.simpleFold(r1) + } + } + return false + } + + // Peek at the first few pairs. + // Should handle ASCII well. + var j: Int = 0 + while (j < runes.length && j <= 8) { + if (r < runes(j)) { + return false + } + if (r <= runes(j + 1)) { + return true + } + j += 2 + } + + // Otherwise binary search. 
+ var lo: Int = 0 + var hi: Int = runes.length / 2 + while (lo < hi) { + val m: Int = lo + (hi - lo) / 2 + val c: Int = runes(2 * m) + if (c <= r) { + if (r <= runes(2 * m + 1)) { + return true + } + lo = m + 1 + } else { + hi = m + } + } + + false + } + + override def toString(): String = { + op match { + case ALT => + "alt -> " + out + ", " + arg + case ALT_MATCH => + "altmatch -> " + out + ", " + arg + case CAPTURE => + "cap " + arg + " -> " + out + case EMPTY_WIDTH => + "empty " + arg + " -> " + out + case MATCH => + "match" + case FAIL => + "fail" + case NOP => + "nop -> " + out + case RUNE => + if (runes == null) { + "rune " // can't happen + } else { + "rune " + escapeRunes(runes) + + (if ((arg & RE2.FOLD_CASE) != 0) "/i" else "") + " -> " + out + } + case RUNE1 => + "rune1 " + escapeRunes(runes) + " -> " + out + case RUNE_ANY => + "any -> " + out + case RUNE_ANY_NOT_NL => + "anynotnl -> " + out + case _ => + throw new IllegalStateException("unhandled case in Inst.toString") + } + } +} + +object Inst { + type Op = Int + object Op { + final val ALT: Int = 0 + final val ALT_MATCH: Int = 1 + final val CAPTURE: Int = 2 + final val EMPTY_WIDTH: Int = 3 + final val FAIL: Int = 4 + final val MATCH: Int = 5 + final val NOP: Int = 6 + final val RUNE: Int = 7 + final val RUNE1: Int = 8 + final val RUNE_ANY: Int = 9 + final val RUNE_ANY_NOT_NL: Int = 10 + } + + // Returns an RE2 expression matching exactly |runes|. + private def escapeRunes(runes: Array[Int]): String = { + val out: java.lang.StringBuilder = new java.lang.StringBuilder() + out.append('"') + var i: Int = 0 + while (i < runes.length) { + val rune: Int = runes(i) + Utils.escapeRune(out, rune) + } + out.append('"') + out.toString + } +} diff --git a/input/rsc/Machine.scala b/input/rsc/Machine.scala new file mode 100644 index 0000000..1d30630 --- /dev/null +++ b/input/rsc/Machine.scala @@ -0,0 +1,395 @@ +// Copyright 2010 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Original Go source here:
// http://code.google.com/p/go/source/browse/src/pkg/regexp/exec.go

package com.twitter.re2s

import java.util.ArrayList
import java.util.Arrays
import java.util.List
import com.twitter.re2s.Inst.{Op => IOP}
import com.twitter.re2s.Machine._

// A Machine matches an input string of Unicode characters against an
// RE2 instance using a simple NFA.
//
// Called by RE2.doExecute.
class Machine(re2: RE2) {
  // Compiled program.
  private val prog: Prog = re2.prog

  // Two queues for runq, nextq.
  private val q0: Machine.Queue = new Queue(prog.numInst())
  private val q1: Machine.Queue = new Queue(prog.numInst())

  // pool of available threads
  // Really a stack:
  private val pool: ArrayList[Machine.Thread] = new ArrayList[Thread]()

  // Whether a match was found.
  private var matched: Boolean = false

  // Capture information for the match.
  private var matchcap: Array[Int] =
    new Array[Int](if (prog.numCap < 2) 2 else prog.numCap)

  // init() reinitializes an existing Machine for re-use on a new input:
  // every pooled thread and matchcap get a fresh capture array of |ncap| slots.
  def init(ncap: Int): Unit = {
    val iter: java.util.Iterator[Machine.Thread] = pool.iterator()
    while (iter.hasNext()) {
      val t: Machine.Thread = iter.next()
      t.cap = new Array[Int](ncap)
    }
    this.matchcap = new Array[Int](ncap)
  }

  // Returns a copy of the capture array of the last match
  // (Utils.EMPTY_INTS when no capture slots are tracked).
  def submatches(): Array[Int] = {
    if (matchcap.length == 0) {
      return Utils.EMPTY_INTS
    }
    val cap: Array[Int] = new Array[Int](matchcap.length)
    System.arraycopy(matchcap, 0, cap, 0, matchcap.length)
    cap
  }

  // alloc() allocates a new thread with the given instruction.
  // It uses the free pool if possible.
  private def alloc(inst: Inst): Thread = {
    val n: Int = pool.size()
    val t: Machine.Thread =
      if (n > 0) pool.remove(n - 1) else new Thread(matchcap.length)
    t.inst = inst
    t
  }

  // free() returns t to the free pool.
  private def free(t: Thread): Unit =
    pool.add(t)

  // match() runs the machine over the input |in| starting at |pos| with the
  // RE2 Anchor |anchor|.
  // It reports whether a match was found.
  // If so, matchcap holds the submatch information.
  def match_(in: MachineInput, _pos: Int, anchor: Int): Boolean = {
    var pos: Int       = _pos
    val startCond: Int = re2.cond
    if (startCond == Utils.EMPTY_ALL) { // impossible
      return false
    }
    if ((anchor == RE2.ANCHOR_START || anchor == RE2.ANCHOR_BOTH) &&
        pos != 0) {
      return false
    }
    matched = false
    Arrays.fill(matchcap, -1)
    var runq: Machine.Queue  = q0
    var nextq: Machine.Queue = q1
    // |rune|/|width| describe the rune at |pos|; |rune1|/|width1| the one
    // after it (one-rune lookahead). step() packs width into the low 3 bits.
    var r: Int      = in.step(pos)
    var rune: Int   = r >> 3
    var width: Int  = r & 7
    var rune1: Int  = -1
    var width1: Int = 0
    if (r != MachineInput.EOF) {
      r = in.step(pos + width)
      rune1 = r >> 3
      width1 = r & 7
    }
    var flag: Int = 0 // bitmask of EMPTY_* flags
    if (pos == 0) {
      flag = Utils.emptyOpContext(-1, rune)
    } else {
      flag = in.context(pos)
    }

    // Every `stop = true` below stands for a `break` in the Java original,
    // so nothing else in the iteration may run once it is set. In
    // particular |pos| must not be advanced by a negative |advance|, and
    // the queues must not be swapped after the final step(): the trailing
    // nextq.clear(pool) has to see the queue step() just appended to.
    var stop: Boolean = false
    while (!stop) {
      if (runq.isEmpty()) {
        if ((startCond & Utils.EMPTY_BEGIN_TEXT) != 0 && pos != 0) {
          // Anchored match, past beginning of text.
          stop = true
        } else if (matched) {
          // Have match; finished exploring alternatives.
          stop = true
        } else if (!re2.prefix.isEmpty() &&
                   rune1 != re2.prefixRune &&
                   in.canCheckPrefix()) {
          // Match requires literal prefix; fast search for it.
          val advance: Int = in.index(re2, pos)
          if (advance < 0) {
            // Prefix does not occur in the rest of the input.
            stop = true
          } else {
            pos += advance
            r = in.step(pos)
            rune = r >> 3
            width = r & 7
            r = in.step(pos + width)
            rune1 = r >> 3
            width1 = r & 7
          }
        }
      }
      if (!stop) {
        if (!matched && (pos == 0 || anchor == RE2.UNANCHORED)) {
          // If we are anchoring at begin then only add threads that begin
          // at |pos| = 0.
          if (matchcap.length > 0) {
            matchcap(0) = pos
          }
          this.add(runq, prog.start, pos, matchcap, flag, null)
        }
        flag = Utils.emptyOpContext(rune, rune1)
        step(runq,
             nextq,
             pos,
             pos + width,
             rune,
             flag,
             anchor,
             pos == in.endPos())
        if (width == 0) { // EOF
          stop = true
        } else if (matchcap.length == 0 && matched) {
          // Found a match and not paying attention
          // to where it is, so any match will do.
          stop = true
        } else {
          pos += width
          rune = rune1
          width = width1
          if (rune != -1) {
            r = in.step(pos + width)
            rune1 = r >> 3
            width1 = r & 7
          }
          val tmpq: Machine.Queue = runq
          runq = nextq
          nextq = tmpq
        }
      }
    }
    nextq.clear(pool)
    matched
  }

  // step() executes one step of the machine, running each of the threads
  // on |runq| and appending new threads to |nextq|.
  // The step processes the rune |c| (which may be -1 for EOF),
  // which starts at position |pos| and ends at |nextPos|.
  // |nextCond| gives the setting for the EMPTY_* flags after |c|.
  // |anchor| is the anchoring flag and |atEnd| signals if we are at the end of
  // the input string.
  private def step(runq: Queue,
                   nextq: Queue,
                   pos: Int,
                   nextPos: Int,
                   c: Int,
                   nextCond: Int,
                   anchor: Int,
                   atEnd: Boolean): Unit = {
    val longest: Boolean = re2.longest
    var j: Int           = 0
    while (j < runq.size) {
      val entry: Machine.Entry = runq.dense(j)
      if (entry == null) {
        () // continue
      } else {
        var t: Machine.Thread = entry.thread
        if (t == null) {
          () // continue
        } else {
          if (longest && matched && t.cap.length > 0 && matchcap(0) < t.cap(0)) {
            // In longest-match mode, drop threads that started after the
            // current best match.
            // free(t)
            pool.add(t)
            () // continue
          } else {
            val i: Inst      = t.inst
            var add: Boolean = false
            i.op match {
              case IOP.MATCH =>
                if (anchor == RE2.ANCHOR_BOTH && !atEnd) {
                  // Don't match if we anchor at both start and end and those
                  // expectations aren't met.
                  () // break switch
                } else {
                  if (t.cap.length > 0 && (!longest || !matched || matchcap(1) < pos)) {
                    t.cap(1) = pos
                    System.arraycopy(t.cap, 0, matchcap, 0, t.cap.length)
                  }
                  if (!longest) {
                    // First-match mode: cut off all lower-priority threads.
                    var k: Int = j + 1
                    while (k < runq.size) {
                      val d: Machine.Entry = runq.dense(k)
                      if (d.thread != null) {
                        // free(d.thread)
                        pool.add(d.thread)
                      }
                      k += 1
                    }
                    runq.size = 0
                  }
                  matched = true
                }
              case IOP.RUNE =>
                add = i.matchRune(c)
              case IOP.RUNE1 =>
                add = c == i.runes(0)
              case IOP.RUNE_ANY =>
                add = true
              case IOP.RUNE_ANY_NOT_NL =>
                add = c != '\n'
              case _ =>
                throw new IllegalStateException("bad inst")
            }
            if (add) {
              // add() returns null when it has handed |t| over to a queue entry.
              t = this.add(nextq, i.out, nextPos, t.cap, nextCond, t)
            }
            if (t != null) {
              // free(t)
              pool.add(t)
            }
          }
        }
      }
      j += 1
    }
    runq.size = 0
  }

  // add() adds an entry to |q| for |pc|, unless the |q| already has such an
  // entry.  It also recursively adds an entry for all instructions reachable
  // from |pc| by following empty-width conditions satisfied by |cond|.  |pos|
  // gives the current position in the input.  |cond| is a bitmask of EMPTY_*
  // flags.  |_t| is a spare thread that may be reused; the return value is
  // that thread if it is still unused, or null once it has been attached to
  // a queue entry.
  private def add(q: Queue,
                  pc: Int,
                  pos: Int,
                  cap: Array[Int],
                  cond: Int,
                  _t: Thread): Thread = {
    var t: Machine.Thread = _t
    if (pc == 0) {
      return t
    }
    if (q.contains(pc)) {
      return t
    }
    val d: Machine.Entry = q.add(pc)
    val inst: Inst       = prog.getInst(pc)
    inst.runeOp() match {
      case IOP.FAIL =>
        () // nothing
      case IOP.ALT | IOP.ALT_MATCH =>
        t = this.add(q, inst.out, pos, cap, cond, t)
        t = this.add(q, inst.arg, pos, cap, cond, t)
      case IOP.EMPTY_WIDTH =>
        if ((inst.arg & ~cond) == 0) {
          t = this.add(q, inst.out, pos, cap, cond, t)
        }
      case IOP.NOP =>
        t = this.add(q, inst.out, pos, cap, cond, t)
      case IOP.CAPTURE =>
        if (inst.arg < cap.length) {
          // Record |pos| in the slot only while following this path, then
          // restore the previous value.
          val opos: Int = cap(inst.arg)
          cap(inst.arg) = pos
          this.add(q, inst.out, pos, cap, cond, null)
          cap(inst.arg) = opos
        } else {
          t = this.add(q, inst.out, pos, cap, cond, t)
        }
      case IOP.MATCH | IOP.RUNE | IOP.RUNE1 | IOP.RUNE_ANY |
          IOP.RUNE_ANY_NOT_NL =>
        if (t == null) {
          t = alloc(inst)
        } else {
          t.inst = inst
        }
        if (cap.length > 0 && t.cap != cap) {
          System.arraycopy(cap, 0, t.cap, 0, cap.length)
        }
        d.thread = t
        t = null
      case _ =>
        throw new IllegalStateException("unhandled")
    }
    t
  }
}

object Machine {

  // A logical thread in the NFA.
  private class Thread(n: Int) {
    var cap: Array[Int] = new Array[Int](n)
    var inst: Inst      = null
  }

  // A queue is a 'sparse array' holding pending threads of execution.  See:
  // research.swtch.com/2008/03/using-uninitialized-memory-for-fun-and.html
  private class Queue(n: Int) {
    val dense: Array[Entry] = new Array[Entry](n) // may contain stale Entries in slots >= size
    val sparse: Array[Int]  = new Array[Int](n)   // may contain stale but in-bounds values.
    var size: Int           = 0                   // of prefix of |dense| that is logically populated

    def contains(pc: Int): Boolean = {
      val j: Int = sparse(pc) // (non-negative)
      if (j >= size) {
        return false
      }
      val d: Machine.Entry = dense(j)
      d != null && d.pc == pc
    }

    def isEmpty(): Boolean = size == 0

    // Appends an entry for |pc| with no thread attached and returns it.
    def add(pc: Int): Entry = {
      val j: Int = size
      size += 1
      sparse(pc) = j
      var e: Machine.Entry = dense(j)
      if (e == null) { // recycle previous Entry if any
        val entry: Machine.Entry = new Entry()
        dense(j) = entry
        e = entry
      }
      e.thread = null
      e.pc = pc
      e
    }

    // Frees all threads on the thread queue, returning them to the free pool.
    def clear(freePool: List[Thread]): Unit = {
      var i: Int = 0
      while (i < size) {
        val entry: Machine.Entry = dense(i)
        if (entry != null && entry.thread != null) {
          // free(entry.thread)
          freePool.add(entry.thread)
        }
        // (don't release dense[i] to GC; recycle it.)
        i += 1
      }
      size = 0
    }

    override def toString(): String = {
      val out: java.lang.StringBuilder = new java.lang.StringBuilder()
      out.append('{')
      var i: Int = 0
      while (i < size) {
        if (i != 0) {
          out.append(", ")
        }
        out.append(dense(i).pc)
        i += 1
      }
      out.append('}')
      out.toString()
    }
  }

  // One slot of a Queue: an instruction address and the thread parked on it.
  private class Entry() {
    var pc: Int        = 0
    var thread: Thread = null
  }
}
diff --git a/input/rsc/MachineInput.scala b/input/rsc/MachineInput.scala new file mode 100644 index 0000000..31df803 --- /dev/null +++ b/input/rsc/MachineInput.scala @@ -0,0 +1,244 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Original Go source here:
// http://code.google.com/p/go/source/browse/src/pkg/regexp/regexp.go

package com.twitter.re2s

/**
 * MachineInput abstracts different representations of the input text
 * supplied to the Machine. It provides one-character lookahead.
+ */ +abstract class MachineInput() { + + // Returns the rune at the specified index the units are + // unspecified, but could be UTF-8 byte, UTF-16 char, or rune + // indices. Returns the width (in the same units) of the rune in + // the lower 3 bits, and the rune (Unicode code point) in the high + // bits. Never negative, except for EOF which is represented as -1 + // << 3 | 0. + def step(pos: Int): Int + + // can we look ahead without losing info? + def canCheckPrefix(): Boolean + + // Returns the index relative to |pos| at which |re2.prefix| is found + // in this input stream, or a negative value if not found. + def index(re2: RE2, pos: Int): Int + + // Returns a bitmask of EMPTY_* flags. + def context(pos: Int): Int + + // Returns the end position in the same units as step(). + def endPos(): Int +} + +object MachineInput { + + final val EOF: Int = (-1 << 3) | 0 + + def fromUTF8(b: Array[Byte], start: Int, end: Int): MachineInput = + new UTF8Input(b, start, end) + + def fromUTF16(s: CharSequence, start: Int, end: Int): MachineInput = + new UTF16Input(s, start, end) + + // An implementation of MachineInput for UTF-8 byte arrays. + // |pos| and |width| are byte indices. + private class UTF8Input(b: Array[Byte], start: Int, end: Int) + extends MachineInput() { + if (end > b.length) { + throw new ArrayIndexOutOfBoundsException( + "end is greater than length: " + end + " > " + b.length) + } + + override def step(_i: Int): Int = { + var i: Int = _i + i += start + if (i >= end) { + return EOF + } + + // UTF-8. 
RFC 3629 in five lines: + // + // Unicode code points UTF-8 encoding (binary) + // 00-7F (7 bits) 0tuvwxyz + // 0080-07FF (11 bits) 110pqrst 10uvwxyz + // 0800-FFFF (16 bits) 1110jklm 10npqrst 10uvwxyz + // 010000-10FFFF (21 bits) 11110efg 10hijklm 10npqrst 10uvwxyz + var x: Int = b(i) & 0xff // zero extend + i += 1 + if ((x & 0x80) == 0) { + return x << 3 | 1 + } else if ((x & 0xE0) == 0xC0) { // 110xxxxx + x = x & 0x1F + if (i >= end) { + return EOF + } + x = x << 6 | b(i) & 0x3F + i += 1 + return x << 3 | 2 + } else if ((x & 0xF0) == 0xE0) { // 1110xxxx + x = x & 0x0F + if (i + 1 >= end) { + return EOF + } + x = x << 6 | b(i) & 0x3F + i += 1 + x = x << 6 | b(i) & 0x3F + i += 1 + return x << 3 | 3 + } else { // 11110xxx + x = x & 0x07 + if (i + 2 >= end) { + return EOF + } + x = x << 6 | b(i) & 0x3F + i += 1 + x = x << 6 | b(i) & 0x3F + i += 1 + x = x << 6 | b(i) & 0x3F + i += 1 + return x << 3 | 4 + } + } + + override def canCheckPrefix(): Boolean = true + + override def index(re2: RE2, _pos: Int): Int = { + var pos: Int = _pos + pos += start + val i: Int = Utils.indexOf(b, re2.prefixUTF8, pos) + if (i < 0) i else i - pos + } + + override def context(_pos: Int): Int = { + var pos: Int = _pos + pos += this.start + var r1: Int = -1 + if (pos > this.start && pos <= this.end) { + var start: Int = pos - 1 + r1 = b(start) + start -= 1 + if (r1 >= 0x80) { // decode UTF-8 + // Find start, up to 4 bytes earlier. + var lim: Int = pos - 4 + if (lim < this.start) { + lim = this.start + } + while (start >= lim && (b(start) & 0xC0) == 0x80) { // 10xxxxxx + start -= 1 + } + if (start < this.start) { + start = this.start + } + r1 = step(start) >> 3 + } + } + val r2: Int = if (pos < this.end) step(pos) >> 3 else -1 + return Utils.emptyOpContext(r1, r2) + } + + override def endPos(): Int = end + } + + // |pos| and |width| are in Java "char" units. 
+ private class UTF16Input(str: CharSequence, start: Int, end: Int) + extends MachineInput() { + + override def step(_pos: Int): Int = { + var pos: Int = _pos + pos += start + if (pos < end) { + val rune: Int = Character.codePointAt(str, pos) + val nextPos: Int = pos + Character.charCount(rune) + val width: Int = nextPos - pos + return rune << 3 | width + } else { + return EOF + } + } + + override def canCheckPrefix(): Boolean = true + + override def index(re2: RE2, _pos: Int): Int = { + var pos: Int = _pos + pos += start + val i: Int = indexOf(str, re2.prefix, pos) + if (i < 0) i else i - pos + } + + override def context(_pos: Int): Int = { + var pos: Int = _pos + pos += start + val r1: Int = + if (pos > start && pos <= end) Character.codePointBefore(str, pos) + else -1 + val r2: Int = if (pos < end) Character.codePointAt(str, pos) else -1 + return Utils.emptyOpContext(r1, r2) + } + + override def endPos(): Int = end + + private def indexOf(hayStack: CharSequence, + needle: String, + pos: Int): Int = + hayStack match { + case hayStack: String => + hayStack.indexOf(needle, pos) + case hayStack: StringBuilder => + hayStack.indexOf(needle, pos) + case _ => + indexOfFallback(hayStack, needle, pos) + } + + // Modified version of {@link String#indexOf(String) that allows a CharSequence. + private def indexOfFallback(hayStack: CharSequence, + needle: String, + _fromIndex: Int): Int = { + var fromIndex: Int = _fromIndex + + if (fromIndex >= hayStack.length()) { + return if (needle.isEmpty()) 0 else -1 + } + if (fromIndex < 0) { + fromIndex = 0 + } + if (needle.isEmpty()) { + return fromIndex + } + + val first: Char = needle.charAt(0) + val max: Int = hayStack.length() - needle.length() + var i: Int = fromIndex + + while (i <= max) { + /* Look for first character. 
*/ + if (hayStack.charAt(i) != first) { + while ({ i += 1; i } <= max && hayStack.charAt(i) != first) {} + } + + /* Found first character, now look at the rest of v2 */ + if (i <= max) { + var j: Int = i + 1 + val end: Int = j + needle.length() - 1 + var k: Int = 1 + while (j < end && hayStack.charAt(j) == needle.charAt(k)) { + j += 1 + k += 1 + } + if (j == end) { + /* Found whole string. */ + return i + } + } + + i += 1 + } + + return -1 + } + } +} diff --git a/input/rsc/Matcher.scala b/input/rsc/Matcher.scala new file mode 100644 index 0000000..443a782 --- /dev/null +++ b/input/rsc/Matcher.scala @@ -0,0 +1,436 @@ +// Copyright 2010 Google Inc. All Rights Reserved. + +package com.twitter.re2s + +/** + * A stateful iterator that interprets a regex {@code Pattern} on a + * specific input. Its interface mimics the JDK 1.4.2 + * {@code java.util.regex.Matcher}. + * + *

 * <p>Conceptually, a Matcher consists of four parts:
 * <ol>
 *   <li>A compiled regular expression {@code Pattern}, set at
 *   construction and fixed for the lifetime of the matcher.</li>
 *
 *   <li>The remainder of the input string, set at construction or
 *   {@link #reset_0()} and advanced by each match operation such as
 *   {@link #find}, {@link #matches} or {@link #lookingAt}.</li>
 *
 *   <li>The current match information, accessible via {@link #start},
 *   {@link #end}, and {@link #group}, and updated by each match
 *   operation.</li>
 *
 *   <li>The append position, used and advanced by
 *   {@link #appendReplacement} and {@link #appendTail} if performing a
 *   search and replace from the input to an external {@code StringBuffer}.</li>
 * </ol>
 *
 * <p>See the package-level
 * documentation for an overview of how to use this API.</p>

+ * + * @author rsc@google.com (Russ Cox) + */ +final class Matcher(private val _pattern: Pattern, private var _inputSequence: CharSequence) { + if (_pattern == null) { + throw new NullPointerException("pattern is null") + } + + // The number of submatches (groups) in the pattern. + private val _groupCount: Int = _pattern.re2.numberOfCapturingGroups() + + // The group indexes, in [start, end) pairs. Zeroth pair is overall match. + private val _groups: Array[Int] = new Array[Int](2 + 2 * _groupCount) + + // The input length in UTF16 codes. + private var _inputLength: Int = _inputSequence.length() + + // The append position: where the next append should start. + private var _appendPos: Int = 0 + + // Is there a current match? + private var _hasMatch: Boolean = false + + // Have we found the submatches (groups) of the current match? + // group[0], group[1] are set regardless. + private var _hasGroups: Boolean = false + + // The anchor flag to use when repeating the match to find subgroups. + private var _anchorFlag: Int = 0 + + /** Returns the {@code Pattern} associated with this {@code Matcher}. */ + def pattern(): Pattern = _pattern + + /** + * Resets the {@code Matcher}, rewinding input and + * discarding any match information. + * + * @return the {@code Matcher} itself, for chained method calls + */ + def reset_0(): Matcher = { + _appendPos = 0 + _hasMatch = false + _hasGroups = false + this + } + + /** + * Resets the {@code Matcher} and changes the input. + * + * @param input the new input string + * @return the {@code Matcher} itself, for chained method calls + */ + def reset_1(input: CharSequence): Matcher = { + if (input == null) { + throw new NullPointerException("input is null") + } + reset_0() + _inputSequence = input + _inputLength = input.length() + this + } + + /** + * Returns the start position of the most recent match. 
+ * + * @throws IllegalStateException if there is no match + */ + def start_0(): Int = start_1(0) + + /** + * Returns the end position of the most recent match. + * + * @throws IllegalStateException if there is no match + */ + def end_0(): Int = end_1(0) + + /** + * Returns the start position of a subgroup of the most recent match. + * + * @param group the group index 0 is the overall match + * @throws IllegalStateException if there is no match + * @throws IndexOutOfBoundsException + * if {@code group < 0} or {@code group > groupCount()} + */ + def start_1(group: Int): Int = { + loadGroup(group) + _groups(2 * group) + } + + /** + * Returns the end position of a subgroup of the most recent match. + * + * @param group the group index 0 is the overall match + * @throws IllegalStateException if there is no match + * @throws IndexOutOfBoundsException + * if {@code group < 0} or {@code group > groupCount()} + */ + def end_1(group: Int): Int = { + loadGroup(group) + _groups(2 * group + 1) + } + + /** + * Returns the most recent match. + * + * @throws IllegalStateException if there is no match + */ + def group_0(): String = group_1(0) + + /** + * Returns the subgroup of the most recent match. + * + * @throws IllegalStateException if there is no match + * @throws IndexOutOfBoundsException if {@code group < 0} + * or {@code group > groupCount()} + */ + def group_1(group: Int): String = { + val start: Int = this.start_1(group) + val end: Int = this.end_1(group) + if (start < 0 && end < 0) { + // Means the subpattern didn't get matched at all. + return null + } + substring(start, end) + } + + /** + * Returns the number of subgroups in this pattern. + * + * @return the number of subgroups the overall match (group 0) does not count + */ + def groupCount(): Int = _groupCount + + /** Helper: finds subgroup information if needed for group. 
*/ + private def loadGroup(group: Int): Unit = { + if (group < 0 || group > _groupCount) { + throw new IndexOutOfBoundsException( + "Group index out of bounds: " + group) + } + if (!_hasMatch) { + throw new IllegalStateException("perhaps no match attempted") + } + if (group == 0 || _hasGroups) { + return + } + + // Include the character after the matched text (if there is one). + // This is necessary in the case of inputSequence abc and pattern + // (a)(b$)?(b)? . If we do pass in the trailing c, + // the groups evaluate to new String[] {"ab", "a", null, "b" } + // If we don't, they evaluate to new String[] {"ab", "a", "b", null} + // We know it won't affect the total matched because the previous call + // to match included the extra character, and it was not matched then. + var end: Int = _groups(1) + 1 + if (end > _inputLength) { + end = _inputLength + } + + val ok: Boolean = _pattern.re2.match_5(_inputSequence, + _groups(0), + end, + _anchorFlag, + _groups, + 1 + _groupCount) + // Must match - hasMatch says that the last call with these + // parameters worked just fine. + if (!ok) { + throw new IllegalStateException("inconsistency in matching group data") + } else { + _hasGroups = true + } + } + + /** + * Matches the entire input against the pattern (anchored start and end). + * If there is a match, {@code matches} sets the match state to describe it. + * + * @return true if the entire input matches the pattern + */ + def matches(): Boolean = genMatch(0, RE2.ANCHOR_BOTH) + + /** + * Matches the beginning of input against the pattern (anchored start). + * If there is a match, {@code lookingAt} sets the match state to describe it. + * + * @return true if the beginning of the input matches the pattern + */ + def lookingAt(): Boolean = genMatch(0, RE2.ANCHOR_START) + + /** + * Matches the input against the pattern (unanchored). + * The search begins at the end of the last match, or else the beginning + * of the input. 
+ * If there is a match, {@code find} sets the match state to describe it. + * + * @return true if it finds a match + */ + def find_0(): Boolean = { + var start: Int = 0 + if (_hasMatch) { + start = _groups(1) + if (_groups(0) == _groups(1)) { // empty match - nudge forward + start += 1 + } + } + genMatch(start, RE2.UNANCHORED) + } + + /** + * Matches the input against the pattern (unanchored), + * starting at a specified position. + * If there is a match, {@code find} sets the match state to describe it. + * + * @param start the input position where the search begins + * @return true if it finds a match + * @throws IndexOutOfBoundsException if start is not a valid input position + */ + def find_1(start: Int): Boolean = { + if (start < 0 || start > _inputLength) { + throw new IndexOutOfBoundsException( + "start index out of bounds: " + start) + } + reset_0() + genMatch(start, 0) + } + + /** Helper: does match starting at start, with RE2 anchor flag. */ + private def genMatch(startByte: Int, anchor: Int): Boolean = { + val ok: Boolean = _pattern.re2.match_5(_inputSequence, + startByte, + _inputLength, + anchor, + _groups, + 1) + if (!ok) { + false + } else { + _hasMatch = true + _hasGroups = false + _anchorFlag = anchor + true + } + } + + /** Helper: return substring for [start, end). */ + def substring(start: Int, end: Int): String = + // This is fast for both StringBuilder and String. + _inputSequence.subSequence(start, end).toString() + + /** Helper for Pattern: return input length. */ + def inputLength(): Int = + _inputLength + + /** + * Appends to {@code sb} two strings: the text from the append position up + * to the beginning of the most recent match, and then the replacement with + * submatch groups substituted for references of the form {@code $n}, where + * {@code n} is the group number in decimal. It advances the append position + * to the position where the most recent match ended. + * + *

To embed a literal {@code $}, use \$ (actually {@code "\\$"} with string + * escapes). The escape is only necessary when {@code $} is followed by a + * digit, but it is always allowed. Only {@code $} and {@code \} need + * escaping, but any character can be escaped. + * + *

The group number {@code n} in {@code $n} is always at least one digit + * and expands to use more digits as long as the resulting number is a + * valid group number for this pattern. To cut it off earlier, escape the + * first digit that should not be used. + * + * @param sb the {@link StringBuffer} to append to + * @param replacement the replacement string + * @return the {@code Matcher} itself, for chained method calls + * @throws IllegalStateException if there was no most recent match + * @throws IndexOutOfBoundsException if replacement refers to an invalid group + */ + def appendReplacement(sb: StringBuffer, replacement: String): Matcher = { + val s: Int = start_0() + val e: Int = end_0() + if (_appendPos < s) { + sb.append(substring(_appendPos, s)) + } + _appendPos = e + var last: Int = 0 + var i: Int = 0 + val m: Int = replacement.length() + while (i < m - 1) { + if (replacement.charAt(i) == '\\') { + if (last < i) { + sb.append(replacement.substring(last, i)) + } + i += 1 + last = i + } else if (replacement.charAt(i) == '$') { + var c: Char = replacement.charAt(i + 1) + if ('0' <= c && c <= '9') { + var n: Int = c - '0' + if (last < i) { + sb.append(replacement.substring(last, i)) + } + i += 2 + var break: Boolean = false + while (!break && i < m) { + c = replacement.charAt(i) + if (c < '0' || c > '9' || n * 10 + c - '0' > _groupCount) { + break = true + } else { + n = n * 10 + c - '0' + i += 1 + } + } + if (n > _groupCount) { + throw new IndexOutOfBoundsException("n > number of groups: " + n) + } + val group: String = this.group_1(n) + if (group != null) { + sb.append(group) + } + last = i + i -= 1 + } + } + i += 1 + } + if (last < m) { + sb.append(replacement.substring(last, m)) + } + this + } + + /** + * Appends to {@code sb} the substring of the input from the + * append position to the end of the input. 
+ * + * @param sb the {@link StringBuffer} to append to + * @return the argument {@code sb}, for method chaining + */ + def appendTail(sb: StringBuffer): StringBuffer = { + sb.append(substring(_appendPos, _inputLength)) + sb + } + + /** + * Returns the input with all matches replaced by {@code replacement}, + * interpreted as for {@code appendReplacement}. + * + * @param replacement the replacement string + * @return the input string with the matches replaced + * @throws IndexOutOfBoundsException if replacement refers to an invalid group + */ + def replaceAll(replacement: String): String = + replace(replacement, true) + + /** + * Returns the input with the first match replaced by {@code replacement}, + * interpreted as for {@code appendReplacement}. + * + * @param replacement the replacement string + * @return the input string with the first match replaced + * @throws IndexOutOfBoundsException if replacement refers to an invalid group + */ + def replaceFirst(replacement: String): String = + replace(replacement, false) + + /** Helper: replaceAll/replaceFirst hybrid. */ + private def replace(replacement: String, all: Boolean): String = { + reset_0() + val sb: StringBuffer = new StringBuffer() + var break: Boolean = false + while (!break && find_0()) { + appendReplacement(sb, replacement) + if (!all) { + break = true + } + } + appendTail(sb) + sb.toString() + } +} + +object Matcher { + + /** + * Quotes '\' and '$' in {@code s}, so that the returned string could be + * used in {@link #appendReplacement} as a literal replacement of {@code s}. 
+ * + * @param s the string to be quoted + * @return the quoted string + */ + def quoteReplacement(s: String): String = { + if (s.indexOf('\\') < 0 && s.indexOf('$') < 0) { + return s + } + val sb: java.lang.StringBuilder = new java.lang.StringBuilder() + var i: Int = 0 + while (i < s.length()) { + val c: Char = s.charAt(i) + if (c == '\\' || c == '$') { + sb.append('\\') + } + sb.append(c) + i += 1 + } + sb.toString + } +} diff --git a/input/rsc/Parser.scala b/input/rsc/Parser.scala new file mode 100644 index 0000000..ca44912 --- /dev/null +++ b/input/rsc/Parser.scala @@ -0,0 +1,1743 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Original Go source here: +// http://code.google.com/p/go/source/browse/src/pkg/regexp/syntax/parse.go + +// TODO(adonovan): +// - Eliminate allocations (new int[], new Regexp[], new ArrayList) by +// recycling old arrays on a freelist. + +package com.twitter.re2s + +import java.util.ArrayList +import com.twitter.re2s.Parser._ +import com.twitter.re2s.Regexp.{Op => ROP} + +/** + * A parser of regular expression patterns. + * + * The only public entry point is {@link #parse(String pattern, int flags)}. + */ +class Parser(wholeRegexp: String, _flags: Int) { + // Flags control the behavior of the parser and record information about + // regexp context. + private var flags: Int = _flags // parse mode flags + + // Stack of parsed expressions. + private val stack: Parser.Stack = new Stack() + private var free: Regexp = null + private var numCap: Int = 0 // number of capturing groups seen + + // Allocate a Regexp, from the free list if possible. 
+  private def newRegexp(op: ROP): Regexp = {
+    var re: Regexp = free
+    if (re != null && re.subs != null && re.subs.length > 0) {
+      // Take the head of the free list; its subs(0) slot links to the next
+      // free node (see reuse).
+      free = re.subs(0)
+      re.reinit()
+      re.op = op
+    } else {
+      re = new Regexp()
+      re.op = op
+    }
+    re
+  }
+
+  // reuse makes re the head of the free list. When re has a subs array, its
+  // first slot is overwritten with the previous head so that newRegexp can
+  // follow the chain.
+  private def reuse(re: Regexp): Unit = {
+    if (re.subs != null && re.subs.length > 0) {
+      re.subs(0) = free
+    }
+    free = re
+  }
+
+  // Parse stack manipulation.
+
+  // pop removes and returns the top element of the parse stack.
+  private def pop(): Regexp =
+    stack.remove(stack.size() - 1)
+
+  // popToPseudo removes and returns, in stack order, every element above the
+  // topmost pseudo-operator (or the whole stack if there is none).
+  private def popToPseudo(): Array[Regexp] = {
+    val n: Int = stack.size()
+    var i: Int = n
+    while (i > 0 && !ROP.isPseudo(stack.get(i - 1).op)) {
+      i -= 1
+    }
+    val r: Array[Regexp] = stack.subList(i, n).toArray(new Array[Regexp](n - i))
+    stack.removeRange(i, n)
+    r
+  }
+
+  // push pushes the regexp re onto the parse stack and returns the regexp.
+  // Returns null for a CHAR_CLASS that can be merged with the top-of-stack.
+  private def push(re: Regexp): Regexp = {
+    if (re.op == ROP.CHAR_CLASS &&
+        re.runes.length == 2 &&
+        re.runes(0) == re.runes(1)) {
+      // Collapse range [x-x] -> single rune x.
+      if (maybeConcat(re.runes(0), flags & ~RE2.FOLD_CASE)) {
+        return null
+      }
+      re.op = ROP.LITERAL
+      re.runes = Array[Int](re.runes(0))
+      re.flags = flags & ~RE2.FOLD_CASE
+    } else if ((re.op == ROP.CHAR_CLASS &&
+               re.runes.length == 4 &&
+               re.runes(0) == re.runes(1) &&
+               re.runes(2) == re.runes(3) &&
+               Unicode.simpleFold(re.runes(0)) == re.runes(2) &&
+               Unicode.simpleFold(re.runes(2)) == re.runes(0)) ||
+               (re.op == ROP.CHAR_CLASS &&
+               re.runes.length == 2 &&
+               re.runes(0) + 1 == re.runes(1) &&
+               Unicode.simpleFold(re.runes(0)) == re.runes(1) &&
+               Unicode.simpleFold(re.runes(1)) == re.runes(0))) {
+      // Case-insensitive rune like [Aa] or [Δδ].
+      if (maybeConcat(re.runes(0), flags | RE2.FOLD_CASE)) {
+        return null
+      }
+
+      // Rewrite as (case-insensitive) literal.
+      re.op = ROP.LITERAL
+      re.runes = Array[Int](re.runes(0))
+      re.flags = flags | RE2.FOLD_CASE
+    } else {
+      // Incremental concatenation.
+      maybeConcat(-1, 0)
+    }
+
+    stack.add(re)
+    re
+  }
+
+  // maybeConcat implements incremental concatenation
+  // of literal runes into string nodes. The parser calls this
+  // before each push, so only the top fragment of the stack
+  // might need processing. Since this is called before a push,
+  // the topmost literal is no longer subject to operators like *
+  // (Otherwise ab* would turn into (ab)*.)
+  // If r >= 0 and there's a node left over, maybeConcat uses it
+  // to push r with the given flags.
+  // maybeConcat reports whether r was pushed.
+  private def maybeConcat(r: Int, flags: Int): Boolean = {
+    val n: Int = stack.size()
+    if (n < 2) {
+      return false
+    }
+    val re1: Regexp = stack.get(n - 1)
+    val re2: Regexp = stack.get(n - 2)
+    if (re1.op != ROP.LITERAL ||
+        re2.op != ROP.LITERAL ||
+        (re1.flags & RE2.FOLD_CASE) != (re2.flags & RE2.FOLD_CASE)) {
+      return false
+    }
+
+    // Push re1 into re2.
+    re2.runes = concatRunes(re2.runes, re1.runes)
+
+    // Reuse re1 if possible.
+    if (r >= 0) {
+      re1.runes = Array[Int](r)
+      re1.flags = flags
+      return true
+    }
+
+    pop()
+    reuse(re1)
+    false // did not push r
+  }
+
+  // newLiteral returns a new LITERAL Regexp holding the single rune _r with
+  // the given flags. Under FOLD_CASE the rune is first replaced by
+  // minFoldRune(r).
+  private def newLiteral(_r: Int, flags: Int): Regexp = {
+    var r: Int = _r
+    val re: Regexp = newRegexp(ROP.LITERAL)
+    re.flags = flags
+    if ((flags & RE2.FOLD_CASE) != 0) {
+      r = minFoldRune(r)
+    }
+    // One-element array containing r. `new Array[Int](r)` would instead
+    // allocate r zeroed elements, which is not a literal for the rune r.
+    re.runes = Array[Int](r)
+    re
+  }
+
+  // literal pushes a literal regexp for the rune r on the stack.
+  private def literal(r: Int): Unit =
+    push(newLiteral(r, flags))
+
+  // op pushes a regexp with the given op onto the stack
+  // and returns that regexp.
+  private def op(op: ROP): Regexp = {
+    val re: Regexp = newRegexp(op)
+    re.flags = flags
+    push(re)
+  }
+
+  // repeat replaces the top stack element with itself repeated according to
+  // op, min, max. beforePos is the start position of the repetition operator.
+  // Pre: t is positioned after the initial repetition operator.
+  // Post: t advances past an optional perl-mode '?', or stays put.
+  // Or, it fails with PatternSyntaxException.
+  private def repeat(op: ROP,
+                     min: Int,
+                     max: Int,
+                     beforePos: Int,
+                     t: StringIterator,
+                     lastRepeatPos: Int): Unit = {
+    var flags: Int = this.flags
+    if ((flags & RE2.PERL_X) != 0) {
+      if (t.more() && t.lookingAt_c('?')) {
+        t.skip(1) // '?'
+        flags ^= RE2.NON_GREEDY
+      }
+      if (lastRepeatPos != -1) {
+        // In Perl it is not allowed to stack repetition operators:
+        //   a** is a syntax error, not a doubled star, and a++ means
+        //   something else entirely, which we don't support!
+        throw new PatternSyntaxException(ERR_INVALID_REPEAT_OP,
+                                         t.from(lastRepeatPos))
+      }
+    }
+    val n: Int = stack.size()
+    if (n == 0) {
+      throw new PatternSyntaxException(ERR_MISSING_REPEAT_ARGUMENT,
+                                       t.from(beforePos))
+    }
+    val sub: Regexp = stack.get(n - 1)
+    if (ROP.isPseudo(sub.op)) {
+      throw new PatternSyntaxException(ERR_MISSING_REPEAT_ARGUMENT,
+                                       t.from(beforePos))
+    }
+    // Wrap the top-of-stack element in the repetition node, in place.
+    val re: Regexp = newRegexp(op)
+    re.min = min
+    re.max = max
+    re.flags = flags
+    re.subs = Array[Regexp](sub)
+    stack.set(n - 1, re)
+  }
+
+  // concat replaces the top of the stack (above the topmost '|' or '(') with
+  // its concatenation.
+  private def concat(): Regexp = {
+    maybeConcat(-1, 0)
+
+    // Scan down to find pseudo-operator | or (.
+    val subs: Array[Regexp] = popToPseudo()
+
+    // Empty concatenation is special case.
+    if (subs.length == 0) {
+      return push(newRegexp(ROP.EMPTY_MATCH))
+    }
+
+    push(collapse(subs, ROP.CONCAT))
+  }
+
+  // alternate replaces the top of the stack (above the topmost '(') with its
+  // alternation.
+  private def alternate(): Regexp = {
+    // Scan down to find pseudo-operator (.
+    // There are no | above (.
+    val subs: Array[Regexp] = popToPseudo()
+
+    // Make sure top class is clean.
+    // All the others already are (see swapVerticalBar).
+    if (subs.length > 0) {
+      cleanAlt(subs(subs.length - 1))
+    }
+
+    // Empty alternate is special case
+    // (shouldn't happen but easy to handle).
+    if (subs.length == 0) {
+      return push(newRegexp(ROP.NO_MATCH))
+    }
+
+    push(collapse(subs, ROP.ALTERNATE))
+  }
+
+  // cleanAlt cleans re for eventual inclusion in an alternation.
+  // Only CHAR_CLASS nodes are touched: the class is normalised and, when it
+  // covers every rune (or every rune except '\n'), rewritten to ANY_CHAR
+  // (or ANY_CHAR_NOT_NL). Every other operator is left unchanged.
+  private def cleanAlt(re: Regexp): Unit = {
+    re.op match {
+      case ROP.CHAR_CLASS =>
+        re.runes = new CharClass(re.runes).cleanClass().toArray()
+        if (re.runes.length == 2 &&
+            re.runes(0) == 0 &&
+            re.runes(1) == Unicode.MAX_RUNE) {
+          re.runes = null
+          re.op = ROP.ANY_CHAR
+          return
+        }
+        if (re.runes.length == 4 &&
+            re.runes(0) == 0 &&
+            re.runes(1) == '\n' - 1 &&
+            re.runes(2) == '\n' + 1 &&
+            re.runes(3) == Unicode.MAX_RUNE) {
+          re.runes = null
+          re.op = ROP.ANY_CHAR_NOT_NL
+          return
+        }
+      case _ =>
+        // A Scala match with no matching case throws MatchError, so the
+        // "nothing to clean" outcome for other operators must be explicit.
+        ()
+    }
+  }
+
+  // collapse returns the result of applying op to subs.
+  // If subs contains op nodes, they all get hoisted up
+  // so that there is never a concat of a concat or an
+  // alternate of an alternate.
+  private def collapse(subs: Array[Regexp], op: ROP): Regexp = {
+    if (subs.length == 1) {
+      return subs(0)
+    }
+    // Concatenate subs iff op is same.
+    // Compute length in first pass.
+    var len: Int = 0
+    var i: Int = 0
+    while (i < subs.length) {
+      val sub: Regexp = subs(i)
+      len += (if (sub.op == op) sub.subs.length else 1)
+      i += 1
+    }
+    // Second pass: copy, flattening children that have the same op.
+    val newsubs: Array[Regexp] = new Array[Regexp](len)
+    i = 0
+    var j: Int = 0
+    while (j < subs.length) {
+      val sub: Regexp = subs(j)
+      if (sub.op == op) {
+        System.arraycopy(sub.subs, 0, newsubs, i, sub.subs.length)
+        i += sub.subs.length
+        reuse(sub)
+      } else {
+        newsubs(i) = sub
+        i += 1
+      }
+      j += 1
+    }
+    var re: Regexp = newRegexp(op)
+    re.subs = newsubs
+
+    if (op == ROP.ALTERNATE) {
+      re.subs = factor(re.subs, re.flags)
+      if (re.subs.length == 1) {
+        // Factoring left a single alternative: return it directly.
+        val old: Regexp = re
+        re = re.subs(0)
+        reuse(old)
+      }
+    }
+    re
+  }
+
+  // factor factors common prefixes from the alternation list sub.
It + // returns a replacement list that reuses the same storage and frees + // (passes to p.reuse) any removed *Regexps. + // + // For example, + // ABC|ABD|AEF|BCX|BCY + // simplifies by literal prefix extraction to + // A(B(C|D)|EF)|BC(X|Y) + // which simplifies by character class introduction to + // A(B[CD]|EF)|BC[XY] + // + private def factor(array: Array[Regexp], flags: Int): Array[Regexp] = { + if (array.length < 2) { + return array + } + + // The following code is subtle, because it's a literal Java + // translation of code that makes clever use of Go "slices". + // A slice is a triple (array, offset, length), and the Go + // implementation uses two slices, |sub| and |out| backed by the + // same array. In Java, we have to be explicit about all of these + // variables, so: + // + // Go Java + // sub (array, s, lensub) + // out (array, 0, lenout) // (always a prefix of |array|) + // + // In the comments we'll use the logical notation of go slices, e.g. sub[i] + // even though the Java code will read array[s + i]. + + var s: Int = 0 // offset of first |sub| within array. + var lensub: Int = array.length // = len(sub) + var lenout: Int = 0 // = len(out) + + // Round 1: Factor out common literal prefixes. + // Note: (str, strlen) and (istr, istrlen) are like Go slices + // onto a prefix of some Regexp's runes array (hence offset=0). + var str: Array[Int] = null + var strlen: Int = 0 + var strflags: Int = 0 + var start: Int = 0 + var i: Int = 0 + while (i <= lensub) { + // Invariant: the Regexps that were in sub[0:start] have been + // used or marked for reuse, and the slice space has been reused + // for out (len <= start). + // + // Invariant: sub[start:i] consists of regexps that all begin + // with str as modified by strflags. + var istr: Array[Int] = null + var istrlen: Int = 0 + var iflags: Int = 0 + var continue: Boolean = false + if (i < lensub) { + // NB, we inlined Go's leadingString() since Java has no pair return. 
+ var re: Regexp = array(s + i) + if (re.op == ROP.CONCAT && re.subs.length > 0) { + re = re.subs(0) + } + if (re.op == ROP.LITERAL) { + istr = re.runes + istrlen = re.runes.length + iflags = re.flags & RE2.FOLD_CASE + } + // istr is the leading literal string that re begins with. + // The string refers to storage in re or its children. + + if (iflags == strflags) { + var same: Int = 0 + while (same < strlen && + same < istrlen && + str(same) == istr(same)) { + same += 1 + } + if (same > 0) { + // Matches at least one rune in current range. + // Keep going around. + strlen = same + continue = true + } + } + } + + if (!continue) { + // Found end of a run with common leading literal string: + // sub[start:i] all begin with str[0:strlen], but sub[i] + // does not even begin with str[0]. + // + // Factor out common string and append factored expression to out. + if (i == start) { + // Nothing to do - run of length 0. + } else if (i == start + 1) { + // Just one: don't bother factoring. + array(lenout) = array(s + start) + lenout += 1 + } else { + // Construct factored form: prefix(suffix1|suffix2|...) + val prefix: Regexp = newRegexp(ROP.LITERAL) + prefix.flags = strflags + prefix.runes = Utils.subarray_i(str, 0, strlen) + + var j: Int = start + while (j < i) { + array(s + j) = removeLeadingString(array(s + j), strlen) + j += 1 + } + // Recurse. + val suffix: Regexp = + collapse(subarray(array, s + start, s + i), ROP.ALTERNATE) + val re: Regexp = newRegexp(ROP.CONCAT) + re.subs = Array[Regexp](prefix, suffix) + array(lenout) = re + lenout += 1 + } + + // Prepare for next iteration. + start = i + str = istr + strlen = istrlen + strflags = iflags + } + + i += 1 + } + // In Go: sub = out + lensub = lenout + s = 0 + + // Round 2: Factor out common complex prefixes, + // just the first piece of each concatenation, + // whatever it is. This is good enough a lot of the time. 
+ start = 0 + lenout = 0 + var first: Regexp = null + i = 0 + while (i <= lensub) { + // Invariant: the Regexps that were in sub[0:start] have been + // used or marked for reuse, and the slice space has been reused + // for out (lenout <= start). + // + // Invariant: sub[start:i] consists of regexps that all begin with + // ifirst. + var ifirst: Regexp = null + var continue: Boolean = false + if (i < lensub) { + ifirst = leadingRegexp(array(s + i)) + if (first != null && first.equals(ifirst)) { + continue = true + } + } + + if (!continue) { + // Found end of a run with common leading regexp: + // sub[start:i] all begin with first but sub[i] does not. + // + // Factor out common regexp and append factored expression to out. + if (i == start) { + // Nothing to do - run of length 0. + } else if (i == start + 1) { + // Just one: don't bother factoring. + array(lenout) = array(s + start) + lenout += 1 + } else { + // Construct factored form: prefix(suffix1|suffix2|...) + val prefix: Regexp = first + var j: Int = start + while (j < i) { + val reuse: Boolean = j != start // prefix came from sub[start] + array(s + j) = removeLeadingRegexp(array(s + j), reuse) + j += 1 + } + // recurse + val suffix: Regexp = + collapse(subarray(array, s + start, s + i), ROP.ALTERNATE) + val re: Regexp = newRegexp(ROP.CONCAT) + re.subs = Array[Regexp](prefix, suffix) + array(lenout) = re + lenout += 1 + } + + // Prepare for next iteration. + start = i + first = ifirst + } + + i += 1 + } + // In Go: sub = out + lensub = lenout + s = 0 + + // Round 3: Collapse runs of single literals into character classes. + start = 0 + lenout = 0 + i = 0 + while (i <= lensub) { + // Invariant: the Regexps that were in sub[0:start] have been + // used or marked for reuse, and the slice space has been reused + // for out (lenout <= start). + // + // Invariant: sub[start:i] consists of regexps that are either + // literal runes or character classes. 
+ var continue: Boolean = false + if (i < lensub && isCharClass(array(s + i))) { + continue = true + } + + if (!continue) { + // sub[i] is not a char or char class + // emit char class for sub[start:i]... + if (i == start) { + // Nothing to do - run of length 0. + } else if (i == start + 1) { + array(lenout) = array(s + start) + lenout += 1 + } else { + // Make new char class. + // Start with most complex regexp in sub[start]. + var max: Int = start + var j: Int = start + 1 + while (j < i) { + val subMax: Regexp = array(s + max) + val subJ: Regexp = array(s + j) + if (subMax.op < subJ.op || + subMax.op == subJ.op && subMax.runes.length < subJ.runes.length) { + max = j + } + j += 1 + } + // swap sub[start], sub[max]. + val tmp: Regexp = array(s + start) + array(s + start) = array(s + max) + array(s + max) = tmp + + j = start + 1 + while (j < i) { + mergeCharClass(array(s + start), array(s + j)) + reuse(array(s + j)) + j += 1 + } + cleanAlt(array(s + start)) + array(lenout) = array(s + start) + lenout += 1 + } + + // ... and then emit sub[i]. + if (i < lensub) { + array(lenout) = array(s + i) + lenout += 1 + } + start = i + 1 + } + + i += 1 + } + // In Go: sub = out + lensub = lenout + s = 0 + + // Round 4: Collapse runs of empty matches into a single empty match. + start = 0 + lenout = 0 + i = 0 + while (i < lensub) { + var continue: Boolean = false + if (i + 1 < lensub && + array(s + i).op == ROP.EMPTY_MATCH && + array(s + i + 1).op == ROP.EMPTY_MATCH) { + continue = true + } + if (!continue) { + array(lenout) = array(s + i) + lenout += 1 + } + i += 1 + } + // In Go: sub = out + lensub = lenout + s = 0 + + subarray(array, s, lensub) + } + + // removeLeadingString removes the first n leading runes + // from the beginning of re. It returns the replacement for re. 
+  private def removeLeadingString(_re: Regexp, n: Int): Regexp = {
+    var re: Regexp = _re
+    if (re.op == ROP.CONCAT && re.subs.length > 0) {
+      // Removing a leading string in a concatenation
+      // might simplify the concatenation.
+      val sub: Regexp = removeLeadingString(re.subs(0), n)
+      re.subs(0) = sub
+      if (sub.op == ROP.EMPTY_MATCH) {
+        reuse(sub)
+        re.subs.length match {
+          case 0 | 1 =>
+            // Impossible but handle.
+            re.op = ROP.EMPTY_MATCH
+            re.subs = null
+          case 2 =>
+            // Only one element remains: it replaces the concatenation.
+            val old: Regexp = re
+            re = re.subs(1)
+            reuse(old)
+          case _ =>
+            re.subs = subarray(re.subs, 1, re.subs.length)
+        }
+      }
+      return re
+    }
+
+    if (re.op == ROP.LITERAL) {
+      re.runes = Utils.subarray_i(re.runes, n, re.runes.length)
+      if (re.runes.length == 0) {
+        re.op = ROP.EMPTY_MATCH
+      }
+    }
+    re
+  }
+
+  // removeLeadingRegexp removes the leading regexp in re.
+  // It returns the replacement for re.
+  // If reuse is true, it passes the removed regexp (if no longer needed) to
+  // reuse.
+  private def removeLeadingRegexp(_re: Regexp, reuse: Boolean): Regexp = {
+    var re: Regexp = _re
+    if (re.op == ROP.CONCAT && re.subs.length > 0) {
+      if (reuse) {
+        this.reuse(re.subs(0))
+      }
+      re.subs = subarray(re.subs, 1, re.subs.length)
+      re.subs.length match {
+        case 0 =>
+          re.op = ROP.EMPTY_MATCH
+          re.subs = Regexp.EMPTY_SUBS
+        case 1 =>
+          // Only one element remains: it replaces the concatenation.
+          val old: Regexp = re
+          re = re.subs(0)
+          this.reuse(old)
+        case _ =>
+          // Two or more elements remain: re stays a CONCAT of the rest.
+          // Without this case the match throws MatchError.
+          ()
+      }
+      return re
+    }
+    if (reuse) {
+      this.reuse(re)
+    }
+    return newRegexp(ROP.EMPTY_MATCH)
+  }
+
+  // Parsing.
+
+  private def parseInternal(): Regexp = {
+    if ((flags & RE2.LITERAL) != 0) {
+      // Trivial parser for literal string.
+      return literalRegexp(wholeRegexp, flags)
+    }
+
+    // Otherwise, must do real work.
+ var lastRepeatPos: Int = -1 + var min: Int = -1 + var max: Int = -1 + var t: Parser.StringIterator = new StringIterator(wholeRegexp) + while (t.more()) { + var repeatPos: Int = -1 + t.peek() match { + case '(' => + if ((flags & RE2.PERL_X) != 0 && t.lookingAt_s("(?")) { + // Flag changes and non-capturing groups. + parsePerlFlags(t) + } else { + numCap += 1 + op(ROP.LEFT_PAREN).cap = numCap + t.skip(1) // '(' + } + + case '|' => + parseVerticalBar() + t.skip(1) // '|' + + case ')' => + parseRightParen() + t.skip(1) // ')' + + case '^' => + if ((flags & RE2.ONE_LINE) != 0) { + op(ROP.BEGIN_TEXT) + } else { + op(ROP.BEGIN_LINE) + } + t.skip(1) // '^' + + case '$' => + if ((flags & RE2.ONE_LINE) != 0) { + op(ROP.END_TEXT).flags |= RE2.WAS_DOLLAR + } else { + op(ROP.END_LINE) + } + t.skip(1) // '$' + + case '.' => + if ((flags & RE2.DOT_NL) != 0) { + op(ROP.ANY_CHAR) + } else { + op(ROP.ANY_CHAR_NOT_NL) + } + t.skip(1) // '.' + + case '[' => + parseClass(t) + + case '*' | '+' | '?' => + repeatPos = t.pos() + val op: Int = + t.pop match { + case '*' => ROP.STAR + case '+' => ROP.PLUS + case '?' => ROP.QUEST + } + repeat(op, min, max, repeatPos, t, lastRepeatPos) + // (min and max are now dead.) + + case '{' => + repeatPos = t.pos() + val minMax: Int = parseRepeat(t) + if (minMax < 0) { + // If the repeat cannot be parsed, { is a literal. 
+ t.rewindTo(repeatPos) + literal(t.pop()) // '{' + } else { + min = minMax >> 16 + max = minMax.&(0xffff).toShort // sign extend + repeat(ROP.REPEAT, min, max, repeatPos, t, lastRepeatPos) + } + + case '\\' => + var breakBigswitch: Boolean = false + val savedPos: Int = t.pos() + t.skip(1) // '\\' + if ((flags & RE2.PERL_X) != 0 && t.more()) { + val c: Int = t.pop() + c match { + case 'A' => + op(ROP.BEGIN_TEXT) + breakBigswitch = true + case 'b' => + op(ROP.WORD_BOUNDARY) + breakBigswitch = true + case 'B' => + op(ROP.NO_WORD_BOUNDARY) + breakBigswitch = true + case 'C' => + // any byte not supported + throw new PatternSyntaxException(ERR_INVALID_ESCAPE, "\\C") + case 'Q' => + // \Q ... \E: the ... is always literals + var lit: String = t.rest() + val i: Int = lit.indexOf("\\E") + if (i >= 0) { + lit = lit.substring(0, i) + } + t.skipString(lit) + t.skipString("\\E") + push(literalRegexp(lit, flags)) + breakBigswitch = true + case 'z' => + op(ROP.END_TEXT) + breakBigswitch = true + case _ => + t.rewindTo(savedPos) + } + } + + if (!breakBigswitch) { + val re: Regexp = newRegexp(ROP.CHAR_CLASS) + re.flags = flags + + // Look for Unicode character group like \p{Han} + if (t.lookingAt_s("\\p") || t.lookingAt_s("\\P")) { + val cc: CharClass = new CharClass(Utils.EMPTY_INTS) + if (parseUnicodeClass(t, cc)) { + re.runes = cc.toArray() + push(re) + breakBigswitch = true + } + } + + if (!breakBigswitch) { + // Perl character class escape. + val cc: CharClass = new CharClass(Utils.EMPTY_INTS) + if (parsePerlClassEscape(t, cc)) { + re.runes = cc.toArray() + push(re) + breakBigswitch = true + } + } + + if (!breakBigswitch) { + t.rewindTo(savedPos) + this.reuse(re) + + // Ordinary single-character escape. 
+ literal(parseEscape(t)) + } + } + + case _ => + literal(t.pop()) + } + lastRepeatPos = repeatPos + } + + concat() + if (swapVerticalBar()) { + pop() // pop vertical bar + } + alternate() + + val n: Int = stack.size() + if (n != 1) { + throw new PatternSyntaxException(ERR_MISSING_PAREN, wholeRegexp) + } + stack.get(0) + } + + // parsePerlFlags parses a Perl flag setting or non-capturing group or both, + // like (?i) or (?: or (?i:. + // Pre: t at "(?". Post: t after ")". + // Sets numCap. + private def parsePerlFlags(t: StringIterator): Unit = { + val startPos: Int = t.pos() + + // Check for named captures, first introduced in Python's regexp library. + // As usual, there are three slightly different syntaxes: + // + // (?Pexpr) the original, introduced by Python + // (?expr) the .NET alteration, adopted by Perl 5.10 + // (?'name'expr) another .NET alteration, adopted by Perl 5.10 + // + // Perl 5.10 gave in and implemented the Python version too, + // but they claim that the last two are the preferred forms. + // PCRE and languages based on it (specifically, PHP and Ruby) + // support all three as well. EcmaScript 4 uses only the Python form. + // + // In both the open source world (via Code Search) and the + // Google source tree, (?Pname) is the dominant form, + // so that's the one we implement. One is enough. + val s: String = t.rest() + if (s.startsWith("(?P<")) { + // Pull out name. + val end: Int = s.indexOf('>') + if (end < 0) { + throw new PatternSyntaxException(ERR_INVALID_NAMED_CAPTURE, s) + } + val name: String = s.substring(4, end) // "name" + t.skipString(name) + t.skip(5) // "(?P<>" + if (!isValidCaptureName(name)) { + throw new PatternSyntaxException(ERR_INVALID_NAMED_CAPTURE, + s.substring(0, end)) // "(?P" + } + // Like ordinary capture, but named. + val re: Regexp = op(ROP.LEFT_PAREN) + numCap += 1 + re.cap = numCap + re.name = name + return + } + + // Non-capturing group. Might also twiddle Perl flags. + t.skip(2) // "(?" 
+ var flags: Int = this.flags + var sign: Int = +1 + var sawFlag: Boolean = false + + var breakLoop: Boolean = false + while (t.more() && !breakLoop) { + val c: Int = t.pop() + c match { + // Flags. + case 'i' => + flags |= RE2.FOLD_CASE + sawFlag = true + case 'm' => + flags &= ~RE2.ONE_LINE + sawFlag = true + case 's' => + flags |= RE2.DOT_NL + sawFlag = true + case 'U' => + flags |= RE2.NON_GREEDY + sawFlag = true + + // Switch to negation. + case '-' => + if (sign < 0) { + breakLoop = true + } else { + sign = -1 + // Invert flags so that | above turn into &~ and vice versa. + // We'll invert flags again before using it below. + flags = ~flags + sawFlag = false + } + + // End of flags, starting group or not. + case ':' | ')' => + if (sign < 0) { + if (!sawFlag) { + breakLoop = true + } + if (!breakLoop) { + flags = ~flags + } + } + if (!breakLoop) { + if (c == ':') { + // Open new group + op(ROP.LEFT_PAREN) + } + this.flags = flags + return + } + + case _ => + breakLoop = true + } + } + + throw new PatternSyntaxException(ERR_INVALID_PERL_OP, t.from(startPos)) + } + + // parseVerticalBar handles a | in the input. + private def parseVerticalBar(): Unit = { + concat() + + // The concatenation we just parsed is on top of the stack. + // If it sits above an opVerticalBar, swap it below + // (things below an opVerticalBar become an alternation). + // Otherwise, push a new vertical bar. + if (!swapVerticalBar()) { + op(ROP.VERTICAL_BAR) + } + } + + // If the top of the stack is an element followed by an opVerticalBar + // swapVerticalBar swaps the two and returns true. + // Otherwise it returns false. + private def swapVerticalBar(): Boolean = { + // If above and below vertical bar are literal or char class, + // can merge into a single char class. 
+ val n: Int = stack.size() + if (n >= 3 && + stack.get(n - 2).op == ROP.VERTICAL_BAR && + isCharClass(stack.get(n - 1)) && + isCharClass(stack.get(n - 3))) { + var re1: Regexp = stack.get(n - 1) + var re3: Regexp = stack.get(n - 3) + // Make re3 the more complex of the two. + if (re1.op > re3.op) { + val tmp: Regexp = re3 + re3 = re1 + re1 = tmp + stack.set(n - 3, re3) + } + mergeCharClass(re3, re1) + this.reuse(re1) + pop() + return true + } + + if (n >= 2) { + val re1: Regexp = stack.get(n - 1) + val re2: Regexp = stack.get(n - 2) + if (re2.op == ROP.VERTICAL_BAR) { + if (n >= 3) { + // Now out of reach. + // Clean opportunistically. + cleanAlt(stack.get(n - 3)) + } + stack.set(n - 2, re1) + stack.set(n - 1, re2) + return true + } + } + return false + } + + // parseRightParen handles a ')' in the input. + private def parseRightParen(): Unit = { + concat() + if (swapVerticalBar()) { + pop() // pop vertical bar + } + alternate() + + val n: Int = stack.size() + if (n < 2) { + throw new PatternSyntaxException(ERR_INTERNAL_ERROR, "stack underflow") + } + val re1: Regexp = pop() + val re2: Regexp = pop() + if (re2.op != ROP.LEFT_PAREN) { + throw new PatternSyntaxException(ERR_MISSING_PAREN, wholeRegexp) + } + // Restore flags at time of paren. + this.flags = re2.flags + if (re2.cap == 0) { + // Just for grouping. + push(re1) + } else { + re2.op = ROP.CAPTURE + re2.subs = Array[Regexp](re1) + push(re2) + } + } + + // parsePerlClassEscape parses a leading Perl character class escape like \d + // from the beginning of |t|. If one is present, it appends the characters + // to cc and returns true. The iterator is advanced past the escape + // on success, undefined on failure, in which case false is returned. + private def parsePerlClassEscape(t: StringIterator, cc: CharClass): Boolean = { + val beforePos: Int = t.pos() + if ((flags & RE2.PERL_X) == 0 || + !t.more() || t.pop() != '\\' || // consume '\\' + !t.more()) { + return false + } + t.pop() // e.g. 
advance past 'd' in "\\d" + val g: CharGroup = CharGroup.PERL_GROUPS.get(t.from(beforePos)) + if (g == null) { + return false + } + cc.appendGroup(g, (flags & RE2.FOLD_CASE) != 0) + return true + } + + // parseNamedClass parses a leading POSIX named character class like + // [:alnum:] from the beginning of t. If one is present, it appends the + // characters to cc, advances the iterator, and returns true. + // Pre: t at "[:". Post: t after ":]". + // On failure (no class of than name), throws PatternSyntaxException. + // On misparse, returns false t.pos() is undefined. + private def parseNamedClass(t: StringIterator, cc: CharClass): Boolean = { + // (Go precondition check deleted.) + val cls: String = t.rest() + val i: Int = cls.indexOf(":]") + if (i < 0) { + return false + } + val name: String = cls.substring(0, i + 2) // "[:alnum:]" + t.skipString(name) + val g: CharGroup = CharGroup.POSIX_GROUPS.get(name) + if (g == null) { + throw new PatternSyntaxException(ERR_INVALID_CHAR_RANGE, name) + } + cc.appendGroup(g, (flags & RE2.FOLD_CASE) != 0) + true + } + + // parseUnicodeClass() parses a leading Unicode character class like \p{Han} + // from the beginning of t. If one is present, it appends the characters to + // to |cc|, advances |t| and returns true. + // + // Returns false if such a pattern is not present or UNICODE_GROUPS + // flag is not enabled |t.pos()| is not advanced in this case. + // Indicates error by throwing PatternSyntaxException. + private def parseUnicodeClass(t: StringIterator, cc: CharClass): Boolean = { + val startPos: Int = t.pos() + if ((flags & RE2.UNICODE_GROUPS) == 0 || + !t.lookingAt_s("\\p") && !t.lookingAt_s("\\P")) { + return false + } + t.skip(1) // '\\' + // Committed to parse or throw exception. + var sign: Int = +1 + var c: Int = t.pop() // 'p' or 'P' + if (c == 'P') { + sign = -1 + } + c = t.pop() + var name: String = "" + if (c != '{') { + // Single-letter name. + name = Utils.runeToString(c) + } else { + // Name is in braces. 
+ val rest: String = t.rest() + val end: Int = rest.indexOf('}') + if (end < 0) { + t.rewindTo(startPos) + throw new PatternSyntaxException(ERR_INVALID_CHAR_RANGE, t.rest()) + } + name = rest.substring(0, end) // e.g. "Han" + t.skipString(name) + t.skip(1) // '}' + // Don't use skip(end) because it assumes UTF-16 coding, and + // StringIterator doesn't guarantee that. + } + + // Group can have leading negation too. + // \p{^Han} == \P{Han}, \P{^Han} == \p{Han}. + if (!name.isEmpty() && name.charAt(0) == '^') { + sign = -sign + name = name.substring(1) + } + + val pair: Parser.Pair[Array[Array[Int]], Array[Array[Int]]] = unicodeTable(name) + if (pair == null) { + throw new PatternSyntaxException(ERR_INVALID_CHAR_RANGE, + t.from(startPos)) + } + val tab: Array[Array[Int]] = pair.first + val fold: Array[Array[Int]] = pair.second // fold-equivalent table + + // Variation of CharClass.appendGroup() for tables. + if ((flags & RE2.FOLD_CASE) == 0 || fold == null) { + cc.appendTableWithSign(tab, sign) + } else { + // Merge and clean tab and fold in a temporary buffer. + // This is necessary for the negative case and just tidy + // for the positive case. + val tmp: Array[Int] = new CharClass(Utils.EMPTY_INTS) + .appendTable(tab) + .appendTable(fold) + .cleanClass() + .toArray() + cc.appendClassWithSign(tmp, sign) + } + return true + } + + // parseClass parses a character class and pushes it onto the parse stack. + // + // NOTES: + // Pre: at '[' Post: after ']'. + // Mutates stack. Advances iterator. May throw. + private def parseClass(t: StringIterator): Unit = { + var startPos: Int = t.pos() + t.skip(1) // '[' + val re: Regexp = newRegexp(ROP.CHAR_CLASS) + re.flags = flags + val cc: CharClass = new CharClass(Utils.EMPTY_INTS) + + var sign: Int = +1 + if (t.more() && t.lookingAt_c('^')) { + sign = -1 + t.skip(1) // '^' + + // If character class does not match \n, add it here, + // so that negation later will do the right thing. 
+ if ((flags & RE2.CLASS_NL) == 0) { + cc.appendRange('\n', '\n') + } + } + + var first: Boolean = true // ']' and '-' are okay as first char in class + while (!t.more() || t.peek() != ']' || first) { + var continue: Boolean = false + + // POSIX: - is only okay unescaped as first or last in class. + // Perl: - is okay anywhere. + if (t.more() && t.lookingAt_c('-') && + (flags & RE2.PERL_X) == 0 && + !first) { + val s: String = t.rest() + if (s.equals("-") || !s.startsWith("-]")) { + t.rewindTo(startPos) + throw new PatternSyntaxException(ERR_INVALID_CHAR_RANGE, t.rest()) + } + } + first = false + + val beforePos: Int = t.pos() + + // Look for POSIX [:alnum:] etc. + if (t.lookingAt_s("[:")) { + if (parseNamedClass(t, cc)) { + continue = true + } + if (!continue) { + t.rewindTo(beforePos) + } + } + + if (!continue) { + // Look for Unicode character group like \p{Han}. + if (parseUnicodeClass(t, cc)) { + continue = true + } else { + // Look for Perl character class symbols (extension). + if (parsePerlClassEscape(t, cc)) { + continue = true + } else { + t.rewindTo(beforePos) + + // Single character or simple range. + var lo: Int = parseClassChar(t, startPos) + var hi: Int = lo + if (t.more() && t.lookingAt_c('-')) { + t.skip(1) // '-' + if (t.more() && t.lookingAt_c(']')) { + // [a-] means (a|-) so check for final ]. 
+ t.skip(-1) + } else { + hi = parseClassChar(t, startPos) + if (hi < lo) { + throw new PatternSyntaxException(ERR_INVALID_CHAR_RANGE, + t.from(beforePos)) + } + } + } + if ((flags & RE2.FOLD_CASE) == 0) { + cc.appendRange(lo, hi) + } else { + cc.appendFoldedRange(lo, hi) + } + } + } + } + } + t.skip(1) // ']' + + cc.cleanClass() + if (sign < 0) { + cc.negateClass() + } + re.runes = cc.toArray() + push(re) + } +} + +object Parser { + + // Unexpected error + private final val ERR_INTERNAL_ERROR: String = + "regexp/syntax: internal error" + + // Parse errors + private final val ERR_INVALID_CHAR_CLASS: String = + "invalid character class" + private final val ERR_INVALID_CHAR_RANGE: String = + "invalid character class range" + private final val ERR_INVALID_ESCAPE: String = + "invalid escape sequence" + private final val ERR_INVALID_NAMED_CAPTURE: String = + "invalid named capture" + private final val ERR_INVALID_PERL_OP: String = + "invalid or unsupported Perl syntax" + private final val ERR_INVALID_REPEAT_OP: String = + "invalid nested repetition operator" + private final val ERR_INVALID_REPEAT_SIZE: String = + "invalid repeat count" + private final val ERR_MISSING_BRACKET: String = + "missing closing ]" + private final val ERR_MISSING_PAREN: String = + "missing closing )" + private final val ERR_MISSING_REPEAT_ARGUMENT: String = + "missing argument to repetition operator" + private final val ERR_TRAILING_BACKSLASH: String = + "trailing backslash at end of expression" + + // Hack to expose ArrayList.removeRange(). + private class Stack() extends ArrayList[Regexp]() { + override def get(index: Int): Regexp = super.get(index) + override def removeRange(fromIndex: Int, toIndex: Int): Unit = + super.removeRange(fromIndex, toIndex) + } + + // minFoldRune returns the minimum rune fold-equivalent to r. 
+  private def minFoldRune(_r: Int): Int = {
+    // Runes outside [MIN_FOLD, MAX_FOLD] are returned unchanged.
+    if (_r < Unicode.MIN_FOLD || _r > Unicode.MAX_FOLD) {
+      return _r
+    }
+    val r0: Int = _r
+    var min: Int = r0
+    // Keep applying simpleFold until it yields the starting rune again,
+    // tracking the smallest rune produced along the way.
+    var r: Int = Unicode.simpleFold(r0)
+    while (r != r0) {
+      if (min > r) {
+        min = r
+      }
+      r = Unicode.simpleFold(r)
+    }
+    min
+  }
+
+  // leadingRegexp returns the leading regexp that re begins with.
+  // The regexp refers to storage in re or its children.
+  // Returns null when re itself, or the first sub-expression of a
+  // non-empty CONCAT, is an EMPTY_MATCH.
+  private def leadingRegexp(re: Regexp): Regexp = {
+    if (re.op == ROP.EMPTY_MATCH) {
+      return null
+    }
+    if (re.op == ROP.CONCAT && re.subs.length > 0) {
+      val sub: Regexp = re.subs(0)
+      if (sub.op == ROP.EMPTY_MATCH) {
+        return null
+      }
+      return sub
+    }
+    re
+  }
+
+  // literalRegexp returns a new LITERAL regexp with the given flags whose
+  // runes are Utils.stringToRunes(s).
+  private def literalRegexp(s: String, flags: Int): Regexp = {
+    val re: Regexp = new Regexp()
+    re.op = ROP.LITERAL
+    re.flags = flags
+    re.runes = Utils.stringToRunes(s)
+    re
+  }
+
+  // StringIterator: a stream of runes with an opaque cursor, permitting
+  // rewinding. The units of the cursor are not specified beyond the
+  // fact that ASCII characters are single width. (Cursor positions
+  // could be UTF-8 byte indices, UTF-16 code indices or rune indices.)
+  //
+  // In particular, be careful with:
+  // - skip(int): only use this to advance over ASCII characters
+  //   since these always have a width of 1.
+  // - skip(String): only use this to advance over strings which are
+  //   known to be at the current position, e.g. due to prior call to
+  //   lookingAt().
+  // Only use pop() to advance over possibly non-ASCII runes.
+  private class StringIterator(private val str: String) {
+    private var _pos: Int = 0 // current position in UTF-16 string
+
+    // Returns the cursor position. Do not interpret the result!
+    def pos(): Int = _pos
+
+    // Resets the cursor position to a previous value returned by pos().
+    def rewindTo(pos: Int): Unit = this._pos = pos
+
+    // Returns true unless the stream is exhausted.
+    def more(): Boolean = _pos < str.length()
+
+    // Returns the rune at the cursor position.
+    // Precondition: |more()|.
+    def peek(): Int = str.codePointAt(_pos)
+
+    // Advances the cursor by |n| positions, which must be ASCII runes.
+    //
+    // (In practise, this is only ever used to skip over regexp
+    // metacharacters that are ASCII, so there is no numeric difference
+    // between indices into UTF-8 bytes, UTF-16 codes and runes.)
+    def skip(n: Int): Unit = _pos += n
+
+    // Advances the cursor by the number of cursor positions in |s|.
+    def skipString(s: String): Unit = _pos += s.length()
+
+    // Returns the rune at the cursor position, and advances the cursor
+    // past it. Precondition: |more()|.
+    def pop(): Int = {
+      val r: Int = str.codePointAt(_pos)
+      _pos += Character.charCount(r)
+      r
+    }
+
+    // Equivalent to peek() == c but more efficient because we
+    // don't support surrogates. Precondition: |more()|.
+    def lookingAt_c(c: Char): Boolean = str.charAt(_pos) == c
+
+    // Equivalent to rest().startsWith(s), but compares in place instead of
+    // allocating the remaining substring on every probe.
+    def lookingAt_s(s: String): Boolean = str.startsWith(s, _pos)
+
+    // Returns the rest of the pattern as a Java UTF-16 string.
+    def rest(): String = str.substring(_pos)
+
+    // Returns the substring from |beforePos| to the current position.
+    // |beforePos| must have been previously returned by |pos()|.
+    def from(beforePos: Int): String = str.substring(beforePos, _pos)
+
+    override def toString(): String = rest()
+  }
+
+  /**
+   * Parse regular expression pattern {@var pattern} with mode flags
+   * {@var flags}.
+   */
+  def parse(pattern: String, flags: Int): Regexp =
+    (new Parser(pattern, flags)).parseInternal()
+
+  // parseRepeat parses {min} (max=min) or {min,} (max=-1) or {min,max}.
+  // If |t| is not of that form, it returns -1.
+  // If |t| has the right form but a value overflows or exceeds 1000, or
+  // max is present and min > max, it throws PatternSyntaxException
+  // (ERR_INVALID_REPEAT_SIZE).
+  // On success, returns a nonnegative number encoding min/max in the
+  // high/low signed halfwords of the result. (Note: min >= 0; max may
+  // be -1.)
+  //
+  // On success, advances |t| beyond the repeat; otherwise |t.pos()| is
+  // undefined.
+  private def parseRepeat(t: StringIterator): Int = {
+    val start: Int = t.pos()
+    if (!t.more() || !t.lookingAt_c('{')) {
+      return -1
+    }
+    t.skip(1) // '{'
+    val min: Int = parseInt(t) // (can be -2)
+    if (min == -1) {
+      return -1
+    }
+    if (!t.more()) {
+      return -1
+    }
+    var max: Int = 0
+    if (!t.lookingAt_c(',')) {
+      max = min
+    } else {
+      t.skip(1) // ','
+      if (!t.more()) {
+        return -1
+      }
+      if (t.lookingAt_c('}')) {
+        max = -1
+      } else {
+        max = parseInt(t) // (can be -2)
+        if (max == -1) {
+          return -1
+        }
+      }
+    }
+    if (!t.more() || !t.lookingAt_c('}')) {
+      return -1
+    }
+    t.skip(1) // '}'
+    if (min < 0 || min > 1000 ||
+        max == -2 || max > 1000 || max >= 0 && min > max) {
+      // Numbers were negative or too big, or max is present and min > max.
+      throw new PatternSyntaxException(ERR_INVALID_REPEAT_SIZE, t.from(start))
+    }
+    (min << 16) | (max & 0xffff) // success
+  }
+
+  // isValidCaptureName reports whether name
+  // is a valid capture name: [A-Za-z0-9_]+.
+  // PCRE limits names to 32 bytes.
+  // Python rejects names starting with digits.
+  // We don't enforce either of those.
+  private def isValidCaptureName(name: String): Boolean = {
+    if (name.isEmpty()) {
+      return false
+    }
+    var i: Int = 0
+    while (i < name.length()) {
+      val c: Char = name.charAt(i)
+      if (c != '_' && !Utils.isalnum(c)) {
+        return false
+      }
+      i += 1
+    }
+    true
+  }
+
+  // parseInt parses a nonnegative decimal integer.
+  // -1 => bad format. -2 => format ok, but integer overflow.
+  private def parseInt(t: StringIterator): Int = {
+    val start: Int = t.pos()
+    var c: Int = 0
+    // Consume the longest run of ASCII digits at the cursor.
+    while (t.more() && { c = t.peek(); c } >= '0' && c <= '9') {
+      t.skip(1) // digit
+    }
+    val n: String = t.from(start)
+    if (n.isEmpty() ||
+        n.length() > 1 && n.charAt(0) == '0') { // disallow leading zeros
+      return -1 // bad format
+    }
+    if (n.length() > 8) {
+      return -2 // overflow
+    }
+    // At most 8 decimal digits, so this always fits in an Int.
+    Integer.parseInt(n, 10)
+  }
+
+  // can this be represented as a character class?
+  // single-rune literal string, char class, ., and .|\n.
+  private def isCharClass(re: Regexp): Boolean =
+    (re.op == ROP.LITERAL && re.runes.length == 1 ||
+      re.op == ROP.CHAR_CLASS ||
+      re.op == ROP.ANY_CHAR_NOT_NL ||
+      re.op == ROP.ANY_CHAR)
+
+  // does re match r?
+  // Only the four ops accepted by isCharClass can match; any other op
+  // yields false.
+  private def matchRune(re: Regexp, r: Int): Boolean = {
+    re.op match {
+      case ROP.LITERAL =>
+        re.runes.length == 1 && re.runes(0) == r
+      case ROP.CHAR_CLASS =>
+        // runes holds inclusive (lo, hi) pairs.
+        var i: Int = 0
+        while (i < re.runes.length) {
+          if (re.runes(i) <= r && r <= re.runes(i + 1)) {
+            return true
+          }
+          i += 2
+        }
+        false
+      case ROP.ANY_CHAR_NOT_NL =>
+        r != '\n'
+      case ROP.ANY_CHAR =>
+        true
+      case _ =>
+        false
+    }
+  }
+
+  // mergeCharClass makes dst = dst|src.
+  // The caller must ensure that dst.Op >= src.Op,
+  // to reduce the amount of copying.
+  private def mergeCharClass(dst: Regexp, src: Regexp): Unit = {
+    dst.op match {
+      case ROP.ANY_CHAR =>
+        // src doesn't add anything.
+        ()
+      case ROP.ANY_CHAR_NOT_NL =>
+        // src might add \n
+        if (matchRune(src, '\n')) {
+          dst.op = ROP.ANY_CHAR
+        }
+      case ROP.CHAR_CLASS =>
+        // src is simpler, so either literal or char class
+        if (src.op == ROP.LITERAL) {
+          dst.runes = new CharClass(dst.runes)
+            .appendLiteral(src.runes(0), src.flags)
+            .toArray()
+        } else {
+          dst.runes = new CharClass(dst.runes).appendClass(src.runes).toArray()
+        }
+      case ROP.LITERAL =>
+        // both literal
+        if (src.runes(0) == dst.runes(0) && src.flags == dst.flags) {
+          ()
+        } else {
+          dst.op = ROP.CHAR_CLASS
+          dst.runes = new CharClass(Utils.EMPTY_INTS)
+            .appendLiteral(dst.runes(0), dst.flags)
+            .appendLiteral(src.runes(0), src.flags)
+            .toArray()
+        }
+    }
+  }
+
+  // parseEscape parses an escape sequence at the beginning of s
+  // and returns the rune.
+  // Pre: t at '\\'. Post: after escape.
+  // Throws PatternSyntaxException for a trailing backslash or for any
+  // malformed or unsupported escape.
+  private def parseEscape(t: StringIterator): Int = {
+    val startPos: Int = t.pos()
+    t.skip(1) // '\\'
+    if (!t.more()) {
+      throw new PatternSyntaxException(ERR_TRAILING_BACKSLASH, "")
+    }
+    var c: Int = t.pop()
+
+    c match {
+      // Octal escapes.
+      // Single non-zero digit is a backreference; not supported.
+      case '1' | '2' | '3' | '4' | '5' | '6' | '7'
+          if !t.more() || t.peek() < '0' || t.peek() > '7' =>
+        throw new PatternSyntaxException(ERR_INVALID_ESCAPE, t.from(startPos))
+
+      // Consume up to three octal digits; already have one.
+      // '1'..'7' only reach this case when followed by another octal digit
+      // (the guarded case above rejects the lone-digit form). Listing them
+      // here restores the fall-through of the Java/Go original, so that
+      // e.g. \101 parses as 'A' instead of being rejected.
+      case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' =>
+        var r: Int = c - '0'
+        var i: Int = 1
+        var breakInner: Boolean = false
+        while (i < 3 && !breakInner) {
+          if (!t.more() || t.peek() < '0' || t.peek() > '7') {
+            breakInner = true
+          } else {
+            r = r * 8 + t.peek() - '0'
+            t.skip(1) // digit
+            i += 1
+          }
+        }
+        r
+
+      // Hexadecimal escapes.
+      case 'x' =>
+        if (!t.more()) {
+          throw new PatternSyntaxException(ERR_INVALID_ESCAPE, t.from(startPos))
+        }
+        c = t.pop()
+        if (c == '{') {
+          // Any number of digits in braces.
+          // Perl accepts any text at all; it ignores all text
+          // after the first non-hex digit. We require only hex digits,
+          // and at least one.
+          var nhex: Int = 0
+          var r: Int = 0
+          var breakInner: Boolean = false
+          while (!breakInner) {
+            if (!t.more()) {
+              throw new PatternSyntaxException(ERR_INVALID_ESCAPE, t.from(startPos))
+            }
+            c = t.pop()
+            if (c == '}') {
+              breakInner = true
+            }
+            if (!breakInner) {
+              val v: Int = Utils.unhex(c)
+              if (v < 0) {
+                throw new PatternSyntaxException(ERR_INVALID_ESCAPE, t.from(startPos))
+              }
+              r = r * 16 + v
+              if (r > Unicode.MAX_RUNE) {
+                throw new PatternSyntaxException(ERR_INVALID_ESCAPE, t.from(startPos))
+              }
+              nhex += 1
+            }
+          }
+          if (nhex == 0) {
+            throw new PatternSyntaxException(ERR_INVALID_ESCAPE, t.from(startPos))
+          }
+          r
+        } else {
+          // Easy case: two hex digits.
+          val x: Int = Utils.unhex(c)
+          // The pattern may end after the first digit (e.g. "\x4"); report
+          // that as a syntax error rather than reading past the end.
+          if (!t.more()) {
+            throw new PatternSyntaxException(ERR_INVALID_ESCAPE, t.from(startPos))
+          }
+          c = t.pop()
+          val y: Int = Utils.unhex(c)
+          if (x < 0 || y < 0) {
+            throw new PatternSyntaxException(ERR_INVALID_ESCAPE, t.from(startPos))
+          }
+          x * 16 + y
+        }
+
+      // C escapes. There is no case 'b', to avoid misparsing
+      // the Perl word-boundary \b as the C backspace \b
+      // when in POSIX mode. In Perl, /\b/ means word-boundary
+      // but /[\b]/ means backspace. We don't support that.
+      // If you want a backspace, embed a literal backspace
+      // character or use \x08.
+      case 'a' =>
+        7 // No \a in Java
+      case 'f' =>
+        '\f'
+      case 'n' =>
+        '\n'
+      case 'r' =>
+        '\r'
+      case 't' =>
+        '\t'
+      case 'v' =>
+        11 // No \v in Java
+
+      // Default case.
+      case _ =>
+        if (!Utils.isalnum(c)) {
+          // Escaped non-word characters are always themselves.
+          // PCRE is not quite so rigorous: it accepts things like
+          // \q, but we don't. We once rejected \_, but too many
+          // programs and people insist on using it, so allow \_.
+          c
+        } else {
+          throw new PatternSyntaxException(ERR_INVALID_ESCAPE, t.from(startPos))
+        }
+    }
+  }
+
+  // parseClassChar parses a character class character and returns it.
+  // wholeClassPos is the position of the start of the entire class "[...".
+  // Pre: t at class char; Post: t after it.
+  private def parseClassChar(t: StringIterator, wholeClassPos: Int): Int = {
+    // Running out of input here means the class was never closed.
+    if (!t.more()) {
+      throw new PatternSyntaxException(ERR_MISSING_BRACKET,
+                                       t.from(wholeClassPos))
+    }
+
+    // Regular escape sequences are accepted even though many characters
+    // need no escaping in this context; anything else is taken verbatim.
+    if (t.lookingAt_c('\\')) parseEscape(t) else t.pop()
+  }
+
+  // RangeTables are represented as int[][], a list of triples (start, end,
+  // stride).
+  private val ANY_TABLE: Array[Array[Int]] = Array[Array[Int]](
+    Array[Int](0, Unicode.MAX_RUNE, 1)
+  )
+
+  // unicodeTable() looks |name| up first among the Unicode categories and
+  // then among the scripts, and returns the matching RangeTable paired with
+  // its table of additional fold-equivalent code points.
+  // The name "Any" is special-cased to the table covering every rune.
+  // Returns null if |name| does not identify a Unicode character range.
+  private def unicodeTable(
+      name: String): Pair[Array[Array[Int]], Array[Array[Int]]] = {
+    def tables(tab: Array[Array[Int]], fold: Array[Array[Int]])
+      : Pair[Array[Array[Int]], Array[Array[Int]]] =
+      Pair.of[Array[Array[Int]], Array[Array[Int]]](tab, fold)
+
+    if (name.equals("Any")) {
+      tables(ANY_TABLE, ANY_TABLE)
+    } else {
+      val category: Array[Array[Int]] = UnicodeTables.CATEGORIES.get(name)
+      if (category != null) {
+        tables(category, UnicodeTables.FOLD_CATEGORIES.get(name))
+      } else {
+        val script: Array[Array[Int]] = UnicodeTables.SCRIPTS.get(name)
+        if (script != null) {
+          tables(script, UnicodeTables.FOLD_SCRIPT.get(name))
+        } else {
+          null
+        }
+      }
+    }
+  }
+
+  //// Utilities
+
+  // Returns a new copy of the specified subarray.
+ def subarray(array: Array[Regexp], start: Int, end: Int): Array[Regexp] = { + val r: Array[Regexp] = new Array[Regexp](end - start) + var i: Int = start + while (i < end) { + r(i - start) = array(i) + i += 1 + } + r + } + + private class Pair[F, S](val first: F, val second: S) + + private object Pair { + def of[F, S](f: F, s: S): Pair[F, S] = new Pair(f, s) + } + + private def concatRunes(x: Array[Int], y: Array[Int]): Array[Int] = { + val z: Array[Int] = new Array[Int](x.length + y.length) + System.arraycopy(x, 0, z, 0, x.length) + System.arraycopy(y, 0, z, x.length, y.length) + z + } +} diff --git a/input/rsc/Pattern.scala b/input/rsc/Pattern.scala new file mode 100644 index 0000000..3b8894f --- /dev/null +++ b/input/rsc/Pattern.scala @@ -0,0 +1,223 @@ +// Copyright 2010 Google Inc. All Rights Reserved. + +package com.twitter.re2s + +import java.io.Serializable + +/** + * A compiled representation of an RE2 regular expression, mimicking the + * {@code java.util.regex.Pattern} API. + * + *

The matching functions take {@code String} arguments instead of + * the more general Java {@code CharSequence} since the latter doesn't + * provide UTF-16 decoding. + * + *

See the package-level + * documentation for an overview of how to use this API.

+ * + * @author rsc@google.com (Russ Cox) + */ +final class Pattern(val pattern: String, val flags: Int, val re2: RE2) + extends Serializable() { + if (pattern == null) { + throw new NullPointerException("pattern is null") + } + if (re2 == null) { + throw new NullPointerException("re2 is null") + } + + /** + * Releases memory used by internal caches associated with this pattern. Does + * not change the observable behaviour. Useful for tests that detect memory + * leaks via allocation tracking. + */ + def reset_0(): Unit = re2.reset_0() + + def matches(input: String): Boolean = + matcher(input).matches() + + /** + * Creates a new {@code Matcher} matching the pattern against the input. + * + * @param input the input string + */ + def matcher(input: CharSequence): Matcher = + new Matcher(this, input) + + /** + * Splits input around instances of the regular expression. + * It returns an array giving the strings that occur before, between, and after instances + * of the regular expression. Empty strings that would occur at the end + * of the array are omitted. + * + * @param input the input string to be split + * @return the split strings + */ + def split_1(input: String): Array[String] = + split_2(input, 0) + + /** + * Splits input around instances of the regular expression. + * It returns an array giving the strings that occur before, between, and after instances + * of the regular expression. + * + *

If {@code limit <= 0}, there is no limit on the size of the returned array. + * If {@code limit == 0}, empty strings that would occur at the end of the array are omitted. + * If {@code limit > 0}, at most limit strings are returned. The final string contains + * the remainder of the input, possibly including additional matches of the pattern. + * + * @param input the input string to be split + * @param limit the limit + * @return the split strings + */ + def split_2(input: String, limit: Int): Array[String] = + split(matcher(input), limit) + + /** Helper: run split on m's input. */ + private def split(m: Matcher, limit: Int): Array[String] = { + var matchCount: Int = 0 + var arraySize: Int = 0 + var last: Int = 0 + while (m.find_0()) { + matchCount += 1 + if (limit != 0 || last < m.start_0()) { + arraySize = matchCount + } + last = m.end_0() + } + if (last < m.inputLength() || limit != 0) { + matchCount += 1 + arraySize = matchCount + } + + var trunc: Int = 0 + if (limit > 0 && arraySize > limit) { + arraySize = limit + trunc = 1 + } + val array: Array[String] = new Array[String](arraySize) + var i: Int = 0 + last = 0 + m.reset_0() + while (m.find_0() && i < arraySize - trunc) { + array(i) = m.substring(last, m.start_0()) + i += 1 + last = m.end_0() + } + if (i < arraySize) { + array(i) = m.substring(last, m.inputLength()) + } + array + } + + override def toString(): String = pattern + + /** + * Returns the number of capturing groups in this matcher's pattern. + * Group zero denotes the entire pattern and is excluded from this count. + * + * @return the number of capturing groups in this pattern + */ + def groupCount(): Int = + re2.numberOfCapturingGroups() +} + +object Pattern { + + /** Flag: case insensitive matching. */ + final val CASE_INSENSITIVE: Int = 1 + + /** Flag: dot ({@code .}) matches all characters, including newline. 
*/ + final val DOTALL: Int = 2 + + /** + * Flag: multiline matching: {@code ^} and {@code $} match at + * beginning and end of line, not just beginning and end of input. + */ + final val MULTILINE: Int = 4 + + /** + * Flag: Unicode groups (e.g. {@code \p\{Greek\}}) will be syntax errors. + */ + final val DISABLE_UNICODE_GROUPS: Int = 8 + + /** + * Creates and returns a new {@code Pattern} corresponding to + * compiling {@code regex} with the default flags (0). + * + * @param regex the regular expression + * @throws PatternSyntaxException if the pattern is malformed + */ + def compile_1(regex: String): Pattern = + compile_3(regex, regex, 0) + + /** + * Creates and returns a new {@code Pattern} corresponding to + * compiling {@code regex} with the default flags (0). + * + * @param regex the regular expression + * @param flags bitwise OR of the flag constants {@code CASE_INSENSITIVE}, + * {@code DOTALL}, and {@code MULTILINE} + * @throws PatternSyntaxException if the regular expression is malformed + * @throws IllegalArgumentException if an unknown flag is given + */ + def compile_2(regex: String, flags: Int): Pattern = { + var flregex: String = regex + if ((flags & CASE_INSENSITIVE) != 0) { + flregex = "(?i)" + flregex + } + if ((flags & DOTALL) != 0) { + flregex = "(?s)" + flregex + } + if ((flags & MULTILINE) != 0) { + flregex = "(?m)" + flregex + } + if ((flags & ~(MULTILINE | DOTALL | CASE_INSENSITIVE | DISABLE_UNICODE_GROUPS)) != 0) { + throw new IllegalArgumentException( + "Flags should only be a combination " + + "of MULTILINE, DOTALL, CASE_INSENSITIVE, DISABLE_UNICODE_GROUPS") + } + compile_3(flregex, regex, flags) + } + + /** + * Helper: create new Pattern with given regex and flags. + * Flregex is the regex with flags applied. 
+ */ + private def compile_3(flregex: String, regex: String, flags: Int): Pattern = { + var re2Flags: Int = RE2.PERL + if ((flags & DISABLE_UNICODE_GROUPS) != 0) { + re2Flags &= ~RE2.UNICODE_GROUPS + } + new Pattern(regex, + flags, + RE2.compileImpl(flregex, re2Flags, /*longest=*/ false)) + } + + /** + * Matches a string against a regular expression. + * + * @param regex the regular expression + * @param input the input + * @return true if the regular expression matches the entire input + * @throws PatternSyntaxException if the regular expression is malformed + */ + def matches(regex: String, input: CharSequence): Boolean = + compile_1(regex).matcher(input).matches() + + /** + * Returns a literal pattern string for the specified + * string. + * + *

This method produces a string that can be used to + * create a Pattern that would match the string + * s as if it were a literal pattern.

Metacharacters + * or escape sequences in the input sequence will be given no special + * meaning. + * + * @param s The string to be literalized + * @return A literal string replacement + */ + def quote(s: String): String = + RE2.quoteMeta(s) +} diff --git a/input/rsc/PatternSyntaxException.scala b/input/rsc/PatternSyntaxException.scala new file mode 100644 index 0000000..8815139 --- /dev/null +++ b/input/rsc/PatternSyntaxException.scala @@ -0,0 +1,37 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package com.twitter.re2s + +/** + * An exception thrown by the parser if the pattern was invalid. + * + * Following {@code java.util.regex.PatternSyntaxException}, this is an + * unchecked exception. + */ +class PatternSyntaxException(error: String, input: String) + extends RuntimeException( + "error parsing regexp: " + error + ": `" + input + "`") { + /** + * Retrieves the error index. + * + * @return The approximate index in the pattern of the error, + * or -1 if the index is not known + */ + def getIndex(): Int = -1 + + /** + * Retrieves the description of the error. + * + * @return The description of the error + */ + def getDescription(): String = error + + /** + * Retrieves the erroneous regular-expression pattern. + * + * @return The erroneous pattern + */ + def getPattern(): String = input +} diff --git a/input/rsc/Prog.scala b/input/rsc/Prog.scala new file mode 100644 index 0000000..439746a --- /dev/null +++ b/input/rsc/Prog.scala @@ -0,0 +1,174 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// Original Go source here: +// http://code.google.com/p/go/source/browse/src/pkg/regexp/syntax/prog.go + +package com.twitter.re2s + +import java.util.ArrayList +import com.twitter.re2s.Inst.{Op => IOP} + +/** + * A Prog is a compiled regular expression program. + */ +class Prog() { + + private val inst: ArrayList[Inst] = new ArrayList[Inst]() + var start: Int = 0 // index of start instruction + var numCap: Int = 2 // number of CAPTURE insts in re + // 2 => implicit ( and ) for whole match $0 + + // Returns the instruction at the specified pc. + // Precondition: pc > 0 && pc < numInst(). + def getInst(pc: Int): Inst = inst.get(pc) + + // Returns the number of instructions in this program. + def numInst(): Int = inst.size() + + // Adds a new instruction to this program, with operator |op| and |pc| equal + // to |numInst()|. + def addInst(op: IOP): Unit = inst.add(new Inst(op)) + + // skipNop() follows any no-op or capturing instructions and returns the + // resulting instruction. + def skipNop(_pc: Int): Inst = { + var pc: Int = _pc + var i: Inst = inst.get(pc) + while (i.op == IOP.NOP || i.op == IOP.CAPTURE) { + i = inst.get(pc) + pc = i.out + } + i + } + + // prefix() returns a pair of a literal string that all matches for the + // regexp must start with, and a boolean which is true if the prefix is the + // entire match. The string is returned by appending to |prefix|. + def prefix(prefix: java.lang.StringBuilder): Boolean = { + var i: Inst = skipNop(start) + + // Avoid allocation of buffer if prefix is empty. + if (i.runeOp() != IOP.RUNE || i.runes.length != 1) { + return i.op == IOP.MATCH // (append "" to prefix) + } + + // Have prefix gather characters. + while (i.runeOp() == IOP.RUNE && + i.runes.length == 1 && + (i.arg & RE2.FOLD_CASE) == 0) { + prefix.appendCodePoint(i.runes(0)) // an int, not a byte. + i = skipNop(i.out) + } + + i.op == IOP.MATCH + } + + // startCond() returns the leading empty-width conditions that must be true + // in any match. 
It returns -1 (all bits set) if no matches are possible. + def startCond(): Int = { + var flag: Int = 0 // bitmask of EMPTY_* flags + var pc: Int = start + var break: Boolean = false + while (!break) { + val i: Inst = inst.get(pc) + i.op match { + case IOP.EMPTY_WIDTH => + flag |= i.arg + case IOP.FAIL => + return -1 + case IOP.CAPTURE | IOP.NOP => + () // skip + case _ => + break = true + } + if (!break) { + pc = i.out + } + } + flag + } + + // --- Patch list --- + + // A patchlist is a list of instruction pointers that need to be filled in + // (patched). Because the pointers haven't been filled in yet, we can reuse + // their storage to hold the list. It's kind of sleazy, but works well in + // practice. See http://swtch.com/~rsc/regexp/regexp1.html for inspiration. + + // These aren't really pointers: they're integers, so we can reinterpret them + // this way without using package unsafe. A value l denotes p.inst[l>>1].out + // (l&1==0) or .arg (l&1==1). l == 0 denotes the empty list, okay because we + // start every program with a fail instruction, so we'll never want to point + // at its output link. 
+ + def next(l: Int): Int = { + val i: Inst = inst.get(l >> 1) + if ((l & 1) == 0) { + i.out + } else { + i.arg + } + } + + def patch(_l: Int, value: Int): Unit = { + var l: Int = _l + while (l != 0) { + var i: Inst = inst.get(l >> 1) + if ((l & 1) == 0) { + l = i.out + i.out = value + } else { + l = i.arg + i.arg = value + } + } + } + + def append(l1: Int, l2: Int): Int = { + if (l1 == 0) { + return l2 + } + if (l2 == 0) { + return l1 + } + var last: Int = l1 + var break: Boolean = false + while (!break) { + val next: Int = this.next(last) + if (next == 0) { + break = true + } else { + last = next + } + } + val i: Inst = inst.get(last >> 1) + if ((last & 1) == 0) { + i.out = l2 + } else { + i.arg = l2 + } + l1 + } + + // --- + + override def toString(): String = { + val out: java.lang.StringBuilder = new java.lang.StringBuilder() + var pc: Int = 0 + while (pc < inst.size()) { + val len: Int = out.length() + out.append(pc) + if (pc == start) { + out.append('*') + } + out + .append(" ".substring(out.length() - len)) + .append(inst.get(pc)) + .append('\n') + pc += 1 + } + out.toString() + } +} diff --git a/input/rsc/RE2.scala b/input/rsc/RE2.scala new file mode 100644 index 0000000..a1ff7c7 --- /dev/null +++ b/input/rsc/RE2.scala @@ -0,0 +1,812 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Original Go source here: +// http://code.google.com/p/go/source/browse/src/pkg/regexp/regexp.go + +// Beware, submatch results may pin a large underlying String into +// memory. Consider creating explicit string copies if submatches are +// long-lived and inputs are large. +// +// The JDK API supports incremental processing of the input without +// necessarily consuming it all we do not attempt to do so. 
+ +// The Java API emphasises UTF-16 Strings, not UTF-8 byte[] as in Go, as +// the primary input datatype, and the method names have been changed to +// reflect this. + +package com.twitter.re2s + +import java.util.ArrayList +import java.util.Arrays +import java.util.List +import com.twitter.re2s.RE2._ + +/** + * An RE2 class instance is a compiled representation of an RE2 regular + * expression, independent of the public Java-like Pattern/Matcher API. + * + *

This class also contains various implementation helpers for RE2 + * regular expressions. + * + *

Use the {@link #quoteMeta(String)} utility function to quote all + * regular expression metacharacters in an arbitrary string. + * + *

See the {@code Matcher} and {@code Pattern} classes for the public + * API, and the package-level + * documentation for an overview of how to use this API. + */ +class RE2() { + var expr: String = null // as passed to Compile + var prog: Prog = null // compiled program + var cond: Int = 0 // EMPTY_* bitmask: empty-width conditions + // required at start of match + var numSubexp: Int = 0 + var longest: Boolean = false + + var prefix: String = null // required UTF-16 prefix in unanchored matches + var prefixUTF8 + : Array[Byte] = null // required UTF-8 prefix in unanchored matches + var prefixComplete: Boolean = false // true iff prefix is the entire regexp + var prefixRune: Int = 0 // first rune in prefix + + // Cache of machines for running regexp. + // Accesses must be serialized using |this| monitor. + private val machine: ArrayList[Machine] = new ArrayList[Machine]() + + /** + * Returns the number of parenthesized subexpressions in this regular + * expression. + */ + def numberOfCapturingGroups(): Int = numSubexp + + // get() returns a machine to use for matching |this|. It uses |this|'s + // machine cache if possible, to avoid unnecessary allocation. + def get(): Machine = synchronized[Machine]({ + val n: Int = machine.size() + if (n > 0) { + return machine.remove(n - 1) + } + return new Machine(this) + }) + + // Clears the memory associated with this machine. + def reset_0(): Unit = synchronized[Unit]({ + machine.clear() + }) + + // put() returns a machine to |this|'s machine cache. There is no attempt to + // limit the size of the cache, so it will grow to the maximum number of + // simultaneous matches run using |this|. (The cache empties when |this| + // gets garbage collected.) + def put(m: Machine): Unit = synchronized[Unit]({ + machine.add(m) + }) + + override def toString(): String = expr + + // doExecute() finds the leftmost match in the input and returns + // the position of its subexpressions. + // Derived from exec.go. 
+ private def doExecute(in: MachineInput, + pos: Int, + anchor: Int, + ncap: Int): Array[Int] = { + val m: Machine = get() + m.init(ncap) + val cap: Array[Int] = if (m.match_(in, pos, anchor)) m.submatches() else null + put(m) + cap + } + + /** + * Returns true iff this regexp matches the string {@code s}. + */ + def match_1(s: CharSequence): Boolean = + doExecute(MachineInput.fromUTF16(s, 0, s.length()), 0, UNANCHORED, 0) != null + + /** + * Matches the regular expression against input starting at position start + * and ending at position end, with the given anchoring. + * Records the submatch boundaries in group, which is [start, end) pairs + * of byte offsets. The number of boundaries needed is inferred + * from the size of the group array. It is most efficient not to ask for + * submatch boundaries. + * + * @param input the input byte array + * @param start the beginning position in the input + * @param end the end position in the input + * @param anchor the anchoring flag (UNANCHORED, ANCHOR_START, ANCHOR_BOTH) + * @param group the array to fill with submatch positions + * @param ngroup the number of array pairs to fill in + * @return true if a match was found + */ + def match_5(input: CharSequence, + start: Int, + end: Int, + anchor: Int, + group: Array[Int], + ngroup: Int): Boolean = { + if (start > end) { + return false + } + + val groupMatch: Array[Int] = doExecute(MachineInput.fromUTF16(input, 0, end), + start, + anchor, + 2 * ngroup) + + if (groupMatch == null) { + return false + } + + if (group != null) { + System.arraycopy(groupMatch, 0, group, 0, groupMatch.length) + } + + true + } + + /** + * Returns true iff this regexp matches the UTF-8 byte array {@code b}. + */ + // This is visible for testing. + def matchUTF8(b: Array[Byte]): Boolean = + doExecute(MachineInput.fromUTF8(b, 0, b.length), 0, UNANCHORED, 0) != null + + /** + * Returns a copy of {@code src} in which all matches for this regexp + * have been replaced by {@code repl}. 
No support is provided for + * expressions (e.g. {@code \1} or {@code $1}) in the replacement + * string. + */ + // This is visible for testing. + def replaceAll(src: String, repl: String): String = + replaceAllFunc(src, 2 * src.length() + 1, (orig: String) => { + repl + }) + + /** + * Returns a copy of {@code src} in which only the first match for this regexp + * has been replaced by {@code repl}. No support is provided for + * expressions (e.g. {@code \1} or {@code $1}) in the replacement + * string. + */ + // This is visible for testing. + def replaceFirst(src: String, repl: String): String = + replaceAllFunc(src, 1, (orig: String) => { + repl + }) + + /** + * Returns a copy of {@code src} in which at most {@code maxReplaces} matches + * for this regexp have been replaced by the return value of of function + * {@code repl} (whose first argument is the matched string). No support is + * provided for expressions (e.g. {@code \1} or {@code $1}) in the + * replacement string. + */ + // This is visible for testing. + def replaceAllFunc(src: String, maxReplaces: Int, + f: String => String): String = { + var lastMatchEnd: Int = 0 // end position of the most recent match + var searchPos: Int = 0 // position where we next look for a match + val buf: java.lang.StringBuilder = new java.lang.StringBuilder() + val input: MachineInput = MachineInput.fromUTF16(src, 0, src.length()) + var numReplaces: Int = 0 + var break: Boolean = false + + while (!break && searchPos <= src.length()) { + val a: Array[Int] = doExecute(input, searchPos, UNANCHORED, 2) + if (a == null || a.length == 0) { + break = true // no more matches + } else { + // Copy the unmatched characters before this match. + buf.append(src.substring(lastMatchEnd, a(0))) + + // Now insert a copy of the replacement string, but not for a + // match of the empty string immediately after another match. + // (Otherwise, we get double replacement for patterns that + // match both empty and nonempty strings.) 
+ if (a(1) > lastMatchEnd || a(0) == 0) { + buf.append(f(src.substring(a(0), a(1)))) + // Increment the replace count. + numReplaces += 1 + } + lastMatchEnd = a(1) + + // Advance past this match always advance at least one character. + val width: Int = input.step(searchPos) & 0x7 + if (searchPos + width > a(1)) { + searchPos += width + } else if (searchPos + 1 > a(1)) { + // This clause is only needed at the end of the input + // string. In that case, DecodeRuneInString returns width=0. + searchPos += 1 + } else { + searchPos = a(1) + } + if (numReplaces >= maxReplaces) { + // Should never be greater though. + break = true + } + } + } + + // Copy the unmatched characters after the last match. + buf.append(src.substring(lastMatchEnd)) + buf.toString() + } + + // The number of capture values in the program may correspond + // to fewer capturing expressions than are in the regexp. + // For example, "(a){0}" turns into an empty program, so the + // maximum capture in the program is 0 but we need to return + // an expression for \1. Pad returns a with -1s appended as needed + // the result may alias a. + private def pad(_a: Array[Int]): Array[Int] = { + var a: Array[Int] = _a + if (a == null) { + return null // No match. + } + var n: Int = (1 + numSubexp) * 2 + if (a.length < n) { + val a2: Array[Int] = new Array[Int](n) + System.arraycopy(a, 0, a2, 0, a.length) + Arrays.fill(a2, a.length, n, -1) + a = a2 + } + a + } + + // Find matches in input. 
+ private def allMatches(input: MachineInput, _n: Int, + f: Array[Int] => Unit): Unit = { + var n: Int = _n + val end: Int = input.endPos() + if (n < 0) { + n = end + 1 + } + var pos: Int = 0 + var i: Int = 0 + var prevMatchEnd: Int = -1 + var break: Boolean = false + while (!break && i < n && pos <= end) { + val matches: Array[Int] = doExecute(input, pos, UNANCHORED, prog.numCap) + if (matches == null || matches.length == 0) { + break = true + } else { + var accept: Boolean = true + if (matches(1) == pos) { + // We've found an empty match. + if (matches(0) == prevMatchEnd) { + // We don't allow an empty match right + // after a previous match, so ignore it. + accept = false + } + val r: Int = input.step(pos) + if (r < 0) { // EOF + pos = end + 1 + } else { + pos += r & 0x7 + } + } else { + pos = matches(1) + } + prevMatchEnd = matches(1) + + if (accept) { + f(pad(matches)) + i += 1 + } + } + } + } + + // Legacy Go-style interface preserved (package-private) for better + // test coverage. + // + // There are 16 methods of RE2 that match a regular expression and + // identify the matched text. Their names are matched by this regular + // expression: + // + // find(All)?(UTF8)?(Submatch)?(Index)? + // + // If 'All' is present, the routine matches successive non-overlapping + // matches of the entire expression. Empty matches abutting a + // preceding match are ignored. The return value is an array + // containing the successive return values of the corresponding + // non-All routine. These routines take an extra integer argument, n + // if n >= 0, the function returns at most n matches/submatches. + // + // If 'UTF8' is present, the argument is a UTF-8 encoded byte[] array + // otherwise it is a UTF-16 encoded java.lang.String return values + // are adjusted as appropriate. + // + // If 'Submatch' is present, the return value is an list identifying + // the successive submatches of the expression. 
Submatches are + // matches of parenthesized subexpressions within the regular + // expression, numbered from left to right in order of opening + // parenthesis. Submatch 0 is the match of the entire expression, + // submatch 1 the match of the first parenthesized subexpression, and + // so on. + // + // If 'Index' is present, matches and submatches are identified by + // byte index pairs within the input string: result[2*n:2*n+1] + // identifies the indexes of the nth submatch. The pair for n==0 + // identifies the match of the entire expression. If 'Index' is not + // present, the match is identified by the text of the match/submatch. + // If an index is negative, it means that subexpression did not match + // any string in the input. + + /** + * Returns an array holding the text of the leftmost match in {@code b} + * of this regular expression. + * + *

A return value of null indicates no match. + */ + // This is visible for testing. + def findUTF8(b: Array[Byte]): Array[Byte] = { + val a: Array[Int] = doExecute(MachineInput.fromUTF8(b, 0, b.length), 0, UNANCHORED, 2) + if (a == null) { + return null + } + Utils.subarray_b(b, a(0), a(1)) + } + + /** + * Returns a two-element array of integers defining the location of + * the leftmost match in {@code b} of this regular expression. The + * match itself is at {@code b[loc[0]...loc[1]]}. + * + *

A return value of null indicates no match. + */ + // This is visible for testing. + def findUTF8Index(b: Array[Byte]): Array[Int] = { + val a: Array[Int] = doExecute(MachineInput.fromUTF8(b, 0, b.length), 0, UNANCHORED, 2) + if (a == null) { + return null + } + return Utils.subarray_i(a, 0, 2) + } + + /** + * Returns a string holding the text of the leftmost match in + * {@code s} of this regular expression. + * + *

If there is no match, the return value is an empty string, but it + * will also be empty if the regular expression successfully matches + * an empty string. Use {@link #findIndex} or + * {@link #findSubmatch} if it is necessary to distinguish these + * cases. + */ + // This is visible for testing. + def find(s: String): String = { + val a: Array[Int] = doExecute(MachineInput.fromUTF16(s, 0, s.length()), 0, UNANCHORED, 2) + if (a == null) { + return "" + } + return s.substring(a(0), a(1)) + } + + /** + * Returns a two-element array of integers defining the location of + * the leftmost match in {@code s} of this regular expression. The + * match itself is at {@code s.substring(loc[0], loc[1])}. + * + *

A return value of null indicates no match. + */ + // This is visible for testing. + def findIndex(s: String): Array[Int] = { + val a: Array[Int] = doExecute(MachineInput.fromUTF16(s, 0, s.length()), 0, UNANCHORED, 2) + if (a == null) { + return null + } + return a + } + + /** + * Returns an array of arrays the text of the leftmost match of the + * regular expression in {@code b} and the matches, if any, of its + * subexpressions, as defined by the Submatch description above. + * + *

A return value of null indicates no match. + */ + // This is visible for testing. + def findUTF8Submatch(b: Array[Byte]): Array[Array[Byte]] = { + val a: Array[Int] = doExecute(MachineInput.fromUTF8(b, 0, b.length), 0, UNANCHORED, prog.numCap) + if (a == null) { + return null + } + val ret: Array[Array[Byte]] = new Array[Array[Byte]](1 + numSubexp) + var i: Int = 0 + while (i < ret.length) { + if (2 * i < a.length && a(2 * i) >= 0) { + ret(i) = Utils.subarray_b(b, a(2 * i), a(2 * i + 1)) + } + i += 1 + } + ret + } + + /** + * Returns an array holding the index pairs identifying the leftmost + * match of this regular expression in {@code b} and the matches, if + * any, of its subexpressions, as defined by the the Submatch and Index + * descriptions above. + * + *

A return value of null indicates no match. + */ + // This is visible for testing. + def findUTF8SubmatchIndex(b: Array[Byte]): Array[Int] = + pad(doExecute(MachineInput.fromUTF8(b, 0, b.length), 0, UNANCHORED, prog.numCap)) + + /** + * Returns an array of strings holding the text of the leftmost match + * of the regular expression in {@code s} and the matches, if any, of + * its subexpressions, as defined by the Submatch description above. + * + *

A return value of null indicates no match. + */ + // This is visible for testing. + def findSubmatch(s: String): Array[String] = { + val a: Array[Int] = doExecute(MachineInput.fromUTF16(s, 0, s.length()), 0, UNANCHORED, prog.numCap) + if (a == null) { + return null + } + val ret: Array[String] = new Array[String](1 + numSubexp) + var i: Int = 0 + while (i < ret.length) { + if (2 * i < a.length && a(2 * i) >= 0) { + ret(i) = s.substring(a(2 * i), a(2 * i + 1)) + } + i += 1 + } + ret + } + + /** + * Returns an array holding the index pairs identifying the leftmost + * match of this regular expression in {@code s} and the matches, if + * any, of its subexpressions, as defined by the Submatch description above. + * + *

A return value of null indicates no match. + */ + // This is visible for testing. + def findSubmatchIndex(s: String): Array[Int] = + pad(doExecute(MachineInput.fromUTF16(s, 0, s.length()), 0, UNANCHORED, prog.numCap)) + + /** + * {@code findAllUTF8()} is the All version of + * {@link #findUTF8} it returns a list of up to {@code n} successive + * matches of the expression, as defined by the All + * description above. + * + *

A return value of null indicates no match. + * + * TODO(adonovan): think about defining a byte slice view class, like + * a read-only Go slice backed by |b|. + */ + // This is visible for testing. + def findAllUTF8(b: Array[Byte], n: Int): List[Array[Byte]] = { + val result: ArrayList[Array[Byte]] = new ArrayList[Array[Byte]]() + allMatches(MachineInput.fromUTF8(b, 0, b.length), n, (_match: Array[Int]) => { + result.add(Utils.subarray_b(b, _match(0), _match(1))) + }) + if (result.isEmpty()) { + return null + } + result + } + + /** + * {@code findAllUTF8Index} is the All version of + * {@link #findUTF8Index} it returns a list of up to {@code n} + * successive matches of the expression, as defined by the All description above. + * + *

A return value of null indicates no match. + */ + // This is visible for testing. + def findAllUTF8Index(b: Array[Byte], n: Int): List[Array[Int]] = { + val result: ArrayList[Array[Int]] = new ArrayList[Array[Int]]() + allMatches(MachineInput.fromUTF8(b, 0, b.length), n, (_match: Array[Int]) => { + result.add(Utils.subarray_i(_match, 0, 2)) + }) + if (result.isEmpty()) { + return null + } + return result + } + + /** + * {@code findAll} is the All version of + * {@link #find} it returns a list of up to {@code n} + * successive matches of the expression, as defined by the All description above. + * + *

A return value of null indicates no match. + */ + // This is visible for testing. + def findAll(s: String, n: Int): List[String] = { + val result: ArrayList[String] = new ArrayList[String]() + allMatches(MachineInput.fromUTF16(s, 0, s.length()), n, (_match: Array[Int]) => { + result.add(s.substring(_match(0), _match(1))) + }) + if (result.isEmpty()) { + return null + } + return result + } + + /** + * {@code findAllIndex} is the All version of + * {@link #findIndex} it returns a list of up to {@code n} + * successive matches of the expression, as defined by the All description above. + * + *

A return value of null indicates no match. + */ + // This is visible for testing. + def findAllIndex(s: String, n: Int): List[Array[Int]] = { + val result: ArrayList[Array[Int]] = new ArrayList[Array[Int]]() + allMatches(MachineInput.fromUTF16(s, 0, s.length()), n, (_match: Array[Int]) => { + result.add(Utils.subarray_i(_match, 0, 2)) + }) + if (result.isEmpty()) { + return null + } + return result + } + + /** + * {@code findAllUTF8Submatch} is the All version + * of {@link #findUTF8Submatch} it returns a list of up to {@code n} + * successive matches of the expression, as defined by the All description above. + * + *

A return value of null indicates no match. + */ + // This is visible for testing. + def findAllUTF8Submatch(b: Array[Byte], n: Int): List[Array[Array[Byte]]] = { + val result: ArrayList[Array[Array[Byte]]] = new ArrayList[Array[Array[Byte]]]() + allMatches(MachineInput.fromUTF8(b, 0, b.length), n, (_match: Array[Int]) => { + val slice: Array[Array[Byte]] = new Array[Array[Byte]](_match.length / 2) + var j: Int = 0 + while (j < slice.length) { + if (_match(2 * j) >= 0) { + slice(j) = Utils.subarray_b(b, _match(2 * j), _match(2 * j + 1)) + } + j += 1 + } + result.add(slice) + }) + if (result.isEmpty()) { + return null + } + return result + } + + /** + * {@code findAllUTF8SubmatchIndex} is the All + * version of {@link #findUTF8SubmatchIndex} it returns a list of up + * to {@code n} successive matches of the expression, as defined by + * the All description above. + * + *

A return value of null indicates no match. + */ + // This is visible for testing. + def findAllUTF8SubmatchIndex(b: Array[Byte], n: Int): List[Array[Int]] = { + val result: ArrayList[Array[Int]] = new ArrayList[Array[Int]]() + allMatches(MachineInput.fromUTF8(b, 0, b.length), n, (_match: Array[Int]) => { + result.add(_match) + }) + if (result.isEmpty()) { + return null + } + return result + } + + /** + * {@code findAllSubmatch} is the All version + * of {@link #findSubmatch} it returns a list of up to + * {@code n} successive matches of the expression, as defined by the + * All description above. + * + *

A return value of null indicates no match. + */ + // This is visible for testing. + def findAllSubmatch(s: String, n: Int): List[Array[String]] = { + val result: ArrayList[Array[String]] = new ArrayList[Array[String]]() + allMatches(MachineInput.fromUTF16(s, 0, s.length()), n, (_match: Array[Int]) => { + val slice: Array[String] = new Array[String](_match.length / 2) + var j: Int = 0 + while (j < slice.length) { + if (_match(2 * j) >= 0) { + slice(j) = s.substring(_match(2 * j), _match(2 * j + 1)) + } + j += 1 + } + result.add(slice) + }) + if (result.isEmpty()) { + return null + } + return result + } + + /** + * {@code findAllSubmatchIndex} is the All + * version of {@link #findSubmatchIndex} it returns a list of + * up to {@code n} successive matches of the expression, as defined by + * the All description above. + * + *

A return value of null indicates no match. + */ + // This is visible for testing. + def findAllSubmatchIndex(s: String, n: Int): List[Array[Int]] = { + val result: ArrayList[Array[Int]] = new ArrayList[Array[Int]]() + allMatches(MachineInput.fromUTF16(s, 0, s.length()), n, (_match: Array[Int]) => { + result.add(_match) + }) + if (result.isEmpty()) { + return null + } + return result + } +} + +object RE2 { + + //// Parser flags. + + // Fold case during matching (case-insensitive). + final val FOLD_CASE: Int = 0x01 + + // Treat pattern as a literal string instead of a regexp. + final val LITERAL: Int = 0x02 + + // Allow character classes like [^a-z] and [[:space:]] to match newline. + final val CLASS_NL: Int = 0x04 + + // Allow '.' to match newline. + final val DOT_NL: Int = 0x08 + + // Treat ^ and $ as only matching at beginning and end of text, not + // around embedded newlines. (Perl's default). + final val ONE_LINE: Int = 0x10 + + // Make repetition operators default to non-greedy. + final val NON_GREEDY: Int = 0x20 + + // allow Perl extensions: + // non-capturing parens - (?: ) + // non-greedy operators - *? +? ?? {}? + // flag edits - (?i) (?-i) (?i: ) + // i - FoldCase + // m - !OneLine + // s - DotNL + // U - NonGreedy + // line ends: \A \z + // \Q and \E to disable/enable metacharacters + // (?Pexpr) for named captures + // \C (any byte) is not supported. + final val PERL_X: Int = 0x40 + + // Allow \p{Han}, \P{Han} for Unicode group and negation. + final val UNICODE_GROUPS: Int = 0x80 + + // Regexp END_TEXT was $, not \z. Internal use only. + final val WAS_DOLLAR: Int = 0x100 + + final val MATCH_NL: Int = CLASS_NL | DOT_NL + + // As close to Perl as possible. + final val PERL: Int = CLASS_NL | ONE_LINE | PERL_X | UNICODE_GROUPS + + // POSIX syntax. 
+ final val POSIX: Int = 0 + + //// Anchors + final val UNANCHORED: Int = 0 + final val ANCHOR_START: Int = 1 + final val ANCHOR_BOTH: Int = 2 + + /** + * Parses a regular expression and returns, if successful, an + * {@code RE2} instance that can be used to match against text. + * + *

When matching against text, the regexp returns a match that + * begins as early as possible in the input (leftmost), and among those + * it chooses the one that a backtracking search would have found first. + * This so-called leftmost-first matching is the same semantics + * that Perl, Python, and other implementations use, although this + * package implements it without the expense of backtracking. + * For POSIX leftmost-longest matching, see {@link #compilePOSIX}. + */ + def compile(expr: String): RE2 = + compileImpl(expr, PERL, false) + + /** + * {@code compilePOSIX} is like {@link #compile} but restricts the + * regular expression to POSIX ERE (egrep) syntax and changes the + * match semantics to leftmost-longest. + * + *

That is, when matching against text, the regexp returns a match that + * begins as early as possible in the input (leftmost), and among those + * it chooses a match that is as long as possible. + * This so-called leftmost-longest matching is the same semantics + * that early regular expression implementations used and that POSIX + * specifies. + * + *

However, there can be multiple leftmost-longest matches, with different + * submatch choices, and here this package diverges from POSIX. + * Among the possible leftmost-longest matches, this package chooses + * the one that a backtracking search would have found first, while POSIX + * specifies that the match be chosen to maximize the length of the first + * subexpression, then the second, and so on from left to right. + * The POSIX rule is computationally prohibitive and not even well-defined. + * See http://swtch.com/~rsc/regexp/regexp2.html#posix + */ + def compilePOSIX(expr: String): RE2 = + compileImpl(expr, POSIX, true) + + // Exposed to ExecTests. + def compileImpl(expr: String, mode: Int, longest: Boolean): RE2 = { + var re: Regexp = Parser.parse(expr, mode) + val maxCap: Int = re.maxCap() // (may shrink during simplify) + re = Simplify.simplify(re) + val prog: Prog = Compiler.compileRegexp(re) + val re2: RE2 = new RE2() + re2.expr = expr + re2.prog = prog + re2.numSubexp = maxCap + re2.cond = prog.startCond() + re2.longest = longest + val prefixBuilder: java.lang.StringBuilder = new java.lang.StringBuilder() + re2.prefixComplete = prog.prefix(prefixBuilder) + re2.prefix = prefixBuilder.toString() + re2.prefixUTF8 = re2.prefix.getBytes("UTF-8") + if (!re2.prefix.isEmpty()) { + re2.prefixRune = re2.prefix.codePointAt(0) + } + re2 + } + + /** + * Returns true iff textual regular expression {@code pattern} + * matches string {@code s}. + * + *

More complicated queries need to use {@link #compile} and the + * full {@code RE2} interface. + */ + // This is visible for testing. + def match_(pattern: String, s: CharSequence): Boolean = + compile(pattern).match_1(s) + + /** + * Returns a string that quotes all regular expression metacharacters + * inside the argument text the returned string is a regular + * expression matching the literal text. For example, + * {@code quoteMeta("[foo]").equals("\\[foo\\]")}. + */ + def quoteMeta(s: String): String = { + val b: java.lang.StringBuilder = new java.lang.StringBuilder(2 * s.length()) + // A char loop is correct because all metacharacters fit in one UTF-16 code. + var i: Int = 0 + var len: Int = s.length() + while (i < len) { + val c: Char = s.charAt(i) + if ("\\.+*?()|[]{}^$".indexOf(c) >= 0) { + b.append('\\') + } + b.append(c) + i += 1 + } + b.toString() + } +} diff --git a/input/rsc/Regexp.scala b/input/rsc/Regexp.scala new file mode 100644 index 0000000..6d6e619 --- /dev/null +++ b/input/rsc/Regexp.scala @@ -0,0 +1,309 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Original Go source here: +// http://code.google.com/p/go/source/browse/src/pkg/regexp/syntax/regexp.go + +package com.twitter.re2s + +import java.util.Arrays +import com.twitter.re2s.Regexp._ + +/** + * Regular expression abstract syntax tree. + * Produced by parser, used by compiler. + * NB, this corresponds to {@code syntax.regexp} in the Go implementation + * Go's {@code regexp} is called {@code RE2} in Java. + */ +class Regexp() { + var op: Op = 0 // operator + var flags: Int = 0 // bitmap of parse flags + var subs: Array[Regexp] = null // subexpressions, if any. Never null. + // subs[0] is used as the freelist. 
+  var runes: Array[Int] = null // matched runes, for LITERAL, CHAR_CLASS
+  var min: Int = 0 // min, max for REPEAT
+  var max: Int = 0 // min, max for REPEAT
+  var cap: Int = 0 // capturing index, for CAPTURE
+  var name: String = null // capturing name, for CAPTURE
+
+  // reinit() clears every field except |op|: flags, min, max and cap
+  // become 0, runes and name become null, and subs becomes EMPTY_SUBS.
+  def reinit(): Unit = {
+    this.flags = 0
+    subs = EMPTY_SUBS
+    runes = null
+    min = 0
+    max = 0
+    cap = 0
+    name = null
+  }
+
+  // toString() returns the text that appendTo() produces for this node.
+  override def toString(): String = {
+    val out: java.lang.StringBuilder = new java.lang.StringBuilder()
+    appendTo(out)
+    out.toString
+  }
+
+  // appendTo() appends the Perl syntax for |this| regular expression to |out|.
+  // Subexpressions are wrapped in a non-capturing group "(?:...)" where the
+  // surrounding operator would otherwise bind to only part of them.
+  // Ops without a dedicated case (including the parser's pseudo ops) are
+  // printed as their numeric op code.
+  private def appendTo(out: java.lang.StringBuilder): Unit = {
+    op match {
+      case Op.NO_MATCH =>
+        out.append("[^\\x00-\\x{10FFFF}]")
+      case Op.EMPTY_MATCH =>
+        out.append("(?:)")
+      case Op.STAR | Op.PLUS | Op.QUEST | Op.REPEAT =>
+        val sub: Regexp = subs(0)
+        // Group the operand when it is itself an operator node or a
+        // multi-rune literal, so the suffix applies to all of it.
+        if (sub.op > Op.CAPTURE ||
+            sub.op == Op.LITERAL && sub.runes.length > 1) {
+          out.append("(?:")
+          sub.appendTo(out)
+          out.append(')')
+        } else {
+          sub.appendTo(out)
+        }
+        op match {
+          case Op.STAR =>
+            out.append('*')
+          case Op.PLUS =>
+            out.append('+')
+          case Op.QUEST =>
+            out.append('?')
+          case Op.REPEAT =>
+            out.append('{').append(min)
+            if (min != max) {
+              out.append(',')
+              // A negative max means "no upper bound" and prints as "{min,}".
+              if (max >= 0) {
+                out.append(max)
+              }
+            }
+            out.append('}')
+        }
+        if ((flags & RE2.NON_GREEDY) != 0) {
+          out.append('?')
+        }
+      case Op.CONCAT =>
+        // Visit each child exactly once. The bound must be strict:
+        // with "<=" the loop reads subs(subs.length) and throws
+        // ArrayIndexOutOfBoundsException.
+        var i: Int = 0
+        while (i < subs.length) {
+          val sub: Regexp = subs(i)
+          if (sub.op == Op.ALTERNATE) {
+            out.append("(?:")
+            sub.appendTo(out)
+            out.append(')')
+          } else {
+            sub.appendTo(out)
+          }
+          i += 1
+        }
+      case Op.ALTERNATE =>
+        // Children are joined with "|"; same strict bound as CONCAT.
+        var sep: String = ""
+        var i: Int = 0
+        while (i < subs.length) {
+          val sub: Regexp = subs(i)
+          out.append(sep)
+          sep = "|"
+          sub.appendTo(out)
+          i += 1
+        }
+      case Op.LITERAL =>
+        if ((flags & RE2.FOLD_CASE) != 0) {
+          out.append("(?i:")
+        }
+        var i: Int = 0
+        while (i < runes.length) {
+          val rune: Int = runes(i)
+          Utils.escapeRune(out, rune)
+          i += 1
+        }
+        if ((flags & RE2.FOLD_CASE) != 0) {
+          out.append(')')
+        }
+      case Op.ANY_CHAR_NOT_NL =>
+        out.append("(?-s:.)")
+      case Op.ANY_CHAR =>
+        out.append("(?s:.)")
+      case Op.CAPTURE =>
+        if (name == null || name.isEmpty()) {
+          out.append('(')
+        } else {
+          out.append("(?P<")
+          out.append(name)
+          out.append(">")
+        }
+        if (subs(0).op != Op.EMPTY_MATCH) {
+          subs(0).appendTo(out)
+        }
+        out.append(')')
+      case Op.BEGIN_TEXT =>
+        out.append("\\A")
+      case Op.END_TEXT =>
+        if ((flags & RE2.WAS_DOLLAR) != 0) {
+          out.append("(?-m:$)")
+        } else {
+          out.append("\\z")
+        }
+      case Op.BEGIN_LINE =>
+        out.append('^')
+      case Op.END_LINE =>
+        out.append('$')
+      case Op.WORD_BOUNDARY =>
+        out.append("\\b")
+      case Op.NO_WORD_BOUNDARY =>
+        out.append("\\B")
+      case Op.CHAR_CLASS =>
+        // runes holds [lo, hi] pairs, so an odd length is malformed.
+        if (runes.length % 2 != 0) {
+          out.append("[invalid char class]")
+        } else {
+          out.append('[')
+          if (runes.length == 0) {
+            out.append("^\\x00-\\x{10FFFF}")
+          } else if (runes(0) == 0 &&
+                     runes(runes.length - 1) == Unicode.MAX_RUNE) {
+            // Contains 0 and MAX_RUNE. Probably a negated class.
+            // Print the gaps.
+            out.append('^')
+            var i: Int = 1
+            while (i < runes.length - 1) {
+              val lo: Int = runes(i) + 1
+              val hi: Int = runes(i + 1) - 1
+              quoteIfHyphen(out, lo)
+              Utils.escapeRune(out, lo)
+              if (lo != hi) {
+                out.append('-')
+                quoteIfHyphen(out, hi)
+                Utils.escapeRune(out, hi)
+              }
+              i += 2
+            }
+          } else {
+            var i: Int = 0
+            while (i < runes.length) {
+              val lo: Int = runes(i)
+              val hi: Int = runes(i + 1)
+              quoteIfHyphen(out, lo)
+              Utils.escapeRune(out, lo)
+              if (lo != hi) {
+                out.append('-')
+                quoteIfHyphen(out, hi)
+                Utils.escapeRune(out, hi)
+              }
+              i += 2
+            }
+          }
+          out.append(']')
+        }
+      case _ => // incl. pseudos
+        out.append(op)
+    }
+  }
+
+  // maxCap() walks the regexp to find the maximum capture index.
+  def maxCap(): Int = {
+    var m: Int = 0
+    if (op == Op.CAPTURE) {
+      m = cap
+    }
+    // subs may still be null on a node that was never given children.
+    if (subs != null) {
+      var i: Int = 0
+      while (i < subs.length) {
+        val sub: Regexp = subs(i)
+        val n: Int = sub.maxCap()
+        if (m < n) {
+          m = n
+        }
+        i += 1
+      }
+    }
+    m
+  }
+
+  // equals() returns true if this and that have identical structure.
+  // Two nodes are equal when they have the same op and the fields that
+  // are meaningful for that op (flags, runes, bounds, capture data,
+  // subexpressions) compare equal; anything that is not a Regexp is unequal.
+  // NOTE(review): hashCode() is not overridden alongside equals() in this
+  // class, so instances should not be used as hash keys.
+  override def equals(that: Any): Boolean = {
+    that match {
+      case that: Regexp =>
+        val x: Regexp = this
+        val y: Regexp = that
+        if (x.op != y.op) {
+          return false
+        }
+        x.op match {
+          case Op.END_TEXT =>
+            // The parse flags remember whether this is \z or \Z.
+            if ((x.flags & RE2.WAS_DOLLAR) != (y.flags & RE2.WAS_DOLLAR)) {
+              return false
+            }
+          case Op.LITERAL | Op.CHAR_CLASS =>
+            if (!Arrays.equals(x.runes, y.runes)) {
+              return false
+            }
+          case Op.ALTERNATE | Op.CONCAT =>
+            if (x.subs.length != y.subs.length) {
+              return false
+            }
+            var i: Int = 0
+            while (i < x.subs.length) {
+              if (!x.subs(i).equals(y.subs(i))) {
+                return false
+              }
+              i += 1
+            }
+          case Op.STAR | Op.PLUS | Op.QUEST =>
+            if ((x.flags & RE2.NON_GREEDY) != (y.flags & RE2.NON_GREEDY) ||
+                !x.subs(0).equals(y.subs(0))) {
+              return false
+            }
+          case Op.REPEAT =>
+            if ((x.flags & RE2.NON_GREEDY) != (y.flags & RE2.NON_GREEDY) ||
+                x.min != y.min || x.max != y.max || !x.subs(0).equals(y.subs(0))) {
+              return false
+            }
+          case Op.CAPTURE =>
+            if (x.cap != y.cap || x.name != y.name ||
+                !x.subs(0).equals(y.subs(0))) {
+              return false
+            }
+          case _ =>
+            // Every other op (NO_MATCH, EMPTY_MATCH, ANY_CHAR, the anchors,
+            // word boundaries, pseudo ops) has no fields to compare beyond
+            // the op itself. Without this default case the match would
+            // throw scala.MatchError for those ops.
+        }
+        return true
+      case _ =>
+        false
+    }
+  }
+
+}
+
+object Regexp {
+  type Op = Int
+  object Op {
+    final val NO_MATCH: Int = 0 // Matches no strings.
+    final val EMPTY_MATCH: Int = 1 // Matches empty string.
+    final val LITERAL: Int = 2 // Matches runes[] sequence
+    final val CHAR_CLASS: Int = 3 // Matches Runes interpreted as range pair list
+    final val ANY_CHAR_NOT_NL: Int = 4 // Matches any character except '\n'
+    final val ANY_CHAR: Int = 5 // Matches any character
+    final val BEGIN_LINE: Int = 6 // Matches empty string at beginning of line
+    final val END_LINE: Int = 7 // Matches empty string at end of line
+    final val BEGIN_TEXT: Int = 8 // Matches empty string at beginning of text
+    final val END_TEXT: Int = 9 // Matches empty string at end of text
+    final val WORD_BOUNDARY: Int = 10 // Matches word boundary `\b`
+    final val NO_WORD_BOUNDARY: Int = 11 // Matches word non-boundary `\B`
+    final val CAPTURE: Int = 12 // Capturing subexpr with index cap, optional name name
+    final val STAR: Int = 13 // Matches subs[0] zero or more times.
+    final val PLUS: Int = 14 // Matches subs[0] one or more times.
+    final val QUEST: Int = 15 // Matches subs[0] zero or one times.
+    final val REPEAT: Int = 16 // Matches subs[0] [min, max] times; max=-1 => no limit.
+    final val CONCAT: Int = 17 // Matches concatenation of subs[]
+    final val ALTERNATE: Int = 18 // Matches union of subs[]
+
+    // Pseudo ops, used internally by Parser for parsing stack:
+    final val LEFT_PAREN: Int = 19
+    final val VERTICAL_BAR: Int = 20
+
+    // isPseudo() reports whether op is one of the parser-only pseudo ops,
+    // i.e. its code is LEFT_PAREN or higher.
+    def isPseudo(op: Op): Boolean = op >= LEFT_PAREN
+  }
+
+  // Shared zero-length array assigned to subs by Regexp.reinit().
+  final val EMPTY_SUBS: Array[Regexp] = new Array[Regexp](0)
+
+  // quoteIfHyphen() appends a backslash to |out| when |rune| is '-', so that
+  // a hyphen written afterwards is taken literally inside a character class.
+  private def quoteIfHyphen(out: java.lang.StringBuilder, rune: Int): Unit = {
+    if (rune == '-') {
+      out.append('\\')
+    }
+  }
+}
diff --git a/input/rsc/Simplify.scala b/input/rsc/Simplify.scala
new file mode 100644
index 0000000..b7d7b45
--- /dev/null
+++ b/input/rsc/Simplify.scala
@@ -0,0 +1,194 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Original Go source here:
+// http://code.google.com/p/go/source/browse/src/pkg/regexp/syntax/simplify.go
+
+package com.twitter.re2s
+
+import java.util.ArrayList
+import com.twitter.re2s.Regexp.{Op => ROP}
+
+object Simplify {
+
+  // Simplify returns a regexp equivalent to re but without counted
+  // repetitions and with various other simplifications, such as
+  // rewriting /(?:a+)+/ to /a+/. The resulting regexp will execute
+  // correctly but its string representation will not produce the same
+  // parse tree, because capturing parentheses may have been duplicated
+  // or removed. For example, the simplified form for /(x){1,2}/ is
+  // /(x)(x)?/ but both parentheses capture as $1. The returned regexp
+  // may share structure with or be the original.
+  //
+  // A null input yields null. Ops other than CAPTURE, CONCAT, ALTERNATE,
+  // STAR, PLUS, QUEST and REPEAT are returned unchanged.
+  def simplify(re: Regexp): Regexp = {
+    if (re == null) {
+      return null
+    }
+    re.op match {
+      case ROP.CAPTURE | ROP.CONCAT | ROP.ALTERNATE =>
+        // Simplify children, building new Regexp if children change.
+        var nre: Regexp = re
+        // Whether nre is already a private copy of re. This is tracked with
+        // a flag because `==` on Regexp is structural: a fresh copy compares
+        // equal to re, so comparing nre with re cannot tell them apart.
+        var copied: Boolean = false
+        var i: Int = 0
+        while (i < re.subs.length) {
+          val sub: Regexp = re.subs(i)
+          val nsub: Regexp = simplify(sub)
+          if (!copied && nsub != sub) {
+            // Start a copy.
+            nre = new Regexp() // shallow copy
+            nre.op = re.op
+            nre.flags = re.flags
+            nre.subs = re.subs
+            nre.runes = re.runes
+            nre.min = re.min
+            nre.max = re.max
+            nre.cap = re.cap
+            nre.name = re.name
+            nre.runes = null
+            nre.subs = Parser.subarray(re.subs, 0, re.subs.length) // clone
+            copied = true
+          }
+          if (copied) {
+            // From the first changed child onwards, record every result.
+            nre.subs(i) = nsub
+          }
+          i += 1
+        }
+        nre
+      case ROP.STAR | ROP.PLUS | ROP.QUEST =>
+        val sub: Regexp = simplify(re.subs(0))
+        simplify1(re.op, re.flags, sub, re)
+      case ROP.REPEAT =>
+        // Special special case: x{0} matches the empty string
+        // and doesn't even need to consider x.
+        if (re.min == 0 && re.max == 0) {
+          val empty: Regexp = new Regexp()
+          empty.op = ROP.EMPTY_MATCH
+          return empty
+        }
+
+        // The fun begins.
+        val sub: Regexp = simplify(re.subs(0))
+
+        // x{n,} means at least n matches of x.
+        if (re.max == -1) {
+          // Special case: x{0,} is x*.
+          if (re.min == 0) {
+            return simplify1(ROP.STAR, re.flags, sub, null)
+          }
+
+          // Special case: x{1,} is x+.
+          if (re.min == 1) {
+            return simplify1(ROP.PLUS, re.flags, sub, null)
+          }
+
+          // General case: x{4,} is xxxx+.
+          val nre: Regexp = new Regexp()
+          nre.op = ROP.CONCAT
+          val subs: ArrayList[Regexp] = new ArrayList[Regexp]()
+          var i: Int = 0
+          while (i < re.min - 1) {
+            subs.add(sub)
+            i += 1
+          }
+          subs.add(simplify1(ROP.PLUS, re.flags, sub, null))
+          nre.subs = subs.toArray(new Array[Regexp](subs.size()))
+          // An explicit return is required: this `if` is not the last
+          // expression of the case, so a bare `nre` would be discarded.
+          return nre
+        }
+
+        // Special case x{0} handled above.
+
+        // Special case: x{1} is just x.
+        if (re.min == 1 && re.max == 1) {
+          return sub
+        }
+
+        // General case: x{n,m} means n copies of x and m copies of x?
+        // The machine will do less work if we nest the final m copies,
+        // so that x{2,5} = xx(x(x(x)?)?)?
+
+        // Build leading prefix: xx.
+        var prefixSubs: ArrayList[Regexp] = null
+        if (re.min > 0) {
+          prefixSubs = new ArrayList[Regexp]()
+          var i: Int = 0
+          while (i < re.min) {
+            prefixSubs.add(sub)
+            i += 1
+          }
+        }
+
+        // Build and attach suffix: (x(x(x)?)?)?
+        if (re.max > re.min) {
+          var suffix: Regexp = simplify1(ROP.QUEST, re.flags, sub, null)
+          var i: Int = re.min + 1
+          while (i < re.max) {
+            val nre2: Regexp = new Regexp()
+            nre2.op = ROP.CONCAT
+            nre2.subs = Array[Regexp](sub, suffix)
+            suffix = simplify1(ROP.QUEST, re.flags, nre2, null)
+            i += 1
+          }
+          if (prefixSubs == null) {
+            return suffix
+          }
+          prefixSubs.add(suffix)
+        }
+        if (prefixSubs != null) {
+          val prefix: Regexp = new Regexp()
+          prefix.op = ROP.CONCAT
+          prefix.subs =
+            prefixSubs.toArray(new Array[Regexp](prefixSubs.size()))
+          return prefix
+        }
+
+        // Some degenerate case like min > max or min < max < 0.
+        // Handle as impossible match.
+        val nre: Regexp = new Regexp()
+        nre.op = ROP.NO_MATCH
+        nre
+      case _ =>
+        re
+    }
+  }
+
+  // simplify1 implements Simplify for the unary OpStar,
+  // OpPlus, and OpQuest operators. It returns the simple regexp
+  // equivalent to
+  //
+  //      Regexp{Op: op, Flags: flags, Sub: {sub}}
+  //
+  // under the assumption that sub is already simple, and
+  // without first allocating that structure. If the regexp
+  // to be returned turns out to be equivalent to re, simplify1
+  // returns re instead.
+  //
+  // simplify1 is factored out of Simplify because the implementation
+  // for other operators generates these unary expressions.
+  // Letting them call simplify1 makes sure the expressions they
+  // generate are simple.
+  //
+  // _re may be null, in which case a new node is always allocated unless
+  // one of the two shortcuts below returns sub itself.
+  private def simplify1(op: ROP,
+                        flags: Int,
+                        sub: Regexp,
+                        _re: Regexp): Regexp = {
+    var re: Regexp = _re
+
+    // Special case: repeat the empty string as much as
+    // you want, but it's still the empty string.
+    if (sub.op == ROP.EMPTY_MATCH) {
+      return sub
+    }
+    // The operators are idempotent if the flags match.
+    if (op == sub.op &&
+        (flags & RE2.NON_GREEDY) == (sub.flags & RE2.NON_GREEDY)) {
+      return sub
+    }
+    // Reuse re when it already has this op, greediness and operand.
+    // `==` on Regexp is structural, so an equal-but-distinct operand
+    // also qualifies.
+    if (re != null && re.op == op &&
+        (re.flags & RE2.NON_GREEDY) == (flags & RE2.NON_GREEDY) &&
+        sub == re.subs(0)) {
+      return re
+    }
+
+    re = new Regexp()
+    re.op = op
+    re.flags = flags
+    re.subs = Array[Regexp](sub)
+    re
+  }
+}
diff --git a/input/rsc/Stdlib.scala b/input/rsc/Stdlib.scala
new file mode 100644
index 0000000..ae257a6
--- /dev/null
+++ b/input/rsc/Stdlib.scala
@@ -0,0 +1,182 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+
+// Signature-only declarations of the java.* and scala.* members used by the
+// sources in this directory. No member has a body.
+// NOTE(review): presumably these stand in for the real standard library when
+// the benchmark input is compiled; confirm before tightening any signature.
+package java {
+  package io {
+    class Serializable()
+  }
+
+  package lang {
+    class ArrayIndexOutOfBoundsException(message: String)
+    object Character {
+      def charCount(x: Int): Int
+      def codePointAt(x: Int): Int
+      def codePointBefore(x: Int): Int
+      def isLowerCase(x: Char): Boolean
+      def isUpperCase(x: Char): Boolean
+      def toChars(x: Int): Array[Char]
+    }
+    class CharSequence() {
+      def charAt(x: Int): Char
+      def length(): Int
+      def subSequence(x: Int, y: Int): CharSequence
+      def toString(): String
+    }
+    class IllegalArgumentException(message: String)
+    class IllegalStateException(message: String)
+    class IndexOutOfBoundsException(message: String)
+    object Integer {
+      def valueOf(x: String, y: Int): Int
+      def toHexString(x: Int): String
+    }
+    class NullPointerException(message: String)
+    class RuntimeException()
+    class String() {
+      def apply(x: Int): Char
+      def charAt(x: Int): Char
+      def codePointAt(x: Int): Int
+      def codePointCount(x: Int, y: Int): Int
+      def getBytes(): Array[Byte]
+      def indexOf(x: Char): Int
+      def isEmpty(): Boolean
+      def length(): Int
+      def startsWith(x: String): Boolean
+      def substring(x: Int, y: Int): String
+    }
+    object String {
+      def valueOf(x: Char): String
+    }
+    class StringBuffer() {
+      def append(x: Any): StringBuffer
+    }
+    class StringBuilder() {
+      def append(x: Any): StringBuilder
+      def appendCodePoint(x: Int): StringBuilder
+      def indexOf(x: String, y: Int): Int
+      def length(): Int
+    }
+    object System {
+      def arraycopy(
+          src: Any,
+          srcPos: Int,
+          dest: Any,
+          destPos: Int,
+          length: Int): Unit
+    }
+  }
+
+  package util {
+    // NOTE(review): only `fill` is declared here, yet Regexp.equals calls
+    // Arrays.equals with two arguments; confirm how that call resolves.
+    object Arrays {
+      def fill(x: Any, y: Any): Unit
+    }
+    class ArrayList[T]() {
+      def add(x: T): Boolean
+      def clear(): Unit
+      def get(x: Int): T
+      def isEmpty(): Boolean
+      def iterator(): Iterator[T]
+      def remove(x: Any): Boolean
+      def removeRange(x: Int, y: Int): Unit
+      def set(x: Int, y: T): Unit
+      def size(): Int
+      def subList(x: Int, y: Int): ArrayList[T]
+      def toArray(x: Any): Array[T]
+    }
+    class HashMap[T, U]() {
+      def get(x: T): U
+      def put(x: T, y: U): U
+    }
+    class Iterator[T]() {
+      def hasNext(): Boolean
+      def next(): T
+    }
+    class List[T]() {
+      def add(x: T): Boolean
+    }
+    class Map[T, U]() {
+      def get(x: T): U
+      def put(x: T, y: U): U
+    }
+  }
+}
+
+package scala {
+  class Any() {
+    def +(x: Any): String
+    def ==(x: Any): Boolean
+    def !=(x: Any): Boolean
+    def equals(x: Any): Boolean
+    def toString(): String
+  }
+  class AnyVal() extends Any()
+  // Reference identity (`eq` / `ne`) is not declared here, so sources
+  // compiled against these declarations can only compare with == and !=.
+  class AnyRef() extends Any() {
+    def synchronized[T](x: T): T
+  }
+  class Array[T]() {
+    val length: Int
+    def apply(x: Int): T
+    // NOTE(review): the stored value is typed Int rather than T; confirm
+    // whether that is intentional.
+    def update(x: Int, y: Int): Unit
+  }
+  object Array {
+    def apply[T](x: T*): Array[T]
+  }
+  class Boolean() extends AnyVal() {
+    def apply(): Boolean
+    def !(): Boolean
+    def &&(x: Boolean): Boolean
+    def ||(x: Boolean): Boolean
+  }
+  class Byte() extends AnyVal() {
+    def &(x: Any): Byte
+  }
+  class Char() extends AnyVal() {
+    def <(x: Any): Boolean
+    def <=(x: Any): Boolean
+    def >(x: Any): Boolean
+    def >=(x: Any): Boolean
+    def -(x: Any): Char
+    def +(x: Any): Char
+  }
+  class Float() extends AnyVal()
+  class Double() extends AnyVal()
+  class Function0[+R]() {
+    def apply(): R
+  }
+  class Function1[-T1, +R]() {
+    def apply(x: T1): R
+  }
+  class Function2[-T1, -T2, +R]()
+  class Function3[-T1, -T2, -T3, +R]()
+  class Function4[-T1, -T2, -T3, -T4, +R]()
+  // NOTE(review): Int extends Any() while the other primitive types extend
+  // AnyVal(), and toChar() is declared to return Int; confirm both.
+  class Int() extends Any() {
+    def ~(x: Any): Int
+    def &(x: Any): Int
+    def |(x: Any): Int
+    def ^(x: Any): Int
+    def <<(x: Any): Int
+    def >>(x: Any): Int
+    def <(x: Any): Boolean
+    def <=(x: Any): Boolean
+    def >(x: Any): Boolean
+    def >=(x: Any): Boolean
+    def +(x: Any): Int
+    def -(x: Any): Int
+    def *(x: Any): Int
+    def /(x: Any): Int
+    def %(x: Any): Int
+    def toChar(): Int
+    def toShort(): Short
+  }
+  class Nothing()
+  class Long() extends AnyVal()
+  class Seq[T]()
+  class Short() extends AnyVal()
+  class Tuple1[+T1]()
+  class Tuple2[+T1, +T2]()
+  class Tuple3[+T1, +T2, +T3]()
+  class Tuple4[+T1, +T2, +T3, +T4]()
+  class Unit() extends AnyVal()
+  object Predef {
+    type String = _root_.java.lang.String
+  }
+}
diff --git a/input/rsc/Unicode.scala b/input/rsc/Unicode.scala
new file mode 100644
index 0000000..2b9ca2c
--- /dev/null
+++ b/input/rsc/Unicode.scala
@@ -0,0 +1,242 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Many of these were derived from the corresponding Go functions in
+// http://code.google.com/p/go/source/browse/src/pkg/unicode/letter.go
+
+package com.twitter.re2s
+
+/**
+ * Utilities for dealing with Unicode better than Java does.
+ *
+ * @author adonovan@google.com (Alan Donovan)
+ */
+object Unicode {
+
+  // The highest legal rune value.
+  final val MAX_RUNE: Int = 0x10FFFF
+
+  // The highest legal ASCII value.
+  final val MAX_ASCII: Int = 0x7f
+
+  // The highest legal Latin-1 value.
+  final val MAX_LATIN1: Int = 0xFF
+
+  // Exclusive upper bound on the case selector accepted by to_3.
+  private final val MAX_CASE: Int = 3
+
+  // Represents invalid code points.
+  private final val REPLACEMENT_CHAR: Int = 0xFFFD
+
+  // Minimum and maximum runes involved in folding.
+  // Checked during test.
+  final val MIN_FOLD: Int = 0x0041
+  final val MAX_FOLD: Int = 0x1044f
+
+  // is32 uses binary search to test whether rune is in the specified
+  // slice of 32-bit ranges.
+  //
+  // Each entry of ranges is [lo, hi, stride]. r is a member when it lies in
+  // [lo, hi] of some entry and is a whole number of strides above lo.
+  // The search assumes the entries are sorted ascending and do not overlap
+  // (TODO confirm against the generated tables).
+  // TODO(adonovan): opt: consider using int[n*3] instead of int[n][3].
+  private def is32(ranges: Array[Array[Int]], r: Int): Boolean = {
+    // binary search over ranges
+    var lo: Int = 0
+    var hi: Int = ranges.length
+    while (lo < hi) {
+      // Midpoint written this way so that lo + hi cannot overflow.
+      val m: Int = lo + (hi - lo) / 2
+      val range: Array[Int] = ranges(m) // [lo, hi, stride]
+      if (range(0) <= r && r <= range(1)) {
+        return ((r - range(0)) % range(2)) == 0
+      }
+      if (r < range(0)) {
+        hi = m
+      } else {
+        lo = m + 1
+      }
+    }
+    return false
+  }
+
+  // is tests whether rune is in the specified table of ranges.
+  private def is(ranges: Array[Array[Int]], r: Int): Boolean = {
+    // Runes above Latin-1 take the binary search. An empty table, or a rune
+    // below the first range, is rejected before searching.
+    if (r > MAX_LATIN1) {
+      return ranges.length > 0 && r >= ranges(0)(0) && is32(ranges, r)
+    }
+
+    // common case: rune is ASCII or Latin-1, so scan linearly, skipping
+    // every range that ends before r.
+    var idx: Int = 0
+    while (idx < ranges.length && r > ranges(idx)(1)) {
+      idx += 1
+    }
+    if (idx == ranges.length) {
+      return false
+    }
+    val entry: Array[Int] = ranges(idx) // entry = [lo, hi, stride]
+    r >= entry(0) && ((r - entry(0)) % entry(2)) == 0
+  }
+
+  // isUpper reports whether the rune is an upper case letter.
+  // Latin-1 runes are answered by Character.isUpperCase; everything above
+  // is looked up in UnicodeTables.Upper.
+  def isUpper(r: Int): Boolean = {
+    if (r > MAX_LATIN1) {
+      return is(UnicodeTables.Upper, r)
+    }
+    Character.isUpperCase(r.toChar)
+  }
+
+  // isLower reports whether the rune is a lower case letter.
+  // Latin-1 runes are answered by Character.isLowerCase; everything above
+  // is looked up in UnicodeTables.Lower.
+  def isLower(r: Int): Boolean = {
+    if (r > MAX_LATIN1) {
+      return is(UnicodeTables.Lower, r)
+    }
+    Character.isLowerCase(r.toChar)
+  }
+
+  // isTitle reports whether the rune is a title case letter.
+  // No rune up to MAX_LATIN1 is reported as title case.
+  def isTitle(r: Int): Boolean = {
+    r > MAX_LATIN1 && is(UnicodeTables.Title, r)
+  }
+
+  // isPrint reports whether the rune is printable (Unicode L/M/N/P/S or ' ').
+  def isPrint(r: Int): Boolean = {
+    if (r > MAX_LATIN1) {
+      return is(UnicodeTables.L, r) ||
+        is(UnicodeTables.M, r) ||
+        is(UnicodeTables.N, r) ||
+        is(UnicodeTables.P, r) ||
+        is(UnicodeTables.S, r)
+    }
+    // Latin-1 is decided by two fixed windows instead of the tables.
+    val inLowWindow: Boolean = r >= 0x20 && r < 0x7F
+    val inHighWindow: Boolean = r >= 0xA1 && r != 0xAD
+    inLowWindow || inHighWindow
+  }
+
+  // A case range is conceptually a record:
+  // class CaseRange {
+  //   int lo, hi
+  //   int upper, lower, title
+  // }
+  // but flattened as an int[5].
+
+  // to maps the rune using the specified case mapping.
+  // A kase outside [0, MAX_CASE) yields REPLACEMENT_CHAR; a rune that no
+  // range covers is returned unchanged.
+  private def to_3(kase: Int, r: Int, caseRange: Array[Array[Int]]): Int = {
+    val validCase: Boolean = 0 <= kase && kase < MAX_CASE
+    if (!validCase) {
+      return REPLACEMENT_CHAR // as reasonable an error as any
+    }
+    // binary search over ranges
+    var low: Int = 0
+    var high: Int = caseRange.length
+    while (low < high) {
+      val mid: Int = low + (high - low) / 2
+      val entry: Array[Int] = caseRange(mid) // entry = [lo, hi, upper, lower, title]
+      val first: Int = entry(0)
+      val last: Int = entry(1)
+      if (r < first) {
+        high = mid
+      } else if (r > last) {
+        low = mid + 1
+      } else {
+        val delta: Int = entry(2 + kase)
+        if (delta > MAX_RUNE) {
+          // In an Upper-Lower sequence, which always starts with
+          // an UpperCase letter, the real deltas always look like:
+          // {0, 1, 0} UpperCase (Lower is next)
+          // {-1, 0, -1} LowerCase (Upper, Title are previous)
+          // The characters at even offsets from the beginning of the
+          // sequence are upper case the ones at odd offsets are lower.
+          // The correct mapping can be done by clearing or setting the low
+          // bit in the sequence offset.
+          // The constants UpperCase and TitleCase are even while LowerCase
+          // is odd so we take the low bit from kase.
+          return first + (((r - first) & ~1) | (kase & 1))
+        }
+        return r + delta
+      }
+    }
+    r
+  }
+
+  // to maps the rune to the specified case: UpperCase, LowerCase, or TitleCase.
+  private def to_2(kase: Int, r: Int): Int = {
+    to_3(kase, r, UnicodeTables.CASE_RANGES)
+  }
+
+  // toUpper maps the rune to upper case.
+  // ASCII is handled arithmetically; other runes go through the case tables.
+  def toUpper(r: Int): Int = {
+    if (r > MAX_ASCII) {
+      return to_2(UnicodeTables.UpperCase, r)
+    }
+    if ('a' <= r && r <= 'z') {
+      r - ('a' - 'A')
+    } else {
+      r
+    }
+  }
+
+  // toLower maps the rune to lower case.
+  // ASCII is handled arithmetically; other runes go through the case tables.
+  def toLower(r: Int): Int = {
+    if (r > MAX_ASCII) {
+      return to_2(UnicodeTables.LowerCase, r)
+    }
+    if ('A' <= r && r <= 'Z') {
+      r + ('a' - 'A')
+    } else {
+      r
+    }
+  }
+
+  // simpleFold iterates over Unicode code points equivalent under
+  // the Unicode-defined simple case folding. Among the code points
+  // equivalent to rune (including rune itself), SimpleFold returns the
+  // smallest r >= rune if one exists, or else the smallest r >= 0.
+  //
+  // For example:
+  //      SimpleFold('A') = 'a'
+  //      SimpleFold('a') = 'A'
+  //
+  //      SimpleFold('K') = 'k'
+  //      SimpleFold('k') = '\u212A' (Kelvin symbol, K)
+  //      SimpleFold('\u212A') = 'K'
+  //
+  //      SimpleFold('1') = '1'
+  //
+  // Derived from Go's unicode.SimpleFold.
+  //
+  def simpleFold(r: Int): Int = {
+    // Consult caseOrbit table for special cases: find the first entry whose
+    // key is not below r.
+    var lower: Int = 0
+    var upper: Int = UnicodeTables.CASE_ORBIT.length
+    while (lower < upper) {
+      val mid: Int = lower + (upper - lower) / 2
+      if (UnicodeTables.CASE_ORBIT(mid)(0) >= r) {
+        upper = mid
+      } else {
+        lower = mid + 1
+      }
+    }
+    val listed: Boolean = lower < UnicodeTables.CASE_ORBIT.length &&
+      UnicodeTables.CASE_ORBIT(lower)(0) == r
+    if (listed) {
+      return UnicodeTables.CASE_ORBIT(lower)(1)
+    }
+
+    // No folding specified. This is a one- or two-element
+    // equivalence class containing rune and toLower(rune)
+    // and toUpper(rune) if they are different from rune.
+    val lowered: Int = toLower(r)
+    if (lowered == r) {
+      toUpper(r)
+    } else {
+      lowered
+    }
+  }
+}
diff --git a/input/rsc/UnicodeTables.scala b/input/rsc/UnicodeTables.scala
new file mode 100644
index 0000000..9a7ad4c
--- /dev/null
+++ b/input/rsc/UnicodeTables.scala
@@ -0,0 +1,5051 @@
+// AUTOGENERATED by make_unicode_tables.awk from the output of
+// go/src/pkg/unicode/maketables.go. Yes it's awful, but frankly
+// it's quicker than porting 1300 more lines of Go.
+ +package com.twitter.re2s + +import java.util.HashMap +import java.util.Map + +object UnicodeTables { + final lazy val UpperCase: Int = 0 + final lazy val LowerCase: Int = 1 + final lazy val TitleCase: Int = 2 + final lazy val UpperLower: Int = 0x110000 + + private lazy val _Lm: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x02b0, 0x02c1, 1), + Array[Int](0x02c6, 0x02d1, 1), + Array[Int](0x02e0, 0x02e4, 1), + Array[Int](0x02ec, 0x02ee, 2), + Array[Int](0x0374, 0x037a, 6), + Array[Int](0x0559, 0x0640, 231), + Array[Int](0x06e5, 0x06e6, 1), + Array[Int](0x07f4, 0x07f5, 1), + Array[Int](0x07fa, 0x081a, 32), + Array[Int](0x0824, 0x0828, 4), + Array[Int](0x0971, 0x0e46, 1237), + Array[Int](0x0ec6, 0x10fc, 566), + Array[Int](0x17d7, 0x1843, 108), + Array[Int](0x1aa7, 0x1c78, 465), + Array[Int](0x1c79, 0x1c7d, 1), + Array[Int](0x1d2c, 0x1d61, 1), + Array[Int](0x1d78, 0x1d9b, 35), + Array[Int](0x1d9c, 0x1dbf, 1), + Array[Int](0x2071, 0x207f, 14), + Array[Int](0x2090, 0x209c, 1), + Array[Int](0x2c7d, 0x2d6f, 242), + Array[Int](0x2e2f, 0x3005, 470), + Array[Int](0x3031, 0x3035, 1), + Array[Int](0x303b, 0x309d, 98), + Array[Int](0x309e, 0x30fc, 94), + Array[Int](0x30fd, 0x30fe, 1), + Array[Int](0xa015, 0xa4f8, 1251), + Array[Int](0xa4f9, 0xa4fd, 1), + Array[Int](0xa60c, 0xa67f, 115), + Array[Int](0xa717, 0xa71f, 1), + Array[Int](0xa770, 0xa788, 24), + Array[Int](0xa9cf, 0xaa70, 161), + Array[Int](0xaadd, 0xff70, 21651), + Array[Int](0xff9e, 0xff9f, 1) + ) + private final lazy val _Ll: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0061, 0x007a, 1), + Array[Int](0x00aa, 0x00b5, 11), + Array[Int](0x00ba, 0x00df, 37), + Array[Int](0x00e0, 0x00f6, 1), + Array[Int](0x00f8, 0x00ff, 1), + Array[Int](0x0101, 0x0137, 2), + Array[Int](0x0138, 0x0148, 2), + Array[Int](0x0149, 0x0177, 2), + Array[Int](0x017a, 0x017e, 2), + Array[Int](0x017f, 0x0180, 1), + Array[Int](0x0183, 0x0185, 2), + Array[Int](0x0188, 0x018c, 4), + Array[Int](0x018d, 0x0192, 5), + 
Array[Int](0x0195, 0x0199, 4), + Array[Int](0x019a, 0x019b, 1), + Array[Int](0x019e, 0x01a1, 3), + Array[Int](0x01a3, 0x01a5, 2), + Array[Int](0x01a8, 0x01aa, 2), + Array[Int](0x01ab, 0x01ad, 2), + Array[Int](0x01b0, 0x01b4, 4), + Array[Int](0x01b6, 0x01b9, 3), + Array[Int](0x01ba, 0x01bd, 3), + Array[Int](0x01be, 0x01bf, 1), + Array[Int](0x01c6, 0x01cc, 3), + Array[Int](0x01ce, 0x01dc, 2), + Array[Int](0x01dd, 0x01ef, 2), + Array[Int](0x01f0, 0x01f3, 3), + Array[Int](0x01f5, 0x01f9, 4), + Array[Int](0x01fb, 0x0233, 2), + Array[Int](0x0234, 0x0239, 1), + Array[Int](0x023c, 0x023f, 3), + Array[Int](0x0240, 0x0242, 2), + Array[Int](0x0247, 0x024f, 2), + Array[Int](0x0250, 0x0293, 1), + Array[Int](0x0295, 0x02af, 1), + Array[Int](0x0371, 0x0373, 2), + Array[Int](0x0377, 0x037b, 4), + Array[Int](0x037c, 0x037d, 1), + Array[Int](0x0390, 0x03ac, 28), + Array[Int](0x03ad, 0x03ce, 1), + Array[Int](0x03d0, 0x03d1, 1), + Array[Int](0x03d5, 0x03d7, 1), + Array[Int](0x03d9, 0x03ef, 2), + Array[Int](0x03f0, 0x03f3, 1), + Array[Int](0x03f5, 0x03fb, 3), + Array[Int](0x03fc, 0x0430, 52), + Array[Int](0x0431, 0x045f, 1), + Array[Int](0x0461, 0x0481, 2), + Array[Int](0x048b, 0x04bf, 2), + Array[Int](0x04c2, 0x04ce, 2), + Array[Int](0x04cf, 0x0527, 2), + Array[Int](0x0561, 0x0587, 1), + Array[Int](0x1d00, 0x1d2b, 1), + Array[Int](0x1d62, 0x1d77, 1), + Array[Int](0x1d79, 0x1d9a, 1), + Array[Int](0x1e01, 0x1e95, 2), + Array[Int](0x1e96, 0x1e9d, 1), + Array[Int](0x1e9f, 0x1eff, 2), + Array[Int](0x1f00, 0x1f07, 1), + Array[Int](0x1f10, 0x1f15, 1), + Array[Int](0x1f20, 0x1f27, 1), + Array[Int](0x1f30, 0x1f37, 1), + Array[Int](0x1f40, 0x1f45, 1), + Array[Int](0x1f50, 0x1f57, 1), + Array[Int](0x1f60, 0x1f67, 1), + Array[Int](0x1f70, 0x1f7d, 1), + Array[Int](0x1f80, 0x1f87, 1), + Array[Int](0x1f90, 0x1f97, 1), + Array[Int](0x1fa0, 0x1fa7, 1), + Array[Int](0x1fb0, 0x1fb4, 1), + Array[Int](0x1fb6, 0x1fb7, 1), + Array[Int](0x1fbe, 0x1fc2, 4), + Array[Int](0x1fc3, 0x1fc4, 1), + 
Array[Int](0x1fc6, 0x1fc7, 1), + Array[Int](0x1fd0, 0x1fd3, 1), + Array[Int](0x1fd6, 0x1fd7, 1), + Array[Int](0x1fe0, 0x1fe7, 1), + Array[Int](0x1ff2, 0x1ff4, 1), + Array[Int](0x1ff6, 0x1ff7, 1), + Array[Int](0x210a, 0x210e, 4), + Array[Int](0x210f, 0x2113, 4), + Array[Int](0x212f, 0x2139, 5), + Array[Int](0x213c, 0x213d, 1), + Array[Int](0x2146, 0x2149, 1), + Array[Int](0x214e, 0x2184, 54), + Array[Int](0x2c30, 0x2c5e, 1), + Array[Int](0x2c61, 0x2c65, 4), + Array[Int](0x2c66, 0x2c6c, 2), + Array[Int](0x2c71, 0x2c73, 2), + Array[Int](0x2c74, 0x2c76, 2), + Array[Int](0x2c77, 0x2c7c, 1), + Array[Int](0x2c81, 0x2ce3, 2), + Array[Int](0x2ce4, 0x2cec, 8), + Array[Int](0x2cee, 0x2d00, 18), + Array[Int](0x2d01, 0x2d25, 1), + Array[Int](0xa641, 0xa66d, 2), + Array[Int](0xa681, 0xa697, 2), + Array[Int](0xa723, 0xa72f, 2), + Array[Int](0xa730, 0xa731, 1), + Array[Int](0xa733, 0xa771, 2), + Array[Int](0xa772, 0xa778, 1), + Array[Int](0xa77a, 0xa77c, 2), + Array[Int](0xa77f, 0xa787, 2), + Array[Int](0xa78c, 0xa78e, 2), + Array[Int](0xa791, 0xa7a1, 16), + Array[Int](0xa7a3, 0xa7a9, 2), + Array[Int](0xa7fa, 0xfb00, 21254), + Array[Int](0xfb01, 0xfb06, 1), + Array[Int](0xfb13, 0xfb17, 1), + Array[Int](0xff41, 0xff5a, 1), + Array[Int](0x10428, 0x1044f, 1), + Array[Int](0x1d41a, 0x1d433, 1), + Array[Int](0x1d44e, 0x1d454, 1), + Array[Int](0x1d456, 0x1d467, 1), + Array[Int](0x1d482, 0x1d49b, 1), + Array[Int](0x1d4b6, 0x1d4b9, 1), + Array[Int](0x1d4bb, 0x1d4bd, 2), + Array[Int](0x1d4be, 0x1d4c3, 1), + Array[Int](0x1d4c5, 0x1d4cf, 1), + Array[Int](0x1d4ea, 0x1d503, 1), + Array[Int](0x1d51e, 0x1d537, 1), + Array[Int](0x1d552, 0x1d56b, 1), + Array[Int](0x1d586, 0x1d59f, 1), + Array[Int](0x1d5ba, 0x1d5d3, 1), + Array[Int](0x1d5ee, 0x1d607, 1), + Array[Int](0x1d622, 0x1d63b, 1), + Array[Int](0x1d656, 0x1d66f, 1), + Array[Int](0x1d68a, 0x1d6a5, 1), + Array[Int](0x1d6c2, 0x1d6da, 1), + Array[Int](0x1d6dc, 0x1d6e1, 1), + Array[Int](0x1d6fc, 0x1d714, 1), + Array[Int](0x1d716, 0x1d71b, 1), + 
Array[Int](0x1d736, 0x1d74e, 1), + Array[Int](0x1d750, 0x1d755, 1), + Array[Int](0x1d770, 0x1d788, 1), + Array[Int](0x1d78a, 0x1d78f, 1), + Array[Int](0x1d7aa, 0x1d7c2, 1), + Array[Int](0x1d7c4, 0x1d7c9, 1), + Array[Int](0x1d7cb, 0x1d7cb, 1) + ) + private final lazy val _C: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0001, 0x001f, 1), + Array[Int](0x007f, 0x009f, 1), + Array[Int](0x00ad, 0x0600, 1363), + Array[Int](0x0601, 0x0603, 1), + Array[Int](0x06dd, 0x070f, 50), + Array[Int](0x17b4, 0x17b5, 1), + Array[Int](0x200b, 0x200f, 1), + Array[Int](0x202a, 0x202e, 1), + Array[Int](0x2060, 0x2064, 1), + Array[Int](0x206a, 0x206f, 1), + Array[Int](0xd800, 0xf8ff, 1), + Array[Int](0xfeff, 0xfff9, 250), + Array[Int](0xfffa, 0xfffb, 1), + Array[Int](0x110bd, 0x1d173, 49334), + Array[Int](0x1d174, 0x1d17a, 1), + Array[Int](0xe0001, 0xe0020, 31), + Array[Int](0xe0021, 0xe007f, 1), + Array[Int](0xf0000, 0xffffd, 1), + Array[Int](0x100000, 0x10fffd, 1) + ) + private final lazy val _M: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0300, 0x036f, 1), + Array[Int](0x0483, 0x0489, 1), + Array[Int](0x0591, 0x05bd, 1), + Array[Int](0x05bf, 0x05c1, 2), + Array[Int](0x05c2, 0x05c4, 2), + Array[Int](0x05c5, 0x05c7, 2), + Array[Int](0x0610, 0x061a, 1), + Array[Int](0x064b, 0x065f, 1), + Array[Int](0x0670, 0x06d6, 102), + Array[Int](0x06d7, 0x06dc, 1), + Array[Int](0x06df, 0x06e4, 1), + Array[Int](0x06e7, 0x06e8, 1), + Array[Int](0x06ea, 0x06ed, 1), + Array[Int](0x0711, 0x0730, 31), + Array[Int](0x0731, 0x074a, 1), + Array[Int](0x07a6, 0x07b0, 1), + Array[Int](0x07eb, 0x07f3, 1), + Array[Int](0x0816, 0x0819, 1), + Array[Int](0x081b, 0x0823, 1), + Array[Int](0x0825, 0x0827, 1), + Array[Int](0x0829, 0x082d, 1), + Array[Int](0x0859, 0x085b, 1), + Array[Int](0x0900, 0x0903, 1), + Array[Int](0x093a, 0x093c, 1), + Array[Int](0x093e, 0x094f, 1), + Array[Int](0x0951, 0x0957, 1), + Array[Int](0x0962, 0x0963, 1), + Array[Int](0x0981, 0x0983, 1), + Array[Int](0x09bc, 0x09be, 
2), + Array[Int](0x09bf, 0x09c4, 1), + Array[Int](0x09c7, 0x09c8, 1), + Array[Int](0x09cb, 0x09cd, 1), + Array[Int](0x09d7, 0x09e2, 11), + Array[Int](0x09e3, 0x0a01, 30), + Array[Int](0x0a02, 0x0a03, 1), + Array[Int](0x0a3c, 0x0a3e, 2), + Array[Int](0x0a3f, 0x0a42, 1), + Array[Int](0x0a47, 0x0a48, 1), + Array[Int](0x0a4b, 0x0a4d, 1), + Array[Int](0x0a51, 0x0a70, 31), + Array[Int](0x0a71, 0x0a75, 4), + Array[Int](0x0a81, 0x0a83, 1), + Array[Int](0x0abc, 0x0abe, 2), + Array[Int](0x0abf, 0x0ac5, 1), + Array[Int](0x0ac7, 0x0ac9, 1), + Array[Int](0x0acb, 0x0acd, 1), + Array[Int](0x0ae2, 0x0ae3, 1), + Array[Int](0x0b01, 0x0b03, 1), + Array[Int](0x0b3c, 0x0b3e, 2), + Array[Int](0x0b3f, 0x0b44, 1), + Array[Int](0x0b47, 0x0b48, 1), + Array[Int](0x0b4b, 0x0b4d, 1), + Array[Int](0x0b56, 0x0b57, 1), + Array[Int](0x0b62, 0x0b63, 1), + Array[Int](0x0b82, 0x0bbe, 60), + Array[Int](0x0bbf, 0x0bc2, 1), + Array[Int](0x0bc6, 0x0bc8, 1), + Array[Int](0x0bca, 0x0bcd, 1), + Array[Int](0x0bd7, 0x0c01, 42), + Array[Int](0x0c02, 0x0c03, 1), + Array[Int](0x0c3e, 0x0c44, 1), + Array[Int](0x0c46, 0x0c48, 1), + Array[Int](0x0c4a, 0x0c4d, 1), + Array[Int](0x0c55, 0x0c56, 1), + Array[Int](0x0c62, 0x0c63, 1), + Array[Int](0x0c82, 0x0c83, 1), + Array[Int](0x0cbc, 0x0cbe, 2), + Array[Int](0x0cbf, 0x0cc4, 1), + Array[Int](0x0cc6, 0x0cc8, 1), + Array[Int](0x0cca, 0x0ccd, 1), + Array[Int](0x0cd5, 0x0cd6, 1), + Array[Int](0x0ce2, 0x0ce3, 1), + Array[Int](0x0d02, 0x0d03, 1), + Array[Int](0x0d3e, 0x0d44, 1), + Array[Int](0x0d46, 0x0d48, 1), + Array[Int](0x0d4a, 0x0d4d, 1), + Array[Int](0x0d57, 0x0d62, 11), + Array[Int](0x0d63, 0x0d82, 31), + Array[Int](0x0d83, 0x0dca, 71), + Array[Int](0x0dcf, 0x0dd4, 1), + Array[Int](0x0dd6, 0x0dd8, 2), + Array[Int](0x0dd9, 0x0ddf, 1), + Array[Int](0x0df2, 0x0df3, 1), + Array[Int](0x0e31, 0x0e34, 3), + Array[Int](0x0e35, 0x0e3a, 1), + Array[Int](0x0e47, 0x0e4e, 1), + Array[Int](0x0eb1, 0x0eb4, 3), + Array[Int](0x0eb5, 0x0eb9, 1), + Array[Int](0x0ebb, 0x0ebc, 1), + 
Array[Int](0x0ec8, 0x0ecd, 1), + Array[Int](0x0f18, 0x0f19, 1), + Array[Int](0x0f35, 0x0f39, 2), + Array[Int](0x0f3e, 0x0f3f, 1), + Array[Int](0x0f71, 0x0f84, 1), + Array[Int](0x0f86, 0x0f87, 1), + Array[Int](0x0f8d, 0x0f97, 1), + Array[Int](0x0f99, 0x0fbc, 1), + Array[Int](0x0fc6, 0x102b, 101), + Array[Int](0x102c, 0x103e, 1), + Array[Int](0x1056, 0x1059, 1), + Array[Int](0x105e, 0x1060, 1), + Array[Int](0x1062, 0x1064, 1), + Array[Int](0x1067, 0x106d, 1), + Array[Int](0x1071, 0x1074, 1), + Array[Int](0x1082, 0x108d, 1), + Array[Int](0x108f, 0x109a, 11), + Array[Int](0x109b, 0x109d, 1), + Array[Int](0x135d, 0x135f, 1), + Array[Int](0x1712, 0x1714, 1), + Array[Int](0x1732, 0x1734, 1), + Array[Int](0x1752, 0x1753, 1), + Array[Int](0x1772, 0x1773, 1), + Array[Int](0x17b6, 0x17d3, 1), + Array[Int](0x17dd, 0x180b, 46), + Array[Int](0x180c, 0x180d, 1), + Array[Int](0x18a9, 0x1920, 119), + Array[Int](0x1921, 0x192b, 1), + Array[Int](0x1930, 0x193b, 1), + Array[Int](0x19b0, 0x19c0, 1), + Array[Int](0x19c8, 0x19c9, 1), + Array[Int](0x1a17, 0x1a1b, 1), + Array[Int](0x1a55, 0x1a5e, 1), + Array[Int](0x1a60, 0x1a7c, 1), + Array[Int](0x1a7f, 0x1b00, 129), + Array[Int](0x1b01, 0x1b04, 1), + Array[Int](0x1b34, 0x1b44, 1), + Array[Int](0x1b6b, 0x1b73, 1), + Array[Int](0x1b80, 0x1b82, 1), + Array[Int](0x1ba1, 0x1baa, 1), + Array[Int](0x1be6, 0x1bf3, 1), + Array[Int](0x1c24, 0x1c37, 1), + Array[Int](0x1cd0, 0x1cd2, 1), + Array[Int](0x1cd4, 0x1ce8, 1), + Array[Int](0x1ced, 0x1cf2, 5), + Array[Int](0x1dc0, 0x1de6, 1), + Array[Int](0x1dfc, 0x1dff, 1), + Array[Int](0x20d0, 0x20f0, 1), + Array[Int](0x2cef, 0x2cf1, 1), + Array[Int](0x2d7f, 0x2de0, 97), + Array[Int](0x2de1, 0x2dff, 1), + Array[Int](0x302a, 0x302f, 1), + Array[Int](0x3099, 0x309a, 1), + Array[Int](0xa66f, 0xa672, 1), + Array[Int](0xa67c, 0xa67d, 1), + Array[Int](0xa6f0, 0xa6f1, 1), + Array[Int](0xa802, 0xa806, 4), + Array[Int](0xa80b, 0xa823, 24), + Array[Int](0xa824, 0xa827, 1), + Array[Int](0xa880, 0xa881, 1), + 
Array[Int](0xa8b4, 0xa8c4, 1), + Array[Int](0xa8e0, 0xa8f1, 1), + Array[Int](0xa926, 0xa92d, 1), + Array[Int](0xa947, 0xa953, 1), + Array[Int](0xa980, 0xa983, 1), + Array[Int](0xa9b3, 0xa9c0, 1), + Array[Int](0xaa29, 0xaa36, 1), + Array[Int](0xaa43, 0xaa4c, 9), + Array[Int](0xaa4d, 0xaa7b, 46), + Array[Int](0xaab0, 0xaab2, 2), + Array[Int](0xaab3, 0xaab4, 1), + Array[Int](0xaab7, 0xaab8, 1), + Array[Int](0xaabe, 0xaabf, 1), + Array[Int](0xaac1, 0xabe3, 290), + Array[Int](0xabe4, 0xabea, 1), + Array[Int](0xabec, 0xabed, 1), + Array[Int](0xfb1e, 0xfe00, 738), + Array[Int](0xfe01, 0xfe0f, 1), + Array[Int](0xfe20, 0xfe26, 1), + Array[Int](0x101fd, 0x10a01, 2052), + Array[Int](0x10a02, 0x10a03, 1), + Array[Int](0x10a05, 0x10a06, 1), + Array[Int](0x10a0c, 0x10a0f, 1), + Array[Int](0x10a38, 0x10a3a, 1), + Array[Int](0x10a3f, 0x11000, 1473), + Array[Int](0x11001, 0x11002, 1), + Array[Int](0x11038, 0x11046, 1), + Array[Int](0x11080, 0x11082, 1), + Array[Int](0x110b0, 0x110ba, 1), + Array[Int](0x1d165, 0x1d169, 1), + Array[Int](0x1d16d, 0x1d172, 1), + Array[Int](0x1d17b, 0x1d182, 1), + Array[Int](0x1d185, 0x1d18b, 1), + Array[Int](0x1d1aa, 0x1d1ad, 1), + Array[Int](0x1d242, 0x1d244, 1), + Array[Int](0xe0100, 0xe01ef, 1) + ) + private final lazy val _L: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0041, 0x005a, 1), + Array[Int](0x0061, 0x007a, 1), + Array[Int](0x00aa, 0x00b5, 11), + Array[Int](0x00ba, 0x00c0, 6), + Array[Int](0x00c1, 0x00d6, 1), + Array[Int](0x00d8, 0x00f6, 1), + Array[Int](0x00f8, 0x02c1, 1), + Array[Int](0x02c6, 0x02d1, 1), + Array[Int](0x02e0, 0x02e4, 1), + Array[Int](0x02ec, 0x02ee, 2), + Array[Int](0x0370, 0x0374, 1), + Array[Int](0x0376, 0x0377, 1), + Array[Int](0x037a, 0x037d, 1), + Array[Int](0x0386, 0x0388, 2), + Array[Int](0x0389, 0x038a, 1), + Array[Int](0x038c, 0x038e, 2), + Array[Int](0x038f, 0x03a1, 1), + Array[Int](0x03a3, 0x03f5, 1), + Array[Int](0x03f7, 0x0481, 1), + Array[Int](0x048a, 0x0527, 1), + Array[Int](0x0531, 0x0556, 1), + 
Array[Int](0x0559, 0x0561, 8), + Array[Int](0x0562, 0x0587, 1), + Array[Int](0x05d0, 0x05ea, 1), + Array[Int](0x05f0, 0x05f2, 1), + Array[Int](0x0620, 0x064a, 1), + Array[Int](0x066e, 0x066f, 1), + Array[Int](0x0671, 0x06d3, 1), + Array[Int](0x06d5, 0x06e5, 16), + Array[Int](0x06e6, 0x06ee, 8), + Array[Int](0x06ef, 0x06fa, 11), + Array[Int](0x06fb, 0x06fc, 1), + Array[Int](0x06ff, 0x0710, 17), + Array[Int](0x0712, 0x072f, 1), + Array[Int](0x074d, 0x07a5, 1), + Array[Int](0x07b1, 0x07ca, 25), + Array[Int](0x07cb, 0x07ea, 1), + Array[Int](0x07f4, 0x07f5, 1), + Array[Int](0x07fa, 0x0800, 6), + Array[Int](0x0801, 0x0815, 1), + Array[Int](0x081a, 0x0824, 10), + Array[Int](0x0828, 0x0840, 24), + Array[Int](0x0841, 0x0858, 1), + Array[Int](0x0904, 0x0939, 1), + Array[Int](0x093d, 0x0950, 19), + Array[Int](0x0958, 0x0961, 1), + Array[Int](0x0971, 0x0977, 1), + Array[Int](0x0979, 0x097f, 1), + Array[Int](0x0985, 0x098c, 1), + Array[Int](0x098f, 0x0990, 1), + Array[Int](0x0993, 0x09a8, 1), + Array[Int](0x09aa, 0x09b0, 1), + Array[Int](0x09b2, 0x09b6, 4), + Array[Int](0x09b7, 0x09b9, 1), + Array[Int](0x09bd, 0x09ce, 17), + Array[Int](0x09dc, 0x09dd, 1), + Array[Int](0x09df, 0x09e1, 1), + Array[Int](0x09f0, 0x09f1, 1), + Array[Int](0x0a05, 0x0a0a, 1), + Array[Int](0x0a0f, 0x0a10, 1), + Array[Int](0x0a13, 0x0a28, 1), + Array[Int](0x0a2a, 0x0a30, 1), + Array[Int](0x0a32, 0x0a33, 1), + Array[Int](0x0a35, 0x0a36, 1), + Array[Int](0x0a38, 0x0a39, 1), + Array[Int](0x0a59, 0x0a5c, 1), + Array[Int](0x0a5e, 0x0a72, 20), + Array[Int](0x0a73, 0x0a74, 1), + Array[Int](0x0a85, 0x0a8d, 1), + Array[Int](0x0a8f, 0x0a91, 1), + Array[Int](0x0a93, 0x0aa8, 1), + Array[Int](0x0aaa, 0x0ab0, 1), + Array[Int](0x0ab2, 0x0ab3, 1), + Array[Int](0x0ab5, 0x0ab9, 1), + Array[Int](0x0abd, 0x0ad0, 19), + Array[Int](0x0ae0, 0x0ae1, 1), + Array[Int](0x0b05, 0x0b0c, 1), + Array[Int](0x0b0f, 0x0b10, 1), + Array[Int](0x0b13, 0x0b28, 1), + Array[Int](0x0b2a, 0x0b30, 1), + Array[Int](0x0b32, 0x0b33, 1), + 
Array[Int](0x0b35, 0x0b39, 1), + Array[Int](0x0b3d, 0x0b5c, 31), + Array[Int](0x0b5d, 0x0b5f, 2), + Array[Int](0x0b60, 0x0b61, 1), + Array[Int](0x0b71, 0x0b83, 18), + Array[Int](0x0b85, 0x0b8a, 1), + Array[Int](0x0b8e, 0x0b90, 1), + Array[Int](0x0b92, 0x0b95, 1), + Array[Int](0x0b99, 0x0b9a, 1), + Array[Int](0x0b9c, 0x0b9e, 2), + Array[Int](0x0b9f, 0x0ba3, 4), + Array[Int](0x0ba4, 0x0ba8, 4), + Array[Int](0x0ba9, 0x0baa, 1), + Array[Int](0x0bae, 0x0bb9, 1), + Array[Int](0x0bd0, 0x0c05, 53), + Array[Int](0x0c06, 0x0c0c, 1), + Array[Int](0x0c0e, 0x0c10, 1), + Array[Int](0x0c12, 0x0c28, 1), + Array[Int](0x0c2a, 0x0c33, 1), + Array[Int](0x0c35, 0x0c39, 1), + Array[Int](0x0c3d, 0x0c58, 27), + Array[Int](0x0c59, 0x0c60, 7), + Array[Int](0x0c61, 0x0c85, 36), + Array[Int](0x0c86, 0x0c8c, 1), + Array[Int](0x0c8e, 0x0c90, 1), + Array[Int](0x0c92, 0x0ca8, 1), + Array[Int](0x0caa, 0x0cb3, 1), + Array[Int](0x0cb5, 0x0cb9, 1), + Array[Int](0x0cbd, 0x0cde, 33), + Array[Int](0x0ce0, 0x0ce1, 1), + Array[Int](0x0cf1, 0x0cf2, 1), + Array[Int](0x0d05, 0x0d0c, 1), + Array[Int](0x0d0e, 0x0d10, 1), + Array[Int](0x0d12, 0x0d3a, 1), + Array[Int](0x0d3d, 0x0d4e, 17), + Array[Int](0x0d60, 0x0d61, 1), + Array[Int](0x0d7a, 0x0d7f, 1), + Array[Int](0x0d85, 0x0d96, 1), + Array[Int](0x0d9a, 0x0db1, 1), + Array[Int](0x0db3, 0x0dbb, 1), + Array[Int](0x0dbd, 0x0dc0, 3), + Array[Int](0x0dc1, 0x0dc6, 1), + Array[Int](0x0e01, 0x0e30, 1), + Array[Int](0x0e32, 0x0e33, 1), + Array[Int](0x0e40, 0x0e46, 1), + Array[Int](0x0e81, 0x0e82, 1), + Array[Int](0x0e84, 0x0e87, 3), + Array[Int](0x0e88, 0x0e8a, 2), + Array[Int](0x0e8d, 0x0e94, 7), + Array[Int](0x0e95, 0x0e97, 1), + Array[Int](0x0e99, 0x0e9f, 1), + Array[Int](0x0ea1, 0x0ea3, 1), + Array[Int](0x0ea5, 0x0ea7, 2), + Array[Int](0x0eaa, 0x0eab, 1), + Array[Int](0x0ead, 0x0eb0, 1), + Array[Int](0x0eb2, 0x0eb3, 1), + Array[Int](0x0ebd, 0x0ec0, 3), + Array[Int](0x0ec1, 0x0ec4, 1), + Array[Int](0x0ec6, 0x0edc, 22), + Array[Int](0x0edd, 0x0f00, 35), + 
Array[Int](0x0f40, 0x0f47, 1), + Array[Int](0x0f49, 0x0f6c, 1), + Array[Int](0x0f88, 0x0f8c, 1), + Array[Int](0x1000, 0x102a, 1), + Array[Int](0x103f, 0x1050, 17), + Array[Int](0x1051, 0x1055, 1), + Array[Int](0x105a, 0x105d, 1), + Array[Int](0x1061, 0x1065, 4), + Array[Int](0x1066, 0x106e, 8), + Array[Int](0x106f, 0x1070, 1), + Array[Int](0x1075, 0x1081, 1), + Array[Int](0x108e, 0x10a0, 18), + Array[Int](0x10a1, 0x10c5, 1), + Array[Int](0x10d0, 0x10fa, 1), + Array[Int](0x10fc, 0x1100, 4), + Array[Int](0x1101, 0x1248, 1), + Array[Int](0x124a, 0x124d, 1), + Array[Int](0x1250, 0x1256, 1), + Array[Int](0x1258, 0x125a, 2), + Array[Int](0x125b, 0x125d, 1), + Array[Int](0x1260, 0x1288, 1), + Array[Int](0x128a, 0x128d, 1), + Array[Int](0x1290, 0x12b0, 1), + Array[Int](0x12b2, 0x12b5, 1), + Array[Int](0x12b8, 0x12be, 1), + Array[Int](0x12c0, 0x12c2, 2), + Array[Int](0x12c3, 0x12c5, 1), + Array[Int](0x12c8, 0x12d6, 1), + Array[Int](0x12d8, 0x1310, 1), + Array[Int](0x1312, 0x1315, 1), + Array[Int](0x1318, 0x135a, 1), + Array[Int](0x1380, 0x138f, 1), + Array[Int](0x13a0, 0x13f4, 1), + Array[Int](0x1401, 0x166c, 1), + Array[Int](0x166f, 0x167f, 1), + Array[Int](0x1681, 0x169a, 1), + Array[Int](0x16a0, 0x16ea, 1), + Array[Int](0x1700, 0x170c, 1), + Array[Int](0x170e, 0x1711, 1), + Array[Int](0x1720, 0x1731, 1), + Array[Int](0x1740, 0x1751, 1), + Array[Int](0x1760, 0x176c, 1), + Array[Int](0x176e, 0x1770, 1), + Array[Int](0x1780, 0x17b3, 1), + Array[Int](0x17d7, 0x17dc, 5), + Array[Int](0x1820, 0x1877, 1), + Array[Int](0x1880, 0x18a8, 1), + Array[Int](0x18aa, 0x18b0, 6), + Array[Int](0x18b1, 0x18f5, 1), + Array[Int](0x1900, 0x191c, 1), + Array[Int](0x1950, 0x196d, 1), + Array[Int](0x1970, 0x1974, 1), + Array[Int](0x1980, 0x19ab, 1), + Array[Int](0x19c1, 0x19c7, 1), + Array[Int](0x1a00, 0x1a16, 1), + Array[Int](0x1a20, 0x1a54, 1), + Array[Int](0x1aa7, 0x1b05, 94), + Array[Int](0x1b06, 0x1b33, 1), + Array[Int](0x1b45, 0x1b4b, 1), + Array[Int](0x1b83, 0x1ba0, 1), + 
Array[Int](0x1bae, 0x1baf, 1), + Array[Int](0x1bc0, 0x1be5, 1), + Array[Int](0x1c00, 0x1c23, 1), + Array[Int](0x1c4d, 0x1c4f, 1), + Array[Int](0x1c5a, 0x1c7d, 1), + Array[Int](0x1ce9, 0x1cec, 1), + Array[Int](0x1cee, 0x1cf1, 1), + Array[Int](0x1d00, 0x1dbf, 1), + Array[Int](0x1e00, 0x1f15, 1), + Array[Int](0x1f18, 0x1f1d, 1), + Array[Int](0x1f20, 0x1f45, 1), + Array[Int](0x1f48, 0x1f4d, 1), + Array[Int](0x1f50, 0x1f57, 1), + Array[Int](0x1f59, 0x1f5f, 2), + Array[Int](0x1f60, 0x1f7d, 1), + Array[Int](0x1f80, 0x1fb4, 1), + Array[Int](0x1fb6, 0x1fbc, 1), + Array[Int](0x1fbe, 0x1fc2, 4), + Array[Int](0x1fc3, 0x1fc4, 1), + Array[Int](0x1fc6, 0x1fcc, 1), + Array[Int](0x1fd0, 0x1fd3, 1), + Array[Int](0x1fd6, 0x1fdb, 1), + Array[Int](0x1fe0, 0x1fec, 1), + Array[Int](0x1ff2, 0x1ff4, 1), + Array[Int](0x1ff6, 0x1ffc, 1), + Array[Int](0x2071, 0x207f, 14), + Array[Int](0x2090, 0x209c, 1), + Array[Int](0x2102, 0x2107, 5), + Array[Int](0x210a, 0x2113, 1), + Array[Int](0x2115, 0x2119, 4), + Array[Int](0x211a, 0x211d, 1), + Array[Int](0x2124, 0x212a, 2), + Array[Int](0x212b, 0x212d, 1), + Array[Int](0x212f, 0x2139, 1), + Array[Int](0x213c, 0x213f, 1), + Array[Int](0x2145, 0x2149, 1), + Array[Int](0x214e, 0x2183, 53), + Array[Int](0x2184, 0x2c00, 2684), + Array[Int](0x2c01, 0x2c2e, 1), + Array[Int](0x2c30, 0x2c5e, 1), + Array[Int](0x2c60, 0x2ce4, 1), + Array[Int](0x2ceb, 0x2cee, 1), + Array[Int](0x2d00, 0x2d25, 1), + Array[Int](0x2d30, 0x2d65, 1), + Array[Int](0x2d6f, 0x2d80, 17), + Array[Int](0x2d81, 0x2d96, 1), + Array[Int](0x2da0, 0x2da6, 1), + Array[Int](0x2da8, 0x2dae, 1), + Array[Int](0x2db0, 0x2db6, 1), + Array[Int](0x2db8, 0x2dbe, 1), + Array[Int](0x2dc0, 0x2dc6, 1), + Array[Int](0x2dc8, 0x2dce, 1), + Array[Int](0x2dd0, 0x2dd6, 1), + Array[Int](0x2dd8, 0x2dde, 1), + Array[Int](0x2e2f, 0x3005, 470), + Array[Int](0x3006, 0x3031, 43), + Array[Int](0x3032, 0x3035, 1), + Array[Int](0x303b, 0x303c, 1), + Array[Int](0x3041, 0x3096, 1), + Array[Int](0x309d, 0x309f, 1), + 
Array[Int](0x30a1, 0x30fa, 1), + Array[Int](0x30fc, 0x30ff, 1), + Array[Int](0x3105, 0x312d, 1), + Array[Int](0x3131, 0x318e, 1), + Array[Int](0x31a0, 0x31ba, 1), + Array[Int](0x31f0, 0x31ff, 1), + Array[Int](0x3400, 0x4db5, 1), + Array[Int](0x4e00, 0x9fcb, 1), + Array[Int](0xa000, 0xa48c, 1), + Array[Int](0xa4d0, 0xa4fd, 1), + Array[Int](0xa500, 0xa60c, 1), + Array[Int](0xa610, 0xa61f, 1), + Array[Int](0xa62a, 0xa62b, 1), + Array[Int](0xa640, 0xa66e, 1), + Array[Int](0xa67f, 0xa697, 1), + Array[Int](0xa6a0, 0xa6e5, 1), + Array[Int](0xa717, 0xa71f, 1), + Array[Int](0xa722, 0xa788, 1), + Array[Int](0xa78b, 0xa78e, 1), + Array[Int](0xa790, 0xa791, 1), + Array[Int](0xa7a0, 0xa7a9, 1), + Array[Int](0xa7fa, 0xa801, 1), + Array[Int](0xa803, 0xa805, 1), + Array[Int](0xa807, 0xa80a, 1), + Array[Int](0xa80c, 0xa822, 1), + Array[Int](0xa840, 0xa873, 1), + Array[Int](0xa882, 0xa8b3, 1), + Array[Int](0xa8f2, 0xa8f7, 1), + Array[Int](0xa8fb, 0xa90a, 15), + Array[Int](0xa90b, 0xa925, 1), + Array[Int](0xa930, 0xa946, 1), + Array[Int](0xa960, 0xa97c, 1), + Array[Int](0xa984, 0xa9b2, 1), + Array[Int](0xa9cf, 0xaa00, 49), + Array[Int](0xaa01, 0xaa28, 1), + Array[Int](0xaa40, 0xaa42, 1), + Array[Int](0xaa44, 0xaa4b, 1), + Array[Int](0xaa60, 0xaa76, 1), + Array[Int](0xaa7a, 0xaa80, 6), + Array[Int](0xaa81, 0xaaaf, 1), + Array[Int](0xaab1, 0xaab5, 4), + Array[Int](0xaab6, 0xaab9, 3), + Array[Int](0xaaba, 0xaabd, 1), + Array[Int](0xaac0, 0xaac2, 2), + Array[Int](0xaadb, 0xaadd, 1), + Array[Int](0xab01, 0xab06, 1), + Array[Int](0xab09, 0xab0e, 1), + Array[Int](0xab11, 0xab16, 1), + Array[Int](0xab20, 0xab26, 1), + Array[Int](0xab28, 0xab2e, 1), + Array[Int](0xabc0, 0xabe2, 1), + Array[Int](0xac00, 0xd7a3, 1), + Array[Int](0xd7b0, 0xd7c6, 1), + Array[Int](0xd7cb, 0xd7fb, 1), + Array[Int](0xf900, 0xfa2d, 1), + Array[Int](0xfa30, 0xfa6d, 1), + Array[Int](0xfa70, 0xfad9, 1), + Array[Int](0xfb00, 0xfb06, 1), + Array[Int](0xfb13, 0xfb17, 1), + Array[Int](0xfb1d, 0xfb1f, 2), + 
Array[Int](0xfb20, 0xfb28, 1), + Array[Int](0xfb2a, 0xfb36, 1), + Array[Int](0xfb38, 0xfb3c, 1), + Array[Int](0xfb3e, 0xfb40, 2), + Array[Int](0xfb41, 0xfb43, 2), + Array[Int](0xfb44, 0xfb46, 2), + Array[Int](0xfb47, 0xfbb1, 1), + Array[Int](0xfbd3, 0xfd3d, 1), + Array[Int](0xfd50, 0xfd8f, 1), + Array[Int](0xfd92, 0xfdc7, 1), + Array[Int](0xfdf0, 0xfdfb, 1), + Array[Int](0xfe70, 0xfe74, 1), + Array[Int](0xfe76, 0xfefc, 1), + Array[Int](0xff21, 0xff3a, 1), + Array[Int](0xff41, 0xff5a, 1), + Array[Int](0xff66, 0xffbe, 1), + Array[Int](0xffc2, 0xffc7, 1), + Array[Int](0xffca, 0xffcf, 1), + Array[Int](0xffd2, 0xffd7, 1), + Array[Int](0xffda, 0xffdc, 1), + Array[Int](0x10000, 0x1000b, 1), + Array[Int](0x1000d, 0x10026, 1), + Array[Int](0x10028, 0x1003a, 1), + Array[Int](0x1003c, 0x1003d, 1), + Array[Int](0x1003f, 0x1004d, 1), + Array[Int](0x10050, 0x1005d, 1), + Array[Int](0x10080, 0x100fa, 1), + Array[Int](0x10280, 0x1029c, 1), + Array[Int](0x102a0, 0x102d0, 1), + Array[Int](0x10300, 0x1031e, 1), + Array[Int](0x10330, 0x10340, 1), + Array[Int](0x10342, 0x10349, 1), + Array[Int](0x10380, 0x1039d, 1), + Array[Int](0x103a0, 0x103c3, 1), + Array[Int](0x103c8, 0x103cf, 1), + Array[Int](0x10400, 0x1049d, 1), + Array[Int](0x10800, 0x10805, 1), + Array[Int](0x10808, 0x1080a, 2), + Array[Int](0x1080b, 0x10835, 1), + Array[Int](0x10837, 0x10838, 1), + Array[Int](0x1083c, 0x1083f, 3), + Array[Int](0x10840, 0x10855, 1), + Array[Int](0x10900, 0x10915, 1), + Array[Int](0x10920, 0x10939, 1), + Array[Int](0x10a00, 0x10a10, 16), + Array[Int](0x10a11, 0x10a13, 1), + Array[Int](0x10a15, 0x10a17, 1), + Array[Int](0x10a19, 0x10a33, 1), + Array[Int](0x10a60, 0x10a7c, 1), + Array[Int](0x10b00, 0x10b35, 1), + Array[Int](0x10b40, 0x10b55, 1), + Array[Int](0x10b60, 0x10b72, 1), + Array[Int](0x10c00, 0x10c48, 1), + Array[Int](0x11003, 0x11037, 1), + Array[Int](0x11083, 0x110af, 1), + Array[Int](0x12000, 0x1236e, 1), + Array[Int](0x13000, 0x1342e, 1), + Array[Int](0x16800, 0x16a38, 1), + 
Array[Int](0x1b000, 0x1b001, 1), + Array[Int](0x1d400, 0x1d454, 1), + Array[Int](0x1d456, 0x1d49c, 1), + Array[Int](0x1d49e, 0x1d49f, 1), + Array[Int](0x1d4a2, 0x1d4a5, 3), + Array[Int](0x1d4a6, 0x1d4a9, 3), + Array[Int](0x1d4aa, 0x1d4ac, 1), + Array[Int](0x1d4ae, 0x1d4b9, 1), + Array[Int](0x1d4bb, 0x1d4bd, 2), + Array[Int](0x1d4be, 0x1d4c3, 1), + Array[Int](0x1d4c5, 0x1d505, 1), + Array[Int](0x1d507, 0x1d50a, 1), + Array[Int](0x1d50d, 0x1d514, 1), + Array[Int](0x1d516, 0x1d51c, 1), + Array[Int](0x1d51e, 0x1d539, 1), + Array[Int](0x1d53b, 0x1d53e, 1), + Array[Int](0x1d540, 0x1d544, 1), + Array[Int](0x1d546, 0x1d54a, 4), + Array[Int](0x1d54b, 0x1d550, 1), + Array[Int](0x1d552, 0x1d6a5, 1), + Array[Int](0x1d6a8, 0x1d6c0, 1), + Array[Int](0x1d6c2, 0x1d6da, 1), + Array[Int](0x1d6dc, 0x1d6fa, 1), + Array[Int](0x1d6fc, 0x1d714, 1), + Array[Int](0x1d716, 0x1d734, 1), + Array[Int](0x1d736, 0x1d74e, 1), + Array[Int](0x1d750, 0x1d76e, 1), + Array[Int](0x1d770, 0x1d788, 1), + Array[Int](0x1d78a, 0x1d7a8, 1), + Array[Int](0x1d7aa, 0x1d7c2, 1), + Array[Int](0x1d7c4, 0x1d7cb, 1), + Array[Int](0x20000, 0x2a6d6, 1), + Array[Int](0x2a700, 0x2b734, 1), + Array[Int](0x2b740, 0x2b81d, 1), + Array[Int](0x2f800, 0x2fa1d, 1) + ) + private final lazy val _N: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0030, 0x0039, 1), + Array[Int](0x00b2, 0x00b3, 1), + Array[Int](0x00b9, 0x00bc, 3), + Array[Int](0x00bd, 0x00be, 1), + Array[Int](0x0660, 0x0669, 1), + Array[Int](0x06f0, 0x06f9, 1), + Array[Int](0x07c0, 0x07c9, 1), + Array[Int](0x0966, 0x096f, 1), + Array[Int](0x09e6, 0x09ef, 1), + Array[Int](0x09f4, 0x09f9, 1), + Array[Int](0x0a66, 0x0a6f, 1), + Array[Int](0x0ae6, 0x0aef, 1), + Array[Int](0x0b66, 0x0b6f, 1), + Array[Int](0x0b72, 0x0b77, 1), + Array[Int](0x0be6, 0x0bf2, 1), + Array[Int](0x0c66, 0x0c6f, 1), + Array[Int](0x0c78, 0x0c7e, 1), + Array[Int](0x0ce6, 0x0cef, 1), + Array[Int](0x0d66, 0x0d75, 1), + Array[Int](0x0e50, 0x0e59, 1), + Array[Int](0x0ed0, 0x0ed9, 1), + 
Array[Int](0x0f20, 0x0f33, 1), + Array[Int](0x1040, 0x1049, 1), + Array[Int](0x1090, 0x1099, 1), + Array[Int](0x1369, 0x137c, 1), + Array[Int](0x16ee, 0x16f0, 1), + Array[Int](0x17e0, 0x17e9, 1), + Array[Int](0x17f0, 0x17f9, 1), + Array[Int](0x1810, 0x1819, 1), + Array[Int](0x1946, 0x194f, 1), + Array[Int](0x19d0, 0x19da, 1), + Array[Int](0x1a80, 0x1a89, 1), + Array[Int](0x1a90, 0x1a99, 1), + Array[Int](0x1b50, 0x1b59, 1), + Array[Int](0x1bb0, 0x1bb9, 1), + Array[Int](0x1c40, 0x1c49, 1), + Array[Int](0x1c50, 0x1c59, 1), + Array[Int](0x2070, 0x2074, 4), + Array[Int](0x2075, 0x2079, 1), + Array[Int](0x2080, 0x2089, 1), + Array[Int](0x2150, 0x2182, 1), + Array[Int](0x2185, 0x2189, 1), + Array[Int](0x2460, 0x249b, 1), + Array[Int](0x24ea, 0x24ff, 1), + Array[Int](0x2776, 0x2793, 1), + Array[Int](0x2cfd, 0x3007, 778), + Array[Int](0x3021, 0x3029, 1), + Array[Int](0x3038, 0x303a, 1), + Array[Int](0x3192, 0x3195, 1), + Array[Int](0x3220, 0x3229, 1), + Array[Int](0x3251, 0x325f, 1), + Array[Int](0x3280, 0x3289, 1), + Array[Int](0x32b1, 0x32bf, 1), + Array[Int](0xa620, 0xa629, 1), + Array[Int](0xa6e6, 0xa6ef, 1), + Array[Int](0xa830, 0xa835, 1), + Array[Int](0xa8d0, 0xa8d9, 1), + Array[Int](0xa900, 0xa909, 1), + Array[Int](0xa9d0, 0xa9d9, 1), + Array[Int](0xaa50, 0xaa59, 1), + Array[Int](0xabf0, 0xabf9, 1), + Array[Int](0xff10, 0xff19, 1), + Array[Int](0x10107, 0x10133, 1), + Array[Int](0x10140, 0x10178, 1), + Array[Int](0x1018a, 0x10320, 406), + Array[Int](0x10321, 0x10323, 1), + Array[Int](0x10341, 0x1034a, 9), + Array[Int](0x103d1, 0x103d5, 1), + Array[Int](0x104a0, 0x104a9, 1), + Array[Int](0x10858, 0x1085f, 1), + Array[Int](0x10916, 0x1091b, 1), + Array[Int](0x10a40, 0x10a47, 1), + Array[Int](0x10a7d, 0x10a7e, 1), + Array[Int](0x10b58, 0x10b5f, 1), + Array[Int](0x10b78, 0x10b7f, 1), + Array[Int](0x10e60, 0x10e7e, 1), + Array[Int](0x11052, 0x1106f, 1), + Array[Int](0x12400, 0x12462, 1), + Array[Int](0x1d360, 0x1d371, 1), + Array[Int](0x1d7ce, 0x1d7ff, 1), + 
Array[Int](0x1f100, 0x1f10a, 1) + ) + private final lazy val _P: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0021, 0x0023, 1), + Array[Int](0x0025, 0x002a, 1), + Array[Int](0x002c, 0x002f, 1), + Array[Int](0x003a, 0x003b, 1), + Array[Int](0x003f, 0x0040, 1), + Array[Int](0x005b, 0x005d, 1), + Array[Int](0x005f, 0x007b, 28), + Array[Int](0x007d, 0x00a1, 36), + Array[Int](0x00ab, 0x00b7, 12), + Array[Int](0x00bb, 0x00bf, 4), + Array[Int](0x037e, 0x0387, 9), + Array[Int](0x055a, 0x055f, 1), + Array[Int](0x0589, 0x058a, 1), + Array[Int](0x05be, 0x05c0, 2), + Array[Int](0x05c3, 0x05c6, 3), + Array[Int](0x05f3, 0x05f4, 1), + Array[Int](0x0609, 0x060a, 1), + Array[Int](0x060c, 0x060d, 1), + Array[Int](0x061b, 0x061e, 3), + Array[Int](0x061f, 0x066a, 75), + Array[Int](0x066b, 0x066d, 1), + Array[Int](0x06d4, 0x0700, 44), + Array[Int](0x0701, 0x070d, 1), + Array[Int](0x07f7, 0x07f9, 1), + Array[Int](0x0830, 0x083e, 1), + Array[Int](0x085e, 0x0964, 262), + Array[Int](0x0965, 0x0970, 11), + Array[Int](0x0df4, 0x0e4f, 91), + Array[Int](0x0e5a, 0x0e5b, 1), + Array[Int](0x0f04, 0x0f12, 1), + Array[Int](0x0f3a, 0x0f3d, 1), + Array[Int](0x0f85, 0x0fd0, 75), + Array[Int](0x0fd1, 0x0fd4, 1), + Array[Int](0x0fd9, 0x0fda, 1), + Array[Int](0x104a, 0x104f, 1), + Array[Int](0x10fb, 0x1361, 614), + Array[Int](0x1362, 0x1368, 1), + Array[Int](0x1400, 0x166d, 621), + Array[Int](0x166e, 0x169b, 45), + Array[Int](0x169c, 0x16eb, 79), + Array[Int](0x16ec, 0x16ed, 1), + Array[Int](0x1735, 0x1736, 1), + Array[Int](0x17d4, 0x17d6, 1), + Array[Int](0x17d8, 0x17da, 1), + Array[Int](0x1800, 0x180a, 1), + Array[Int](0x1944, 0x1945, 1), + Array[Int](0x1a1e, 0x1a1f, 1), + Array[Int](0x1aa0, 0x1aa6, 1), + Array[Int](0x1aa8, 0x1aad, 1), + Array[Int](0x1b5a, 0x1b60, 1), + Array[Int](0x1bfc, 0x1bff, 1), + Array[Int](0x1c3b, 0x1c3f, 1), + Array[Int](0x1c7e, 0x1c7f, 1), + Array[Int](0x1cd3, 0x2010, 829), + Array[Int](0x2011, 0x2027, 1), + Array[Int](0x2030, 0x2043, 1), + Array[Int](0x2045, 
0x2051, 1), + Array[Int](0x2053, 0x205e, 1), + Array[Int](0x207d, 0x207e, 1), + Array[Int](0x208d, 0x208e, 1), + Array[Int](0x2329, 0x232a, 1), + Array[Int](0x2768, 0x2775, 1), + Array[Int](0x27c5, 0x27c6, 1), + Array[Int](0x27e6, 0x27ef, 1), + Array[Int](0x2983, 0x2998, 1), + Array[Int](0x29d8, 0x29db, 1), + Array[Int](0x29fc, 0x29fd, 1), + Array[Int](0x2cf9, 0x2cfc, 1), + Array[Int](0x2cfe, 0x2cff, 1), + Array[Int](0x2d70, 0x2e00, 144), + Array[Int](0x2e01, 0x2e2e, 1), + Array[Int](0x2e30, 0x2e31, 1), + Array[Int](0x3001, 0x3003, 1), + Array[Int](0x3008, 0x3011, 1), + Array[Int](0x3014, 0x301f, 1), + Array[Int](0x3030, 0x303d, 13), + Array[Int](0x30a0, 0x30fb, 91), + Array[Int](0xa4fe, 0xa4ff, 1), + Array[Int](0xa60d, 0xa60f, 1), + Array[Int](0xa673, 0xa67e, 11), + Array[Int](0xa6f2, 0xa6f7, 1), + Array[Int](0xa874, 0xa877, 1), + Array[Int](0xa8ce, 0xa8cf, 1), + Array[Int](0xa8f8, 0xa8fa, 1), + Array[Int](0xa92e, 0xa92f, 1), + Array[Int](0xa95f, 0xa9c1, 98), + Array[Int](0xa9c2, 0xa9cd, 1), + Array[Int](0xa9de, 0xa9df, 1), + Array[Int](0xaa5c, 0xaa5f, 1), + Array[Int](0xaade, 0xaadf, 1), + Array[Int](0xabeb, 0xfd3e, 20819), + Array[Int](0xfd3f, 0xfe10, 209), + Array[Int](0xfe11, 0xfe19, 1), + Array[Int](0xfe30, 0xfe52, 1), + Array[Int](0xfe54, 0xfe61, 1), + Array[Int](0xfe63, 0xfe68, 5), + Array[Int](0xfe6a, 0xfe6b, 1), + Array[Int](0xff01, 0xff03, 1), + Array[Int](0xff05, 0xff0a, 1), + Array[Int](0xff0c, 0xff0f, 1), + Array[Int](0xff1a, 0xff1b, 1), + Array[Int](0xff1f, 0xff20, 1), + Array[Int](0xff3b, 0xff3d, 1), + Array[Int](0xff3f, 0xff5b, 28), + Array[Int](0xff5d, 0xff5f, 2), + Array[Int](0xff60, 0xff65, 1), + Array[Int](0x10100, 0x10101, 1), + Array[Int](0x1039f, 0x103d0, 49), + Array[Int](0x10857, 0x1091f, 200), + Array[Int](0x1093f, 0x10a50, 273), + Array[Int](0x10a51, 0x10a58, 1), + Array[Int](0x10a7f, 0x10b39, 186), + Array[Int](0x10b3a, 0x10b3f, 1), + Array[Int](0x11047, 0x1104d, 1), + Array[Int](0x110bb, 0x110bc, 1), + Array[Int](0x110be, 0x110c1, 1), 
+ Array[Int](0x12470, 0x12473, 1) + ) + private final lazy val _S: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0024, 0x002b, 7), + Array[Int](0x003c, 0x003e, 1), + Array[Int](0x005e, 0x0060, 2), + Array[Int](0x007c, 0x007e, 2), + Array[Int](0x00a2, 0x00a9, 1), + Array[Int](0x00ac, 0x00ae, 2), + Array[Int](0x00af, 0x00b1, 1), + Array[Int](0x00b4, 0x00b8, 2), + Array[Int](0x00d7, 0x00f7, 32), + Array[Int](0x02c2, 0x02c5, 1), + Array[Int](0x02d2, 0x02df, 1), + Array[Int](0x02e5, 0x02eb, 1), + Array[Int](0x02ed, 0x02ef, 2), + Array[Int](0x02f0, 0x02ff, 1), + Array[Int](0x0375, 0x0384, 15), + Array[Int](0x0385, 0x03f6, 113), + Array[Int](0x0482, 0x0606, 388), + Array[Int](0x0607, 0x0608, 1), + Array[Int](0x060b, 0x060e, 3), + Array[Int](0x060f, 0x06de, 207), + Array[Int](0x06e9, 0x06fd, 20), + Array[Int](0x06fe, 0x07f6, 248), + Array[Int](0x09f2, 0x09f3, 1), + Array[Int](0x09fa, 0x09fb, 1), + Array[Int](0x0af1, 0x0b70, 127), + Array[Int](0x0bf3, 0x0bfa, 1), + Array[Int](0x0c7f, 0x0d79, 250), + Array[Int](0x0e3f, 0x0f01, 194), + Array[Int](0x0f02, 0x0f03, 1), + Array[Int](0x0f13, 0x0f17, 1), + Array[Int](0x0f1a, 0x0f1f, 1), + Array[Int](0x0f34, 0x0f38, 2), + Array[Int](0x0fbe, 0x0fc5, 1), + Array[Int](0x0fc7, 0x0fcc, 1), + Array[Int](0x0fce, 0x0fcf, 1), + Array[Int](0x0fd5, 0x0fd8, 1), + Array[Int](0x109e, 0x109f, 1), + Array[Int](0x1360, 0x1390, 48), + Array[Int](0x1391, 0x1399, 1), + Array[Int](0x17db, 0x1940, 357), + Array[Int](0x19de, 0x19ff, 1), + Array[Int](0x1b61, 0x1b6a, 1), + Array[Int](0x1b74, 0x1b7c, 1), + Array[Int](0x1fbd, 0x1fbf, 2), + Array[Int](0x1fc0, 0x1fc1, 1), + Array[Int](0x1fcd, 0x1fcf, 1), + Array[Int](0x1fdd, 0x1fdf, 1), + Array[Int](0x1fed, 0x1fef, 1), + Array[Int](0x1ffd, 0x1ffe, 1), + Array[Int](0x2044, 0x2052, 14), + Array[Int](0x207a, 0x207c, 1), + Array[Int](0x208a, 0x208c, 1), + Array[Int](0x20a0, 0x20b9, 1), + Array[Int](0x2100, 0x2101, 1), + Array[Int](0x2103, 0x2106, 1), + Array[Int](0x2108, 0x2109, 1), + Array[Int](0x2114, 
0x2116, 2), + Array[Int](0x2117, 0x2118, 1), + Array[Int](0x211e, 0x2123, 1), + Array[Int](0x2125, 0x2129, 2), + Array[Int](0x212e, 0x213a, 12), + Array[Int](0x213b, 0x2140, 5), + Array[Int](0x2141, 0x2144, 1), + Array[Int](0x214a, 0x214d, 1), + Array[Int](0x214f, 0x2190, 65), + Array[Int](0x2191, 0x2328, 1), + Array[Int](0x232b, 0x23f3, 1), + Array[Int](0x2400, 0x2426, 1), + Array[Int](0x2440, 0x244a, 1), + Array[Int](0x249c, 0x24e9, 1), + Array[Int](0x2500, 0x26ff, 1), + Array[Int](0x2701, 0x2767, 1), + Array[Int](0x2794, 0x27c4, 1), + Array[Int](0x27c7, 0x27ca, 1), + Array[Int](0x27cc, 0x27ce, 2), + Array[Int](0x27cf, 0x27e5, 1), + Array[Int](0x27f0, 0x2982, 1), + Array[Int](0x2999, 0x29d7, 1), + Array[Int](0x29dc, 0x29fb, 1), + Array[Int](0x29fe, 0x2b4c, 1), + Array[Int](0x2b50, 0x2b59, 1), + Array[Int](0x2ce5, 0x2cea, 1), + Array[Int](0x2e80, 0x2e99, 1), + Array[Int](0x2e9b, 0x2ef3, 1), + Array[Int](0x2f00, 0x2fd5, 1), + Array[Int](0x2ff0, 0x2ffb, 1), + Array[Int](0x3004, 0x3012, 14), + Array[Int](0x3013, 0x3020, 13), + Array[Int](0x3036, 0x3037, 1), + Array[Int](0x303e, 0x303f, 1), + Array[Int](0x309b, 0x309c, 1), + Array[Int](0x3190, 0x3191, 1), + Array[Int](0x3196, 0x319f, 1), + Array[Int](0x31c0, 0x31e3, 1), + Array[Int](0x3200, 0x321e, 1), + Array[Int](0x322a, 0x3250, 1), + Array[Int](0x3260, 0x327f, 1), + Array[Int](0x328a, 0x32b0, 1), + Array[Int](0x32c0, 0x32fe, 1), + Array[Int](0x3300, 0x33ff, 1), + Array[Int](0x4dc0, 0x4dff, 1), + Array[Int](0xa490, 0xa4c6, 1), + Array[Int](0xa700, 0xa716, 1), + Array[Int](0xa720, 0xa721, 1), + Array[Int](0xa789, 0xa78a, 1), + Array[Int](0xa828, 0xa82b, 1), + Array[Int](0xa836, 0xa839, 1), + Array[Int](0xaa77, 0xaa79, 1), + Array[Int](0xfb29, 0xfbb2, 137), + Array[Int](0xfbb3, 0xfbc1, 1), + Array[Int](0xfdfc, 0xfdfd, 1), + Array[Int](0xfe62, 0xfe64, 2), + Array[Int](0xfe65, 0xfe66, 1), + Array[Int](0xfe69, 0xff04, 155), + Array[Int](0xff0b, 0xff1c, 17), + Array[Int](0xff1d, 0xff1e, 1), + Array[Int](0xff3e, 0xff40, 
2), + Array[Int](0xff5c, 0xff5e, 2), + Array[Int](0xffe0, 0xffe6, 1), + Array[Int](0xffe8, 0xffee, 1), + Array[Int](0xfffc, 0xfffd, 1), + Array[Int](0x10102, 0x10137, 53), + Array[Int](0x10138, 0x1013f, 1), + Array[Int](0x10179, 0x10189, 1), + Array[Int](0x10190, 0x1019b, 1), + Array[Int](0x101d0, 0x101fc, 1), + Array[Int](0x1d000, 0x1d0f5, 1), + Array[Int](0x1d100, 0x1d126, 1), + Array[Int](0x1d129, 0x1d164, 1), + Array[Int](0x1d16a, 0x1d16c, 1), + Array[Int](0x1d183, 0x1d184, 1), + Array[Int](0x1d18c, 0x1d1a9, 1), + Array[Int](0x1d1ae, 0x1d1dd, 1), + Array[Int](0x1d200, 0x1d241, 1), + Array[Int](0x1d245, 0x1d300, 187), + Array[Int](0x1d301, 0x1d356, 1), + Array[Int](0x1d6c1, 0x1d6db, 26), + Array[Int](0x1d6fb, 0x1d715, 26), + Array[Int](0x1d735, 0x1d74f, 26), + Array[Int](0x1d76f, 0x1d789, 26), + Array[Int](0x1d7a9, 0x1d7c3, 26), + Array[Int](0x1f000, 0x1f02b, 1), + Array[Int](0x1f030, 0x1f093, 1), + Array[Int](0x1f0a0, 0x1f0ae, 1), + Array[Int](0x1f0b1, 0x1f0be, 1), + Array[Int](0x1f0c1, 0x1f0cf, 1), + Array[Int](0x1f0d1, 0x1f0df, 1), + Array[Int](0x1f110, 0x1f12e, 1), + Array[Int](0x1f130, 0x1f169, 1), + Array[Int](0x1f170, 0x1f19a, 1), + Array[Int](0x1f1e6, 0x1f202, 1), + Array[Int](0x1f210, 0x1f23a, 1), + Array[Int](0x1f240, 0x1f248, 1), + Array[Int](0x1f250, 0x1f251, 1), + Array[Int](0x1f300, 0x1f320, 1), + Array[Int](0x1f330, 0x1f335, 1), + Array[Int](0x1f337, 0x1f37c, 1), + Array[Int](0x1f380, 0x1f393, 1), + Array[Int](0x1f3a0, 0x1f3c4, 1), + Array[Int](0x1f3c6, 0x1f3ca, 1), + Array[Int](0x1f3e0, 0x1f3f0, 1), + Array[Int](0x1f400, 0x1f43e, 1), + Array[Int](0x1f440, 0x1f442, 2), + Array[Int](0x1f443, 0x1f4f7, 1), + Array[Int](0x1f4f9, 0x1f4fc, 1), + Array[Int](0x1f500, 0x1f53d, 1), + Array[Int](0x1f550, 0x1f567, 1), + Array[Int](0x1f5fb, 0x1f5ff, 1), + Array[Int](0x1f601, 0x1f610, 1), + Array[Int](0x1f612, 0x1f614, 1), + Array[Int](0x1f616, 0x1f61c, 2), + Array[Int](0x1f61d, 0x1f61e, 1), + Array[Int](0x1f620, 0x1f625, 1), + Array[Int](0x1f628, 0x1f62b, 1), 
+ Array[Int](0x1f62d, 0x1f630, 3), + Array[Int](0x1f631, 0x1f633, 1), + Array[Int](0x1f635, 0x1f640, 1), + Array[Int](0x1f645, 0x1f64f, 1), + Array[Int](0x1f680, 0x1f6c5, 1), + Array[Int](0x1f700, 0x1f773, 1) + ) + private final lazy val _Z: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0020, 0x00a0, 128), + Array[Int](0x1680, 0x180e, 398), + Array[Int](0x2000, 0x200a, 1), + Array[Int](0x2028, 0x2029, 1), + Array[Int](0x202f, 0x205f, 48), + Array[Int](0x3000, 0x3000, 1)) + private final lazy val _Me: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0488, 0x0489, 1), + Array[Int](0x20dd, 0x20e0, 1), + Array[Int](0x20e2, 0x20e4, 1), + Array[Int](0xa670, 0xa672, 1)) + private final lazy val _Mc: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0903, 0x093b, 56), + Array[Int](0x093e, 0x0940, 1), + Array[Int](0x0949, 0x094c, 1), + Array[Int](0x094e, 0x094f, 1), + Array[Int](0x0982, 0x0983, 1), + Array[Int](0x09be, 0x09c0, 1), + Array[Int](0x09c7, 0x09c8, 1), + Array[Int](0x09cb, 0x09cc, 1), + Array[Int](0x09d7, 0x0a03, 44), + Array[Int](0x0a3e, 0x0a40, 1), + Array[Int](0x0a83, 0x0abe, 59), + Array[Int](0x0abf, 0x0ac0, 1), + Array[Int](0x0ac9, 0x0acb, 2), + Array[Int](0x0acc, 0x0b02, 54), + Array[Int](0x0b03, 0x0b3e, 59), + Array[Int](0x0b40, 0x0b47, 7), + Array[Int](0x0b48, 0x0b4b, 3), + Array[Int](0x0b4c, 0x0b57, 11), + Array[Int](0x0bbe, 0x0bbf, 1), + Array[Int](0x0bc1, 0x0bc2, 1), + Array[Int](0x0bc6, 0x0bc8, 1), + Array[Int](0x0bca, 0x0bcc, 1), + Array[Int](0x0bd7, 0x0c01, 42), + Array[Int](0x0c02, 0x0c03, 1), + Array[Int](0x0c41, 0x0c44, 1), + Array[Int](0x0c82, 0x0c83, 1), + Array[Int](0x0cbe, 0x0cc0, 2), + Array[Int](0x0cc1, 0x0cc4, 1), + Array[Int](0x0cc7, 0x0cc8, 1), + Array[Int](0x0cca, 0x0ccb, 1), + Array[Int](0x0cd5, 0x0cd6, 1), + Array[Int](0x0d02, 0x0d03, 1), + Array[Int](0x0d3e, 0x0d40, 1), + Array[Int](0x0d46, 0x0d48, 1), + Array[Int](0x0d4a, 0x0d4c, 1), + Array[Int](0x0d57, 0x0d82, 43), + Array[Int](0x0d83, 0x0dcf, 76), + 
Array[Int](0x0dd0, 0x0dd1, 1), + Array[Int](0x0dd8, 0x0ddf, 1), + Array[Int](0x0df2, 0x0df3, 1), + Array[Int](0x0f3e, 0x0f3f, 1), + Array[Int](0x0f7f, 0x102b, 172), + Array[Int](0x102c, 0x1031, 5), + Array[Int](0x1038, 0x103b, 3), + Array[Int](0x103c, 0x1056, 26), + Array[Int](0x1057, 0x1062, 11), + Array[Int](0x1063, 0x1064, 1), + Array[Int](0x1067, 0x106d, 1), + Array[Int](0x1083, 0x1084, 1), + Array[Int](0x1087, 0x108c, 1), + Array[Int](0x108f, 0x109a, 11), + Array[Int](0x109b, 0x109c, 1), + Array[Int](0x17b6, 0x17be, 8), + Array[Int](0x17bf, 0x17c5, 1), + Array[Int](0x17c7, 0x17c8, 1), + Array[Int](0x1923, 0x1926, 1), + Array[Int](0x1929, 0x192b, 1), + Array[Int](0x1930, 0x1931, 1), + Array[Int](0x1933, 0x1938, 1), + Array[Int](0x19b0, 0x19c0, 1), + Array[Int](0x19c8, 0x19c9, 1), + Array[Int](0x1a19, 0x1a1b, 1), + Array[Int](0x1a55, 0x1a57, 2), + Array[Int](0x1a61, 0x1a63, 2), + Array[Int](0x1a64, 0x1a6d, 9), + Array[Int](0x1a6e, 0x1a72, 1), + Array[Int](0x1b04, 0x1b35, 49), + Array[Int](0x1b3b, 0x1b3d, 2), + Array[Int](0x1b3e, 0x1b41, 1), + Array[Int](0x1b43, 0x1b44, 1), + Array[Int](0x1b82, 0x1ba1, 31), + Array[Int](0x1ba6, 0x1ba7, 1), + Array[Int](0x1baa, 0x1be7, 61), + Array[Int](0x1bea, 0x1bec, 1), + Array[Int](0x1bee, 0x1bf2, 4), + Array[Int](0x1bf3, 0x1c24, 49), + Array[Int](0x1c25, 0x1c2b, 1), + Array[Int](0x1c34, 0x1c35, 1), + Array[Int](0x1ce1, 0x1cf2, 17), + Array[Int](0xa823, 0xa824, 1), + Array[Int](0xa827, 0xa880, 89), + Array[Int](0xa881, 0xa8b4, 51), + Array[Int](0xa8b5, 0xa8c3, 1), + Array[Int](0xa952, 0xa953, 1), + Array[Int](0xa983, 0xa9b4, 49), + Array[Int](0xa9b5, 0xa9ba, 5), + Array[Int](0xa9bb, 0xa9bd, 2), + Array[Int](0xa9be, 0xa9c0, 1), + Array[Int](0xaa2f, 0xaa30, 1), + Array[Int](0xaa33, 0xaa34, 1), + Array[Int](0xaa4d, 0xaa7b, 46), + Array[Int](0xabe3, 0xabe4, 1), + Array[Int](0xabe6, 0xabe7, 1), + Array[Int](0xabe9, 0xabea, 1), + Array[Int](0xabec, 0xabec, 1), + Array[Int](0x11000, 0x11000, 1), + Array[Int](0x11002, 0x11082, 128), + 
Array[Int](0x110b0, 0x110b2, 1), + Array[Int](0x110b7, 0x110b8, 1), + Array[Int](0x1d165, 0x1d166, 1), + Array[Int](0x1d16d, 0x1d172, 1) + ) + private final lazy val _Mn: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0300, 0x036f, 1), + Array[Int](0x0483, 0x0487, 1), + Array[Int](0x0591, 0x05bd, 1), + Array[Int](0x05bf, 0x05c1, 2), + Array[Int](0x05c2, 0x05c4, 2), + Array[Int](0x05c5, 0x05c7, 2), + Array[Int](0x0610, 0x061a, 1), + Array[Int](0x064b, 0x065f, 1), + Array[Int](0x0670, 0x06d6, 102), + Array[Int](0x06d7, 0x06dc, 1), + Array[Int](0x06df, 0x06e4, 1), + Array[Int](0x06e7, 0x06e8, 1), + Array[Int](0x06ea, 0x06ed, 1), + Array[Int](0x0711, 0x0730, 31), + Array[Int](0x0731, 0x074a, 1), + Array[Int](0x07a6, 0x07b0, 1), + Array[Int](0x07eb, 0x07f3, 1), + Array[Int](0x0816, 0x0819, 1), + Array[Int](0x081b, 0x0823, 1), + Array[Int](0x0825, 0x0827, 1), + Array[Int](0x0829, 0x082d, 1), + Array[Int](0x0859, 0x085b, 1), + Array[Int](0x0900, 0x0902, 1), + Array[Int](0x093a, 0x093c, 2), + Array[Int](0x0941, 0x0948, 1), + Array[Int](0x094d, 0x0951, 4), + Array[Int](0x0952, 0x0957, 1), + Array[Int](0x0962, 0x0963, 1), + Array[Int](0x0981, 0x09bc, 59), + Array[Int](0x09c1, 0x09c4, 1), + Array[Int](0x09cd, 0x09e2, 21), + Array[Int](0x09e3, 0x0a01, 30), + Array[Int](0x0a02, 0x0a3c, 58), + Array[Int](0x0a41, 0x0a42, 1), + Array[Int](0x0a47, 0x0a48, 1), + Array[Int](0x0a4b, 0x0a4d, 1), + Array[Int](0x0a51, 0x0a70, 31), + Array[Int](0x0a71, 0x0a75, 4), + Array[Int](0x0a81, 0x0a82, 1), + Array[Int](0x0abc, 0x0ac1, 5), + Array[Int](0x0ac2, 0x0ac5, 1), + Array[Int](0x0ac7, 0x0ac8, 1), + Array[Int](0x0acd, 0x0ae2, 21), + Array[Int](0x0ae3, 0x0b01, 30), + Array[Int](0x0b3c, 0x0b3f, 3), + Array[Int](0x0b41, 0x0b44, 1), + Array[Int](0x0b4d, 0x0b56, 9), + Array[Int](0x0b62, 0x0b63, 1), + Array[Int](0x0b82, 0x0bc0, 62), + Array[Int](0x0bcd, 0x0c3e, 113), + Array[Int](0x0c3f, 0x0c40, 1), + Array[Int](0x0c46, 0x0c48, 1), + Array[Int](0x0c4a, 0x0c4d, 1), + Array[Int](0x0c55, 
0x0c56, 1), + Array[Int](0x0c62, 0x0c63, 1), + Array[Int](0x0cbc, 0x0cbf, 3), + Array[Int](0x0cc6, 0x0ccc, 6), + Array[Int](0x0ccd, 0x0ce2, 21), + Array[Int](0x0ce3, 0x0d41, 94), + Array[Int](0x0d42, 0x0d44, 1), + Array[Int](0x0d4d, 0x0d62, 21), + Array[Int](0x0d63, 0x0dca, 103), + Array[Int](0x0dd2, 0x0dd4, 1), + Array[Int](0x0dd6, 0x0e31, 91), + Array[Int](0x0e34, 0x0e3a, 1), + Array[Int](0x0e47, 0x0e4e, 1), + Array[Int](0x0eb1, 0x0eb4, 3), + Array[Int](0x0eb5, 0x0eb9, 1), + Array[Int](0x0ebb, 0x0ebc, 1), + Array[Int](0x0ec8, 0x0ecd, 1), + Array[Int](0x0f18, 0x0f19, 1), + Array[Int](0x0f35, 0x0f39, 2), + Array[Int](0x0f71, 0x0f7e, 1), + Array[Int](0x0f80, 0x0f84, 1), + Array[Int](0x0f86, 0x0f87, 1), + Array[Int](0x0f8d, 0x0f97, 1), + Array[Int](0x0f99, 0x0fbc, 1), + Array[Int](0x0fc6, 0x102d, 103), + Array[Int](0x102e, 0x1030, 1), + Array[Int](0x1032, 0x1037, 1), + Array[Int](0x1039, 0x103a, 1), + Array[Int](0x103d, 0x103e, 1), + Array[Int](0x1058, 0x1059, 1), + Array[Int](0x105e, 0x1060, 1), + Array[Int](0x1071, 0x1074, 1), + Array[Int](0x1082, 0x1085, 3), + Array[Int](0x1086, 0x108d, 7), + Array[Int](0x109d, 0x135d, 704), + Array[Int](0x135e, 0x135f, 1), + Array[Int](0x1712, 0x1714, 1), + Array[Int](0x1732, 0x1734, 1), + Array[Int](0x1752, 0x1753, 1), + Array[Int](0x1772, 0x1773, 1), + Array[Int](0x17b7, 0x17bd, 1), + Array[Int](0x17c6, 0x17c9, 3), + Array[Int](0x17ca, 0x17d3, 1), + Array[Int](0x17dd, 0x180b, 46), + Array[Int](0x180c, 0x180d, 1), + Array[Int](0x18a9, 0x1920, 119), + Array[Int](0x1921, 0x1922, 1), + Array[Int](0x1927, 0x1928, 1), + Array[Int](0x1932, 0x1939, 7), + Array[Int](0x193a, 0x193b, 1), + Array[Int](0x1a17, 0x1a18, 1), + Array[Int](0x1a56, 0x1a58, 2), + Array[Int](0x1a59, 0x1a5e, 1), + Array[Int](0x1a60, 0x1a62, 2), + Array[Int](0x1a65, 0x1a6c, 1), + Array[Int](0x1a73, 0x1a7c, 1), + Array[Int](0x1a7f, 0x1b00, 129), + Array[Int](0x1b01, 0x1b03, 1), + Array[Int](0x1b34, 0x1b36, 2), + Array[Int](0x1b37, 0x1b3a, 1), + Array[Int](0x1b3c, 
0x1b42, 6), + Array[Int](0x1b6b, 0x1b73, 1), + Array[Int](0x1b80, 0x1b81, 1), + Array[Int](0x1ba2, 0x1ba5, 1), + Array[Int](0x1ba8, 0x1ba9, 1), + Array[Int](0x1be6, 0x1be8, 2), + Array[Int](0x1be9, 0x1bed, 4), + Array[Int](0x1bef, 0x1bf1, 1), + Array[Int](0x1c2c, 0x1c33, 1), + Array[Int](0x1c36, 0x1c37, 1), + Array[Int](0x1cd0, 0x1cd2, 1), + Array[Int](0x1cd4, 0x1ce0, 1), + Array[Int](0x1ce2, 0x1ce8, 1), + Array[Int](0x1ced, 0x1dc0, 211), + Array[Int](0x1dc1, 0x1de6, 1), + Array[Int](0x1dfc, 0x1dff, 1), + Array[Int](0x20d0, 0x20dc, 1), + Array[Int](0x20e1, 0x20e5, 4), + Array[Int](0x20e6, 0x20f0, 1), + Array[Int](0x2cef, 0x2cf1, 1), + Array[Int](0x2d7f, 0x2de0, 97), + Array[Int](0x2de1, 0x2dff, 1), + Array[Int](0x302a, 0x302f, 1), + Array[Int](0x3099, 0x309a, 1), + Array[Int](0xa66f, 0xa67c, 13), + Array[Int](0xa67d, 0xa6f0, 115), + Array[Int](0xa6f1, 0xa802, 273), + Array[Int](0xa806, 0xa80b, 5), + Array[Int](0xa825, 0xa826, 1), + Array[Int](0xa8c4, 0xa8e0, 28), + Array[Int](0xa8e1, 0xa8f1, 1), + Array[Int](0xa926, 0xa92d, 1), + Array[Int](0xa947, 0xa951, 1), + Array[Int](0xa980, 0xa982, 1), + Array[Int](0xa9b3, 0xa9b6, 3), + Array[Int](0xa9b7, 0xa9b9, 1), + Array[Int](0xa9bc, 0xaa29, 109), + Array[Int](0xaa2a, 0xaa2e, 1), + Array[Int](0xaa31, 0xaa32, 1), + Array[Int](0xaa35, 0xaa36, 1), + Array[Int](0xaa43, 0xaa4c, 9), + Array[Int](0xaab0, 0xaab2, 2), + Array[Int](0xaab3, 0xaab4, 1), + Array[Int](0xaab7, 0xaab8, 1), + Array[Int](0xaabe, 0xaabf, 1), + Array[Int](0xaac1, 0xabe5, 292), + Array[Int](0xabe8, 0xabed, 5), + Array[Int](0xfb1e, 0xfe00, 738), + Array[Int](0xfe01, 0xfe0f, 1), + Array[Int](0xfe20, 0xfe26, 1), + Array[Int](0x101fd, 0x10a01, 2052), + Array[Int](0x10a02, 0x10a03, 1), + Array[Int](0x10a05, 0x10a06, 1), + Array[Int](0x10a0c, 0x10a0f, 1), + Array[Int](0x10a38, 0x10a3a, 1), + Array[Int](0x10a3f, 0x11001, 1474), + Array[Int](0x11038, 0x11046, 1), + Array[Int](0x11080, 0x11081, 1), + Array[Int](0x110b3, 0x110b6, 1), + Array[Int](0x110b9, 0x110ba, 1), 
+ Array[Int](0x1d167, 0x1d169, 1), + Array[Int](0x1d17b, 0x1d182, 1), + Array[Int](0x1d185, 0x1d18b, 1), + Array[Int](0x1d1aa, 0x1d1ad, 1), + Array[Int](0x1d242, 0x1d244, 1), + Array[Int](0xe0100, 0xe01ef, 1) + ) + private final lazy val _Zl: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x2028, 0x2028, 1)) + private final lazy val _Zp: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x2029, 0x2029, 1)) + private final lazy val _Zs: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0020, 0x00a0, 128), + Array[Int](0x1680, 0x180e, 398), + Array[Int](0x2000, 0x200a, 1), + Array[Int](0x202f, 0x205f, 48), + Array[Int](0x3000, 0x3000, 1)) + private final lazy val _Cs: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0xd800, 0xdfff, 1)) + private final lazy val _Co: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0xe000, 0xf8ff, 1), + Array[Int](0xf0000, 0xffffd, 1), + Array[Int](0x100000, 0x10fffd, 1)) + private final lazy val _Cf: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x00ad, 0x0600, 1363), + Array[Int](0x0601, 0x0603, 1), + Array[Int](0x06dd, 0x070f, 50), + Array[Int](0x17b4, 0x17b5, 1), + Array[Int](0x200b, 0x200f, 1), + Array[Int](0x202a, 0x202e, 1), + Array[Int](0x2060, 0x2064, 1), + Array[Int](0x206a, 0x206f, 1), + Array[Int](0xfeff, 0xfff9, 250), + Array[Int](0xfffa, 0xfffb, 1), + Array[Int](0x110bd, 0x1d173, 49334), + Array[Int](0x1d174, 0x1d17a, 1), + Array[Int](0xe0001, 0xe0020, 31), + Array[Int](0xe0021, 0xe007f, 1) + ) + private final lazy val _Cc: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0001, 0x001f, 1), Array[Int](0x007f, 0x009f, 1)) + private final lazy val _Po: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0021, 0x0023, 1), + Array[Int](0x0025, 0x0027, 1), + Array[Int](0x002a, 0x002e, 2), + Array[Int](0x002f, 0x003a, 11), + Array[Int](0x003b, 0x003f, 4), + Array[Int](0x0040, 0x005c, 28), + Array[Int](0x00a1, 0x00b7, 22), + Array[Int](0x00bf, 0x037e, 703), + Array[Int](0x0387, 0x055a, 467), + 
Array[Int](0x055b, 0x055f, 1), + Array[Int](0x0589, 0x05c0, 55), + Array[Int](0x05c3, 0x05c6, 3), + Array[Int](0x05f3, 0x05f4, 1), + Array[Int](0x0609, 0x060a, 1), + Array[Int](0x060c, 0x060d, 1), + Array[Int](0x061b, 0x061e, 3), + Array[Int](0x061f, 0x066a, 75), + Array[Int](0x066b, 0x066d, 1), + Array[Int](0x06d4, 0x0700, 44), + Array[Int](0x0701, 0x070d, 1), + Array[Int](0x07f7, 0x07f9, 1), + Array[Int](0x0830, 0x083e, 1), + Array[Int](0x085e, 0x0964, 262), + Array[Int](0x0965, 0x0970, 11), + Array[Int](0x0df4, 0x0e4f, 91), + Array[Int](0x0e5a, 0x0e5b, 1), + Array[Int](0x0f04, 0x0f12, 1), + Array[Int](0x0f85, 0x0fd0, 75), + Array[Int](0x0fd1, 0x0fd4, 1), + Array[Int](0x0fd9, 0x0fda, 1), + Array[Int](0x104a, 0x104f, 1), + Array[Int](0x10fb, 0x1361, 614), + Array[Int](0x1362, 0x1368, 1), + Array[Int](0x166d, 0x166e, 1), + Array[Int](0x16eb, 0x16ed, 1), + Array[Int](0x1735, 0x1736, 1), + Array[Int](0x17d4, 0x17d6, 1), + Array[Int](0x17d8, 0x17da, 1), + Array[Int](0x1800, 0x1805, 1), + Array[Int](0x1807, 0x180a, 1), + Array[Int](0x1944, 0x1945, 1), + Array[Int](0x1a1e, 0x1a1f, 1), + Array[Int](0x1aa0, 0x1aa6, 1), + Array[Int](0x1aa8, 0x1aad, 1), + Array[Int](0x1b5a, 0x1b60, 1), + Array[Int](0x1bfc, 0x1bff, 1), + Array[Int](0x1c3b, 0x1c3f, 1), + Array[Int](0x1c7e, 0x1c7f, 1), + Array[Int](0x1cd3, 0x2016, 835), + Array[Int](0x2017, 0x2020, 9), + Array[Int](0x2021, 0x2027, 1), + Array[Int](0x2030, 0x2038, 1), + Array[Int](0x203b, 0x203e, 1), + Array[Int](0x2041, 0x2043, 1), + Array[Int](0x2047, 0x2051, 1), + Array[Int](0x2053, 0x2055, 2), + Array[Int](0x2056, 0x205e, 1), + Array[Int](0x2cf9, 0x2cfc, 1), + Array[Int](0x2cfe, 0x2cff, 1), + Array[Int](0x2d70, 0x2e00, 144), + Array[Int](0x2e01, 0x2e06, 5), + Array[Int](0x2e07, 0x2e08, 1), + Array[Int](0x2e0b, 0x2e0e, 3), + Array[Int](0x2e0f, 0x2e16, 1), + Array[Int](0x2e18, 0x2e19, 1), + Array[Int](0x2e1b, 0x2e1e, 3), + Array[Int](0x2e1f, 0x2e2a, 11), + Array[Int](0x2e2b, 0x2e2e, 1), + Array[Int](0x2e30, 0x2e31, 1), + 
Array[Int](0x3001, 0x3003, 1), + Array[Int](0x303d, 0x30fb, 190), + Array[Int](0xa4fe, 0xa4ff, 1), + Array[Int](0xa60d, 0xa60f, 1), + Array[Int](0xa673, 0xa67e, 11), + Array[Int](0xa6f2, 0xa6f7, 1), + Array[Int](0xa874, 0xa877, 1), + Array[Int](0xa8ce, 0xa8cf, 1), + Array[Int](0xa8f8, 0xa8fa, 1), + Array[Int](0xa92e, 0xa92f, 1), + Array[Int](0xa95f, 0xa9c1, 98), + Array[Int](0xa9c2, 0xa9cd, 1), + Array[Int](0xa9de, 0xa9df, 1), + Array[Int](0xaa5c, 0xaa5f, 1), + Array[Int](0xaade, 0xaadf, 1), + Array[Int](0xabeb, 0xfe10, 21029), + Array[Int](0xfe11, 0xfe16, 1), + Array[Int](0xfe19, 0xfe30, 23), + Array[Int](0xfe45, 0xfe46, 1), + Array[Int](0xfe49, 0xfe4c, 1), + Array[Int](0xfe50, 0xfe52, 1), + Array[Int](0xfe54, 0xfe57, 1), + Array[Int](0xfe5f, 0xfe61, 1), + Array[Int](0xfe68, 0xfe6a, 2), + Array[Int](0xfe6b, 0xff01, 150), + Array[Int](0xff02, 0xff03, 1), + Array[Int](0xff05, 0xff07, 1), + Array[Int](0xff0a, 0xff0e, 2), + Array[Int](0xff0f, 0xff1a, 11), + Array[Int](0xff1b, 0xff1f, 4), + Array[Int](0xff20, 0xff3c, 28), + Array[Int](0xff61, 0xff64, 3), + Array[Int](0xff65, 0xff65, 1), + Array[Int](0x10100, 0x10100, 1), + Array[Int](0x10101, 0x1039f, 670), + Array[Int](0x103d0, 0x10857, 1159), + Array[Int](0x1091f, 0x1093f, 32), + Array[Int](0x10a50, 0x10a58, 1), + Array[Int](0x10a7f, 0x10b39, 186), + Array[Int](0x10b3a, 0x10b3f, 1), + Array[Int](0x11047, 0x1104d, 1), + Array[Int](0x110bb, 0x110bc, 1), + Array[Int](0x110be, 0x110c1, 1), + Array[Int](0x12470, 0x12473, 1) + ) + private final lazy val _Pi: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x00ab, 0x2018, 8045), + Array[Int](0x201b, 0x201c, 1), + Array[Int](0x201f, 0x2039, 26), + Array[Int](0x2e02, 0x2e04, 2), + Array[Int](0x2e09, 0x2e0c, 3), + Array[Int](0x2e1c, 0x2e20, 4)) + private final lazy val _Pf: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x00bb, 0x2019, 8030), + Array[Int](0x201d, 0x203a, 29), + Array[Int](0x2e03, 0x2e05, 2), + Array[Int](0x2e0a, 0x2e0d, 3), + Array[Int](0x2e1d, 0x2e21, 
4)) + private final lazy val _Pe: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0029, 0x005d, 52), + Array[Int](0x007d, 0x0f3b, 3774), + Array[Int](0x0f3d, 0x169c, 1887), + Array[Int](0x2046, 0x207e, 56), + Array[Int](0x208e, 0x232a, 668), + Array[Int](0x2769, 0x2775, 2), + Array[Int](0x27c6, 0x27e7, 33), + Array[Int](0x27e9, 0x27ef, 2), + Array[Int](0x2984, 0x2998, 2), + Array[Int](0x29d9, 0x29db, 2), + Array[Int](0x29fd, 0x2e23, 1062), + Array[Int](0x2e25, 0x2e29, 2), + Array[Int](0x3009, 0x3011, 2), + Array[Int](0x3015, 0x301b, 2), + Array[Int](0x301e, 0x301f, 1), + Array[Int](0xfd3f, 0xfe18, 217), + Array[Int](0xfe36, 0xfe44, 2), + Array[Int](0xfe48, 0xfe5a, 18), + Array[Int](0xfe5c, 0xfe5e, 2), + Array[Int](0xff09, 0xff3d, 52), + Array[Int](0xff5d, 0xff63, 3) + ) + private final lazy val _Pd: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x002d, 0x058a, 1373), + Array[Int](0x05be, 0x1400, 3650), + Array[Int](0x1806, 0x2010, 2058), + Array[Int](0x2011, 0x2015, 1), + Array[Int](0x2e17, 0x2e1a, 3), + Array[Int](0x301c, 0x3030, 20), + Array[Int](0x30a0, 0xfe31, 52625), + Array[Int](0xfe32, 0xfe58, 38), + Array[Int](0xfe63, 0xff0d, 170) + ) + private final lazy val _Pc: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x005f, 0x203f, 8160), + Array[Int](0x2040, 0x2054, 20), + Array[Int](0xfe33, 0xfe34, 1), + Array[Int](0xfe4d, 0xfe4f, 1), + Array[Int](0xff3f, 0xff3f, 1)) + private final lazy val _Ps: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0028, 0x005b, 51), + Array[Int](0x007b, 0x0f3a, 3775), + Array[Int](0x0f3c, 0x169b, 1887), + Array[Int](0x201a, 0x201e, 4), + Array[Int](0x2045, 0x207d, 56), + Array[Int](0x208d, 0x2329, 668), + Array[Int](0x2768, 0x2774, 2), + Array[Int](0x27c5, 0x27e6, 33), + Array[Int](0x27e8, 0x27ee, 2), + Array[Int](0x2983, 0x2997, 2), + Array[Int](0x29d8, 0x29da, 2), + Array[Int](0x29fc, 0x2e22, 1062), + Array[Int](0x2e24, 0x2e28, 2), + Array[Int](0x3008, 0x3010, 2), + Array[Int](0x3014, 0x301a, 2), + 
Array[Int](0x301d, 0xfd3e, 52513), + Array[Int](0xfe17, 0xfe35, 30), + Array[Int](0xfe37, 0xfe43, 2), + Array[Int](0xfe47, 0xfe59, 18), + Array[Int](0xfe5b, 0xfe5d, 2), + Array[Int](0xff08, 0xff3b, 51), + Array[Int](0xff5b, 0xff5f, 4), + Array[Int](0xff62, 0xff62, 1) + ) + private final lazy val _Nd: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0030, 0x0039, 1), + Array[Int](0x0660, 0x0669, 1), + Array[Int](0x06f0, 0x06f9, 1), + Array[Int](0x07c0, 0x07c9, 1), + Array[Int](0x0966, 0x096f, 1), + Array[Int](0x09e6, 0x09ef, 1), + Array[Int](0x0a66, 0x0a6f, 1), + Array[Int](0x0ae6, 0x0aef, 1), + Array[Int](0x0b66, 0x0b6f, 1), + Array[Int](0x0be6, 0x0bef, 1), + Array[Int](0x0c66, 0x0c6f, 1), + Array[Int](0x0ce6, 0x0cef, 1), + Array[Int](0x0d66, 0x0d6f, 1), + Array[Int](0x0e50, 0x0e59, 1), + Array[Int](0x0ed0, 0x0ed9, 1), + Array[Int](0x0f20, 0x0f29, 1), + Array[Int](0x1040, 0x1049, 1), + Array[Int](0x1090, 0x1099, 1), + Array[Int](0x17e0, 0x17e9, 1), + Array[Int](0x1810, 0x1819, 1), + Array[Int](0x1946, 0x194f, 1), + Array[Int](0x19d0, 0x19d9, 1), + Array[Int](0x1a80, 0x1a89, 1), + Array[Int](0x1a90, 0x1a99, 1), + Array[Int](0x1b50, 0x1b59, 1), + Array[Int](0x1bb0, 0x1bb9, 1), + Array[Int](0x1c40, 0x1c49, 1), + Array[Int](0x1c50, 0x1c59, 1), + Array[Int](0xa620, 0xa629, 1), + Array[Int](0xa8d0, 0xa8d9, 1), + Array[Int](0xa900, 0xa909, 1), + Array[Int](0xa9d0, 0xa9d9, 1), + Array[Int](0xaa50, 0xaa59, 1), + Array[Int](0xabf0, 0xabf9, 1), + Array[Int](0xff10, 0xff19, 1), + Array[Int](0x104a0, 0x104a9, 1), + Array[Int](0x11066, 0x1106f, 1), + Array[Int](0x1d7ce, 0x1d7ff, 1) + ) + private final lazy val _Nl: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x16ee, 0x16f0, 1), + Array[Int](0x2160, 0x2182, 1), + Array[Int](0x2185, 0x2188, 1), + Array[Int](0x3007, 0x3021, 26), + Array[Int](0x3022, 0x3029, 1), + Array[Int](0x3038, 0x303a, 1), + Array[Int](0xa6e6, 0xa6ef, 1), + Array[Int](0x10140, 0x10174, 1), + Array[Int](0x10341, 0x1034a, 9), + 
Array[Int](0x103d1, 0x103d5, 1), + Array[Int](0x12400, 0x12462, 1) + ) + private final lazy val _No: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x00b2, 0x00b3, 1), + Array[Int](0x00b9, 0x00bc, 3), + Array[Int](0x00bd, 0x00be, 1), + Array[Int](0x09f4, 0x09f9, 1), + Array[Int](0x0b72, 0x0b77, 1), + Array[Int](0x0bf0, 0x0bf2, 1), + Array[Int](0x0c78, 0x0c7e, 1), + Array[Int](0x0d70, 0x0d75, 1), + Array[Int](0x0f2a, 0x0f33, 1), + Array[Int](0x1369, 0x137c, 1), + Array[Int](0x17f0, 0x17f9, 1), + Array[Int](0x19da, 0x2070, 1686), + Array[Int](0x2074, 0x2079, 1), + Array[Int](0x2080, 0x2089, 1), + Array[Int](0x2150, 0x215f, 1), + Array[Int](0x2189, 0x2460, 727), + Array[Int](0x2461, 0x249b, 1), + Array[Int](0x24ea, 0x24ff, 1), + Array[Int](0x2776, 0x2793, 1), + Array[Int](0x2cfd, 0x3192, 1173), + Array[Int](0x3193, 0x3195, 1), + Array[Int](0x3220, 0x3229, 1), + Array[Int](0x3251, 0x325f, 1), + Array[Int](0x3280, 0x3289, 1), + Array[Int](0x32b1, 0x32bf, 1), + Array[Int](0xa830, 0xa835, 1), + Array[Int](0x10107, 0x10133, 1), + Array[Int](0x10175, 0x10178, 1), + Array[Int](0x1018a, 0x10320, 406), + Array[Int](0x10321, 0x10323, 1), + Array[Int](0x10858, 0x1085f, 1), + Array[Int](0x10916, 0x1091b, 1), + Array[Int](0x10a40, 0x10a47, 1), + Array[Int](0x10a7d, 0x10a7e, 1), + Array[Int](0x10b58, 0x10b5f, 1), + Array[Int](0x10b78, 0x10b7f, 1), + Array[Int](0x10e60, 0x10e7e, 1), + Array[Int](0x11052, 0x11065, 1), + Array[Int](0x1d360, 0x1d371, 1), + Array[Int](0x1f100, 0x1f10a, 1) + ) + private final lazy val _So: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x00a6, 0x00a7, 1), + Array[Int](0x00a9, 0x00ae, 5), + Array[Int](0x00b0, 0x00b6, 6), + Array[Int](0x0482, 0x060e, 396), + Array[Int](0x060f, 0x06de, 207), + Array[Int](0x06e9, 0x06fd, 20), + Array[Int](0x06fe, 0x07f6, 248), + Array[Int](0x09fa, 0x0b70, 374), + Array[Int](0x0bf3, 0x0bf8, 1), + Array[Int](0x0bfa, 0x0c7f, 133), + Array[Int](0x0d79, 0x0f01, 392), + Array[Int](0x0f02, 0x0f03, 1), + 
Array[Int](0x0f13, 0x0f17, 1), + Array[Int](0x0f1a, 0x0f1f, 1), + Array[Int](0x0f34, 0x0f38, 2), + Array[Int](0x0fbe, 0x0fc5, 1), + Array[Int](0x0fc7, 0x0fcc, 1), + Array[Int](0x0fce, 0x0fcf, 1), + Array[Int](0x0fd5, 0x0fd8, 1), + Array[Int](0x109e, 0x109f, 1), + Array[Int](0x1360, 0x1390, 48), + Array[Int](0x1391, 0x1399, 1), + Array[Int](0x1940, 0x19de, 158), + Array[Int](0x19df, 0x19ff, 1), + Array[Int](0x1b61, 0x1b6a, 1), + Array[Int](0x1b74, 0x1b7c, 1), + Array[Int](0x2100, 0x2101, 1), + Array[Int](0x2103, 0x2106, 1), + Array[Int](0x2108, 0x2109, 1), + Array[Int](0x2114, 0x2116, 2), + Array[Int](0x2117, 0x211e, 7), + Array[Int](0x211f, 0x2123, 1), + Array[Int](0x2125, 0x2129, 2), + Array[Int](0x212e, 0x213a, 12), + Array[Int](0x213b, 0x214a, 15), + Array[Int](0x214c, 0x214d, 1), + Array[Int](0x214f, 0x2195, 70), + Array[Int](0x2196, 0x2199, 1), + Array[Int](0x219c, 0x219f, 1), + Array[Int](0x21a1, 0x21a2, 1), + Array[Int](0x21a4, 0x21a5, 1), + Array[Int](0x21a7, 0x21ad, 1), + Array[Int](0x21af, 0x21cd, 1), + Array[Int](0x21d0, 0x21d1, 1), + Array[Int](0x21d3, 0x21d5, 2), + Array[Int](0x21d6, 0x21f3, 1), + Array[Int](0x2300, 0x2307, 1), + Array[Int](0x230c, 0x231f, 1), + Array[Int](0x2322, 0x2328, 1), + Array[Int](0x232b, 0x237b, 1), + Array[Int](0x237d, 0x239a, 1), + Array[Int](0x23b4, 0x23db, 1), + Array[Int](0x23e2, 0x23f3, 1), + Array[Int](0x2400, 0x2426, 1), + Array[Int](0x2440, 0x244a, 1), + Array[Int](0x249c, 0x24e9, 1), + Array[Int](0x2500, 0x25b6, 1), + Array[Int](0x25b8, 0x25c0, 1), + Array[Int](0x25c2, 0x25f7, 1), + Array[Int](0x2600, 0x266e, 1), + Array[Int](0x2670, 0x26ff, 1), + Array[Int](0x2701, 0x2767, 1), + Array[Int](0x2794, 0x27bf, 1), + Array[Int](0x2800, 0x28ff, 1), + Array[Int](0x2b00, 0x2b2f, 1), + Array[Int](0x2b45, 0x2b46, 1), + Array[Int](0x2b50, 0x2b59, 1), + Array[Int](0x2ce5, 0x2cea, 1), + Array[Int](0x2e80, 0x2e99, 1), + Array[Int](0x2e9b, 0x2ef3, 1), + Array[Int](0x2f00, 0x2fd5, 1), + Array[Int](0x2ff0, 0x2ffb, 1), + 
Array[Int](0x3004, 0x3012, 14), + Array[Int](0x3013, 0x3020, 13), + Array[Int](0x3036, 0x3037, 1), + Array[Int](0x303e, 0x303f, 1), + Array[Int](0x3190, 0x3191, 1), + Array[Int](0x3196, 0x319f, 1), + Array[Int](0x31c0, 0x31e3, 1), + Array[Int](0x3200, 0x321e, 1), + Array[Int](0x322a, 0x3250, 1), + Array[Int](0x3260, 0x327f, 1), + Array[Int](0x328a, 0x32b0, 1), + Array[Int](0x32c0, 0x32fe, 1), + Array[Int](0x3300, 0x33ff, 1), + Array[Int](0x4dc0, 0x4dff, 1), + Array[Int](0xa490, 0xa4c6, 1), + Array[Int](0xa828, 0xa82b, 1), + Array[Int](0xa836, 0xa837, 1), + Array[Int](0xa839, 0xaa77, 574), + Array[Int](0xaa78, 0xaa79, 1), + Array[Int](0xfdfd, 0xffe4, 487), + Array[Int](0xffe8, 0xffed, 5), + Array[Int](0xffee, 0xfffc, 14), + Array[Int](0xfffd, 0xfffd, 1), + Array[Int](0x10102, 0x10102, 1), + Array[Int](0x10137, 0x1013f, 1), + Array[Int](0x10179, 0x10189, 1), + Array[Int](0x10190, 0x1019b, 1), + Array[Int](0x101d0, 0x101fc, 1), + Array[Int](0x1d000, 0x1d0f5, 1), + Array[Int](0x1d100, 0x1d126, 1), + Array[Int](0x1d129, 0x1d164, 1), + Array[Int](0x1d16a, 0x1d16c, 1), + Array[Int](0x1d183, 0x1d184, 1), + Array[Int](0x1d18c, 0x1d1a9, 1), + Array[Int](0x1d1ae, 0x1d1dd, 1), + Array[Int](0x1d200, 0x1d241, 1), + Array[Int](0x1d245, 0x1d300, 187), + Array[Int](0x1d301, 0x1d356, 1), + Array[Int](0x1f000, 0x1f02b, 1), + Array[Int](0x1f030, 0x1f093, 1), + Array[Int](0x1f0a0, 0x1f0ae, 1), + Array[Int](0x1f0b1, 0x1f0be, 1), + Array[Int](0x1f0c1, 0x1f0cf, 1), + Array[Int](0x1f0d1, 0x1f0df, 1), + Array[Int](0x1f110, 0x1f12e, 1), + Array[Int](0x1f130, 0x1f169, 1), + Array[Int](0x1f170, 0x1f19a, 1), + Array[Int](0x1f1e6, 0x1f202, 1), + Array[Int](0x1f210, 0x1f23a, 1), + Array[Int](0x1f240, 0x1f248, 1), + Array[Int](0x1f250, 0x1f251, 1), + Array[Int](0x1f300, 0x1f320, 1), + Array[Int](0x1f330, 0x1f335, 1), + Array[Int](0x1f337, 0x1f37c, 1), + Array[Int](0x1f380, 0x1f393, 1), + Array[Int](0x1f3a0, 0x1f3c4, 1), + Array[Int](0x1f3c6, 0x1f3ca, 1), + Array[Int](0x1f3e0, 0x1f3f0, 1), + 
Array[Int](0x1f400, 0x1f43e, 1), + Array[Int](0x1f440, 0x1f442, 2), + Array[Int](0x1f443, 0x1f4f7, 1), + Array[Int](0x1f4f9, 0x1f4fc, 1), + Array[Int](0x1f500, 0x1f53d, 1), + Array[Int](0x1f550, 0x1f567, 1), + Array[Int](0x1f5fb, 0x1f5ff, 1), + Array[Int](0x1f601, 0x1f610, 1), + Array[Int](0x1f612, 0x1f614, 1), + Array[Int](0x1f616, 0x1f61c, 2), + Array[Int](0x1f61d, 0x1f61e, 1), + Array[Int](0x1f620, 0x1f625, 1), + Array[Int](0x1f628, 0x1f62b, 1), + Array[Int](0x1f62d, 0x1f630, 3), + Array[Int](0x1f631, 0x1f633, 1), + Array[Int](0x1f635, 0x1f640, 1), + Array[Int](0x1f645, 0x1f64f, 1), + Array[Int](0x1f680, 0x1f6c5, 1), + Array[Int](0x1f700, 0x1f773, 1) + ) + private final lazy val _Sm: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x002b, 0x003c, 17), + Array[Int](0x003d, 0x003e, 1), + Array[Int](0x007c, 0x007e, 2), + Array[Int](0x00ac, 0x00b1, 5), + Array[Int](0x00d7, 0x00f7, 32), + Array[Int](0x03f6, 0x0606, 528), + Array[Int](0x0607, 0x0608, 1), + Array[Int](0x2044, 0x2052, 14), + Array[Int](0x207a, 0x207c, 1), + Array[Int](0x208a, 0x208c, 1), + Array[Int](0x2118, 0x2140, 40), + Array[Int](0x2141, 0x2144, 1), + Array[Int](0x214b, 0x2190, 69), + Array[Int](0x2191, 0x2194, 1), + Array[Int](0x219a, 0x219b, 1), + Array[Int](0x21a0, 0x21a6, 3), + Array[Int](0x21ae, 0x21ce, 32), + Array[Int](0x21cf, 0x21d2, 3), + Array[Int](0x21d4, 0x21f4, 32), + Array[Int](0x21f5, 0x22ff, 1), + Array[Int](0x2308, 0x230b, 1), + Array[Int](0x2320, 0x2321, 1), + Array[Int](0x237c, 0x239b, 31), + Array[Int](0x239c, 0x23b3, 1), + Array[Int](0x23dc, 0x23e1, 1), + Array[Int](0x25b7, 0x25c1, 10), + Array[Int](0x25f8, 0x25ff, 1), + Array[Int](0x266f, 0x27c0, 337), + Array[Int](0x27c1, 0x27c4, 1), + Array[Int](0x27c7, 0x27ca, 1), + Array[Int](0x27cc, 0x27ce, 2), + Array[Int](0x27cf, 0x27e5, 1), + Array[Int](0x27f0, 0x27ff, 1), + Array[Int](0x2900, 0x2982, 1), + Array[Int](0x2999, 0x29d7, 1), + Array[Int](0x29dc, 0x29fb, 1), + Array[Int](0x29fe, 0x2aff, 1), + Array[Int](0x2b30, 0x2b44, 
1), + Array[Int](0x2b47, 0x2b4c, 1), + Array[Int](0xfb29, 0xfe62, 825), + Array[Int](0xfe64, 0xfe66, 1), + Array[Int](0xff0b, 0xff1c, 17), + Array[Int](0xff1d, 0xff1e, 1), + Array[Int](0xff5c, 0xff5e, 2), + Array[Int](0xffe2, 0xffe9, 7), + Array[Int](0xffea, 0xffec, 1), + Array[Int](0x1d6c1, 0x1d6db, 26), + Array[Int](0x1d6fb, 0x1d715, 26), + Array[Int](0x1d735, 0x1d74f, 26), + Array[Int](0x1d76f, 0x1d789, 26), + Array[Int](0x1d7a9, 0x1d7c3, 26) + ) + private final lazy val _Sk: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x005e, 0x0060, 2), + Array[Int](0x00a8, 0x00af, 7), + Array[Int](0x00b4, 0x00b8, 4), + Array[Int](0x02c2, 0x02c5, 1), + Array[Int](0x02d2, 0x02df, 1), + Array[Int](0x02e5, 0x02eb, 1), + Array[Int](0x02ed, 0x02ef, 2), + Array[Int](0x02f0, 0x02ff, 1), + Array[Int](0x0375, 0x0384, 15), + Array[Int](0x0385, 0x1fbd, 7224), + Array[Int](0x1fbf, 0x1fc1, 1), + Array[Int](0x1fcd, 0x1fcf, 1), + Array[Int](0x1fdd, 0x1fdf, 1), + Array[Int](0x1fed, 0x1fef, 1), + Array[Int](0x1ffd, 0x1ffe, 1), + Array[Int](0x309b, 0x309c, 1), + Array[Int](0xa700, 0xa716, 1), + Array[Int](0xa720, 0xa721, 1), + Array[Int](0xa789, 0xa78a, 1), + Array[Int](0xfbb2, 0xfbc1, 1), + Array[Int](0xff3e, 0xff40, 2), + Array[Int](0xffe3, 0xffe3, 1) + ) + private final lazy val _Sc: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0024, 0x00a2, 126), + Array[Int](0x00a3, 0x00a5, 1), + Array[Int](0x060b, 0x09f2, 999), + Array[Int](0x09f3, 0x09fb, 8), + Array[Int](0x0af1, 0x0bf9, 264), + Array[Int](0x0e3f, 0x17db, 2460), + Array[Int](0x20a0, 0x20b9, 1), + Array[Int](0xa838, 0xfdfc, 21956), + Array[Int](0xfe69, 0xff04, 155), + Array[Int](0xffe0, 0xffe1, 1), + Array[Int](0xffe5, 0xffe6, 1) + ) + private final lazy val _Lu: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0041, 0x005a, 1), + Array[Int](0x00c0, 0x00d6, 1), + Array[Int](0x00d8, 0x00de, 1), + Array[Int](0x0100, 0x0136, 2), + Array[Int](0x0139, 0x0147, 2), + Array[Int](0x014a, 0x0178, 2), + Array[Int](0x0179, 
0x017d, 2), + Array[Int](0x0181, 0x0182, 1), + Array[Int](0x0184, 0x0186, 2), + Array[Int](0x0187, 0x0189, 2), + Array[Int](0x018a, 0x018b, 1), + Array[Int](0x018e, 0x0191, 1), + Array[Int](0x0193, 0x0194, 1), + Array[Int](0x0196, 0x0198, 1), + Array[Int](0x019c, 0x019d, 1), + Array[Int](0x019f, 0x01a0, 1), + Array[Int](0x01a2, 0x01a6, 2), + Array[Int](0x01a7, 0x01a9, 2), + Array[Int](0x01ac, 0x01ae, 2), + Array[Int](0x01af, 0x01b1, 2), + Array[Int](0x01b2, 0x01b3, 1), + Array[Int](0x01b5, 0x01b7, 2), + Array[Int](0x01b8, 0x01bc, 4), + Array[Int](0x01c4, 0x01cd, 3), + Array[Int](0x01cf, 0x01db, 2), + Array[Int](0x01de, 0x01ee, 2), + Array[Int](0x01f1, 0x01f4, 3), + Array[Int](0x01f6, 0x01f8, 1), + Array[Int](0x01fa, 0x0232, 2), + Array[Int](0x023a, 0x023b, 1), + Array[Int](0x023d, 0x023e, 1), + Array[Int](0x0241, 0x0243, 2), + Array[Int](0x0244, 0x0246, 1), + Array[Int](0x0248, 0x024e, 2), + Array[Int](0x0370, 0x0372, 2), + Array[Int](0x0376, 0x0386, 16), + Array[Int](0x0388, 0x038a, 1), + Array[Int](0x038c, 0x038e, 2), + Array[Int](0x038f, 0x0391, 2), + Array[Int](0x0392, 0x03a1, 1), + Array[Int](0x03a3, 0x03ab, 1), + Array[Int](0x03cf, 0x03d2, 3), + Array[Int](0x03d3, 0x03d4, 1), + Array[Int](0x03d8, 0x03ee, 2), + Array[Int](0x03f4, 0x03f7, 3), + Array[Int](0x03f9, 0x03fa, 1), + Array[Int](0x03fd, 0x042f, 1), + Array[Int](0x0460, 0x0480, 2), + Array[Int](0x048a, 0x04c0, 2), + Array[Int](0x04c1, 0x04cd, 2), + Array[Int](0x04d0, 0x0526, 2), + Array[Int](0x0531, 0x0556, 1), + Array[Int](0x10a0, 0x10c5, 1), + Array[Int](0x1e00, 0x1e94, 2), + Array[Int](0x1e9e, 0x1efe, 2), + Array[Int](0x1f08, 0x1f0f, 1), + Array[Int](0x1f18, 0x1f1d, 1), + Array[Int](0x1f28, 0x1f2f, 1), + Array[Int](0x1f38, 0x1f3f, 1), + Array[Int](0x1f48, 0x1f4d, 1), + Array[Int](0x1f59, 0x1f5f, 2), + Array[Int](0x1f68, 0x1f6f, 1), + Array[Int](0x1fb8, 0x1fbb, 1), + Array[Int](0x1fc8, 0x1fcb, 1), + Array[Int](0x1fd8, 0x1fdb, 1), + Array[Int](0x1fe8, 0x1fec, 1), + Array[Int](0x1ff8, 0x1ffb, 1), + 
Array[Int](0x2102, 0x2107, 5), + Array[Int](0x210b, 0x210d, 1), + Array[Int](0x2110, 0x2112, 1), + Array[Int](0x2115, 0x2119, 4), + Array[Int](0x211a, 0x211d, 1), + Array[Int](0x2124, 0x212a, 2), + Array[Int](0x212b, 0x212d, 1), + Array[Int](0x2130, 0x2133, 1), + Array[Int](0x213e, 0x213f, 1), + Array[Int](0x2145, 0x2183, 62), + Array[Int](0x2c00, 0x2c2e, 1), + Array[Int](0x2c60, 0x2c62, 2), + Array[Int](0x2c63, 0x2c64, 1), + Array[Int](0x2c67, 0x2c6d, 2), + Array[Int](0x2c6e, 0x2c70, 1), + Array[Int](0x2c72, 0x2c75, 3), + Array[Int](0x2c7e, 0x2c80, 1), + Array[Int](0x2c82, 0x2ce2, 2), + Array[Int](0x2ceb, 0x2ced, 2), + Array[Int](0xa640, 0xa66c, 2), + Array[Int](0xa680, 0xa696, 2), + Array[Int](0xa722, 0xa72e, 2), + Array[Int](0xa732, 0xa76e, 2), + Array[Int](0xa779, 0xa77d, 2), + Array[Int](0xa77e, 0xa786, 2), + Array[Int](0xa78b, 0xa78d, 2), + Array[Int](0xa790, 0xa7a0, 16), + Array[Int](0xa7a2, 0xa7a8, 2), + Array[Int](0xff21, 0xff3a, 1), + Array[Int](0x10400, 0x10427, 1), + Array[Int](0x1d400, 0x1d419, 1), + Array[Int](0x1d434, 0x1d44d, 1), + Array[Int](0x1d468, 0x1d481, 1), + Array[Int](0x1d49c, 0x1d49e, 2), + Array[Int](0x1d49f, 0x1d4a5, 3), + Array[Int](0x1d4a6, 0x1d4a9, 3), + Array[Int](0x1d4aa, 0x1d4ac, 1), + Array[Int](0x1d4ae, 0x1d4b5, 1), + Array[Int](0x1d4d0, 0x1d4e9, 1), + Array[Int](0x1d504, 0x1d505, 1), + Array[Int](0x1d507, 0x1d50a, 1), + Array[Int](0x1d50d, 0x1d514, 1), + Array[Int](0x1d516, 0x1d51c, 1), + Array[Int](0x1d538, 0x1d539, 1), + Array[Int](0x1d53b, 0x1d53e, 1), + Array[Int](0x1d540, 0x1d544, 1), + Array[Int](0x1d546, 0x1d54a, 4), + Array[Int](0x1d54b, 0x1d550, 1), + Array[Int](0x1d56c, 0x1d585, 1), + Array[Int](0x1d5a0, 0x1d5b9, 1), + Array[Int](0x1d5d4, 0x1d5ed, 1), + Array[Int](0x1d608, 0x1d621, 1), + Array[Int](0x1d63c, 0x1d655, 1), + Array[Int](0x1d670, 0x1d689, 1), + Array[Int](0x1d6a8, 0x1d6c0, 1), + Array[Int](0x1d6e2, 0x1d6fa, 1), + Array[Int](0x1d71c, 0x1d734, 1), + Array[Int](0x1d756, 0x1d76e, 1), + Array[Int](0x1d790, 
0x1d7a8, 1), + Array[Int](0x1d7ca, 0x1d7ca, 1) + ) + private final lazy val _Lt: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x01c5, 0x01cb, 3), + Array[Int](0x01f2, 0x1f88, 7574), + Array[Int](0x1f89, 0x1f8f, 1), + Array[Int](0x1f98, 0x1f9f, 1), + Array[Int](0x1fa8, 0x1faf, 1), + Array[Int](0x1fbc, 0x1fcc, 16), + Array[Int](0x1ffc, 0x1ffc, 1) + ) + private final lazy val _Lo: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x01bb, 0x01c0, 5), + Array[Int](0x01c1, 0x01c3, 1), + Array[Int](0x0294, 0x05d0, 828), + Array[Int](0x05d1, 0x05ea, 1), + Array[Int](0x05f0, 0x05f2, 1), + Array[Int](0x0620, 0x063f, 1), + Array[Int](0x0641, 0x064a, 1), + Array[Int](0x066e, 0x066f, 1), + Array[Int](0x0671, 0x06d3, 1), + Array[Int](0x06d5, 0x06ee, 25), + Array[Int](0x06ef, 0x06fa, 11), + Array[Int](0x06fb, 0x06fc, 1), + Array[Int](0x06ff, 0x0710, 17), + Array[Int](0x0712, 0x072f, 1), + Array[Int](0x074d, 0x07a5, 1), + Array[Int](0x07b1, 0x07ca, 25), + Array[Int](0x07cb, 0x07ea, 1), + Array[Int](0x0800, 0x0815, 1), + Array[Int](0x0840, 0x0858, 1), + Array[Int](0x0904, 0x0939, 1), + Array[Int](0x093d, 0x0950, 19), + Array[Int](0x0958, 0x0961, 1), + Array[Int](0x0972, 0x0977, 1), + Array[Int](0x0979, 0x097f, 1), + Array[Int](0x0985, 0x098c, 1), + Array[Int](0x098f, 0x0990, 1), + Array[Int](0x0993, 0x09a8, 1), + Array[Int](0x09aa, 0x09b0, 1), + Array[Int](0x09b2, 0x09b6, 4), + Array[Int](0x09b7, 0x09b9, 1), + Array[Int](0x09bd, 0x09ce, 17), + Array[Int](0x09dc, 0x09dd, 1), + Array[Int](0x09df, 0x09e1, 1), + Array[Int](0x09f0, 0x09f1, 1), + Array[Int](0x0a05, 0x0a0a, 1), + Array[Int](0x0a0f, 0x0a10, 1), + Array[Int](0x0a13, 0x0a28, 1), + Array[Int](0x0a2a, 0x0a30, 1), + Array[Int](0x0a32, 0x0a33, 1), + Array[Int](0x0a35, 0x0a36, 1), + Array[Int](0x0a38, 0x0a39, 1), + Array[Int](0x0a59, 0x0a5c, 1), + Array[Int](0x0a5e, 0x0a72, 20), + Array[Int](0x0a73, 0x0a74, 1), + Array[Int](0x0a85, 0x0a8d, 1), + Array[Int](0x0a8f, 0x0a91, 1), + Array[Int](0x0a93, 0x0aa8, 1), + 
Array[Int](0x0aaa, 0x0ab0, 1), + Array[Int](0x0ab2, 0x0ab3, 1), + Array[Int](0x0ab5, 0x0ab9, 1), + Array[Int](0x0abd, 0x0ad0, 19), + Array[Int](0x0ae0, 0x0ae1, 1), + Array[Int](0x0b05, 0x0b0c, 1), + Array[Int](0x0b0f, 0x0b10, 1), + Array[Int](0x0b13, 0x0b28, 1), + Array[Int](0x0b2a, 0x0b30, 1), + Array[Int](0x0b32, 0x0b33, 1), + Array[Int](0x0b35, 0x0b39, 1), + Array[Int](0x0b3d, 0x0b5c, 31), + Array[Int](0x0b5d, 0x0b5f, 2), + Array[Int](0x0b60, 0x0b61, 1), + Array[Int](0x0b71, 0x0b83, 18), + Array[Int](0x0b85, 0x0b8a, 1), + Array[Int](0x0b8e, 0x0b90, 1), + Array[Int](0x0b92, 0x0b95, 1), + Array[Int](0x0b99, 0x0b9a, 1), + Array[Int](0x0b9c, 0x0b9e, 2), + Array[Int](0x0b9f, 0x0ba3, 4), + Array[Int](0x0ba4, 0x0ba8, 4), + Array[Int](0x0ba9, 0x0baa, 1), + Array[Int](0x0bae, 0x0bb9, 1), + Array[Int](0x0bd0, 0x0c05, 53), + Array[Int](0x0c06, 0x0c0c, 1), + Array[Int](0x0c0e, 0x0c10, 1), + Array[Int](0x0c12, 0x0c28, 1), + Array[Int](0x0c2a, 0x0c33, 1), + Array[Int](0x0c35, 0x0c39, 1), + Array[Int](0x0c3d, 0x0c58, 27), + Array[Int](0x0c59, 0x0c60, 7), + Array[Int](0x0c61, 0x0c85, 36), + Array[Int](0x0c86, 0x0c8c, 1), + Array[Int](0x0c8e, 0x0c90, 1), + Array[Int](0x0c92, 0x0ca8, 1), + Array[Int](0x0caa, 0x0cb3, 1), + Array[Int](0x0cb5, 0x0cb9, 1), + Array[Int](0x0cbd, 0x0cde, 33), + Array[Int](0x0ce0, 0x0ce1, 1), + Array[Int](0x0cf1, 0x0cf2, 1), + Array[Int](0x0d05, 0x0d0c, 1), + Array[Int](0x0d0e, 0x0d10, 1), + Array[Int](0x0d12, 0x0d3a, 1), + Array[Int](0x0d3d, 0x0d4e, 17), + Array[Int](0x0d60, 0x0d61, 1), + Array[Int](0x0d7a, 0x0d7f, 1), + Array[Int](0x0d85, 0x0d96, 1), + Array[Int](0x0d9a, 0x0db1, 1), + Array[Int](0x0db3, 0x0dbb, 1), + Array[Int](0x0dbd, 0x0dc0, 3), + Array[Int](0x0dc1, 0x0dc6, 1), + Array[Int](0x0e01, 0x0e30, 1), + Array[Int](0x0e32, 0x0e33, 1), + Array[Int](0x0e40, 0x0e45, 1), + Array[Int](0x0e81, 0x0e82, 1), + Array[Int](0x0e84, 0x0e87, 3), + Array[Int](0x0e88, 0x0e8a, 2), + Array[Int](0x0e8d, 0x0e94, 7), + Array[Int](0x0e95, 0x0e97, 1), + 
Array[Int](0x0e99, 0x0e9f, 1), + Array[Int](0x0ea1, 0x0ea3, 1), + Array[Int](0x0ea5, 0x0ea7, 2), + Array[Int](0x0eaa, 0x0eab, 1), + Array[Int](0x0ead, 0x0eb0, 1), + Array[Int](0x0eb2, 0x0eb3, 1), + Array[Int](0x0ebd, 0x0ec0, 3), + Array[Int](0x0ec1, 0x0ec4, 1), + Array[Int](0x0edc, 0x0edd, 1), + Array[Int](0x0f00, 0x0f40, 64), + Array[Int](0x0f41, 0x0f47, 1), + Array[Int](0x0f49, 0x0f6c, 1), + Array[Int](0x0f88, 0x0f8c, 1), + Array[Int](0x1000, 0x102a, 1), + Array[Int](0x103f, 0x1050, 17), + Array[Int](0x1051, 0x1055, 1), + Array[Int](0x105a, 0x105d, 1), + Array[Int](0x1061, 0x1065, 4), + Array[Int](0x1066, 0x106e, 8), + Array[Int](0x106f, 0x1070, 1), + Array[Int](0x1075, 0x1081, 1), + Array[Int](0x108e, 0x10d0, 66), + Array[Int](0x10d1, 0x10fa, 1), + Array[Int](0x1100, 0x1248, 1), + Array[Int](0x124a, 0x124d, 1), + Array[Int](0x1250, 0x1256, 1), + Array[Int](0x1258, 0x125a, 2), + Array[Int](0x125b, 0x125d, 1), + Array[Int](0x1260, 0x1288, 1), + Array[Int](0x128a, 0x128d, 1), + Array[Int](0x1290, 0x12b0, 1), + Array[Int](0x12b2, 0x12b5, 1), + Array[Int](0x12b8, 0x12be, 1), + Array[Int](0x12c0, 0x12c2, 2), + Array[Int](0x12c3, 0x12c5, 1), + Array[Int](0x12c8, 0x12d6, 1), + Array[Int](0x12d8, 0x1310, 1), + Array[Int](0x1312, 0x1315, 1), + Array[Int](0x1318, 0x135a, 1), + Array[Int](0x1380, 0x138f, 1), + Array[Int](0x13a0, 0x13f4, 1), + Array[Int](0x1401, 0x166c, 1), + Array[Int](0x166f, 0x167f, 1), + Array[Int](0x1681, 0x169a, 1), + Array[Int](0x16a0, 0x16ea, 1), + Array[Int](0x1700, 0x170c, 1), + Array[Int](0x170e, 0x1711, 1), + Array[Int](0x1720, 0x1731, 1), + Array[Int](0x1740, 0x1751, 1), + Array[Int](0x1760, 0x176c, 1), + Array[Int](0x176e, 0x1770, 1), + Array[Int](0x1780, 0x17b3, 1), + Array[Int](0x17dc, 0x1820, 68), + Array[Int](0x1821, 0x1842, 1), + Array[Int](0x1844, 0x1877, 1), + Array[Int](0x1880, 0x18a8, 1), + Array[Int](0x18aa, 0x18b0, 6), + Array[Int](0x18b1, 0x18f5, 1), + Array[Int](0x1900, 0x191c, 1), + Array[Int](0x1950, 0x196d, 1), + 
Array[Int](0x1970, 0x1974, 1), + Array[Int](0x1980, 0x19ab, 1), + Array[Int](0x19c1, 0x19c7, 1), + Array[Int](0x1a00, 0x1a16, 1), + Array[Int](0x1a20, 0x1a54, 1), + Array[Int](0x1b05, 0x1b33, 1), + Array[Int](0x1b45, 0x1b4b, 1), + Array[Int](0x1b83, 0x1ba0, 1), + Array[Int](0x1bae, 0x1baf, 1), + Array[Int](0x1bc0, 0x1be5, 1), + Array[Int](0x1c00, 0x1c23, 1), + Array[Int](0x1c4d, 0x1c4f, 1), + Array[Int](0x1c5a, 0x1c77, 1), + Array[Int](0x1ce9, 0x1cec, 1), + Array[Int](0x1cee, 0x1cf1, 1), + Array[Int](0x2135, 0x2138, 1), + Array[Int](0x2d30, 0x2d65, 1), + Array[Int](0x2d80, 0x2d96, 1), + Array[Int](0x2da0, 0x2da6, 1), + Array[Int](0x2da8, 0x2dae, 1), + Array[Int](0x2db0, 0x2db6, 1), + Array[Int](0x2db8, 0x2dbe, 1), + Array[Int](0x2dc0, 0x2dc6, 1), + Array[Int](0x2dc8, 0x2dce, 1), + Array[Int](0x2dd0, 0x2dd6, 1), + Array[Int](0x2dd8, 0x2dde, 1), + Array[Int](0x3006, 0x303c, 54), + Array[Int](0x3041, 0x3096, 1), + Array[Int](0x309f, 0x30a1, 2), + Array[Int](0x30a2, 0x30fa, 1), + Array[Int](0x30ff, 0x3105, 6), + Array[Int](0x3106, 0x312d, 1), + Array[Int](0x3131, 0x318e, 1), + Array[Int](0x31a0, 0x31ba, 1), + Array[Int](0x31f0, 0x31ff, 1), + Array[Int](0x3400, 0x4db5, 1), + Array[Int](0x4e00, 0x9fcb, 1), + Array[Int](0xa000, 0xa014, 1), + Array[Int](0xa016, 0xa48c, 1), + Array[Int](0xa4d0, 0xa4f7, 1), + Array[Int](0xa500, 0xa60b, 1), + Array[Int](0xa610, 0xa61f, 1), + Array[Int](0xa62a, 0xa62b, 1), + Array[Int](0xa66e, 0xa6a0, 50), + Array[Int](0xa6a1, 0xa6e5, 1), + Array[Int](0xa7fb, 0xa801, 1), + Array[Int](0xa803, 0xa805, 1), + Array[Int](0xa807, 0xa80a, 1), + Array[Int](0xa80c, 0xa822, 1), + Array[Int](0xa840, 0xa873, 1), + Array[Int](0xa882, 0xa8b3, 1), + Array[Int](0xa8f2, 0xa8f7, 1), + Array[Int](0xa8fb, 0xa90a, 15), + Array[Int](0xa90b, 0xa925, 1), + Array[Int](0xa930, 0xa946, 1), + Array[Int](0xa960, 0xa97c, 1), + Array[Int](0xa984, 0xa9b2, 1), + Array[Int](0xaa00, 0xaa28, 1), + Array[Int](0xaa40, 0xaa42, 1), + Array[Int](0xaa44, 0xaa4b, 1), + 
Array[Int](0xaa60, 0xaa6f, 1), + Array[Int](0xaa71, 0xaa76, 1), + Array[Int](0xaa7a, 0xaa80, 6), + Array[Int](0xaa81, 0xaaaf, 1), + Array[Int](0xaab1, 0xaab5, 4), + Array[Int](0xaab6, 0xaab9, 3), + Array[Int](0xaaba, 0xaabd, 1), + Array[Int](0xaac0, 0xaac2, 2), + Array[Int](0xaadb, 0xaadc, 1), + Array[Int](0xab01, 0xab06, 1), + Array[Int](0xab09, 0xab0e, 1), + Array[Int](0xab11, 0xab16, 1), + Array[Int](0xab20, 0xab26, 1), + Array[Int](0xab28, 0xab2e, 1), + Array[Int](0xabc0, 0xabe2, 1), + Array[Int](0xac00, 0xd7a3, 1), + Array[Int](0xd7b0, 0xd7c6, 1), + Array[Int](0xd7cb, 0xd7fb, 1), + Array[Int](0xf900, 0xfa2d, 1), + Array[Int](0xfa30, 0xfa6d, 1), + Array[Int](0xfa70, 0xfad9, 1), + Array[Int](0xfb1d, 0xfb1f, 2), + Array[Int](0xfb20, 0xfb28, 1), + Array[Int](0xfb2a, 0xfb36, 1), + Array[Int](0xfb38, 0xfb3c, 1), + Array[Int](0xfb3e, 0xfb40, 2), + Array[Int](0xfb41, 0xfb43, 2), + Array[Int](0xfb44, 0xfb46, 2), + Array[Int](0xfb47, 0xfbb1, 1), + Array[Int](0xfbd3, 0xfd3d, 1), + Array[Int](0xfd50, 0xfd8f, 1), + Array[Int](0xfd92, 0xfdc7, 1), + Array[Int](0xfdf0, 0xfdfb, 1), + Array[Int](0xfe70, 0xfe74, 1), + Array[Int](0xfe76, 0xfefc, 1), + Array[Int](0xff66, 0xff6f, 1), + Array[Int](0xff71, 0xff9d, 1), + Array[Int](0xffa0, 0xffbe, 1), + Array[Int](0xffc2, 0xffc7, 1), + Array[Int](0xffca, 0xffcf, 1), + Array[Int](0xffd2, 0xffd7, 1), + Array[Int](0xffda, 0xffdc, 1), + Array[Int](0x10000, 0x1000b, 1), + Array[Int](0x1000d, 0x10026, 1), + Array[Int](0x10028, 0x1003a, 1), + Array[Int](0x1003c, 0x1003d, 1), + Array[Int](0x1003f, 0x1004d, 1), + Array[Int](0x10050, 0x1005d, 1), + Array[Int](0x10080, 0x100fa, 1), + Array[Int](0x10280, 0x1029c, 1), + Array[Int](0x102a0, 0x102d0, 1), + Array[Int](0x10300, 0x1031e, 1), + Array[Int](0x10330, 0x10340, 1), + Array[Int](0x10342, 0x10349, 1), + Array[Int](0x10380, 0x1039d, 1), + Array[Int](0x103a0, 0x103c3, 1), + Array[Int](0x103c8, 0x103cf, 1), + Array[Int](0x10450, 0x1049d, 1), + Array[Int](0x10800, 0x10805, 1), + 
Array[Int](0x10808, 0x1080a, 2), + Array[Int](0x1080b, 0x10835, 1), + Array[Int](0x10837, 0x10838, 1), + Array[Int](0x1083c, 0x1083f, 3), + Array[Int](0x10840, 0x10855, 1), + Array[Int](0x10900, 0x10915, 1), + Array[Int](0x10920, 0x10939, 1), + Array[Int](0x10a00, 0x10a10, 16), + Array[Int](0x10a11, 0x10a13, 1), + Array[Int](0x10a15, 0x10a17, 1), + Array[Int](0x10a19, 0x10a33, 1), + Array[Int](0x10a60, 0x10a7c, 1), + Array[Int](0x10b00, 0x10b35, 1), + Array[Int](0x10b40, 0x10b55, 1), + Array[Int](0x10b60, 0x10b72, 1), + Array[Int](0x10c00, 0x10c48, 1), + Array[Int](0x11003, 0x11037, 1), + Array[Int](0x11083, 0x110af, 1), + Array[Int](0x12000, 0x1236e, 1), + Array[Int](0x13000, 0x1342e, 1), + Array[Int](0x16800, 0x16a38, 1), + Array[Int](0x1b000, 0x1b001, 1), + Array[Int](0x20000, 0x2a6d6, 1), + Array[Int](0x2a700, 0x2b734, 1), + Array[Int](0x2b740, 0x2b81d, 1), + Array[Int](0x2f800, 0x2fa1d, 1) + ) + final lazy val Cc: Array[Array[Int]] = _Cc + final lazy val Cf: Array[Array[Int]] = _Cf + final lazy val Co: Array[Array[Int]] = _Co + final lazy val Cs: Array[Array[Int]] = _Cs + final lazy val Digit: Array[Array[Int]] = _Nd + final lazy val Nd: Array[Array[Int]] = _Nd + final lazy val Letter: Array[Array[Int]] = _L + final lazy val L: Array[Array[Int]] = _L + final lazy val Lm: Array[Array[Int]] = _Lm + final lazy val Lo: Array[Array[Int]] = _Lo + final lazy val Lower: Array[Array[Int]] = _Ll + final lazy val Ll: Array[Array[Int]] = _Ll + final lazy val Mark: Array[Array[Int]] = _M + final lazy val M: Array[Array[Int]] = _M + final lazy val Mc: Array[Array[Int]] = _Mc + final lazy val Me: Array[Array[Int]] = _Me + final lazy val Mn: Array[Array[Int]] = _Mn + final lazy val Nl: Array[Array[Int]] = _Nl + final lazy val No: Array[Array[Int]] = _No + final lazy val Number: Array[Array[Int]] = _N + final lazy val N: Array[Array[Int]] = _N + final lazy val Other: Array[Array[Int]] = _C + final lazy val C: Array[Array[Int]] = _C + final lazy val Pc: Array[Array[Int]] = _Pc 
+ final lazy val Pd: Array[Array[Int]] = _Pd + final lazy val Pe: Array[Array[Int]] = _Pe + final lazy val Pf: Array[Array[Int]] = _Pf + final lazy val Pi: Array[Array[Int]] = _Pi + final lazy val Po: Array[Array[Int]] = _Po + final lazy val Ps: Array[Array[Int]] = _Ps + final lazy val Punct: Array[Array[Int]] = _P + final lazy val P: Array[Array[Int]] = _P + final lazy val Sc: Array[Array[Int]] = _Sc + final lazy val Sk: Array[Array[Int]] = _Sk + final lazy val Sm: Array[Array[Int]] = _Sm + final lazy val So: Array[Array[Int]] = _So + final lazy val Space: Array[Array[Int]] = _Z + final lazy val Z: Array[Array[Int]] = _Z + final lazy val Symbol: Array[Array[Int]] = _S + final lazy val S: Array[Array[Int]] = _S + final lazy val Title: Array[Array[Int]] = _Lt + final lazy val Lt: Array[Array[Int]] = _Lt + final lazy val Upper: Array[Array[Int]] = _Lu + final lazy val Lu: Array[Array[Int]] = _Lu + final lazy val Zl: Array[Array[Int]] = _Zl + final lazy val Zp: Array[Array[Int]] = _Zp + final lazy val Zs: Array[Array[Int]] = _Zs + private final lazy val _Katakana: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x30a1, 0x30fa, 1), + Array[Int](0x30fd, 0x30ff, 1), + Array[Int](0x31f0, 0x31ff, 1), + Array[Int](0x32d0, 0x32fe, 1), + Array[Int](0x3300, 0x3357, 1), + Array[Int](0xff66, 0xff6f, 1), + Array[Int](0xff71, 0xff9d, 1), + Array[Int](0x1b000, 0x1b000, 1) + ) + private final lazy val _Malayalam: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0d02, 0x0d03, 1), + Array[Int](0x0d05, 0x0d0c, 1), + Array[Int](0x0d0e, 0x0d10, 1), + Array[Int](0x0d12, 0x0d3a, 1), + Array[Int](0x0d3d, 0x0d44, 1), + Array[Int](0x0d46, 0x0d48, 1), + Array[Int](0x0d4a, 0x0d4e, 1), + Array[Int](0x0d57, 0x0d57, 1), + Array[Int](0x0d60, 0x0d63, 1), + Array[Int](0x0d66, 0x0d75, 1), + Array[Int](0x0d79, 0x0d7f, 1) + ) + private final lazy val _Phags_Pa: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0xa840, 0xa877, 1)) + private final lazy val _Inscriptional_Parthian: 
Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x10b40, 0x10b55, 1), Array[Int](0x10b58, 0x10b5f, 1)) + private final lazy val _Latin: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0041, 0x005a, 1), + Array[Int](0x0061, 0x007a, 1), + Array[Int](0x00aa, 0x00aa, 1), + Array[Int](0x00ba, 0x00ba, 1), + Array[Int](0x00c0, 0x00d6, 1), + Array[Int](0x00d8, 0x00f6, 1), + Array[Int](0x00f8, 0x02b8, 1), + Array[Int](0x02e0, 0x02e4, 1), + Array[Int](0x1d00, 0x1d25, 1), + Array[Int](0x1d2c, 0x1d5c, 1), + Array[Int](0x1d62, 0x1d65, 1), + Array[Int](0x1d6b, 0x1d77, 1), + Array[Int](0x1d79, 0x1dbe, 1), + Array[Int](0x1e00, 0x1eff, 1), + Array[Int](0x2071, 0x2071, 1), + Array[Int](0x207f, 0x207f, 1), + Array[Int](0x2090, 0x209c, 1), + Array[Int](0x212a, 0x212b, 1), + Array[Int](0x2132, 0x2132, 1), + Array[Int](0x214e, 0x214e, 1), + Array[Int](0x2160, 0x2188, 1), + Array[Int](0x2c60, 0x2c7f, 1), + Array[Int](0xa722, 0xa787, 1), + Array[Int](0xa78b, 0xa78e, 1), + Array[Int](0xa790, 0xa791, 1), + Array[Int](0xa7a0, 0xa7a9, 1), + Array[Int](0xa7fa, 0xa7ff, 1), + Array[Int](0xfb00, 0xfb06, 1), + Array[Int](0xff21, 0xff3a, 1), + Array[Int](0xff41, 0xff5a, 1) + ) + private final lazy val _Inscriptional_Pahlavi: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x10b60, 0x10b72, 1), Array[Int](0x10b78, 0x10b7f, 1)) + private final lazy val _Osmanya: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x10480, 0x1049d, 1), Array[Int](0x104a0, 0x104a9, 1)) + private final lazy val _Khmer: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1780, 0x17dd, 1), + Array[Int](0x17e0, 0x17e9, 1), + Array[Int](0x17f0, 0x17f9, 1), + Array[Int](0x19e0, 0x19ff, 1)) + private final lazy val _Inherited: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0300, 0x036f, 1), + Array[Int](0x0485, 0x0486, 1), + Array[Int](0x064b, 0x0655, 1), + Array[Int](0x065f, 0x065f, 1), + Array[Int](0x0670, 0x0670, 1), + Array[Int](0x0951, 0x0952, 1), + Array[Int](0x1cd0, 0x1cd2, 1), + 
Array[Int](0x1cd4, 0x1ce0, 1), + Array[Int](0x1ce2, 0x1ce8, 1), + Array[Int](0x1ced, 0x1ced, 1), + Array[Int](0x1dc0, 0x1de6, 1), + Array[Int](0x1dfc, 0x1dff, 1), + Array[Int](0x200c, 0x200d, 1), + Array[Int](0x20d0, 0x20f0, 1), + Array[Int](0x302a, 0x302d, 1), + Array[Int](0x3099, 0x309a, 1), + Array[Int](0xfe00, 0xfe0f, 1), + Array[Int](0xfe20, 0xfe26, 1), + Array[Int](0x101fd, 0x101fd, 1), + Array[Int](0x1d167, 0x1d169, 1), + Array[Int](0x1d17b, 0x1d182, 1), + Array[Int](0x1d185, 0x1d18b, 1), + Array[Int](0x1d1aa, 0x1d1ad, 1), + Array[Int](0xe0100, 0xe01ef, 1) + ) + private final lazy val _Telugu: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0c01, 0x0c03, 1), + Array[Int](0x0c05, 0x0c0c, 1), + Array[Int](0x0c0e, 0x0c10, 1), + Array[Int](0x0c12, 0x0c28, 1), + Array[Int](0x0c2a, 0x0c33, 1), + Array[Int](0x0c35, 0x0c39, 1), + Array[Int](0x0c3d, 0x0c44, 1), + Array[Int](0x0c46, 0x0c48, 1), + Array[Int](0x0c4a, 0x0c4d, 1), + Array[Int](0x0c55, 0x0c56, 1), + Array[Int](0x0c58, 0x0c59, 1), + Array[Int](0x0c60, 0x0c63, 1), + Array[Int](0x0c66, 0x0c6f, 1), + Array[Int](0x0c78, 0x0c7f, 1) + ) + private final lazy val _Samaritan: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0800, 0x082d, 1), Array[Int](0x0830, 0x083e, 1)) + private final lazy val _Bopomofo: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x02ea, 0x02eb, 1), + Array[Int](0x3105, 0x312d, 1), + Array[Int](0x31a0, 0x31ba, 1)) + private final lazy val _Imperial_Aramaic: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x10840, 0x10855, 1), Array[Int](0x10857, 0x1085f, 1)) + private final lazy val _Kaithi: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x11080, 0x110c1, 1)) + private final lazy val _Mandaic: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0840, 0x085b, 1), Array[Int](0x085e, 0x085e, 1)) + private final lazy val _Old_South_Arabian: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x10a60, 0x10a7f, 1)) + private final lazy val _Kayah_Li: Array[Array[Int]] = + 
Array[Array[Int]](Array[Int](0xa900, 0xa92f, 1)) + private final lazy val _New_Tai_Lue: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1980, 0x19ab, 1), + Array[Int](0x19b0, 0x19c9, 1), + Array[Int](0x19d0, 0x19da, 1), + Array[Int](0x19de, 0x19df, 1)) + private final lazy val _Tai_Le: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1950, 0x196d, 1), Array[Int](0x1970, 0x1974, 1)) + private final lazy val _Kharoshthi: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x10a00, 0x10a03, 1), + Array[Int](0x10a05, 0x10a06, 1), + Array[Int](0x10a0c, 0x10a13, 1), + Array[Int](0x10a15, 0x10a17, 1), + Array[Int](0x10a19, 0x10a33, 1), + Array[Int](0x10a38, 0x10a3a, 1), + Array[Int](0x10a3f, 0x10a47, 1), + Array[Int](0x10a50, 0x10a58, 1) + ) + private final lazy val _Common: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0000, 0x0040, 1), + Array[Int](0x005b, 0x0060, 1), + Array[Int](0x007b, 0x00a9, 1), + Array[Int](0x00ab, 0x00b9, 1), + Array[Int](0x00bb, 0x00bf, 1), + Array[Int](0x00d7, 0x00d7, 1), + Array[Int](0x00f7, 0x00f7, 1), + Array[Int](0x02b9, 0x02df, 1), + Array[Int](0x02e5, 0x02e9, 1), + Array[Int](0x02ec, 0x02ff, 1), + Array[Int](0x0374, 0x0374, 1), + Array[Int](0x037e, 0x037e, 1), + Array[Int](0x0385, 0x0385, 1), + Array[Int](0x0387, 0x0387, 1), + Array[Int](0x0589, 0x0589, 1), + Array[Int](0x060c, 0x060c, 1), + Array[Int](0x061b, 0x061b, 1), + Array[Int](0x061f, 0x061f, 1), + Array[Int](0x0640, 0x0640, 1), + Array[Int](0x0660, 0x0669, 1), + Array[Int](0x06dd, 0x06dd, 1), + Array[Int](0x0964, 0x0965, 1), + Array[Int](0x0970, 0x0970, 1), + Array[Int](0x0e3f, 0x0e3f, 1), + Array[Int](0x0fd5, 0x0fd8, 1), + Array[Int](0x10fb, 0x10fb, 1), + Array[Int](0x16eb, 0x16ed, 1), + Array[Int](0x1735, 0x1736, 1), + Array[Int](0x1802, 0x1803, 1), + Array[Int](0x1805, 0x1805, 1), + Array[Int](0x1cd3, 0x1cd3, 1), + Array[Int](0x1ce1, 0x1ce1, 1), + Array[Int](0x1ce9, 0x1cec, 1), + Array[Int](0x1cee, 0x1cf2, 1), + Array[Int](0x2000, 0x200b, 1), + 
Array[Int](0x200e, 0x2064, 1), + Array[Int](0x206a, 0x2070, 1), + Array[Int](0x2074, 0x207e, 1), + Array[Int](0x2080, 0x208e, 1), + Array[Int](0x20a0, 0x20b9, 1), + Array[Int](0x2100, 0x2125, 1), + Array[Int](0x2127, 0x2129, 1), + Array[Int](0x212c, 0x2131, 1), + Array[Int](0x2133, 0x214d, 1), + Array[Int](0x214f, 0x215f, 1), + Array[Int](0x2189, 0x2189, 1), + Array[Int](0x2190, 0x23f3, 1), + Array[Int](0x2400, 0x2426, 1), + Array[Int](0x2440, 0x244a, 1), + Array[Int](0x2460, 0x26ff, 1), + Array[Int](0x2701, 0x27ca, 1), + Array[Int](0x27cc, 0x27cc, 1), + Array[Int](0x27ce, 0x27ff, 1), + Array[Int](0x2900, 0x2b4c, 1), + Array[Int](0x2b50, 0x2b59, 1), + Array[Int](0x2e00, 0x2e31, 1), + Array[Int](0x2ff0, 0x2ffb, 1), + Array[Int](0x3000, 0x3004, 1), + Array[Int](0x3006, 0x3006, 1), + Array[Int](0x3008, 0x3020, 1), + Array[Int](0x3030, 0x3037, 1), + Array[Int](0x303c, 0x303f, 1), + Array[Int](0x309b, 0x309c, 1), + Array[Int](0x30a0, 0x30a0, 1), + Array[Int](0x30fb, 0x30fc, 1), + Array[Int](0x3190, 0x319f, 1), + Array[Int](0x31c0, 0x31e3, 1), + Array[Int](0x3220, 0x325f, 1), + Array[Int](0x327f, 0x32cf, 1), + Array[Int](0x3358, 0x33ff, 1), + Array[Int](0x4dc0, 0x4dff, 1), + Array[Int](0xa700, 0xa721, 1), + Array[Int](0xa788, 0xa78a, 1), + Array[Int](0xa830, 0xa839, 1), + Array[Int](0xfd3e, 0xfd3f, 1), + Array[Int](0xfdfd, 0xfdfd, 1), + Array[Int](0xfe10, 0xfe19, 1), + Array[Int](0xfe30, 0xfe52, 1), + Array[Int](0xfe54, 0xfe66, 1), + Array[Int](0xfe68, 0xfe6b, 1), + Array[Int](0xfeff, 0xfeff, 1), + Array[Int](0xff01, 0xff20, 1), + Array[Int](0xff3b, 0xff40, 1), + Array[Int](0xff5b, 0xff65, 1), + Array[Int](0xff70, 0xff70, 1), + Array[Int](0xff9e, 0xff9f, 1), + Array[Int](0xffe0, 0xffe6, 1), + Array[Int](0xffe8, 0xffee, 1), + Array[Int](0xfff9, 0xfffd, 1), + Array[Int](0x10100, 0x10102, 1), + Array[Int](0x10107, 0x10133, 1), + Array[Int](0x10137, 0x1013f, 1), + Array[Int](0x10190, 0x1019b, 1), + Array[Int](0x101d0, 0x101fc, 1), + Array[Int](0x1d000, 0x1d0f5, 1), + 
Array[Int](0x1d100, 0x1d126, 1), + Array[Int](0x1d129, 0x1d166, 1), + Array[Int](0x1d16a, 0x1d17a, 1), + Array[Int](0x1d183, 0x1d184, 1), + Array[Int](0x1d18c, 0x1d1a9, 1), + Array[Int](0x1d1ae, 0x1d1dd, 1), + Array[Int](0x1d300, 0x1d356, 1), + Array[Int](0x1d360, 0x1d371, 1), + Array[Int](0x1d400, 0x1d454, 1), + Array[Int](0x1d456, 0x1d49c, 1), + Array[Int](0x1d49e, 0x1d49f, 1), + Array[Int](0x1d4a2, 0x1d4a2, 1), + Array[Int](0x1d4a5, 0x1d4a6, 1), + Array[Int](0x1d4a9, 0x1d4ac, 1), + Array[Int](0x1d4ae, 0x1d4b9, 1), + Array[Int](0x1d4bb, 0x1d4bb, 1), + Array[Int](0x1d4bd, 0x1d4c3, 1), + Array[Int](0x1d4c5, 0x1d505, 1), + Array[Int](0x1d507, 0x1d50a, 1), + Array[Int](0x1d50d, 0x1d514, 1), + Array[Int](0x1d516, 0x1d51c, 1), + Array[Int](0x1d51e, 0x1d539, 1), + Array[Int](0x1d53b, 0x1d53e, 1), + Array[Int](0x1d540, 0x1d544, 1), + Array[Int](0x1d546, 0x1d546, 1), + Array[Int](0x1d54a, 0x1d550, 1), + Array[Int](0x1d552, 0x1d6a5, 1), + Array[Int](0x1d6a8, 0x1d7cb, 1), + Array[Int](0x1d7ce, 0x1d7ff, 1), + Array[Int](0x1f000, 0x1f02b, 1), + Array[Int](0x1f030, 0x1f093, 1), + Array[Int](0x1f0a0, 0x1f0ae, 1), + Array[Int](0x1f0b1, 0x1f0be, 1), + Array[Int](0x1f0c1, 0x1f0cf, 1), + Array[Int](0x1f0d1, 0x1f0df, 1), + Array[Int](0x1f100, 0x1f10a, 1), + Array[Int](0x1f110, 0x1f12e, 1), + Array[Int](0x1f130, 0x1f169, 1), + Array[Int](0x1f170, 0x1f19a, 1), + Array[Int](0x1f1e6, 0x1f1ff, 1), + Array[Int](0x1f201, 0x1f202, 1), + Array[Int](0x1f210, 0x1f23a, 1), + Array[Int](0x1f240, 0x1f248, 1), + Array[Int](0x1f250, 0x1f251, 1), + Array[Int](0x1f300, 0x1f320, 1), + Array[Int](0x1f330, 0x1f335, 1), + Array[Int](0x1f337, 0x1f37c, 1), + Array[Int](0x1f380, 0x1f393, 1), + Array[Int](0x1f3a0, 0x1f3c4, 1), + Array[Int](0x1f3c6, 0x1f3ca, 1), + Array[Int](0x1f3e0, 0x1f3f0, 1), + Array[Int](0x1f400, 0x1f43e, 1), + Array[Int](0x1f440, 0x1f440, 1), + Array[Int](0x1f442, 0x1f4f7, 1), + Array[Int](0x1f4f9, 0x1f4fc, 1), + Array[Int](0x1f500, 0x1f53d, 1), + Array[Int](0x1f550, 0x1f567, 1), + 
Array[Int](0x1f5fb, 0x1f5ff, 1), + Array[Int](0x1f601, 0x1f610, 1), + Array[Int](0x1f612, 0x1f614, 1), + Array[Int](0x1f616, 0x1f616, 1), + Array[Int](0x1f618, 0x1f618, 1), + Array[Int](0x1f61a, 0x1f61a, 1), + Array[Int](0x1f61c, 0x1f61e, 1), + Array[Int](0x1f620, 0x1f625, 1), + Array[Int](0x1f628, 0x1f62b, 1), + Array[Int](0x1f62d, 0x1f62d, 1), + Array[Int](0x1f630, 0x1f633, 1), + Array[Int](0x1f635, 0x1f640, 1), + Array[Int](0x1f645, 0x1f64f, 1), + Array[Int](0x1f680, 0x1f6c5, 1), + Array[Int](0x1f700, 0x1f773, 1), + Array[Int](0xe0001, 0xe0001, 1), + Array[Int](0xe0020, 0xe007f, 1) + ) + private final lazy val _Kannada: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0c82, 0x0c83, 1), + Array[Int](0x0c85, 0x0c8c, 1), + Array[Int](0x0c8e, 0x0c90, 1), + Array[Int](0x0c92, 0x0ca8, 1), + Array[Int](0x0caa, 0x0cb3, 1), + Array[Int](0x0cb5, 0x0cb9, 1), + Array[Int](0x0cbc, 0x0cc4, 1), + Array[Int](0x0cc6, 0x0cc8, 1), + Array[Int](0x0cca, 0x0ccd, 1), + Array[Int](0x0cd5, 0x0cd6, 1), + Array[Int](0x0cde, 0x0cde, 1), + Array[Int](0x0ce0, 0x0ce3, 1), + Array[Int](0x0ce6, 0x0cef, 1), + Array[Int](0x0cf1, 0x0cf2, 1) + ) + private final lazy val _Old_Turkic: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x10c00, 0x10c48, 1)) + private final lazy val _Tamil: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0b82, 0x0b83, 1), + Array[Int](0x0b85, 0x0b8a, 1), + Array[Int](0x0b8e, 0x0b90, 1), + Array[Int](0x0b92, 0x0b95, 1), + Array[Int](0x0b99, 0x0b9a, 1), + Array[Int](0x0b9c, 0x0b9c, 1), + Array[Int](0x0b9e, 0x0b9f, 1), + Array[Int](0x0ba3, 0x0ba4, 1), + Array[Int](0x0ba8, 0x0baa, 1), + Array[Int](0x0bae, 0x0bb9, 1), + Array[Int](0x0bbe, 0x0bc2, 1), + Array[Int](0x0bc6, 0x0bc8, 1), + Array[Int](0x0bca, 0x0bcd, 1), + Array[Int](0x0bd0, 0x0bd0, 1), + Array[Int](0x0bd7, 0x0bd7, 1), + Array[Int](0x0be6, 0x0bfa, 1) + ) + private final lazy val _Tagalog: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1700, 0x170c, 1), Array[Int](0x170e, 0x1714, 1)) + private 
final lazy val _Brahmi: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x11000, 0x1104d, 1), Array[Int](0x11052, 0x1106f, 1)) + private final lazy val _Arabic: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0600, 0x0603, 1), + Array[Int](0x0606, 0x060b, 1), + Array[Int](0x060d, 0x061a, 1), + Array[Int](0x061e, 0x061e, 1), + Array[Int](0x0620, 0x063f, 1), + Array[Int](0x0641, 0x064a, 1), + Array[Int](0x0656, 0x065e, 1), + Array[Int](0x066a, 0x066f, 1), + Array[Int](0x0671, 0x06dc, 1), + Array[Int](0x06de, 0x06ff, 1), + Array[Int](0x0750, 0x077f, 1), + Array[Int](0xfb50, 0xfbc1, 1), + Array[Int](0xfbd3, 0xfd3d, 1), + Array[Int](0xfd50, 0xfd8f, 1), + Array[Int](0xfd92, 0xfdc7, 1), + Array[Int](0xfdf0, 0xfdfc, 1), + Array[Int](0xfe70, 0xfe74, 1), + Array[Int](0xfe76, 0xfefc, 1), + Array[Int](0x10e60, 0x10e7e, 1) + ) + private final lazy val _Tagbanwa: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1760, 0x176c, 1), + Array[Int](0x176e, 0x1770, 1), + Array[Int](0x1772, 0x1773, 1)) + private final lazy val _Canadian_Aboriginal: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1400, 0x167f, 1), Array[Int](0x18b0, 0x18f5, 1)) + private final lazy val _Tibetan: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0f00, 0x0f47, 1), + Array[Int](0x0f49, 0x0f6c, 1), + Array[Int](0x0f71, 0x0f97, 1), + Array[Int](0x0f99, 0x0fbc, 1), + Array[Int](0x0fbe, 0x0fcc, 1), + Array[Int](0x0fce, 0x0fd4, 1), + Array[Int](0x0fd9, 0x0fda, 1) + ) + private final lazy val _Coptic: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x03e2, 0x03ef, 1), + Array[Int](0x2c80, 0x2cf1, 1), + Array[Int](0x2cf9, 0x2cff, 1)) + private final lazy val _Hiragana: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x3041, 0x3096, 1), + Array[Int](0x309d, 0x309f, 1), + Array[Int](0x1b001, 0x1b001, 1), + Array[Int](0x1f200, 0x1f200, 1)) + private final lazy val _Limbu: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1900, 0x191c, 1), + Array[Int](0x1920, 0x192b, 1), + 
Array[Int](0x1930, 0x193b, 1), + Array[Int](0x1940, 0x1940, 1), + Array[Int](0x1944, 0x194f, 1)) + private final lazy val _Egyptian_Hieroglyphs: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x13000, 0x1342e, 1)) + private final lazy val _Avestan: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x10b00, 0x10b35, 1), Array[Int](0x10b39, 0x10b3f, 1)) + private final lazy val _Myanmar: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1000, 0x109f, 1), Array[Int](0xaa60, 0xaa7b, 1)) + private final lazy val _Armenian: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0531, 0x0556, 1), + Array[Int](0x0559, 0x055f, 1), + Array[Int](0x0561, 0x0587, 1), + Array[Int](0x058a, 0x058a, 1), + Array[Int](0xfb13, 0xfb17, 1)) + private final lazy val _Sinhala: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0d82, 0x0d83, 1), + Array[Int](0x0d85, 0x0d96, 1), + Array[Int](0x0d9a, 0x0db1, 1), + Array[Int](0x0db3, 0x0dbb, 1), + Array[Int](0x0dbd, 0x0dbd, 1), + Array[Int](0x0dc0, 0x0dc6, 1), + Array[Int](0x0dca, 0x0dca, 1), + Array[Int](0x0dcf, 0x0dd4, 1), + Array[Int](0x0dd6, 0x0dd6, 1), + Array[Int](0x0dd8, 0x0ddf, 1), + Array[Int](0x0df2, 0x0df4, 1) + ) + private final lazy val _Bengali: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0981, 0x0983, 1), + Array[Int](0x0985, 0x098c, 1), + Array[Int](0x098f, 0x0990, 1), + Array[Int](0x0993, 0x09a8, 1), + Array[Int](0x09aa, 0x09b0, 1), + Array[Int](0x09b2, 0x09b2, 1), + Array[Int](0x09b6, 0x09b9, 1), + Array[Int](0x09bc, 0x09c4, 1), + Array[Int](0x09c7, 0x09c8, 1), + Array[Int](0x09cb, 0x09ce, 1), + Array[Int](0x09d7, 0x09d7, 1), + Array[Int](0x09dc, 0x09dd, 1), + Array[Int](0x09df, 0x09e3, 1), + Array[Int](0x09e6, 0x09fb, 1) + ) + private final lazy val _Greek: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0370, 0x0373, 1), + Array[Int](0x0375, 0x0377, 1), + Array[Int](0x037a, 0x037d, 1), + Array[Int](0x0384, 0x0384, 1), + Array[Int](0x0386, 0x0386, 1), + Array[Int](0x0388, 0x038a, 1), + 
Array[Int](0x038c, 0x038c, 1), + Array[Int](0x038e, 0x03a1, 1), + Array[Int](0x03a3, 0x03e1, 1), + Array[Int](0x03f0, 0x03ff, 1), + Array[Int](0x1d26, 0x1d2a, 1), + Array[Int](0x1d5d, 0x1d61, 1), + Array[Int](0x1d66, 0x1d6a, 1), + Array[Int](0x1dbf, 0x1dbf, 1), + Array[Int](0x1f00, 0x1f15, 1), + Array[Int](0x1f18, 0x1f1d, 1), + Array[Int](0x1f20, 0x1f45, 1), + Array[Int](0x1f48, 0x1f4d, 1), + Array[Int](0x1f50, 0x1f57, 1), + Array[Int](0x1f59, 0x1f59, 1), + Array[Int](0x1f5b, 0x1f5b, 1), + Array[Int](0x1f5d, 0x1f5d, 1), + Array[Int](0x1f5f, 0x1f7d, 1), + Array[Int](0x1f80, 0x1fb4, 1), + Array[Int](0x1fb6, 0x1fc4, 1), + Array[Int](0x1fc6, 0x1fd3, 1), + Array[Int](0x1fd6, 0x1fdb, 1), + Array[Int](0x1fdd, 0x1fef, 1), + Array[Int](0x1ff2, 0x1ff4, 1), + Array[Int](0x1ff6, 0x1ffe, 1), + Array[Int](0x2126, 0x2126, 1), + Array[Int](0x10140, 0x1018a, 1), + Array[Int](0x1d200, 0x1d245, 1) + ) + private final lazy val _Cham: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0xaa00, 0xaa36, 1), + Array[Int](0xaa40, 0xaa4d, 1), + Array[Int](0xaa50, 0xaa59, 1), + Array[Int](0xaa5c, 0xaa5f, 1)) + private final lazy val _Hebrew: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0591, 0x05c7, 1), + Array[Int](0x05d0, 0x05ea, 1), + Array[Int](0x05f0, 0x05f4, 1), + Array[Int](0xfb1d, 0xfb36, 1), + Array[Int](0xfb38, 0xfb3c, 1), + Array[Int](0xfb3e, 0xfb3e, 1), + Array[Int](0xfb40, 0xfb41, 1), + Array[Int](0xfb43, 0xfb44, 1), + Array[Int](0xfb46, 0xfb4f, 1) + ) + private final lazy val _Meetei_Mayek: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0xabc0, 0xabed, 1), Array[Int](0xabf0, 0xabf9, 1)) + private final lazy val _Saurashtra: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0xa880, 0xa8c4, 1), Array[Int](0xa8ce, 0xa8d9, 1)) + private final lazy val _Hangul: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x1100, 0x11ff, 1), + Array[Int](0x302e, 0x302f, 1), + Array[Int](0x3131, 0x318e, 1), + Array[Int](0x3200, 0x321e, 1), + Array[Int](0x3260, 0x327e, 1), + 
Array[Int](0xa960, 0xa97c, 1), + Array[Int](0xac00, 0xd7a3, 1), + Array[Int](0xd7b0, 0xd7c6, 1), + Array[Int](0xd7cb, 0xd7fb, 1), + Array[Int](0xffa0, 0xffbe, 1), + Array[Int](0xffc2, 0xffc7, 1), + Array[Int](0xffca, 0xffcf, 1), + Array[Int](0xffd2, 0xffd7, 1), + Array[Int](0xffda, 0xffdc, 1) + ) + private final lazy val _Runic: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x16a0, 0x16ea, 1), Array[Int](0x16ee, 0x16f0, 1)) + private final lazy val _Deseret: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x10400, 0x1044f, 1)) + private final lazy val _Lisu: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0xa4d0, 0xa4ff, 1)) + private final lazy val _Sundanese: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1b80, 0x1baa, 1), Array[Int](0x1bae, 0x1bb9, 1)) + private final lazy val _Glagolitic: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x2c00, 0x2c2e, 1), Array[Int](0x2c30, 0x2c5e, 1)) + private final lazy val _Oriya: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0b01, 0x0b03, 1), + Array[Int](0x0b05, 0x0b0c, 1), + Array[Int](0x0b0f, 0x0b10, 1), + Array[Int](0x0b13, 0x0b28, 1), + Array[Int](0x0b2a, 0x0b30, 1), + Array[Int](0x0b32, 0x0b33, 1), + Array[Int](0x0b35, 0x0b39, 1), + Array[Int](0x0b3c, 0x0b44, 1), + Array[Int](0x0b47, 0x0b48, 1), + Array[Int](0x0b4b, 0x0b4d, 1), + Array[Int](0x0b56, 0x0b57, 1), + Array[Int](0x0b5c, 0x0b5d, 1), + Array[Int](0x0b5f, 0x0b63, 1), + Array[Int](0x0b66, 0x0b77, 1) + ) + private final lazy val _Buhid: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1740, 0x1753, 1)) + private final lazy val _Ethiopic: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x1200, 0x1248, 1), + Array[Int](0x124a, 0x124d, 1), + Array[Int](0x1250, 0x1256, 1), + Array[Int](0x1258, 0x1258, 1), + Array[Int](0x125a, 0x125d, 1), + Array[Int](0x1260, 0x1288, 1), + Array[Int](0x128a, 0x128d, 1), + Array[Int](0x1290, 0x12b0, 1), + Array[Int](0x12b2, 0x12b5, 1), + Array[Int](0x12b8, 0x12be, 1), + Array[Int](0x12c0, 
0x12c0, 1), + Array[Int](0x12c2, 0x12c5, 1), + Array[Int](0x12c8, 0x12d6, 1), + Array[Int](0x12d8, 0x1310, 1), + Array[Int](0x1312, 0x1315, 1), + Array[Int](0x1318, 0x135a, 1), + Array[Int](0x135d, 0x137c, 1), + Array[Int](0x1380, 0x1399, 1), + Array[Int](0x2d80, 0x2d96, 1), + Array[Int](0x2da0, 0x2da6, 1), + Array[Int](0x2da8, 0x2dae, 1), + Array[Int](0x2db0, 0x2db6, 1), + Array[Int](0x2db8, 0x2dbe, 1), + Array[Int](0x2dc0, 0x2dc6, 1), + Array[Int](0x2dc8, 0x2dce, 1), + Array[Int](0x2dd0, 0x2dd6, 1), + Array[Int](0x2dd8, 0x2dde, 1), + Array[Int](0xab01, 0xab06, 1), + Array[Int](0xab09, 0xab0e, 1), + Array[Int](0xab11, 0xab16, 1), + Array[Int](0xab20, 0xab26, 1), + Array[Int](0xab28, 0xab2e, 1) + ) + private final lazy val _Javanese: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0xa980, 0xa9cd, 1), + Array[Int](0xa9cf, 0xa9d9, 1), + Array[Int](0xa9de, 0xa9df, 1)) + private final lazy val _Syloti_Nagri: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0xa800, 0xa82b, 1)) + private final lazy val _Vai: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0xa500, 0xa62b, 1)) + private final lazy val _Cherokee: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x13a0, 0x13f4, 1)) + private final lazy val _Ogham: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1680, 0x169c, 1)) + private final lazy val _Batak: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1bc0, 0x1bf3, 1), Array[Int](0x1bfc, 0x1bff, 1)) + private final lazy val _Syriac: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0700, 0x070d, 1), + Array[Int](0x070f, 0x074a, 1), + Array[Int](0x074d, 0x074f, 1)) + private final lazy val _Gurmukhi: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0a01, 0x0a03, 1), + Array[Int](0x0a05, 0x0a0a, 1), + Array[Int](0x0a0f, 0x0a10, 1), + Array[Int](0x0a13, 0x0a28, 1), + Array[Int](0x0a2a, 0x0a30, 1), + Array[Int](0x0a32, 0x0a33, 1), + Array[Int](0x0a35, 0x0a36, 1), + Array[Int](0x0a38, 0x0a39, 1), + Array[Int](0x0a3c, 0x0a3c, 1), + 
Array[Int](0x0a3e, 0x0a42, 1), + Array[Int](0x0a47, 0x0a48, 1), + Array[Int](0x0a4b, 0x0a4d, 1), + Array[Int](0x0a51, 0x0a51, 1), + Array[Int](0x0a59, 0x0a5c, 1), + Array[Int](0x0a5e, 0x0a5e, 1), + Array[Int](0x0a66, 0x0a75, 1) + ) + private final lazy val _Tai_Tham: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1a20, 0x1a5e, 1), + Array[Int](0x1a60, 0x1a7c, 1), + Array[Int](0x1a7f, 0x1a89, 1), + Array[Int](0x1a90, 0x1a99, 1), + Array[Int](0x1aa0, 0x1aad, 1)) + private final lazy val _Ol_Chiki: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1c50, 0x1c7f, 1)) + private final lazy val _Mongolian: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1800, 0x1801, 1), + Array[Int](0x1804, 0x1804, 1), + Array[Int](0x1806, 0x180e, 1), + Array[Int](0x1810, 0x1819, 1), + Array[Int](0x1820, 0x1877, 1), + Array[Int](0x1880, 0x18aa, 1)) + private final lazy val _Hanunoo: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1720, 0x1734, 1)) + private final lazy val _Cypriot: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x10800, 0x10805, 1), + Array[Int](0x10808, 0x10808, 1), + Array[Int](0x1080a, 0x10835, 1), + Array[Int](0x10837, 0x10838, 1), + Array[Int](0x1083c, 0x1083c, 1), + Array[Int](0x1083f, 0x1083f, 1)) + private final lazy val _Buginese: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1a00, 0x1a1b, 1), Array[Int](0x1a1e, 0x1a1f, 1)) + private final lazy val _Bamum: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0xa6a0, 0xa6f7, 1), Array[Int](0x16800, 0x16a38, 1)) + private final lazy val _Lepcha: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1c00, 0x1c37, 1), + Array[Int](0x1c3b, 0x1c49, 1), + Array[Int](0x1c4d, 0x1c4f, 1)) + private final lazy val _Thaana: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0780, 0x07b1, 1)) + private final lazy val _Old_Persian: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x103a0, 0x103c3, 1), Array[Int](0x103c8, 0x103d5, 1)) + private final lazy val _Cuneiform: Array[Array[Int]] = + 
Array[Array[Int]](Array[Int](0x12000, 0x1236e, 1), + Array[Int](0x12400, 0x12462, 1), + Array[Int](0x12470, 0x12473, 1)) + private final lazy val _Rejang: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0xa930, 0xa953, 1), Array[Int](0xa95f, 0xa95f, 1)) + private final lazy val _Georgian: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x10a0, 0x10c5, 1), + Array[Int](0x10d0, 0x10fa, 1), + Array[Int](0x10fc, 0x10fc, 1), + Array[Int](0x2d00, 0x2d25, 1)) + private final lazy val _Shavian: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x10450, 0x1047f, 1)) + private final lazy val _Lycian: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x10280, 0x1029c, 1)) + private final lazy val _Nko: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x07c0, 0x07fa, 1)) + private final lazy val _Yi: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0xa000, 0xa48c, 1), Array[Int](0xa490, 0xa4c6, 1)) + private final lazy val _Lao: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0e81, 0x0e82, 1), + Array[Int](0x0e84, 0x0e84, 1), + Array[Int](0x0e87, 0x0e88, 1), + Array[Int](0x0e8a, 0x0e8a, 1), + Array[Int](0x0e8d, 0x0e8d, 1), + Array[Int](0x0e94, 0x0e97, 1), + Array[Int](0x0e99, 0x0e9f, 1), + Array[Int](0x0ea1, 0x0ea3, 1), + Array[Int](0x0ea5, 0x0ea5, 1), + Array[Int](0x0ea7, 0x0ea7, 1), + Array[Int](0x0eaa, 0x0eab, 1), + Array[Int](0x0ead, 0x0eb9, 1), + Array[Int](0x0ebb, 0x0ebd, 1), + Array[Int](0x0ec0, 0x0ec4, 1), + Array[Int](0x0ec6, 0x0ec6, 1), + Array[Int](0x0ec8, 0x0ecd, 1), + Array[Int](0x0ed0, 0x0ed9, 1), + Array[Int](0x0edc, 0x0edd, 1) + ) + private final lazy val _Linear_B: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x10000, 0x1000b, 1), + Array[Int](0x1000d, 0x10026, 1), + Array[Int](0x10028, 0x1003a, 1), + Array[Int](0x1003c, 0x1003d, 1), + Array[Int](0x1003f, 0x1004d, 1), + Array[Int](0x10050, 0x1005d, 1), + Array[Int](0x10080, 0x100fa, 1) + ) + private final lazy val _Old_Italic: Array[Array[Int]] = + 
Array[Array[Int]](Array[Int](0x10300, 0x1031e, 1), Array[Int](0x10320, 0x10323, 1)) + private final lazy val _Tai_Viet: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0xaa80, 0xaac2, 1), Array[Int](0xaadb, 0xaadf, 1)) + private final lazy val _Devanagari: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0900, 0x0950, 1), + Array[Int](0x0953, 0x0963, 1), + Array[Int](0x0966, 0x096f, 1), + Array[Int](0x0971, 0x0977, 1), + Array[Int](0x0979, 0x097f, 1), + Array[Int](0xa8e0, 0xa8fb, 1)) + private final lazy val _Lydian: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x10920, 0x10939, 1), Array[Int](0x1093f, 0x1093f, 1)) + private final lazy val _Tifinagh: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x2d30, 0x2d65, 1), + Array[Int](0x2d6f, 0x2d70, 1), + Array[Int](0x2d7f, 0x2d7f, 1)) + private final lazy val _Ugaritic: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x10380, 0x1039d, 1), Array[Int](0x1039f, 0x1039f, 1)) + private final lazy val _Thai: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0e01, 0x0e3a, 1), Array[Int](0x0e40, 0x0e5b, 1)) + private final lazy val _Cyrillic: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0400, 0x0484, 1), + Array[Int](0x0487, 0x0527, 1), + Array[Int](0x1d2b, 0x1d2b, 1), + Array[Int](0x1d78, 0x1d78, 1), + Array[Int](0x2de0, 0x2dff, 1), + Array[Int](0xa640, 0xa673, 1), + Array[Int](0xa67c, 0xa697, 1) + ) + private final lazy val _Gujarati: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0a81, 0x0a83, 1), + Array[Int](0x0a85, 0x0a8d, 1), + Array[Int](0x0a8f, 0x0a91, 1), + Array[Int](0x0a93, 0x0aa8, 1), + Array[Int](0x0aaa, 0x0ab0, 1), + Array[Int](0x0ab2, 0x0ab3, 1), + Array[Int](0x0ab5, 0x0ab9, 1), + Array[Int](0x0abc, 0x0ac5, 1), + Array[Int](0x0ac7, 0x0ac9, 1), + Array[Int](0x0acb, 0x0acd, 1), + Array[Int](0x0ad0, 0x0ad0, 1), + Array[Int](0x0ae0, 0x0ae3, 1), + Array[Int](0x0ae6, 0x0aef, 1), + Array[Int](0x0af1, 0x0af1, 1) + ) + private final lazy val _Carian: Array[Array[Int]] = + 
Array[Array[Int]](Array[Int](0x102a0, 0x102d0, 1)) + private final lazy val _Phoenician: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x10900, 0x1091b, 1), Array[Int](0x1091f, 0x1091f, 1)) + private final lazy val _Balinese: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x1b00, 0x1b4b, 1), Array[Int](0x1b50, 0x1b7c, 1)) + private final lazy val _Braille: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x2800, 0x28ff, 1)) + private final lazy val _Han: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x2e80, 0x2e99, 1), + Array[Int](0x2e9b, 0x2ef3, 1), + Array[Int](0x2f00, 0x2fd5, 1), + Array[Int](0x3005, 0x3005, 1), + Array[Int](0x3007, 0x3007, 1), + Array[Int](0x3021, 0x3029, 1), + Array[Int](0x3038, 0x303b, 1), + Array[Int](0x3400, 0x4db5, 1), + Array[Int](0x4e00, 0x9fcb, 1), + Array[Int](0xf900, 0xfa2d, 1), + Array[Int](0xfa30, 0xfa6d, 1), + Array[Int](0xfa70, 0xfad9, 1), + Array[Int](0x20000, 0x2a6d6, 1), + Array[Int](0x2a700, 0x2b734, 1), + Array[Int](0x2b740, 0x2b81d, 1), + Array[Int](0x2f800, 0x2fa1d, 1) + ) + private final lazy val _Gothic: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x10330, 0x1034a, 1)) + final lazy val Arabic: Array[Array[Int]] = _Arabic + final lazy val Armenian: Array[Array[Int]] = _Armenian + final lazy val Avestan: Array[Array[Int]] = _Avestan + final lazy val Balinese: Array[Array[Int]] = _Balinese + final lazy val Bamum: Array[Array[Int]] = _Bamum + final lazy val Batak: Array[Array[Int]] = _Batak + final lazy val Bengali: Array[Array[Int]] = _Bengali + final lazy val Bopomofo: Array[Array[Int]] = _Bopomofo + final lazy val Brahmi: Array[Array[Int]] = _Brahmi + final lazy val Braille: Array[Array[Int]] = _Braille + final lazy val Buginese: Array[Array[Int]] = _Buginese + final lazy val Buhid: Array[Array[Int]] = _Buhid + final lazy val Canadian_Aboriginal: Array[Array[Int]] = _Canadian_Aboriginal + final lazy val Carian: Array[Array[Int]] = _Carian + final lazy val Cham: Array[Array[Int]] = _Cham + final 
lazy val Cherokee: Array[Array[Int]] = _Cherokee + final lazy val Common: Array[Array[Int]] = _Common + final lazy val Coptic: Array[Array[Int]] = _Coptic + final lazy val Cuneiform: Array[Array[Int]] = _Cuneiform + final lazy val Cypriot: Array[Array[Int]] = _Cypriot + final lazy val Cyrillic: Array[Array[Int]] = _Cyrillic + final lazy val Deseret: Array[Array[Int]] = _Deseret + final lazy val Devanagari: Array[Array[Int]] = _Devanagari + final lazy val Egyptian_Hieroglyphs: Array[Array[Int]] = _Egyptian_Hieroglyphs + final lazy val Ethiopic: Array[Array[Int]] = _Ethiopic + final lazy val Georgian: Array[Array[Int]] = _Georgian + final lazy val Glagolitic: Array[Array[Int]] = _Glagolitic + final lazy val Gothic: Array[Array[Int]] = _Gothic + final lazy val Greek: Array[Array[Int]] = _Greek + final lazy val Gujarati: Array[Array[Int]] = _Gujarati + final lazy val Gurmukhi: Array[Array[Int]] = _Gurmukhi + final lazy val Han: Array[Array[Int]] = _Han + final lazy val Hangul: Array[Array[Int]] = _Hangul + final lazy val Hanunoo: Array[Array[Int]] = _Hanunoo + final lazy val Hebrew: Array[Array[Int]] = _Hebrew + final lazy val Hiragana: Array[Array[Int]] = _Hiragana + final lazy val Imperial_Aramaic: Array[Array[Int]] = _Imperial_Aramaic + final lazy val Inherited: Array[Array[Int]] = _Inherited + final lazy val Inscriptional_Pahlavi: Array[Array[Int]] = _Inscriptional_Pahlavi + final lazy val Inscriptional_Parthian: Array[Array[Int]] = _Inscriptional_Parthian + final lazy val Javanese: Array[Array[Int]] = _Javanese + final lazy val Kaithi: Array[Array[Int]] = _Kaithi + final lazy val Kannada: Array[Array[Int]] = _Kannada + final lazy val Katakana: Array[Array[Int]] = _Katakana + final lazy val Kayah_Li: Array[Array[Int]] = _Kayah_Li + final lazy val Kharoshthi: Array[Array[Int]] = _Kharoshthi + final lazy val Khmer: Array[Array[Int]] = _Khmer + final lazy val Lao: Array[Array[Int]] = _Lao + final lazy val Latin: Array[Array[Int]] = _Latin + final lazy val Lepcha: 
Array[Array[Int]] = _Lepcha + final lazy val Limbu: Array[Array[Int]] = _Limbu + final lazy val Linear_B: Array[Array[Int]] = _Linear_B + final lazy val Lisu: Array[Array[Int]] = _Lisu + final lazy val Lycian: Array[Array[Int]] = _Lycian + final lazy val Lydian: Array[Array[Int]] = _Lydian + final lazy val Malayalam: Array[Array[Int]] = _Malayalam + final lazy val Mandaic: Array[Array[Int]] = _Mandaic + final lazy val Meetei_Mayek: Array[Array[Int]] = _Meetei_Mayek + final lazy val Mongolian: Array[Array[Int]] = _Mongolian + final lazy val Myanmar: Array[Array[Int]] = _Myanmar + final lazy val New_Tai_Lue: Array[Array[Int]] = _New_Tai_Lue + final lazy val Nko: Array[Array[Int]] = _Nko + final lazy val Ogham: Array[Array[Int]] = _Ogham + final lazy val Ol_Chiki: Array[Array[Int]] = _Ol_Chiki + final lazy val Old_Italic: Array[Array[Int]] = _Old_Italic + final lazy val Old_Persian: Array[Array[Int]] = _Old_Persian + final lazy val Old_South_Arabian: Array[Array[Int]] = _Old_South_Arabian + final lazy val Old_Turkic: Array[Array[Int]] = _Old_Turkic + final lazy val Oriya: Array[Array[Int]] = _Oriya + final lazy val Osmanya: Array[Array[Int]] = _Osmanya + final lazy val Phags_Pa: Array[Array[Int]] = _Phags_Pa + final lazy val Phoenician: Array[Array[Int]] = _Phoenician + final lazy val Rejang: Array[Array[Int]] = _Rejang + final lazy val Runic: Array[Array[Int]] = _Runic + final lazy val Samaritan: Array[Array[Int]] = _Samaritan + final lazy val Saurashtra: Array[Array[Int]] = _Saurashtra + final lazy val Shavian: Array[Array[Int]] = _Shavian + final lazy val Sinhala: Array[Array[Int]] = _Sinhala + final lazy val Sundanese: Array[Array[Int]] = _Sundanese + final lazy val Syloti_Nagri: Array[Array[Int]] = _Syloti_Nagri + final lazy val Syriac: Array[Array[Int]] = _Syriac + final lazy val Tagalog: Array[Array[Int]] = _Tagalog + final lazy val Tagbanwa: Array[Array[Int]] = _Tagbanwa + final lazy val Tai_Le: Array[Array[Int]] = _Tai_Le + final lazy val Tai_Tham: 
Array[Array[Int]] = _Tai_Tham + final lazy val Tai_Viet: Array[Array[Int]] = _Tai_Viet + final lazy val Tamil: Array[Array[Int]] = _Tamil + final lazy val Telugu: Array[Array[Int]] = _Telugu + final lazy val Thaana: Array[Array[Int]] = _Thaana + final lazy val Thai: Array[Array[Int]] = _Thai + final lazy val Tibetan: Array[Array[Int]] = _Tibetan + final lazy val Tifinagh: Array[Array[Int]] = _Tifinagh + final lazy val Ugaritic: Array[Array[Int]] = _Ugaritic + final lazy val Vai: Array[Array[Int]] = _Vai + final lazy val Yi: Array[Array[Int]] = _Yi + private final lazy val _Pattern_Syntax: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0021, 0x002f, 1), + Array[Int](0x003a, 0x0040, 1), + Array[Int](0x005b, 0x005e, 1), + Array[Int](0x0060, 0x0060, 1), + Array[Int](0x007b, 0x007e, 1), + Array[Int](0x00a1, 0x00a7, 1), + Array[Int](0x00a9, 0x00a9, 1), + Array[Int](0x00ab, 0x00ac, 1), + Array[Int](0x00ae, 0x00ae, 1), + Array[Int](0x00b0, 0x00b1, 1), + Array[Int](0x00b6, 0x00b6, 1), + Array[Int](0x00bb, 0x00bb, 1), + Array[Int](0x00bf, 0x00bf, 1), + Array[Int](0x00d7, 0x00d7, 1), + Array[Int](0x00f7, 0x00f7, 1), + Array[Int](0x2010, 0x2027, 1), + Array[Int](0x2030, 0x203e, 1), + Array[Int](0x2041, 0x2053, 1), + Array[Int](0x2055, 0x205e, 1), + Array[Int](0x2190, 0x245f, 1), + Array[Int](0x2500, 0x2775, 1), + Array[Int](0x2794, 0x2bff, 1), + Array[Int](0x2e00, 0x2e7f, 1), + Array[Int](0x3001, 0x3003, 1), + Array[Int](0x3008, 0x3020, 1), + Array[Int](0x3030, 0x3030, 1), + Array[Int](0xfd3e, 0xfd3f, 1), + Array[Int](0xfe45, 0xfe46, 1) + ) + private final lazy val _Other_ID_Start: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x2118, 0x2118, 1), + Array[Int](0x212e, 0x212e, 1), + Array[Int](0x309b, 0x309c, 1)) + private final lazy val _Pattern_White_Space: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0009, 0x000d, 1), + Array[Int](0x0020, 0x0020, 1), + Array[Int](0x0085, 0x0085, 1), + Array[Int](0x200e, 0x200f, 1), + Array[Int](0x2028, 0x2029, 1)) + 
private final lazy val _Other_Lowercase: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x02b0, 0x02b8, 1), + Array[Int](0x02c0, 0x02c1, 1), + Array[Int](0x02e0, 0x02e4, 1), + Array[Int](0x0345, 0x0345, 1), + Array[Int](0x037a, 0x037a, 1), + Array[Int](0x1d2c, 0x1d61, 1), + Array[Int](0x1d78, 0x1d78, 1), + Array[Int](0x1d9b, 0x1dbf, 1), + Array[Int](0x2090, 0x2094, 1), + Array[Int](0x2170, 0x217f, 1), + Array[Int](0x24d0, 0x24e9, 1), + Array[Int](0x2c7d, 0x2c7d, 1), + Array[Int](0xa770, 0xa770, 1) + ) + private final lazy val _Soft_Dotted: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0069, 0x006a, 1), + Array[Int](0x012f, 0x012f, 1), + Array[Int](0x0249, 0x0249, 1), + Array[Int](0x0268, 0x0268, 1), + Array[Int](0x029d, 0x029d, 1), + Array[Int](0x02b2, 0x02b2, 1), + Array[Int](0x03f3, 0x03f3, 1), + Array[Int](0x0456, 0x0456, 1), + Array[Int](0x0458, 0x0458, 1), + Array[Int](0x1d62, 0x1d62, 1), + Array[Int](0x1d96, 0x1d96, 1), + Array[Int](0x1da4, 0x1da4, 1), + Array[Int](0x1da8, 0x1da8, 1), + Array[Int](0x1e2d, 0x1e2d, 1), + Array[Int](0x1ecb, 0x1ecb, 1), + Array[Int](0x2071, 0x2071, 1), + Array[Int](0x2148, 0x2149, 1), + Array[Int](0x2c7c, 0x2c7c, 1), + Array[Int](0x1d422, 0x1d423, 1), + Array[Int](0x1d456, 0x1d457, 1), + Array[Int](0x1d48a, 0x1d48b, 1), + Array[Int](0x1d4be, 0x1d4bf, 1), + Array[Int](0x1d4f2, 0x1d4f3, 1), + Array[Int](0x1d526, 0x1d527, 1), + Array[Int](0x1d55a, 0x1d55b, 1), + Array[Int](0x1d58e, 0x1d58f, 1), + Array[Int](0x1d5c2, 0x1d5c3, 1), + Array[Int](0x1d5f6, 0x1d5f7, 1), + Array[Int](0x1d62a, 0x1d62b, 1), + Array[Int](0x1d65e, 0x1d65f, 1), + Array[Int](0x1d692, 0x1d693, 1) + ) + private final lazy val _Hex_Digit: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0030, 0x0039, 1), + Array[Int](0x0041, 0x0046, 1), + Array[Int](0x0061, 0x0066, 1), + Array[Int](0xff10, 0xff19, 1), + Array[Int](0xff21, 0xff26, 1), + Array[Int](0xff41, 0xff46, 1)) + private final lazy val _ASCII_Hex_Digit: Array[Array[Int]] = + 
Array[Array[Int]](Array[Int](0x0030, 0x0039, 1), + Array[Int](0x0041, 0x0046, 1), + Array[Int](0x0061, 0x0066, 1)) + private final lazy val _Deprecated: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0149, 0x0149, 1), + Array[Int](0x0673, 0x0673, 1), + Array[Int](0x0f77, 0x0f77, 1), + Array[Int](0x0f79, 0x0f79, 1), + Array[Int](0x17a3, 0x17a4, 1), + Array[Int](0x206a, 0x206f, 1), + Array[Int](0x2329, 0x232a, 1), + Array[Int](0xe0001, 0xe0001, 1), + Array[Int](0xe0020, 0xe007f, 1) + ) + private final lazy val _Terminal_Punctuation: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0021, 0x0021, 1), + Array[Int](0x002c, 0x002c, 1), + Array[Int](0x002e, 0x002e, 1), + Array[Int](0x003a, 0x003b, 1), + Array[Int](0x003f, 0x003f, 1), + Array[Int](0x037e, 0x037e, 1), + Array[Int](0x0387, 0x0387, 1), + Array[Int](0x0589, 0x0589, 1), + Array[Int](0x05c3, 0x05c3, 1), + Array[Int](0x060c, 0x060c, 1), + Array[Int](0x061b, 0x061b, 1), + Array[Int](0x061f, 0x061f, 1), + Array[Int](0x06d4, 0x06d4, 1), + Array[Int](0x0700, 0x070a, 1), + Array[Int](0x070c, 0x070c, 1), + Array[Int](0x07f8, 0x07f9, 1), + Array[Int](0x0830, 0x083e, 1), + Array[Int](0x085e, 0x085e, 1), + Array[Int](0x0964, 0x0965, 1), + Array[Int](0x0e5a, 0x0e5b, 1), + Array[Int](0x0f08, 0x0f08, 1), + Array[Int](0x0f0d, 0x0f12, 1), + Array[Int](0x104a, 0x104b, 1), + Array[Int](0x1361, 0x1368, 1), + Array[Int](0x166d, 0x166e, 1), + Array[Int](0x16eb, 0x16ed, 1), + Array[Int](0x17d4, 0x17d6, 1), + Array[Int](0x17da, 0x17da, 1), + Array[Int](0x1802, 0x1805, 1), + Array[Int](0x1808, 0x1809, 1), + Array[Int](0x1944, 0x1945, 1), + Array[Int](0x1aa8, 0x1aab, 1), + Array[Int](0x1b5a, 0x1b5b, 1), + Array[Int](0x1b5d, 0x1b5f, 1), + Array[Int](0x1c3b, 0x1c3f, 1), + Array[Int](0x1c7e, 0x1c7f, 1), + Array[Int](0x203c, 0x203d, 1), + Array[Int](0x2047, 0x2049, 1), + Array[Int](0x2e2e, 0x2e2e, 1), + Array[Int](0x3001, 0x3002, 1), + Array[Int](0xa4fe, 0xa4ff, 1), + Array[Int](0xa60d, 0xa60f, 1), + Array[Int](0xa6f3, 
0xa6f7, 1), + Array[Int](0xa876, 0xa877, 1), + Array[Int](0xa8ce, 0xa8cf, 1), + Array[Int](0xa92f, 0xa92f, 1), + Array[Int](0xa9c7, 0xa9c9, 1), + Array[Int](0xaa5d, 0xaa5f, 1), + Array[Int](0xaadf, 0xaadf, 1), + Array[Int](0xabeb, 0xabeb, 1), + Array[Int](0xfe50, 0xfe52, 1), + Array[Int](0xfe54, 0xfe57, 1), + Array[Int](0xff01, 0xff01, 1), + Array[Int](0xff0c, 0xff0c, 1), + Array[Int](0xff0e, 0xff0e, 1), + Array[Int](0xff1a, 0xff1b, 1), + Array[Int](0xff1f, 0xff1f, 1), + Array[Int](0xff61, 0xff61, 1), + Array[Int](0xff64, 0xff64, 1), + Array[Int](0x1039f, 0x1039f, 1), + Array[Int](0x103d0, 0x103d0, 1), + Array[Int](0x10857, 0x10857, 1), + Array[Int](0x1091f, 0x1091f, 1), + Array[Int](0x10b3a, 0x10b3f, 1), + Array[Int](0x11047, 0x1104d, 1), + Array[Int](0x110be, 0x110c1, 1), + Array[Int](0x12470, 0x12473, 1) + ) + private final lazy val _Quotation_Mark: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0022, 0x0022, 1), + Array[Int](0x0027, 0x0027, 1), + Array[Int](0x00ab, 0x00ab, 1), + Array[Int](0x00bb, 0x00bb, 1), + Array[Int](0x2018, 0x201f, 1), + Array[Int](0x2039, 0x203a, 1), + Array[Int](0x300c, 0x300f, 1), + Array[Int](0x301d, 0x301f, 1), + Array[Int](0xfe41, 0xfe44, 1), + Array[Int](0xff02, 0xff02, 1), + Array[Int](0xff07, 0xff07, 1), + Array[Int](0xff62, 0xff63, 1) + ) + private final lazy val _Other_ID_Continue: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x00b7, 0x00b7, 1), + Array[Int](0x0387, 0x0387, 1), + Array[Int](0x1369, 0x1371, 1), + Array[Int](0x19da, 0x19da, 1)) + private final lazy val _Bidi_Control: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x200e, 0x200f, 1), Array[Int](0x202a, 0x202e, 1)) + private final lazy val _Variation_Selector: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x180b, 0x180d, 1), + Array[Int](0xfe00, 0xfe0f, 1), + Array[Int](0xe0100, 0xe01ef, 1)) + private final lazy val _Noncharacter_Code_Point: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0xfdd0, 0xfdef, 1), + Array[Int](0xfffe, 
0xffff, 1), + Array[Int](0x1fffe, 0x1ffff, 1), + Array[Int](0x2fffe, 0x2ffff, 1), + Array[Int](0x3fffe, 0x3ffff, 1), + Array[Int](0x4fffe, 0x4ffff, 1), + Array[Int](0x5fffe, 0x5ffff, 1), + Array[Int](0x6fffe, 0x6ffff, 1), + Array[Int](0x7fffe, 0x7ffff, 1), + Array[Int](0x8fffe, 0x8ffff, 1), + Array[Int](0x9fffe, 0x9ffff, 1), + Array[Int](0xafffe, 0xaffff, 1), + Array[Int](0xbfffe, 0xbffff, 1), + Array[Int](0xcfffe, 0xcffff, 1), + Array[Int](0xdfffe, 0xdffff, 1), + Array[Int](0xefffe, 0xeffff, 1), + Array[Int](0xffffe, 0xfffff, 1), + Array[Int](0x10fffe, 0x10ffff, 1) + ) + private final lazy val _Other_Math: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x005e, 0x005e, 1), + Array[Int](0x03d0, 0x03d2, 1), + Array[Int](0x03d5, 0x03d5, 1), + Array[Int](0x03f0, 0x03f1, 1), + Array[Int](0x03f4, 0x03f5, 1), + Array[Int](0x2016, 0x2016, 1), + Array[Int](0x2032, 0x2034, 1), + Array[Int](0x2040, 0x2040, 1), + Array[Int](0x2061, 0x2064, 1), + Array[Int](0x207d, 0x207e, 1), + Array[Int](0x208d, 0x208e, 1), + Array[Int](0x20d0, 0x20dc, 1), + Array[Int](0x20e1, 0x20e1, 1), + Array[Int](0x20e5, 0x20e6, 1), + Array[Int](0x20eb, 0x20ef, 1), + Array[Int](0x2102, 0x2102, 1), + Array[Int](0x2107, 0x2107, 1), + Array[Int](0x210a, 0x2113, 1), + Array[Int](0x2115, 0x2115, 1), + Array[Int](0x2119, 0x211d, 1), + Array[Int](0x2124, 0x2124, 1), + Array[Int](0x2128, 0x2129, 1), + Array[Int](0x212c, 0x212d, 1), + Array[Int](0x212f, 0x2131, 1), + Array[Int](0x2133, 0x2138, 1), + Array[Int](0x213c, 0x213f, 1), + Array[Int](0x2145, 0x2149, 1), + Array[Int](0x2195, 0x2199, 1), + Array[Int](0x219c, 0x219f, 1), + Array[Int](0x21a1, 0x21a2, 1), + Array[Int](0x21a4, 0x21a5, 1), + Array[Int](0x21a7, 0x21a7, 1), + Array[Int](0x21a9, 0x21ad, 1), + Array[Int](0x21b0, 0x21b1, 1), + Array[Int](0x21b6, 0x21b7, 1), + Array[Int](0x21bc, 0x21cd, 1), + Array[Int](0x21d0, 0x21d1, 1), + Array[Int](0x21d3, 0x21d3, 1), + Array[Int](0x21d5, 0x21db, 1), + Array[Int](0x21dd, 0x21dd, 1), + Array[Int](0x21e4, 
0x21e5, 1), + Array[Int](0x23b4, 0x23b5, 1), + Array[Int](0x23b7, 0x23b7, 1), + Array[Int](0x23d0, 0x23d0, 1), + Array[Int](0x23e2, 0x23e2, 1), + Array[Int](0x25a0, 0x25a1, 1), + Array[Int](0x25ae, 0x25b6, 1), + Array[Int](0x25bc, 0x25c0, 1), + Array[Int](0x25c6, 0x25c7, 1), + Array[Int](0x25ca, 0x25cb, 1), + Array[Int](0x25cf, 0x25d3, 1), + Array[Int](0x25e2, 0x25e2, 1), + Array[Int](0x25e4, 0x25e4, 1), + Array[Int](0x25e7, 0x25ec, 1), + Array[Int](0x2605, 0x2606, 1), + Array[Int](0x2640, 0x2640, 1), + Array[Int](0x2642, 0x2642, 1), + Array[Int](0x2660, 0x2663, 1), + Array[Int](0x266d, 0x266e, 1), + Array[Int](0x27c5, 0x27c6, 1), + Array[Int](0x27e6, 0x27ef, 1), + Array[Int](0x2983, 0x2998, 1), + Array[Int](0x29d8, 0x29db, 1), + Array[Int](0x29fc, 0x29fd, 1), + Array[Int](0xfe61, 0xfe61, 1), + Array[Int](0xfe63, 0xfe63, 1), + Array[Int](0xfe68, 0xfe68, 1), + Array[Int](0xff3c, 0xff3c, 1), + Array[Int](0xff3e, 0xff3e, 1), + Array[Int](0x1d400, 0x1d454, 1), + Array[Int](0x1d456, 0x1d49c, 1), + Array[Int](0x1d49e, 0x1d49f, 1), + Array[Int](0x1d4a2, 0x1d4a2, 1), + Array[Int](0x1d4a5, 0x1d4a6, 1), + Array[Int](0x1d4a9, 0x1d4ac, 1), + Array[Int](0x1d4ae, 0x1d4b9, 1), + Array[Int](0x1d4bb, 0x1d4bb, 1), + Array[Int](0x1d4bd, 0x1d4c3, 1), + Array[Int](0x1d4c5, 0x1d505, 1), + Array[Int](0x1d507, 0x1d50a, 1), + Array[Int](0x1d50d, 0x1d514, 1), + Array[Int](0x1d516, 0x1d51c, 1), + Array[Int](0x1d51e, 0x1d539, 1), + Array[Int](0x1d53b, 0x1d53e, 1), + Array[Int](0x1d540, 0x1d544, 1), + Array[Int](0x1d546, 0x1d546, 1), + Array[Int](0x1d54a, 0x1d550, 1), + Array[Int](0x1d552, 0x1d6a5, 1), + Array[Int](0x1d6a8, 0x1d6c0, 1), + Array[Int](0x1d6c2, 0x1d6da, 1), + Array[Int](0x1d6dc, 0x1d6fa, 1), + Array[Int](0x1d6fc, 0x1d714, 1), + Array[Int](0x1d716, 0x1d734, 1), + Array[Int](0x1d736, 0x1d74e, 1), + Array[Int](0x1d750, 0x1d76e, 1), + Array[Int](0x1d770, 0x1d788, 1), + Array[Int](0x1d78a, 0x1d7a8, 1), + Array[Int](0x1d7aa, 0x1d7c2, 1), + Array[Int](0x1d7c4, 0x1d7cb, 1), + 
Array[Int](0x1d7ce, 0x1d7ff, 1) + ) + private final lazy val _Unified_Ideograph: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x3400, 0x4db5, 1), + Array[Int](0x4e00, 0x9fcb, 1), + Array[Int](0xfa0e, 0xfa0f, 1), + Array[Int](0xfa11, 0xfa11, 1), + Array[Int](0xfa13, 0xfa14, 1), + Array[Int](0xfa1f, 0xfa1f, 1), + Array[Int](0xfa21, 0xfa21, 1), + Array[Int](0xfa23, 0xfa24, 1), + Array[Int](0xfa27, 0xfa29, 1), + Array[Int](0x20000, 0x2a6d6, 1), + Array[Int](0x2a700, 0x2b734, 1), + Array[Int](0x2b740, 0x2b81d, 1) + ) + private final lazy val _Hyphen: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x002d, 0x002d, 1), + Array[Int](0x00ad, 0x00ad, 1), + Array[Int](0x058a, 0x058a, 1), + Array[Int](0x1806, 0x1806, 1), + Array[Int](0x2010, 0x2011, 1), + Array[Int](0x2e17, 0x2e17, 1), + Array[Int](0x30fb, 0x30fb, 1), + Array[Int](0xfe63, 0xfe63, 1), + Array[Int](0xff0d, 0xff0d, 1), + Array[Int](0xff65, 0xff65, 1) + ) + private final lazy val _IDS_Binary_Operator: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x2ff0, 0x2ff1, 1), Array[Int](0x2ff4, 0x2ffb, 1)) + private final lazy val _Logical_Order_Exception: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0e40, 0x0e44, 1), + Array[Int](0x0ec0, 0x0ec4, 1), + Array[Int](0xaab5, 0xaab6, 1), + Array[Int](0xaab9, 0xaab9, 1), + Array[Int](0xaabb, 0xaabc, 1)) + private final lazy val _Radical: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x2e80, 0x2e99, 1), + Array[Int](0x2e9b, 0x2ef3, 1), + Array[Int](0x2f00, 0x2fd5, 1)) + private final lazy val _Other_Uppercase: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x2160, 0x216f, 1), Array[Int](0x24b6, 0x24cf, 1)) + private final lazy val _STerm: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0021, 0x0021, 1), + Array[Int](0x002e, 0x002e, 1), + Array[Int](0x003f, 0x003f, 1), + Array[Int](0x055c, 0x055c, 1), + Array[Int](0x055e, 0x055e, 1), + Array[Int](0x0589, 0x0589, 1), + Array[Int](0x061f, 0x061f, 1), + Array[Int](0x06d4, 0x06d4, 1), + 
Array[Int](0x0700, 0x0702, 1), + Array[Int](0x07f9, 0x07f9, 1), + Array[Int](0x0964, 0x0965, 1), + Array[Int](0x104a, 0x104b, 1), + Array[Int](0x1362, 0x1362, 1), + Array[Int](0x1367, 0x1368, 1), + Array[Int](0x166e, 0x166e, 1), + Array[Int](0x1735, 0x1736, 1), + Array[Int](0x1803, 0x1803, 1), + Array[Int](0x1809, 0x1809, 1), + Array[Int](0x1944, 0x1945, 1), + Array[Int](0x1aa8, 0x1aab, 1), + Array[Int](0x1b5a, 0x1b5b, 1), + Array[Int](0x1b5e, 0x1b5f, 1), + Array[Int](0x1c3b, 0x1c3c, 1), + Array[Int](0x1c7e, 0x1c7f, 1), + Array[Int](0x203c, 0x203d, 1), + Array[Int](0x2047, 0x2049, 1), + Array[Int](0x2e2e, 0x2e2e, 1), + Array[Int](0x3002, 0x3002, 1), + Array[Int](0xa4ff, 0xa4ff, 1), + Array[Int](0xa60e, 0xa60f, 1), + Array[Int](0xa6f3, 0xa6f3, 1), + Array[Int](0xa6f7, 0xa6f7, 1), + Array[Int](0xa876, 0xa877, 1), + Array[Int](0xa8ce, 0xa8cf, 1), + Array[Int](0xa92f, 0xa92f, 1), + Array[Int](0xa9c8, 0xa9c9, 1), + Array[Int](0xaa5d, 0xaa5f, 1), + Array[Int](0xabeb, 0xabeb, 1), + Array[Int](0xfe52, 0xfe52, 1), + Array[Int](0xfe56, 0xfe57, 1), + Array[Int](0xff01, 0xff01, 1), + Array[Int](0xff0e, 0xff0e, 1), + Array[Int](0xff1f, 0xff1f, 1), + Array[Int](0xff61, 0xff61, 1), + Array[Int](0x10a56, 0x10a57, 1), + Array[Int](0x11047, 0x11048, 1), + Array[Int](0x110be, 0x110c1, 1) + ) + private final lazy val _Other_Alphabetic: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0345, 0x0345, 1), + Array[Int](0x05b0, 0x05bd, 1), + Array[Int](0x05bf, 0x05bf, 1), + Array[Int](0x05c1, 0x05c2, 1), + Array[Int](0x05c4, 0x05c5, 1), + Array[Int](0x05c7, 0x05c7, 1), + Array[Int](0x0610, 0x061a, 1), + Array[Int](0x064b, 0x0657, 1), + Array[Int](0x0659, 0x065f, 1), + Array[Int](0x0670, 0x0670, 1), + Array[Int](0x06d6, 0x06dc, 1), + Array[Int](0x06e1, 0x06e4, 1), + Array[Int](0x06e7, 0x06e8, 1), + Array[Int](0x06ed, 0x06ed, 1), + Array[Int](0x0711, 0x0711, 1), + Array[Int](0x0730, 0x073f, 1), + Array[Int](0x07a6, 0x07b0, 1), + Array[Int](0x0816, 0x0817, 1), + Array[Int](0x081b, 
0x0823, 1), + Array[Int](0x0825, 0x0827, 1), + Array[Int](0x0829, 0x082c, 1), + Array[Int](0x0900, 0x0903, 1), + Array[Int](0x093a, 0x093b, 1), + Array[Int](0x093e, 0x094c, 1), + Array[Int](0x094e, 0x094f, 1), + Array[Int](0x0955, 0x0957, 1), + Array[Int](0x0962, 0x0963, 1), + Array[Int](0x0981, 0x0983, 1), + Array[Int](0x09be, 0x09c4, 1), + Array[Int](0x09c7, 0x09c8, 1), + Array[Int](0x09cb, 0x09cc, 1), + Array[Int](0x09d7, 0x09d7, 1), + Array[Int](0x09e2, 0x09e3, 1), + Array[Int](0x0a01, 0x0a03, 1), + Array[Int](0x0a3e, 0x0a42, 1), + Array[Int](0x0a47, 0x0a48, 1), + Array[Int](0x0a4b, 0x0a4c, 1), + Array[Int](0x0a51, 0x0a51, 1), + Array[Int](0x0a70, 0x0a71, 1), + Array[Int](0x0a75, 0x0a75, 1), + Array[Int](0x0a81, 0x0a83, 1), + Array[Int](0x0abe, 0x0ac5, 1), + Array[Int](0x0ac7, 0x0ac9, 1), + Array[Int](0x0acb, 0x0acc, 1), + Array[Int](0x0ae2, 0x0ae3, 1), + Array[Int](0x0b01, 0x0b03, 1), + Array[Int](0x0b3e, 0x0b44, 1), + Array[Int](0x0b47, 0x0b48, 1), + Array[Int](0x0b4b, 0x0b4c, 1), + Array[Int](0x0b56, 0x0b57, 1), + Array[Int](0x0b62, 0x0b63, 1), + Array[Int](0x0b82, 0x0b82, 1), + Array[Int](0x0bbe, 0x0bc2, 1), + Array[Int](0x0bc6, 0x0bc8, 1), + Array[Int](0x0bca, 0x0bcc, 1), + Array[Int](0x0bd7, 0x0bd7, 1), + Array[Int](0x0c01, 0x0c03, 1), + Array[Int](0x0c3e, 0x0c44, 1), + Array[Int](0x0c46, 0x0c48, 1), + Array[Int](0x0c4a, 0x0c4c, 1), + Array[Int](0x0c55, 0x0c56, 1), + Array[Int](0x0c62, 0x0c63, 1), + Array[Int](0x0c82, 0x0c83, 1), + Array[Int](0x0cbe, 0x0cc4, 1), + Array[Int](0x0cc6, 0x0cc8, 1), + Array[Int](0x0cca, 0x0ccc, 1), + Array[Int](0x0cd5, 0x0cd6, 1), + Array[Int](0x0ce2, 0x0ce3, 1), + Array[Int](0x0d02, 0x0d03, 1), + Array[Int](0x0d3e, 0x0d44, 1), + Array[Int](0x0d46, 0x0d48, 1), + Array[Int](0x0d4a, 0x0d4c, 1), + Array[Int](0x0d57, 0x0d57, 1), + Array[Int](0x0d62, 0x0d63, 1), + Array[Int](0x0d82, 0x0d83, 1), + Array[Int](0x0dcf, 0x0dd4, 1), + Array[Int](0x0dd6, 0x0dd6, 1), + Array[Int](0x0dd8, 0x0ddf, 1), + Array[Int](0x0df2, 0x0df3, 1), + 
Array[Int](0x0e31, 0x0e31, 1), + Array[Int](0x0e34, 0x0e3a, 1), + Array[Int](0x0e4d, 0x0e4d, 1), + Array[Int](0x0eb1, 0x0eb1, 1), + Array[Int](0x0eb4, 0x0eb9, 1), + Array[Int](0x0ebb, 0x0ebc, 1), + Array[Int](0x0ecd, 0x0ecd, 1), + Array[Int](0x0f71, 0x0f81, 1), + Array[Int](0x0f8d, 0x0f97, 1), + Array[Int](0x0f99, 0x0fbc, 1), + Array[Int](0x102b, 0x1036, 1), + Array[Int](0x1038, 0x1038, 1), + Array[Int](0x103b, 0x103e, 1), + Array[Int](0x1056, 0x1059, 1), + Array[Int](0x105e, 0x1060, 1), + Array[Int](0x1062, 0x1062, 1), + Array[Int](0x1067, 0x1068, 1), + Array[Int](0x1071, 0x1074, 1), + Array[Int](0x1082, 0x1086, 1), + Array[Int](0x109c, 0x109d, 1), + Array[Int](0x135f, 0x135f, 1), + Array[Int](0x1712, 0x1713, 1), + Array[Int](0x1732, 0x1733, 1), + Array[Int](0x1752, 0x1753, 1), + Array[Int](0x1772, 0x1773, 1), + Array[Int](0x17b6, 0x17c8, 1), + Array[Int](0x18a9, 0x18a9, 1), + Array[Int](0x1920, 0x192b, 1), + Array[Int](0x1930, 0x1938, 1), + Array[Int](0x19b0, 0x19c0, 1), + Array[Int](0x19c8, 0x19c9, 1), + Array[Int](0x1a17, 0x1a1b, 1), + Array[Int](0x1a55, 0x1a5e, 1), + Array[Int](0x1a61, 0x1a74, 1), + Array[Int](0x1b00, 0x1b04, 1), + Array[Int](0x1b35, 0x1b43, 1), + Array[Int](0x1b80, 0x1b82, 1), + Array[Int](0x1ba1, 0x1ba9, 1), + Array[Int](0x1be7, 0x1bf1, 1), + Array[Int](0x1c24, 0x1c35, 1), + Array[Int](0x1cf2, 0x1cf2, 1), + Array[Int](0x24b6, 0x24e9, 1), + Array[Int](0x2de0, 0x2dff, 1), + Array[Int](0xa823, 0xa827, 1), + Array[Int](0xa880, 0xa881, 1), + Array[Int](0xa8b4, 0xa8c3, 1), + Array[Int](0xa926, 0xa92a, 1), + Array[Int](0xa947, 0xa952, 1), + Array[Int](0xa980, 0xa983, 1), + Array[Int](0xa9b4, 0xa9bf, 1), + Array[Int](0xaa29, 0xaa36, 1), + Array[Int](0xaa43, 0xaa43, 1), + Array[Int](0xaa4c, 0xaa4d, 1), + Array[Int](0xaab0, 0xaab0, 1), + Array[Int](0xaab2, 0xaab4, 1), + Array[Int](0xaab7, 0xaab8, 1), + Array[Int](0xaabe, 0xaabe, 1), + Array[Int](0xabe3, 0xabea, 1), + Array[Int](0xfb1e, 0xfb1e, 1), + Array[Int](0x10a01, 0x10a03, 1), + 
Array[Int](0x10a05, 0x10a06, 1), + Array[Int](0x10a0c, 0x10a0f, 1), + Array[Int](0x11000, 0x11002, 1), + Array[Int](0x11038, 0x11045, 1), + Array[Int](0x11082, 0x11082, 1), + Array[Int](0x110b0, 0x110b8, 1) + ) + private final lazy val _Diacritic: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x005e, 0x005e, 1), + Array[Int](0x0060, 0x0060, 1), + Array[Int](0x00a8, 0x00a8, 1), + Array[Int](0x00af, 0x00af, 1), + Array[Int](0x00b4, 0x00b4, 1), + Array[Int](0x00b7, 0x00b8, 1), + Array[Int](0x02b0, 0x034e, 1), + Array[Int](0x0350, 0x0357, 1), + Array[Int](0x035d, 0x0362, 1), + Array[Int](0x0374, 0x0375, 1), + Array[Int](0x037a, 0x037a, 1), + Array[Int](0x0384, 0x0385, 1), + Array[Int](0x0483, 0x0487, 1), + Array[Int](0x0559, 0x0559, 1), + Array[Int](0x0591, 0x05a1, 1), + Array[Int](0x05a3, 0x05bd, 1), + Array[Int](0x05bf, 0x05bf, 1), + Array[Int](0x05c1, 0x05c2, 1), + Array[Int](0x05c4, 0x05c4, 1), + Array[Int](0x064b, 0x0652, 1), + Array[Int](0x0657, 0x0658, 1), + Array[Int](0x06df, 0x06e0, 1), + Array[Int](0x06e5, 0x06e6, 1), + Array[Int](0x06ea, 0x06ec, 1), + Array[Int](0x0730, 0x074a, 1), + Array[Int](0x07a6, 0x07b0, 1), + Array[Int](0x07eb, 0x07f5, 1), + Array[Int](0x0818, 0x0819, 1), + Array[Int](0x093c, 0x093c, 1), + Array[Int](0x094d, 0x094d, 1), + Array[Int](0x0951, 0x0954, 1), + Array[Int](0x0971, 0x0971, 1), + Array[Int](0x09bc, 0x09bc, 1), + Array[Int](0x09cd, 0x09cd, 1), + Array[Int](0x0a3c, 0x0a3c, 1), + Array[Int](0x0a4d, 0x0a4d, 1), + Array[Int](0x0abc, 0x0abc, 1), + Array[Int](0x0acd, 0x0acd, 1), + Array[Int](0x0b3c, 0x0b3c, 1), + Array[Int](0x0b4d, 0x0b4d, 1), + Array[Int](0x0bcd, 0x0bcd, 1), + Array[Int](0x0c4d, 0x0c4d, 1), + Array[Int](0x0cbc, 0x0cbc, 1), + Array[Int](0x0ccd, 0x0ccd, 1), + Array[Int](0x0d4d, 0x0d4d, 1), + Array[Int](0x0dca, 0x0dca, 1), + Array[Int](0x0e47, 0x0e4c, 1), + Array[Int](0x0e4e, 0x0e4e, 1), + Array[Int](0x0ec8, 0x0ecc, 1), + Array[Int](0x0f18, 0x0f19, 1), + Array[Int](0x0f35, 0x0f35, 1), + Array[Int](0x0f37, 
0x0f37, 1), + Array[Int](0x0f39, 0x0f39, 1), + Array[Int](0x0f3e, 0x0f3f, 1), + Array[Int](0x0f82, 0x0f84, 1), + Array[Int](0x0f86, 0x0f87, 1), + Array[Int](0x0fc6, 0x0fc6, 1), + Array[Int](0x1037, 0x1037, 1), + Array[Int](0x1039, 0x103a, 1), + Array[Int](0x1087, 0x108d, 1), + Array[Int](0x108f, 0x108f, 1), + Array[Int](0x109a, 0x109b, 1), + Array[Int](0x17c9, 0x17d3, 1), + Array[Int](0x17dd, 0x17dd, 1), + Array[Int](0x1939, 0x193b, 1), + Array[Int](0x1a75, 0x1a7c, 1), + Array[Int](0x1a7f, 0x1a7f, 1), + Array[Int](0x1b34, 0x1b34, 1), + Array[Int](0x1b44, 0x1b44, 1), + Array[Int](0x1b6b, 0x1b73, 1), + Array[Int](0x1baa, 0x1baa, 1), + Array[Int](0x1c36, 0x1c37, 1), + Array[Int](0x1c78, 0x1c7d, 1), + Array[Int](0x1cd0, 0x1ce8, 1), + Array[Int](0x1ced, 0x1ced, 1), + Array[Int](0x1d2c, 0x1d6a, 1), + Array[Int](0x1dc4, 0x1dcf, 1), + Array[Int](0x1dfd, 0x1dff, 1), + Array[Int](0x1fbd, 0x1fbd, 1), + Array[Int](0x1fbf, 0x1fc1, 1), + Array[Int](0x1fcd, 0x1fcf, 1), + Array[Int](0x1fdd, 0x1fdf, 1), + Array[Int](0x1fed, 0x1fef, 1), + Array[Int](0x1ffd, 0x1ffe, 1), + Array[Int](0x2cef, 0x2cf1, 1), + Array[Int](0x2e2f, 0x2e2f, 1), + Array[Int](0x302a, 0x302f, 1), + Array[Int](0x3099, 0x309c, 1), + Array[Int](0x30fc, 0x30fc, 1), + Array[Int](0xa66f, 0xa66f, 1), + Array[Int](0xa67c, 0xa67d, 1), + Array[Int](0xa67f, 0xa67f, 1), + Array[Int](0xa6f0, 0xa6f1, 1), + Array[Int](0xa717, 0xa721, 1), + Array[Int](0xa788, 0xa788, 1), + Array[Int](0xa8c4, 0xa8c4, 1), + Array[Int](0xa8e0, 0xa8f1, 1), + Array[Int](0xa92b, 0xa92e, 1), + Array[Int](0xa953, 0xa953, 1), + Array[Int](0xa9b3, 0xa9b3, 1), + Array[Int](0xa9c0, 0xa9c0, 1), + Array[Int](0xaa7b, 0xaa7b, 1), + Array[Int](0xaabf, 0xaac2, 1), + Array[Int](0xabec, 0xabed, 1), + Array[Int](0xfb1e, 0xfb1e, 1), + Array[Int](0xfe20, 0xfe26, 1), + Array[Int](0xff3e, 0xff3e, 1), + Array[Int](0xff40, 0xff40, 1), + Array[Int](0xff70, 0xff70, 1), + Array[Int](0xff9e, 0xff9f, 1), + Array[Int](0xffe3, 0xffe3, 1), + Array[Int](0x110b9, 0x110ba, 1), + 
Array[Int](0x1d167, 0x1d169, 1), + Array[Int](0x1d16d, 0x1d172, 1), + Array[Int](0x1d17b, 0x1d182, 1), + Array[Int](0x1d185, 0x1d18b, 1), + Array[Int](0x1d1aa, 0x1d1ad, 1) + ) + private final lazy val _Extender: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x00b7, 0x00b7, 1), + Array[Int](0x02d0, 0x02d1, 1), + Array[Int](0x0640, 0x0640, 1), + Array[Int](0x07fa, 0x07fa, 1), + Array[Int](0x0e46, 0x0e46, 1), + Array[Int](0x0ec6, 0x0ec6, 1), + Array[Int](0x1843, 0x1843, 1), + Array[Int](0x1aa7, 0x1aa7, 1), + Array[Int](0x1c36, 0x1c36, 1), + Array[Int](0x1c7b, 0x1c7b, 1), + Array[Int](0x3005, 0x3005, 1), + Array[Int](0x3031, 0x3035, 1), + Array[Int](0x309d, 0x309e, 1), + Array[Int](0x30fc, 0x30fe, 1), + Array[Int](0xa015, 0xa015, 1), + Array[Int](0xa60c, 0xa60c, 1), + Array[Int](0xa9cf, 0xa9cf, 1), + Array[Int](0xaa70, 0xaa70, 1), + Array[Int](0xaadd, 0xaadd, 1), + Array[Int](0xff70, 0xff70, 1) + ) + private final lazy val _Join_Control: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x200c, 0x200d, 1)) + private final lazy val _Ideographic: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x3006, 0x3007, 1), + Array[Int](0x3021, 0x3029, 1), + Array[Int](0x3038, 0x303a, 1), + Array[Int](0x3400, 0x4db5, 1), + Array[Int](0x4e00, 0x9fcb, 1), + Array[Int](0xf900, 0xfa2d, 1), + Array[Int](0xfa30, 0xfa6d, 1), + Array[Int](0xfa70, 0xfad9, 1), + Array[Int](0x20000, 0x2a6d6, 1), + Array[Int](0x2a700, 0x2b734, 1), + Array[Int](0x2b740, 0x2b81d, 1), + Array[Int](0x2f800, 0x2fa1d, 1) + ) + private final lazy val _Dash: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x002d, 0x002d, 1), + Array[Int](0x058a, 0x058a, 1), + Array[Int](0x05be, 0x05be, 1), + Array[Int](0x1400, 0x1400, 1), + Array[Int](0x1806, 0x1806, 1), + Array[Int](0x2010, 0x2015, 1), + Array[Int](0x2053, 0x2053, 1), + Array[Int](0x207b, 0x207b, 1), + Array[Int](0x208b, 0x208b, 1), + Array[Int](0x2212, 0x2212, 1), + Array[Int](0x2e17, 0x2e17, 1), + Array[Int](0x2e1a, 0x2e1a, 1), + 
Array[Int](0x301c, 0x301c, 1), + Array[Int](0x3030, 0x3030, 1), + Array[Int](0x30a0, 0x30a0, 1), + Array[Int](0xfe31, 0xfe32, 1), + Array[Int](0xfe58, 0xfe58, 1), + Array[Int](0xfe63, 0xfe63, 1), + Array[Int](0xff0d, 0xff0d, 1) + ) + private final lazy val _IDS_Trinary_Operator: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x2ff2, 0x2ff3, 1)) + private final lazy val _Other_Grapheme_Extend: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x09be, 0x09be, 1), + Array[Int](0x09d7, 0x09d7, 1), + Array[Int](0x0b3e, 0x0b3e, 1), + Array[Int](0x0b57, 0x0b57, 1), + Array[Int](0x0bbe, 0x0bbe, 1), + Array[Int](0x0bd7, 0x0bd7, 1), + Array[Int](0x0cc2, 0x0cc2, 1), + Array[Int](0x0cd5, 0x0cd6, 1), + Array[Int](0x0d3e, 0x0d3e, 1), + Array[Int](0x0d57, 0x0d57, 1), + Array[Int](0x0dcf, 0x0dcf, 1), + Array[Int](0x0ddf, 0x0ddf, 1), + Array[Int](0x200c, 0x200d, 1), + Array[Int](0xff9e, 0xff9f, 1), + Array[Int](0x1d165, 0x1d165, 1), + Array[Int](0x1d16e, 0x1d172, 1) + ) + private final lazy val _Other_Default_Ignorable_Code_Point: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x034f, 0x034f, 1), + Array[Int](0x115f, 0x1160, 1), + Array[Int](0x2065, 0x2069, 1), + Array[Int](0x3164, 0x3164, 1), + Array[Int](0xffa0, 0xffa0, 1), + Array[Int](0xfff0, 0xfff8, 1), + Array[Int](0xe0000, 0xe0000, 1), + Array[Int](0xe0002, 0xe001f, 1), + Array[Int](0xe0080, 0xe00ff, 1), + Array[Int](0xe01f0, 0xe0fff, 1) + ) + private final lazy val _White_Space: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0009, 0x000d, 1), + Array[Int](0x0020, 0x0020, 1), + Array[Int](0x0085, 0x0085, 1), + Array[Int](0x00a0, 0x00a0, 1), + Array[Int](0x1680, 0x1680, 1), + Array[Int](0x180e, 0x180e, 1), + Array[Int](0x2000, 0x200a, 1), + Array[Int](0x2028, 0x2029, 1), + Array[Int](0x202f, 0x202f, 1), + Array[Int](0x205f, 0x205f, 1), + Array[Int](0x3000, 0x3000, 1) + ) + final lazy val ASCII_Hex_Digit: Array[Array[Int]] = _ASCII_Hex_Digit + final lazy val Bidi_Control: Array[Array[Int]] = 
_Bidi_Control + final lazy val Dash: Array[Array[Int]] = _Dash + final lazy val Deprecated: Array[Array[Int]] = _Deprecated + final lazy val Diacritic: Array[Array[Int]] = _Diacritic + final lazy val Extender: Array[Array[Int]] = _Extender + final lazy val Hex_Digit: Array[Array[Int]] = _Hex_Digit + final lazy val Hyphen: Array[Array[Int]] = _Hyphen + final lazy val IDS_Binary_Operator: Array[Array[Int]] = _IDS_Binary_Operator + final lazy val IDS_Trinary_Operator: Array[Array[Int]] = _IDS_Trinary_Operator + final lazy val Ideographic: Array[Array[Int]] = _Ideographic + final lazy val Join_Control: Array[Array[Int]] = _Join_Control + final lazy val Logical_Order_Exception: Array[Array[Int]] = _Logical_Order_Exception + final lazy val Noncharacter_Code_Point: Array[Array[Int]] = _Noncharacter_Code_Point + final lazy val Other_Alphabetic: Array[Array[Int]] = _Other_Alphabetic + final lazy val Other_Default_Ignorable_Code_Point: Array[Array[Int]] = + _Other_Default_Ignorable_Code_Point + final lazy val Other_Grapheme_Extend: Array[Array[Int]] = _Other_Grapheme_Extend + final lazy val Other_ID_Continue: Array[Array[Int]] = _Other_ID_Continue + final lazy val Other_ID_Start: Array[Array[Int]] = _Other_ID_Start + final lazy val Other_Lowercase: Array[Array[Int]] = _Other_Lowercase + final lazy val Other_Math: Array[Array[Int]] = _Other_Math + final lazy val Other_Uppercase: Array[Array[Int]] = _Other_Uppercase + final lazy val Pattern_Syntax: Array[Array[Int]] = _Pattern_Syntax + final lazy val Pattern_White_Space: Array[Array[Int]] = _Pattern_White_Space + final lazy val Quotation_Mark: Array[Array[Int]] = _Quotation_Mark + final lazy val Radical: Array[Array[Int]] = _Radical + final lazy val STerm: Array[Array[Int]] = _STerm + final lazy val Soft_Dotted: Array[Array[Int]] = _Soft_Dotted + final lazy val Terminal_Punctuation: Array[Array[Int]] = _Terminal_Punctuation + final lazy val Unified_Ideograph: Array[Array[Int]] = _Unified_Ideograph + final lazy val 
Variation_Selector: Array[Array[Int]] = _Variation_Selector + final lazy val White_Space: Array[Array[Int]] = _White_Space + final lazy val CASE_RANGES: Array[Array[Int]] = Array[Array[Int]]( + Array[Int](0x0041, 0x005A, 0, 32, 0), + Array[Int](0x0061, 0x007A, -32, 0, -32), + Array[Int](0x00B5, 0x00B5, 743, 0, 743), + Array[Int](0x00C0, 0x00D6, 0, 32, 0), + Array[Int](0x00D8, 0x00DE, 0, 32, 0), + Array[Int](0x00E0, 0x00F6, -32, 0, -32), + Array[Int](0x00F8, 0x00FE, -32, 0, -32), + Array[Int](0x00FF, 0x00FF, 121, 0, 121), + Array[Int](0x0100, 0x012F, UpperLower, UpperLower, UpperLower), + Array[Int](0x0130, 0x0130, 0, -199, 0), + Array[Int](0x0131, 0x0131, -232, 0, -232), + Array[Int](0x0132, 0x0137, UpperLower, UpperLower, UpperLower), + Array[Int](0x0139, 0x0148, UpperLower, UpperLower, UpperLower), + Array[Int](0x014A, 0x0177, UpperLower, UpperLower, UpperLower), + Array[Int](0x0178, 0x0178, 0, -121, 0), + Array[Int](0x0179, 0x017E, UpperLower, UpperLower, UpperLower), + Array[Int](0x017F, 0x017F, -300, 0, -300), + Array[Int](0x0180, 0x0180, 195, 0, 195), + Array[Int](0x0181, 0x0181, 0, 210, 0), + Array[Int](0x0182, 0x0185, UpperLower, UpperLower, UpperLower), + Array[Int](0x0186, 0x0186, 0, 206, 0), + Array[Int](0x0187, 0x0188, UpperLower, UpperLower, UpperLower), + Array[Int](0x0189, 0x018A, 0, 205, 0), + Array[Int](0x018B, 0x018C, UpperLower, UpperLower, UpperLower), + Array[Int](0x018E, 0x018E, 0, 79, 0), + Array[Int](0x018F, 0x018F, 0, 202, 0), + Array[Int](0x0190, 0x0190, 0, 203, 0), + Array[Int](0x0191, 0x0192, UpperLower, UpperLower, UpperLower), + Array[Int](0x0193, 0x0193, 0, 205, 0), + Array[Int](0x0194, 0x0194, 0, 207, 0), + Array[Int](0x0195, 0x0195, 97, 0, 97), + Array[Int](0x0196, 0x0196, 0, 211, 0), + Array[Int](0x0197, 0x0197, 0, 209, 0), + Array[Int](0x0198, 0x0199, UpperLower, UpperLower, UpperLower), + Array[Int](0x019A, 0x019A, 163, 0, 163), + Array[Int](0x019C, 0x019C, 0, 211, 0), + Array[Int](0x019D, 0x019D, 0, 213, 0), + Array[Int](0x019E, 
0x019E, 130, 0, 130), + Array[Int](0x019F, 0x019F, 0, 214, 0), + Array[Int](0x01A0, 0x01A5, UpperLower, UpperLower, UpperLower), + Array[Int](0x01A6, 0x01A6, 0, 218, 0), + Array[Int](0x01A7, 0x01A8, UpperLower, UpperLower, UpperLower), + Array[Int](0x01A9, 0x01A9, 0, 218, 0), + Array[Int](0x01AC, 0x01AD, UpperLower, UpperLower, UpperLower), + Array[Int](0x01AE, 0x01AE, 0, 218, 0), + Array[Int](0x01AF, 0x01B0, UpperLower, UpperLower, UpperLower), + Array[Int](0x01B1, 0x01B2, 0, 217, 0), + Array[Int](0x01B3, 0x01B6, UpperLower, UpperLower, UpperLower), + Array[Int](0x01B7, 0x01B7, 0, 219, 0), + Array[Int](0x01B8, 0x01B9, UpperLower, UpperLower, UpperLower), + Array[Int](0x01BC, 0x01BD, UpperLower, UpperLower, UpperLower), + Array[Int](0x01BF, 0x01BF, 56, 0, 56), + Array[Int](0x01C4, 0x01C4, 0, 2, 1), + Array[Int](0x01C5, 0x01C5, -1, 1, 0), + Array[Int](0x01C6, 0x01C6, -2, 0, -1), + Array[Int](0x01C7, 0x01C7, 0, 2, 1), + Array[Int](0x01C8, 0x01C8, -1, 1, 0), + Array[Int](0x01C9, 0x01C9, -2, 0, -1), + Array[Int](0x01CA, 0x01CA, 0, 2, 1), + Array[Int](0x01CB, 0x01CB, -1, 1, 0), + Array[Int](0x01CC, 0x01CC, -2, 0, -1), + Array[Int](0x01CD, 0x01DC, UpperLower, UpperLower, UpperLower), + Array[Int](0x01DD, 0x01DD, -79, 0, -79), + Array[Int](0x01DE, 0x01EF, UpperLower, UpperLower, UpperLower), + Array[Int](0x01F1, 0x01F1, 0, 2, 1), + Array[Int](0x01F2, 0x01F2, -1, 1, 0), + Array[Int](0x01F3, 0x01F3, -2, 0, -1), + Array[Int](0x01F4, 0x01F5, UpperLower, UpperLower, UpperLower), + Array[Int](0x01F6, 0x01F6, 0, -97, 0), + Array[Int](0x01F7, 0x01F7, 0, -56, 0), + Array[Int](0x01F8, 0x021F, UpperLower, UpperLower, UpperLower), + Array[Int](0x0220, 0x0220, 0, -130, 0), + Array[Int](0x0222, 0x0233, UpperLower, UpperLower, UpperLower), + Array[Int](0x023A, 0x023A, 0, 10795, 0), + Array[Int](0x023B, 0x023C, UpperLower, UpperLower, UpperLower), + Array[Int](0x023D, 0x023D, 0, -163, 0), + Array[Int](0x023E, 0x023E, 0, 10792, 0), + Array[Int](0x023F, 0x0240, 10815, 0, 10815), + 
Array[Int](0x0241, 0x0242, UpperLower, UpperLower, UpperLower), + Array[Int](0x0243, 0x0243, 0, -195, 0), + Array[Int](0x0244, 0x0244, 0, 69, 0), + Array[Int](0x0245, 0x0245, 0, 71, 0), + Array[Int](0x0246, 0x024F, UpperLower, UpperLower, UpperLower), + Array[Int](0x0250, 0x0250, 10783, 0, 10783), + Array[Int](0x0251, 0x0251, 10780, 0, 10780), + Array[Int](0x0252, 0x0252, 10782, 0, 10782), + Array[Int](0x0253, 0x0253, -210, 0, -210), + Array[Int](0x0254, 0x0254, -206, 0, -206), + Array[Int](0x0256, 0x0257, -205, 0, -205), + Array[Int](0x0259, 0x0259, -202, 0, -202), + Array[Int](0x025B, 0x025B, -203, 0, -203), + Array[Int](0x0260, 0x0260, -205, 0, -205), + Array[Int](0x0263, 0x0263, -207, 0, -207), + Array[Int](0x0265, 0x0265, 42280, 0, 42280), + Array[Int](0x0268, 0x0268, -209, 0, -209), + Array[Int](0x0269, 0x0269, -211, 0, -211), + Array[Int](0x026B, 0x026B, 10743, 0, 10743), + Array[Int](0x026F, 0x026F, -211, 0, -211), + Array[Int](0x0271, 0x0271, 10749, 0, 10749), + Array[Int](0x0272, 0x0272, -213, 0, -213), + Array[Int](0x0275, 0x0275, -214, 0, -214), + Array[Int](0x027D, 0x027D, 10727, 0, 10727), + Array[Int](0x0280, 0x0280, -218, 0, -218), + Array[Int](0x0283, 0x0283, -218, 0, -218), + Array[Int](0x0288, 0x0288, -218, 0, -218), + Array[Int](0x0289, 0x0289, -69, 0, -69), + Array[Int](0x028A, 0x028B, -217, 0, -217), + Array[Int](0x028C, 0x028C, -71, 0, -71), + Array[Int](0x0292, 0x0292, -219, 0, -219), + Array[Int](0x0345, 0x0345, 84, 0, 84), + Array[Int](0x0370, 0x0373, UpperLower, UpperLower, UpperLower), + Array[Int](0x0376, 0x0377, UpperLower, UpperLower, UpperLower), + Array[Int](0x037B, 0x037D, 130, 0, 130), + Array[Int](0x0386, 0x0386, 0, 38, 0), + Array[Int](0x0388, 0x038A, 0, 37, 0), + Array[Int](0x038C, 0x038C, 0, 64, 0), + Array[Int](0x038E, 0x038F, 0, 63, 0), + Array[Int](0x0391, 0x03A1, 0, 32, 0), + Array[Int](0x03A3, 0x03AB, 0, 32, 0), + Array[Int](0x03AC, 0x03AC, -38, 0, -38), + Array[Int](0x03AD, 0x03AF, -37, 0, -37), + Array[Int](0x03B1, 
0x03C1, -32, 0, -32), + Array[Int](0x03C2, 0x03C2, -31, 0, -31), + Array[Int](0x03C3, 0x03CB, -32, 0, -32), + Array[Int](0x03CC, 0x03CC, -64, 0, -64), + Array[Int](0x03CD, 0x03CE, -63, 0, -63), + Array[Int](0x03CF, 0x03CF, 0, 8, 0), + Array[Int](0x03D0, 0x03D0, -62, 0, -62), + Array[Int](0x03D1, 0x03D1, -57, 0, -57), + Array[Int](0x03D5, 0x03D5, -47, 0, -47), + Array[Int](0x03D6, 0x03D6, -54, 0, -54), + Array[Int](0x03D7, 0x03D7, -8, 0, -8), + Array[Int](0x03D8, 0x03EF, UpperLower, UpperLower, UpperLower), + Array[Int](0x03F0, 0x03F0, -86, 0, -86), + Array[Int](0x03F1, 0x03F1, -80, 0, -80), + Array[Int](0x03F2, 0x03F2, 7, 0, 7), + Array[Int](0x03F4, 0x03F4, 0, -60, 0), + Array[Int](0x03F5, 0x03F5, -96, 0, -96), + Array[Int](0x03F7, 0x03F8, UpperLower, UpperLower, UpperLower), + Array[Int](0x03F9, 0x03F9, 0, -7, 0), + Array[Int](0x03FA, 0x03FB, UpperLower, UpperLower, UpperLower), + Array[Int](0x03FD, 0x03FF, 0, -130, 0), + Array[Int](0x0400, 0x040F, 0, 80, 0), + Array[Int](0x0410, 0x042F, 0, 32, 0), + Array[Int](0x0430, 0x044F, -32, 0, -32), + Array[Int](0x0450, 0x045F, -80, 0, -80), + Array[Int](0x0460, 0x0481, UpperLower, UpperLower, UpperLower), + Array[Int](0x048A, 0x04BF, UpperLower, UpperLower, UpperLower), + Array[Int](0x04C0, 0x04C0, 0, 15, 0), + Array[Int](0x04C1, 0x04CE, UpperLower, UpperLower, UpperLower), + Array[Int](0x04CF, 0x04CF, -15, 0, -15), + Array[Int](0x04D0, 0x0527, UpperLower, UpperLower, UpperLower), + Array[Int](0x0531, 0x0556, 0, 48, 0), + Array[Int](0x0561, 0x0586, -48, 0, -48), + Array[Int](0x10A0, 0x10C5, 0, 7264, 0), + Array[Int](0x1D79, 0x1D79, 35332, 0, 35332), + Array[Int](0x1D7D, 0x1D7D, 3814, 0, 3814), + Array[Int](0x1E00, 0x1E95, UpperLower, UpperLower, UpperLower), + Array[Int](0x1E9B, 0x1E9B, -59, 0, -59), + Array[Int](0x1E9E, 0x1E9E, 0, -7615, 0), + Array[Int](0x1EA0, 0x1EFF, UpperLower, UpperLower, UpperLower), + Array[Int](0x1F00, 0x1F07, 8, 0, 8), + Array[Int](0x1F08, 0x1F0F, 0, -8, 0), + Array[Int](0x1F10, 0x1F15, 8, 0, 
8), + Array[Int](0x1F18, 0x1F1D, 0, -8, 0), + Array[Int](0x1F20, 0x1F27, 8, 0, 8), + Array[Int](0x1F28, 0x1F2F, 0, -8, 0), + Array[Int](0x1F30, 0x1F37, 8, 0, 8), + Array[Int](0x1F38, 0x1F3F, 0, -8, 0), + Array[Int](0x1F40, 0x1F45, 8, 0, 8), + Array[Int](0x1F48, 0x1F4D, 0, -8, 0), + Array[Int](0x1F51, 0x1F51, 8, 0, 8), + Array[Int](0x1F53, 0x1F53, 8, 0, 8), + Array[Int](0x1F55, 0x1F55, 8, 0, 8), + Array[Int](0x1F57, 0x1F57, 8, 0, 8), + Array[Int](0x1F59, 0x1F59, 0, -8, 0), + Array[Int](0x1F5B, 0x1F5B, 0, -8, 0), + Array[Int](0x1F5D, 0x1F5D, 0, -8, 0), + Array[Int](0x1F5F, 0x1F5F, 0, -8, 0), + Array[Int](0x1F60, 0x1F67, 8, 0, 8), + Array[Int](0x1F68, 0x1F6F, 0, -8, 0), + Array[Int](0x1F70, 0x1F71, 74, 0, 74), + Array[Int](0x1F72, 0x1F75, 86, 0, 86), + Array[Int](0x1F76, 0x1F77, 100, 0, 100), + Array[Int](0x1F78, 0x1F79, 128, 0, 128), + Array[Int](0x1F7A, 0x1F7B, 112, 0, 112), + Array[Int](0x1F7C, 0x1F7D, 126, 0, 126), + Array[Int](0x1F80, 0x1F87, 8, 0, 8), + Array[Int](0x1F88, 0x1F8F, 0, -8, 0), + Array[Int](0x1F90, 0x1F97, 8, 0, 8), + Array[Int](0x1F98, 0x1F9F, 0, -8, 0), + Array[Int](0x1FA0, 0x1FA7, 8, 0, 8), + Array[Int](0x1FA8, 0x1FAF, 0, -8, 0), + Array[Int](0x1FB0, 0x1FB1, 8, 0, 8), + Array[Int](0x1FB3, 0x1FB3, 9, 0, 9), + Array[Int](0x1FB8, 0x1FB9, 0, -8, 0), + Array[Int](0x1FBA, 0x1FBB, 0, -74, 0), + Array[Int](0x1FBC, 0x1FBC, 0, -9, 0), + Array[Int](0x1FBE, 0x1FBE, -7205, 0, -7205), + Array[Int](0x1FC3, 0x1FC3, 9, 0, 9), + Array[Int](0x1FC8, 0x1FCB, 0, -86, 0), + Array[Int](0x1FCC, 0x1FCC, 0, -9, 0), + Array[Int](0x1FD0, 0x1FD1, 8, 0, 8), + Array[Int](0x1FD8, 0x1FD9, 0, -8, 0), + Array[Int](0x1FDA, 0x1FDB, 0, -100, 0), + Array[Int](0x1FE0, 0x1FE1, 8, 0, 8), + Array[Int](0x1FE5, 0x1FE5, 7, 0, 7), + Array[Int](0x1FE8, 0x1FE9, 0, -8, 0), + Array[Int](0x1FEA, 0x1FEB, 0, -112, 0), + Array[Int](0x1FEC, 0x1FEC, 0, -7, 0), + Array[Int](0x1FF3, 0x1FF3, 9, 0, 9), + Array[Int](0x1FF8, 0x1FF9, 0, -128, 0), + Array[Int](0x1FFA, 0x1FFB, 0, -126, 0), + Array[Int](0x1FFC, 
0x1FFC, 0, -9, 0), + Array[Int](0x2126, 0x2126, 0, -7517, 0), + Array[Int](0x212A, 0x212A, 0, -8383, 0), + Array[Int](0x212B, 0x212B, 0, -8262, 0), + Array[Int](0x2132, 0x2132, 0, 28, 0), + Array[Int](0x214E, 0x214E, -28, 0, -28), + Array[Int](0x2160, 0x216F, 0, 16, 0), + Array[Int](0x2170, 0x217F, -16, 0, -16), + Array[Int](0x2183, 0x2184, UpperLower, UpperLower, UpperLower), + Array[Int](0x24B6, 0x24CF, 0, 26, 0), + Array[Int](0x24D0, 0x24E9, -26, 0, -26), + Array[Int](0x2C00, 0x2C2E, 0, 48, 0), + Array[Int](0x2C30, 0x2C5E, -48, 0, -48), + Array[Int](0x2C60, 0x2C61, UpperLower, UpperLower, UpperLower), + Array[Int](0x2C62, 0x2C62, 0, -10743, 0), + Array[Int](0x2C63, 0x2C63, 0, -3814, 0), + Array[Int](0x2C64, 0x2C64, 0, -10727, 0), + Array[Int](0x2C65, 0x2C65, -10795, 0, -10795), + Array[Int](0x2C66, 0x2C66, -10792, 0, -10792), + Array[Int](0x2C67, 0x2C6C, UpperLower, UpperLower, UpperLower), + Array[Int](0x2C6D, 0x2C6D, 0, -10780, 0), + Array[Int](0x2C6E, 0x2C6E, 0, -10749, 0), + Array[Int](0x2C6F, 0x2C6F, 0, -10783, 0), + Array[Int](0x2C70, 0x2C70, 0, -10782, 0), + Array[Int](0x2C72, 0x2C73, UpperLower, UpperLower, UpperLower), + Array[Int](0x2C75, 0x2C76, UpperLower, UpperLower, UpperLower), + Array[Int](0x2C7E, 0x2C7F, 0, -10815, 0), + Array[Int](0x2C80, 0x2CE3, UpperLower, UpperLower, UpperLower), + Array[Int](0x2CEB, 0x2CEE, UpperLower, UpperLower, UpperLower), + Array[Int](0x2D00, 0x2D25, -7264, 0, -7264), + Array[Int](0xA640, 0xA66D, UpperLower, UpperLower, UpperLower), + Array[Int](0xA680, 0xA697, UpperLower, UpperLower, UpperLower), + Array[Int](0xA722, 0xA72F, UpperLower, UpperLower, UpperLower), + Array[Int](0xA732, 0xA76F, UpperLower, UpperLower, UpperLower), + Array[Int](0xA779, 0xA77C, UpperLower, UpperLower, UpperLower), + Array[Int](0xA77D, 0xA77D, 0, -35332, 0), + Array[Int](0xA77E, 0xA787, UpperLower, UpperLower, UpperLower), + Array[Int](0xA78B, 0xA78C, UpperLower, UpperLower, UpperLower), + Array[Int](0xA78D, 0xA78D, 0, -42280, 0), + 
Array[Int](0xA790, 0xA791, UpperLower, UpperLower, UpperLower), + Array[Int](0xA7A0, 0xA7A9, UpperLower, UpperLower, UpperLower), + Array[Int](0xFF21, 0xFF3A, 0, 32, 0), + Array[Int](0xFF41, 0xFF5A, -32, 0, -32), + Array[Int](0x10400, 0x10427, 0, 40, 0), + Array[Int](0x10428, 0x1044F, -40, 0, -40) + ) + final lazy val CASE_ORBIT: Array[Array[Int]] = Array[Array[Int]]( + Array[Int](0x004B, 0x006B), + Array[Int](0x0053, 0x0073), + Array[Int](0x006B, 0x212A), + Array[Int](0x0073, 0x017F), + Array[Int](0x00B5, 0x039C), + Array[Int](0x00C5, 0x00E5), + Array[Int](0x00DF, 0x1E9E), + Array[Int](0x00E5, 0x212B), + Array[Int](0x0130, 0x0130), + Array[Int](0x0131, 0x0131), + Array[Int](0x017F, 0x0053), + Array[Int](0x01C4, 0x01C5), + Array[Int](0x01C5, 0x01C6), + Array[Int](0x01C6, 0x01C4), + Array[Int](0x01C7, 0x01C8), + Array[Int](0x01C8, 0x01C9), + Array[Int](0x01C9, 0x01C7), + Array[Int](0x01CA, 0x01CB), + Array[Int](0x01CB, 0x01CC), + Array[Int](0x01CC, 0x01CA), + Array[Int](0x01F1, 0x01F2), + Array[Int](0x01F2, 0x01F3), + Array[Int](0x01F3, 0x01F1), + Array[Int](0x0345, 0x0399), + Array[Int](0x0392, 0x03B2), + Array[Int](0x0395, 0x03B5), + Array[Int](0x0398, 0x03B8), + Array[Int](0x0399, 0x03B9), + Array[Int](0x039A, 0x03BA), + Array[Int](0x039C, 0x03BC), + Array[Int](0x03A0, 0x03C0), + Array[Int](0x03A1, 0x03C1), + Array[Int](0x03A3, 0x03C2), + Array[Int](0x03A6, 0x03C6), + Array[Int](0x03A9, 0x03C9), + Array[Int](0x03B2, 0x03D0), + Array[Int](0x03B5, 0x03F5), + Array[Int](0x03B8, 0x03D1), + Array[Int](0x03B9, 0x1FBE), + Array[Int](0x03BA, 0x03F0), + Array[Int](0x03BC, 0x00B5), + Array[Int](0x03C0, 0x03D6), + Array[Int](0x03C1, 0x03F1), + Array[Int](0x03C2, 0x03C3), + Array[Int](0x03C3, 0x03A3), + Array[Int](0x03C6, 0x03D5), + Array[Int](0x03C9, 0x2126), + Array[Int](0x03D0, 0x0392), + Array[Int](0x03D1, 0x03F4), + Array[Int](0x03D5, 0x03A6), + Array[Int](0x03D6, 0x03A0), + Array[Int](0x03F0, 0x039A), + Array[Int](0x03F1, 0x03A1), + Array[Int](0x03F4, 0x0398), + 
Array[Int](0x03F5, 0x0395), + Array[Int](0x1E60, 0x1E61), + Array[Int](0x1E61, 0x1E9B), + Array[Int](0x1E9B, 0x1E60), + Array[Int](0x1E9E, 0x00DF), + Array[Int](0x1FBE, 0x0345), + Array[Int](0x2126, 0x03A9), + Array[Int](0x212A, 0x004B), + Array[Int](0x212B, 0x00C5) + ) + private final lazy val foldLl: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0041, 0x005a, 1), + Array[Int](0x00c0, 0x00d6, 1), + Array[Int](0x00d8, 0x00de, 1), + Array[Int](0x0100, 0x012e, 2), + Array[Int](0x0132, 0x0136, 2), + Array[Int](0x0139, 0x0147, 2), + Array[Int](0x014a, 0x0178, 2), + Array[Int](0x0179, 0x017d, 2), + Array[Int](0x0181, 0x0182, 1), + Array[Int](0x0184, 0x0186, 2), + Array[Int](0x0187, 0x0189, 2), + Array[Int](0x018a, 0x018b, 1), + Array[Int](0x018e, 0x0191, 1), + Array[Int](0x0193, 0x0194, 1), + Array[Int](0x0196, 0x0198, 1), + Array[Int](0x019c, 0x019d, 1), + Array[Int](0x019f, 0x01a0, 1), + Array[Int](0x01a2, 0x01a6, 2), + Array[Int](0x01a7, 0x01a9, 2), + Array[Int](0x01ac, 0x01ae, 2), + Array[Int](0x01af, 0x01b1, 2), + Array[Int](0x01b2, 0x01b3, 1), + Array[Int](0x01b5, 0x01b7, 2), + Array[Int](0x01b8, 0x01bc, 4), + Array[Int](0x01c4, 0x01c5, 1), + Array[Int](0x01c7, 0x01c8, 1), + Array[Int](0x01ca, 0x01cb, 1), + Array[Int](0x01cd, 0x01db, 2), + Array[Int](0x01de, 0x01ee, 2), + Array[Int](0x01f1, 0x01f2, 1), + Array[Int](0x01f4, 0x01f6, 2), + Array[Int](0x01f7, 0x01f8, 1), + Array[Int](0x01fa, 0x0232, 2), + Array[Int](0x023a, 0x023b, 1), + Array[Int](0x023d, 0x023e, 1), + Array[Int](0x0241, 0x0243, 2), + Array[Int](0x0244, 0x0246, 1), + Array[Int](0x0248, 0x024e, 2), + Array[Int](0x0345, 0x0370, 43), + Array[Int](0x0372, 0x0376, 4), + Array[Int](0x0386, 0x0388, 2), + Array[Int](0x0389, 0x038a, 1), + Array[Int](0x038c, 0x038e, 2), + Array[Int](0x038f, 0x0391, 2), + Array[Int](0x0392, 0x03a1, 1), + Array[Int](0x03a3, 0x03ab, 1), + Array[Int](0x03cf, 0x03d8, 9), + Array[Int](0x03da, 0x03ee, 2), + Array[Int](0x03f4, 0x03f7, 3), + Array[Int](0x03f9, 0x03fa, 1), + 
Array[Int](0x03fd, 0x042f, 1), + Array[Int](0x0460, 0x0480, 2), + Array[Int](0x048a, 0x04c0, 2), + Array[Int](0x04c1, 0x04cd, 2), + Array[Int](0x04d0, 0x0526, 2), + Array[Int](0x0531, 0x0556, 1), + Array[Int](0x10a0, 0x10c5, 1), + Array[Int](0x1e00, 0x1e94, 2), + Array[Int](0x1e9e, 0x1efe, 2), + Array[Int](0x1f08, 0x1f0f, 1), + Array[Int](0x1f18, 0x1f1d, 1), + Array[Int](0x1f28, 0x1f2f, 1), + Array[Int](0x1f38, 0x1f3f, 1), + Array[Int](0x1f48, 0x1f4d, 1), + Array[Int](0x1f59, 0x1f5f, 2), + Array[Int](0x1f68, 0x1f6f, 1), + Array[Int](0x1f88, 0x1f8f, 1), + Array[Int](0x1f98, 0x1f9f, 1), + Array[Int](0x1fa8, 0x1faf, 1), + Array[Int](0x1fb8, 0x1fbc, 1), + Array[Int](0x1fc8, 0x1fcc, 1), + Array[Int](0x1fd8, 0x1fdb, 1), + Array[Int](0x1fe8, 0x1fec, 1), + Array[Int](0x1ff8, 0x1ffc, 1), + Array[Int](0x2126, 0x212a, 4), + Array[Int](0x212b, 0x2132, 7), + Array[Int](0x2183, 0x2c00, 2685), + Array[Int](0x2c01, 0x2c2e, 1), + Array[Int](0x2c60, 0x2c62, 2), + Array[Int](0x2c63, 0x2c64, 1), + Array[Int](0x2c67, 0x2c6d, 2), + Array[Int](0x2c6e, 0x2c70, 1), + Array[Int](0x2c72, 0x2c75, 3), + Array[Int](0x2c7e, 0x2c80, 1), + Array[Int](0x2c82, 0x2ce2, 2), + Array[Int](0x2ceb, 0x2ced, 2), + Array[Int](0xa640, 0xa66c, 2), + Array[Int](0xa680, 0xa696, 2), + Array[Int](0xa722, 0xa72e, 2), + Array[Int](0xa732, 0xa76e, 2), + Array[Int](0xa779, 0xa77d, 2), + Array[Int](0xa77e, 0xa786, 2), + Array[Int](0xa78b, 0xa78d, 2), + Array[Int](0xa790, 0xa7a0, 16), + Array[Int](0xa7a2, 0xa7a8, 2), + Array[Int](0xff21, 0xff3a, 1), + Array[Int](0x10400, 0x10427, 1) + ) + private final lazy val foldInherited: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0399, 0x03b9, 32), Array[Int](0x1fbe, 0x1fbe, 1)) + private final lazy val foldM: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0399, 0x03b9, 32), Array[Int](0x1fbe, 0x1fbe, 1)) + private final lazy val foldL: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x0345, 0x0345, 1)) + private final lazy val foldMn: Array[Array[Int]] = + 
Array[Array[Int]](Array[Int](0x0399, 0x03b9, 32), Array[Int](0x1fbe, 0x1fbe, 1)) + private final lazy val foldCommon: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x039c, 0x03bc, 32)) + private final lazy val foldGreek: Array[Array[Int]] = + Array[Array[Int]](Array[Int](0x00b5, 0x0345, 656)) + private final lazy val foldLu: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x0061, 0x007a, 1), + Array[Int](0x00b5, 0x00df, 42), + Array[Int](0x00e0, 0x00f6, 1), + Array[Int](0x00f8, 0x00ff, 1), + Array[Int](0x0101, 0x012f, 2), + Array[Int](0x0133, 0x0137, 2), + Array[Int](0x013a, 0x0148, 2), + Array[Int](0x014b, 0x0177, 2), + Array[Int](0x017a, 0x017e, 2), + Array[Int](0x017f, 0x0180, 1), + Array[Int](0x0183, 0x0185, 2), + Array[Int](0x0188, 0x018c, 4), + Array[Int](0x0192, 0x0195, 3), + Array[Int](0x0199, 0x019a, 1), + Array[Int](0x019e, 0x01a1, 3), + Array[Int](0x01a3, 0x01a5, 2), + Array[Int](0x01a8, 0x01ad, 5), + Array[Int](0x01b0, 0x01b4, 4), + Array[Int](0x01b6, 0x01b9, 3), + Array[Int](0x01bd, 0x01bf, 2), + Array[Int](0x01c5, 0x01c6, 1), + Array[Int](0x01c8, 0x01c9, 1), + Array[Int](0x01cb, 0x01cc, 1), + Array[Int](0x01ce, 0x01dc, 2), + Array[Int](0x01dd, 0x01ef, 2), + Array[Int](0x01f2, 0x01f3, 1), + Array[Int](0x01f5, 0x01f9, 4), + Array[Int](0x01fb, 0x021f, 2), + Array[Int](0x0223, 0x0233, 2), + Array[Int](0x023c, 0x023f, 3), + Array[Int](0x0240, 0x0242, 2), + Array[Int](0x0247, 0x024f, 2), + Array[Int](0x0250, 0x0254, 1), + Array[Int](0x0256, 0x0257, 1), + Array[Int](0x0259, 0x025b, 2), + Array[Int](0x0260, 0x0263, 3), + Array[Int](0x0265, 0x0268, 3), + Array[Int](0x0269, 0x026b, 2), + Array[Int](0x026f, 0x0271, 2), + Array[Int](0x0272, 0x0275, 3), + Array[Int](0x027d, 0x0283, 3), + Array[Int](0x0288, 0x028c, 1), + Array[Int](0x0292, 0x0345, 179), + Array[Int](0x0371, 0x0373, 2), + Array[Int](0x0377, 0x037b, 4), + Array[Int](0x037c, 0x037d, 1), + Array[Int](0x03ac, 0x03af, 1), + Array[Int](0x03b1, 0x03ce, 1), + Array[Int](0x03d0, 0x03d1, 1), + 
Array[Int](0x03d5, 0x03d7, 1), + Array[Int](0x03d9, 0x03ef, 2), + Array[Int](0x03f0, 0x03f2, 1), + Array[Int](0x03f5, 0x03fb, 3), + Array[Int](0x0430, 0x045f, 1), + Array[Int](0x0461, 0x0481, 2), + Array[Int](0x048b, 0x04bf, 2), + Array[Int](0x04c2, 0x04ce, 2), + Array[Int](0x04cf, 0x0527, 2), + Array[Int](0x0561, 0x0586, 1), + Array[Int](0x1d79, 0x1d7d, 4), + Array[Int](0x1e01, 0x1e95, 2), + Array[Int](0x1e9b, 0x1ea1, 6), + Array[Int](0x1ea3, 0x1eff, 2), + Array[Int](0x1f00, 0x1f07, 1), + Array[Int](0x1f10, 0x1f15, 1), + Array[Int](0x1f20, 0x1f27, 1), + Array[Int](0x1f30, 0x1f37, 1), + Array[Int](0x1f40, 0x1f45, 1), + Array[Int](0x1f51, 0x1f57, 2), + Array[Int](0x1f60, 0x1f67, 1), + Array[Int](0x1f70, 0x1f7d, 1), + Array[Int](0x1fb0, 0x1fb1, 1), + Array[Int](0x1fbe, 0x1fd0, 18), + Array[Int](0x1fd1, 0x1fe0, 15), + Array[Int](0x1fe1, 0x1fe5, 4), + Array[Int](0x214e, 0x2184, 54), + Array[Int](0x2c30, 0x2c5e, 1), + Array[Int](0x2c61, 0x2c65, 4), + Array[Int](0x2c66, 0x2c6c, 2), + Array[Int](0x2c73, 0x2c76, 3), + Array[Int](0x2c81, 0x2ce3, 2), + Array[Int](0x2cec, 0x2cee, 2), + Array[Int](0x2d00, 0x2d25, 1), + Array[Int](0xa641, 0xa66d, 2), + Array[Int](0xa681, 0xa697, 2), + Array[Int](0xa723, 0xa72f, 2), + Array[Int](0xa733, 0xa76f, 2), + Array[Int](0xa77a, 0xa77c, 2), + Array[Int](0xa77f, 0xa787, 2), + Array[Int](0xa78c, 0xa791, 5), + Array[Int](0xa7a1, 0xa7a9, 2), + Array[Int](0xff41, 0xff5a, 1), + Array[Int](0x10428, 0x1044f, 1) + ) + private final lazy val foldLt: Array[Array[Int]] = + Array[Array[Int]]( + Array[Int](0x01c4, 0x01c6, 2), + Array[Int](0x01c7, 0x01c9, 2), + Array[Int](0x01ca, 0x01cc, 2), + Array[Int](0x01f1, 0x01f3, 2), + Array[Int](0x1f80, 0x1f87, 1), + Array[Int](0x1f90, 0x1f97, 1), + Array[Int](0x1fa0, 0x1fa7, 1), + Array[Int](0x1fb3, 0x1fc3, 16), + Array[Int](0x1ff3, 0x1ff3, 1) + ) + final lazy val CATEGORIES: Map[String, Array[Array[Int]]] = { + lazy val map: HashMap[String, Array[Array[Int]]] = new HashMap[String, Array[Array[Int]]]() + 
map.put("Lm", Lm) + map.put("Ll", Ll) + map.put("C", C) + map.put("M", M) + map.put("L", L) + map.put("N", N) + map.put("P", P) + map.put("S", S) + map.put("Z", Z) + map.put("Me", Me) + map.put("Mc", Mc) + map.put("Mn", Mn) + map.put("Zl", Zl) + map.put("Zp", Zp) + map.put("Zs", Zs) + map.put("Cs", Cs) + map.put("Co", Co) + map.put("Cf", Cf) + map.put("Cc", Cc) + map.put("Po", Po) + map.put("Pi", Pi) + map.put("Pf", Pf) + map.put("Pe", Pe) + map.put("Pd", Pd) + map.put("Pc", Pc) + map.put("Ps", Ps) + map.put("Nd", Nd) + map.put("Nl", Nl) + map.put("No", No) + map.put("So", So) + map.put("Sm", Sm) + map.put("Sk", Sk) + map.put("Sc", Sc) + map.put("Lu", Lu) + map.put("Lt", Lt) + map.put("Lo", Lo) + map + } + final lazy val SCRIPTS: Map[String, Array[Array[Int]]] = { + lazy val map: HashMap[String, Array[Array[Int]]] = new HashMap[String, Array[Array[Int]]]() + map.put("Katakana", Katakana) + map.put("Malayalam", Malayalam) + map.put("Phags_Pa", Phags_Pa) + map.put("Inscriptional_Parthian", Inscriptional_Parthian) + map.put("Latin", Latin) + map.put("Inscriptional_Pahlavi", Inscriptional_Pahlavi) + map.put("Osmanya", Osmanya) + map.put("Khmer", Khmer) + map.put("Inherited", Inherited) + map.put("Telugu", Telugu) + map.put("Samaritan", Samaritan) + map.put("Bopomofo", Bopomofo) + map.put("Imperial_Aramaic", Imperial_Aramaic) + map.put("Kaithi", Kaithi) + map.put("Mandaic", Mandaic) + map.put("Old_South_Arabian", Old_South_Arabian) + map.put("Kayah_Li", Kayah_Li) + map.put("New_Tai_Lue", New_Tai_Lue) + map.put("Tai_Le", Tai_Le) + map.put("Kharoshthi", Kharoshthi) + map.put("Common", Common) + map.put("Kannada", Kannada) + map.put("Old_Turkic", Old_Turkic) + map.put("Tamil", Tamil) + map.put("Tagalog", Tagalog) + map.put("Brahmi", Brahmi) + map.put("Arabic", Arabic) + map.put("Tagbanwa", Tagbanwa) + map.put("Canadian_Aboriginal", Canadian_Aboriginal) + map.put("Tibetan", Tibetan) + map.put("Coptic", Coptic) + map.put("Hiragana", Hiragana) + map.put("Limbu", Limbu) + 
map.put("Egyptian_Hieroglyphs", Egyptian_Hieroglyphs) + map.put("Avestan", Avestan) + map.put("Myanmar", Myanmar) + map.put("Armenian", Armenian) + map.put("Sinhala", Sinhala) + map.put("Bengali", Bengali) + map.put("Greek", Greek) + map.put("Cham", Cham) + map.put("Hebrew", Hebrew) + map.put("Meetei_Mayek", Meetei_Mayek) + map.put("Saurashtra", Saurashtra) + map.put("Hangul", Hangul) + map.put("Runic", Runic) + map.put("Deseret", Deseret) + map.put("Lisu", Lisu) + map.put("Sundanese", Sundanese) + map.put("Glagolitic", Glagolitic) + map.put("Oriya", Oriya) + map.put("Buhid", Buhid) + map.put("Ethiopic", Ethiopic) + map.put("Javanese", Javanese) + map.put("Syloti_Nagri", Syloti_Nagri) + map.put("Vai", Vai) + map.put("Cherokee", Cherokee) + map.put("Ogham", Ogham) + map.put("Batak", Batak) + map.put("Syriac", Syriac) + map.put("Gurmukhi", Gurmukhi) + map.put("Tai_Tham", Tai_Tham) + map.put("Ol_Chiki", Ol_Chiki) + map.put("Mongolian", Mongolian) + map.put("Hanunoo", Hanunoo) + map.put("Cypriot", Cypriot) + map.put("Buginese", Buginese) + map.put("Bamum", Bamum) + map.put("Lepcha", Lepcha) + map.put("Thaana", Thaana) + map.put("Old_Persian", Old_Persian) + map.put("Cuneiform", Cuneiform) + map.put("Rejang", Rejang) + map.put("Georgian", Georgian) + map.put("Shavian", Shavian) + map.put("Lycian", Lycian) + map.put("Nko", Nko) + map.put("Yi", Yi) + map.put("Lao", Lao) + map.put("Linear_B", Linear_B) + map.put("Old_Italic", Old_Italic) + map.put("Tai_Viet", Tai_Viet) + map.put("Devanagari", Devanagari) + map.put("Lydian", Lydian) + map.put("Tifinagh", Tifinagh) + map.put("Ugaritic", Ugaritic) + map.put("Thai", Thai) + map.put("Cyrillic", Cyrillic) + map.put("Gujarati", Gujarati) + map.put("Carian", Carian) + map.put("Phoenician", Phoenician) + map.put("Balinese", Balinese) + map.put("Braille", Braille) + map.put("Han", Han) + map.put("Gothic", Gothic) + map + } + final lazy val PROPERTIES: Map[String, Array[Array[Int]]] = { + lazy val map: HashMap[String, 
Array[Array[Int]]] = new HashMap[String, Array[Array[Int]]]() + map.put("Pattern_Syntax", Pattern_Syntax) + map.put("Other_ID_Start", Other_ID_Start) + map.put("Pattern_White_Space", Pattern_White_Space) + map.put("Other_Lowercase", Other_Lowercase) + map.put("Soft_Dotted", Soft_Dotted) + map.put("Hex_Digit", Hex_Digit) + map.put("ASCII_Hex_Digit", ASCII_Hex_Digit) + map.put("Deprecated", Deprecated) + map.put("Terminal_Punctuation", Terminal_Punctuation) + map.put("Quotation_Mark", Quotation_Mark) + map.put("Other_ID_Continue", Other_ID_Continue) + map.put("Bidi_Control", Bidi_Control) + map.put("Variation_Selector", Variation_Selector) + map.put("Noncharacter_Code_Point", Noncharacter_Code_Point) + map.put("Other_Math", Other_Math) + map.put("Unified_Ideograph", Unified_Ideograph) + map.put("Hyphen", Hyphen) + map.put("IDS_Binary_Operator", IDS_Binary_Operator) + map.put("Logical_Order_Exception", Logical_Order_Exception) + map.put("Radical", Radical) + map.put("Other_Uppercase", Other_Uppercase) + map.put("STerm", STerm) + map.put("Other_Alphabetic", Other_Alphabetic) + map.put("Diacritic", Diacritic) + map.put("Extender", Extender) + map.put("Join_Control", Join_Control) + map.put("Ideographic", Ideographic) + map.put("Dash", Dash) + map.put("IDS_Trinary_Operator", IDS_Trinary_Operator) + map.put("Other_Grapheme_Extend", Other_Grapheme_Extend) + map.put("Other_Default_Ignorable_Code_Point", + Other_Default_Ignorable_Code_Point) + map.put("White_Space", White_Space) + map + } + final lazy val FOLD_CATEGORIES: Map[String, Array[Array[Int]]] = { + lazy val map: HashMap[String, Array[Array[Int]]] = new HashMap[String, Array[Array[Int]]]() + map.put("Ll", foldLl) + map.put("Inherited", foldInherited) + map.put("M", foldM) + map.put("L", foldL) + map.put("Mn", foldMn) + map.put("Common", foldCommon) + map.put("Greek", foldGreek) + map.put("Lu", foldLu) + map.put("Lt", foldLt) + map + } + final lazy val FOLD_SCRIPT: Map[String, Array[Array[Int]]] = { + new 
HashMap[String, Array[Array[Int]]]() + } +// Fold orbit bytes: 63 pairs, 252 bytes +} diff --git a/input/rsc/Utils.scala b/input/rsc/Utils.scala new file mode 100644 index 0000000..29b3540 --- /dev/null +++ b/input/rsc/Utils.scala @@ -0,0 +1,207 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package com.twitter.re2s + +/** + * Various constants and helper utilities. + */ +object Utils { + + final val EMPTY_INTS: Array[Int] = new Array[Int](0) + + // Returns true iff |c| is an ASCII letter or decimal digit. + def isalnum(c: Int): Boolean = + '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' + + // If |c| is an ASCII hex digit, returns its value, otherwise -1. + def unhex(c: Int): Int = { + if ('0' <= c && c <= '9') { + return c - '0' + } + if ('a' <= c && c <= 'f') { + return c - 'a' + 10 + } + if ('A' <= c && c <= 'F') { + return c - 'A' + 10 + } + return -1 + } + + private final val METACHARACTERS: String = "\\.+*?()|[]{}^$" + + // Appends a RE2 literal to |out| for rune |rune|, + // with regexp metacharacters escaped. + def escapeRune(out: java.lang.StringBuilder, rune: Int): Unit = { + if (Unicode.isPrint(rune)) { + if (METACHARACTERS.indexOf(rune.toChar) >= 0) { + out.append('\\') + } + out.appendCodePoint(rune) + return + } + + rune match { + case '"' => + out.append("\\\"") + case '\\' => + out.append("\\\\") + case '\t' => + out.append("\\t") + case '\n' => + out.append("\\n") + case '\r' => + out.append("\\r") + case '\b' => + out.append("\\b") + case '\f' => + out.append("\\f") + case _ => + val s: String = Integer.toHexString(rune) + if (rune < 0x100) { + out.append("\\x") + if (s.length() == 1) { + out.append('0') + } + out.append(s) + } else { + out.append("\\x{").append(s).append('}') + } + } + } + + // Returns the array of runes in the specified Java UTF-16 string. 
+  def stringToRunes(str: String): Array[Int] = {
+    // Size the result by code points rather than UTF-16 chars, so that a
+    // surrogate pair occupies a single slot.
+    val charlen: Int = str.length()
+    val runelen: Int = str.codePointCount(0, charlen)
+    val runes: Array[Int] = new Array[Int](runelen)
+    var r: Int = 0
+    var c: Int = 0
+    while (c < charlen) {
+      val rune: Int = str.codePointAt(c)
+      runes(r) = rune
+      r += 1
+      // Advance by one or two chars depending on the width of the rune.
+      c += Character.charCount(rune)
+    }
+    runes
+  }
+
+  // Returns the Java UTF-16 string containing the single rune |r|.
+  //
+  // A supplementary code point (U+10000..U+10FFFF) is encoded as a surrogate
+  // pair.  Any other value, including values that are not valid code points,
+  // yields the one-char string holding |r| truncated to 16 bits.
+  def runeToString(r: Int): String = {
+    if (r >= Character.MIN_SUPPLEMENTARY_CODE_POINT && r <= Character.MAX_CODE_POINT) {
+      // Encode the rune itself; encoding the truncated char here used to
+      // return a single, unrelated BMP character.
+      new String(Character.toChars(r))
+    } else {
+      String.valueOf(r.toChar)
+    }
+  }
+
+  // Returns a new copy of the subarray array[start, end) (end exclusive).
+  def subarray_i(array: Array[Int], start: Int, end: Int): Array[Int] = {
+    val r: Array[Int] = new Array[Int](end - start)
+    var i: Int = start
+    while (i < end) {
+      r(i - start) = array(i)
+      i += 1
+    }
+    r
+  }
+
+  // Returns a new copy of the subarray array[start, end) (end exclusive).
+  def subarray_b(array: Array[Byte], start: Int, end: Int): Array[Byte] = {
+    val r: Array[Byte] = new Array[Byte](end - start)
+    var i: Int = start
+    while (i < end) {
+      r(i - start) = array(i)
+      i += 1
+    }
+    r
+  }
+
+  // Returns the index of the first occurrence of array |target| within
+  // array |source| at or after |fromIndex|, or -1 if not found.
+  //
+  // A negative |fromIndex| is treated as 0.  When |fromIndex| is at or past
+  // the end of |source|, the result is source.length for an empty |target|
+  // and -1 otherwise.  An empty |target| otherwise matches at |fromIndex|.
+  def indexOf(source: Array[Byte], target: Array[Byte], _fromIndex: Int): Int = {
+    var fromIndex: Int = _fromIndex
+    if (fromIndex >= source.length) {
+      return (if (target.length == 0) source.length else -1)
+    }
+    if (fromIndex < 0) {
+      fromIndex = 0
+    }
+    if (target.length == 0) {
+      return fromIndex
+    }
+
+    val first: Byte = target(0)
+    // Last position at which |target| can still fit inside |source|.
+    val max: Int = source.length - target.length
+    var i: Int = fromIndex
+    while (i <= max) {
+      // Look for first byte.
+      if (source(i) != first) {
+        while ({ i += 1; i } <= max && source(i) != first) {}
+      }
+
+      // Found first byte, now look at the rest of |target|.
+      if (i <= max) {
+        var j: Int = i + 1
+        val end: Int = j + target.length - 1
+        var k: Int = 1
+        while (j < end && source(j) == target(k)) { j += 1; k += 1 }
+
+        if (j == end) {
+          return i // found whole array
+        }
+      }
+
+      i += 1
+    }
+    return -1
+  }
+
+  // isWordRune reports whether r is considered a ``word character''
+  // during the evaluation of the \b and \B zero-width assertions.
+  // These assertions are ASCII-only: the word characters are [A-Za-z0-9_].
+  def isWordRune(r: Int): Boolean =
+    ('A' <= r && r <= 'Z' ||
+      'a' <= r && r <= 'z' ||
+      '0' <= r && r <= '9' ||
+      r == '_')
+
+  //// EMPTY_* flags
+
+  final val EMPTY_BEGIN_LINE: Int = 0x01
+  final val EMPTY_END_LINE: Int = 0x02
+  final val EMPTY_BEGIN_TEXT: Int = 0x04
+  final val EMPTY_END_TEXT: Int = 0x08
+  final val EMPTY_WORD_BOUNDARY: Int = 0x10
+  final val EMPTY_NO_WORD_BOUNDARY: Int = 0x20
+  final val EMPTY_ALL: Int = -1 // (impossible)
+
+  // emptyOpContext returns the zero-width assertions satisfied at the position
+  // between the runes r1 and r2, a bitmask of EMPTY_* flags.
+  // Passing r1 == -1 indicates that the position is at the beginning of the
+  // text.
+  // Passing r2 == -1 indicates that the position is at the end of the text.
+  final def emptyOpContext(r1: Int, r2: Int): Int = {
+    var op: Int = 0
+    if (r1 < 0) { // no rune before the position: beginning of text, hence also beginning of line
+      op |= EMPTY_BEGIN_TEXT | EMPTY_BEGIN_LINE
+    }
+    if (r1 == '\n') {
+      op |= EMPTY_BEGIN_LINE
+    }
+    if (r2 < 0) { // no rune after the position: end of text, hence also end of line
+      op |= EMPTY_END_TEXT | EMPTY_END_LINE
+    }
+    if (r2 == '\n') {
+      op |= EMPTY_END_LINE
+    }
+    if (isWordRune(r1) != isWordRune(r2)) { // exactly one side is a word rune
+      op |= EMPTY_WORD_BOUNDARY
+    } else {
+      op |= EMPTY_NO_WORD_BOUNDARY
+    }
+    op
+  }
+}
diff --git a/output/rsc.RscBenchmark b/output/rsc.RscBenchmark
new file mode 100644
index 0000000..573541a
--- /dev/null
+++ b/output/rsc.RscBenchmark
@@ -0,0 +1 @@
+0
diff --git a/project/plugins.sbt b/project/plugins.sbt
index afc9d5a..2a63bf0 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -1 +1 @@
-addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.3.7")
+addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.4.0-SNAPSHOT")
diff --git a/scripts/run.py b/scripts/run.py
index 187069b..7a02658 100755
--- a/scripts/run.py
+++ b/scripts/run.py
@@ -35,7 +35,7 @@ def run(cmd):
     return subp.check_output(cmd)
 
 def compile(bench, compilecmd):
-    cmd = [sbt, '-J-Xmx2G', 'clean']
+    cmd = [sbt, '-J-Xmx6G', 'clean']
     cmd.append('set mainClass in Compile := Some("{}")'.format(bench))
     cmd.append(compilecmd)
     return run(cmd)
@@ -43,26 +43,29 @@ def compile(bench, compilecmd):
 sbt = where('sbt')
 
 benchmarks = [
-        'bounce.BounceBenchmark',
-        'list.ListBenchmark',
-        'richards.RichardsBenchmark',
-        'queens.QueensBenchmark',
-        'permute.PermuteBenchmark',
-        'deltablue.DeltaBlueBenchmark',
-        'tracer.TracerBenchmark',
-        'brainfuck.BrainfuckBenchmark',
-        'json.JsonBenchmark',
-        'cd.CDBenchmark',
-        'kmeans.KmeansBenchmark',
-        'gcbench.GCBenchBenchmark',
-        'mandelbrot.MandelbrotBenchmark',
-        'nbody.NbodyBenchmark',
-        'sudoku.SudokuBenchmark',
+        'bounce.BounceBenchmark',
+        'list.ListBenchmark',
+        'queens.QueensBenchmark',
+        'richards.RichardsBenchmark',
+        'permute.PermuteBenchmark',
+        'deltablue.DeltaBlueBenchmark',
+        'tracer.TracerBenchmark',
+        'json.JsonBenchmark',
+        'sudoku.SudokuBenchmark',
+        'brainfuck.BrainfuckBenchmark',
+        'cd.CDBenchmark',
+        'kmeans.KmeansBenchmark',
+        'nbody.NbodyBenchmark',
+        'rsc.RscBenchmark',
+        'gcbench.GCBenchBenchmark',
+        'mandelbrot.MandelbrotBenchmark',
 ]
 
 configurations = [
-        'jvm',
-        'scala-native-0.3.7',
+        # 'jvm',
+        # 'scala-native-0.3.8',
+        # 'baseline',
+        'current'
 ]
 
 if 'GRAALVM_HOME' in os.environ:
@@ -72,7 +75,7 @@ def compile(bench, compilecmd):
     ]
 
 runs = 20
-batches = 3000
+batches = 4000
 batch_size = 1
 
 if __name__ == "__main__":
diff --git a/scripts/summary.py b/scripts/summary.py
index b953295..e505dbe 100644
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -10,8 +10,8 @@ def config_data(bench, conf):
             with open('results/{}/{}/{}'.format(conf, bench, run)) as data:
                 for line in data.readlines():
                     points.append(float(line))
-            # take only last 1000 to account for startup
-            points = points[-1000:]
+            # take only last 2000 to account for startup
+            points = points[-2000:]
             # filter out 1% worst measurements as outliers
             pmax = np.percentile(points, 99)
             for point in points:
diff --git a/src/main/scala/rsc/Compiler.scala b/src/main/scala/rsc/Compiler.scala
new file mode 100644
index 0000000..e18fed6
--- /dev/null
+++ b/src/main/scala/rsc/Compiler.scala
@@ -0,0 +1,162 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc
+
+import java.nio.file._
+import rsc.lexis._
+import rsc.parse._
+import rsc.pretty._
+import rsc.report._
+import rsc.settings._
+import rsc.syntax._
+import rsc.typecheck._
+import rsc.util._
+
+class Compiler(val settings: Settings, val reporter: Reporter) extends Pretty {
+  var trees: List[Source] = Nil
+  var symtab: Symtab = Symtab()
+  var todo: Todo = Todo()
+
+  def run(): Unit = { // runs the tasks in order; stops after a requested task or once problems are reported
+    for ((taskName, taskFn) <- tasks) {
+      val start = System.nanoTime()
+      try {
+        taskFn()
+      } catch { // any throwable is turned into a CrashMessage on the reporter instead of propagating
+        case crash @ CrashException(pos, message, ex) =>
+          val ex1 = if (ex != null) ex else crash
+          reporter.append(CrashMessage(pos, message, ex1))
+        case ex: Throwable =>
+          reporter.append(CrashMessage(NoPosition, ex.getMessage, ex))
+      }
+      val end = System.nanoTime()
+      val ms = (end - start) / 1000000 // nanoseconds to milliseconds
+      if (settings.xprint("timings")) {
+        reporter.append(VerboseMessage(s"Finished $taskName in $ms ms"))
+      }
+      if (settings.xprint(taskName)) {
+        reporter.append(VerboseMessage(this.str))
+      }
+      if (settings.ystopAfter(taskName)) {
+        return
+      }
+      if (reporter.problems.nonEmpty) { // print a summary and skip the remaining tasks
+        val numProblems = reporter.problems.length
+        if (numProblems == 1) println("one error found")
+        else if (numProblems == 2) println("two errors found")
+        else if (numProblems == 3) println("three errors found")
+        else if (numProblems == 4) println("four errors found")
+        else println(s"$numProblems errors found")
+        return
+      }
+    }
+  }
+
+  private def tasks: List[(String, () => Unit)] = List( // (name, action) pairs, executed by run() in this order
+    "parse" -> parse,
+    "link" -> link,
+    "schedule" -> schedule,
+    "scope" -> scope,
+    "outline" -> outline,
+    "typecheck" -> typecheck
+  )
+
+  private def parse(): Unit = { // parses every existing input into `trees`; missing files are reported
+    val inputs = settings.ins
+    trees = inputs.flatMap { input =>
+      if (Files.exists(input.path)) {
+        val parser = Parser(settings, reporter, input)
+        parser.accept(BOF)
+        val tree = parser.source()
+        parser.accept(EOF)
+        Some(tree)
+      } else {
+        reporter.append(FileNotFound(input))
+        None
+      }
+    }
+    if (trees.isEmpty) {
+      reporter.append(FilesNotFound())
+    }
+  }
+
+  private def link(): Unit = {
+    val linker = Linker(settings, reporter, symtab, todo)
+    linker.apply(trees, settings.classpath)
+  }
+
+  private def schedule(): Unit = { // stacks java.lang._, scala._ and scala.Predef._ importer scopes on the root env
+    val rootEnv = Env(symtab.scopes("_root_."), symtab.scopes("π."))
+
+    val javaLangQual = TermSelect(TermId("java"), TermId("lang"))
+    val javaLangImporter = Importer(javaLangQual, List(ImporteeWildcard()))
+    val javaLangScope = ImporterScope(javaLangImporter)
+    todo.scopes.add(rootEnv -> javaLangScope)
+    val javaLangEnv = javaLangScope :: rootEnv
+
+    val scalaImporter = Importer(TermId("scala"), List(ImporteeWildcard()))
+    val scalaScope = ImporterScope(scalaImporter)
+    todo.scopes.add(javaLangEnv -> scalaScope)
+    val scalaEnv = scalaScope :: javaLangEnv
+
+    val predefQual = TermSelect(TermId("scala"), TermId("Predef"))
+    val predefImporter = Importer(predefQual, List(ImporteeWildcard()))
+    val predefScope = ImporterScope(predefImporter)
+    todo.scopes.add(scalaEnv -> predefScope)
+    val predefEnv = predefScope :: scalaEnv
+
+    val scheduler = Scheduler(settings, reporter, symtab, todo)
+    trees.foreach(scheduler.apply(predefEnv, _))
+  }
+
+  private def scope(): Unit = { // drains todo.scopes; blocked scopes are re-queued, cyclic ones reported
+    val scoper = Scoper(settings, reporter, symtab, todo)
+    while (!todo.scopes.isEmpty) {
+      val (env, scope) = todo.scopes.remove()
+      scope.unblock()
+      if (scope.status.isPending) {
+        scoper.apply(env, scope)
+      }
+      if (scope.status.isBlocked) {
+        todo.scopes.add(env -> scope)
+      }
+      if (scope.status.isCyclic) {
+        reporter.append(IllegalCyclicReference(scope))
+      }
+    }
+  }
+
+  private def outline(): Unit = { // drains todo.mods first, then todo.tpts
+    val outliner = Outliner(settings, reporter, symtab)
+    while (!todo.mods.isEmpty) {
+      val (env, mod) = todo.mods.remove()
+      outliner.apply(env, mod)
+    }
+    while (!todo.tpts.isEmpty) {
+      val (env, tpt) = todo.tpts.remove()
+      outliner.apply(env, tpt)
+    }
+  }
+
+  private def typecheck(): Unit = { // drains todo.terms
+    val typechecker = Typechecker(settings, reporter, symtab)
+    while (!todo.terms.isEmpty) {
+      val (env, term) = todo.terms.remove()
+      typechecker.apply(env, term)
+    }
+  }
+
+  def printStr(p: Printer): Unit = {
+    PrettyCompiler.str(p, this)
+  }
+
+  def printRepl(p: Printer): Unit = {
+    PrettyCompiler.repl(p, this)
+  }
+}
+
+object Compiler {
+  def apply(settings: Settings, reporter: Reporter): Compiler = {
+    new Compiler(settings, reporter)
+  }
+}
diff --git a/src/main/scala/rsc/RscBenchmark.scala b/src/main/scala/rsc/RscBenchmark.scala
new file mode 100644
index 0000000..e19f4dd
--- /dev/null
+++ b/src/main/scala/rsc/RscBenchmark.scala
@@ -0,0 +1,9 @@
+package rsc
+
+object RscBenchmark extends communitybench.Benchmark {
+  def run(input: String): Int = // the input string is split on whitespace into CLI arguments
+    rsc.cli.Main.run(input.split("\\s+"))
+
+  override def main(args: Array[String]): Unit =
+    super.main(args)
+}
diff --git a/src/main/scala/rsc/cli/Main.scala b/src/main/scala/rsc/cli/Main.scala
new file mode 100644
index 0000000..6514edc
--- /dev/null
+++ b/src/main/scala/rsc/cli/Main.scala
@@ -0,0 +1,25 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.cli
+
+import rsc.Compiler
+import rsc.report._
+import rsc.settings._
+
+object Main {
+  def main(args: Array[String]): Unit = {
+    run(args) // the status code returned by run is discarded here
+  }
+
+  def run(args: Array[String]): Int = { // 0 when no problems were reported; 1 on problems or unparsable settings
+    Settings.parse(args.toList) match {
+      case Some(settings) =>
+        val reporter = ConsoleReporter(settings)
+        val compiler = Compiler(settings, reporter)
+        compiler.run()
+        if (reporter.problems.nonEmpty) 1 else 0
+      case None =>
+        1
+    }
+  }
+}
diff --git a/src/main/scala/rsc/lexis/Inputs.scala b/src/main/scala/rsc/lexis/Inputs.scala
new file mode 100644
index 0000000..d21f20c
--- /dev/null
+++ b/src/main/scala/rsc/lexis/Inputs.scala
@@ -0,0 +1,76 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.lexis
+
+import java.nio.file._
+import scala.collection.mutable
+import rsc.pretty._
+
+sealed class Input protected (val path: Path) extends Pretty {
+  lazy val string: String = { // whole file read as UTF-8 on first access; the source is always closed
+    val codec = scala.io.Codec.UTF8
+    val source = scala.io.Source.fromFile(path.toFile)(codec)
+    try source.mkString
+    finally source.close()
+  }
+
+  lazy val chars: Array[Char] = {
+    string.toCharArray
+  }
+
+  private lazy val cachedLineIndices: Array[Int] = { // offset at which each line starts, in increasing order
+    val buf = new mutable.ArrayBuffer[Int]
+    buf += 0
+    var i = 0
+    while (i < chars.length) {
+      if (chars(i) == '\n') {
+        buf += (i + 1)
+      }
+      i += 1
+    }
+    if (buf.last != chars.length) {
+      buf += chars.length // sentinel value used for binary search
+    }
+    buf.toArray
+  }
+
+  def lineToOffset(line: Int): Int = {
+    cachedLineIndices(line)
+  }
+
+  def offsetToLine(offset: Int): Int = { // binary search over the line start offsets
+    val a = cachedLineIndices
+    // NOTE: chars.length requires a really ugly special case.
+    // If the file doesn't end with \n, then it's simply last_line:last_col+1.
+    // But if the file does end with \n, then it's last_line+1:0.
+    if (offset == chars.length &&
+        (0 < chars.length && chars(offset - 1) == '\n')) {
+      return a.length - 1
+    }
+    var lo = 0
+    var hi = a.length - 1
+    while (hi - lo > 1) {
+      val mid = (hi + lo) / 2
+      if (offset < a(mid)) hi = mid
+      else if (a(mid) == offset) return mid
+      else /* if (a(mid) < offset */ lo = mid
+    }
+    return lo
+  }
+
+  def printStr(p: Printer): Unit = {
+    PrettyInput.str(p, this)
+  }
+
+  def printRepl(p: Printer): Unit = {
+    PrettyInput.repl(p, this)
+  }
+}
+
+object Input {
+  def apply(path: Path): Input = {
+    new Input(path)
+  }
+}
+
+object NoInput extends Input(Paths.get("")) // placeholder input backed by the empty path
diff --git a/src/main/scala/rsc/lexis/Keywords.scala b/src/main/scala/rsc/lexis/Keywords.scala
new file mode 100644
index 0000000..1b918d7
--- /dev/null
+++ b/src/main/scala/rsc/lexis/Keywords.scala
@@ -0,0 +1,63 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.lexis
+
+import java.util.HashMap
+
+trait Keywords {
+  val keywords = { // maps reserved words and reserved operators to their token codes
+    val results = new HashMap[String, Token]
+    results.put("abstract", ABSTRACT)
+    results.put("case", CASE)
+    results.put("catch", CATCH)
+    results.put("class", CLASS)
+    results.put("def", DEF)
+    results.put("do", DO)
+    results.put("else", ELSE)
+    results.put("extends", EXTENDS)
+    results.put("false", FALSE)
+    results.put("final", FINAL)
+    results.put("finally", FINALLY)
+    results.put("for", FOR)
+    results.put("forSome", FORSOME)
+    results.put("if", IF)
+    results.put("implicit", IMPLICIT)
+    results.put("import", IMPORT)
+    results.put("lazy", LAZY)
+    results.put("match", MATCH)
+    results.put("new", NEW)
+    results.put("null", NULL)
+    results.put("object", OBJECT)
+    results.put("override", OVERRIDE)
+    results.put("package", PACKAGE)
+    results.put("private", PRIVATE)
+    results.put("protected", PROTECTED)
+    results.put("return", RETURN)
+    results.put("sealed", SEALED)
+    results.put("super", SUPER)
+    results.put("this", THIS)
+    results.put("throw", THROW)
+    results.put("trait", TRAIT)
+    results.put("try", TRY)
+    results.put("true", TRUE)
+    results.put("type", TYPE)
+    results.put("val", VAL)
+    results.put("var", VAR)
+    results.put("while", WHILE)
+    results.put("with", WITH)
+    results.put("yield", YIELD)
+    results.put("_", USCORE)
+    results.put(":", COLON)
+    results.put("=", EQUALS)
+    results.put("=>", ARROW)
+    results.put("⇒", ARROW)
+    results.put("<-", LARROW)
+    results.put("←", LARROW)
+    results.put("<:", SUBTYPE)
+    results.put("<%", VIEWBOUND) // view-bound operator; it was mis-mapped to CATCH
+    results.put(">:", SUPERTYPE)
+    results.put("#", HASH)
+    results.put("@", AT)
+    results
+  }
+}
diff --git a/src/main/scala/rsc/lexis/Names.scala b/src/main/scala/rsc/lexis/Names.scala
new file mode 100644
index 0000000..7a2537c
--- /dev/null
+++ b/src/main/scala/rsc/lexis/Names.scala
@@ -0,0 +1,47 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.lexis
+
+import rsc.util._
+
+trait Names {
+  implicit class NameOps(value: String) { // associativity and precedence queries on an identifier string
+    def isLeftAssoc: Boolean = {
+      !isRightAssoc
+    }
+
+    def isRightAssoc: Boolean = {
+      value.endsWith(":")
+    }
+
+    def isOpAssignment: Boolean = { // symbolic name ending in '=' and not starting with '=', except != <= >=
+      if (value.isEmpty) false
+      else if (value == "!=" || value == "<=" || value == ">=") false
+      else {
+        val headOk = value.head != '=' && isSymbolicIdStart(value.head)
+        val lastOk = value.last == '='
+        headOk && lastOk
+      }
+    }
+
+    def precedence: Int = { // decided by the first character; empty names and assignment operators get 0
+      if (value.isEmpty || value.isOpAssignment) {
+        0
+      } else if (isAlphanumericIdStart(value.head)) {
+        1
+      } else {
+        value.head match {
+          case '|' => 2
+          case '^' => 3
+          case '&' => 4
+          case '=' | '!' => 5
+          case '<' | '>' => 6
+          case ':' => 7
+          case '+' | '-' => 8
+          case '*' | '/' | '%' => 9
+          case _ => 10
+        }
+      }
+    }
+  }
+}
diff --git a/src/main/scala/rsc/lexis/Offsets.scala b/src/main/scala/rsc/lexis/Offsets.scala
new file mode 100644
index 0000000..47f8250
--- /dev/null
+++ b/src/main/scala/rsc/lexis/Offsets.scala
@@ -0,0 +1,10 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.lexis
+
+trait Offsets {
+  type Offset = Int
+  val NoOffset: Offset = -1 // -1 marks an absent offset, line or column
+  val NoLine: Offset = -1
+  val NoColumn: Offset = -1
+}
diff --git a/src/main/scala/rsc/lexis/Positions.scala b/src/main/scala/rsc/lexis/Positions.scala
new file mode 100644
index 0000000..4727340
--- /dev/null
+++ b/src/main/scala/rsc/lexis/Positions.scala
@@ -0,0 +1,31 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.lexis
+
+import rsc.pretty._
+
+sealed class Position protected (
+    val input: Input,
+    val start: Offset,
+    val end: Offset)
+    extends Pretty {
+  def startLine: Int = input.offsetToLine(start)
+  def startColumn: Int = start - input.lineToOffset(startLine) // distance from the start of its line
+  def endLine: Int = input.offsetToLine(end)
+  def endColumn: Int = end - input.lineToOffset(endLine) // distance from the start of its line
+  def printStr(p: Printer): Unit = PrettyPosition.str(p, this)
+  def printRepl(p: Printer): Unit = PrettyPosition.repl(p, this)
+}
+
+object Position {
+  def apply(input: Input, start: Offset, end: Offset): Position = {
+    new Position(input, start, end)
+  }
+}
+
+object NoPosition extends Position(NoInput, NoOffset, NoOffset) { // overrides avoid consulting NoInput
+  override def startLine: Int = NoLine
+  override def startColumn: Int = NoColumn
+  override def endLine: Int = NoLine
+  override def endColumn: Int = NoColumn
+}
diff --git a/src/main/scala/rsc/lexis/Tokens.scala b/src/main/scala/rsc/lexis/Tokens.scala
new file mode 100644
index 0000000..3786f12
--- /dev/null
+++ b/src/main/scala/rsc/lexis/Tokens.scala
@@ -0,0 +1,94 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.lexis
+
+import rsc.pretty._
+
+trait Tokens { // token codes are plain Ints (see `type Token` below)
+  final val ABSTRACT = 1
+  final val ARROW = 2
+  final val AT = 3
+  final val BOF = 4
+  final val CASE = 5
+  final val CATCH = 6
+  final val CLASS = 7
+  final val COLON = 8
+  final val COMMA = 9
+  final val COMMENT = 10
+  final val DEF = 11
+  final val DO = 12
+  final val DOT = 13
+  final val ELSE = 14
+  final val EOF = 15
+  final val ERROR = 16
+  final val EQUALS = 17
+  final val EXTENDS = 18
+  final val FALSE = 19
+  final val FINAL = 20
+  final val FINALLY = 21
+  final val FOR = 22
+  final val FORSOME = 23
+  final val HASH = 24
+  final val ID = 25
+  final val IF = 26
+  final val IMPLICIT = 27
+  final val IMPORT = 28
+  final val LARROW = 29
+  final val LAZY = 30
+  final val LBRACE = 31
+  final val LBRACKET = 32
+  final val LITCHAR = 33
+  final val LITDOUBLE = 34
+  final val LITFLOAT = 35
+  final val LITINT = 36
+  final val LITLONG = 37
+  final val LITSTRING = 38
+  final val LITSYMBOL = 39
+  final val LPAREN = 40
+  final val MATCH = 41
+  final val NEW = 42
+  final val NEWLINE = 43
+  final val NULL = 44
+  final val OBJECT = 45
+  final val OVERRIDE = 46
+  final val PACKAGE = 47
+  final val PRIVATE = 48
+  final val PROTECTED = 49
+  final val RBRACE = 50
+  final val RBRACKET = 51
+  final val RETURN = 52
+  final val RPAREN = 53
+  final val SEALED = 54
+  final val SEMI = 55
+  final val SUBTYPE = 56
+  final val SUPER = 57
+  final val SUPERTYPE = 58
+  final val THIS = 59
+  final val THROW = 60
+  final val TRAIT = 61
+  final val TRUE = 62
+  final val TRY = 63
+  final val TYPE = 64
+  final val USCORE = 65
+  final val VAL = 66
+  final val VAR = 67
+  final val VIEWBOUND = 68
+  final val WHILE = 69
+  final val WHITESPACE = 70
+  final val WITH = 71
+  final val YIELD = 72
+
+  type Token = Int
+
+  def tokenStr(token: Token): String = {
+    val p = new Printer
+    PrettyToken.str(p, token)
+    p.toString
+  }
+
+  def tokenRepl(token: Token): String = {
+    val p = new Printer
+    PrettyToken.repl(p, token)
+    p.toString
+  }
+}
\ No newline at end of file
diff --git a/src/main/scala/rsc/lexis/package.scala b/src/main/scala/rsc/lexis/package.scala
new file mode 100644
index 0000000..506e691
--- /dev/null
+++ b/src/main/scala/rsc/lexis/package.scala
@@ -0,0 +1,5 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc
+
+package object lexis extends Keywords with Names with Offsets with Tokens
diff --git a/src/main/scala/rsc/parse/Bounds.scala b/src/main/scala/rsc/parse/Bounds.scala
new file mode 100644
index 0000000..7e5f87a
--- /dev/null
+++ b/src/main/scala/rsc/parse/Bounds.scala
@@ -0,0 +1,55 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse
+
+import rsc.lexis._
+import rsc.syntax._
+
+trait Bounds {
+  self: Parser =>
+
+  def upperBound(): Option[Tpt] = { // optional `<: T`
+    bound(SUBTYPE)
+  }
+
+  def lowerBound(): Option[Tpt] = { // optional `>: T`
+    bound(SUPERTYPE)
+  }
+
+  def viewBounds(): List[Tpt] = { // crashes when a view bound is present, otherwise Nil
+    if (in.token == VIEWBOUND) {
+      crash("implicit parameters")
+    } else {
+      Nil
+    }
+  }
+
+  def contextBounds(): List[Tpt] = { // crashes when a context bound is present, otherwise Nil
+    if (in.token == COLON) {
+      crash("implicit parameters")
+    } else {
+      Nil
+    }
+  }
+
+  private def bounds(token: Token): List[Tpt] = { // parses zero or more consecutive `token tpt` pairs
+    def loop(bounds: List[Tpt]): List[Tpt] = {
+      if (in.token == token) {
+        in.nextToken()
+        loop(bounds :+ tpt()) // keep going: previously only the first bound was ever consumed
+      } else {
+        bounds
+      }
+    }
+    loop(Nil)
+  }
+
+  private def bound(token: Token): Option[Tpt] = { // parses at most one `token tpt` pair
+    if (in.token == token) {
+      in.nextToken()
+      Some(tpt())
+    } else {
+      None
+    }
+  }
+}
diff --git a/src/main/scala/rsc/parse/Contexts.scala b/src/main/scala/rsc/parse/Contexts.scala
new file mode 100644
index 0000000..0a06b8b
--- /dev/null
+++ b/src/main/scala/rsc/parse/Contexts.scala
@@ -0,0 +1,85 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse
+
+trait Contexts {
+  self: Parser =>
+
+  sealed trait ParamContext { // what a parameter clause may contain, depending on the enclosing definition
+    def allowsAnonymous: Boolean
+    def allowsContextBounds: Boolean
+    def allowsDefaults: Boolean
+    def allowsInferred: Boolean
+    def allowsTermParams: Boolean
+    def allowsTypeBounds: Boolean
+    def allowsTypeParams: Boolean
+    def allowsVariance: Boolean
+    def allowsViewBounds: Boolean
+  }
+
+  sealed trait TemplateContext
+
+  final case object DefnClassContext extends ParamContext with TemplateContext {
+    def allowsAnonymous = false
+    def allowsContextBounds = true
+    def allowsDefaults = true
+    def allowsInferred = false
+    def allowsTermParams = true
+    def allowsTypeBounds = true
+    def allowsTypeParams = true
+    def allowsVariance = true
+    def allowsViewBounds = true
+  }
+
+  final case object DefnObjectContext extends TemplateContext
+
+  final case object DefnTraitContext extends ParamContext with TemplateContext {
+    def allowsAnonymous = false
+    def allowsContextBounds = false
+    def allowsDefaults = true
+    def allowsInferred = false
+    def allowsTermParams = true
+    def allowsTypeBounds = true
+    def allowsTypeParams = true
+    def allowsVariance = true
+    def allowsViewBounds = false
+  }
+
+  final case object DefnDefContext extends ParamContext {
+    def allowsAnonymous = false
+    def allowsContextBounds = true
+    def allowsDefaults = true
+    def allowsInferred = false
+    def allowsTermParams = true
+    def allowsTypeBounds = true
+    def allowsTypeParams = false
+    def allowsVariance = false
+    def allowsViewBounds = true
+  }
+
+  final case object DefnTypeContext extends ParamContext {
+    def allowsAnonymous = true
+    def allowsContextBounds = false
+    def allowsDefaults = false
+    def allowsInferred = false
+    def allowsTermParams = false
+    def allowsTypeBounds = true
+    def allowsTypeParams = true
+    def allowsVariance = true
+    def allowsViewBounds = false
+  }
+
+  final case object PrimaryCtorContext extends ParamContext {
+    def allowsAnonymous = false
+    def allowsContextBounds = false
+    def allowsDefaults = true
+    def allowsInferred = false
+    def allowsTermParams = true
+    def allowsTypeBounds = false
+    def allowsTypeParams = false
+    def allowsVariance = false
+    def allowsViewBounds = false
+  }
+
+  final case object TermNewContext extends TemplateContext
+}
diff --git a/src/main/scala/rsc/parse/Defns.scala b/src/main/scala/rsc/parse/Defns.scala
new file mode 100644
index 0000000..5a95bef
--- /dev/null
+++ b/src/main/scala/rsc/parse/Defns.scala
@@ -0,0 +1,124 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse
+
+import rsc.lexis._
+import rsc.report._
+import rsc.syntax._
+
+trait Defns {
+  self: Parser =>
+
+  def defnClass(start: Offset, mods: List[Mod]): DefnClass = { // id, type params, primary ctor, then template
+    val id = tptId()
+    val tparams = typeParams(DefnClassContext) // class context (context/view bounds allowed); was DefnTraitContext
+    val ctor = primaryCtor()
+    val Template(inits, statsOpt) = defnTemplate(DefnClassContext)
+    val stats = statsOpt.getOrElse(Nil)
+    atPos(start)(DefnClass(mods, id, tparams, ctor, inits, stats))
+  }
+
+  def defnDef(start: Offset, mods: List[Mod]): DefnDef = { // secondary constructors and inferred result types crash
+    if (in.token == THIS) {
+      crash("secondary constructors")
+    } else {
+      val id = termId()
+      val tparams = typeParams(DefnDefContext)
+      val params = termParams(DefnDefContext)
+      val ret = {
+        if (in.token == COLON) {
+          in.nextToken()
+          tpt()
+        } else {
+          crash("type inference")
+        }
+      }
+      val body = {
+        if (in.token == EQUALS) {
+          in.nextToken()
+          Some(term())
+        } else {
+          None
+        }
+      }
+      atPos(start)(DefnDef(mods, id, tparams, params, ret, body))
+    }
+  }
+
+  def defnField(start: Offset, mods: List[Mod]): DefnField = { // requires a plain id and an explicit type
+    val id = {
+      if (in.token == ID) {
+        termId()
+      } else {
+        crash("pattern definitions")
+      }
+    }
+    val tpt = {
+      if (in.token == COLON) {
+        in.nextToken()
+        this.tpt()
+      } else {
+        crash("type inference")
+      }
+    }
+    val rhs = {
+      if (in.token == EQUALS) {
+        in.nextToken()
+        if (in.token == USCORE) {
+          crash("default initial values in vars")
+        } else {
+          Some(term())
+        }
+      } else {
+        None
+      }
+    }
+    atPos(start)(DefnField(mods, id, tpt, rhs))
+  }
+
+  def defnObject(start: Offset, mods: List[Mod]): DefnObject = {
+    val id = termId()
+    val Template(inits, statsOpt) = defnTemplate(DefnObjectContext)
+    val stats = statsOpt.getOrElse(Nil)
+    atPos(start)(DefnObject(mods, id, inits, stats))
+  }
+
+  def defnTrait(start: Offset, mods: List[Mod]): DefnTrait = {
+    val id = tptId()
+    val tparams = typeParams(DefnTraitContext)
+    val Template(inits, statsOpt) = defnTemplate(DefnTraitContext)
+    val stats = statsOpt.getOrElse(Nil)
+    atPos(start)(DefnTrait(mods, id, tparams, inits, stats))
+  }
+
+  def defnType(start: Offset, mods: List[Mod]): DefnType = { // only `type T[...] = rhs` aliases are supported
+    newLinesOpt()
+    val id = tptId()
+    val tparams = typeParams(DefnTypeContext)
+    val rhs = {
+      in.token match {
+        case EQUALS =>
+          in.nextToken()
+          tpt()
+        case token if token.isStatSep =>
+          crash("abstract type members")
+        case SUPERTYPE | SUBTYPE | COMMA | RBRACE =>
+          crash("abstract type members")
+        case _ =>
+          val errOffset = in.offset
+          reportOffset(in.offset, ExpectedTypeRhs)
+          atPos(errOffset)(errorTpt())
+      }
+    }
+    atPos(start)(DefnType(mods, id, tparams, rhs))
+  }
+
+  private def primaryCtor(): PrimaryCtor = {
+    val start = in.offset
+    val mods = primaryCtorMods()
+    val params = termParams(PrimaryCtorContext)
+    val ctor = atPos(start)(PrimaryCtor(mods, params))
+    ctor.id.pos = Position(input, start, start) // the ctor id gets an empty position at the ctor start
+    ctor
+  }
+}
diff --git a/src/main/scala/rsc/parse/Groups.scala b/src/main/scala/rsc/parse/Groups.scala
new file mode 100644
index 0000000..0bc47e0
--- /dev/null
+++ b/src/main/scala/rsc/parse/Groups.scala
@@ -0,0 +1,68 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse
+
+import scala.collection.immutable.BitSet
+import rsc.lexis._
+
+trait Groups {
+  self: Parser =>
+
+  object introTokens { // tokens that can introduce a definition or a term
+    private val common = BitSet(CASECLASS, CASEOBJECT, CLASS, TRAIT, OBJECT)
+    private val toplevelOnly = BitSet(PACKAGE)
+    private val memberOnly = BitSet(DEF, TYPE, VAL, VAR)
+    private val localOnly = memberOnly
+    val packageDefn = modTokens.packageDefn | common | toplevelOnly
+    val templateDefn = modTokens.templateDefn | common | memberOnly
+    val localDefn = modTokens.localDefn | common | localOnly
+    val refineDefn = modTokens.refineDefn | memberOnly
+    val defn = packageDefn | templateDefn | localDefn | refineDefn
+    val term = termTokens.atomic | termTokens.extra
+  }
+
+  object litTokens {
+    private val x1 = BitSet(FALSE, LITCHAR, LITDOUBLE, LITFLOAT, LITINT)
+    private val x2 = BitSet(LITLONG, LITSTRING, LITSYMBOL, NULL, TRUE)
+    val all = x1 | x2
+  }
+
+  object modTokens { // modifier tokens, grouped by where the definition occurs
+    private val common = BitSet(ABSTRACT, AT, FINAL, IMPLICIT, LAZY, SEALED)
+    private val toplevelOnly = BitSet(PRIVATE, PROTECTED)
+    private val memberOnly = BitSet(OVERRIDE, PRIVATE, PROTECTED)
+    private val localOnly = BitSet()
+    val packageDefn = common | toplevelOnly
+    val templateDefn = common | memberOnly
+    val localDefn = common | localOnly
+    val refineDefn = BitSet()
+    val defn = packageDefn | templateDefn | localDefn | refineDefn
+    val primaryCtor = BitSet(AT, PRIVATE, PROTECTED)
+    val termParam = BitSet(AT, FINAL, IMPLICIT, LAZY, PRIVATE, PROTECTED)
+  }
+
+  object statTokens {
+    val mustStart = BitSet(IMPORT) | introTokens.defn
+    val canStart = introTokens.term | mustStart | BitSet(AT, CASE)
+    val canEnd = termTokens.atomic | BitSet(TYPE, RPAREN, RBRACE, RBRACKET)
+    val sep = BitSet(NL1, NL2, SEMI)
+    val seqEnd = BitSet(RBRACE, EOF)
+  }
+
+  object termTokens {
+    val atomic = litTokens.all | BitSet(ID, RETURN, SUPER, THIS, USCORE)
+    val extra = BitSet(DO, FOR, IF, LBRACE, LPAREN, NEW, THROW, TRY, WHILE)
+  }
+
+  implicit class TokenGroupOps(token: Token) { // membership tests against the token sets above
+    def isPackageDefnIntro = introTokens.packageDefn.contains(token)
+    def isTemplateDefnIntro = introTokens.templateDefn.contains(token)
+    def isLocalDefnIntro = introTokens.localDefn.contains(token)
+    def isRefineDefnIntro = introTokens.refineDefn.contains(token)
+    def isLit = litTokens.all.contains(token)
+    def isTermIntro = introTokens.term.contains(token)
+    def isStatSep: Boolean = statTokens.sep.contains(token)
+    def isStatSeqEnd = statTokens.seqEnd.contains(token)
+    def mustStartStat = statTokens.mustStart.contains(token)
+  }
+}
diff --git a/src/main/scala/rsc/parse/Helpers.scala b/src/main/scala/rsc/parse/Helpers.scala
new file mode 100644
index 0000000..e2a592a
--- /dev/null
+++ b/src/main/scala/rsc/parse/Helpers.scala
@@ -0,0 +1,101 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse
+
+import rsc.lexis._
+import rsc.report._
+import rsc.syntax._
+
+trait Helpers {
+  self: Parser =>
+
+  def accept(token: Token): Unit = { // reports a mismatch; consumes the token only if it matches
+    val offset = in.offset
+    if (in.token != token) {
+      reportOffset(offset, ExpectedToken(_, token, in.token))
+    }
+    if (in.token == token) {
+      in.nextToken()
+    }
+  }
+
+  def acceptStatSep(): Unit = {
+    if (in.token.isStatSep) in.nextToken()
+    else accept(SEMI)
+  }
+
+  def acceptStatSepUnlessAtEnd(altEnd: Token = EOF): Unit = {
+    if (!in.token.isStatSeqEnd)
+      in.token match {
+        case EOF =>
+          ()
+        case `altEnd` =>
+          ()
+        case token if token.isStatSep =>
+          in.nextToken()
+        case _ =>
+          in.nextToken() // needed to ensure progress; otherwise we might cycle forever
+          accept(SEMI)
+      }
+  }
+
+  def atPos[T <: Tree](start: Offset)(t: T): T = { // the end defaults to in.lastOffset
+    atPos(start, in.lastOffset)(t)
+  }
+
+  def atPos[T <: Tree](start: Offset, end: Offset)(t: T): T = {
+    atPos(Position(input, start, end))(t)
+  }
+
+  def atPos[T <: Tree](pos: Position)(t: T): T = { // assigns pos to the tree and returns the same tree
+    t.pos = pos
+    t
+  }
+
+  def commaSeparated[T](part: => T): List[T] = {
+    tokenSeparated(COMMA, part)
+  }
+
+  def errorStat(): Stat = {
+    errorTerm()
+  }
+
+  def inBraces[T](body: => T): T = {
+    accept(LBRACE)
+    val result = body
+    accept(RBRACE)
+    result
+  }
+
+  def inBrackets[T](body: => T): T = {
+    accept(LBRACKET)
+    val result = body
+    accept(RBRACKET)
+    result
+  }
+
+  def inParens[T](body: => T): T = {
+    accept(LPAREN)
+    val result = body
+    accept(RPAREN)
+    result
+  }
+
+  def makeTptTuple(start: Offset, tpts: List[Tpt]): Tpt = { // a single element is returned unwrapped
+    tpts match {
+      case Nil => crash(tpts)
+      case tpt :: Nil => tpt
+      case tpts => atPos(start)(TptTuple(tpts))
+    }
+  }
+
+  def tokenSeparated[T](separator: Int, part: => T): List[T] = { // parses one or more parts; `part` is by-name
+    val ts = List.newBuilder[T]
+    ts += part
+    while (in.token == separator) {
+      in.nextToken()
+      ts += part
+    }
+    ts.result
+  }
+}
diff --git a/src/main/scala/rsc/parse/Imports.scala b/src/main/scala/rsc/parse/Imports.scala
new file mode 100644
index 0000000..b1e1818
--- /dev/null
+++ b/src/main/scala/rsc/parse/Imports.scala
@@ -0,0 +1,85 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse + +import rsc.lexis._ +import rsc.syntax._ + +trait Imports { + self: Parser => + + def `import`(): Import = { + val start = in.offset + accept(IMPORT) + val importers = this.importers() + atPos(start)(Import(importers)) + } + + private def importers(): List[Importer] = { + commaSeparated(importer) + } + + private def importer(): Importer = { + def loop(qual: TermPath): Importer = { + val start = qual.pos.start + if (in.token == ID) { + val tree = someId() + if (in.token == DOT) { + in.nextToken() + val id = atPos(tree.pos)(TermId(tree.value)) + loop(atPos(start)(TermSelect(qual, id))) + } else { + val importees = List(atPos(tree.pos)(ImporteeName(tree))) + atPos(start)(Importer(qual, importees)) + } + } else if (in.token == USCORE) { + val importees = List(importee()) + atPos(start)(Importer(qual, importees)) + } else if (in.token == LBRACE) { + val importees = this.importees() + atPos(start)(Importer(qual, importees)) + } else { + val errImportee = List(importee()) + atPos(start)(Importer(qual, errImportee)) + } + } + val qual = termId() + accept(DOT) + loop(qual) + } + + private def importees(): List[Importee] = { + if (in.token == RBRACE) Nil + else inBraces(commaSeparated(importee)) + } + + private def importee(): Importee = { + val start = in.offset + if (in.token == ID) { + val id1 = someId() + if (in.token != ARROW) { + atPos(id1.pos)(ImporteeName(id1)) + } else { + val start = id1.pos.start + in.nextToken() + if (in.token == ID) { + val id2 = someId() + atPos(start)(ImporteeRename(id1, id2)) + } else if (in.token == USCORE) { + in.nextToken() + atPos(start)(ImporteeUnimport(id1)) + } else { + val idErr = errorSomeId() + atPos(start)(ImporteeRename(id1, idErr)) + } + } + } else if (in.token == USCORE) { + val start = in.offset + accept(USCORE) + atPos(start)(ImporteeWildcard()) + } else { + val idErr = errorSomeId() + atPos(start)(ImporteeName(idErr)) + } + } +} diff --git a/src/main/scala/rsc/parse/Lits.scala 
b/src/main/scala/rsc/parse/Lits.scala
new file mode 100644
index 0000000..8c7e168
--- /dev/null
+++ b/src/main/scala/rsc/parse/Lits.scala
@@ -0,0 +1,41 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse
+
+import scala.{Symbol => StdlibSymbol}
+import rsc.lexis._
+import rsc.report._
+
+trait Lits {
+  self: Parser =>
+  // Returns the value of the current literal token and advances; other tokens report IllegalLiteral and yield null.
+  def literal(): Any = {
+    val value = in.token match {
+      case LITCHAR =>
+        in.value.asInstanceOf[Char]
+      case LITINT =>
+        in.value.asInstanceOf[Int]
+      case LITLONG =>
+        in.value.asInstanceOf[Long]
+      case LITFLOAT =>
+        in.value.asInstanceOf[Float]
+      case LITDOUBLE =>
+        in.value.asInstanceOf[Double]
+      case LITSTRING =>
+        in.value.asInstanceOf[String]
+      case TRUE =>
+        true
+      case FALSE =>
+        false
+      case NULL =>
+        null
+      case LITSYMBOL =>
+        in.value.asInstanceOf[StdlibSymbol]
+      case _ =>
+        reportOffset(in.offset, IllegalLiteral)
+        null
+    }
+    in.nextToken()
+    value
+  }
+}
diff --git a/src/main/scala/rsc/parse/Messages.scala b/src/main/scala/rsc/parse/Messages.scala
new file mode 100644
index 0000000..17434c2
--- /dev/null
+++ b/src/main/scala/rsc/parse/Messages.scala
@@ -0,0 +1,60 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse
+
+import rsc.lexis._
+import rsc.pretty._
+import rsc.report._
+
+trait Messages {
+  self: Parser =>
+  // Aborts through rsc.util.crash, using a zero-length position at the current token offset.
+  def crash[T: Str: Repl](x: T): Nothing = {
+    val pos = Position(input, in.offset, in.offset)
+    rsc.util.crash(pos, x)
+  }
+
+  // Cf. `def syntaxError(msg: => Message, pos: Position): Unit`.
+  // The Dotty counterpart emits messages unconditionally, so our method
+  // only accepts fatal messages, because only those are emitted unconditionally.
+  def reportPos(pos: Position, msgFn: Position => Message): Message = {
+    val msg = msgFn(pos)
+    if (msg.sev != FatalSeverity) {
+      crash(msg) // a non-fatal message here is treated as a programming error
+    }
+    reporter.append(msg)
+  }
+
+  // Cf. `syntaxErrorOrIncomplete(String)` that is implicitly taking in.offset.
+  // Also cf. `syntaxError(msg: => Message, offset: Int = in.offset): Unit`.
+  //
+  // The former method was reporting an error and skipping until the next safe point.
+  // The latter method was just reporting an error.
+  //
+  // This distinction is now encapsulated in Message.sev.
+  // If the severity is fatal, we skip.
+  // If the severity is error, we don't.
+  def reportOffset(offset: Offset, msgFn: Position => Message): Message = {
+    val length = if (in.token == ID) in.idValue.length else 0 // span the identifier when the current token is one
+    val pos = Position(input, offset, offset + length)
+    val msg = msgFn(pos)
+    reporter.append(msg)
+    if (msg.sev == FatalSeverity) {
+      skip()
+    }
+    msg
+  }
+
+  /** Skip on error to next safe point.
+   *  Safe points are:
+   *   - Closing braces, provided they match an opening brace before the error point.
+   *   - Closing parens and brackets, provided they match an opening paren or bracket
+   *     before the error point and there are no intervening other kinds of parens.
+   *   - Semicolons and newlines, provided there are no intervening braces.
+   *   - Definite statement starts on new lines, provided they are not more indented
+   *     than the last known statement start before the error point.
+   */
+  private def skip(): Unit = {
+    crash("smart handling of fatal parsing errors") // the skipping described above is not implemented; this always crashes
+  }
+}
diff --git a/src/main/scala/rsc/parse/Mods.scala b/src/main/scala/rsc/parse/Mods.scala
new file mode 100644
index 0000000..5b4542e
--- /dev/null
+++ b/src/main/scala/rsc/parse/Mods.scala
@@ -0,0 +1,177 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse
+
+import scala.collection.immutable.BitSet
+import rsc.lexis._
+import rsc.report._
+import rsc.syntax._
+
+trait Mods {
+  self: Parser =>
+  // Modifiers of a definition: annotations (newline before `@` tolerated) followed by flags from `modTokens`.
+  def defnMods(modTokens: BitSet): List[Mod] = {
+    val annots = this.annots(skipNewLines = true)
+    val flags = this.defnFlags(modTokens)
+    annots ++ flags
+  }
+
+  def primaryCtorMods(): List[Mod] = {
+    val annots = this.annots(skipNewLines = false)
+    val flags = this.primaryCtorFlags()
+    annots ++ flags
+  }
+
+  def termParamMods(ctx: ParamContext): List[Mod] = {
+    val annots = this.annots(skipNewLines = false)
+    val flags = this.termParamFlags(ctx)
+    annots ++ flags
+  }
+
+  def typeParamMods(ctx: ParamContext): List[Mod] = {
+    val annots = this.annots(skipNewLines = false)
+    val flags = this.typeParamFlags(ctx)
+    annots ++ flags
+  }
+
+  def termAnnotateMods(): List[Mod] = {
+    annots(skipNewLines = false)
+  }
+
+  def typeAnnotateMods(): List[Mod] = {
+    annots(skipNewLines = false)
+  }
+  // Annotations are unsupported: an `@` token crashes, anything else yields Nil.
+  private def annots(skipNewLines: Boolean): List[Mod] = {
+    if (skipNewLines) {
+      newLineOptWhenFollowedBy(AT)
+    }
+    if (in.token == AT) {
+      crash("annotations")
+    } else {
+      Nil
+    }
+  }
+  // Parses `tpt(args)`; a missing or a second argument list crashes.
+  private def annotInit(): Init = {
+    val initstart = in.offset
+    val tpt = simpleTpt()
+    val idstart = in.offset
+    val args = {
+      if (in.token != LPAREN) {
+        crash("nullary argument lists")
+      }
+      val result = termArgs()
+      if (in.token == LPAREN) {
+        crash("multiple argument lists")
+      }
+      result
+    }
+    val init = atPos(initstart)(Init(tpt, args))
+    init.id.pos = Position(input, idstart, idstart) // zero-length position right after the type
+    init
+  }
+  // Collects modifier flags for as long as the current token is in `modTokens`.
+  private def defnFlags(modTokens: BitSet): List[Mod] = {
+    def addFlag(flags: List[Mod], flag: Mod): List[Mod] = {
+      val isRepeated = flags.exists(_.productPrefix == flag.productPrefix)
+      if (isRepeated) reportOffset(in.offset, RepeatedModifier) // reported, but the flag is still appended
+      flags :+ flag
+    }
+    def loop(flags: List[Mod]): List[Mod] = {
+      if (modTokens.contains(in.token)) {
+        def within(): Option[Path] = { // optional `[this]` / `[id]` qualifier of private/protected
+          if (in.token == LBRACKET) {
+            inBrackets {
+              if (in.token == THIS) {
+                val id = anonId()
+                val start = in.offset
+                in.nextToken()
+                Some(atPos(start)(TermThis(id)))
+              } else {
+                Some(someId())
+              }
+            }
+          } else {
+            None
+          }
+        }
+        val start = in.offset
+        val flag = {
+          in.token match {
+            case ABSTRACT =>
+              in.nextToken()
+              atPos(start)(ModAbstract())
+            case FINAL =>
+              in.nextToken()
+              atPos(start)(ModFinal())
+            case LAZY =>
+              in.nextToken()
+              atPos(start)(ModLazy())
+            case IMPLICIT =>
+              crash("implicit parameters")
+            case OVERRIDE =>
+              in.nextToken()
+              atPos(start)(ModOverride())
+            case PRIVATE =>
+              in.nextToken()
+              atPos(start)(ModPrivate(within()))
+            case PROTECTED =>
+              in.nextToken()
+              atPos(start)(ModProtected(within()))
+            case SEALED =>
+              in.nextToken()
+              atPos(start)(ModSealed())
+            case _ =>
+              crash(tokenRepl(in.token))
+          }
+        }
+        loop(addFlag(flags, flag))
+      } else {
+        flags
+      }
+    }
+    loop(Nil)
+  }
+
+  private def primaryCtorFlags(): List[Mod] = {
+    defnFlags(modTokens.primaryCtor)
+  }
+  // Flags of a term parameter plus an optional trailing `val`/`var`; flags without val/var report IllegalModifier.
+  private def termParamFlags(ownedBy: ParamContext): List[Mod] = {
+    val flags = defnFlags(modTokens.termParam)
+    val extraFlags = {
+      if (in.token == VAL) {
+        val start = in.offset
+        in.nextToken()
+        List(atPos(start)(ModVal()))
+      } else if (in.token == VAR) {
+        val start = in.offset
+        in.nextToken()
+        List(atPos(start)(ModVar()))
+      } else {
+        Nil
+      }
+    }
+    if (flags.nonEmpty && extraFlags.isEmpty) {
+      reportOffset(in.offset, IllegalModifier)
+    }
+    flags ++ extraFlags
+  }
+  // Variance marker (`+` or `-`), consumed only when the context allows variance.
+  private def typeParamFlags(ownedBy: ParamContext): List[Mod] = {
+    if (ownedBy.allowsVariance) {
+      if (in.token == ID && in.idValue == "+") {
+        val start = in.offset
+        in.nextToken()
+        List(atPos(start)(ModCovariant()))
+      } else if (in.token == ID && in.idValue == "-") {
+        val start = in.offset
+        in.nextToken()
+        List(atPos(start)(ModContravariant()))
+      } else Nil
+    } else {
+      Nil
+    }
+  }
+}
diff --git a/src/main/scala/rsc/parse/Newlines.scala b/src/main/scala/rsc/parse/Newlines.scala
new file mode 100644
index 0000000..d08fa8c
--- /dev/null
+++
b/src/main/scala/rsc/parse/Newlines.scala
@@ -0,0 +1,139 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse
+
+import scala.annotation.switch
+import rsc.lexis._
+import rsc.scan._
+
+trait Newlines {
+  self: Parser =>
+
+  private val scanner = Scanner(settings, reporter, input)
+  private var oneTokenBehind = false // true while `in` shows a synthetic NL1/NL2 and the scanner is one token ahead
+  private var overrideToken = -1 // CASECLASS/CASEOBJECT pending for the scanner's current token, or -1
+  // Parser-facing token cursor layered over `scanner`: drops trivia and synthesizes NL1/NL2/CASECLASS/CASEOBJECT.
+  object in {
+    var lastOffset: Offset = 0
+    var offset: Offset = 0
+    var token: Token = BOF
+    var value: Any = null
+    def idValue: String = {
+      if (value != null) value.asInstanceOf[String]
+      else crash(tokenRepl(token))
+    }
+    // Advances to the next significant token, or to a synthetic newline token when one is due.
+    def nextToken(): Unit = {
+      if (oneTokenBehind) {
+        oneTokenBehind = false
+        offset = scanner.start
+        token = if (overrideToken != -1) overrideToken else scanner.token
+        overrideToken = -1
+        value = scanner.value
+        adjustRegions(token)
+      } else {
+        lastOffset = scanner.end
+        scanner.next()
+
+        var firstNewLine = NoOffset
+        var secondNewLine = NoOffset
+        var prevTokenWasNewline = false
+        while (scanner.token == WHITESPACE ||
+               scanner.token == NEWLINE ||
+               scanner.token == COMMENT) {
+          if (scanner.token == NEWLINE) {
+            if (firstNewLine == NoOffset) {
+              firstNewLine = scanner.start
+            } else if (secondNewLine == NoOffset && prevTokenWasNewline) {
+              secondNewLine = scanner.start
+            }
+          }
+          prevTokenWasNewline = scanner.token == NEWLINE
+          scanner.next()
+        }
+        // Look past `case` (and any trivia) for `class`/`object`; otherwise rewind to `case`.
+        if (scanner.token == CASE) {
+          val snapshot = scanner.snapshot()
+          do {
+            scanner.next() // first iteration steps off CASE itself; without it the lookahead never moved
+          } while (scanner.token == WHITESPACE ||
+                   scanner.token == NEWLINE ||
+                   scanner.token == COMMENT)
+          if (scanner.token == CLASS) overrideToken = CASECLASS
+          else if (scanner.token == OBJECT) overrideToken = CASEOBJECT
+          else scanner.restore(snapshot)
+        }
+
+        if (firstNewLine != NoOffset &&
+            statTokens.canEnd.contains(in.token) &&
+            statTokens.canStart.contains(scanner.token) &&
+            (regions.isEmpty ||
+            regions.head == RBRACE ||
+            regions.head == ARROW && scanner.token == CASE)) {
+          oneTokenBehind = true
+          offset = firstNewLine
+          token = if (secondNewLine == NoOffset) NL1 else NL2
+          value = null
+        } else {
+          oneTokenBehind = false
+          offset = scanner.start
+          token = if (overrideToken != -1) overrideToken else scanner.token
+          overrideToken = -1
+          value = scanner.value
+          adjustRegions(token)
+        }
+      }
+    }
+    // Stack of expected closers for the currently open regions, innermost first.
+    private var regions = List[Token]()
+    private def adjustRegions(token: Token): Unit = {
+      (token: @switch) match {
+        case LBRACE =>
+          regions = RBRACE :: regions
+        case LBRACKET =>
+          regions = RBRACKET :: regions
+        case LPAREN =>
+          regions = RPAREN :: regions
+        case CASE =>
+          regions = ARROW :: regions
+        case RBRACE | RBRACKET | RPAREN =>
+          if (regions.nonEmpty) regions = regions.tail // a stray closer must not throw on an empty stack
+        case ARROW =>
+          if (regions.nonEmpty && regions.head == ARROW) {
+            regions = regions.tail
+          }
+        case _ =>
+          ()
+      }
+    }
+  }
+  // Skips one synthetic newline token (NL1 or NL2) if present.
+  def newLinesOpt(): Unit = {
+    if (in.token == NL1 || in.token == NL2) {
+      in.nextToken()
+    }
+  }
+  // Skips a single NL1 when `cond` holds.
+  def newLineOptWhen(cond: Boolean): Unit = {
+    if (in.token == NL1 && cond) {
+      in.nextToken()
+    }
+  }
+  // Skips a single NL1 when the token after it (the scanner's current token) is `token`.
+  def newLineOptWhenFollowedBy(token: Token): Unit = {
+    if (in.token == NL1 && scanner.token == token) {
+      in.nextToken()
+    }
+  }
+  // Skips a single NL1 when the token after it satisfies `p`.
+  def newLineOptWhenFollowedBy(p: Token => Boolean): Unit = {
+    if (in.token == NL1 && p(scanner.token)) {
+      in.nextToken()
+    }
+  }
+  // Synthetic token codes produced by `in`, in addition to the scanner's own tokens.
+  final val CASECLASS = 256
+  final val CASEOBJECT = 257
+  final val NL1 = 258
+  final val NL2 = 259
+}
diff --git a/src/main/scala/rsc/parse/Params.scala b/src/main/scala/rsc/parse/Params.scala
new file mode 100644
index 0000000..ddc4fec
--- /dev/null
+++ b/src/main/scala/rsc/parse/Params.scala
@@ -0,0 +1,107 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse
+
+import rsc.lexis._
+import rsc.syntax._
+
+trait Params {
+  self: Parser =>
+  // Parses exactly one parenthesized term parameter list; nullary, implicit and multiple lists crash.
+  def termParams(ctx: ParamContext): List[TermParam] = {
+    newLineOptWhenFollowedBy(LPAREN)
+    if (in.token != LPAREN) {
+      crash("nullary parameter lists")
+    }
+    val result = {
+      inParens {
+        if (in.token == RPAREN) {
+          Nil
+        } else if (in.token == IMPLICIT) {
+          crash("implicit parameters")
+        } else {
+          commaSeparated(termParam(ctx))
+        }
+      }
+    }
+    if (in.token == LPAREN) {
+      crash("multiple parameter lists")
+    }
+    result
+  }
+  // Parses `mods id: tpt`; anonymous, inferred and defaulted parameters crash where the context would allow them.
+  def termParam(ctx: ParamContext): TermParam = {
+    val start = in.offset
+    val mods = termParamMods(ctx)
+    val id = {
+      if (in.token == USCORE) {
+        if (ctx.allowsAnonymous) {
+          crash("anonymous parameters")
+        } else {
+          errorTermId()
+        }
+      } else {
+        termId()
+      }
+    }
+    val tpt = {
+      if (in.token == COLON) {
+        in.nextToken()
+        paramTpt()
+      } else {
+        if (ctx.allowsInferred) {
+          crash("type inference")
+        } else {
+          val errOffset = in.offset
+          accept(COLON)
+          atPos(errOffset)(errorTpt())
+        }
+      }
+    }
+    val rhs = { // evaluated only for its crash on `=`; the value is not stored in the tree
+      if (ctx.allowsDefaults) {
+        if (in.token == EQUALS) {
+          crash("named and default arguments")
+        } else {
+          None
+        }
+      } else {
+        None
+      }
+    }
+    atPos(start)(TermParam(mods, id, tpt))
+  }
+  // Parses an optional bracketed, comma-separated type parameter list.
+  def typeParams(ctx: ParamContext): List[TypeParam] = {
+    if (in.token == LBRACKET) inBrackets(commaSeparated(typeParam(ctx)))
+    else Nil
+  }
+  // Parses `mods id [>: lo] [<: hi]` plus view/context bounds when the context allows them.
+  def typeParam(ctx: ParamContext): TypeParam = {
+    val start = in.offset
+    val mods = typeParamMods(ctx)
+    val id = {
+      if (in.token == USCORE) {
+        if (ctx.allowsAnonymous) {
+          crash("anonymous type parameters")
+        } else {
+          errorTptId()
+        }
+      } else {
+        tptId()
+      }
+    }
+    val tparams = {
+      if (in.token == LBRACKET) {
+        crash("higher-kinded types")
+      } else {
+        Nil
+      }
+    }
+    val lbound = lowerBound() // Scala's grammar puts `>:` before `<:`, so the lower bound is parsed first
+    val ubound = upperBound()
+    val vbounds = if (ctx.allowsViewBounds) viewBounds() else Nil // parsed but not stored in TypeParam
+    val cbounds = if (ctx.allowsContextBounds) contextBounds() else Nil // parsed but not stored in TypeParam
+    atPos(start)(TypeParam(mods, id, ubound, lbound))
+  }
+}
diff --git a/src/main/scala/rsc/parse/Parser.scala b/src/main/scala/rsc/parse/Parser.scala
new file mode 100644
index 0000000..471db1c
--- /dev/null
+++ b/src/main/scala/rsc/parse/Parser.scala
@@ -0,0 +1,35 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse
+
+import rsc.lexis._
+import rsc.report._
+import rsc.settings._
+// The parser is assembled from the traits below; instances are created through Parser.apply.
+final class Parser private (
+    val settings: Settings,
+    val reporter: Reporter,
+    val input: Input)
+    extends Bounds
+    with Contexts
+    with Defns
+    with Groups
+    with Helpers
+    with Imports
+    with Lits
+    with Messages
+    with Mods
+    with Newlines
+    with Params
+    with Paths
+    with Pats
+    with Sources
+    with Templates
+    with Terms
+    with Tpts
+
+object Parser {
+  def apply(settings: Settings, reporter: Reporter, input: Input): Parser = {
+    new Parser(settings, reporter, input)
+  }
+}
diff --git a/src/main/scala/rsc/parse/Paths.scala b/src/main/scala/rsc/parse/Paths.scala
new file mode 100644
index 0000000..53ebfd2
--- /dev/null
+++ b/src/main/scala/rsc/parse/Paths.scala
@@ -0,0 +1,161 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse
+
+import rsc.lexis._
+import rsc.report._
+import rsc.syntax._
+
+trait Paths {
+  self: Parser =>
+  // Consumes a non-empty identifier and returns its text; otherwise reports and returns Error.value without advancing.
+  private def value(): String = {
+    if (in.token == ID) {
+      val value = in.idValue
+      if (value == "") {
+        reportOffset(in.offset, IllegalIdentifier)
+        Error.value
+      } else {
+        in.nextToken()
+        value
+      }
+    } else {
+      reportOffset(in.offset, ExpectedToken(_, ID, in.token))
+      Error.value
+    }
+  }
+  // Creates an AnonId at the current offset; consumes no token.
+  def anonId(): AnonId = {
+    atPos(in.offset)(AnonId())
+  }
+
+  def someId(): SomeId = {
+    atPos(in.offset)(SomeId(value()))
+  }
+
+  def termId(): TermId = {
+    atPos(in.offset)(TermId(value()))
+  }
+
+  def tptId(): TptId = {
+    atPos(in.offset)(TptId(value()))
+  }
+  // Parses a term path; an unstarted path or a dangling `.` is completed with an error id.
+  def termPath(): TermPath = {
+    val start = in.offset
+    rawPath() match {
+      case UnstartedPath =>
+        errorTermId()
+      case UnfinishedPath(path) =>
+        val idErr = errorTermId()
+        atPos(start)(TermSelect(path, idErr))
+      case FinishedPath(path) =>
+        path
+    }
+  }
+  // Parses a path and reinterprets its last identifier as a type id; `path.type` crashes as unsupported.
+  def tptPath(): TptPath = {
+    val start = in.offset
+    rawPath() match {
+      case UnstartedPath =>
+        errorTptId()
+      case UnfinishedPath(path) =>
+        if (in.token == TYPE) {
+          crash("singleton types")
+        } else {
+          val idErr = errorTptId()
+          atPos(start)(TptSelect(path, idErr))
+        }
+      case FinishedPath(path) =>
+        path match {
+          case TermId(value) =>
+            atPos(path.pos)(TptId(value))
+          case TermSelect(qual: TermPath, termId @ TermId(value)) =>
+            val tptId = atPos(termId.pos)(TptId(value))
+            atPos(path.pos)(TptSelect(qual, tptId))
+          case _ =>
+            crash(path)
+        }
+    }
+  }
+  // Outcome of rawPath: nothing parsed, stopped right after a `.`, or a complete path.
+  private sealed trait RawPath
+  private case object UnstartedPath extends RawPath
+  private case class UnfinishedPath(path: TermPath) extends RawPath
+  private case class FinishedPath(path: TermPath) extends RawPath
+  // Parses `id`, `this`, `super[mix]` (optionally qualified by a leading id) and any following `.id` selections.
+  private def rawPath(): RawPath = {
+    val start = in.offset
+    def loop(qual: TermPath): RawPath = {
+      if (in.token == DOT) {
+        in.nextToken()
+        if (in.token == ID) {
+          val nextId = termId()
+          loop(atPos(start)(TermSelect(qual, nextId)))
+        } else {
+          UnfinishedPath(qual) // the `.` was consumed but no identifier followed
+        }
+      } else {
+        FinishedPath(qual)
+      }
+    }
+    if (in.token == ID) {
+      val firstId = termId()
+      if (in.token == DOT) {
+        in.nextToken()
+        if (in.token == ID) {
+          val nextId = termId()
+          loop(atPos(start)(TermSelect(firstId, nextId)))
+        } else if (in.token == THIS) {
+          in.nextToken()
+          val qual = atPos(firstId.pos)(SomeId(firstId.value)) // the leading id qualifies `this`
+          loop(atPos(start)(TermThis(qual)))
+        } else if (in.token == SUPER) {
+          in.nextToken()
+          val qual = atPos(firstId.pos)(SomeId(firstId.value))
+          val mix = {
+            if (in.token == LBRACKET) {
+              inBrackets(atPos(in.offset)(someId()))
+            } else {
+              anonId()
+            }
+          }
+          loop(atPos(start)(TermSuper(qual, mix)))
+        } else {
+          UnfinishedPath(firstId)
+        }
+      } else {
+        FinishedPath(firstId)
+      }
+    } else if (in.token == THIS) {
+      in.nextToken()
+      val qual = anonId()
+      loop(atPos(start)(TermThis(qual)))
+    } else if (in.token == SUPER) {
+      in.nextToken()
+      val qual = anonId()
+      val mix = {
+        if (in.token == LBRACKET) {
+          inBrackets(atPos(in.offset)(someId()))
+        } else {
+          anonId()
+        }
+      }
+      loop(atPos(start)(TermSuper(qual, mix)))
+    } else {
+      UnstartedPath
+    }
+  }
+  // The error* variants simply call the regular parsers, which report when no identifier is present.
+  def errorSomeId(): SomeId = {
+    someId()
+  }
+
+  def errorTermId(): TermId = {
+    termId()
+  }
+
+  def errorTptId(): TptId = {
+    tptId()
+  }
+}
diff --git a/src/main/scala/rsc/parse/Pats.scala b/src/main/scala/rsc/parse/Pats.scala
new file mode 100644
index 0000000..252bc6b
--- /dev/null
+++ b/src/main/scala/rsc/parse/Pats.scala
@@ -0,0 +1,163 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse
+
+import rsc.lexis._
+import rsc.report._
+import rsc.syntax._
+
+trait Pats {
+  self: Parser =>
+  // Parses one case unconditionally, then further cases while the current token is CASE.
+  def cases(): List[Case] = {
+    val buf = List.newBuilder[Case]
+    buf += `case`()
+    while (in.token == CASE) {
+      buf += `case`()
+    }
+    buf.result
+  }
+  // Parses `case pat [if cond] => stats`; the leading token is skipped without being checked.
+  private def `case`(): Case = {
+    val start = in.offset
+    in.nextToken()
+    val pat = this.pat()
+    val cond = {
+      if (in.token == IF) {
+        in.nextToken()
+        Some(postfixTerm())
+      } else {
+        None
+      }
+    }
+    accept(ARROW)
+    val body = blockStats()
+    atPos(start)(Case(pat, cond, body))
+  }
+  // Parses a parenthesized, possibly empty, comma-separated list of patterns.
+  private def patArgs(): List[Pat] = {
+    inParens {
+      if (in.token == RPAREN) Nil
+      else commaSeparated(pat())
+    }
+  }
+  // Parses a pattern, folding `p1 | p2 | ...` into PatAlternative; typed variables are returned as-is.
+  private def pat(): Pat = {
+    val start = in.offset
+    val unfinished = infixPat()
+    unfinished match {
+      case PatVar(_, Some(_)) =>
+        unfinished
+      case _ =>
+        if (in.token == ID && in.idValue == "|") {
+          val pats = List.newBuilder[Pat]
+          pats += unfinished
+          while (in.token == ID && in.idValue == "|") {
+            in.nextToken()
+            pats += infixPat()
+          }
+          atPos(start)(PatAlternative(pats.result))
+        } else {
+          unfinished
+        }
+    }
+  }
+  // Infix patterns are unsupported: any operator other than `|` after a simple pattern crashes.
+  private def infixPat(): Pat = {
+    val unfinished = simplePat()
+    unfinished match {
+      case PatVar(_, Some(_)) =>
+        unfinished
+      case _ =>
+        if (in.token == ID) {
+          if (in.idValue == "|") {
+            unfinished
+          } else {
+            crash("infix patterns")
+          }
+        } else {
+          unfinished
+        }
+    }
+  }
+  // Parses a typed variable, stable id / extractor, wildcard, literal, or parenthesized pattern(s).
+  private def simplePat(): Pat = {
+    val start = in.offset
+    in.token match {
+      case ID | THIS =>
+        val isPatVar = {
+          if (in.token == ID) {
+            val value = in.idValue
+            value.nonEmpty && value(0).isLower && value(0).isLetter // empty ids go to termPath(), which reports them
+          } else {
+            false
+          }
+        }
+        if (isPatVar) {
+          val start = in.offset
+          val id = termId()
+          if (in.token == COLON) {
+            in.nextToken()
+            val tpt = Some(refinedTpt())
+            atPos(start)(PatVar(id, tpt))
+          } else {
+            crash("type inference")
+          }
+        } else {
+          def reinterpretAsPat(path: TermPath): Pat = {
+            path match {
+              case TermId(value) =>
+                atPos(path.pos)(PatId(value))
+              case TermSelect(qual: TermPath, termId) =>
+                atPos(path.pos)(PatSelect(qual, termId))
+              case _ =>
+                crash(path)
+            }
+          }
+          val path = termPath()
+          in.token match {
+            case LBRACKET =>
+              val fun = path
+              val targs = tptArgs()
+              val args = patArgs()
+              atPos(start)(PatExtract(fun, targs, args))
+            case LPAREN =>
+              val fun = path
+              val args = patArgs()
+              atPos(start)(PatExtract(fun, Nil, args))
+            case _ =>
+              reinterpretAsPat(path)
+          }
+        }
+      case USCORE =>
+        in.nextToken()
+        val id = atPos(start)(anonId())
+        val tpt = None
+        val unfinished = atPos(start)(PatVar(id, tpt))
+        if (in.token == ID && in.idValue == "*") {
+          in.nextToken()
+          atPos(start)(PatRepeat(unfinished))
+        } else {
+          unfinished
+        }
+      case token if token.isLit =>
+        val value = literal()
+        atPos(start)(PatLit(value))
+      case LPAREN =>
+        val pats = patArgs()
+        pats match {
+          case Nil => atPos(start)(PatLit(()))
+          case pat :: Nil => pat
+          case pats => atPos(start)(PatTuple(pats))
+        }
+      case _ =>
+        val errOffset = in.offset
+        reportOffset(errOffset, IllegalStartOfSimplePat)
+        atPos(errOffset)(errorPat())
+    }
+  }
+  // Placeholder pattern used for error recovery.
+  def errorPat(): Pat = {
+    PatId(Error.value)
+  }
+}
diff --git a/src/main/scala/rsc/parse/Sources.scala b/src/main/scala/rsc/parse/Sources.scala
new file mode 100644
index 0000000..0739e9b
--- /dev/null
+++ b/src/main/scala/rsc/parse/Sources.scala
@@ -0,0 +1,97 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse
+
+import rsc.lexis._
+import rsc.report._
+import rsc.syntax._
+
+trait Sources {
+  self: Parser =>
+  // Entry point: parses a whole compilation unit into a Source tree.
+  def source(): Source = {
+    val start = in.offset
+    atPos(start)(Source(sourceStats()))
+  }
+  // Handles an optional leading `package p` clause (braced or not), then the remaining package-level stats.
+  private def sourceStats(): List[Stat] = {
+    val stats = List.newBuilder[Stat]
+    while (in.token == SEMI) in.nextToken()
+    val start = in.offset
+    if (in.token == PACKAGE) {
+      in.nextToken()
+      if (in.token == OBJECT) {
+        crash("package objects")
+      } else {
+        val id = termPath()
+        newLineOptWhenFollowedBy(LBRACE)
+        if (in.token == EOF) {
+          stats += atPos(start)(DefnPackage(id, Nil))
+        } else if (in.token == LBRACE) {
+          stats += inBraces(atPos(start)(DefnPackage(id, packageStats())))
+          acceptStatSepUnlessAtEnd()
+          stats ++= packageStats()
+        } else {
+          acceptStatSep()
+          stats += atPos(start)(DefnPackage(id, sourceStats())) // unbraced package owns the rest of the file
+        }
+      }
+    } else {
+      stats ++= packageStats()
+    }
+    stats.result
+  }
+  // Parses imports and class/object/trait/package definitions until a statement-sequence end token.
+  private def packageStats(): List[Stat] = {
+    val stats = List.newBuilder[Stat]
+    while (!in.token.isStatSeqEnd) {
+      if (in.token == IMPORT) {
+        stats += `import`()
+      } else if (in.token.isPackageDefnIntro) {
+        val start = in.offset
+        val mods = defnMods(modTokens.packageDefn)
+        val stat = in.token match {
+          case CASECLASS =>
+            val modCase = atPos(in.offset)(ModCase())
+            in.nextToken()
+            defnClass(start, modCase +: mods)
+          case CASEOBJECT =>
+            val modCase = atPos(in.offset)(ModCase())
+            in.nextToken()
+            defnObject(start, modCase +: mods)
+          case CLASS =>
+            in.nextToken()
+            defnClass(start, mods)
+          case OBJECT =>
+            in.nextToken()
+            defnObject(start, mods)
+          case PACKAGE =>
+            in.nextToken()
+            if (in.token == OBJECT) {
+              crash("package objects")
+            } else {
+              val id = termPath()
+              newLineOptWhenFollowedBy(LBRACE)
+              inBraces(atPos(start)(DefnPackage(id, packageStats)))
+            }
+          case TRAIT =>
+            in.nextToken()
+            defnTrait(start, mods)
+          case _ =>
+            val errOffset = in.offset
+            reportOffset(errOffset, ExpectedStartOfDefinition)
+            atPos(errOffset)(errorStat())
+        }
+        stats += stat
+      } else if (!in.token.isStatSep) {
+        if (in.token == CASE) {
+          reportOffset(in.offset, IllegalStartOfDefinition)
+        } else {
+          reportOffset(in.offset, ExpectedClassOrObjectDefinition)
+        }
+      }
+      acceptStatSepUnlessAtEnd()
+    }
+    stats.result
+  }
+}
diff --git a/src/main/scala/rsc/parse/Templates.scala b/src/main/scala/rsc/parse/Templates.scala
new file mode 100644
index 0000000..28e0a42
--- /dev/null
+++ b/src/main/scala/rsc/parse/Templates.scala
@@ -0,0 +1,151 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse
+
+import rsc.lexis._
+import rsc.report._
+import rsc.syntax._
+
+trait Templates {
+  self: Parser =>
+
+  // NOTE: Template is no longer a tree, but we do need some way
+  // to avoid duplication in parser logic, so it made a reappearance.
+  case class Template(inits: List[Init], stats: Option[List[Stat]])
+  // Parses an optional `extends` clause followed by an optional braced body; early definitions crash.
+  def defnTemplate(ctx: TemplateContext): Template = {
+    if (in.token == EXTENDS) {
+      in.nextToken()
+      newLineOptWhenFollowedBy(LBRACE)
+      if (in.token == LBRACE) {
+        crash("early definitions")
+      } else {
+        val inits = templateInits()
+        templateBraces(ctx, inits)
+      }
+    } else {
+      val inits = Nil
+      templateBraces(ctx, inits)
+    }
+  }
+  // Parses the template that follows `new` (the caller consumes NEW itself).
+  def newTemplate(): Template = {
+    if (in.token == LBRACE) {
+      crash("early definitions")
+    } else {
+      val inits = templateInits()
+      templateBraces(TermNewContext, inits)
+    }
+  }
+  // Parent initializers separated by WITH.
+  private def templateInits(): List[Init] = {
+    tokenSeparated(WITH, templateInit)
+  }
+  // Parses `tpt(args)`; a missing or a second argument list crashes.
+  private def templateInit(): Init = {
+    val initstart = in.offset
+    val tpt = annotTpt()
+    val idstart = in.offset
+    val args = {
+      if (in.token != LPAREN) {
+        crash("nullary argument lists")
+      }
+      val result = termArgs()
+      if (in.token == LPAREN) {
+        crash("multiple argument lists")
+      }
+      result
+    }
+    val init = atPos(initstart)(Init(tpt, args))
+    init.id.pos = Position(input, idstart, idstart) // zero-length position right after the type
+    init
+  }
+  // Parses the optional `{ stats }` body; nested classes/objects/traits are accepted only in DefnObjectContext.
+  private def templateBraces(
+      ctx: TemplateContext,
+      inits: List[Init]): Template = {
+    newLineOptWhenFollowedBy(LBRACE)
+    if (in.token == LBRACE) {
+      inBraces {
+        val stats = List.newBuilder[Stat]
+        var exitOnError = false
+        while (!in.token.isStatSeqEnd && !exitOnError) {
+          if (in.token == IMPORT) {
+            stats += `import`()
+          } else if (in.token.isTermIntro) {
+            stats += term()
+          } else if (in.token.isTemplateDefnIntro) {
+            val start = in.offset
+            val mods = defnMods(modTokens.templateDefn)
+            val stat = in.token match {
+              case CASECLASS =>
+                if (ctx == DefnObjectContext) {
+                  val modCase = atPos(in.offset)(ModCase())
+                  in.nextToken()
+                  defnClass(start, modCase +: mods)
+                } else {
+                  crash("inner classes")
+                }
+              case CASEOBJECT =>
+                if (ctx == DefnObjectContext) {
+                  val modCase = atPos(in.offset)(ModCase())
+                  in.nextToken()
+                  defnObject(start, modCase +: mods) // was defnClass: a `case object` must be parsed as an object
+                } else {
+                  crash("inner objects")
+                }
+              case CLASS =>
+                if (ctx == DefnObjectContext) {
+                  in.nextToken()
+                  defnClass(start, mods)
+                } else {
+                  crash("inner classes")
+                }
+              case DEF =>
+                in.nextToken()
+                defnDef(start, mods)
+              case OBJECT =>
+                if (ctx == DefnObjectContext) {
+                  in.nextToken()
+                  defnObject(start, mods)
+                } else {
+                  crash("inner objects")
+                }
+              case TRAIT =>
+                if (ctx == DefnObjectContext) {
+                  in.nextToken()
+                  defnTrait(start, mods)
+                } else {
+                  crash("inner traits")
+                }
+              case TYPE =>
+                in.nextToken()
+                defnType(start, mods)
+              case VAL =>
+                val modVal = atPos(in.offset)(ModVal())
+                in.nextToken()
+                defnField(start, mods :+ modVal)
+              case VAR =>
+                val modVar = atPos(in.offset)(ModVar())
+                in.nextToken()
+                defnField(start, mods :+ modVar)
+              case _ =>
+                val errOffset = in.offset
+                reportOffset(errOffset, ExpectedStartOfDefinition)
+                atPos(errOffset)(errorStat())
+            }
+            stats += stat
+          } else if (!in.token.isStatSep) {
+            exitOnError = in.token.mustStartStat
+            reportOffset(in.offset, IllegalStartOfDefinition)
+          }
+          acceptStatSepUnlessAtEnd()
+        }
+        Template(inits, Some(stats.result))
+      }
+    } else {
+      val stats = None
+      Template(inits, stats)
+    }
+  }
+}
diff --git
a/src/main/scala/rsc/parse/Terms.scala b/src/main/scala/rsc/parse/Terms.scala
new file mode 100644
index 0000000..b7796c0
--- /dev/null
+++ b/src/main/scala/rsc/parse/Terms.scala
@@ -0,0 +1,397 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse
+
+import rsc.lexis._
+import rsc.report._
+import rsc.syntax._
+
+trait Terms {
+  self: Parser =>
+  // Parses an argument list: either a single braced block or a parenthesized, comma-separated list of terms.
+  def termArgs(): List[Term] = {
+    val start = in.offset
+    if (in.token == LBRACE) {
+      List(blockBraces(start))
+    } else {
+      inParens {
+        if (in.token == RPAREN) {
+          Nil
+        } else {
+          commaSeparated {
+            val unfinished = term()
+            unfinished match {
+              case TermAssign(id: TermId, rhs) =>
+                crash("named and default arguments")
+              case _ =>
+                unfinished
+            }
+          }
+        }
+      }
+    }
+  }
+  // Parses a term; if `=>` follows, the parsed term is reinterpreted as the parameter list of a function literal.
+  def term(): Term = {
+    if (in.token == IMPLICIT) {
+      crash("implicit parameters")
+    } else {
+      val start = in.offset
+      val unfinished = term1()
+      if (in.token == ARROW) {
+        in.nextToken()
+        val unfinishedReinterpretedAsTermParams = {
+          object TermParamLike {
+            def unapply(term: Term): Option[TermParam] = term match {
+              case id: TermId =>
+                crash("type inference")
+              case TermAscribe(id: TermId, tpt) =>
+                Some(atPos(term.pos)(TermParam(Nil, id, tpt)))
+              case _ =>
+                crash(term)
+            }
+          }
+          unfinished match {
+            case TermLit(()) =>
+              Nil
+            case TermParamLike(termParam) =>
+              List(termParam)
+            case TermTuple(args) =>
+              val termParams = args.flatMap(TermParamLike.unapply)
+              if (args.length != termParams.length) crash(unfinished)
+              termParams
+            case _ =>
+              crash(unfinished)
+          }
+        }
+        val body = term()
+        atPos(start)(TermFunction(unfinishedReinterpretedAsTermParams, body))
+      } else {
+        unfinished
+      }
+    }
+  }
+  // Parses control-flow terms (if/while/do/throw/return) or a postfix term with optional `=`, `:` or `match` suffix.
+  def term1(): Term = {
+    val start = in.offset
+    in.token match {
+      case IF =>
+        in.nextToken()
+        val cond = inParens(term())
+        newLinesOpt()
+        val thenp = term()
+        val elsep = {
+          if (in.token == ELSE) {
+            in.nextToken()
+            Some(term())
+          } else {
+            None
+          }
+        }
+        atPos(start)(TermIf(cond, thenp, elsep))
+      case WHILE =>
+        in.nextToken()
+        val cond = inParens(term())
+        newLinesOpt()
+        val body = term()
+        atPos(start)(TermWhile(cond, body))
+      case DO =>
+        in.nextToken()
+        val body = term()
+        if (in.token.isStatSep) in.nextToken()
+        accept(WHILE)
+        val cond = inParens(term())
+        atPos(start)(TermDo(body, cond))
+      case TRY =>
+        crash("exception handling")
+      case THROW =>
+        in.nextToken()
+        val term = this.term()
+        atPos(start)(TermThrow(term))
+      case RETURN =>
+        in.nextToken()
+        val term = {
+          if (in.token.isTermIntro) {
+            Some(this.term())
+          } else {
+            None
+          }
+        }
+        atPos(start)(TermReturn(term))
+      case FOR =>
+        crash("for comprehensions")
+      case _ =>
+        val unfinished = postfixTerm()
+        in.token match {
+          case EQUALS =>
+            unfinished match {
+              case TermId(_) | TermSelect(_, _) | TermApply(_, _) =>
+                in.nextToken()
+                val lhs = unfinished
+                val rhs = term()
+                atPos(start)(TermAssign(lhs, rhs))
+              case _ =>
+                unfinished // `=` is left unconsumed when the left-hand side is not assignable
+            }
+          case COLON =>
+            in.nextToken()
+            in.token match {
+              case USCORE =>
+                in.nextToken()
+                if (in.token == ID && in.idValue == "*") {
+                  in.nextToken()
+                  val term = unfinished
+                  atPos(start)(TermRepeat(term))
+                } else {
+                  val errOffset = in.offset
+                  reportOffset(errOffset, ExpectedId(_, "*", in.token))
+                  atPos(errOffset)(errorTerm())
+                }
+              case AT =>
+                crash("annotations")
+              case _ =>
+                val term = unfinished
+                val tpt = infixTpt()
+                atPos(start)(TermAscribe(term, tpt))
+            }
+          case MATCH =>
+            in.nextToken()
+            val term = unfinished
+            val cases = inBraces(this.cases())
+            atPos(start)(TermMatch(term, cases))
+          case _ =>
+            unfinished
+        }
+    }
+  }
+  // Parses a prefix term followed by any infix operators, allowing a trailing postfix operator.
+  def postfixTerm(): Term = {
+    val unfinished = prefixTerm()
+    infixOps(
+      unfinished,
+      canStartOperand = introTokens.term,
+      operand = () => prefixTerm(),
+      maybePostfix = true)
+  }
+  // Pending (left operand, operator) pairs of the infix expression being parsed, innermost first.
+  private case class OpInfo(operand: Term, operator: TermId, offset: Offset)
+  private var opStack: List[OpInfo] = Nil
+  // Pops entries above `base` into TermApplyInfix nodes while `force` is set or op2 does not bind tighter than the top.
+  private def reduceStack(
+      base: List[OpInfo],
+      top: Term,
+      op2: String,
+      force: Boolean): Term = {
+    def op1 = opStack.head.operator.value
+    if (opStack != base && op1.precedence == op2.precedence) {
+      if (op1.isLeftAssoc != op2.isLeftAssoc) {
+        reportOffset(
+          opStack.head.offset,
+          MixedLeftAndRightAssociativeOps(_, op1, op2))
+      }
+    }
+    def loop(top: Term): Term = {
+      if (opStack == base) {
+        top
+      } else {
+        val op1Info = opStack.head
+        val op1 = op1Info.operator.value
+        val lowerPrecedence = op2.precedence < op1.precedence
+        val samePrecedence = op2.precedence == op1.precedence && op1.isLeftAssoc
+        if (force || lowerPrecedence || samePrecedence) {
+          opStack = opStack.tail
+          val parts = List(op1Info.operand, op1Info.operator, top)
+          val start = parts.map(_.pos.start).min
+          val end = parts.map(_.pos.end).max
+          val top1 = atPos(start, end)(
+            TermApplyInfix(op1Info.operand, op1Info.operator, Nil, top))
+          loop(top1)
+        } else {
+          top
+        }
+      }
+    }
+    loop(top)
+  }
+  // Operator loop: pushes each operator with its left operand on opStack and reduces according to precedence.
+  private def infixOps(
+      first: Term,
+      canStartOperand: Token => Boolean,
+      operand: () => Term,
+      notAnOperator: String = "",
+      maybePostfix: Boolean = false): Term = {
+    val base = opStack
+    var top = first
+    while (in.token == ID && in.idValue != notAnOperator) {
+      val op = termId()
+      top = reduceStack(base, top, op.value, force = false)
+      opStack = OpInfo(top, op, in.offset) :: opStack
+      newLineOptWhenFollowedBy(canStartOperand)
+      if (maybePostfix && !canStartOperand(in.token)) {
+        val topInfo = opStack.head // no operand follows: the operator just pushed is a postfix operator
+        opStack = opStack.tail
+        val od =
+          reduceStack(base, topInfo.operand, "", force = true)
+        return atPos(od.pos.start)(TermApplyPostfix(od, topInfo.operator))
+      }
+      top = operand()
+    }
+    reduceStack(base, top, "", force = true)
+  }
+  // Parses an optional unary `-`, `+`, `~` or `!` applied to a simple term.
+  private def prefixTerm(): Term = {
+    val start = in.offset
+    if (in.token == ID && (in.idValue == "-" ||
+        in.idValue == "+" ||
+        in.idValue == "~" ||
+        in.idValue == "!")) {
+      val id = termId()
+      val arg = simpleTerm()
+      atPos(start)(TermApplyPrefix(id, arg))
+    } else {
+      simpleTerm()
+    }
+  }
+  // Parses a path, parenthesized term/tuple, block, `new`, or literal, then any selections/applications.
+  private def simpleTerm(): Term = {
+    val start = in.offset
+    var canApply = true
+    val unfinished = in.token match {
+      case ID | THIS | SUPER =>
+        canApply = true
+        termPath()
+      case USCORE =>
+        crash("type inference")
+      case LPAREN =>
+        canApply = true
+        val terms = termArgs()
+        terms match {
+          case Nil => atPos(start)(TermLit(()))
+          case term :: Nil => term
+          case terms => atPos(start)(TermTuple(terms))
+        }
+      case LBRACE =>
+        canApply = false
+        blockBraces(start)
+      case NEW =>
+        canApply = false
+        in.nextToken()
+        newTemplate() match {
+          case Template(List(init), None) =>
+            atPos(start)(TermNew(init))
+          case Template(inits, statsOpt) =>
+            crash("anonymous classes")
+        }
+      case _ =>
+        if (in.token.isLit) {
+          canApply = true
+          val value = literal()
+          atPos(start)(TermLit(value))
+        } else {
+          canApply = true
+          val errOffset = in.offset
+          reportOffset(errOffset, IllegalStartOfSimpleTerm)
+          atPos(errOffset)(errorTerm())
+        }
+    }
+    simpleTermRest(start, unfinished, canApply = canApply)
+  }
+  // Extends `unfinished` with `.id`, `[targs]`, argument lists (only if canApply) or a trailing `_` (eta).
+  private def simpleTermRest(
+      start: Offset,
+      unfinished: Term,
+      canApply: Boolean): Term = {
+    if (canApply) newLineOptWhenFollowedBy(LBRACE)
+    in.token match {
+      case DOT =>
+        in.nextToken()
+        val qual = unfinished
+        val id = termId()
+        val unfinished1 = atPos(start)(TermSelect(qual, id))
+        simpleTermRest(start, unfinished1, canApply = true)
+      case LBRACKET =>
+        val fun = unfinished
+        val args = tptArgs()
+        val unfinished1 = atPos(start)(TermApplyType(fun, args))
+        simpleTermRest(start, unfinished1, canApply = true)
+      case LPAREN | LBRACE if canApply =>
+        val fun = unfinished
+        val args = termArgs()
+        val unfinished1 = atPos(start)(TermApply(fun, args))
+        simpleTermRest(start, unfinished1, canApply = true)
+      case USCORE =>
+        in.nextToken()
+        val term = unfinished
+        atPos(start)(TermEta(term))
+      case _ =>
+        unfinished
+    }
+  }
+  // Parses `{ case ... }` as a partial function, any other braced body as a block.
+  private def blockBraces(start: Offset): Term = {
+    inBraces {
+      if (in.token == CASE) {
+        val cases = this.cases()
+        atPos(start)(TermPartialFunction(cases))
+      } else {
+        val stats = blockStats()
+        atPos(start)(TermBlock(stats))
+      }
+    }
+  }
+  // Parses block statements until a sequence end or CASE; of local definitions only val/var are supported.
+  def blockStats(): List[Stat] = {
+    val stats = List.newBuilder[Stat]
+    var exitOnError = false
+    while (!in.token.isStatSeqEnd && in.token != CASE && !exitOnError) {
+      if (in.token == IMPORT) {
+        stats += `import`()
+      } else if (in.token.isTermIntro) {
+        stats += term()
+      } else if (in.token.isLocalDefnIntro) {
+        val start = in.offset
+        val mods = defnMods(modTokens.localDefn)
+        val stat = in.token match {
+          case CASECLASS =>
+            crash("local classes")
+          case CASEOBJECT =>
+            crash("local objects")
+          case CLASS =>
+            crash("local classes")
+          case DEF =>
+            crash("local methods")
+          case OBJECT =>
+            crash("local objects")
+          case TRAIT =>
+            crash("local traits")
+          case TYPE =>
+            crash("local types")
+          case VAL =>
+            val modVal = atPos(in.offset)(ModVal())
+            in.nextToken()
+            defnField(start, mods :+ modVal)
+          case VAR =>
+            val modVar = atPos(in.offset)(ModVar())
+            in.nextToken()
+            defnField(start, mods :+ modVar)
+          case _ =>
+            val errOffset = in.offset
+            reportOffset(errOffset, ExpectedStartOfDefinition)
+            atPos(errOffset)(errorStat())
+        }
+        stats += stat
+      } else if (!in.token.isStatSep && in.token != CASE) {
+        exitOnError = in.token.mustStartStat
+        val errOffset = in.offset
+        reportOffset(errOffset, IllegalStartOfStatement)
+        stats += atPos(errOffset)(errorTerm())
+      }
+      acceptStatSepUnlessAtEnd(CASE)
+    }
+    stats.result
+  }
+  // Placeholder term used for error recovery.
+  def errorTerm(): Term = {
+    TermId(Error.value)
+  }
+}
diff --git a/src/main/scala/rsc/parse/Tpts.scala b/src/main/scala/rsc/parse/Tpts.scala
new file mode 100644
index 0000000..5c9ae46
--- /dev/null
+++ b/src/main/scala/rsc/parse/Tpts.scala
@@ -0,0 +1,201 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.parse + +import rsc.lexis._ +import rsc.report._ +import rsc.syntax._ + +trait Tpts { + self: Parser => + + def tptArgs(): List[Tpt] = { + inBrackets(commaSeparated(tpt())) + } + + def paramTpt(): Tpt = { + if (in.token == ARROW) { + crash("by-name types") + } else { + tpt() + } + } + + def tpt(): Tpt = { + val start = in.offset + val unfinished = { + if (in.token == LPAREN) { + in.nextToken() + if (in.token == RPAREN) { + in.nextToken() + accept(ARROW) + val ret = tpt() + atPos(start)(TptFunction(List(ret))) + } else { + val params = commaSeparated(paramTpt) + accept(RPAREN) + if (in.token == ARROW) { + in.nextToken() + val ret = tpt() + atPos(start)(TptFunction(params :+ ret)) + } else { + var unfinished = makeTptTuple(start, params) + unfinished = simpleTptRest(start, unfinished) + unfinished = annotTptRest(start, unfinished) + unfinished = withTptRest(start, unfinished) + unfinished = refinedTptRest(start, unfinished) + infixTptRest(start, unfinished) + } + } + } else { + infixTpt() + } + } + in.token match { + case ARROW => + accept(ARROW) + val params = List(unfinished) + val ret = tpt() + atPos(start)(TptFunction(params :+ ret)) + case FORSOME => + crash("existential types") + case _ => + unfinished + } + } + + def infixTpt(): Tpt = { + val start = in.offset + val unfinished = refinedTpt() + infixTptRest(start, unfinished) + } + + private def infixTptRest(start: Offset, unfinished: Tpt): Tpt = { + if (in.token == ID) { + if (in.idValue == "*") { + in.nextToken() + val tpt = unfinished + atPos(start)(TptRepeat(tpt)) + } else { + crash("infix types") + } + } else { + unfinished + } + } + + def refinedTpt(): Tpt = { + val start = in.offset + if (in.token == LBRACE) { + crash("compound types") + } else { + val unfinished = withTpt() + refinedTptRest(start, unfinished) + } + } + + private def refinedTptRest(start: Offset, unfinished: Tpt): Tpt = { + newLineOptWhenFollowedBy(LBRACE) + if (in.token == LBRACE) { + crash("compound types") + } else { + 
unfinished + } + } + + private def refineStats(): List[Stat] = inBraces { + val stats = List.newBuilder[Stat] + while (!in.token.isStatSeqEnd) { + if (in.token.isRefineDefnIntro) { + val start = in.offset + val mods = defnMods(modTokens.refineDefn) + val stat = in.token match { + case VAL => + val modVal = atPos(in.offset)(ModVal()) + in.nextToken() + defnField(start, mods :+ modVal) + case VAR => + val modVar = atPos(in.offset)(ModVar()) + in.nextToken() + defnField(start, mods :+ modVar) + case DEF => + in.nextToken() + defnDef(start, mods) + case TYPE => + in.nextToken() + defnType(start, mods) + case _ => + val errOffset = in.offset + reportOffset(errOffset, ExpectedStartOfDefinition) + atPos(errOffset)(errorStat()) + } + stats += stat + } else if (!in.token.isStatSep) { + reportOffset(in.offset, IllegalStartOfDeclaration) + } + acceptStatSepUnlessAtEnd() + } + stats.result + } + + private def withTpt(): Tpt = { + val start = in.offset + val unfinished = annotTpt() + withTptRest(start, unfinished) + } + + private def withTptRest(start: Offset, unfinished: Tpt): Tpt = { + if (in.token == WITH) { + crash("compound types") + } else { + unfinished + } + } + + def annotTpt(): Tpt = { + val start = in.offset + val unfinished = simpleTpt() + annotTptRest(start, unfinished) + } + + private def annotTptRest(start: Offset, unfinished: Tpt): Tpt = { + if (in.token == AT) { + crash("annotations") + } else { + unfinished + } + } + + def simpleTpt(): Tpt = { + val start = in.offset + val unfinished = { + if (in.token == LPAREN) { + makeTptTuple(start, tptArgs()) + } else if (in.token == LBRACE) { + crash("compound types") + } else if (in.token == USCORE) { + crash("existential types") + } else { + tptPath() + } + } + simpleTptRest(start, unfinished) + } + + private def simpleTptRest(start: Offset, unfinished: Tpt): Tpt = { + in.token match { + case HASH => + crash("type projections") + case LBRACKET => + val fun = unfinished + val args = tptArgs() + 
atPos(start)(TptParameterize(fun, args)) + case _ => + unfinished + } + } + + def errorTpt(): Tpt = { + TptId(Error.value) + } +} diff --git a/src/main/scala/rsc/pretty/Escape.scala b/src/main/scala/rsc/pretty/Escape.scala new file mode 100644 index 0000000..2baa556 --- /dev/null +++ b/src/main/scala/rsc/pretty/Escape.scala @@ -0,0 +1,26 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.pretty + +object Escape { + private def escape(x: Char, quote: Char): String = { + x match { + case x if x == quote => "\\" + x + case '\b' => "\\b" + case '\n' => "\\n" + case '\t' => "\\t" + case '\r' => "\\r" + case '\f' => "\\f" + case '\\' => "\\\\" + case other => other.toString + } + } + + def apply(x: Char): String = { + escape(x, '\'') + } + + def apply(x: String): String = { + x.flatMap(escape(_, '"')) + } +} diff --git a/src/main/scala/rsc/pretty/Ops.scala b/src/main/scala/rsc/pretty/Ops.scala new file mode 100644 index 0000000..e6bcf3a --- /dev/null +++ b/src/main/scala/rsc/pretty/Ops.scala @@ -0,0 +1,21 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.pretty + +trait Ops { + implicit class StrOps[T: Str](x: T) { + def str: String = { + val p = new Printer + implicitly[Str[T]].apply(p, x) + p.toString + } + } + + implicit class ReplOps[T: Repl](x: T) { + def repl: String = { + val p = new Printer + implicitly[Repl[T]].apply(p, x) + p.toString + } + } +} diff --git a/src/main/scala/rsc/pretty/Pretty.scala b/src/main/scala/rsc/pretty/Pretty.scala new file mode 100644 index 0000000..2523449 --- /dev/null +++ b/src/main/scala/rsc/pretty/Pretty.scala @@ -0,0 +1,25 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). 
+package rsc.pretty + +trait Pretty { + def printStr(p: Printer): Unit + + def str: String = { + val p = new Printer + printStr(p) + p.toString + } + + def printRepl(p: Printer): Unit + + def repl: String = { + val p = new Printer + printRepl(p) + p.toString + } + + final override def toString: String = { + str + } +} diff --git a/src/main/scala/rsc/pretty/PrettyAtom.scala b/src/main/scala/rsc/pretty/PrettyAtom.scala new file mode 100644 index 0000000..d009e73 --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettyAtom.scala @@ -0,0 +1,45 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.pretty + +import rsc.syntax._ +import rsc.typecheck._ + +object PrettyAtom { + def str(p: Printer, x: Atom): Unit = { + x match { + case ApplyAtom(args) => + p.str("[") + p.rep(args, ", ")(p.str) + p.str("]") + case IdAtom(id) => + p.str(id) + case SuperAtom(mix) => + p.str("super") + mix match { + case AnonId() => + () + case NamedId(value) => + p.str("[") + p.str(value) + p.str("]") + } + case ThisAtom(qual) => + qual match { + case AnonId() => + () + case NamedId(value) => + p.str(value) + p.str(".") + } + p.str("this") + case UnsupportedAtom(unsupported) => + p.str("unsupported:") + p.str(unsupported) + } + } + + def repl(p: Printer, x: Atom): Unit = { + new ProductRepl(p).apply(x) + } +} diff --git a/src/main/scala/rsc/pretty/PrettyCompiler.scala b/src/main/scala/rsc/pretty/PrettyCompiler.scala new file mode 100644 index 0000000..b2d5b4b --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettyCompiler.scala @@ -0,0 +1,25 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). 
+package rsc.pretty + +import rsc.Compiler +import rsc.util._ + +object PrettyCompiler { + def str(p: Printer, x: Compiler): Unit = { + p.settings = x.settings + p.rep(x.trees, EOL) { tree => + p.header(tree.pos.input.path.toString) + p.str(tree) + p.newline() + } + p.newline() + p.str(x.symtab) + p.newline() + p.str(x.todo) + } + + def repl(p: Printer, x: Compiler): Unit = { + crash(x) + } +} diff --git a/src/main/scala/rsc/pretty/PrettyEnv.scala b/src/main/scala/rsc/pretty/PrettyEnv.scala new file mode 100644 index 0000000..45c2d1b --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettyEnv.scala @@ -0,0 +1,16 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.pretty + +import rsc.typecheck._ +import rsc.util._ + +object PrettyEnv { + def str(p: Printer, x: Env): Unit = { + p.str(x._scopes) + } + + def repl(p: Printer, x: Env): Unit = { + crash(x) + } +} diff --git a/src/main/scala/rsc/pretty/PrettyInput.scala b/src/main/scala/rsc/pretty/PrettyInput.scala new file mode 100644 index 0000000..15449fd --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettyInput.scala @@ -0,0 +1,27 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.pretty + +import rsc.lexis._ + +object PrettyInput { + def str(p: Printer, x: Input): Unit = { + x match { + case NoInput => + p.str("ø") + case _ => + p.str(x.path.toString) + } + } + + def repl(p: Printer, x: Input): Unit = { + x match { + case NoInput => + p.str("NoInput") + case _ => + p.str("Input(Paths.get(") + p.repl(x.path.toString) + p.str("))") + } + } +} diff --git a/src/main/scala/rsc/pretty/PrettyMessage.scala b/src/main/scala/rsc/pretty/PrettyMessage.scala new file mode 100644 index 0000000..4bb7c0d --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettyMessage.scala @@ -0,0 +1,45 @@ +// Copyright (c) 2017-2018 Twitter, Inc. 
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.pretty + +import rsc.lexis._ +import rsc.report._ + +object PrettyMessage { + def str(p: Printer, x: Message): Unit = { + if (x.pos.input != NoInput) { + p.str(s"${x.pos.input.path}:") + } + if (x.pos.start != NoOffset) { + p.str(s"${x.pos.startLine + 1}: ") + } + x.sev match { + case FatalSeverity => p.str("error: ") + case ErrorSeverity => p.str("error: ") + case WarningSeverity => p.str("warning: ") + case VerboseSeverity => p.str("") + } + p.str(x.text) + if (x.pos.start != NoOffset) { + p.str(EOL) + val lineContent = { + val input = x.pos.input + val start = input.lineToOffset(x.pos.startLine) + val notEof = start < input.string.length + val end = if (notEof) input.lineToOffset(x.pos.startLine + 1) else start + input.string.substring(start, end).stripLineEnd + } + p.str(lineContent) + p.str(EOL) + p.str(" " * x.pos.startColumn + "^") + } + if (x.explanation != "") { + p.str(EOL) + p.str(x.explanation) + } + } + + def repl(p: Printer, x: Message): Unit = { + new ProductRepl(p).apply(x) + } +} diff --git a/src/main/scala/rsc/pretty/PrettyName.scala b/src/main/scala/rsc/pretty/PrettyName.scala new file mode 100644 index 0000000..8de0377 --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettyName.scala @@ -0,0 +1,22 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). 
+package rsc.pretty + +import rsc.semantics._ + +object PrettyName { + def str(p: Printer, x: Name): Unit = { + x match { + case SomeName(value) => + p.str(value) + case TermName(value) => + p.str(value + ".") + case TypeName(value) => + p.str(value + "#") + } + } + + def repl(p: Printer, x: Name): Unit = { + new ProductRepl(p).apply(x) + } +} diff --git a/src/main/scala/rsc/pretty/PrettyOutline.scala b/src/main/scala/rsc/pretty/PrettyOutline.scala new file mode 100644 index 0000000..d990104 --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettyOutline.scala @@ -0,0 +1,38 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.pretty + +import rsc.syntax._ + +object PrettyOutline { + def desc(outline: Outline): String = { + outline match { + case outline: DefnClass => + s"class ${outline.id.value}" + case outline: DefnDef => + s"method ${outline.id.value}" + case outline: DefnField => + val isVal = outline.mods.exists(_.isInstanceOf[ModVal]) + if (isVal) s"val ${outline.id.value}" else s"var ${outline.id.value}" + case outline: DefnObject => + s"object ${outline.id.value}" + case outline: DefnPackage => + s"package ${outline.pid}" + case outline: DefnTrait => + s"trait ${outline.id.value}" + case outline: DefnType => + s"type ${outline.id.value}" + case outline: PatVar => + outline.id match { + case AnonId() => s"pattern" + case id: NamedId => s"pattern ${id.value}" + } + case outline: PrimaryCtor => + s"constructor" + case outline: TermParam => + s"parameter ${outline.id.value}" + case outline: TypeParam => + s"type parameter ${outline.id.value}" + } + } +} diff --git a/src/main/scala/rsc/pretty/PrettyPosition.scala b/src/main/scala/rsc/pretty/PrettyPosition.scala new file mode 100644 index 0000000..18fb288 --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettyPosition.scala @@ -0,0 +1,33 @@ +// Copyright (c) 2017-2018 Twitter, Inc. 
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.pretty + +import rsc.lexis._ + +object PrettyPosition { + def str(p: Printer, x: Position): Unit = { + x match { + case NoPosition => + p.str("ø") + case _ => + p.str(x.input) + p.str("@") + p.str(s"${x.start}..${x.end}") + } + } + + def repl(p: Printer, x: Position): Unit = { + x match { + case NoPosition => + p.str("NoPosition") + case _ => + p.str("Position(") + p.repl(x.input) + p.str(", ") + p.repl(x.start) + p.str(", ") + p.repl(x.end) + p.str(")") + } + } +} diff --git a/src/main/scala/rsc/pretty/PrettyResolution.scala b/src/main/scala/rsc/pretty/PrettyResolution.scala new file mode 100644 index 0000000..81c2106 --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettyResolution.scala @@ -0,0 +1,25 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.pretty + +import rsc.typecheck._ + +object PrettyResolution { + def str(p: Printer, x: Resolution): Unit = { + x match { + case BlockedResolution(scope) => + p.str("b:") + p.str(scope.sym) + case MissingResolution => + p.str("m") + case ErrorResolution => + p.str("e") + case FoundResolution(sym) => + p.str(sym) + } + } + + def repl(p: Printer, x: Resolution): Unit = { + new ProductRepl(p).apply(x) + } +} diff --git a/src/main/scala/rsc/pretty/PrettyScope.scala b/src/main/scala/rsc/pretty/PrettyScope.scala new file mode 100644 index 0000000..10e3a07 --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettyScope.scala @@ -0,0 +1,43 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). 
+package rsc.pretty + +import scala.collection.JavaConverters._ +import rsc.typecheck._ +import rsc.util._ + +object PrettyScope { + def str(p: Printer, x: Scope): Unit = { + p.str(x.sym) + p.str(" ") + p.str(x.status) + if (x.status.isSucceeded) { + x match { + case x: ImporterScope => + p.str(" ") + p.str(x.parent.sym) + case x: TemplateScope => + p.str(" ") + p.rep(x.parents, " with ")(scope => p.str(scope.sym)) + case x: SuperScope => + p.str(" ") + p.rep(x.underlying.parents, " with ")(scope => p.str(scope.sym)) + case _ => + () + } + } + x match { + case x: OwnerScope => + p.str(" [") + val storage = x._storage.asScala.toList.sortBy(_._1.str) + p.rep(storage.map(_._2), ", ")(p.str) + p.str("]") + case _ => + () + } + } + + def repl(p: Printer, x: Scope): Unit = { + crash(x) + } +} diff --git a/src/main/scala/rsc/pretty/PrettySeverity.scala b/src/main/scala/rsc/pretty/PrettySeverity.scala new file mode 100644 index 0000000..ec23bcf --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettySeverity.scala @@ -0,0 +1,15 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.pretty + +import rsc.report._ + +object PrettySeverity { + def str(p: Printer, x: Severity): Unit = { + p.str(x.productPrefix) + } + + def repl(p: Printer, x: Severity): Unit = { + p.str(x.productPrefix) + } +} diff --git a/src/main/scala/rsc/pretty/PrettyStatus.scala b/src/main/scala/rsc/pretty/PrettyStatus.scala new file mode 100644 index 0000000..d859ed9 --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettyStatus.scala @@ -0,0 +1,28 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). 
+package rsc.pretty + +import rsc.typecheck._ + +object PrettyStatus { + def str(p: Printer, x: Status): Unit = { + x match { + case PendingStatus => + p.str("?") + case BlockedStatus(scope) => + p.str("b:") + p.str(scope.sym) + case CyclicStatus(scopes) => + p.str("c:") + p.str(scopes.map(_.sym)) + case ErrorStatus => + p.str("e") + case SucceededStatus => + p.str("!") + } + } + + def repl(p: Printer, x: Status): Unit = { + new ProductRepl(p).apply(x) + } +} diff --git a/src/main/scala/rsc/pretty/PrettySymtab.scala b/src/main/scala/rsc/pretty/PrettySymtab.scala new file mode 100644 index 0000000..809a4b9 --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettySymtab.scala @@ -0,0 +1,37 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.pretty + +import scala.collection.JavaConverters._ +import rsc.typecheck._ +import rsc.util._ + +object PrettySymtab { + def str(p: Printer, x: Symtab): Unit = { + p.header("Scopes (symtab)") + val scopes = x._scopes.asScala.toList.sortBy(_._1.str) + p.rep(scopes, EOL) { + case (_, scope) => + p.str(scope) + } + if (scopes.nonEmpty) { + p.newline() + } + p.newline() + p.header("Outlines (symtab)") + val outlines = x._outlines.asScala.toList.sortBy(_._1.str) + p.rep(outlines, EOL) { + case (sym, outline) => + p.str(sym) + p.str(" => ") + p.str(outline) + } + if (scopes.nonEmpty) { + p.newline() + } + } + + def repl(p: Printer, x: Symtab): Unit = { + crash(x) + } +} diff --git a/src/main/scala/rsc/pretty/PrettyTodo.scala b/src/main/scala/rsc/pretty/PrettyTodo.scala new file mode 100644 index 0000000..2e95bd8 --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettyTodo.scala @@ -0,0 +1,71 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). 
+package rsc.pretty + +import scala.collection.JavaConverters._ +import rsc.typecheck._ +import rsc.util._ + +object PrettyTodo { + def str(p: Printer, x: Todo): Unit = { + p.header("Scopes (todo)") + val scopes = x.scopes.asScala.toList + p.rep(scopes, EOL) { + case (env, scope) => + p.str(scope) + if (p.settings.xprint("envs")) { + p.str(" => ") + p.str(env) + } + } + if (scopes.nonEmpty) { + p.newline() + } + p.newline() + p.header("Mods (todo)") + val mods = x.mods.asScala.toList + p.rep(mods, EOL) { + case (env, mod) => + p.str(mod) + if (p.settings.xprint("envs")) { + p.str(" => ") + p.str(env) + } + } + if (mods.nonEmpty) { + p.newline() + } + p.newline() + p.header("Tpts (todo)") + val tpts = x.tpts.asScala.toList + p.rep(tpts, EOL) { + case (env, tpt) => + p.str(tpt) + if (p.settings.xprint("envs")) { + p.str(" => ") + p.str(env) + } + } + if (tpts.nonEmpty) { + p.newline() + } + p.newline() + p.header("Terms (todo)") + val terms = x.terms.asScala.toList + p.rep(terms, EOL) { + case (env, term) => + p.str(term) + if (p.settings.xprint("envs")) { + p.str(" => ") + p.str(env) + } + } + if (terms.nonEmpty) { + p.newline() + } + } + + def repl(p: Printer, x: Todo): Unit = { + crash(x) + } +} diff --git a/src/main/scala/rsc/pretty/PrettyToken.scala b/src/main/scala/rsc/pretty/PrettyToken.scala new file mode 100644 index 0000000..4f31a22 --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettyToken.scala @@ -0,0 +1,96 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). 
+package rsc.pretty + +import rsc.lexis._ + +object PrettyToken { + def str(p: Printer, x: Token): Unit = { + p.str(pretty(x)._1) + } + + def repl(p: Printer, x: Token): Unit = { + p.str(pretty(x)._2) + } + + private def pretty(x: Token): (String, String) = { + x match { + case ABSTRACT => ("abstract", "ABSTRACT") + case ARROW => ("=>", "ARROW") + case AT => ("@", "AT") + case BOF => ("beginning of file", "BOF") + case CASE => ("case", "CASE") + case CATCH => ("catch", "CATCH") + case CLASS => ("class", "CLASS") + case COLON => (":", "COLON") + case COMMA => (",", "COMMA") + case COMMENT => ("comment", "COMMENT") + case DEF => ("def", "DEF") + case DO => ("do", "DO") + case DOT => (".", "DOT") + case ELSE => ("else", "ELSE") + case EOF => ("eof", "EOF") + case EQUALS => ("equals", "EQUALS") + case ERROR => ("error", "ERROR") + case EXTENDS => ("extends", "EXTENDS") + case FALSE => ("false", "FALSE") + case FINAL => ("final", "FINAL") + case FINALLY => ("finally", "FINALLY") + case FOR => ("for", "FOR") + case FORSOME => ("forSome", "FORSOME") + case HASH => ("#", "HASH") + case ID => ("identifier", "ID") + case IF => ("if", "IF") + case IMPLICIT => ("implicit", "IMPLICIT") + case IMPORT => ("import", "IMPORT") + case LARROW => ("<-", "LARROW") + case LAZY => ("lazy", "LAZY") + case LBRACE => ("{", "LBRACE") + case LBRACKET => ("[", "LBRACKET") + case LITCHAR => ("character literal", "LITCHAR") + case LITDOUBLE => ("double literal", "LITDOUBLE") + case LITFLOAT => ("float literal", "LITFLOAT") + case LITINT => ("integer literal", "LITINT") + case LITLONG => ("long literal", "LITLONG") + case LITSTRING => ("string literal", "LITSTRING") + case LITSYMBOL => ("symbol literal", "LITSYMBOL") + case LPAREN => ("(", "LPAREN") + case MATCH => ("match", "MATCH") + case NEW => ("new", "NEW") + case NEWLINE => ("newline", "NEWLINE") + case NULL => ("null", "NULL") + case OBJECT => ("object", "OBJECT") + case OVERRIDE => ("override", "OVERRIDE") + case PACKAGE => ("package", 
"PACKAGE") + case PRIVATE => ("private", "PRIVATE") + case PROTECTED => ("protected", "PROTECTED") + case RBRACE => ("}", "RBRACE") + case RBRACKET => ("]", "RBRACKET") + case RETURN => ("return", "RETURN") + case RPAREN => (")", "RPAREN") + case SEALED => ("sealed", "SEALED") + case SEMI => (";", "SEMI") + case SUBTYPE => ("<:", "SUBTYPE") + case SUPER => ("super", "SUPER") + case SUPERTYPE => (">:", "SUPERTYPE") + case THIS => ("this", "THIS") + case THROW => ("throw", "THROW") + case TRAIT => ("trait", "TRAIT") + case TRUE => ("true", "TRUE") + case TRY => ("try", "TRY") + case TYPE => ("type", "TYPE") + case USCORE => ("_", "USCORE") + case VAL => ("val", "VAL") + case VAR => ("var", "VAR") + case VIEWBOUND => ("<%", "VIEWBOUND") + case WHILE => ("while", "WHILE") + case WHITESPACE => ("whitespace", "WHITESPACE") + case WITH => ("with", "WITH") + case YIELD => ("yield", "YIELD") + case 256 => ("case class", "CASECLASS") + case 257 => ("case object", "CASEOBJECT") + case 258 => ("newline", "NL1") + case 259 => ("newline", "NL2") + } + } +} diff --git a/src/main/scala/rsc/pretty/PrettyTree.scala b/src/main/scala/rsc/pretty/PrettyTree.scala new file mode 100644 index 0000000..69e3744 --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettyTree.scala @@ -0,0 +1,15 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.pretty + +import rsc.syntax._ + +object PrettyTree { + def str(p: Printer, tree: Tree): Unit = { + new TreeStr(p).apply(tree) + } + + def repl(p: Printer, tree: Tree): Unit = { + new ProductRepl(p).apply(tree) + } +} diff --git a/src/main/scala/rsc/pretty/PrettyType.scala b/src/main/scala/rsc/pretty/PrettyType.scala new file mode 100644 index 0000000..af7af45 --- /dev/null +++ b/src/main/scala/rsc/pretty/PrettyType.scala @@ -0,0 +1,38 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). 
+package rsc.pretty + +import rsc.semantics._ + +object PrettyType { + def str(p: Printer, tpe: Type): Unit = { + tpe match { + case NoType => + p.str("ø") + case SimpleType(sym, targs) => + p.str("<" + sym + ">") + if (targs.nonEmpty) { + p.str("[") + p.rep(targs, ", ")(p.str) + p.str("]") + } + case MethodType(tparams, vparamss, ret) => + if (tparams.nonEmpty) { + p.str("[") + p.rep(tparams, ", ")(sym => p.str("<" + sym + ">")) + p.str("]") + } + vparamss.foreach { vparams => + p.str("(") + p.rep(vparams, ", ")(sym => p.str("<" + sym + ">")) + p.str(")") + } + p.str(":") + p.str(ret) + } + } + + def repl(p: Printer, tpe: Type): Unit = { + new ProductRepl(p).apply(tpe) + } +} diff --git a/src/main/scala/rsc/pretty/Printer.scala b/src/main/scala/rsc/pretty/Printer.scala new file mode 100644 index 0000000..75954e1 --- /dev/null +++ b/src/main/scala/rsc/pretty/Printer.scala @@ -0,0 +1,138 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). 
+package rsc.pretty + +import java.lang.StringBuilder +import scala.collection.mutable + +final class Printer { + private var sb = new StringBuilder + private var indentation = 0 + private var afterNewline = true + val props = mutable.Map[String, Any]() + + private[pretty] def append(s: String): Unit = { + if (afterNewline) { + sb.append(" " * indentation) + afterNewline = false + } + sb.append(s) + afterNewline = s.contains(EOL) + } + + def str[T: Str](x: T): Unit = { + implicitly[Str[T]].apply(this, x) + } + + def str(x: Pretty): Unit = { + if (x != null) x.printStr(this) + else str("null") + } + + def str(x: String): Unit = { + append(x) + } + + def repl[T: Repl](x: T): Unit = { + implicitly[Repl[T]].apply(this, x) + } + + def repl(x: Pretty): Unit = { + if (x != null) x.printRepl(this) + else str("null") + } + + def rep[T](xs: Iterable[T], sep: String)(f: T => Unit): Unit = { + if (xs.nonEmpty) { + xs.init.foreach { x => + f(x) + append(sep) + } + f(xs.last) + } + } + + def indent(n: Int = 1): Unit = { + indentation += n + } + + def unindent(n: Int = 1): Unit = { + indentation -= n + } + + def newline(): Unit = { + append(EOL) + } + + override def toString = sb.toString + + trait Wrap { + def prefix: Unit = () + def suffix: Unit = () + + def apply[A](xs: Iterable[A])(f: Iterable[A] => Unit): Unit = { + when(xs.nonEmpty)(f(xs)) + } + + def apply(body: => Unit): Unit = { + prefix + body + suffix + } + + def when(predicate: Boolean): Wrap = + if (predicate) this else NoWrap + + def unless(predicate: Boolean): Wrap = + if (predicate) NoWrap else this + } + + case object Braces extends Wrap { + override def prefix = str("{") + override def suffix = str("}") + } + + case object Brackets extends Wrap { + override def prefix = str("[") + override def suffix = str("]") + } + + case object Indent extends Wrap { + override def prefix = { + str(EOL) + indent() + } + override def suffix = { + unindent() + str(EOL) + } + } + + case object NoWrap extends Wrap + + case object 
Nest extends Wrap { + override def prefix = { + str("{") + str(EOL) + indent() + } + override def suffix = { + unindent() + str(EOL) + str("}") + } + } + + case object Parens extends Wrap { + override def prefix = str("(") + override def suffix = str(")") + } + + case class Prefix(pre: String) extends Wrap { + override def prefix = str(pre) + } + + case class Suffix(suf: String) extends Wrap { + override def suffix = str(suf) + } +} \ No newline at end of file diff --git a/src/main/scala/rsc/pretty/ProductRepl.scala b/src/main/scala/rsc/pretty/ProductRepl.scala new file mode 100644 index 0000000..5077b86 --- /dev/null +++ b/src/main/scala/rsc/pretty/ProductRepl.scala @@ -0,0 +1,59 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.pretty + +import rsc.util._ + +// NOTE: This helper is unsound, but also unquestionably useful. +// That's why we can't turn it into a typeclass instance in the Repl object, +// but we'll still be using it nonetheless. 
+ +class ProductRepl(p: Printer) { + protected def printProduct(x: Product): Unit = { + printProductPrefix(x) + printProductElements(x) + } + + protected def printProductPrefix(x: Product): Unit = { + if (x.isInstanceOf[::[_]]) p.str("List") + else if (x.productPrefix.startsWith("Tuple")) p.str("") + else p.str(x.productPrefix) + } + + protected def printProductElements(x: Product): Unit = x match { + case xs: ::[_] => + p.str("(") + p.rep(xs, ", ")(f => printProductElement(f)) + p.str(")") + case _ => + val nonEmptyElements = { + x.productArity > 0 || !x.getClass.getName.endsWith("$") + } + if (nonEmptyElements) { + p.str("(") + p.rep(x.productIterator.toList, ", ")(f => printProductElement(f)) + p.str(")") + } + } + + protected def printProductElement(f: Any): Unit = f match { + case f: Unit => p.repl(f) + case f: Boolean => p.repl(f) + case f: Byte => p.repl(f) + case f: Short => p.repl(f) + case f: Char => p.repl(f) + case f: Int => p.repl(f) + case f: Float => p.repl(f) + case f: Long => p.repl(f) + case f: Double => p.repl(f) + case f: String => p.repl(f) + case null => p.str("null") + case f: Pretty => p.repl(f) + case f: Product => printProduct(f) + case other => crash(f.getClass.toString) + } + + def apply(x: Product): Unit = { + printProduct(x) + } +} diff --git a/src/main/scala/rsc/pretty/Repl.scala b/src/main/scala/rsc/pretty/Repl.scala new file mode 100644 index 0000000..45504fb --- /dev/null +++ b/src/main/scala/rsc/pretty/Repl.scala @@ -0,0 +1,137 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). 
+package rsc.pretty
+
+import scala.{Symbol => StdlibSymbol}
+import rsc.util._
+
+trait Repl[T] {
+  def apply(p: Printer, x: T): Unit
+}
+
+object Repl {
+  def apply[T](f: (Printer, T) => Unit): Repl[T] = {
+    new Repl[T] { def apply(p: Printer, x: T): Unit = f(p, x) }
+  }
+
+  implicit def unit[T <: Unit]: Repl[T] = Repl { (p, x) =>
+    p.str(x.toString)
+  }
+
+  implicit def boolean[T <: Boolean]: Repl[T] = Repl { (p, x) =>
+    p.str(x.toString)
+  }
+
+  implicit def byte[T <: Byte]: Repl[T] = Repl { (p, x) =>
+    p.str(x.toString + ".toByte")
+  }
+
+  implicit def short[T <: Short]: Repl[T] = Repl { (p, x) =>
+    p.str(x.toString + ".toShort")
+  }
+
+  implicit def char[T <: Char]: Repl[T] = Repl { (p, x) =>
+    p.str("'" + Escape(x) + "'")
+  }
+
+  implicit def int[T <: Int]: Repl[T] = Repl { (p, x) =>
+    p.str(x.toString)
+  }
+
+  implicit def float[T <: Float]: Repl[T] = Repl { (p, x) =>
+    x match {
+      case x if x.isNaN => p.str("Float.NaN")
+      case Float.PositiveInfinity => p.str("Float.PositiveInfinity")
+      case Float.NegativeInfinity => p.str("Float.NegativeInfinity")
+      case _ =>
+        p.str(stripExtraTrailingZeros(x.toString))
+        p.str("f")
+    }
+  }
+
+  implicit def long[T <: Long]: Repl[T] = Repl { (p, x) =>
+    p.str(x.toString)
+    p.str("L")
+  }
+
+  implicit def double[T <: Double]: Repl[T] = Repl { (p, x) =>
+    x match {
+      case x if x.isNaN => p.str("Double.NaN")
+      case Double.PositiveInfinity => p.str("Double.PositiveInfinity")
+      case Double.NegativeInfinity => p.str("Double.NegativeInfinity")
+      case _ =>
+        p.str(stripExtraTrailingZeros(x.toString))
+        p.str("d")
+    }
+  }
+
+  implicit def string[T <: String]: Repl[T] = Repl { (p, x) =>
+    if (x != null) p.str("\"" + Escape(x) + "\"")
+    else p.str("null")
+  }
+
+  implicit def stdlibSymbol[T <: StdlibSymbol]: Repl[T] = Repl { (p, x) => // prints a symbol as the literal 'name
+    if (x != null) {
+      p.str("'")
+      p.str(x.name) // use name, not toString: Symbol#toString already carries its own prefix, which doubled the apostrophe
+    } else {
+      p.str("null")
+    }
+  }
+
+  implicit def pretty[T <: Pretty]: Repl[T] = Repl { (p, x) =>
+    if (x != null) x.printRepl(p)
+    else
p.str("null") + } + + implicit def list[T: Repl]: Repl[List[T]] = Repl { (p, xs) => + if (xs.isEmpty) { + p.str("Nil") + } else { + p.str("List(") + p.rep(xs, ", ")(x => p.repl(x)) + p.str(")") + } + } + + implicit def nil: Repl[Nil.type] = Repl { (p, xs) => + p.str("Nil") + } + + implicit def cons[T: Repl]: Repl[::[T]] = Repl { (p, xs) => + p.str("List(") + p.rep(xs, ", ")(x => p.repl(x)) + p.str(")") + } + + implicit def option[T: Repl]: Repl[Option[T]] = Repl { (p, xopt) => + if (xopt.isEmpty) { + p.str("None") + } else { + p.str("Some(") + p.repl(xopt.get) + p.str(")") + } + } + + implicit def none: Repl[None.type] = Repl { (p, xopt) => + p.str("None") + } + + implicit def some[T: Repl]: Repl[Some[T]] = Repl { (p, xopt) => + p.str("Some(") + p.repl(xopt.get) + p.str(")") + } + + implicit def map[T: Repl, U: Repl]: Repl[Map[T, U]] = Repl { (p, m) => + p.str("Map(") + p.rep(m.toList.sortBy(_._1.repl), ", ") { + case (k, v) => + p.repl(k) + p.str(" -> ") + p.repl(v) + } + p.str(")") + } +} diff --git a/src/main/scala/rsc/pretty/Str.scala b/src/main/scala/rsc/pretty/Str.scala new file mode 100644 index 0000000..52b290b --- /dev/null +++ b/src/main/scala/rsc/pretty/Str.scala @@ -0,0 +1,110 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). 
+package rsc.pretty
+
+import scala.{Symbol => StdlibSymbol}
+import rsc.util._
+
+trait Str[T] {
+  def apply(p: Printer, x: T): Unit
+}
+
+object Str {
+  def apply[T](f: (Printer, T) => Unit): Str[T] = {
+    new Str[T] { def apply(p: Printer, x: T): Unit = f(p, x) }
+  }
+
+  implicit def unit[T <: Unit]: Str[T] = Str { (p, x) =>
+    p.str(x.toString)
+  }
+
+  implicit def boolean[T <: Boolean]: Str[T] = Str { (p, x) =>
+    p.str(x.toString)
+  }
+
+  implicit def byte[T <: Byte]: Str[T] = Str { (p, x) =>
+    p.str(x.toString)
+  }
+
+  implicit def short[T <: Short]: Str[T] = Str { (p, x) =>
+    p.str(x.toString)
+  }
+
+  implicit def char[T <: Char]: Str[T] = Str { (p, x) =>
+    p.str(x.toString)
+  }
+
+  implicit def int[T <: Int]: Str[T] = Str { (p, x) =>
+    p.str(x.toString)
+  }
+
+  implicit def float[T <: Float]: Str[T] = Str { (p, x) =>
+    p.str(stripExtraTrailingZeros(x.toString))
+  }
+
+  implicit def long[T <: Long]: Str[T] = Str { (p, x) =>
+    p.str(x.toString)
+  }
+
+  implicit def double[T <: Double]: Str[T] = Str { (p, x) =>
+    p.str(stripExtraTrailingZeros(x.toString))
+  }
+
+  implicit def string[T <: String]: Str[T] = Str { (p, x) =>
+    if (x != null) p.append(x)
+    else p.str("null")
+  }
+
+  implicit def pretty[T <: Pretty]: Str[T] = Str { (p, x) =>
+    if (x != null) x.printStr(p)
+    else p.str("null")
+  }
+
+  implicit def stdlibSymbol[T <: StdlibSymbol]: Repl[T] = Repl { (p, x) => // NOTE(review): yields a Repl inside object Str; looks copied from Repl.stdlibSymbol - confirm whether Str[T] was intended
+    if (x != null) {
+      p.str("'")
+      p.str(x.name) // use name, not toString: Symbol#toString already carries its own prefix, which doubled the apostrophe
+    } else {
+      p.str("null")
+    }
+  }
+
+  implicit def list[T: Str]: Str[List[T]] = Str { (p, xs) =>
+    p.str("[")
+    p.rep(xs, ", ")(x => p.str(x))
+    p.str("]")
+  }
+
+  implicit def nil: Str[Nil.type] = Str { (p, xs) =>
+    p.str("[]")
+  }
+
+  implicit def cons[T: Str]: Str[::[T]] = Str { (p, xs) =>
+    p.str("[")
+    p.rep(xs, ", ")(x => p.str(x))
+    p.str("]")
+  }
+
+  implicit def option[T: Str]: Str[Option[T]] = Str { (p, xopt) =>
+    p.str(xopt.toList)
+  }
+
+  implicit def none: Str[None.type] = Str { (p, xopt) =>
+    p.str("[]")
+  }
+
+  implicit def some[T:
Str]: Str[Some[T]] = Str { (p, xopt) => + p.str(xopt.toList) + } + + implicit def map[T: Str, U: Str]: Str[Map[T, U]] = Str { (p, m) => + p.str("{") + p.rep(m.toList.sortBy(_._1.str), ", ") { + case (k, v) => + p.str(k) + p.str(" -> ") + p.str(v) + } + p.str("}") + } +} diff --git a/src/main/scala/rsc/pretty/TreeStr.scala b/src/main/scala/rsc/pretty/TreeStr.scala new file mode 100644 index 0000000..3195612 --- /dev/null +++ b/src/main/scala/rsc/pretty/TreeStr.scala @@ -0,0 +1,304 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.pretty + +import scala.{Symbol => StdlibSymbol} +import rsc.semantics._ +import rsc.syntax._ +import rsc.util._ + +class TreeStr(val p: Printer) { + def apply(x: Tree): Unit = { + x match { + case AnonId() => + p.str("") + case Case(pat, cond, stats) => + p.str("case ") + apply(pat) + p.Prefix(" if ")(cond)(apply(_, "")) + p.str(" =>") + p.Indent(stats)(apply(_, EOL)) + case DefnDef(mods, id, tparams, params, ret, rhs) => + p.Suffix(" ")(mods)(apply(_, " ")) + p.str("def ") + apply(id) + p.Brackets(tparams)(apply(_, ", ")) + p.Parens(apply(params, ", ")) + p.Prefix(": ")(apply(ret)) + p.Prefix(" = ")(rhs)(apply(_, "")) + case DefnField(mods, id, tpt, rhs) => + p.Suffix(" ")(mods)(apply(_, " ")) + apply(id) + p.Prefix(": ")(apply(tpt)) + p.Prefix(" = ")(rhs)(apply(_, "")) + case DefnPackage(pid, stats) => + p.str("package ") + p.str(pid) + p.str(" ") + p.Nest.when(stats.nonEmpty)(apply(stats, EOL)) + case x @ DefnTemplate(mods, id, tparams, ctor, inits, stats) => + p.Suffix(" ")(mods)(apply(_, " ")) + x match { + case _: DefnClass => p.str("class ") + case _: DefnTrait => p.str("trait ") + case _: DefnObject => p.str("object ") + } + apply(id) + p.Brackets(tparams)(apply(_, ", ")) + x match { + case _: DefnClass => apply(ctor) + case _: DefnTrait => + case _: DefnObject => + } + p.Prefix(" extends ")(inits)(apply(_, " with ")) + if (stats.nonEmpty) p.str(" ") + 
p.Nest.when(stats.nonEmpty)(apply(stats, EOL))
+      case DefnType(mods, id, tparams, tpt) =>
+        p.Suffix(" ")(mods)(apply(_, " "))
+        p.str("type ")
+        apply(id)
+        p.Brackets(tparams)(apply(_, ", "))
+        p.Prefix(" = ")(apply(tpt))
+      case Import(importers) =>
+        p.str("import ")
+        apply(importers, ", ")
+      case ImporteeName(id) =>
+        apply(id)
+      case ImporteeRename(from, to) =>
+        apply(from)
+        p.str(" => ")
+        apply(to)
+      case ImporteeUnimport(id) =>
+        apply(id)
+        p.str(" => _")
+      case ImporteeWildcard() =>
+        p.str("_")
+      case Importer(qual, importees) =>
+        apply(qual)
+        p.str(".")
+        val needsBraces = importees match { // a lone plain name or wildcard is printed without braces
+          case List(_: ImporteeRename) => true
+          case List(_: ImporteeUnimport) => true
+          case List(_) => false
+          case _ => true
+        }
+        p.Braces.when(needsBraces)(apply(importees, ", "))
+      case Init(tpt, args) =>
+        apply(tpt)
+        p.Parens(apply(args, ", "))
+      case ModAbstract() =>
+        p.str("abstract")
+      case ModCase() =>
+        p.str("case")
+      case ModContravariant() =>
+        p.str("-")
+      case ModCovariant() =>
+        p.str("+")
+      case ModFinal() =>
+        p.str("final")
+      case ModLazy() =>
+        p.str("lazy")
+      case ModOverride() =>
+        p.str("override")
+      case ModPrivate(within) =>
+        p.str("private")
+        p.Brackets(within)(apply(_, ""))
+      case ModProtected(within) =>
+        p.str("protected")
+        p.Brackets(within)(apply(_, ""))
+      case ModSealed() =>
+        p.str("sealed")
+      case ModVal() =>
+        p.str("val")
+      case ModVar() =>
+        p.str("var")
+      case x @ NamedId(value) =>
+        if (x.sym != NoSymbol) p.str("<" + x.sym + ">")
+        else p.str(value)
+      case PatAlternative(pats) =>
+        apply(pats, " | ")
+      case PatExtract(fun, targs, args) =>
+        apply(fun)
+        p.Brackets(targs)(apply(_, ", ")) // type arguments go in the brackets; this used to print args a second time
+        p.Parens(apply(args, ", "))
+      case PatExtractInfix(lhs, op, rhs) =>
+        apply(lhs)
+        apply(op)
+        rhs match {
+          case List(rhs) => apply(rhs)
+          case args => p.Parens(apply(args, ", "))
+        }
+      case PatLit(value) =>
+        apply(TermLit(value))
+      case PatRepeat(pat) =>
+        apply(pat)
+        p.str(" @ _*")
+      case PatSelect(qual, id) =>
+        apply(qual)
+
p.str(".") + apply(id) + case tree @ PatTuple(args) => + crash(tree) + case PatVar(id, tpt) => + id match { + case AnonId() => p.str("_") + case _ => apply(id) + } + p.Prefix(": ")(tpt)(apply(_, "")) + case tree @ PrimaryCtor(mods, params) => + if (mods.nonEmpty) { + p.str(" ") + p.Suffix(" ")(mods)(apply(_, " ")) + } + if (tree.id.sym != NoSymbol) p.str("<" + tree.id.sym + ">") + else () + p.Parens(apply(params, ", ")) + case Source(stats) => + apply(stats, EOL) + case TermApply(fun, args) => + apply(fun) + p.Parens(apply(args, ", ")) + case TermApplyInfix(lhs, op, targs, rhs) => + p.Parens(apply(lhs)) + p.str(" ") + apply(op) + p.Brackets(targs)(apply(_, ", ")) + p.str(" ") + p.Parens(apply(rhs)) + case TermApplyPostfix(arg, op) => + apply(arg) + p.str(" ") + apply(op) + case TermApplyPrefix(op, arg) => + apply(op) + p.Parens(apply(arg)) + case TermApplyType(fun, targs) => + apply(fun) + p.Brackets(apply(targs, ", ")) + case TermAscribe(term, tpt) => + apply(term) + p.str(": ") + apply(tpt) + case TermAssign(lhs, rhs) => + apply(lhs) + p.str(" = ") + apply(rhs) + case TermBlock(stats) => + p.Nest(apply(stats, EOL)) + case TermDo(body, cond) => + p.str("do ") + apply(body) + p.str(" while ") + p.Parens(apply(cond)) + case TermEta(term) => + apply(term) + p.str(" _") + case TermFunction(params, body) => + p.Parens(apply(params, ", ")) + p.str(" =>") + p.Indent(apply(body)) + case TermIf(cond, thenp, elsep) => + p.str("if ") + p.Parens(apply(cond)) + p.str(" ") + apply(thenp) + p.Prefix(" else ")(elsep)(apply(_, "")) + case TermLit(value: Unit) => + p.repl(value) + case TermLit(value: Char) => + p.repl(value) + case TermLit(value: Int) => + p.repl(value) + case TermLit(value: Long) => + p.repl(value) + case TermLit(value: Float) => + p.repl(value) + case TermLit(value: Double) => + p.repl(value) + case TermLit(value: String) => + p.repl(value) + case TermLit(true) => + p.repl(true) + case TermLit(false) => + p.repl(false) + case TermLit(null) => + p.repl(null) + 
case TermLit(value: StdlibSymbol) => + p.repl(value) + case TermLit(other) => + crash(other.getClass.toString) + case TermMatch(term, cases) => + apply(term) + p.str(" match ") + p.Nest(apply(cases, EOL)) + case TermNew(init) => + p.str("new ") + apply(init) + case TermParam(mods, id, tpt) => + p.Suffix(" ")(mods)(apply(_, " ")) + apply(id) + p.str(": ") + apply(tpt) + case TermPartialFunction(cases) => + p.Nest(apply(cases, EOL)) + case TermRepeat(term) => + apply(term) + p.str(": _*") + case TermReturn(term) => + p.str("return") + p.Prefix(" ")(term)(apply(_, "")) + case TermSelect(qual, id) => + apply(qual) + p.str(".") + apply(id) + case TermSuper(qual, mix) => + p.Suffix(".").when(qual.isInstanceOf[NamedId])(apply(qual)) + p.str("super") + p.Brackets.when(mix.isInstanceOf[NamedId])(apply(mix)) + case TermThis(qual) => + p.Suffix(".").when(qual.isInstanceOf[NamedId])(apply(qual)) + p.str("this") + case TermThrow(term) => + p.str("throw ") + apply(term) + case TermTuple(args) => + p.Parens(apply(args, ", ")) + case TermWhile(cond, body) => + p.str("while ") + p.Parens(apply(cond)) + p.str(" ") + apply(body) + case TptFunction(targs) => + p.Parens.when(targs.size != 2)(apply(targs.init, ", ")) + p.str(" => ") + apply(targs.last) + case TptParameterize(fun, targs) => + apply(fun) + p.Brackets(apply(targs, ", ")) + case TptParameterizeInfix(lhs, op, rhs) => + apply(lhs) + p.str(" ") + apply(op) + p.str(" ") + apply(rhs) + case TptRepeat(targ) => + apply(targ) + p.str("*") + case TptSelect(qual, id) => + apply(qual) + p.str(".") + apply(id) + case TptTuple(targs) => + p.Parens(apply(targs, ", ")) + case TypeParam(mods, id, ubound, lbound) => + apply(mods, " ") + apply(id) + p.Prefix(" >: ")(ubound)(apply(_, "")) + p.Prefix(" <: ")(lbound)(apply(_, "")) + } + } + + def apply(xs: Iterable[Tree], separator: String): Unit = { + p.rep(xs, separator)(apply) + } +} diff --git a/src/main/scala/rsc/pretty/package.scala b/src/main/scala/rsc/pretty/package.scala new file mode 
100644 index 0000000..8d131ca --- /dev/null +++ b/src/main/scala/rsc/pretty/package.scala @@ -0,0 +1,7 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc + +package object pretty extends Ops { + val EOL = System.lineSeparator() +} diff --git a/src/main/scala/rsc/report/ConsoleReporter.scala b/src/main/scala/rsc/report/ConsoleReporter.scala new file mode 100644 index 0000000..81f491e --- /dev/null +++ b/src/main/scala/rsc/report/ConsoleReporter.scala @@ -0,0 +1,25 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.report + +import rsc.settings._ + +sealed class ConsoleReporter private (settings: Settings) extends Reporter { + private var buf = List.newBuilder[Message] + + def append(msg: Message): Message = { + buf += msg + println(msg.str) + msg + } + + def problems: List[Message] = { + buf.result.filter(m => m.sev == FatalSeverity || m.sev == ErrorSeverity) + } +} + +object ConsoleReporter { + def apply(settings: Settings): Reporter = { + new ConsoleReporter(settings) + } +} diff --git a/src/main/scala/rsc/report/Messages.scala b/src/main/scala/rsc/report/Messages.scala new file mode 100644 index 0000000..e95d332 --- /dev/null +++ b/src/main/scala/rsc/report/Messages.scala @@ -0,0 +1,295 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). 
+package rsc.report + +import java.io._ +import rsc.lexis._ +import rsc.pretty._ +import rsc.semantics._ +import rsc.syntax._ +import rsc.typecheck._ +import rsc.util._ + +sealed trait Message extends Pretty with Product { + def sev: Severity + def pos: Position + def text: String + def explanation: String = "" + def printStr(p: Printer): Unit = PrettyMessage.str(p, this) + def printRepl(p: Printer): Unit = PrettyMessage.repl(p, this) +} + +// ============ FUNDAMENTAL ============ + +final case class CrashMessage(pos: Position, message: String, ex: Throwable) + extends Message { + def sev = FatalSeverity + def text = "compiler crash" + override def explanation = { + if (ex != null) { + val details = new StringWriter() + ex.printStackTrace(new PrintWriter(details)) + details.toString + } else { + "" + } + } +} + +final case class VerboseMessage(pos: Position, text: String) extends Message { + def sev = VerboseSeverity +} + +object VerboseMessage { + def apply(text: String): VerboseMessage = VerboseMessage(NoPosition, text) +} + +// ============ LEXER ============ + +final case class IllegalCharacter(pos: Position) extends Message { + def sev = FatalSeverity + def text = "illegal character" +} + +final case class IllegalComment(pos: Position) extends Message { + def sev = FatalSeverity + def text = "illegal comment" +} + +final case class IllegalEscape(pos: Position) extends Message { + def sev = FatalSeverity + def text = "illegal escape" +} + +final case class IllegalNumber(pos: Position) extends Message { + def sev = FatalSeverity + def text = "illegal number" +} + +final case class LeadingZero(pos: Position) extends Message { + def sev = FatalSeverity + def text = "leading zeros not allowed" +} + +final case class UnclosedCharacter(pos: Position) extends Message { + def sev = FatalSeverity + def text = "unclosed character literal" +} + +final case class UnclosedSinglelineString(pos: Position) extends Message { + def sev = FatalSeverity + def text = "unclosed 
string literal"
+}
+
+final case class UnclosedMultilineString(pos: Position) extends Message {
+  def sev = FatalSeverity
+  def text = "unclosed multi-line string literal"
+}
+
+// ============ PARSER ============
+
+final case class ExpectedToken(pos: Position, expected: Token, actual: Token)
+    extends Message {
+  def sev = FatalSeverity
+  def text = s"${tokenStr(expected)} expected but ${tokenStr(actual)} found"
+}
+
+final case class ExpectedId(pos: Position, expected: String, actual: Token)
+    extends Message {
+  def sev = FatalSeverity
+  def text = s"$expected expected but ${tokenStr(actual)} found" // needs the s-interpolator; without it the placeholders were printed verbatim
+}
+
+final case class ExpectedClassOrObjectDefinition(pos: Position)
+    extends Message {
+  def sev = FatalSeverity
+  def text = "expected class or object definition"
+}
+
+final case class ExpectedStartOfDefinition(pos: Position) extends Message {
+  def sev = FatalSeverity
+  def text = "expected start of definition"
+}
+
+final case class ExpectedTypeRhs(pos: Position) extends Message {
+  def sev = FatalSeverity
+  def text = "expected =, >:, or <:"
+}
+
+final case class FileNotFound(input: Input) extends Message {
+  def sev = ErrorSeverity
+  def pos = Position(input, NoOffset, NoOffset)
+  def text = s"file not found"
+}
+
+final case class FilesNotFound() extends Message {
+  def sev = ErrorSeverity
+  def pos = Position(NoInput, NoOffset, NoOffset)
+  def text = s"nothing to compile"
+}
+
+final case class IllegalIdentifier(pos: Position) extends Message {
+  def sev = FatalSeverity
+  def text = "illegal identifier"
+}
+
+final case class IllegalLiteral(pos: Position) extends Message {
+  def sev = FatalSeverity
+  def text = "illegal literal"
+}
+
+final case class IllegalModifier(pos: Position) extends Message {
+  def sev = ErrorSeverity
+  def text = "illegal modifier"
+}
+
+final case class IllegalStartOfDeclaration(pos: Position) extends Message {
+  def sev = FatalSeverity
+  def text = "illegal start of declaration"
+}
+
+final case class
IllegalStartOfDefinition(pos: Position) extends Message {
+  def sev = FatalSeverity
+  def text = "illegal start of definition"
+}
+
+final case class IllegalStartOfSimplePat(pos: Position) extends Message {
+  def sev = FatalSeverity
+  def text = "illegal start of simple pattern"
+}
+
+final case class IllegalStartOfSimpleTerm(pos: Position) extends Message {
+  def sev = FatalSeverity
+  def text = "illegal start of simple expression"
+}
+
+final case class IllegalStartOfStatement(pos: Position) extends Message {
+  def sev = FatalSeverity
+  def text = "illegal start of statement"
+}
+
+final case class MixedLeftAndRightAssociativeOps(
+    pos: Position,
+    op1: String,
+    op2: String)
+    extends Message {
+  def sev = ErrorSeverity
+  def text = {
+    def status(op: String) = { // describes the associativity of the operator passed in
+      if (op.isLeftAssoc) "which is left-associative" // was op1: both operators were reported with op1's associativity
+      else "which is right-associative"
+    }
+    val status1 = status(op1)
+    val status2 = status(op2)
+    val error = "have same precedence and may not be mixed"
+    s"`$op1` ($status1) and `$op2` ($status2) $error"
+  }
+}
+
+final case class RepeatedModifier(pos: Position) extends Message {
+  def sev = ErrorSeverity
+  def text = s"repeated modifier"
+}
+
+// ============ TYPECHECKER ============
+
+final case class DoubleDef(tree: Outline, existing: Outline) extends Message {
+  def sev = ErrorSeverity
+  def pos = tree.id.point
+  def text = {
+    if (tree.isInstanceOf[DefnDef] || existing.isInstanceOf[DefnDef]) {
+      crash("overloading")
+    } else {
+      val treeDesc = {
+        tree.id match {
+          case AnonId() => crash(tree)
+          case id: NamedId => id.value
+        }
+      }
+      val existingDesc = PrettyOutline.desc(existing)
+      s"$treeDesc is already defined as $existingDesc"
+    }
+  }
+}
+
+final case class IllegalCyclicReference(scope: Scope) extends Message {
+  def sev = ErrorSeverity
+  def pos = {
+    scope match {
+      case scope: ImporterScope => scope.tree.point
+      case scope: TemplateScope => scope.tree.point
+      case _ => crash(scope)
+    }
+  }
+  def text = {
+    val CyclicStatus(cycle) =
scope.status + val description = { + def loop(scopes: List[Scope]): String = { + scopes match { + case List() => crash(cycle) + case List(only) => name(only) + case List(prelast, last) => name(prelast) + " and " + name(last) + case scope :: rest => name(scope) + ", " + loop(rest) + } + } + loop(cycle) + } + s"illegal cyclic reference involving $description" + } + private def name(scope: Scope): String = { + scope match { + case scope: TemplateScope => + PrettyOutline.desc(scope.tree) + case scope: ImporterScope => + val p = new Printer + p.str("import ") + p.str(scope.tree) + p.toString + case _ => + crash(scope) + } + } +} + +final case class IllegalOutlinePart(part: Tree) extends Message { + def sev = ErrorSeverity + def pos = part.point + def text = "illegal outline part" +} + +final case class NonValue(term: Term, tpe: Type) extends Message { + def sev = ErrorSeverity + def pos = term.point + def text = s"not a value: $tpe" +} + +final case class UnboundMember(qualSym: Symbol, id: Id) extends Message { + def sev = ErrorSeverity + def pos = id.point + def text = { + val qualDesc = qualSym + id match { + case AnonId() => crash(id) + case CtorId() => crash(id) + case PatId(value) => s"unbound: value $qualDesc.$value" + case SomeId(value) => s"unbound: $qualDesc.$value" + case TermId(value) => s"unbound: value $qualDesc.$value" + case TptId(value) => s"unbound: type $qualDesc.$value" + } + } +} + +final case class UnboundId(id: Id) extends Message { + def sev = ErrorSeverity + def pos = id.point + def text = { + id match { + case AnonId() => crash(id) + case CtorId() => crash(id) + case PatId(value) => s"unbound: value $value" + case SomeId(value) => s"unbound: $value" + case TermId(value) => s"unbound: value $value" + case TptId(value) => s"unbound: type $value" + } + } +} diff --git a/src/main/scala/rsc/report/Points.scala b/src/main/scala/rsc/report/Points.scala new file mode 100644 index 0000000..97df59b --- /dev/null +++ 
b/src/main/scala/rsc/report/Points.scala @@ -0,0 +1,19 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.report + +import rsc.lexis._ +import rsc.syntax._ + +trait Points { + implicit class TreePointOps(tree: Tree) { + def point: Position = { + tree match { + case DefnTemplate(_, id, _, _, _, _) => id.pos + case tree: Path => tree.id.pos + case TermApplyInfix(_, op, _, _) => op.pos + case _ => tree.pos + } + } + } +} diff --git a/src/main/scala/rsc/report/Reporter.scala b/src/main/scala/rsc/report/Reporter.scala new file mode 100644 index 0000000..a28cda5 --- /dev/null +++ b/src/main/scala/rsc/report/Reporter.scala @@ -0,0 +1,8 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.report + +trait Reporter { + def append(msg: Message): Message + def problems: List[Message] +} diff --git a/src/main/scala/rsc/report/Severity.scala b/src/main/scala/rsc/report/Severity.scala new file mode 100644 index 0000000..b09cf7e --- /dev/null +++ b/src/main/scala/rsc/report/Severity.scala @@ -0,0 +1,18 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.report + +import rsc.pretty._ + +sealed trait Severity extends Pretty with Product { + def printStr(p: Printer): Unit = PrettySeverity.str(p, this) + def printRepl(p: Printer): Unit = PrettySeverity.repl(p, this) +} + +case object FatalSeverity extends Severity + +case object ErrorSeverity extends Severity + +case object WarningSeverity extends Severity + +case object VerboseSeverity extends Severity diff --git a/src/main/scala/rsc/report/StoreReporter.scala b/src/main/scala/rsc/report/StoreReporter.scala new file mode 100644 index 0000000..b99894e --- /dev/null +++ b/src/main/scala/rsc/report/StoreReporter.scala @@ -0,0 +1,24 @@ +// Copyright (c) 2017-2018 Twitter, Inc. 
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.report + +import rsc.settings._ + +sealed class StoreReporter private (settings: Settings) extends Reporter { + private var buf = List.newBuilder[Message] + + def append(msg: Message): Message = { + buf += msg + msg + } + + def problems: List[Message] = { + buf.result.filter(m => m.sev == FatalSeverity || m.sev == ErrorSeverity) + } +} + +object StoreReporter { + def apply(settings: Settings): Reporter = { + new StoreReporter(settings) + } +} diff --git a/src/main/scala/rsc/report/package.scala b/src/main/scala/rsc/report/package.scala new file mode 100644 index 0000000..1395521 --- /dev/null +++ b/src/main/scala/rsc/report/package.scala @@ -0,0 +1,5 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc + +package object report extends Points diff --git a/src/main/scala/rsc/scan/Characters.scala b/src/main/scala/rsc/scan/Characters.scala new file mode 100644 index 0000000..6b46db4 --- /dev/null +++ b/src/main/scala/rsc/scan/Characters.scala @@ -0,0 +1,31 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). 
+package rsc.scan + +import rsc.lexis._ +import rsc.util._ + +trait Characters { + self: Scanner => + + val chs: Array[Char] = input.chars + var offset: Offset = 0 + + def ch: Char = getChar(offset) + def ch1: Char = getChar(offset + 1) + def ch2: Char = getChar(offset + 2) + private def getChar(offset: Offset): Char = { + if (offset < chs.length) chs(offset) + else SU + } + + def nextChar(): Unit = { + if (offset < chs.length) { + offset += 1 + } + } + + def lexeme: String = { + new String(chs, end, offset - end) + } +} diff --git a/src/main/scala/rsc/scan/History.scala b/src/main/scala/rsc/scan/History.scala new file mode 100644 index 0000000..74ee540 --- /dev/null +++ b/src/main/scala/rsc/scan/History.scala @@ -0,0 +1,19 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.scan + +trait History { + self: Scanner => + + def snapshot(): Snapshot = { + Snapshot(offset, start, end, token, value) + } + + def restore(snapshot: Snapshot): Unit = { + this.offset = snapshot.offset + this.start = snapshot.start + this.end = snapshot.end + this.token = snapshot.token + this.value = snapshot.value + } +} diff --git a/src/main/scala/rsc/scan/Messages.scala b/src/main/scala/rsc/scan/Messages.scala new file mode 100644 index 0000000..125e242 --- /dev/null +++ b/src/main/scala/rsc/scan/Messages.scala @@ -0,0 +1,25 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). 
+package rsc.scan + +import rsc.lexis._ +import rsc.report._ +import rsc.util._ + +trait Messages { + self: Scanner => + + def reportOffset(offset: Offset, msgFn: Position => Message): Message = { + val pos = Position(input, offset, offset) + val msg = msgFn(pos) + reporter.append(msg) + if (msg.sev == FatalSeverity) { + skip() + } + msg + } + + private def skip(): Unit = { + crash("smart handling of fatal lexing errors") + } +} diff --git a/src/main/scala/rsc/scan/Scanner.scala b/src/main/scala/rsc/scan/Scanner.scala new file mode 100644 index 0000000..4eefb2d --- /dev/null +++ b/src/main/scala/rsc/scan/Scanner.scala @@ -0,0 +1,466 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.scan + +import scala.annotation.switch +import scala.{Symbol => StdlibSymbol} +import rsc.lexis._ +import rsc.report._ +import rsc.settings._ +import rsc.util._ + +final class Scanner private ( + val settings: Settings, + val reporter: Reporter, + val input: Input) + extends Characters + with History + with Messages { + var start: Offset = 0 + var end: Offset = 0 + var token: Token = BOF + var value: Any = null + + def next(): Unit = { + (ch: @switch) match { + case SU => + start = end + token = EOF + value = null + case '/' => + if (ch1 == '/' || ch1 == '*') comment() + else symbolicIdOrKeyword() + case ' ' | '\t' => + whitespace() + case CR | LF | FF => + newline() + case 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | + 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | + 'W' | 'X' | 'Y' | 'Z' | '$' | '_' | 'a' | 'b' | 'c' | 'd' | 'e' | + 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | + 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' => + alphanumericIdOrKeyword() + if (ch == '"' && token == ID) { + crash("string interpolation") + } + case '-' => + if (isDecimalDigit(ch1)) { + number() + } else { + symbolicIdOrKeyword() + } + case '~' | '!' 
| '@' | '#' | '%' | '^' | '*' | '+' | '<' | '>' | '?' | + ':' | '=' | '&' | '|' | '\\' | '⇒' | '←' => + symbolicIdOrKeyword() + case '`' => + backquotedId() + case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => + number() + case '"' => + string() + case '\'' => + characterOrSymbol() + case '.' => + if (isDecimalDigit(ch1)) { + number() + } else { + nextChar() + emit(DOT, null) + } + case ';' => + nextChar() + emit(SEMI, null) + case ',' => + nextChar() + emit(COMMA, null) + case '(' => + nextChar() + emit(LPAREN, null) + case '{' => + nextChar() + emit(LBRACE, null) + case ')' => + nextChar() + emit(RPAREN, null) + case '}' => + nextChar() + emit(RBRACE, null) + case '[' => + nextChar() + emit(LBRACKET, null) + case ']' => + nextChar() + emit(RBRACKET, null) + case other => + if (isAlphanumericIdStart(other)) { + alphanumericIdOrKeyword() + } else if (isSymbolicIdStart(other)) { + symbolicIdOrKeyword() + } else { + val message = reportOffset(offset, IllegalCharacter) + emit(ERROR, message) + nextChar() + } + } + } + + private def alphanumericIdOrKeyword(): Unit = { + nextChar() + if (isAlphanumericIdPart(ch)) { + alphanumericIdOrKeyword() + } else if (ch == '_') { + alphanumericSymbolicId() + } else { + emitIdOrKeyword() + } + } + + private def alphanumericSymbolicId(): Unit = { + nextChar() + if (isAlphanumericIdPart(ch)) { + alphanumericIdOrKeyword() + } else if (isSymbolicIdPart(ch)) { + symbolicIdOrKeyword() + } else { + emitIdOrKeyword() + } + } + + private def backquotedId(): Unit = { + crash("backquoted identifiers") + } + + private def characterOrSymbol(): Unit = { + nextChar() + if (isAlphanumericIdStart(ch) && ch1 != '\'') { + alphanumericIdOrKeyword() + token = LITSYMBOL + value = StdlibSymbol(value.asInstanceOf[String].stripPrefix("'")) + } else if (isSymbolicIdStart(ch) && ch != '\\' && ch1 != '\'') { + symbolicIdOrKeyword() + token = LITSYMBOL + value = StdlibSymbol(value.asInstanceOf[String].stripPrefix("'")) + } else { + val result = 
quote('\'')
+      if (ch == '\'') {
+        if (result.length == 1) {
+          nextChar()
+          emit(LITCHAR, result.head)
+        } else {
+          val message = reportOffset(offset, IllegalCharacter)
+          emit(ERROR, message)
+          nextChar()
+        }
+      } else {
+        val message = reportOffset(offset, UnclosedCharacter)
+        emit(ERROR, message)
+      }
+    }
+  }
+
+  private def comment(): Unit = {
+    nextChar()
+    (ch: @switch) match {
+      case '/' =>
+        while (ch != CR && ch != LF && ch != FF && ch != SU) {
+          nextChar()
+        }
+        emit(COMMENT, null)
+      case '*' =>
+        nextChar()
+        while (ch != '*' || ch1 != '/') {
+          if (ch == SU) {
+            val message = reportOffset(offset, IllegalComment)
+            emit(ERROR, message)
+            return
+          } else {
+            nextChar()
+          }
+        }
+        nextChar()
+        nextChar()
+        emit(COMMENT, null)
+      case _ =>
+        crash(ch)
+    }
+  }
+
+  private def decimalNumber(): Unit = { // scans a decimal literal: digits, optional fraction, optional exponent, optional l/f/d suffix
+    while (isDecimalDigit(ch)) {
+      nextChar()
+    }
+    var default = { // token kind used when no suffix follows; a var because an exponent upgrades it below
+      if (ch == '.') {
+        nextChar()
+        while (isDecimalDigit(ch)) {
+          nextChar()
+        }
+        LITDOUBLE
+      } else {
+        LITINT
+      }
+    }
+    if (ch == 'e' || ch == 'E') {
+      if (isDecimalDigit(ch1)) {
+        nextChar() // consume the exponent marker; it was never skipped, so the digit loop below could not run
+        while (isDecimalDigit(ch)) nextChar()
+        default = LITDOUBLE // a literal with an exponent is floating-point even without a fraction
+      } else if ((ch1 == '+' || ch1 == '-') && isDecimalDigit(ch2)) {
+        nextChar() // consume the exponent marker
+        nextChar() // consume the sign
+        while (isDecimalDigit(ch)) nextChar()
+        default = LITDOUBLE
+      }
+    }
+    val parsee = lexeme // text handed to the parse* calls; captured before any suffix is consumed
+    val token = {
+      ch match {
+        case 'l' | 'L' =>
+          if (default == LITINT) {
+            nextChar()
+            LITLONG
+          } else {
+            nextChar()
+            val message = reportOffset(offset, IllegalNumber)
+            emit(ERROR, message)
+            return
+          }
+        case 'f' | 'F' =>
+          nextChar()
+          LITFLOAT
+        case 'd' | 'D' =>
+          nextChar()
+          LITDOUBLE
+        case _ =>
+          default
+      }
+    }
+    if (isAlphanumericIdPart(ch)) {
+      val message = reportOffset(offset, IllegalNumber)
+      emit(ERROR, message)
+      return
+    }
+    try {
+      val number: AnyVal = {
+        if (token == LITINT) java.lang.Integer.parseInt(parsee, 10)
+        else if (token == LITLONG) java.lang.Long.parseLong(parsee, 10)
+        else if (token == LITFLOAT) java.lang.Float.parseFloat(parsee)
+        else java.lang.Double.parseDouble(parsee)
+      }
+
emit(token, number) + } catch { + case ex: NumberFormatException => + val message = reportOffset(offset, IllegalNumber) + emit(ERROR, message) + } + } + + private def hexadecimalNumber(): Unit = { + nextChar() + nextChar() + while (isHexadecimalDigit(ch)) { + nextChar() + } + val parsee = { + val lexeme = this.lexeme + if (lexeme.startsWith("-")) "-" + lexeme.substring(3) + else lexeme.substring(2) + } + val token = { + ch match { + case 'l' | 'L' => + nextChar() + LITLONG + case _ => + LITINT + } + } + try { + val number: AnyVal = { + if (token == LITINT) java.lang.Integer.parseInt(parsee, 16) + else java.lang.Long.parseLong(parsee, 16) + } + emit(token, number) + } catch { + case ex: NumberFormatException => + val message = reportOffset(offset, IllegalNumber) + emit(ERROR, message) + } + } + + private def newline(): Unit = { + if (ch == CR && ch1 == LF) { + nextChar() + } + nextChar() + emit(NEWLINE, null) + } + + private def number(): Unit = { + if (ch == '-') { + nextChar() + } + if (ch == '0') { + if (ch1 == 'x' || ch1 == 'X') { + hexadecimalNumber() + } else if (isDecimalDigit(ch1)) { + val message = reportOffset(offset, LeadingZero) + emit(ERROR, message) + } else { + decimalNumber() + } + } else { + decimalNumber() + } + } + + private def quote(delim: Char): String = { + val buf = new StringBuilder + while (ch != delim && ch != CR && ch != LF && ch != FF && ch != SU) { + if (ch == '\\') { + nextChar() + ch match { + case 'b' => + nextChar() + buf += '\b' + case 't' => + nextChar() + buf += '\t' + case 'n' => + nextChar() + buf += '\n' + case 'f' => + nextChar() + buf += '\f' + case 'r' => + nextChar() + buf += '\r' + case '"' => + nextChar() + buf += '"' + case '\'' => + nextChar() + buf += '\'' + case '\\' => + nextChar() + buf += '\\' + case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' => + val leadch: Char = ch + var oct: Int = ch - '0' + nextChar() + if ('0' <= ch && ch <= '7') { + oct = oct * 8 + (ch - '0') + nextChar() + if (leadch <= '3' && '0' <= 
ch && ch <= '7') { + oct = oct * 8 + (ch - '0') + nextChar() + } + } + buf += oct.toChar + case 'u' => + val uoffset = offset + nextChar() + var unicode: Int = 0 + var i = 0 + while (i < 4) { + if (isHexadecimalDigit(ch)) { + unicode = unicode << 4 + Integer.parseInt(ch.toString, 16) + } else { + reportOffset(uoffset, IllegalEscape) + } + nextChar() + i += 1 + } + buf += unicode.toChar + case other => + reportOffset(offset, IllegalEscape) + nextChar() + } + } else { + buf += ch + nextChar() + } + } + buf.toString + } + + private def string(): Unit = { + nextChar() + if (ch == '"') { + nextChar() + if (ch == '"') { + nextChar() + val buf = new StringBuilder + while (ch != '\"' || ch1 != '\"' || ch2 != '\"') { + if (ch == SU) { + val message = reportOffset(offset, UnclosedMultilineString) + emit(ERROR, message) + return + } + buf += ch + } + nextChar() + nextChar() + nextChar() + emit(LITSTRING, buf.toString) + } else { + emit(LITSTRING, "") + } + } else { + val result = quote('"') + if (ch == '"') { + nextChar() + emit(LITSTRING, result) + } else { + val message = reportOffset(offset, UnclosedSinglelineString) + emit(ERROR, message) + } + } + } + + private def symbolicIdOrKeyword(): Unit = { + nextChar() + if (ch == '/') { + if (ch1 == '/' || ch1 == '*') { + emitIdOrKeyword() + } else { + symbolicIdOrKeyword() + } + } else if (isSymbolicIdPart(ch)) { + symbolicIdOrKeyword() + } else { + emitIdOrKeyword() + } + } + + private def whitespace(): Unit = { + nextChar() + while (ch == ' ' || ch == '\t') { + nextChar() + } + emit(WHITESPACE, null) + } + + private def emit(token: Token, value: Any): Unit = { + start = end + end = offset + this.token = token + this.value = value + } + + private def emitIdOrKeyword(): Unit = { + val lexeme = this.lexeme + val token = keywords.get(lexeme) + if (token == 0) { + emit(ID, lexeme) + } else { + emit(token, null) + } + } +} + +object Scanner { + def apply(settings: Settings, reporter: Reporter, input: Input): Scanner = { + new 
Scanner(settings, reporter, input) + } +} diff --git a/src/main/scala/rsc/scan/Snapshot.scala b/src/main/scala/rsc/scan/Snapshot.scala new file mode 100644 index 0000000..3cd6364 --- /dev/null +++ b/src/main/scala/rsc/scan/Snapshot.scala @@ -0,0 +1,12 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.scan + +import rsc.lexis._ + +final case class Snapshot( + offset: Offset, + start: Offset, + end: Offset, + token: Token, + value: Any) diff --git a/src/main/scala/rsc/semantics/Names.scala b/src/main/scala/rsc/semantics/Names.scala new file mode 100644 index 0000000..3cee1ea --- /dev/null +++ b/src/main/scala/rsc/semantics/Names.scala @@ -0,0 +1,26 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.semantics + +import rsc.pretty._ + +sealed trait Name extends Pretty with Product { + def value: String + def printStr(p: Printer): Unit = PrettyName.str(p, this) + def printRepl(p: Printer): Unit = PrettyName.repl(p, this) +} + +final case class SomeName(value: String) extends Name { + override val hashCode: Int = value.hashCode * 3 + override def str: String = value +} + +final case class TermName(value: String) extends Name { + override val hashCode: Int = value.hashCode * 5 + override def str: String = value + "." +} + +final case class TypeName(value: String) extends Name { + override val hashCode: Int = value.hashCode * 7 + override def str: String = value + "#" +} diff --git a/src/main/scala/rsc/semantics/Symbols.scala b/src/main/scala/rsc/semantics/Symbols.scala new file mode 100644 index 0000000..8c477bf --- /dev/null +++ b/src/main/scala/rsc/semantics/Symbols.scala @@ -0,0 +1,14 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). 
+package rsc.semantics
+
+// Symbols are plain strings; the empty string stands for "no symbol".
+trait Symbols {
+  type Symbol = String
+  val NoSymbol: Symbol = ""
+
+  // Source of fresh symbols: a counter rendered as a decimal string.
+  // The counter is an unsynchronized var.
+  private var counter = 0
+  def freshSym(): Symbol = {
+    counter += 1
+    counter.toString
+  }
+}
diff --git a/src/main/scala/rsc/semantics/Types.scala b/src/main/scala/rsc/semantics/Types.scala
new file mode 100644
index 0000000..e302f26
--- /dev/null
+++ b/src/main/scala/rsc/semantics/Types.scala
@@ -0,0 +1,20 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.semantics
+
+import rsc.pretty._
+
+// Types as represented by the typechecker; printing is delegated to
+// PrettyType.
+sealed trait Type extends Pretty with Product {
+  def printStr(p: Printer): Unit = PrettyType.str(p, this)
+  def printRepl(p: Printer): Unit = PrettyType.repl(p, this)
+}
+
+// Placeholder for "no type".
+final case object NoType extends Type
+
+// A symbol applied to zero or more type arguments.
+final case class SimpleType(sym: Symbol, targs: List[SimpleType]) extends Type
+
+// Signature of a method: symbols of its type parameters and term parameters
+// plus the return type.
+final case class MethodType(
+    tparams: List[Symbol],
+    params: List[Symbol],
+    ret: SimpleType)
+    extends Type
diff --git a/src/main/scala/rsc/semantics/package.scala b/src/main/scala/rsc/semantics/package.scala
new file mode 100644
index 0000000..61367b4
--- /dev/null
+++ b/src/main/scala/rsc/semantics/package.scala
@@ -0,0 +1,5 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc
+
+// Makes Symbol, NoSymbol and freshSym() available as rsc.semantics._
+package object semantics extends Symbols
diff --git a/src/main/scala/rsc/settings/Settings.scala b/src/main/scala/rsc/settings/Settings.scala
new file mode 100644
index 0000000..9a8202a
--- /dev/null
+++ b/src/main/scala/rsc/settings/Settings.scala
@@ -0,0 +1,49 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.settings
+
+import java.io._
+import java.nio.file._
+
+// Compiler configuration assembled from the command line.
+//   classpath  - entries from -classpath / -cp, in order of appearance
+//   ins        - input files, in order of appearance
+//   xprint     - names given via -Xprint:a,b,...
+//   ystopAfter - names given via -Ystop-after:a,b,...
+final case class Settings(
+    classpath: List[Path] = Nil,
+    ins: List[rsc.lexis.Input] = Nil,
+    xprint: Set[String] = Set[String](),
+    ystopAfter: Set[String] = Set[String]()
+)
+
+object Settings {
+  // Parses command-line arguments into Settings.
+  // Returns None (after printing a message) when an unknown flag is found.
+  // Repeated options accumulate. Everything after a "--" separator is
+  // treated as an input path, even if it starts with '-'.
+  def parse(args: List[String]): Option[Settings] = {
+    def loop(
+        settings: Settings,
+        allowOptions: Boolean,
+        args: List[String]): Option[Settings] = {
+      args match {
+        case "--" +: rest =>
+          // Continue with the remainder. Recursing on `args` here re-matched
+          // the same "--" forever.
+          loop(settings, false, rest)
+        case ("-classpath" | "-cp") +: s_cp +: rest if allowOptions =>
+          val cp = s_cp.split(File.pathSeparator).map(s => Paths.get(s)).toList
+          loop(settings.copy(classpath = settings.classpath ++ cp), true, rest)
+        case opt +: rest if allowOptions && opt.startsWith("-Xprint:") =>
+          val stripped = opt.stripPrefix("-Xprint:").split(",")
+          val xprint = stripped.map(_.trim).toSet
+          val xprint1 = settings.xprint ++ xprint
+          loop(settings.copy(xprint = xprint1), true, rest)
+        case opt +: rest if allowOptions && opt.startsWith("-Ystop-after:") =>
+          val stripped = opt.stripPrefix("-Ystop-after:").split(",")
+          val ystopAfter = stripped.map(_.trim).toSet
+          val ystopAfter1 = settings.ystopAfter ++ ystopAfter
+          loop(settings.copy(ystopAfter = ystopAfter1), true, rest)
+        case flag +: rest if allowOptions && flag.startsWith("-") =>
+          println(s"unknown flag $flag")
+          None
+        case in +: rest =>
+          val ins = List(rsc.lexis.Input(Paths.get(in)))
+          loop(settings.copy(ins = settings.ins ++ ins), allowOptions, rest)
+        case Nil =>
+          Some(settings)
+      }
+    }
+    loop(Settings(), true, args)
+  }
+}
diff --git a/src/main/scala/rsc/syntax/Trees.scala b/src/main/scala/rsc/syntax/Trees.scala
new file mode 100644
index 0000000..7522008
--- /dev/null
+++ b/src/main/scala/rsc/syntax/Trees.scala
@@ -0,0 +1,363 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.syntax
+
+import rsc.lexis._
+import rsc.pretty._
+import rsc.semantics._
+
+// Root of the syntax tree hierarchy. Although the nodes are case classes,
+// equality and hashing are overridden to be identity-based, so two
+// structurally equal trees are distinct. `pos` is mutable and starts out as
+// NoPosition.
+sealed trait Tree extends Pretty with Product {
+  var pos: Position = NoPosition
+  override def hashCode(): Int = System.identityHashCode(this)
+  override def equals(that: Any) = this eq that.asInstanceOf[AnyRef]
+  def printStr(p: Printer): Unit = PrettyTree.str(p, this)
+  def printRepl(p: Printer): Unit = PrettyTree.repl(p, this)
+}
+
+final case class AnonId() extends Id
+
+final case class Case(pat: Pat, cond: Option[Term], stats: List[Stat])
+    extends Tree
+
+// Identifier of a constructor; value and name come from the companion.
+final case class CtorId() extends NamedId {
+  def value = CtorId.value
+  def name = CtorId.name
+}
+
+object CtorId {
+  val value = "this"
+  val name = TermName("")
+}
+
+final case class DefnClass(
+    mods: List[Mod],
+    id: TptId,
+    tparams: List[TypeParam],
+    ctor: PrimaryCtor,
+    inits: List[Init],
+    stats: List[Stat])
+    extends DefnTemplate
+
+final case class DefnDef(
+    mods: List[Mod],
+    id: TermId,
+    tparams: List[TypeParam],
+    params: List[TermParam],
+    ret: Tpt,
+    rhs: Option[Term])
+    extends Stat
+    with Outline
+
+final case class DefnField(
+    mods: List[Mod],
+    id: TermId,
+    tpt: Tpt,
+    rhs: Option[Term])
+    extends Stat
+    with Outline
+
+// Objects have no type parameters and get an empty primary constructor.
+final case class DefnObject(
+    mods: List[Mod],
+    id: TermId,
+    inits: List[Init],
+    stats: List[Stat])
+    extends DefnTemplate
+    with Outline {
+  def tparams = Nil
+  def ctor = PrimaryCtor(Nil, Nil)
+}
+
+final case class DefnPackage(pid: TermPath, stats: List[Stat])
+    extends Stat
+    with Outline {
+  // Unchecked cast: fails at runtime if the path's id is not a NamedId.
+  def id = pid.id.asInstanceOf[NamedId]
+}
+
+// Common shape of classes, traits and objects.
+sealed trait DefnTemplate extends Stat with Outline {
+  def mods: List[Mod]
+  def id: NamedId
+  def tparams: List[TypeParam]
+  def ctor: PrimaryCtor
+  def inits: List[Init]
+  def stats: List[Stat]
+}
+
+object DefnTemplate {
+  // Extractor that lets any template be matched with one six-field pattern.
+  def unapply(tree: DefnTemplate): Option[(
+      List[Mod],
+      NamedId,
+      List[TypeParam],
+      PrimaryCtor,
+      List[Init],
+      List[Stat])] = {
+    Some((tree.mods, tree.id, tree.tparams, tree.ctor, tree.inits, tree.stats))
+  }
+}
+
+// Traits get an empty primary constructor.
+final case class DefnTrait(
+    mods: List[Mod],
+    id: TptId,
+    tparams: List[TypeParam],
+    inits: List[Init],
+    stats: List[Stat])
+    extends DefnTemplate {
+  def ctor = PrimaryCtor(Nil, Nil)
+}
+
+final case class DefnType(
+    mods: List[Mod],
+    id: TptId,
+    tparams: List[TypeParam],
+    tpt: Tpt)
+    extends Stat
+    with Outline
+
+object Error {
+  val value: String = ""
+}
+
+// An identifier. `sym` is mutable, starts as NoSymbol, and can be set with
+// withSym, which returns the same node.
+sealed trait Id extends Tree {
+  var sym: Symbol = NoSymbol
+  def withSym(sym: Symbol): this.type = {
+    this.sym = sym
+    this
+  }
+}
+
+final case class Import(importers: List[Importer]) extends Stat
+
+sealed trait Importee extends Tree
+
+final case class ImporteeName(id: SomeId) extends Importee
+
+final case class ImporteeRename(from: SomeId, to: SomeId) extends Importee
+
+final case class ImporteeUnimport(id: SomeId) extends Importee
+
+final case class ImporteeWildcard() extends Importee
+
+final case class Importer(qual: TermPath, importees: List[Importee])
+    extends Tree
+
+// A parent/constructor invocation; each Init carries its own CtorId.
+final case class Init(tpt: Tpt, args: List[Term]) extends Term {
+  val id = CtorId()
+}
+
+sealed trait Mod extends Tree
+
+final case class ModAbstract() extends Mod
+
+final case class ModCase() extends Mod
+
+final case class ModContravariant() extends Mod
+
+final case class ModCovariant() extends Mod
+
+final case class ModFinal() extends Mod
+
+final case class ModLazy() extends Mod
+
+final case class ModOverride() extends Mod
+
+final case class ModPrivate(within: Option[Path]) extends Mod
+
+final case class ModProtected(within: Option[Path]) extends Mod
+
+final case class ModSealed() extends Mod
+
+final case class ModVal() extends Mod
+
+final case class ModVar() extends Mod
+
+// An identifier that has a textual value and a kinded Name; as a Path it is
+// its own id.
+sealed trait NamedId extends Id with Path {
+  def id = this
+  def value: String
+  def name: Name
+}
+
+object NamedId {
+  def unapply(id: NamedId): Some[String] = {
+    Some(id.value)
+  }
+}
+
+// A tree that exposes an identifier.
+sealed trait Outline extends Tree {
+  def id: Id
+}
+
+sealed trait Pat extends Tree
+
+final case class PatAlternative(pats: List[Pat]) extends Pat
+
+final case class PatExtract(fun: TermPath, targs: List[Tpt], args: List[Pat])
+    extends Pat
+
+final case class PatExtractInfix(lhs: Pat, op: TermId, rhs: List[Pat])
+    extends Pat
+
+final case class PatId(value: String) extends Pat with NamedId {
+  def name = TermName(value)
+}
+
+final case class PatLit(value: Any) extends Pat
+
+final case class PatRepeat(pat: Pat) extends Pat
+
+final case class PatSelect(qual: TermPath, id: TermId) extends Pat
+
+final case class PatTuple(args: List[Pat]) extends Pat
+
+final case class PatVar(id: Id, tpt: Option[Tpt]) extends Pat with Outline {
+  var tpe: Type = NoType
+}
+
+sealed trait Path extends Tree {
+  def id: Id
+}
+
+final case class PrimaryCtor(mods: List[Mod], params: List[TermParam])
+    extends Tree
+    with Outline {
+  val id = CtorId()
+}
+
+final case class SomeId(value: String) extends NamedId {
+  def name = SomeName(value)
+}
+
+final case class Source(stats: List[Stat]) extends Tree
+
+sealed trait Stat extends Tree
+
+sealed trait Term extends Stat with Typeable
+
+final case class TermApply(fun: Term, args: List[Term]) extends Term
+
+final case class TermApplyInfix(
+    lhs: Term,
+    op: TermId,
+    targs: List[Tpt],
+    rhs: Term)
+    extends Term
+
+final case class TermApplyPostfix(arg: Term, op: TermId) extends Term
+
+final case class TermApplyPrefix(op: TermId, arg: Term) extends Term
+
+final case class TermApplyType(fun: Term, targs: List[Tpt]) extends Term
+
+final case class TermAscribe(term: Term, tpt: Tpt) extends Term
+
+final case class TermAssign(lhs: Term, rhs: Term) extends Term
+
+final case class TermBlock(stats: List[Stat]) extends Term
+
+final case class TermDo(body: Term, cond: Term) extends Term
+
+final case class TermEta(term: Term) extends Term
+
+final case class TermFunction(params: List[TermParam], body: Term) extends Term
+
+final case class TermId(value: String) extends TermPath with NamedId {
+  def name = TermName(value)
+}
+
+final case class TermIf(cond: Term, thenp: Term, elsep: Option[Term])
+    extends Term
+
+final case class TermLit(value: Any) extends Term
+
+final case class TermMatch(term: Term, cases: List[Case]) extends Term
+
+final case class TermNew(_init: Init) extends Term
+
+final case class TermParam(mods: List[Mod], id: TermId, tpt: Tpt)
+    extends Tree
+    with Outline
+
+final case class TermPartialFunction(cases: List[Case]) extends Term
+
+sealed trait TermPath extends Term with Path
+
+final case class TermRepeat(term: Term) extends Term
+
+final case class TermReturn(term: Option[Term]) extends Term
+
+final case class TermSelect(qual: Term, id: TermId) extends TermPath
+
+// As a Path, `super` is identified by its mixin id.
+final case class TermSuper(qual: Id, mix: Id) extends TermPath {
+  def id = mix
+}
+
+// As a Path, `this` is identified by its qualifier id.
+final case class TermThis(qual: Id) extends TermPath {
+  def id = qual
+}
+
+final case class TermThrow(term: Term) extends Term
+
+final case class TermTuple(args: List[Term]) extends Term
+
+final case class TermWhile(cond: Term, body: Term) extends Term
+
+sealed trait Tpt extends Typeable
+
+// A type tree that can be viewed as a type constructor applied to arguments.
+sealed trait TptApply extends Tpt {
+  def fun: Tpt
+  def targs: List[Tpt]
+}
+
+object TptApply {
+  def unapply(tree: TptApply): Some[(Tpt, List[Tpt])] = {
+    Some((tree.fun, tree.targs))
+  }
+}
+
+// A function type viewed as FunctionN[targs], with N = targs.length - 1.
+// `fun` builds a new TptId on every call.
+final case class TptFunction(targs: List[Tpt]) extends TptApply {
+  def fun = {
+    val value = "Function" + (targs.length - 1)
+    val sym = "_root_.scala." + value + "#"
+    TptId(value).withSym(sym)
+  }
+}
+
+final case class TptId(value: String) extends TptPath with NamedId {
+  def name = TypeName(value)
+}
+
+sealed trait TptPath extends Tpt with Path
+
+final case class TptParameterize(fun: Tpt, targs: List[Tpt]) extends TptApply
+
+final case class TptParameterizeInfix(lhs: Tpt, op: TptId, rhs: Tpt)
+    extends TptApply {
+  def fun = op
+  def targs = List(lhs, rhs)
+}
+
+// A repeated type viewed as Seq[targ].
+final case class TptRepeat(targ: Tpt) extends Tpt with TptApply {
+  def fun = TptId("Seq").withSym("_root_.scala.Seq#")
+  def targs = List(targ)
+}
+
+final case class TptSelect(qual: TermPath, id: TptId) extends TptPath
+
+// A tuple type viewed as TupleN[targs], with N = targs.length.
+final case class TptTuple(targs: List[Tpt]) extends TptApply {
+  def fun = {
+    val value = "Tuple" + targs.length
+    val sym = "_root_.scala." + value + "#"
+    TptId(value).withSym(sym)
+  }
+}
+
+sealed trait Typeable extends Tree
+
+// `hi` and `lo` give the effective bounds: the declared bound if present,
+// otherwise scala.Any and scala.Nothing respectively.
+final case class TypeParam(
+    mods: List[Mod],
+    id: TptId,
+    ubound: Option[Tpt],
+    lbound: Option[Tpt])
+    extends Tree
+    with Outline {
+  def hi = ubound.getOrElse(TptId("Any").withSym("_root_.scala.Any#"))
+  def lo = lbound.getOrElse(TptId("Nothing").withSym("_root_.scala.Nothing#"))
+}
diff --git a/src/main/scala/rsc/typecheck/Atoms.scala b/src/main/scala/rsc/typecheck/Atoms.scala
new file mode 100644
index 0000000..3ad47d2
--- /dev/null
+++ b/src/main/scala/rsc/typecheck/Atoms.scala
@@ -0,0 +1,51 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.typecheck
+
+import rsc.pretty._
+import rsc.syntax._
+
+// One step of a flattened path or type tree; see Atoms below for how trees
+// are decomposed into a list of these.
+sealed trait Atom extends Pretty with Product {
+  def printStr(p: Printer): Unit = PrettyAtom.str(p, this)
+  def printRepl(p: Printer): Unit = PrettyAtom.repl(p, this)
+}
+
+final case class ApplyAtom(args: List[Tpt]) extends Atom
+
+final case class IdAtom(id: NamedId) extends Atom
+
+final case class SuperAtom(id: Id) extends Atom
+
+final case class ThisAtom(id: Id) extends Atom
+
+// Marks a qualifier that is not a Path and therefore cannot be decomposed.
+final case class UnsupportedAtom(unsupported: Tree) extends Atom
+
+trait Atoms {
+  // Flattens a path left to right: qualifier atoms first, selected id last.
+  implicit class PathAtomsOps(path: Path) {
+    def atoms: List[Atom] = {
+      path match {
+        case id: NamedId => List(IdAtom(id))
+        case TermSelect(qual: Path, id) => qual.atoms ++ id.atoms
+        case TermSelect(qual, id) => List(UnsupportedAtom(qual)) ++ id.atoms
+        case TermSuper(qual, mix) => List(ThisAtom(qual), SuperAtom(mix))
+        case TermThis(qual) => List(ThisAtom(qual))
+        case TptSelect(qual, id) => qual.atoms ++ id.atoms
+      }
+    }
+  }
+
+  // Flattens a type tree: an applied type contributes the atoms of its
+  // constructor followed by a single ApplyAtom holding the arguments.
+  implicit class TptAtomsOps(tpt: Tpt) {
+    def atoms: List[Atom] = {
+      tpt match {
+        case TptApply(tpt, args) => tpt.atoms ++ List(ApplyAtom(args))
+        case tpt: TptPath => PathAtomsOps(tpt).atoms
+      }
+    }
+  }
+
+  // A TptPath is both a Tpt and a Path; this wrapper forwards explicitly to
+  // the Path version.
+  implicit class TptPathAtomsOps(tptPath: TptPath) {
+    def atoms: List[Atom] = {
+      PathAtomsOps(tptPath).atoms
+    }
+  }
+}
diff --git a/src/main/scala/rsc/typecheck/Envs.scala b/src/main/scala/rsc/typecheck/Envs.scala
new file mode 100644
index 0000000..17cd4cd
--- /dev/null
+++ b/src/main/scala/rsc/typecheck/Envs.scala
@@ -0,0 +1,166 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.typecheck
+
+import scala.annotation.tailrec
+import rsc.pretty._
+import rsc.semantics._
+import rsc.util._
+
+// An immutable stack of scopes, innermost first. Lookups walk the list from
+// head to tail and stop at the first scope that produces an answer.
+sealed class Env protected (val _scopes: List[Scope]) extends Pretty {
+  // The innermost OwnerScope; crashes if the environment contains none.
+  def owner: OwnerScope = {
+    @tailrec def loop(_scopes: List[Scope]): OwnerScope = {
+      _scopes match {
+        case (head: OwnerScope) :: rest => head
+        case _ :: rest => loop(rest)
+        case Nil => crash(this)
+      }
+    }
+    loop(_scopes)
+  }
+
+  // The environment without its innermost scope; crashes when empty.
+  def outer: Env = {
+    _scopes match {
+      case head :: rest => Env(rest)
+      case Nil => crash(this)
+    }
+  }
+
+  // Pushes a scope: `scope :: env` is a new Env with `scope` innermost.
+  def ::(scope: Scope): Env = {
+    Env(scope :: _scopes)
+  }
+
+  // First symbol found for `name`, innermost scope first, or NoSymbol.
+  def lookup(name: Name): Symbol = {
+    @tailrec def loop(_scopes: List[Scope]): Symbol = {
+      _scopes match {
+        case head :: tail =>
+          head.lookup(name) match {
+            case NoSymbol => loop(tail)
+            case other => other
+          }
+        case Nil =>
+          NoSymbol
+      }
+    }
+    loop(_scopes)
+  }
+
+  // Symbol of the innermost enclosing template that matches `qual`.
+  // None matches any template; a SomeName is compared by value only, any
+  // other name is compared against the template's full name.
+  def lookupThis(qual: Option[Name]): Symbol = {
+    @tailrec def loop(_scopes: List[Scope]): Symbol = {
+      _scopes match {
+        case (head: TemplateScope) :: tail =>
+          val found = {
+            qual match {
+              case Some(SomeName(value)) => head.tree.id.value == value
+              case Some(name) => head.tree.id.name == name
+              case None => true
+            }
+          }
+          if (found) head.sym
+          else loop(tail)
+        case _ :: tail =>
+          loop(tail)
+        case Nil =>
+          NoSymbol
+      }
+    }
+    loop(_scopes)
+  }
+
+  // Resolves `super` / `super[mix]`. Only defined when this environment is
+  // exactly one TemplateScope; anything else yields NoSymbol. With a mixin
+  // name the parents are searched in order; without one, a single parent is
+  // used directly and otherwise a SuperScope over the template.
+  def lookupSuper(mix: Option[Name]): Symbol = {
+    _scopes match {
+      case List(thisScope: TemplateScope) =>
+        mix match {
+          case Some(mix) =>
+            @tailrec def loop(parents: List[TemplateScope]): Symbol = {
+              parents match {
+                case head :: tail =>
+                  val found = {
+                    mix match {
+                      case SomeName(value) => head.tree.id.value == value
+                      case name => head.tree.id.name == name
+                    }
+                  }
+                  if (found) head.sym
+                  else loop(tail)
+                case Nil =>
+                  NoSymbol
+              }
+            }
+            loop(thisScope.parents)
+          case None =>
+            thisScope.parents match {
+              case List(parent) => parent.sym
+              case other => SuperScope(thisScope).sym
+            }
+        }
+      case _ =>
+        NoSymbol
+    }
+  }
+
+  // Like lookup, but in terms of Resolution: the first scope that returns
+  // anything other than MissingResolution decides the result.
+  def resolve(name: Name): Resolution = {
+    @tailrec def loop(_scopes: List[Scope]): Resolution = {
+      _scopes match {
+        case head :: tail =>
+          head.resolve(name) match {
+            case MissingResolution => loop(tail)
+            case other => other
+          }
+        case Nil =>
+          MissingResolution
+      }
+    }
+    loop(_scopes)
+  }
+
+  def resolveThis(qual: Option[Name]): Resolution = {
+    lookupThis(qual) match {
+      case NoSymbol => MissingResolution
+      case sym => FoundResolution(sym)
+    }
+  }
+
+  // Resolution counterpart of lookupSuper. The template scope must have
+  // succeeded before its parents are consulted: a pending or blocked scope
+  // blocks the caller, a failed one is an error.
+  def resolveSuper(mix: Option[Name]): Resolution = {
+    _scopes match {
+      case List(thisScope: TemplateScope) =>
+        thisScope.status match {
+          case PendingStatus =>
+            BlockedResolution(thisScope)
+          case BlockedStatus(_) =>
+            BlockedResolution(thisScope)
+          case _: FailedStatus =>
+            ErrorResolution
+          case SucceededStatus =>
+            lookupSuper(mix) match {
+              case NoSymbol => MissingResolution
+              case sym => FoundResolution(sym)
+            }
+        }
+      case _ =>
+        ErrorResolution
+    }
+  }
+
+  override def printStr(p: Printer): Unit = {
+    PrettyEnv.str(p, this)
+  }
+
+  override def printRepl(p: Printer): Unit = {
+    PrettyEnv.repl(p, this)
+  }
+}
+
+object Env {
+  def apply(): Env = {
+    new Env(Nil)
+  }
+
+  def apply(scopes: List[Scope]): Env = {
+    new Env(scopes)
+  }
+
+  def apply(scopes: Scope*): Env = {
+    new Env(scopes.toList)
+  }
+}
diff --git a/src/main/scala/rsc/typecheck/Linker.scala b/src/main/scala/rsc/typecheck/Linker.scala
new file mode 100644
index 0000000..97411ab
--- /dev/null
+++ b/src/main/scala/rsc/typecheck/Linker.scala
@@ -0,0 +1,63 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.typecheck
+
+import java.nio.{file => nio}
+import rsc.report._
+import rsc.semantics._
+import rsc.settings._
+import rsc.syntax._
+import rsc.util._
+
+// Seeds the symbol table with the synthetic package hierarchy: the
+// top-level "π." package and, inside it, "_root_." and "_empty_.".
+final class Linker private (
+    settings: Settings,
+    reporter: Reporter,
+    symtab: Symtab,
+    todo: Todo) {
+  // Creates the synthetic packages. `trees` and `classpath` are currently
+  // not consulted (see the NOTE below).
+  def apply(trees: List[Source], classpath: List[nio.Path]): Unit = {
+    val pi = createPi()
+    val rootPackage = createPackage(pi, "_root_")
+    val emptyPackage = createPackage(pi, "_empty_")
+    // NOTE: We expect the deps to be available as stubs on the sourcepath.
+    // See stdlib/src/main/scala/Stdlib.scala for an example for such stubs.
+  }
+
+  // Registers the "π." package: its scope, a todo entry for that scope and
+  // an empty DefnPackage outline. Returns its symbol.
+  private def createPi(): Symbol = {
+    val sym = "π."
+    val scope = PackageScope(sym)
+    symtab.scopes(sym) = scope
+    // Pass an explicit pair, as createPackage and every other caller do,
+    // instead of relying on the compiler adapting two arguments to a tuple.
+    todo.scopes.add(Env() -> scope)
+    symtab.outlines(sym) = DefnPackage(TermId("π").withSym(sym), Nil)
+    sym
+  }
+
+  // Registers package `value` inside `owner` and returns its symbol.
+  // Direct children of "π." do not carry the "π." prefix in their symbol.
+  // Crashes if the owner scope already has an entry under that name.
+  private def createPackage(owner: Symbol, value: String): Symbol = {
+    val name = TermName(value)
+    val sym = {
+      if (owner == "π.") value + "."
+      else owner + value + "."
+    }
+    val outline = DefnPackage(TermId(value).withSym(sym), Nil)
+    val scope = PackageScope(sym)
+    todo.scopes.add(Env() -> scope)
+    val ownerScope = symtab.scopes(owner)
+    ownerScope.enter(name, sym) match {
+      case NoSymbol =>
+        symtab.scopes(sym) = scope
+        symtab.outlines(sym) = outline
+        sym
+      case _ =>
+        crash(ownerScope)
+    }
+  }
+}
+
+object Linker {
+  def apply(
+      settings: Settings,
+      reporter: Reporter,
+      symtab: Symtab,
+      todo: Todo): Linker = {
+    new Linker(settings, reporter, symtab, todo)
+  }
+}
diff --git a/src/main/scala/rsc/typecheck/Outliner.scala b/src/main/scala/rsc/typecheck/Outliner.scala
new file mode 100644
index 0000000..3d2e026
--- /dev/null
+++ b/src/main/scala/rsc/typecheck/Outliner.scala
@@ -0,0 +1,81 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.typecheck
+
+import rsc.report._
+import rsc.semantics._
+import rsc.settings._
+import rsc.syntax._
+import rsc.util._
+
+// Assigns symbols to the identifiers that occur in type trees and in the
+// qualifiers of private[...] / protected[...] modifiers.
+final class Outliner private (
+    settings: Settings,
+    reporter: Reporter,
+    symtab: Symtab) {
+  def apply(env: Env, tpt: Tpt): Unit = {
+    assignSyms(env, tpt.atoms)
+  }
+
+  // Only access modifiers whose qualifier is a SomeId are processed; every
+  // other modifier is ignored.
+  def apply(env: Env, mod: Mod): Unit = {
+    mod match {
+      case ModPrivate(Some(id: SomeId)) =>
+        assignSyms(env, id.atoms)
+      case ModProtected(Some(id: SomeId)) =>
+        assignSyms(env, id.atoms)
+      case _ =>
+        ()
+    }
+  }
+
+  // Walks the atoms left to right. Each resolved id/this/super atom sets the
+  // symbol on its identifier and, if atoms remain, continues in an
+  // environment made of just that symbol's scope. Type arguments are always
+  // resolved from `startingEnv`. The walk stops at the first unresolved or
+  // unsupported atom, after reporting it.
+  private def assignSyms(startingEnv: Env, atoms: List[Atom]): Unit = {
+    def loop(env: Env, atoms: List[Atom]): Unit = {
+      atoms match {
+        case ApplyAtom(args) :: rest =>
+          args.foreach(arg => assignSyms(startingEnv, arg.atoms))
+          loop(env, rest)
+        case IdAtom(id) :: rest =>
+          env.lookup(id.name) match {
+            case NoSymbol =>
+              // The first atom is an unbound identifier; later atoms are
+              // missing members of the scope reached so far.
+              if (env == startingEnv) reporter.append(UnboundId(id))
+              else reporter.append(UnboundMember(env.owner.sym, id))
+            case sym =>
+              id.sym = sym
+              if (rest.isEmpty) ()
+              else loop(Env(symtab.scopes(sym)), rest)
+          }
+        case ThisAtom(id) :: rest =>
+          env.lookupThis(id.nameopt) match {
+            case NoSymbol =>
+              reporter.append(UnboundId(id))
+            case sym =>
+              id.sym = sym
+              if (rest.isEmpty) ()
+              else loop(Env(symtab.scopes(sym)), rest)
+          }
+        case SuperAtom(id) :: rest =>
+          env.lookupSuper(id.nameopt) match {
+            case NoSymbol =>
+              reporter.append(UnboundId(id))
+            case sym =>
+              id.sym = sym
+              if (rest.isEmpty) ()
+              else loop(Env(symtab.scopes(sym)), rest)
+          }
+        case (atom @ UnsupportedAtom(unsupported)) :: rest =>
+          reporter.append(IllegalOutlinePart(unsupported))
+        case Nil =>
+          ()
+      }
+    }
+    loop(startingEnv, atoms)
+  }
+}
+
+object Outliner {
+  def apply(
+      settings: Settings,
+      reporter: Reporter,
+      symtab: Symtab): Outliner = {
+    new Outliner(settings, reporter, symtab)
+  }
+}
diff --git a/src/main/scala/rsc/typecheck/Resolutions.scala b/src/main/scala/rsc/typecheck/Resolutions.scala
new file mode 100644
index 0000000..25ed301
--- /dev/null
+++ b/src/main/scala/rsc/typecheck/Resolutions.scala
@@ -0,0 +1,17 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.typecheck
+
+import rsc.pretty._
+import rsc.semantics._
+
+// Outcome of resolving a name: found, failed (missing or erroneous), or
+// blocked on a scope.
+sealed trait Resolution extends Pretty with Product {
+  override def printStr(p: Printer): Unit = PrettyResolution.str(p, this)
+  override def printRepl(p: Printer): Unit = PrettyResolution.repl(p, this)
+}
+
+final case class BlockedResolution(scope: Scope) extends Resolution
+sealed trait FailedResolution extends Resolution
+final case object MissingResolution extends FailedResolution
+final case object ErrorResolution extends FailedResolution
+final case class FoundResolution(sym: Symbol) extends Resolution
diff --git a/src/main/scala/rsc/typecheck/Scheduler.scala b/src/main/scala/rsc/typecheck/Scheduler.scala
new file mode 100644
index 0000000..534a2a8
--- /dev/null
+++ b/src/main/scala/rsc/typecheck/Scheduler.scala
@@ -0,0 +1,254 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.typecheck
+
+import rsc.report._
+import rsc.semantics._
+import rsc.settings._
+import rsc.syntax._
+import rsc.util._
+
+// Walks definitions, enters their names into the owning scopes, records
+// their outlines in the symbol table, and queues the parts that need
+// further processing (scopes, mods, tpts, terms) on `todo`.
+final class Scheduler private (
+    settings: Settings,
+    reporter: Reporter,
+    symtab: Symtab,
+    todo: Todo) {
+  // Dispatches on the kind of tree. Every handler returns the `env` it was
+  // given. Imports and primary constructors must not be passed in directly;
+  // they are handled by stats and defnTemplate respectively.
+  def apply(env: Env, tree: Tree): Env = {
+    tree match {
+      case tree: DefnDef => defnDef(env, tree)
+      case tree: DefnField => defnField(env, tree)
+      case tree: DefnPackage => defnPackage(env, tree)
+      case tree: DefnTemplate => defnTemplate(env, tree)
+      case tree: DefnType => defnType(env, tree)
+      case tree: Import => crash(tree)
+      case tree: PrimaryCtor => crash(tree)
+      case tree: Source => source(env, tree)
+      case tree: TermParam => termParam(env, tree)
+      case tree: TypeParam => typeParam(env, tree)
+      case _ => crash(tree)
+    }
+  }
+
+  // Enters `id` into `owner` under the symbol owner.sym + id.name.str.
+  // On success the symbol is stored on the id and the outline is recorded.
+  // If the name is already taken a DoubleDef is reported and the id is left
+  // as it was. Returns id.sym either way.
+  private def assignSym(
+      owner: OwnerScope,
+      id: NamedId,
+      outline: Outline): Symbol = {
+    val proposedSym = {
+      owner match {
+        case _: FlatScope => owner.sym + id.name.str
+        case _: PackageScope => owner.sym + id.name.str
+        case _: TemplateScope => owner.sym + id.name.str
+      }
+    }
+    owner.enter(id.name, proposedSym) match {
+      case NoSymbol =>
+        id.sym = proposedSym
+        symtab.outlines(id.sym) = outline
+      case existingSym =>
+        reporter.append(DoubleDef(outline, symtab.outlines(existingSym)))
+    }
+    id.sym
+  }
+
+  // The return type and the body are queued in the environment that has the
+  // method's type and term parameters in scope.
+  private def defnDef(env: Env, tree: DefnDef): Env = {
+    val sym = assignSym(env.owner, tree.id, tree)
+    if (sym != NoSymbol) {
+      mods(env, tree.mods)
+      val tparamEnv = typeParams(env, tree.tparams)
+      val paramEnv = termParams(tparamEnv, tree.params)
+      todo.tpts.add(paramEnv -> tree.ret)
+      tree.rhs.foreach(rhs => todo.terms.add(paramEnv -> rhs))
+    }
+    env
+  }
+
+  private def defnField(env: Env, tree: DefnField): Env = {
+    val sym = assignSym(env.owner, tree.id, tree)
+    if (sym != NoSymbol) {
+      mods(env, tree.mods)
+      todo.tpts.add(env -> tree.tpt)
+      tree.rhs.foreach(rhs => todo.terms.add(env -> rhs))
+    }
+    env
+  }
+
+  // Enters every segment of the package path, outermost first, creating a
+  // PackageScope for segments seen for the first time and reusing the
+  // existing symbol for known packages. The package's statements are then
+  // scheduled inside the innermost package.
+  private def defnPackage(env: Env, tree: DefnPackage): Env = {
+    def loop(env: Env, qual: TermPath): Env = {
+      val (qualEnv, id) = {
+        qual match {
+          case id: TermId => (env, id)
+          case TermSelect(qual: TermPath, id) => (loop(env, qual), id)
+          case _ => crash(tree)
+        }
+      }
+      val proposedSym = qualEnv.owner.sym + id.name.str
+      qualEnv.owner.enter(id.name, proposedSym) match {
+        case NoSymbol =>
+          id.sym = proposedSym
+          val packageScope = PackageScope(id.sym)
+          symtab.scopes(id.sym) = packageScope
+          todo.scopes.add(qualEnv -> packageScope)
+          symtab.outlines(id.sym) = DefnPackage(id, Nil)
+        case existingSym =>
+          val existingOutline = symtab.outlines(existingSym)
+          existingOutline match {
+            case _: DefnPackage =>
+              id.sym = existingSym
+            case _ =>
+              crash("overloading")
+          }
+      }
+      symtab.scopes(id.sym) :: qualEnv
+    }
+    val envN = loop(env, tree.pid)
+    stats(envN, tree.stats)
+    env
+  }
+
+  // Schedules a class, trait or object. Parent initializers are queued in
+  // the constructor environment; the body is scheduled with the template
+  // scope on top of it. Only classes get a symbol for the primary
+  // constructor. Constructor parameters are also entered into the template
+  // scope.
+  private def defnTemplate(env: Env, tree: DefnTemplate): Env = {
+    val sym = assignSym(env.owner, tree.id, tree)
+    if (sym != NoSymbol) {
+      mods(env, tree.mods)
+      val tparamEnv = typeParams(env, tree.tparams)
+      val ctorEnv = {
+        mods(tparamEnv, tree.ctor.mods)
+        termParams(tparamEnv, tree.ctor.params)
+      }
+      tree.inits.foreach(init => todo.terms.add(ctorEnv -> init))
+      val templateEnv = {
+        val templateScope = TemplateScope(tree)
+        symtab.scopes(sym) = templateScope
+        if (tree.isInstanceOf[DefnClass]) {
+          assignSym(templateScope, tree.ctor.id, tree.ctor)
+        }
+        todo.scopes.add(tparamEnv -> templateScope)
+        tree.ctor.params.foreach(p => templateScope.enter(p.id.name, p.id.sym))
+        templateScope :: ctorEnv
+      }
+      stats(templateEnv, tree.stats)
+    }
+    env
+  }
+
+  private def defnType(env: Env, tree: DefnType): Env = {
+    val sym = assignSym(env.owner, tree.id, tree)
+    if (sym != NoSymbol) {
+      mods(env, tree.mods)
+      todo.tpts.add(env -> tree.tpt)
+    }
+    env
+  }
+
+  // Schedules statements inside a fresh "early" scope and returns the
+  // extended environment. NOTE(review): no caller is visible in this file.
+  private def early(env: Env, trees: List[Stat]): Env = {
+    if (trees.nonEmpty) {
+      val earlyScope = FlatScope("early")
+      val earlyEnv = earlyScope :: env
+      stats(earlyEnv, trees)
+      earlyScope.succeed()
+      earlyEnv
+    } else {
+      env
+    }
+  }
+
+  // Queues access modifiers whose qualifier is a SomeId; other modifiers
+  // need no further work.
+  private def mods(env: Env, trees: List[Mod]): Env = {
+    trees.foreach {
+      case tree @ ModPrivate(Some(id: SomeId)) =>
+        todo.mods.add(env -> tree)
+      case tree @ ModProtected(Some(id: SomeId)) =>
+        todo.mods.add(env -> tree)
+      case _ =>
+        ()
+    }
+    env
+  }
+
+  // A source without any package clause is scheduled inside "_empty_.".
+  private def source(env: Env, tree: Source): Env = {
+    val sourceEnv = {
+      val hasPackages = tree.stats.exists(_.isInstanceOf[DefnPackage])
+      if (hasPackages) env else symtab.scopes("_empty_.") :: env
+    }
+    stats(sourceEnv, tree.stats)
+    env
+  }
+
+  // Schedules statements in order. Each importer pushes a scope that is
+  // visible only to the statements after it. Terms are queued as-is; all
+  // other statements go through apply.
+  private def stats(env: Env, trees: List[Stat]): Env = {
+    trees match {
+      case (tree: Import) :: rest =>
+        val envN = {
+          tree.importers.foldLeft(env) { (env, importer) =>
+            val scope = {
+              // The descriptive scope name is only built when -Xprint is on.
+              if (settings.xprint.isEmpty) ImporterScope(importer)
+              else ImporterScope("import " + importer.str, importer)
+            }
+            todo.scopes.add(env -> scope)
+            scope :: env
+          }
+        }
+        stats(envN, rest)
+      case (tree: Term) :: rest =>
+        todo.terms.add(env -> tree)
+        stats(env, rest)
+      case tree :: rest =>
+        apply(env, tree)
+        stats(env, rest)
+      case Nil =>
+        env
+    }
+  }
+
+  // Enters the parameters into a fresh "params" scope, marks that scope as
+  // succeeded and returns the extended environment (or `env` unchanged when
+  // there are no parameters).
+  private def termParams(env: Env, trees: List[TermParam]): Env = {
+    if (trees.nonEmpty) {
+      val paramScope = FlatScope("params")
+      val paramEnv = paramScope :: env
+      trees.foreach(apply(paramEnv, _))
+      paramScope.succeed()
+      paramEnv
+    } else {
+      env
+    }
+  }
+
+  private def termParam(env: Env, tree: TermParam): Env = {
+    val sym = assignSym(env.owner, tree.id, tree)
+    if (sym != NoSymbol) {
+      // NOTE: Params are typechecked in env.outer, but their mods use env.
+      // This is inconsistent, and unfortunately not mentioned in SLS.
+      mods(env, tree.mods)
+      todo.tpts.add(env.outer -> tree.tpt)
+    }
+    env
+  }
+
+  // Same as termParams, for type parameters and a "tparams" scope.
+  private def typeParams(env: Env, trees: List[TypeParam]): Env = {
+    if (trees.nonEmpty) {
+      val tparamScope = FlatScope("tparams")
+      val tparamEnv = tparamScope :: env
+      trees.foreach(apply(tparamEnv, _))
+      tparamScope.succeed()
+      tparamEnv
+    } else {
+      env
+    }
+  }
+
+  // Bounds are queued in `env`, i.e. with the sibling type parameters in
+  // scope (unlike term parameter types, which use env.outer).
+  private def typeParam(env: Env, tree: TypeParam): Env = {
+    val sym = assignSym(env.owner, tree.id, tree)
+    if (sym != NoSymbol) {
+      mods(env, tree.mods)
+      tree.ubound.foreach(ubound => todo.tpts.add(env -> ubound))
+      tree.lbound.foreach(lbound => todo.tpts.add(env -> lbound))
+    }
+    env
+  }
+}
+
+object Scheduler {
+  def apply(
+      settings: Settings,
+      reporter: Reporter,
+      symtab: Symtab,
+      todo: Todo): Scheduler = {
+    new Scheduler(settings, reporter, symtab, todo)
+  }
+}
diff --git a/src/main/scala/rsc/typecheck/Scoper.scala b/src/main/scala/rsc/typecheck/Scoper.scala
new file mode 100644
index 0000000..55c3528
--- /dev/null
+++ b/src/main/scala/rsc/typecheck/Scoper.scala
@@ -0,0 +1,163 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.typecheck + +import rsc.report._ +import rsc.semantics._ +import rsc.settings._ +import rsc.syntax._ +import rsc.util._ +/** Tries to complete one pending scope. apply crashes on a scope that is not pending and on FlatScope/SuperScope, lets a PackageScope succeed immediately, and sends ImporterScope and TemplateScope through trySucceed, which succeeds, fails or blocks the scope. */ +final class Scoper private ( + settings: Settings, + reporter: Reporter, + symtab: Symtab, + todo: Todo) { + def apply(env: Env, scope: Scope): Unit = { + if (!scope.status.isPending) { + crash(scope) + } + scope match { + case scope: ImporterScope => + trySucceed(env, scope) + case scope: FlatScope => + crash(scope) + case scope: PackageScope => + scope.succeed() + case scope: TemplateScope => + trySucceed(env, scope) + case scope: SuperScope => + crash(scope) + } + } + /** Resolves the importer qualifier via assignSyms. A blocked resolution blocks the scope on that dependency and a failed one fails it. Otherwise the scope registered for the qualifier symbol becomes the parent once it has succeeded; while it is incomplete the import blocks on it, and if it failed the import fails too. */ + private def trySucceed(env: Env, scope: ImporterScope): Unit = { + val qualResolution = assignSyms(env, scope.tree.qual) + qualResolution match { + case BlockedResolution(dep) => + scope.block(dep) + case _: FailedResolution => + scope.fail() + case FoundResolution(qualResolution) => + val parent = symtab.scopes(qualResolution) + parent.status match { + case _: IncompleteStatus => + scope.block(parent) + case _: FailedStatus => + scope.fail() + case SucceededStatus => + scope.parent = parent + scope.succeed() + } + } + } + /** Resolves the parent inits of the template; when none are written an AnyRef init is used, except for _root_.scala.Any# which gets no parents. Inits are only processed while the scope is still pending, so the first blocked or failed resolution stops the rest. A resolved parent whose scope is not a TemplateScope crashes. Afterwards the scope blocks on the first incomplete parent scope, or records the parents and succeeds. */ + private def trySucceed(env: Env, scope: TemplateScope): Unit = { + val buf = List.newBuilder[TemplateScope] + val inits = { + if (scope.tree.inits.nonEmpty) { + scope.tree.inits + } else { + if (scope.tree.id.sym == "_root_.scala.Any#") Nil + else List(Init(TptId("AnyRef").withSym("_root_.scala.AnyRef#"), Nil)) + } + } + inits.foreach { + case Init(tpt, _) => + if (scope.status.isPending) { + def loop(tpt: Tpt): Resolution = { + tpt match { + case tpt: TptPath => + assignSyms(env, tpt) + case TptApply(tpt, _) => + loop(tpt) + } + } + loop(tpt) match { + case BlockedResolution(dep) => + scope.block(dep) + case _: FailedResolution => + scope.fail() + case FoundResolution(sym) => + symtab.scopes(sym) match { + case parentScope: TemplateScope => buf += parentScope + case other => crash(other) + } + } + } + } + if (scope.status.isPending) { + val
parents = buf.result + val incompleteParent = parents.find(_.status.isIncomplete) + incompleteParent match { + case Some(incompleteParent) => + scope.block(incompleteParent) + case _ => + scope.parents = parents + scope.succeed() + } + } + } + /** Resolves the atoms of `path` from left to right, starting in startingEnv and continuing each later atom in the scope of the previously found symbol. An id that already carries a symbol is not resolved again. A missing name is reported (UnboundId when still in the starting env, UnboundMember otherwise) and turned into ErrorResolution; ApplyAtom crashes and UnsupportedAtom yields ErrorResolution. */ + private def assignSyms(startingEnv: Env, path: Path): Resolution = { + def assignSym(env: Env, id: Id, resolver: => Resolution): Resolution = { + val cachedSym = id.sym + cachedSym match { + case NoSymbol => + val resolution = resolver + resolution match { + case BlockedResolution(_) => + resolution + case MissingResolution => + if (env == startingEnv) reporter.append(UnboundId(id)) + else reporter.append(UnboundMember(env.owner.sym, id)) + ErrorResolution + case ErrorResolution => + ErrorResolution + case FoundResolution(sym) => + id.sym = sym + resolution + } + case cachedSym => + FoundResolution(cachedSym) + } + } + def loop(env: Env, atoms: List[Atom]): Resolution = { + val atom :: rest = atoms + val resolution = { + atom match { + case atom: ApplyAtom => + crash(atom) + case IdAtom(id) => + assignSym(env, id, env.resolve(id.name)) + case ThisAtom(id) => + assignSym(env, id, env.resolveThis(id.nameopt)) + case SuperAtom(id) => + assignSym(env, id, env.resolveSuper(id.nameopt)) + case atom: UnsupportedAtom => + ErrorResolution + } + } + resolution match { + case BlockedResolution(_) => + resolution + case _: FailedResolution => + resolution + case FoundResolution(sym) => + if (rest.isEmpty) resolution + else loop(Env(symtab.scopes(sym)), rest) + } + } + loop(startingEnv, path.atoms) + } +} +/** Factory: builds a Scoper from the given settings, reporter, symtab and todo. */ +object Scoper { + def apply( + settings: Settings, + reporter: Reporter, + symtab: Symtab, + todo: Todo): Scoper = { + new Scoper(settings, reporter, symtab, todo) + } +} diff --git a/src/main/scala/rsc/typecheck/Scopes.scala b/src/main/scala/rsc/typecheck/Scopes.scala new file mode 100644 index 0000000..c7e3db9 --- /dev/null +++ b/src/main/scala/rsc/typecheck/Scopes.scala @@ -0,0 +1,388 @@ +// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.typecheck + +import java.util.{HashMap, Map} +import scala.collection.mutable +import rsc.pretty._ +import rsc.semantics._ +import rsc.syntax._ +import rsc.util._ +/** A scope identified by a symbol, with a completion status that starts as PendingStatus. The base resolve only answers for unfinished (blocked) or failed scopes and crashes for a succeeded scope, so subclasses override it for the succeeded case. */ +sealed abstract class Scope(val sym: Symbol) extends Pretty { + var status: Status = PendingStatus + + def enter(name: Name, sym: Symbol): Symbol + + def lookup(name: Name): Symbol + + def resolve(name: Name): Resolution = { + status match { + case PendingStatus => + BlockedResolution(this) + case BlockedStatus(_) => + BlockedResolution(this) + case _: FailedStatus => + ErrorResolution + case SucceededStatus => + crash(this) + } + } + /** Marks this pending scope as blocked on dep. When dep is itself blocked, the chain of blocked scopes is followed: if it loops back onto a visited scope, the scopes on the loop get CyclicStatus and the remaining visited scopes get ErrorStatus; if it ends in a failed scope, this scope becomes ErrorStatus. A failed dep makes this scope ErrorStatus directly. Crashes when this scope is not pending or dep has already succeeded. */ + def block(dep: Scope): Unit = { + status match { + case PendingStatus => + dep.status match { + case PendingStatus => + status = BlockedStatus(dep) + case BlockedStatus(depdep) => + val visited = mutable.Set[Scope]() + def loop(depdep: Scope): Unit = { + depdep.status match { + case BlockedStatus(depdepdep) => + if (visited(depdepdep)) { + val root = depdep + def loop(scope: Scope): List[Scope] = { + if (scope == root) { + scope :: Nil + } else { + val BlockedStatus(scopedep) = scope.status + scope :: loop(scopedep) + } + } + val cycle = loop(depdepdep) + cycle.foreach(_.status = CyclicStatus(cycle)) + val stuck = visited.toSet -- cycle + stuck.foreach(_.status = ErrorStatus) + } else { + visited += depdep + loop(depdepdep) + } + case _: FailedStatus => + status = ErrorStatus + case _ => + () + } + } + status = BlockedStatus(dep) + visited += this + visited += dep + loop(depdep) + case _: FailedStatus => + status = ErrorStatus + case SucceededStatus => + crash(this) + } + case _ => + crash(this) + } + } + /** Re-evaluates a blocked scope: it becomes pending again and, unless its dependency has succeeded, is immediately blocked on that dependency again. Does nothing for any other status. */ + def unblock(): Unit = { + status match { + case BlockedStatus(dep) => + dep.status match { + case SucceededStatus => + status = PendingStatus + case other => + status = PendingStatus + block(dep) + } + case _ => + () + } + } + /** Moves a pending scope to ErrorStatus; crashes for any other status. */ + def fail(): Unit = { + status match { + case PendingStatus
=> + status = ErrorStatus + case _ => + crash(this) + } + } + /** Moves a pending scope to SucceededStatus; crashes for any other status. */ + def succeed(): Unit = { + status match { + case PendingStatus => + status = SucceededStatus + case _ => + crash(this) + } + } + + override def printStr(p: Printer): Unit = { + PrettyScope.str(p, this) + } + + override def printRepl(p: Printer): Unit = { + PrettyScope.repl(p, this) + } +} +/** Scope introduced by one importer. Names are first remapped according to the importees and then looked up in the parent scope, which has to be assigned before succeed() is called. Entering names crashes. */ +final class ImporterScope private (sym: Symbol, val tree: Importer) + extends Scope(sym) { + var _parent: Scope = null + + def parent: Scope = { + if (status.isSucceeded) { + _parent + } else { + crash(this) + } + } + + def parent_=(parent: Scope): Unit = { + if (status.isPending) { + _parent = parent + } else { + crash(this) + } + } + + override def enter(name: Name, sym: Symbol): Symbol = { + crash(this) + } + + val _mappings: Map[String, String] = new HashMap[String, String] + var _wildcard: Boolean = false + + tree.importees.foreach { + case ImporteeName(SomeId(value)) => _mappings.put(value, value) + case ImporteeRename(SomeId(from), SomeId(to)) => _mappings.put(to, from) + case ImporteeUnimport(SomeId(value)) => _mappings.put(value, null) + case ImporteeWildcard() => _wildcard = true + } + /** Translates a name as seen through this import into the name to look up in the parent; returns null when the import does not expose the name. */ + private def remap(name: Name): Name = { + val value1 = { + val mapValue = _mappings.get(name.value) /* Unimports are stored as explicit null mappings, so the wildcard may only apply to names that have no entry at all; testing the mapped value for null would let `x => _` leak back in through `_`. */ + if (_wildcard && !_mappings.containsKey(name.value)) name.value + else mapValue + } + if (value1 != null) { + name match { + case _: SomeName => SomeName(value1) + case _: TermName => TermName(value1) + case _: TypeName => TypeName(value1) + } + } else { + null + } + } + /** Looks the remapped name up in the parent; NoSymbol when the import does not expose it. Crashes unless this scope has succeeded. */ + override def lookup(name: Name): Symbol = { + if (status.isSucceeded) { + val name1 = remap(name) + if (name1 != null) parent.lookup(name1) + else NoSymbol + } else { + crash(this) + } + } + /** Resolves a name through the import. Names the import does not expose, and any name once the scope has failed, are MissingResolution; an unfinished scope answers via the base resolve; a succeeded one delegates to the parent. */ + override def resolve(name: Name): Resolution = { + val name1 = remap(name) + if (name1 != null) { + status match { + case PendingStatus => + super.resolve(name) + case BlockedStatus(dep) => + super.resolve(name) + case _: FailedStatus => + MissingResolution + case SucceededStatus
=> + parent.resolve(name1) + } + } else { + MissingResolution + } + } + + override def succeed(): Unit = { + if (_parent == null) { + crash(this) + } + super.succeed() + } +} +/** Factories that give every importer scope a fresh symbol, optionally prefixed with a printable alias. */ +object ImporterScope { + def apply(tree: Importer): ImporterScope = { + val sym = freshSym() + "::" + new ImporterScope(sym, tree) + } + + def apply(alias: String, tree: Importer): ImporterScope = { + val sym = alias + " " + freshSym() + "::" + new ImporterScope(sym, tree) + } +} +/** Scope backed by a name-to-symbol map. Names can only be entered while the scope is pending and only looked up once it has succeeded. */ +sealed abstract class OwnerScope(sym: Symbol) extends Scope(sym) { + val _storage: Map[Name, Symbol] = new HashMap[Name, Symbol] + /** Stores name -> sym while pending. Returns the already stored symbol when the name is taken and NoSymbol after a successful insert; crashes for a SomeName, for NoSymbol as the symbol, or when the scope is not pending. */ + override def enter(name: Name, sym: Symbol): Symbol = { + if (status.isPending) { + val existing = _storage.get(name) + if (existing != null) { + existing + } else { + name match { + case SomeName(_) => + crash(name) + case _ => + sym match { + case NoSymbol => + crash(name) + case _ => + _storage.put(name, sym) + NoSymbol + } + } + } + } else { + crash(this) + } + } + /** Returns the stored symbol or NoSymbol; an unknown SomeName crashes, as does a lookup before the scope has succeeded. */ + override def lookup(name: Name): Symbol = { + if (status.isSucceeded) { + val result = _storage.get(name) + if (result != null) { + result + } else { + name match { + case SomeName(value) => + crash(name) + case _ => + NoSymbol + } + } + } else { + crash(this) + } + } + /** Like lookup, but expressed as a Resolution and falling back to the base resolve while the scope has not succeeded. */ + override def resolve(name: Name): Resolution = { + if (status.isSucceeded) { + val result = _storage.get(name) + if (result != null) { + FoundResolution(result) + } else { + name match { + case SomeName(value) => + crash(name) + case _ => + MissingResolution + } + } + } else { + super.resolve(name) + } + } +} + +final class FlatScope private (sym: Symbol) extends OwnerScope(sym) + +object FlatScope { + def apply(alias: String): FlatScope = { + val sym = alias + freshSym() + "::" + new FlatScope(sym) + } +} + +final class PackageScope private (sym: Symbol) extends OwnerScope(sym) + +object PackageScope { + def apply(sym: Symbol): PackageScope = { + new PackageScope(sym) + } +} +/** Scope of the members of a template. Names not found among its own members are searched in _env, which is built from the reversed parent list when parents is assigned; parents must be assigned before succeed(). */ +final class TemplateScope private (sym: Symbol, val tree:
DefnTemplate) + extends OwnerScope(sym) { + var _parents: List[TemplateScope] = null + var _env: Env = null + + def parents: List[TemplateScope] = { + if (status.isSucceeded) { + _parents + } else { + crash(this) + } + } + + def parents_=(parents: List[TemplateScope]): Unit = { + if (status.isPending) { + _parents = parents + _env = Env(parents.reverse) + } else { + crash(this) + } + } + + override def lookup(name: Name): Symbol = { + super.lookup(name) match { + case NoSymbol => + _env.lookup(name) + case sym => + sym + } + } + + override def resolve(name: Name): Resolution = { + super.resolve(name) match { + case MissingResolution => + _env.resolve(name) + case resolution => + resolution + } + } + + override def succeed(): Unit = { + if (_parents == null) { + crash(this) + } + super.succeed() + } +} + +object TemplateScope { + def apply(tree: DefnTemplate): TemplateScope = { + new TemplateScope(tree.id.sym, tree) + } +} +/** View of a template that only sees names from the parents env of the underlying TemplateScope. It is created in SucceededStatus; resolve reports blocked or failed while the underlying scope is unfinished or failed. Entering names crashes. */ +final class SuperScope private (sym: Symbol, val underlying: TemplateScope) + extends Scope(sym) { + status = SucceededStatus + + override def enter(name: Name, sym: Symbol): Symbol = { + crash(this) + } + + override def lookup(name: Name): Symbol = { + underlying._env.lookup(name) + } + + override def resolve(name: Name): Resolution = { + underlying.status match { + case PendingStatus => + BlockedResolution(underlying) + case BlockedStatus(_) => + BlockedResolution(underlying) + case _: FailedStatus => + ErrorResolution + case SucceededStatus => + underlying._env.resolve(name) + } + } +} + +object SuperScope { + def apply(underlying: TemplateScope): SuperScope = { + val sym = underlying.sym + "::super::" + new SuperScope(sym, underlying) + } +} diff --git a/src/main/scala/rsc/typecheck/Statuses.scala b/src/main/scala/rsc/typecheck/Statuses.scala new file mode 100644 index 0000000..6eebd39 --- /dev/null +++ b/src/main/scala/rsc/typecheck/Statuses.scala @@ -0,0 +1,27 @@ +// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.typecheck + +import rsc.pretty._ +/** Completion state of a Scope. The is* helpers test which branch of the hierarchy below a status belongs to. */ +sealed trait Status extends Pretty with Product { + def isIncomplete: Boolean = this.isInstanceOf[IncompleteStatus] + def isPending: Boolean = this == PendingStatus + def isBlocked: Boolean = this.isInstanceOf[BlockedStatus] + def isComplete: Boolean = this.isInstanceOf[CompleteStatus] + def isFailed: Boolean = this.isInstanceOf[FailedStatus] + def isCyclic: Boolean = this.isInstanceOf[CyclicStatus] + def isSucceeded: Boolean = this == SucceededStatus + override def printStr(p: Printer): Unit = PrettyStatus.str(p, this) + override def printRepl(p: Printer): Unit = PrettyStatus.repl(p, this) +} +/** Not finished yet: either pending, or blocked on the scope carried by BlockedStatus. */ +sealed trait IncompleteStatus extends Status +final case object PendingStatus extends IncompleteStatus +final case class BlockedStatus(scope: Scope) extends IncompleteStatus +/** Finished: either succeeded, or failed; a failure is a CyclicStatus carrying a list of scopes, or a plain ErrorStatus. */ +sealed trait CompleteStatus extends Status +sealed trait FailedStatus extends CompleteStatus +final case class CyclicStatus(scopes: List[Scope]) extends FailedStatus +final case object ErrorStatus extends FailedStatus +final case object SucceededStatus extends CompleteStatus diff --git a/src/main/scala/rsc/typecheck/Symtab.scala b/src/main/scala/rsc/typecheck/Symtab.scala new file mode 100644 index 0000000..13d1190 --- /dev/null +++ b/src/main/scala/rsc/typecheck/Symtab.scala @@ -0,0 +1,68 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.typecheck + +import java.util.{HashMap, Map} +import rsc.pretty._ +import rsc.semantics._ +import rsc.syntax._ +import rsc.util._ +/** Symbol table holding two maps keyed by symbol: one to the Scope of a symbol and one to its Outline. Both are accessed through the scopes and outlines objects, which allow each symbol to be registered only once. */ +final class Symtab private extends Pretty { + val _scopes: Map[Symbol, Scope] = new HashMap[Symbol, Scope] + val _outlines: Map[Symbol, Outline] = new HashMap[Symbol, Outline] + /** apply crashes when no scope is registered for the symbol; update crashes when the symbol is already registered or is NoSymbol. */ + object scopes { + def apply(sym: Symbol): Scope = { + val scope = _scopes.get(sym) + if (scope == null) { + crash(sym) + } + scope + } + + def update(sym: Symbol, scope: Scope): Unit = { + if (_scopes.containsKey(sym)) { + crash(sym) + } + sym match { + case NoSymbol => crash(scope) + case other => _scopes.put(other, scope) + } + } + } + /** apply crashes when no outline is registered for the symbol; update crashes when the symbol is already registered or is NoSymbol. */ + object outlines { + def apply(sym: Symbol): Outline = { + val outline = _outlines.get(sym) + if (outline == null) { + crash(sym) + } + outline + } + + def update(sym: Symbol, outline: Outline): Unit = { + if (_outlines.containsKey(sym)) { + crash(sym) + } + sym match { + case NoSymbol => crash(outline) + case other => _outlines.put(sym, outline) + } + } + } + + def printStr(p: Printer): Unit = { + PrettySymtab.str(p, this) + } + + def printRepl(p: Printer): Unit = { + PrettySymtab.repl(p, this) + } +} +/** Factory: creates an empty Symtab. */ +object Symtab { + def apply(): Symtab = { + new Symtab + } +} diff --git a/src/main/scala/rsc/typecheck/Todo.scala b/src/main/scala/rsc/typecheck/Todo.scala new file mode 100644 index 0000000..a538763 --- /dev/null +++ b/src/main/scala/rsc/typecheck/Todo.scala @@ -0,0 +1,22 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.typecheck + +import java.util.{Queue, LinkedList} +import rsc.pretty._ +import rsc.syntax._ +/** Four queues of (Env, item) pairs, one each for scopes, mods, tpts and terms. */ +final class Todo private extends Pretty { + val scopes: Queue[(Env, Scope)] = new LinkedList[(Env, Scope)] + val mods: Queue[(Env, Mod)] = new LinkedList[(Env, Mod)] + val tpts: Queue[(Env, Tpt)] = new LinkedList[(Env, Tpt)] + val terms: Queue[(Env, Term)] = new LinkedList[(Env, Term)] + def printStr(p: Printer): Unit = PrettyTodo.str(p, this) + def printRepl(p: Printer): Unit = PrettyTodo.repl(p, this) +} +/** Factory: creates a Todo with empty queues. */ +object Todo { + def apply(): Todo = { + new Todo() + } +} diff --git a/src/main/scala/rsc/typecheck/Typechecker.scala b/src/main/scala/rsc/typecheck/Typechecker.scala new file mode 100644 index 0000000..6473ac4 --- /dev/null +++ b/src/main/scala/rsc/typecheck/Typechecker.scala @@ -0,0 +1,596 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc.typecheck + +import rsc.lexis._ +import rsc.report._ +import rsc.semantics._ +import rsc.settings._ +import rsc.syntax._ +import rsc.util._ +/** Computes the Type of a Typeable tree in a given Env by dispatching on the tree class; tree kinds without a case crash. Problems are reported through the reporter and surface as NoType. */ +final class Typechecker private ( + settings: Settings, + reporter: Reporter, + symtab: Symtab) { + def apply(env: Env, tree: Typeable): Type = { + tree match { + case tree: Init => init(env, tree) + case tree: TermApply => termApply(env, tree) + case tree: TermApplyInfix => termApplyInfix(env, tree) + case tree: TermApplyPrefix => termApplyPrefix(env, tree) + case tree: TermApplyType => termApplyType(env, tree) + case tree: TermAssign => termAssign(env, tree) + case tree: TermBlock => termBlock(env, tree) + case tree: TermFunction => termFunction(env, tree) + case tree: TermId => termId(env, tree) + case tree: TermIf => termIf(env, tree) + case tree: TermLit => termLit(env, tree) + case tree: TermMatch => termMatch(env, tree) + case tree: TermNew => termNew(env, tree) + case tree: TermReturn => termReturn(env, tree) + case tree: TermSelect => termSelect(env, tree) + case tree: TermSuper =>
termSuper(env, tree) + case tree: TermThis => termThis(env, tree) + case tree: TermThrow => termThrow(env, tree) + case tree: TermWhile => termWhile(env, tree) + case tree: TptApply => tptApply(env, tree) + case tree: TptId => tptId(env, tree) + case tree: TptSelect => tptSelect(env, tree) + case _ => crash(tree) + } + } + /** Typechecks the arguments, then returns the type of the init's tpt. */ + private def init(env: Env, tree: Init): Type = { + tree.args.foreach(apply(env, _)) + apply(env, tree.tpt) + } + /** Typechecks fun and args. A method type without type params yields its return type; one with type params crashes because there is no type inference; any other type is retried as fun.apply(args). */ + private def termApply(env: Env, tree: TermApply): Type = { + val funTpe = apply(env, tree.fun) + tree.args.foreach(apply(env, _)) + funTpe match { + case NoType => + NoType + case MethodType(Nil, _, ret) => + ret + case MethodType(other, _, _) => + crash("type inference") + case other => + val id1 = TermId("apply").withPos(tree.fun.pos.end, tree.fun) + val select1 = TermSelect(tree.fun, id1).withPos(tree.fun) + val tree1 = TermApply(select1, tree.args).withPos(tree) + apply(env, tree1) + } + } + /** Rewrites a left-associative infix application as lhs.op(rhs), with the type args applied when given, and typechecks that; operators that are not left-associative crash. */ + private def termApplyInfix(env: Env, tree: TermApplyInfix): Type = { + if (tree.op.value.isLeftAssoc) { + val select = TermSelect(tree.lhs, tree.op).withPos(tree.lhs, tree.op) + val applyType = { + if (tree.targs.isEmpty) select + else TermApplyType(select, tree.targs).withPos(select, tree.targs.last) + } + val tree1 = TermApply(applyType, List(tree.rhs)).withPos(tree) + apply(env, tree1) + } else { + crash(tree) + } + } + /** Typechecks a prefix application as arg.op(). */ + private def termApplyPrefix(env: Env, tree: TermApplyPrefix): Type = { + val arg1 = TermSelect(tree.arg, tree.op).withPos(tree) + val tree1 = TermApply(arg1, Nil).withPos(tree) + apply(env, tree1) + } + /** Applies explicit type arguments: they are substituted into a method type, and any other type is retried as fun.apply[targs]. Yields NoType as soon as fun or one of the type arguments failed to typecheck. */ + private def termApplyType(env: Env, tree: TermApplyType): Type = { + val funTpe = apply(env, tree.fun) + val targs = tree.targs.map { tpt => + apply(env, tpt) match { + case NoType => return NoType + case tpe: SimpleType => tpe + case other => crash(other) + } + } + funTpe match { + case NoType => + NoType + case MethodType(tparams, paramss, ret) => + funTpe.subst(tparams, targs) + case other => + val id1 =
TermId("apply").withPos(tree.fun.pos.end, tree.fun) + val select1 = TermSelect(tree.fun, id1).withPos(tree.fun) + val tree1 = TermApplyType(select1, tree.targs).withPos(tree) + apply(env, tree1) + } + } + /** An assignment to an application f(args) = rhs is typechecked as f.update(args :+ rhs); for any other lhs both sides are typechecked and the type of rhs is returned. */ + private def termAssign(env: Env, tree: TermAssign): Type = { + tree.lhs match { + case TermApply(fun, args) => + val id1 = TermId("update").withPos(tree.lhs.pos.end, tree.lhs) + val select1 = TermSelect(fun, id1).withPos(tree.lhs) + val tree1 = TermApply(select1, args :+ tree.rhs).withPos(tree) + apply(env, tree1) + case other => + apply(env, other) + apply(env, tree.rhs) + } + } + /** Typechecks a block that ends in a term. Leading DefnFields with a right-hand side are entered into a fresh block scope (a name clash reports DoubleDef and yields NoType); then their tpts, the collected terms and finally the last term are typechecked with that scope pushed. An empty block is Unit; any other shape crashes. */ + private def termBlock(env: Env, tree: TermBlock): Type = { + tree.stats match { + case stats :+ (term: Term) => + val tpts = List.newBuilder[Tpt] + val terms = List.newBuilder[Term] + val scope = FlatScope("block") + stats.foreach { + case stat @ DefnField(_, id, tpt, Some(rhs)) => + val sym = scope.sym + id.name.str + scope.enter(id.name, sym) match { + case NoSymbol => + id.sym = sym + symtab.outlines(id.sym) = stat + case existingSym => + reporter.append(DoubleDef(stat, symtab.outlines(existingSym))) + return NoType + } + tpts += tpt + terms += rhs + case term: Term => + terms += term + case other => + crash(other) + } + scope.succeed() + val env1 = scope :: env + tpts.result.foreach(apply(env1, _)) + terms.result.foreach(apply(env1, _)) + apply(env1, term) + case Nil => + SimpleType("_root_.scala.Unit#", Nil) + case other => + crash(other) + } + } + /** Enters the params into a fresh lambda scope (a name clash reports DoubleDef and yields NoType), typechecks the param types in the outer env and the body with the lambda scope pushed. */ + private def termFunction(env: Env, tree: TermFunction): Type = { + val tpts = List.newBuilder[Tpt] + val scope = FlatScope("lambda") + tree.params.foreach { + case param @ TermParam(_, id, tpt) => + val sym = scope.sym + id.name.str + scope.enter(id.name, sym) match { + case NoSymbol => + id.sym = sym + symtab.outlines(id.sym) = param + case existingSym => + reporter.append(DoubleDef(param, symtab.outlines(existingSym))) + return NoType + } + tpts += tpt + } + scope.succeed() + val env1 = scope :: env +
tpts.result.foreach(apply(env, _)) + apply(env1, tree.body) + } + /** Looks the name up in env; an unknown name reports UnboundId and yields NoType, otherwise the symbol is stored on the tree and its type returned. */ + private def termId(env: Env, tree: TermId): Type = { + env.lookup(tree.name) match { + case NoSymbol => + reporter.append(UnboundId(tree)) + NoType + case sym => + tree.sym = sym + sym.tpe + } + } + /** With an else branch the result is the lub of both branch types; without one the then branch is still typechecked but the result is Unit. */ + private def termIf(env: Env, tree: TermIf): Type = { + apply(env, tree.cond) + tree.elsep match { + case Some(elsep) => + val thenTpe = apply(env, tree.thenp) + val elseTpe = apply(env, elsep) + lub(List(thenTpe, elseTpe)) + case None => + apply(env, tree.thenp) + SimpleType("_root_.scala.Unit#", Nil) + } + } + /** Maps the runtime class of the literal value to the matching scala/java type symbol; null is typed as AnyRef and any other value crashes. */ + private def termLit(env: Env, tree: TermLit): Type = { + tree.value match { + case _: Unit => SimpleType("_root_.scala.Unit#", Nil) + case _: Boolean => SimpleType("_root_.scala.Boolean#", Nil) + case _: Byte => SimpleType("_root_.scala.Byte#", Nil) + case _: Short => SimpleType("_root_.scala.Short#", Nil) + case _: Char => SimpleType("_root_.scala.Char#", Nil) + case _: Int => SimpleType("_root_.scala.Int#", Nil) + case _: Float => SimpleType("_root_.scala.Float#", Nil) + case _: Long => SimpleType("_root_.scala.Long#", Nil) + case _: Double => SimpleType("_root_.scala.Double#", Nil) + case _: String => SimpleType("_root_.java.lang.String#", Nil) + case null => SimpleType("_root_.scala.AnyRef#", Nil) + case other => crash(other.getClass.toString) + } + } + /** Typechecks the scrutinee and then every case in its own scope: typed pattern variables are entered into it, guards and bodies are typechecked with it pushed, and the result is the lub of the case types. Pattern variables without a type and unsupported patterns crash. */ + private def termMatch(env: Env, tree: TermMatch): Type = { + val termTpe = apply(env, tree.term) + val caseTpes = List.newBuilder[Type] + tree.cases.foreach { + case caseDef @ Case(pat, cond, stats) => + val scope = FlatScope("case") + def loop(pat: Pat): Unit = { + pat match { + case pat: PatAlternative => + pat.pats.foreach(loop) + case pat: PatId => + val tree1 = TermId(pat.value).withPos(pat) + apply(env, tree1) + pat.sym = tree1.sym + case pat: PatLit => + () + case pat: PatSelect => + val tree1 = TermSelect(pat.qual, pat.id).withPos(pat) + apply(env, tree1) + case pat @ PatVar(id: NamedId, tpt) => + tpt match { + case Some(tpt) =>
+ val sym = scope.sym + id.name.str + scope.enter(id.name, sym) match { + case NoSymbol => + id.sym = sym + symtab.outlines(id.sym) = pat + pat.tpe = apply(env, tpt) + case existingSym => + val message = DoubleDef(pat, symtab.outlines(existingSym)) + reporter.append(message) + } + case None => + crash("type inference") + } + case PatVar(AnonId(), _) => + () + case pat => + crash("advanced patterns") + } + } + loop(pat) + scope.succeed() + val env1 = scope :: env + cond.foreach(apply(env1, _)) + val stats1 = TermBlock(stats).withPos(tree) + caseTpes += apply(env1, stats1) + } + lub(caseTpes.result) + } + + private def termNew(env: Env, tree: TermNew): Type = { + apply(env, tree._init) + } + /** Typechecks the returned term, if any; the return expression itself has type Nothing. */ + private def termReturn(env: Env, tree: TermReturn): Type = { + tree.term.foreach(apply(env, _)) + SimpleType("_root_.scala.Nothing#", Nil) + } + + // NOTE: termSelect contains an ad hoc informally-specified bug-ridden + // slow implementation of asSeenFrom. It's so bad that we even had to + // add a stub method Stack.get in Parser.scala to get things going. + // However, this allowed us to typecheck re2s, so we're going to keep + // this monstrosity alive for the time being.
+ private def termSelect(env: Env, tree: TermSelect): Type = { + val qualTpe = apply(env, tree.qual) + qualTpe match { + case NoType => + NoType + case qualTpe: MethodType => + reporter.append(NonValue(tree.qual, qualTpe)) + NoType + case qualTpe: SimpleType => + def lookup(qualSym: Symbol): Type = { + val qualScope = symtab.scopes(qualSym) + qualScope.lookup(tree.id.name) match { + case NoSymbol => + if (tree.id.value.isOpAssignment) { + val value1 = tree.id.value.stripSuffix("=") + val id1 = TermId(value1).withPos(tree.id) + val tree1 = TermSelect(tree.qual, id1).withPos(tree) + apply(env, tree1) + } else { + reporter.append(UnboundMember(qualSym, tree.id)) + NoType + } + case sym => + tree.id.sym = sym + sym.tpe + } + } + def loop(qualTpe: Type): Type = { + qualTpe match { + case NoType => + NoType + case _: MethodType => + crash(qualTpe) + case SimpleType(qualSym, targs) => + symtab.outlines(qualSym) match { + case DefnPackage(pid, _) => + lookup(pid.id.sym) + case DefnTemplate(_, id, tparams, _, _, _) => + lookup(id.sym).subst(tparams, targs) + case DefnType(_, _, tparams, tpt) => + loop(tpt.tpe.subst(tparams, targs)) + case tparam: TypeParam => + loop(tparam.hi.tpe) + } + } + } + loop(qualTpe) + } + } + /** Resolves the qualifier with lookupThis and the mixin with lookupSuper in the qualifier scope; either failure reports UnboundId and yields NoType. The result is the mixin template applied to its own type params. */ + private def termSuper(env: Env, tree: TermSuper): Type = { + env.lookupThis(tree.qual.nameopt) match { + case NoSymbol => + reporter.append(UnboundId(tree.qual)) + NoType + case qualSym => + tree.qual.sym = qualSym + val env1 = Env(symtab.scopes(qualSym)) + env1.lookupSuper(tree.mix.nameopt) match { + case NoSymbol => + reporter.append(UnboundId(tree.mix)) + NoType + case mixSym => + tree.mix.sym = mixSym + symtab.outlines(mixSym) match { + case DefnTemplate(_, id, tparams, _, _, _) => + val targs = tparams.map(tp => SimpleType(tp.id.sym, Nil)) + SimpleType(id.sym, targs) + case other => + crash(other) + } + } + } + } + /** Resolves the qualifier with lookupThis; the result is the enclosing template applied to its own type params, or NoType after reporting an unbound id. */ + private def termThis(env: Env, tree: TermThis): Type = { + env.lookupThis(tree.qual.nameopt) match { + case NoSymbol => +
reporter.append(UnboundId(tree.id)) /* NOTE(review): termSuper reports tree.qual in the same situation; confirm whether tree.id is the intended id here. */ + NoType + case qualSym => + tree.qual.sym = qualSym + symtab.outlines(qualSym) match { + case DefnTemplate(_, id, tparams, _, _, _) => + val targs = tparams.map(tparam => SimpleType(tparam.id.sym, Nil)) + SimpleType(id.sym, targs) + case other => + crash(other) + } + } + } + /** Typechecks the thrown term; the throw expression itself has type Nothing. */ + private def termThrow(env: Env, tree: TermThrow): Type = { + apply(env, tree.term) + SimpleType("_root_.scala.Nothing#", Nil) + } + /** Typechecks cond and body so that their ids get symbols; the loop itself always has type Unit, independent of the type of the body. */ + private def termWhile(env: Env, tree: TermWhile): Type = { + apply(env, tree.cond) + apply(env, tree.body) + SimpleType("_root_.scala.Unit#", Nil) + } + /** Type application: fun has to typecheck to a SimpleType without type args, anything else crashes. A type argument that failed to typecheck has already been reported, so NoType is propagated instead of crashing, as termApplyType does. */ + private def tptApply(env: Env, tree: TptApply): Type = { + val funTpe = apply(env, tree.fun) + funTpe match { + case NoType => + NoType + case SimpleType(funSym, Nil) => + val targs = tree.targs.map { + apply(env, _) match { + case NoType => return NoType + case targ: SimpleType => targ + case other => crash(other) + } + } + SimpleType(funSym, targs) + case other => + crash(other) + } + } + /** Looks the type name up in env; an unknown name reports UnboundId and yields NoType. */ + private def tptId(env: Env, tree: TptId): Type = { + env.lookup(tree.name) match { + case NoSymbol => + reporter.append(UnboundId(tree)) + NoType + case sym => + tree.sym = sym + SimpleType(sym, Nil) + } + } + /** Typechecks the qualifier, which has to be a SimpleType without type args, and looks the selected name up in the qualifier scope; an unknown member reports UnboundMember and yields NoType. */ + private def tptSelect(env: Env, tree: TptSelect): Type = { + val qualTpe = apply(env, tree.qual) + qualTpe match { + case NoType => + NoType + case SimpleType(qualSym, Nil) => + val qualScope = symtab.scopes(qualSym) + qualScope.lookup(tree.id.name) match { + case NoSymbol => + reporter.append(UnboundMember(qualSym, tree.id)) + NoType + case sym => + tree.id.sym = sym + SimpleType(tree.id.sym, Nil) + } + case other => + crash(other) + } + } + /** Position helpers for trees synthesized by the typechecker: every overload derives input, start and end from existing trees or offsets. Assigning a position to a tree that already has one crashes. */ + private implicit class TypecheckerTreeOps[T <: Tree](tree: T) { + def withPos(startEnd: Tree): T = { + tree.withPos(startEnd, startEnd) + } + + def withPos(start: Tree, end: Tree): T = { + tree.withPos(start.pos.input, start.pos.start, end.pos.end) + } + + def withPos(start: Offset, end: Tree): T = { + tree.withPos(end.pos.input, start, end.pos.end) + } + + def withPos(start: Tree, end: Offset): T = { +
tree.withPos(start.pos.input, start.pos.start, end) + } + + private def withPos(input: Input, start: Offset, end: Offset): T = { + val syntheticPos = Position(input, start, end) + if (tree.pos == NoPosition) { + tree.pos = syntheticPos + tree + } else { + crash(tree) + } + } + } + /** Reads the type of a tpt off the symbols stored on its ids; crashes when a symbol is still NoSymbol or the applied fun is not a path. */ + private implicit class TypecheckerTptOps(tpt: Tpt) { + def tpe: SimpleType = { + tpt match { + case TptApply(fun: TptPath, targs) => + if (fun.id.sym == NoSymbol) crash(fun) + else SimpleType(fun.id.sym, targs.map(_.tpe)) + case _: TptApply => + crash(tpt) + case tpt: TptPath => + if (tpt.id.sym == NoSymbol) crash(tpt.id) + else SimpleType(tpt.id.sym, Nil) + } + } + } + /** Derives the type of a symbol from its outline in symtab; outlines without a case crash. */ + private implicit class TypecheckerSymbolOps(sym: Symbol) { + def tpe: Type = { + symtab.outlines(sym) match { + case DefnDef(_, _, tparams, params, ret, _) => + val tpeTparams = tparams.map(_.id.sym) + val tpeParams = params.map(_.id.sym) + val tpeRet = ret.tpe + MethodType(tpeTparams, tpeParams, tpeRet) + case DefnField(_, _, tpt, _) => + tpt.tpe + case DefnObject(_, id, _, _) => + SimpleType(id.sym, Nil) + case DefnPackage(id: TermId, _) => + SimpleType(id.sym, Nil) + case DefnPackage(TermSelect(_, id: TermId), _) => + SimpleType(id.sym, Nil) + case pat: PatVar => + pat.tpe + case TermParam(_, _, tpt) => + tpt.tpe + case outline => + crash(outline) + } + } + } + /** Substitution of type arguments for type parameters. The two overloads perform the same steps and differ only in how the parameters are given: as TypeParam trees or directly as symbols. */ + private implicit class TypecheckerTpeOps(tpe: Type) { + def subst(tparams: List[TypeParam], targs: List[SimpleType]): Type = { + if (tparams.isEmpty && targs.isEmpty) { + tpe + } else { + tpe match { + case NoType => + NoType + case tpe: MethodType => + val tparams1 = tpe.tparams.diff(tparams.map(_.id.sym)) + val params1 = tpe.params + val ret1 = { + tpe.ret.subst(tparams, targs) match { + case ret1: SimpleType => ret1 + case other => crash(other) + } + } + MethodType(tparams1, params1, ret1) + case tpe: SimpleType => + val i = tparams.indexWhere(_.id.sym == tpe.sym) + if (i != -1) { + targs(i) + } else { + val sym1 = tpe.sym + val targs1 = tpe.targs.map {
+ _.subst(tparams, targs) match { + case targ1: SimpleType => targ1 + case other => crash(other) + } + } + SimpleType(sym1, targs1) + } + } + } + } + def subst(tparams: Seq[Symbol], targs: List[SimpleType]): Type = { + if (tparams.isEmpty && targs.isEmpty) { + tpe + } else { + tpe match { + case NoType => + NoType + case tpe: MethodType => + val tparams1 = tpe.tparams.diff(tparams) + val params1 = tpe.params + val ret1 = { + tpe.ret.subst(tparams, targs) match { + case ret1: SimpleType => ret1 + case other => crash(other) + } + } + MethodType(tparams1, params1, ret1) + case tpe: SimpleType => + val i = tparams.indexWhere(_ == tpe.sym) + if (i != -1) { + targs(i) + } else { + val sym1 = tpe.sym + val targs1 = tpe.targs.map { + _.subst(tparams, targs) match { + case targ1: SimpleType => targ1 + case other => crash(other) + } + } + SimpleType(sym1, targs1) + } + } + } + } + } + + // NOTE: This is a markedly incorrect implementation, but it allowed us + // to typecheck re2s, so we're going to keep it for the time being. + private def lub(tpes: List[Type]): Type = { + tpes.distinct match { + case List(tpe) => tpe + case _ => SimpleType("_root_.scala.Any#", Nil) + } + } +} +/** Factory: builds a Typechecker from the given settings, reporter and symtab. */ +object Typechecker { + def apply( + settings: Settings, + reporter: Reporter, + symtab: Symtab): Typechecker = { + new Typechecker(settings, reporter, symtab) + } +} diff --git a/src/main/scala/rsc/typecheck/package.scala b/src/main/scala/rsc/typecheck/package.scala new file mode 100644 index 0000000..ba8ee56 --- /dev/null +++ b/src/main/scala/rsc/typecheck/package.scala @@ -0,0 +1,5 @@ +// Copyright (c) 2017-2018 Twitter, Inc. +// Licensed under the Apache License, Version 2.0 (see LICENSE.md). +package rsc + +package object typecheck extends Atoms diff --git a/src/main/scala/rsc/util/CharUtil.scala b/src/main/scala/rsc/util/CharUtil.scala new file mode 100644 index 0000000..5f5dc5a --- /dev/null +++ b/src/main/scala/rsc/util/CharUtil.scala @@ -0,0 +1,46 @@ +// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.util
+
+import scala.annotation.switch
+
+trait CharUtil { // character constants and classification predicates
+  final val LF = '\u000A' // U+000A, line feed
+
+  final val FF = '\u000C' // U+000C, form feed
+
+  final val CR = '\u000D' // U+000D, carriage return
+
+  final val SU = '\u001A' // U+001A, substitute
+
+  def isAlphanumericIdStart(ch: Char): Boolean = { // '_', '$', or a Unicode identifier start per java.lang.Character
+    ch == '_' || ch == '$' || Character.isUnicodeIdentifierStart(ch)
+  }
+
+  def isAlphanumericIdPart(ch: Char): Boolean = { // '$' or a Unicode identifier part, with SU always rejected
+    ch == '$' || (ch != SU && Character.isUnicodeIdentifierPart(ch)) // isUnicodeIdentifierPart alone accepts ignorable controls such as U+001A
+  }
+
+  def isSymbolicIdStart(ch: Char): Boolean = { // same character set as isSymbolicIdPart
+    isSymbolicIdPart(ch)
+  }
+
+  def isSymbolicIdPart(ch: Char): Boolean = { // listed ASCII operator chars, or Unicode category Sm / So
+    (ch: @switch) match { // @switch makes scalac check that the match compiles to a switch
+      case '~' | '!' | '@' | '#' | '%' | '^' | '*' | '+' | '-' | '<' | '>' |
+          '?' | ':' | '=' | '&' | '|' | '\\' | '/' =>
+        true
+      case _ =>
+        val chtpe = Character.getType(ch)
+        chtpe == Character.MATH_SYMBOL || chtpe == Character.OTHER_SYMBOL
+    }
+  }
+
+  def isDecimalDigit(ch: Char): Boolean = { // ASCII '0'..'9' only
+    '0' <= ch && ch <= '9'
+  }
+
+  def isHexadecimalDigit(ch: Char): Boolean = { // ASCII digit or a-f / A-F
+    isDecimalDigit(ch) || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F')
+  }
+}
diff --git a/src/main/scala/rsc/util/CrashException.scala b/src/main/scala/rsc/util/CrashException.scala
new file mode 100644
index 0000000..3af5fa0
--- /dev/null
+++ b/src/main/scala/rsc/util/CrashException.scala
@@ -0,0 +1,40 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.util
+
+import rsc.lexis._
+
+final case class CrashException( // an Error (not an Exception) carrying a source position
+    pos: Position,
+    message: String,
+    cause: Throwable)
+    extends Error(message, cause)
+
+object CrashException { // apply overloads that default the position and/or the cause
+  def apply(pos: Position, message: String): CrashException = { // explicit position, null cause
+    new CrashException(pos, message, null)
+  }
+
+  def apply(input: Input, message: String): CrashException = { // position is `input` with NoOffset start and end
+    val pos = Position(input, NoOffset, NoOffset)
+    new CrashException(pos, message, null)
+  }
+
+  def apply(message: String): CrashException = { // NoPosition, null cause
+    new CrashException(NoPosition, message, null)
+  }
+
+  // NOTE: Defined by the case class infrastructure.
+  // def apply(pos: Position, message: String, cause: Throwable): CrashException = {
+  //   new CrashException(pos, message, cause)
+  // }
+
+  def apply(input: Input, message: String, cause: Throwable): CrashException = { // as the Input overload, keeping `cause`
+    val pos = Position(input, NoOffset, NoOffset)
+    new CrashException(pos, message, cause)
+  }
+
+  def apply(message: String, cause: Throwable): CrashException = { // NoPosition, keeping `cause`
+    new CrashException(NoPosition, message, cause)
+  }
+}
diff --git a/src/main/scala/rsc/util/ErrorUtil.scala b/src/main/scala/rsc/util/ErrorUtil.scala
new file mode 100644
index 0000000..92ec5c2
--- /dev/null
+++ b/src/main/scala/rsc/util/ErrorUtil.scala
@@ -0,0 +1,40 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.util
+
+import rsc.lexis._
+import rsc.pretty._
+
+trait ErrorUtil { // `crash` helpers: each one always throws a CrashException
+  def crash[T: Str: Repl](pos: Position, x: T): Nothing = { // crash located at `pos`
+    throw CrashException(pos, message("crash", x))
+  }
+
+  def crash[T: Str: Repl](input: Input, x: T): Nothing = { // crash located at `input`, without offsets
+    throw CrashException(input, message("crash", x))
+  }
+
+  def crash[T: Str: Repl](x: T): Nothing = { // crash without a position
+    throw CrashException(message("crash", x))
+  }
+
+  private def message[T: Str: Repl](summary: String, culprit: T): String = { // "<summary>: <str>", plus the repl form when it differs
+    def safe(fn: => String): String = { // evaluates `fn`; any Throwable yields "" instead of propagating
+      try fn
+      catch {
+        case ex: Throwable =>
+          s"" // NOTE(review): `ex` is dropped and an empty string returned; confirm this is intended
+      }
+    }
+    val str = safe(culprit.str)
+    val repl = safe(culprit.repl)
+    val onlyStr = {
+      val isPrimitive = culprit == null || culprit.getClass.isPrimitive // NOTE(review): for a generic T the runtime class is presumably boxed, so isPrimitive may never be true; verify
+      val isString = culprit.isInstanceOf[String]
+      val isUseless = str == repl // the repl form adds nothing when it equals str
+      isPrimitive || isString || isUseless
+    }
+    if (onlyStr) s"$summary: $str"
+    else s"$summary: $str$EOL$repl"
+  }
+}
diff --git a/src/main/scala/rsc/util/PrettyUtil.scala b/src/main/scala/rsc/util/PrettyUtil.scala
new file mode 100644
index 0000000..3bf7e77
--- /dev/null
+++ b/src/main/scala/rsc/util/PrettyUtil.scala
@@ -0,0 +1,26 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.util
+
+import rsc.pretty._
+import rsc.settings._
+
+trait PrettyUtil {
+  implicit class PrinterUtilOps(p: Printer) { // adds `header` and a `settings` accessor pair to Printer
+    def header(value: String): Unit = { // prints `value`, then an "=" underline of the same length
+      p.str(value)
+      p.newline()
+      p.str("=" * value.length)
+      p.newline()
+    }
+
+    def settings: Settings = { // reads the "settings" prop; falls back to Settings() when absent
+      val maybeSettings = p.props.get("settings").map(_.asInstanceOf[Settings])
+      maybeSettings.getOrElse(Settings())
+    }
+
+    def settings_=(settings: Settings): Unit = { // stores `settings` under the "settings" prop key
+      p.props("settings") = settings
+    }
+  }
+}
diff --git a/src/main/scala/rsc/util/StringUtil.scala b/src/main/scala/rsc/util/StringUtil.scala
new file mode 100644
index 0000000..cbcbd30
--- /dev/null
+++ b/src/main/scala/rsc/util/StringUtil.scala
@@ -0,0 +1,14 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.util
+
+trait StringUtil {
+  // Cuts `str` right after its first fractional digit when every later char is '0' ("1.500" -> "1.5"); otherwise returns `str`.
+  def stripExtraTrailingZeros(str: String): String = {
+    val startExtra = str.lastIndexOf('.') + 2 // index just past "<dot><one char>"; 1 when `str` has no dot
+    var endExtra = startExtra
+    // Bounds and '0' tests share one condition: with the '0' test inside the body the loop never advanced past other chars.
+    while (endExtra < str.length && str(endExtra) == '0') endExtra += 1
+    if (startExtra > 1 && endExtra == str.length) str.substring(0, startExtra) else str // `startExtra > 1` leaves dot-less input untouched
+  }
+}
diff --git a/src/main/scala/rsc/util/TreeUtil.scala b/src/main/scala/rsc/util/TreeUtil.scala
new file mode 100644
index 0000000..1e83fef
--- /dev/null
+++ b/src/main/scala/rsc/util/TreeUtil.scala
@@ -0,0 +1,17 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc.util
+
+import rsc.semantics._
+import rsc.syntax._
+
+trait TreeUtil {
+  implicit class UtilIdOps(id: Id) {
+    def nameopt: Option[Name] = { // Some(name) for a NamedId, None for any other Id
+      id match {
+        case id: NamedId => Some(id.name)
+        case _ => None
+      }
+    }
+  }
+}
diff --git a/src/main/scala/rsc/util/package.scala b/src/main/scala/rsc/util/package.scala
new file mode 100644
index 0000000..774b541
--- /dev/null
+++ b/src/main/scala/rsc/util/package.scala
@@ -0,0 +1,10 @@
+// Copyright (c) 2017-2018 Twitter, Inc.
+// Licensed under the Apache License, Version 2.0 (see LICENSE.md).
+package rsc
+
+package object util
+    extends CharUtil
+    with ErrorUtil
+    with PrettyUtil
+    with StringUtil
+    with TreeUtil
From d341ff6a6e4f019ee16c46e017f547725d892e16 Mon Sep 17 00:00:00 2001 From: Denys Shabalin Date: Mon, 15 Apr 2019 17:17:49 +0200 Subject: [PATCH 2/2] Update license file --- LICENSE.md | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/LICENSE.md b/LICENSE.md index ce2b1b1..65831c3 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -31,3 +31,70 @@ Scala Native Benchmarks are provided under Scala License: NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
``` + +# License notice for smarr/are-we-fast-yet + +Scala Native benchmarks are based on the code from [smarr/are-we-fast-yet +](https://github.com/smarr/are-we-fast-yet). Original license notice is +included below: + +``` + # Overview + + The benchmarks in this repository are from different sources and have different + licenses. + + ## Richards and DeltaBlue + + These benchmark are derived from the Smalltalk sources provided by Mario Wolczko. + + License details are available at: + http://web.archive.org/web/20050825101121/http://www.sunlabs.com/people/mario/java_benchmarking/index.html + + Further information: + http://www.wolczko.com/java_benchmarking.html + + ## Computer Language Benchmarks Game + + $Id: LICENSE,v 1.1 2012-12-29 19:28:50 igouy-guest Exp $ + + Revised BSD license + + This is a specific instance of the Open Source Initiative (OSI) BSD license template + http://www.opensource.org/licenses/bsd-license.php + + + Copyright 2008-2012 Isaac Gouy + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + + Neither the name of "The Computer Language Benchmarks Game" nor the name of "The Computer Language Shootout Benchmarks" nor the name "bencher" nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +``` + +# License notice for twitter/rsc + +Rsc benchmark is based on code snapshot from [twitter/rsc](https://github.com/twitter/rsc). +Original license included below: + +``` +Copyright (c) 2017-2019 Twitter, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +```