Skip to content

Commit

Permalink
Speedup of the string literal lexer (#54)
Browse files Browse the repository at this point in the history
* Speedup of the string literal lexer

Loading a file with large strings was taking a long time because
the implementation of substring was not very efficient (it was
using unsafe_get, which is O(n) on utf8 strings).

Also, the special character decoding made many calls to substring,
which is O(n) (again because of utf8).

* Added changes suggested in the review
  • Loading branch information
pikatchu committed Oct 4, 2018
1 parent 9a48693 commit 51c2440
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 34 deletions.
53 changes: 28 additions & 25 deletions src/frontend/skipToken.sk
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ module Token;

extension class Token{} {
fun stringLiteralValue(): String {
accumulateStringValue("", this.value.sub(1, this.value.length() - 2), 0)
stringValue(this.value.sub(1, this.value.length() - 2))
}

fun templateStringLiteralValue(): String {
accumulateStringValue("", this.value, 0)
stringValue(this.value)
}

fun charLiteralValue(): Char {
Expand Down Expand Up @@ -103,19 +103,6 @@ fun charLiteral(value: String): Char {
Char::fromCode(charLiteralCode(value))
}

fun charLiteralLength(value: String, index: Int): Int {
if (value[index] != '\\') {
1
} else {
value[index + 1] match {
| 'x' -> 4
| 'u' -> 6
| 'U' -> 10
| _ -> 2
}
}
}

// Note this does not include quotes around the result.
fun escapeCharLiteralValue(ch: Char): String {
ch match {
Expand Down Expand Up @@ -151,15 +138,31 @@ fun escapeStringLiteralValue(s: String): String {
s.chars().map(escapeCharLiteralValue).join("")
}

fun accumulateStringValue(acc: String, value: String, index: Int): String {
if (index == value.length()) {
acc
} else {
length = charLiteralLength(value, index);
accumulateStringValue(
acc + charLiteral(value.sub(index, length)),
value,
index + length,
)
// advance iter past a single literal char
// iter must be at a valid literal char
fun skipLiteralChar(iter: mutable String.StringIterator): void {
if (iter.next().fromSome() == '\\') {
length = iter.next().fromSome() match {
| 'x' -> 2
| 'u' -> 4
| 'U' -> 8
| _ -> 0
};
_ = iter.drop(length);
}
}

fun stringValue(value: String): String {
// shortcut the common case
if (!value.contains("\\")) return value;

chars = mutable Vector[];
iter = value.getIter();
do {
charStart = iter.clone();
skipLiteralChar(iter);
chars.push(charLiteral(charStart.substring(iter)));
} while (!iter.atEnd());

String::fromChars(chars.toArray())
}
11 changes: 9 additions & 2 deletions src/runtime/prelude/core/primitives/String.sk

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 9 additions & 2 deletions tests/runtime/prelude/core/primitives/String.sk
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ native class .String uses Hashable, Show, Orderable {
@cpp_runtime
@may_alloc
native static fun fromUtf8(bytes: readonly Array<UInt8>): String;

// Careful, this is O(n) (because of utf8)
@cpp_runtime
native private fun unsafe_get(x: Int): Char;

Expand All @@ -35,8 +37,13 @@ native class .String uses Hashable, Show, Orderable {
throwOutOfBounds()
};

v = Array::fillBy(len, i -> this.unsafe_get(start + i));
static::fromChars(v)
// We don't want to use unsafe_get because it is O(n)
subString = mutable Vector[];
iter = this.getIter().drop(start);
for (_ in Range(0, len)) {
subString.push(iter.next().fromSome())
};
static::fromChars(freeze(subString.toArray()))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ The type: C
| ^
12 | "" + c

File "../../runtime/prelude/core/primitives/String.sk", line 106, characters 12-24:
File "../../runtime/prelude/core/primitives/String.sk", line 113, characters 12-24:
Is not a subtype of: readonly Show
104 | }
105 |
106 | fun +<T: readonly Show>(s: T): String {
111 | }
112 |
113 | fun +<T: readonly Show>(s: T): String {
| ^^^^^^^^^^^^^
107 | this.concat(s.toString())
114 | this.concat(s.toString())

0 comments on commit 51c2440

Please sign in to comment.