Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
skip/src/runtime/prelude/core/primitives/String.sk
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
532 lines (471 sloc)
12.6 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
module String; | |
native class .String uses Hashable, Show, Orderable { | |
// Builds a String from an array of characters.
// Declared native: the body is not in this file. The annotations suggest a
// C++ runtime implementation that may allocate (presumably; confirm against
// the runtime).
@cpp_runtime
@may_alloc
native static fun fromChars(chars: readonly Array<Char>): String;
// Builds a String from an array of bytes.
// Declared native: the body is not in this file. The name indicates the bytes
// are expected to be UTF-8; how invalid sequences are handled is not visible
// here (TODO confirm against the runtime).
@cpp_runtime
@may_alloc
native static fun fromUtf8(bytes: readonly Array<UInt8>): String;
// Builds a String from the characters produced by `gen`: the generator is
// invoked once, its iterator is collected into an Array with
// Array::createFromIterator, and that array is passed to String::fromChars.
private static fun fromGenerator(gen: () -> mutable Iterator<Char>): String {
  chars = Array::createFromIterator(gen());
  String::fromChars(chars)
}
// Returns the length of this string as an Int. Declared native; padLeft in
// this class treats the result as a count of characters.
@cpp_runtime
native fun length(): Int;
// Returns this string followed by the argument. Declared native/intrinsic;
// it is the primitive behind the `+` operator of this class.
@intrinsic
private native fun concat(String): String;
// Native conversion of this string to a Float. In this file it is only called
// by toFloatOption(), after the text has been validated; its behavior on
// malformed input is not visible here.
@cpp_runtime
native private fun toFloat_raw(): Float;
// Native three-way comparison with `other`. The operators of this class
// interpret the result by sign: 0 means equal, negative means this < other,
// positive means this > other.
@cpp_runtime
native private fun compare_raw(other: String): Int;
// Returns the string obtained by collecting `len` characters from the start
// of this string. What happens when fewer than `len` characters exist is
// decided by StringIterator.collectString (not visible here).
fun take(len: Int): String {
  cursor = this.getIter();
  cursor.collectString(len)
}
// Returns the string obtained by collecting `len` characters beginning at
// `start`. The iterator is cloned first, so the caller's readonly iterator is
// not advanced.
fun sub(start: readonly StringIterator, len: Int): String {
  cursor = start.clone();
  cursor.collectString(len)
}
// Returns the Inspect representation of this string: an InspectString
// wrapping `this`.
fun inspect(): Inspect {
  InspectString(this)
}
// Returns the part of this string that runs from `start` to the end of the
// string. `start` is expected to be an iterator over this same string.
fun substring(start: readonly StringIterator): String {
  end = this.getEndIter();
  start.substring(end)
}
// Returns true when this string begins with `prefix`.
//
// getIter() already returns a freshly built mutable iterator that nobody else
// holds, so it is consumed directly; the extra clone() the code used to make
// only copied a temporary. stripPrefix() calls startsWith on a getIter()
// result the same way.
frozen fun startsWith(prefix: String): Bool {
  this.getIter().startsWith(prefix.getIter())
}
// Returns true when this string ends with `suffix`.
//
// Both strings are walked backwards from their ends, one character per step:
//  - the suffix runs out first (or at the same time): every suffix character
//    matched, so the answer is true;
//  - this string runs out while the suffix still has characters: false;
//  - the two characters differ: false.
frozen fun endsWith(suffix: String): Bool {
  selfCursor = this.getEndIter();
  suffixCursor = suffix.getEndIter();
  loop {
    // Step both cursors on every iteration, this string first.
    selfChar = selfCursor.prev();
    suffixChar = suffixCursor.prev();
    suffixChar match {
    | None() -> break true
    | Some(expected) ->
      selfChar match {
      | None() -> break false
      | Some(actual) if (actual != expected) -> break false
      | _ -> void
      }
    }
  }
}
// Returns this string with a leading `prefix` removed, or this string
// unchanged when it does not begin with `prefix`.
fun stripPrefix(prefix: String): String {
  cursor = this.getIter();
  // NOTE(review): relies on StringIterator.startsWith leaving `cursor` just
  // past the matched prefix when it returns true; that behavior lives
  // outside this file.
  if (!cursor.startsWith(prefix.getIter())) {
    this
  } else {
    cursor.substring(this.getEndIter())
  }
}
// Returns this string with a trailing `suffix` removed, or this string
// unchanged when it does not end with `suffix`.
//
// Both strings are walked backwards from their ends in lock step:
//  - both run out together: the string equals the suffix, result is "";
//  - only the suffix runs out: the suffix matched; the cursor over this string
//    has stepped one character too far, so it is moved forward again and
//    everything before it is returned;
//  - this string runs out first, or two characters differ: no match, return
//    this string as is.
fun stripSuffix(suffix: String): String {
  tail = this.getEndIter();
  suffixTail = suffix.getEndIter();
  loop {
    // Step both cursors on every iteration, this string first.
    selfChar = tail.prev();
    suffixChar = suffixTail.prev();
    suffixChar match {
    | None() ->
      selfChar match {
      | None() -> break ""
      | Some(_) ->
        _ = tail.next();
        break this.getIter().substring(tail)
      }
    | Some(expected) ->
      selfChar match {
      | Some(actual) if (actual == expected) -> void
      | _ -> break this
      }
    }
  }
}
// Concatenation operator: converts `s` to text with toString() and appends
// it to this string.
fun +<T: readonly Show>(s: T): String {
  rhs = s.toString();
  this.concat(rhs)
}
// Builds a string from the array Array::fillBy(size, f), i.e. from `size`
// characters produced by `f`.
static fun tabulate(size: Int, f: Int -> Char): String {
  chars = Array::fillBy(size, f);
  static::fromChars(chars)
}
// Returns this string repeated once for every value in Range(0, count).
fun repeat(count: Int): String {
  copies: () -> mutable Iterator<Char> = () -> {
    for (_ in Range(0, count)) {
      for (ch in this) yield ch;
    }
  };
  String::fromGenerator(copies)
}
// Returns true exactly when this string equals the empty string "".
@cpp_runtime
fun isEmpty(): Bool {
  this == ""
}
// Calls `f` on every character of this string, in order from the start.
fun each(f: Char -> void): void {
  this.getIter().each(f)
}
// Left fold over the characters of this string: starting from `accum`, the
// accumulator is replaced by f(accumulator, ch) for each character in order,
// and the final accumulator is returned. An empty string returns `accum`.
fun foldl<Taccum>(f: (Taccum, Char) -> Taccum, accum: Taccum): Taccum {
  acc = accum;
  for (ch in this.getIter()) {
    !acc = f(acc, ch)
  };
  acc
}
// Parses this string as an Int. toIntOptionHelper returns a (flag, value)
// pair; a flag of 0 means the parse failed and yields None(), any other flag
// yields Some(value).
fun toIntOption(): ?Int {
  (valid, value) = toIntOptionHelper(this);
  if (valid == 0) None() else Some(value)
}
// Parses this string as an Int. Calls invariant_violation with a message that
// quotes the offending text when toIntOption() rejects it.
fun toInt(): Int {
  parsed = this.toIntOption();
  parsed match {
  | Some(n) -> n
  | None() ->
    invariant_violation("String.toInt: parse error on '" + this + "'")
  }
}
// Returns true when toIntOption() accepts this string as an Int.
fun isIntish(): Bool {
  this.toIntOption() match {
  | Some(_) -> true
  | None() -> false
  }
}
// Parses this string as a Float, returning None() when the text is not in the
// accepted format.
//
// Accepted inputs are the exact literals "inf", "-inf" and "nan", or text of
// the shape
//     [-] mantissa [. fraction] [(e|E) [+|-] exponent]
// where
//  - the mantissa is either a single "0" or digits that do not start with '0';
//  - a '.' must be followed by at least one fraction digit;
//  - an exponent marker must be followed by at least one digit;
//  - at least one of the fraction or the exponent must be present, so plain
//    integers such as "12" are rejected.
// Text passing these checks is handed to the native toFloat_raw().
fun toFloatOption(): ?Float {
  if (this == "inf") {
    Some(Float::inf)
  } else if (this == "-inf") {
    Some(-Float::inf)
  } else if (this == "nan") {
    Some(Float::nan)
  } else {
    // Scanner state, updated one character at a time by `classify`.
    sawMinus = false;
    sawMantissa = false;
    mantissaIsZero = false;
    sawDot = false;
    sawFraction = false;
    sawExponent = false;
    sawExponentSign = false;
    sawExponentDigit = false;
    malformed = false;
    classify = c -> {
      isDigit = c >= '0' && c <= '9';
      isSign = c == '-' || c == '+';
      // The branch order matters: the first branch whose condition holds
      // consumes the character.
      if (c == '-' && !sawMinus && !sawMantissa) {
        // Leading minus sign of the whole number.
        !sawMinus = true
      } else if (c == '0' && !sawMantissa) {
        // A mantissa that starts with '0' must be exactly "0".
        !mantissaIsZero = true;
        !sawMantissa = true
      } else if (isDigit && !sawDot && !sawExponent && !mantissaIsZero) {
        !sawMantissa = true
      } else if (isDigit && sawDot && !sawExponent) {
        !sawFraction = true
      } else if (isDigit && sawExponent) {
        !sawExponentDigit = true
      } else if (c == '.' && !sawDot && sawMantissa && !sawExponent) {
        !sawDot = true
      } else if ((c == 'e' || c == 'E') && sawMantissa && !sawExponent) {
        !sawExponent = true
      } else if (
        isSign &&
        sawExponent &&
        !sawExponentSign &&
        !sawExponentDigit
      ) {
        // Exponent sign: at most one, and only before any exponent digit.
        !sawExponentSign = true
      } else {
        !malformed = true
      }
    };
    this.each(classify);
    wellFormed =
      !malformed &&
      sawMantissa &&
      (sawDot || sawExponent) &&
      (!sawDot || sawFraction) &&
      (!sawExponent || sawExponentDigit);
    if (wellFormed) {
      Some(this.toFloat_raw())
    } else {
      None()
    }
  }
}
// Parses this string as a Float. Calls invariant_violation with a message
// that quotes the offending text when toFloatOption() rejects it.
fun toFloat(): Float {
  parsed = this.toFloatOption();
  parsed match {
  | Some(x) -> x
  | None() ->
    invariant_violation("String.toFloat: parse error on '" + this + "'")
  }
}
// Returns this string without its leading whitespace (as defined by
// isWhitespace). A string consisting only of whitespace yields "".
fun trimLeft(): String {
  firstKept = this.search(ch ~> !isWhitespace(ch));
  firstKept match {
  | None() -> ""
  | Some(start) -> start.substring(this.getEndIter())
  }
}
// Returns this string without its trailing whitespace (as defined by
// isWhitespace). A string consisting only of whitespace yields "".
fun trimRight(): String {
  lastKept = this.searchRight(ch ~> !isWhitespace(ch));
  lastKept match {
  | None() -> ""
  | Some(end) ->
    // Step forward over the last kept character so that it is included in
    // the returned string.
    _ = end.next();
    this.getIter().substring(end)
  }
}
// Returns this string with leading and then trailing whitespace removed.
fun trim(): String {
  leftTrimmed = this.trimLeft();
  leftTrimmed.trimRight()
}
// Scans forward from the start of this string for the first character that
// satisfies `f`. Returns Some(iterator) positioned at that character, or
// None() when no character matches.
fun search(f: Char -> Bool): ?mutable StringIterator {
  cursor = this.getIter();
  cursor.find(f) match {
  | None() -> None()
  | Some(_) ->
    // Step back one character after a successful find() so the returned
    // iterator points at the match itself (trimLeft depends on this).
    _ = cursor.prev();
    Some(cursor)
  }
}
// Scans backwards from the end of this string for the last character that
// satisfies `f`. Returns Some(iterator) as left by rfind() on success, or
// None() when no character matches.
fun searchRight(f: Char -> Bool): ?mutable StringIterator {
  cursor = this.getEndIter();
  cursor.rfind(f) match {
  | Some(_) -> Some(cursor)
  | None() -> None()
  }
}
// Returns the characters of this string, in order, as a Vector.
fun chars(): Vector<Char> {
  cursor = this.getIter();
  cursor.collect(Vector)
}
// Splits this string around the last occurrence of `substring`, returning
// (text before it, text after it). When `substring` does not occur the
// result is ("", this).
fun splitLast(substring: String): (String, String) {
  cursor = this.getEndIter();
  if (cursor.reverseSearch(substring)) {
    head = this.getIter().substring(cursor);
    // Skip over the separator itself before taking the tail.
    _ = cursor.drop(substring.length());
    tail = cursor.substring(this.getEndIter());
    (head, tail)
  } else {
    ("", this)
  }
}
// Splits this string around the first occurrence of `substring`, returning
// (text before it, text after it). When `substring` does not occur the
// result is (this, "").
fun splitFirst(substring: String): (String, String) {
  cursor = this.getIter();
  if (cursor.search(substring)) {
    head = this.getIter().substring(cursor);
    // Skip over the separator itself before taking the tail.
    _ = cursor.drop(substring.length());
    tail = cursor.substring(this.getEndIter());
    (head, tail)
  } else {
    (this, "")
  }
}
// Reports whether `search` occurs in this string. Declared native; the
// implementation (and its answer for an empty `search`) is not in this file.
@cpp_runtime
native fun contains(search: String): Bool;
// Returns a copy of this string in which every non-overlapping occurrence of
// `search`, found left to right, is replaced by `replacement`. An empty
// `search` returns this string unchanged.
fun replace(search: String, replacement: String): String {
  if (search.isEmpty()) {
    this
  } else {
    needleLength = search.length();
    String::fromGenerator(() -> {
      cursor = this.getIter();
      while (!cursor.atEnd()) {
        segmentStart = cursor.clone();
        if (cursor.search(search)) {
          // Emit the text before the match, then the replacement, and move
          // past the matched text.
          for (ch in segmentStart.substring(cursor)) yield ch;
          for (ch in replacement) yield ch;
          _ = cursor.drop(needleLength);
        } else {
          // No further match: rewind to where this segment began so the rest
          // of the string is emitted verbatim below.
          !cursor = segmentStart;
          break void;
        };
      };
      for (ch in cursor) yield ch;
    })
  }
}
// Splits this string on `delimiter` and returns the pieces in order.
//
// At most `maxCount` splits are performed; whatever remains afterwards is
// returned unsplit as the final piece, so the result has at most
// maxCount + 1 elements. An empty string yields Vector[""]. An empty
// `delimiter` is a programming error and triggers invariant_violation.
fun split(delimiter: String, maxCount: Int = Int::max): Vector<String> {
  delimiterLength = delimiter.length();
  if (delimiterLength == 0) {
    invariant_violation("String.split: cannot split with empty delimiter")
  } else if (this.isEmpty()) {
    Vector[""]
  } else {
    gen: () -> mutable Iterator<String> = () -> {
      start = this.getIter();
      i = start.clone();
      // Check the remaining budget before searching: once maxCount is used
      // up, there is no reason to scan the rest of the string for another
      // delimiter. The final piece below is computed from `start`, not `i`,
      // so the result does not depend on that extra search.
      while (maxCount > 0 && i.search(delimiter)) {
        !maxCount = maxCount - 1;
        yield start.substring(i);
        _ = i.drop(delimiterLength);
        !start = i.clone();
      };
      yield start.substring(this.getEndIter());
    };
    Vector::createFromIterator(gen())
  }
}
// Joins `pieces` using this string as the separator: the pieces are collected
// into an Array and that array's join(this) produces the result.
fun join<TS: readonly Show, T: readonly Sequence<TS>>(pieces: T): String {
  items = pieces.collect(Array);
  items.join(this)
}
// Left-pads this string with `chr` (a space by default) until it is `width`
// long. A string that is already at least `width` long is returned as is.
fun padLeft(width: Int, chr: Char = ' '): String {
  currentLength = this.length();
  if (width > currentLength) {
    String::fromGenerator(() -> {
      for (_ in Range(0, width - currentLength)) yield chr;
      for (ch in this) yield ch;
    })
  } else {
    this
  }
}
// Right-pads this string with `chr` (a space by default) until it is `width`
// long. A string that is already at least `width` long is returned as is.
//
// Mirrors padLeft: the length is taken up front so that the no-padding case
// returns `this` directly instead of rebuilding an identical string
// character by character.
fun padRight(width: Int, chr: Char = ' '): String {
  n = this.length();
  if (width <= n) {
    this
  } else {
    String::fromGenerator(() -> {
      for (c in this) yield c;
      for (_ in Range(n, width)) yield chr;
    })
  }
}
// Comparison operators. Each one calls the native compare_raw() and tests the
// sign of its Int result against 0.

// Equal: compare_raw() returns 0.
fun ==(other: String): Bool {
  this.compare_raw(other) == 0
}

// Not equal: compare_raw() returns a non-zero value.
fun !=(other: String): Bool {
  this.compare_raw(other) != 0
}

// Less than: compare_raw() returns a negative value.
fun <(other: String): Bool {
  this.compare_raw(other) < 0
}

// Greater than: compare_raw() returns a positive value.
fun >(other: String): Bool {
  this.compare_raw(other) > 0
}

// Less than or equal: compare_raw() returns zero or a negative value.
fun <=(other: String): Bool {
  this.compare_raw(other) <= 0
}

// Greater than or equal: compare_raw() returns zero or a positive value.
fun >=(other: String): Bool {
  this.compare_raw(other) >= 0
}
// Three-way comparison with `other`, returned as an Order: the Int produced
// by compare_raw() is compared against 0.
fun compare(other: String): Order {
  raw = this.compare_raw(other);
  compare(raw, 0)
}
// A String is its own textual representation: returns this string unchanged.
fun toString(): String {
  this
}
// Hashes this string by folding over its characters in order: the
// accumulator starts at 37 and each step mixes in the character's own hash
// with Hashable.combine. The empty string therefore hashes to 37.
@cpp_runtime
fun hash(): Int {
  acc = 37;
  for (ch in this.getIter()) {
    !acc = Hashable.combine(acc, ch.hash())
  };
  acc
}
// Returns a new mutable StringIterator over this string, created with
// StringIterator::make. The methods of this class use it as a cursor that
// starts at the first character.
fun getIter(): mutable StringIterator {
  StringIterator::make(this)
}
// Returns a new mutable StringIterator over this string, created with
// StringIterator::makeEnd. The methods of this class use it as the end
// position of the string and walk it backwards with prev().
fun getEndIter(): mutable StringIterator {
  StringIterator::makeEnd(this)
}
// Returns a new mutable iterator over the characters of this string; the
// same iterator getIter() produces.
fun values(): mutable StringIterator {
  this.getIter()
}
// Returns a copy of this string in which every character has been passed
// through Char.capitalize().
@synonym("toUpperCase")
@synonym("toUpper")
@synonym("upper")
@synonym("uppercased")
@synonym("upcase")
@synonym("uc")
fun uppercase(): String {
  upper = this.getIter().map(ch -> ch.capitalize()).collect(Array);
  String::fromChars(upper)
}
// Returns a copy of this string in which every character has been passed
// through Char.uncapitalize().
@synonym("toLowerCase")
@synonym("toLower")
@synonym("lower")
@synonym("lowercased")
@synonym("downcase")
@synonym("lc")
fun lowercase(): String {
  lower = this.getIter().map(ch -> ch.uncapitalize()).collect(Array);
  String::fromChars(lower)
}
// Returns a copy of this string whose first character has been passed through
// Char.capitalize(); the remaining characters are copied unchanged. The empty
// string yields the empty string.
fun uppercaseFirst(): String {
  String::fromGenerator(() -> {
    rest = this.getIter();
    rest.next() match {
    | None() -> void
    | Some(first) -> yield first.capitalize()
    };
    for (ch in rest) yield ch;
  })
}
// Returns a copy of this string whose first character has been passed through
// Char.uncapitalize(); the remaining characters are copied unchanged. The
// empty string yields the empty string.
fun lowercaseFirst(): String {
  String::fromGenerator(() -> {
    rest = this.getIter();
    rest.next() match {
    | None() -> void
    | Some(first) -> yield first.uncapitalize()
    };
    for (ch in rest) yield ch;
  })
}
} | |
// Parses the remaining decimal digits from `i`, continuing from the magnitude
// accumulated so far in `value`.
//
// Returns the final magnitude when the rest of the input is all digits, and
// -1 on any error (a non-digit character or a magnitude that is too large).
// One special case: Int::min is returned when the accumulated value is
// exactly Int::min and the input is exhausted, which lets the caller accept
// the most negative integer.
//
// NOTE(review): a negative `value` can only be produced by
// `value * 10 + digit` wrapping around, so this logic assumes wrapping Int
// arithmetic; 922337203685477580 is Int::max / 10 for a 64-bit Int — confirm.
private fun toIntHelperRest(i: mutable StringIterator, value: Int): Int {
  if (value == Int::min) {
    // Wrapped to exactly Int::min: acceptable only if nothing follows.
    if (i.next().isNone()) value else -1
  } else if (value < 0) {
    // Any other negative value signals overflow.
    -1
  } else {
    i.next() match {
    | None() -> value
    | Some(ch) if (ch >= '0' && ch <= '9') ->
      if (value <= 922337203685477580) {
        // 48 is the character code of '0'.
        toIntHelperRest(i, value * 10 + (ch.code() - 48))
      } else {
        // Appending another digit would certainly not fit.
        -1
      }
    | _ -> -1
    }
  }
}
// Parses `s` as a decimal integer and returns a (flag, value) pair: the flag
// is 0 when the text is rejected and non-zero when `value` holds the result.
//
// Accepted text is "0", or an optional '-' followed by a digit 1-9 and further
// digits. The empty string, a leading '+', leading zeros and "-0" are all
// rejected.
@no_inline
@no_throw
@cpp_runtime
private fun toIntOptionHelper(s: String): (Int, Int) {
  cursor = s.getIter();
  cursor.next() match {
  | None() -> (0, 0)
  | Some('0') ->
    // A leading zero is only valid as the entire input.
    if (cursor.next().isNone()) (1, 0) else (0, 0)
  | Some('-') ->
    cursor.next() match {
    | Some(ch) if (ch >= '1' && ch <= '9') ->
      magnitude = toIntHelperRest(cursor, ch.code() - 48);
      // magnitude is -1 on error, which makes the flag (magnitude + 1) zero;
      // every other result, including Int::min for the most negative
      // integer, gives a non-zero flag.
      (magnitude + 1, -magnitude)
    | _ -> (0, 0)
    }
  | Some(ch) if (ch >= '1' && ch <= '9') ->
    magnitude = toIntHelperRest(cursor, ch.code() - 48);
    // Negative results (-1 for errors, Int::min for an overflow that is only
    // valid after a '-') are failures for unsigned input.
    valid = if (magnitude >= 0) 1 else 0;
    (valid, magnitude)
  | _ -> (0, 0)
  }
}
// Returns true for the whitespace characters recognized by trimLeft() and
// trimRight(): the control characters U+0009 through U+000D and U+0085, plus
// the line, paragraph and space separators listed below (Unicode General
// Category Z*).
private fun isWhitespace(c: Char): Bool {
  c match {
  // Control characters
  | '\u0009' // CHARACTER TABULATION
  | '\u000A' // LINE FEED
  | '\u000B' // LINE TABULATION
  | '\u000C' // FORM FEED
  | '\u000D' // CARRIAGE RETURN
  | '\u0085' // NEXT LINE
  // Zl: http://www.fileformat.info/info/unicode/category/Zl/list.htm
  | '\u2028' // LINE SEPARATOR
  // Zp: http://www.fileformat.info/info/unicode/category/Zp/list.htm
  | '\u2029' // PARAGRAPH SEPARATOR
  // Zs: http://www.fileformat.info/info/unicode/category/Zs/list.htm
  | '\u0020' // SPACE
  | '\u00A0' // NO-BREAK SPACE
  | '\u1680' // OGHAM SPACE MARK
  | '\u2000' // EN QUAD
  | '\u2001' // EM QUAD
  | '\u2002' // EN SPACE
  | '\u2003' // EM SPACE
  | '\u2004' // THREE-PER-EM SPACE
  | '\u2005' // FOUR-PER-EM SPACE
  | '\u2006' // SIX-PER-EM SPACE
  | '\u2007' // FIGURE SPACE
  | '\u2008' // PUNCTUATION SPACE
  | '\u2009' // THIN SPACE
  | '\u200A' // HAIR SPACE
  | '\u202F' // NARROW NO-BREAK SPACE
  | '\u205F' // MEDIUM MATHEMATICAL SPACE
  | '\u3000' -> // IDEOGRAPHIC SPACE
    true
  | _ -> false
  }
}
// --- Support for String::convert()

// The text encodings that can be named in this module. CustomEncoding carries
// an arbitrary encoding name for anything that is not built in.
base class Encoding {
  children =
  | Utf8()
  | Ascii()
  | Cp437()
  | Utf16LE()
  | Utf16BE()
  | Iso8859_1()
  | CustomEncoding(name: String)

  // Returns the textual code of this encoding. Built-in encodings map to a
  // fixed lowercase code; a CustomEncoding returns the name it was given.
  fun code(): String {
    this match {
    | CustomEncoding(name) -> name
    | Utf8() -> "utf8"
    | Ascii() -> "ascii"
    | Cp437() -> "cp437"
    | Utf16LE() -> "utf16le"
    | Utf16BE() -> "utf16be"
    | Iso8859_1() -> "iso8859-1"
    }
  }
}