From 93b65f52d853394a84cf2a3d1b0d1fdecfdb10c2 Mon Sep 17 00:00:00 2001 From: Karoy Lorentey Date: Fri, 23 Nov 2018 21:14:21 +0000 Subject: [PATCH 1/5] [benchmark] Add some benchmarks for String breadcrumbs --- benchmark/CMakeLists.txt | 1 + .../single-source/StringBreadcrumbs.swift | 494 ++++++++++++++++++ benchmark/utils/main.swift | 2 + 3 files changed, 497 insertions(+) create mode 100644 benchmark/single-source/StringBreadcrumbs.swift diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 62d8aa2473c9e..255f05cb1bb90 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -153,6 +153,7 @@ set(SWIFT_BENCH_MODULES single-source/StaticArray single-source/StrComplexWalk single-source/StrToInt + single-source/StringBreadcrumbs single-source/StringBuilder single-source/StringComparison single-source/StringEdits diff --git a/benchmark/single-source/StringBreadcrumbs.swift b/benchmark/single-source/StringBreadcrumbs.swift new file mode 100644 index 0000000000000..d8021a11a81b6 --- /dev/null +++ b/benchmark/single-source/StringBreadcrumbs.swift @@ -0,0 +1,494 @@ +//===--- StringBreadcrumbs.swift ------------------------------*- swift -*-===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2018 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +import TestsUtils + +public let StringBreadcrumbs: [BenchmarkInfo] = [ + UTF16ToIndex(workload: longASCIIWorkload, count: 5_000).info, + UTF16ToIndex(workload: longMixedWorkload, count: 5_000).info, + IndexToUTF16(workload: longASCIIWorkload, count: 5_000).info, + IndexToUTF16(workload: longMixedWorkload, count: 5_000).info, + + UTF16ToIndexRange(workload: longASCIIWorkload, count: 1_000).info, + UTF16ToIndexRange(workload: longMixedWorkload, count: 1_000).info, + IndexToUTF16Range(workload: longASCIIWorkload, count: 1_000).info, + IndexToUTF16Range(workload: longMixedWorkload, count: 1_000).info, + + CopyUTF16CodeUnits(workload: shortASCIIWorkload, count: 500).info, + CopyUTF16CodeUnits(workload: shortMixedWorkload, count: 500).info, + + MutatedUTF16ToIndex(workload: shortASCIIWorkload, count: 50).info, + MutatedUTF16ToIndex(workload: shortMixedWorkload, count: 50).info, + MutatedIndexToUTF16(workload: shortASCIIWorkload, count: 50).info, + MutatedIndexToUTF16(workload: shortMixedWorkload, count: 50).info, +] + +extension String { + func forceNativeCopy() -> String { + var result = String() + result.reserveCapacity(64) + result.append(self) + return result + } +} + +let seed = 0x12345678 + +/// A linear congruential PRNG. 
+struct LCRNG: RandomNumberGenerator { + private var state: UInt64 + + init(seed: Int) { + state = UInt64(truncatingIfNeeded: seed) + for _ in 0..<10 { _ = next() } + } + + mutating func next() -> UInt64 { + state = 2862933555777941757 &* state &+ 3037000493 + return state + } +} + +extension Collection { + /// Returns a randomly ordered array of random non-overlapping index ranges + /// that cover this collection entirely. + /// + /// Note: some of the returned ranges may be empty. + func randomIndexRanges( + count: Int, + using generator: inout R + ) -> [Range] { + // Load indices into a buffer to prevent quadratic performance with + // forward-only collections. FIXME: Eliminate this if Self conforms to RAC. + let indices = Array(self.indices) + var cuts: [Index] = (0 ..< count - 1).map { _ in + indices.randomElement(using: &generator)! + } + cuts.append(self.startIndex) + cuts.append(self.endIndex) + cuts.sort() + let ranges = (0 ..< count).map { cuts[$0] ..< cuts[$0 + 1] } + return ranges.shuffled(using: &generator) + } +} + +struct Workload { + let name: String + let string: String +} + +class BenchmarkBase { + let name: String + let workload: Workload + + var inputString: String = "" + + init(name: String, workload: Workload) { + self.name = name + self.workload = workload + } + + var label: String { + return "\(name).\(workload.name)" + } + + func setUp() { + self.inputString = workload.string.forceNativeCopy() + } + + func tearDown() { + self.inputString = "" + } + + final func run(iterations: Int) { + for _ in 0 ..< iterations { + self.run() + } + } + + func run() {} + + var info: BenchmarkInfo { + return BenchmarkInfo( + name: self.label, + runFunction: self.run(iterations:), + tags: [.validation, .api, .String], + setUpFunction: self.setUp, + tearDownFunction: self.tearDown) + } +} + +//============================================================================== +// Workloads 
+//============================================================================== + +let asciiBase = #""" + * Debugger support. Swift has a `-g` command line switch that turns on + debug info for the compiled output. Using the standard lldb debugger + this will allow single-stepping through Swift programs, printing + backtraces, and navigating through stack frames; all in sync with + the corresponding Swift source code. An unmodified lldb cannot + inspect any variables. + + Example session: + + ``` + $ echo 'println("Hello World")' >hello.swift + $ swift hello.swift -c -g -o hello.o + $ ld hello.o "-dynamic" "-arch" "x86_64" "-macosx_version_min" "10.9.0" \ + -framework Foundation lib/swift/libswift_stdlib_core.dylib \ + lib/swift/libswift_stdlib_posix.dylib -lSystem -o hello + $ lldb hello + Current executable set to 'hello' (x86_64). + (lldb) b top_level_code + Breakpoint 1: where = hello`top_level_code + 26 at hello.swift:1, addre... + (lldb) r + Process 38592 launched: 'hello' (x86_64) + Process 38592 stopped + * thread #1: tid = 0x1599fb, 0x0000000100000f2a hello`top_level_code + ... + frame #0: 0x0000000100000f2a hello`top_level_code + 26 at hello.shi... + -> 1 println("Hello World") + (lldb) bt + * thread #1: tid = 0x1599fb, 0x0000000100000f2a hello`top_level_code + ... + frame #0: 0x0000000100000f2a hello`top_level_code + 26 at hello.shi... + frame #1: 0x0000000100000f5c hello`main + 28 + frame #2: 0x00007fff918605fd libdyld.dylib`start + 1 + frame #3: 0x00007fff918605fd libdyld.dylib`start + 1 + ``` + + Also try `s`, `n`, `up`, `down`. + + * `nil` can now be used without explicit casting. Previously, `nil` had + type `NSObject`, so one would have to write (e.g.) `nil as! NSArray` + to create a `nil` `NSArray`. Now, `nil` picks up the type of its + context. + + * `POSIX.EnvironmentVariables` and `swift.CommandLineArguments` global variables + were merged into a `swift.Process` variable. Now you can access command line + arguments with `Process.arguments`. 
In order to access environment variables + add `import POSIX` and use `Process.environmentVariables`. + + func _toUTF16Offsets(_ indices: Range) -> Range { + let lowerbound = _toUTF16Offset(indices.lowerBound) + let length = self.utf16.distance( + from: indices.lowerBound, to: indices.upperBound) + return Range( + uncheckedBounds: (lower: lowerbound, upper: lowerbound + length)) + } + 0 swift 0x00000001036b5f58 llvm::sys::PrintStackTrace(llvm::raw_ostream&) + 40 + 1 swift 0x00000001036b50f8 llvm::sys::RunSignalHandlers() + 248 + 2 swift 0x00000001036b6572 SignalHandler(int) + 258 + 3 libsystem_platform.dylib 0x00007fff64010b5d _sigtramp + 29 + 4 libsystem_platform.dylib 0x0000000100000000 _sigtramp + 2617177280 + 5 libswiftCore.dylib 0x0000000107b5d135 $sSh8_VariantV7element2atxSh5IndexVyx_G_tF + 613 + 6 libswiftCore.dylib 0x0000000107c51449 $sShyxSh5IndexVyx_Gcig + 9 + 7 libswiftCore.dylib 0x00000001059d60be $sShyxSh5IndexVyx_Gcig + 4258811006 + 8 swift 0x000000010078801d llvm::MCJIT::runFunction(llvm::Function*, llvm::ArrayRef) + 381 + 9 swift 0x000000010078b0a4 llvm::ExecutionEngine::runFunctionAsMain(llvm::Function*, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&, char const* const*) + 1268 + 10 swift 0x00000001000e048c REPLEnvironment::executeSwiftSource(llvm::StringRef, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&) + 1532 + 11 swift 0x00000001000dbbd3 swift::runREPL(swift::CompilerInstance&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&, bool) + 1443 + 12 swift 0x00000001000b5341 performCompile(swift::CompilerInstance&, swift::CompilerInvocation&, llvm::ArrayRef, int&, swift::FrontendObserver*, swift::UnifiedStatsReporter*) + 2865 + 13 swift 0x00000001000b38f4 swift::performFrontend(llvm::ArrayRef, char const*, void*, swift::FrontendObserver*) + 3028 + 14 swift 0x000000010006ca44 main + 660 + 15 libdyld.dylib 
0x00007fff63e293f1 start + 1 + 16 libdyld.dylib 0x0000000000000008 start + 2619173912 + Illegal instruction: 4 + + """# +let shortASCIIWorkload = Workload( + name: "shortASCII", + string: asciiBase) +let longASCIIWorkload = Workload( + name: "longASCII", + string: String(repeating: asciiBase, count: 100)) + +let mixedBase = """ + siebenhundertsiebenundsiebzigtausendsiebenhundertsiebenundsiebzig + ๐Ÿ‘๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ง๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ๐Ÿ‡บ๐Ÿ‡ธ๐Ÿ‡จ๐Ÿ‡ฆ๐Ÿ‡ฒ๐Ÿ‡ฝ๐Ÿ‘๐Ÿป๐Ÿ‘๐Ÿผ๐Ÿ‘๐Ÿฝ๐Ÿ‘๐Ÿพ๐Ÿ‘๐Ÿฟ + siebenhundertsiebenundsiebzigtausendsiebenhundertsiebenundsiebzig + ๐Ÿ‘๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ง๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ๐Ÿ‡บ๐Ÿ‡ธ๐Ÿ‡จ๐Ÿ‡ฆ๐Ÿ‡ฒ๐Ÿ‡ฝ๐Ÿ‘๐Ÿป๐Ÿ‘๐Ÿผ๐Ÿ‘๐Ÿฝ๐Ÿ‘๐Ÿพ๐Ÿ‘๐Ÿฟthe quick brown fox๐Ÿ‘๐Ÿฟ๐Ÿ‘๐Ÿพ๐Ÿ‘๐Ÿฝ๐Ÿ‘๐Ÿผ๐Ÿ‘๐Ÿป๐Ÿ‡ฒ๐Ÿ‡ฝ๐Ÿ‡จ๐Ÿ‡ฆ๐Ÿ‡บ๐Ÿ‡ธ๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘ฆโ€๐Ÿ‘ฆ๐Ÿ‘ฉโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ง๐Ÿ‘ + siebenhundertsiebenundsiebzigtausendsiebenhundertsiebenundsiebzig + ไปŠๅ›žใฎใ‚ขใƒƒใƒ—ใƒ‡ใƒผใƒˆใงSwiftใซๅคงๅน…ใชๆ”น่‰ฏใŒๆ–ฝใ•ใ‚Œใ€ๅฎ‰ๅฎšใ—ใฆใ„ใฆใ—ใ‹ใ‚‚็›ดๆ„Ÿ็š„ใซไฝฟใ†ใ“ใจใŒใงใใ‚‹Appleใƒ—ใƒฉใƒƒใƒˆใƒ•ใ‚ฉใƒผใƒ ๅ‘ใ‘ใƒ—ใƒญใ‚ฐใƒฉใƒŸใƒณใ‚ฐ่จ€่ชžใซใชใ‚Šใพใ—ใŸใ€‚ + Worst thing about working on String is that it breaks *everything*. Asserts, debuggers, and *especially* printf-style debugging ๐Ÿ˜ญ + Swift ๆ˜ฏ้ขๅ‘ Apple ๅนณๅฐ็š„็ผ–็จ‹่ฏญ่จ€๏ผŒๅŠŸ่ƒฝๅผบๅคงไธ”็›ด่ง‚ๆ˜“็”จ๏ผŒ่€Œๆœฌๆฌกๆ›ดๆ–ฐๅฏนๅ…ถ่ฟ›่กŒไบ†ๅ…จ้ขไผ˜ๅŒ–ใ€‚ + siebenhundertsiebenundsiebzigtausendsiebenhundertsiebenundsiebzig + ์ด๋ฒˆ ์—…๋ฐ์ดํŠธ์—์„œ๋Š” ๊ฐ•๋ ฅํ•˜๋ฉด์„œ๋„ ์ง๊ด€์ ์ธ Apple ํ”Œ๋žซํผ์šฉ ํ”„๋กœ๊ทธ๋ž˜๋ฐ ์–ธ์–ด์ธ Swift๋ฅผ ์™„๋ฒฝํžˆ ๊ฐœ์„ ํ•˜์˜€์Šต๋‹ˆ๋‹ค. + Worst thing about working on String is that it breaks *everything*. Asserts, debuggers, and *especially* printf-style debugging ๐Ÿ˜ญ + ะฒ ั‡ะฐั‰ะฐั… ัŽะณะฐ ะถะธะป-ะฑั‹ะป ั†ะธั‚ั€ัƒั? 
ะดะฐ, ะฝะพ ั„ะฐะปัŒัˆะธะฒั‹ะน ัะบะทะตะผะฟะปัั€ + siebenhundertsiebenundsiebzigtausendsiebenhundertsiebenundsiebzig + \u{201c}Hello\u{2010}world\u{2026}\u{201d} + \u{300c}\u{300e}ไปŠๆ—ฅใฏ\u{3001}ไธ–็•Œ\u{3002}\u{300f}\u{300d} + Worst thing about working on String is that it breaks *everything*. Asserts, debuggers, and *especially* printf-style debugging ๐Ÿ˜ญ + + """ +let shortMixedWorkload = Workload( + name: "shortMixed", + string: mixedBase) +let longMixedWorkload = Workload( + name: "longMixed", + string: String(repeating: mixedBase, count: 100)) + + +//============================================================================== +// Benchmarks +//============================================================================== + +/// Convert `count` random UTF-16 offsets into String indices. +class UTF16ToIndex: BenchmarkBase { + let count: Int + var inputOffsets: [Int] = [] + var outputIndices: [String.Index] = [] + + init(workload: Workload, count: Int) { + self.count = count + super.init(name: "StringBreadcrumbs.UTF16ToIndex", workload: workload) + } + + override func setUp() { + super.setUp() + var rng = LCRNG(seed: seed) + let range = 0 ..< inputString.utf16.count + inputOffsets = Array(range.shuffled(using: &rng).prefix(count)) + outputIndices = [] + outputIndices.reserveCapacity(inputOffsets.count) + } + + override func tearDown() { + super.tearDown() + inputOffsets = [] + } + + @inline(never) + override func run() { + outputIndices.removeAll(keepingCapacity: true) + for offset in inputOffsets { + outputIndices.append(inputString._toUTF16Index(offset)) + } + } +} + +/// Convert `count` random String indices into UTF-16 offsets. 
+class IndexToUTF16: BenchmarkBase { + let count: Int + var inputIndices: [String.Index] = [] + var outputOffsets: [Int] = [] + + init(workload: Workload, count: Int) { + self.count = count + super.init(name: "StringBreadcrumbs.IndexToUTF16", workload: workload) + } + + override func setUp() { + super.setUp() + var rng = LCRNG(seed: seed) + inputIndices = Array(inputString.indices.shuffled(using: &rng).prefix(count)) + outputOffsets = [] + outputOffsets.reserveCapacity(inputIndices.count) + } + + override func tearDown() { + super.tearDown() + inputIndices = [] + } + + @inline(never) + override func run() { + outputOffsets.removeAll(keepingCapacity: true) + for index in inputIndices { + outputOffsets.append(inputString._toUTF16Offset(index)) + } + } +} + +/// Split a string into `count` random slices and convert their UTF-16 offsets +/// into String index ranges. +class UTF16ToIndexRange: BenchmarkBase { + let count: Int + var inputOffsets: [Range] = [] + var outputIndices: [Range] = [] + + init(workload: Workload, count: Int) { + self.count = count + super.init(name: "StringBreadcrumbs.UTF16ToIndexRange", workload: workload) + } + + override func setUp() { + super.setUp() + var rng = LCRNG(seed: seed) + inputOffsets = ( + 0 ..< inputString.utf16.count + ).randomIndexRanges(count: count, using: &rng) + outputIndices = [] + outputIndices.reserveCapacity(inputOffsets.count) + } + + override func tearDown() { + super.tearDown() + inputOffsets = [] + } + + @inline(never) + override func run() { + outputIndices.removeAll(keepingCapacity: true) + for range in inputOffsets { + outputIndices.append(inputString._toUTF16Indices(range)) + } + } +} + +/// Split a string into `count` random slices and convert their index ranges +/// into into UTF-16 offset pairs. 
+class IndexToUTF16Range: BenchmarkBase { + let count: Int + var inputIndices: [Range] = [] + var outputOffsets: [Range] = [] + + init(workload: Workload, count: Int) { + self.count = count + super.init(name: "StringBreadcrumbs.IndexToUTF16Range", workload: workload) + } + + override func setUp() { + super.setUp() + var rng = LCRNG(seed: seed) + inputIndices = self.inputString.randomIndexRanges(count: count, using: &rng) + outputOffsets = [] + outputOffsets.reserveCapacity(inputIndices.count) + } + + override func tearDown() { + super.tearDown() + inputIndices = [] + } + + @inline(never) + override func run() { + outputOffsets.removeAll(keepingCapacity: true) + for range in inputIndices { + outputOffsets.append(inputString._toUTF16Offsets(range)) + } + } +} + + +class CopyUTF16CodeUnits: BenchmarkBase { + let count: Int + var inputIndices: [Range] = [] + var outputBuffer: [UInt16] = [] + + init(workload: Workload, count: Int) { + self.count = count + super.init(name: "StringBreadcrumbs.CopyUTF16CodeUnits", workload: workload) + } + + override func setUp() { + super.setUp() + var rng = LCRNG(seed: seed) + inputIndices = ( + 0 ..< inputString.utf16.count + ).randomIndexRanges(count: count, using: &rng) + outputBuffer = Array(repeating: 0, count: inputString.utf16.count) + } + + override func tearDown() { + super.tearDown() + inputIndices = [] + } + + @inline(never) + override func run() { + for range in inputIndices { + print(range) + outputBuffer.withUnsafeMutableBufferPointer { buffer in + inputString._copyUTF16CodeUnits( + into: UnsafeMutableBufferPointer(rebasing: buffer[range]), + range: range) + } + } + } +} + +/// This is like `UTF16ToIndex` but appends to the string after every index +/// conversion. In effect, this tests breadcrumb creation performance. 
+class MutatedUTF16ToIndex: BenchmarkBase { + let count: Int + var inputOffsets: [Int] = [] + var outputIndices: [String.Index] = [] + + init(workload: Workload, count: Int) { + self.count = count + super.init( + name: "StringBreadcrumbs.MutatedUTF16ToIndex", + workload: workload) + } + + override func setUp() { + super.setUp() + var generator = LCRNG(seed: seed) + let range = 0 ..< inputString.utf16.count + inputOffsets = Array(range.shuffled(using: &generator).prefix(count)) + outputIndices = [] + outputIndices.reserveCapacity(inputOffsets.count) + } + + override func tearDown() { + super.tearDown() + inputOffsets = [] + } + + @inline(never) + override func run() { + outputIndices.removeAll(keepingCapacity: true) + for offset in inputOffsets { + outputIndices.append(inputString._toUTF16Index(offset)) + inputString.append(" ") + } + } +} + + +/// This is like `UTF16ToIndex` but appends to the string after every index +/// conversion. In effect, this tests breadcrumb creation performance. 
+class MutatedIndexToUTF16: BenchmarkBase { + let count: Int + var inputIndices: [String.Index] = [] + var outputOffsets: [Int] = [] + + init(workload: Workload, count: Int) { + self.count = count + super.init( + name: "StringBreadcrumbs.MutatedIndexToUTF16", + workload: workload) + } + + override func setUp() { + super.setUp() + var rng = LCRNG(seed: seed) + inputIndices = Array(inputString.indices.shuffled(using: &rng).prefix(count)) + outputOffsets = [] + outputOffsets.reserveCapacity(inputIndices.count) + } + + override func tearDown() { + super.tearDown() + inputIndices = [] + } + + @inline(never) + override func run() { + outputOffsets.removeAll(keepingCapacity: true) + for index in inputIndices { + outputOffsets.append(inputString._toUTF16Offset(index)) + inputString.append(" ") + } + } +} diff --git a/benchmark/utils/main.swift b/benchmark/utils/main.swift index aec148045a6cf..baec3b333410b 100644 --- a/benchmark/utils/main.swift +++ b/benchmark/utils/main.swift @@ -146,6 +146,7 @@ import StackPromo import StaticArray import StrComplexWalk import StrToInt +import StringBreadcrumbs import StringBuilder import StringComparison import StringEdits @@ -319,6 +320,7 @@ registerBenchmark(StackPromo) registerBenchmark(StaticArrayTest) registerBenchmark(StrComplexWalk) registerBenchmark(StrToInt) +registerBenchmark(StringBreadcrumbs) registerBenchmark(StringBuilder) registerBenchmark(StringComparison) registerBenchmark(StringEdits) From 8bf61366562ce4d138ec70b3da5d7199b470e8c8 Mon Sep 17 00:00:00 2001 From: Karoy Lorentey Date: Wed, 28 Nov 2018 16:36:50 +0000 Subject: [PATCH 2/5] [benchmark] Breadcrumbs: Improve docs; shorten names --- benchmark/CMakeLists.txt | 2 +- ...ingBreadcrumbs.swift => Breadcrumbs.swift} | 74 ++++++++++--------- benchmark/utils/main.swift | 4 +- 3 files changed, 42 insertions(+), 38 deletions(-) rename benchmark/single-source/{StringBreadcrumbs.swift => Breadcrumbs.swift} (88%) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt 
index 255f05cb1bb90..3985dd515669b 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -42,6 +42,7 @@ set(SWIFT_BENCH_MODULES single-source/BinaryFloatingPointConversionFromBinaryInteger single-source/BinaryFloatingPointProperties single-source/BitCount + single-source/Breadcrumbs single-source/ByteSwap single-source/COWTree single-source/COWArrayGuaranteedParameterOverhead @@ -153,7 +154,6 @@ set(SWIFT_BENCH_MODULES single-source/StaticArray single-source/StrComplexWalk single-source/StrToInt - single-source/StringBreadcrumbs single-source/StringBuilder single-source/StringComparison single-source/StringEdits diff --git a/benchmark/single-source/StringBreadcrumbs.swift b/benchmark/single-source/Breadcrumbs.swift similarity index 88% rename from benchmark/single-source/StringBreadcrumbs.swift rename to benchmark/single-source/Breadcrumbs.swift index d8021a11a81b6..59525297f5467 100644 --- a/benchmark/single-source/StringBreadcrumbs.swift +++ b/benchmark/single-source/Breadcrumbs.swift @@ -1,4 +1,4 @@ -//===--- StringBreadcrumbs.swift ------------------------------*- swift -*-===// +//===--- Breadcrumbs.swift ------------------------------------*- swift -*-===// // // This source file is part of the Swift.org open source project // @@ -12,24 +12,28 @@ import TestsUtils -public let StringBreadcrumbs: [BenchmarkInfo] = [ - UTF16ToIndex(workload: longASCIIWorkload, count: 5_000).info, - UTF16ToIndex(workload: longMixedWorkload, count: 5_000).info, - IndexToUTF16(workload: longASCIIWorkload, count: 5_000).info, - IndexToUTF16(workload: longMixedWorkload, count: 5_000).info, +// Tests the performance of String's memoized UTF-8 to UTF-16 index conversion +// breadcrumbs. These are used to speed up range- and positional access through +// conventional NSString APIs. 
- UTF16ToIndexRange(workload: longASCIIWorkload, count: 1_000).info, - UTF16ToIndexRange(workload: longMixedWorkload, count: 1_000).info, - IndexToUTF16Range(workload: longASCIIWorkload, count: 1_000).info, - IndexToUTF16Range(workload: longMixedWorkload, count: 1_000).info, +public let Breadcrumbs: [BenchmarkInfo] = [ + UTF16ToIdx(workload: longASCIIWorkload, count: 5_000).info, + UTF16ToIdx(workload: longMixedWorkload, count: 5_000).info, + IdxToUTF16(workload: longASCIIWorkload, count: 5_000).info, + IdxToUTF16(workload: longMixedWorkload, count: 5_000).info, - CopyUTF16CodeUnits(workload: shortASCIIWorkload, count: 500).info, - CopyUTF16CodeUnits(workload: shortMixedWorkload, count: 500).info, + UTF16ToIdxRange(workload: longASCIIWorkload, count: 1_000).info, + UTF16ToIdxRange(workload: longMixedWorkload, count: 1_000).info, + IdxToUTF16Range(workload: longASCIIWorkload, count: 1_000).info, + IdxToUTF16Range(workload: longMixedWorkload, count: 1_000).info, - MutatedUTF16ToIndex(workload: shortASCIIWorkload, count: 50).info, - MutatedUTF16ToIndex(workload: shortMixedWorkload, count: 50).info, - MutatedIndexToUTF16(workload: shortASCIIWorkload, count: 50).info, - MutatedIndexToUTF16(workload: shortMixedWorkload, count: 50).info, + CopyUTF16CodeUnits(workload: asciiWorkload, count: 500).info, + CopyUTF16CodeUnits(workload: mixedWorkload, count: 500).info, + + MutatedUTF16ToIdx(workload: asciiWorkload, count: 50).info, + MutatedUTF16ToIdx(workload: mixedWorkload, count: 50).info, + MutatedIdxToUTF16(workload: asciiWorkload, count: 50).info, + MutatedIdxToUTF16(workload: mixedWorkload, count: 50).info, ] extension String { @@ -204,8 +208,8 @@ let asciiBase = #""" Illegal instruction: 4 """# -let shortASCIIWorkload = Workload( - name: "shortASCII", +let asciiWorkload = Workload( + name: "ASCII", string: asciiBase) let longASCIIWorkload = Workload( name: "longASCII", @@ -230,8 +234,8 @@ let mixedBase = """ Worst thing about working on String is that it breaks 
*everything*. Asserts, debuggers, and *especially* printf-style debugging ๐Ÿ˜ญ """ -let shortMixedWorkload = Workload( - name: "shortMixed", +let mixedWorkload = Workload( + name: "Mixed", string: mixedBase) let longMixedWorkload = Workload( name: "longMixed", @@ -243,14 +247,14 @@ let longMixedWorkload = Workload( //============================================================================== /// Convert `count` random UTF-16 offsets into String indices. -class UTF16ToIndex: BenchmarkBase { +class UTF16ToIdx: BenchmarkBase { let count: Int var inputOffsets: [Int] = [] var outputIndices: [String.Index] = [] init(workload: Workload, count: Int) { self.count = count - super.init(name: "StringBreadcrumbs.UTF16ToIndex", workload: workload) + super.init(name: "Breadcrumbs.UTF16ToIdx", workload: workload) } override func setUp() { @@ -277,14 +281,14 @@ class UTF16ToIndex: BenchmarkBase { } /// Convert `count` random String indices into UTF-16 offsets. -class IndexToUTF16: BenchmarkBase { +class IdxToUTF16: BenchmarkBase { let count: Int var inputIndices: [String.Index] = [] var outputOffsets: [Int] = [] init(workload: Workload, count: Int) { self.count = count - super.init(name: "StringBreadcrumbs.IndexToUTF16", workload: workload) + super.init(name: "Breadcrumbs.IdxToUTF16", workload: workload) } override func setUp() { @@ -311,14 +315,14 @@ class IndexToUTF16: BenchmarkBase { /// Split a string into `count` random slices and convert their UTF-16 offsets /// into String index ranges. 
-class UTF16ToIndexRange: BenchmarkBase { +class UTF16ToIdxRange: BenchmarkBase { let count: Int var inputOffsets: [Range] = [] var outputIndices: [Range] = [] init(workload: Workload, count: Int) { self.count = count - super.init(name: "StringBreadcrumbs.UTF16ToIndexRange", workload: workload) + super.init(name: "Breadcrumbs.UTF16ToIdxRange", workload: workload) } override func setUp() { @@ -347,14 +351,14 @@ class UTF16ToIndexRange: BenchmarkBase { /// Split a string into `count` random slices and convert their index ranges /// into into UTF-16 offset pairs. -class IndexToUTF16Range: BenchmarkBase { +class IdxToUTF16Range: BenchmarkBase { let count: Int var inputIndices: [Range] = [] var outputOffsets: [Range] = [] init(workload: Workload, count: Int) { self.count = count - super.init(name: "StringBreadcrumbs.IndexToUTF16Range", workload: workload) + super.init(name: "Breadcrumbs.IdxToUTF16Range", workload: workload) } override func setUp() { @@ -387,7 +391,7 @@ class CopyUTF16CodeUnits: BenchmarkBase { init(workload: Workload, count: Int) { self.count = count - super.init(name: "StringBreadcrumbs.CopyUTF16CodeUnits", workload: workload) + super.init(name: "Breadcrumbs.CopyUTF16CodeUnits", workload: workload) } override func setUp() { @@ -417,9 +421,9 @@ class CopyUTF16CodeUnits: BenchmarkBase { } } -/// This is like `UTF16ToIndex` but appends to the string after every index +/// This is like `UTF16ToIdx` but appends to the string after every index /// conversion. In effect, this tests breadcrumb creation performance. 
-class MutatedUTF16ToIndex: BenchmarkBase { +class MutatedUTF16ToIdx: BenchmarkBase { let count: Int var inputOffsets: [Int] = [] var outputIndices: [String.Index] = [] @@ -427,7 +431,7 @@ class MutatedUTF16ToIndex: BenchmarkBase { init(workload: Workload, count: Int) { self.count = count super.init( - name: "StringBreadcrumbs.MutatedUTF16ToIndex", + name: "Breadcrumbs.MutatedUTF16ToIdx", workload: workload) } @@ -456,9 +460,9 @@ class MutatedUTF16ToIndex: BenchmarkBase { } -/// This is like `UTF16ToIndex` but appends to the string after every index +/// This is like `UTF16ToIdx` but appends to the string after every index /// conversion. In effect, this tests breadcrumb creation performance. -class MutatedIndexToUTF16: BenchmarkBase { +class MutatedIdxToUTF16: BenchmarkBase { let count: Int var inputIndices: [String.Index] = [] var outputOffsets: [Int] = [] @@ -466,7 +470,7 @@ class MutatedIndexToUTF16: BenchmarkBase { init(workload: Workload, count: Int) { self.count = count super.init( - name: "StringBreadcrumbs.MutatedIndexToUTF16", + name: "Breadcrumbs.MutatedIdxToUTF16", workload: workload) } diff --git a/benchmark/utils/main.swift b/benchmark/utils/main.swift index baec3b333410b..aca1c1499fd1b 100644 --- a/benchmark/utils/main.swift +++ b/benchmark/utils/main.swift @@ -30,6 +30,7 @@ import ArraySubscript import BinaryFloatingPointConversionFromBinaryInteger import BinaryFloatingPointProperties import BitCount +import Breadcrumbs import ByteSwap import COWTree import COWArrayGuaranteedParameterOverhead @@ -146,7 +147,6 @@ import StackPromo import StaticArray import StrComplexWalk import StrToInt -import StringBreadcrumbs import StringBuilder import StringComparison import StringEdits @@ -196,6 +196,7 @@ registerBenchmark(BinaryFloatingPointPropertiesBinade) registerBenchmark(BinaryFloatingPointPropertiesNextUp) registerBenchmark(BinaryFloatingPointPropertiesUlp) registerBenchmark(BitCount) +registerBenchmark(Breadcrumbs) registerBenchmark(ByteSwap) 
registerBenchmark(COWTree) registerBenchmark(COWArrayGuaranteedParameterOverhead) @@ -320,7 +321,6 @@ registerBenchmark(StackPromo) registerBenchmark(StaticArrayTest) registerBenchmark(StrComplexWalk) registerBenchmark(StrToInt) -registerBenchmark(StringBreadcrumbs) registerBenchmark(StringBuilder) registerBenchmark(StringComparison) registerBenchmark(StringEdits) From 7ce1219cb509fb1007808d3d4c33eaf92a3ffe1c Mon Sep 17 00:00:00 2001 From: Karoy Lorentey Date: Wed, 28 Nov 2018 18:03:28 +0000 Subject: [PATCH 3/5] [benchmark] Remove stray print --- benchmark/single-source/Breadcrumbs.swift | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmark/single-source/Breadcrumbs.swift b/benchmark/single-source/Breadcrumbs.swift index 59525297f5467..8fc43bea629d8 100644 --- a/benchmark/single-source/Breadcrumbs.swift +++ b/benchmark/single-source/Breadcrumbs.swift @@ -411,7 +411,6 @@ class CopyUTF16CodeUnits: BenchmarkBase { @inline(never) override func run() { for range in inputIndices { - print(range) outputBuffer.withUnsafeMutableBufferPointer { buffer in inputString._copyUTF16CodeUnits( into: UnsafeMutableBufferPointer(rebasing: buffer[range]), From 9d64704268e309d7cf7435aaa5a7a1d815b63427 Mon Sep 17 00:00:00 2001 From: Karoy Lorentey Date: Wed, 28 Nov 2018 19:08:58 +0000 Subject: [PATCH 4/5] [benchmark] Eliminate output buffers; collapse run() into run(iterations:); fix memory accumulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I planned to run some validation on the accumulated output in tearDown, but benchmarks don’t seem the right place for doing that. Having the run() vs run(iterations:) split interfered with getting e.g. Array.withUnsafeMutableBufferPointer moved out of the outer loop. MutatedIdxToUTF16 and its dual used to append a space to the input string on every single iteration of the inner loop, accumulating spaces indefinitely, even across the outer iterations. 
It now works on a per-iteration copy of the input string and alternates between appending/removing a final character. --- benchmark/single-source/Breadcrumbs.swift | 120 +++++++++++----------- 1 file changed, 61 insertions(+), 59 deletions(-) diff --git a/benchmark/single-source/Breadcrumbs.swift b/benchmark/single-source/Breadcrumbs.swift index 8fc43bea629d8..6741f9139e271 100644 --- a/benchmark/single-source/Breadcrumbs.swift +++ b/benchmark/single-source/Breadcrumbs.swift @@ -113,21 +113,17 @@ class BenchmarkBase { self.inputString = "" } - final func run(iterations: Int) { - for _ in 0 ..< iterations { - self.run() - } + func run(iterations: Int) { + fatalError("unimplemented abstract method") } - func run() {} - var info: BenchmarkInfo { return BenchmarkInfo( name: self.label, - runFunction: self.run(iterations:), + runFunction: { self.run(iterations: $0) }, tags: [.validation, .api, .String], - setUpFunction: self.setUp, - tearDownFunction: self.tearDown) + setUpFunction: { self.setUp() }, + tearDownFunction: { self.tearDown() }) } } @@ -250,7 +246,6 @@ let longMixedWorkload = Workload( class UTF16ToIdx: BenchmarkBase { let count: Int var inputOffsets: [Int] = [] - var outputIndices: [String.Index] = [] init(workload: Workload, count: Int) { self.count = count @@ -262,8 +257,6 @@ class UTF16ToIdx: BenchmarkBase { var rng = LCRNG(seed: seed) let range = 0 ..< inputString.utf16.count inputOffsets = Array(range.shuffled(using: &rng).prefix(count)) - outputIndices = [] - outputIndices.reserveCapacity(inputOffsets.count) } override func tearDown() { @@ -272,10 +265,11 @@ class UTF16ToIdx: BenchmarkBase { } @inline(never) - override func run() { - outputIndices.removeAll(keepingCapacity: true) - for offset in inputOffsets { - outputIndices.append(inputString._toUTF16Index(offset)) + override func run(iterations: Int) { + for _ in 0 ..< iterations { + for offset in inputOffsets { + blackHole(inputString._toUTF16Index(offset)) + } } } } @@ -284,7 +278,6 @@ class 
UTF16ToIdx: BenchmarkBase { class IdxToUTF16: BenchmarkBase { let count: Int var inputIndices: [String.Index] = [] - var outputOffsets: [Int] = [] init(workload: Workload, count: Int) { self.count = count @@ -295,8 +288,6 @@ class IdxToUTF16: BenchmarkBase { super.setUp() var rng = LCRNG(seed: seed) inputIndices = Array(inputString.indices.shuffled(using: &rng).prefix(count)) - outputOffsets = [] - outputOffsets.reserveCapacity(inputIndices.count) } override func tearDown() { @@ -305,10 +296,11 @@ class IdxToUTF16: BenchmarkBase { } @inline(never) - override func run() { - outputOffsets.removeAll(keepingCapacity: true) - for index in inputIndices { - outputOffsets.append(inputString._toUTF16Offset(index)) + override func run(iterations: Int) { + for _ in 0 ..< iterations { + for index in inputIndices { + blackHole(inputString._toUTF16Offset(index)) + } } } } @@ -318,7 +310,6 @@ class IdxToUTF16: BenchmarkBase { class UTF16ToIdxRange: BenchmarkBase { let count: Int var inputOffsets: [Range] = [] - var outputIndices: [Range] = [] init(workload: Workload, count: Int) { self.count = count @@ -331,8 +322,6 @@ class UTF16ToIdxRange: BenchmarkBase { inputOffsets = ( 0 ..< inputString.utf16.count ).randomIndexRanges(count: count, using: &rng) - outputIndices = [] - outputIndices.reserveCapacity(inputOffsets.count) } override func tearDown() { @@ -341,10 +330,11 @@ class UTF16ToIdxRange: BenchmarkBase { } @inline(never) - override func run() { - outputIndices.removeAll(keepingCapacity: true) - for range in inputOffsets { - outputIndices.append(inputString._toUTF16Indices(range)) + override func run(iterations: Int) { + for _ in 0 ..< iterations { + for range in inputOffsets { + blackHole(inputString._toUTF16Indices(range)) + } } } } @@ -354,7 +344,6 @@ class UTF16ToIdxRange: BenchmarkBase { class IdxToUTF16Range: BenchmarkBase { let count: Int var inputIndices: [Range] = [] - var outputOffsets: [Range] = [] init(workload: Workload, count: Int) { self.count = count @@ -365,8 
+354,6 @@ class IdxToUTF16Range: BenchmarkBase { super.setUp() var rng = LCRNG(seed: seed) inputIndices = self.inputString.randomIndexRanges(count: count, using: &rng) - outputOffsets = [] - outputOffsets.reserveCapacity(inputIndices.count) } override func tearDown() { @@ -375,10 +362,11 @@ class IdxToUTF16Range: BenchmarkBase { } @inline(never) - override func run() { - outputOffsets.removeAll(keepingCapacity: true) - for range in inputIndices { - outputOffsets.append(inputString._toUTF16Offsets(range)) + override func run(iterations: Int) { + for _ in 0 ..< iterations { + for range in inputIndices { + blackHole(inputString._toUTF16Offsets(range)) + } } } } @@ -409,12 +397,14 @@ class CopyUTF16CodeUnits: BenchmarkBase { } @inline(never) - override func run() { - for range in inputIndices { - outputBuffer.withUnsafeMutableBufferPointer { buffer in - inputString._copyUTF16CodeUnits( - into: UnsafeMutableBufferPointer(rebasing: buffer[range]), - range: range) + override func run(iterations: Int) { + outputBuffer.withUnsafeMutableBufferPointer { buffer in + for _ in 0 ..< iterations { + for range in inputIndices { + inputString._copyUTF16CodeUnits( + into: UnsafeMutableBufferPointer(rebasing: buffer[range]), + range: range) + } } } } @@ -425,7 +415,6 @@ class CopyUTF16CodeUnits: BenchmarkBase { class MutatedUTF16ToIdx: BenchmarkBase { let count: Int var inputOffsets: [Int] = [] - var outputIndices: [String.Index] = [] init(workload: Workload, count: Int) { self.count = count @@ -439,8 +428,6 @@ class MutatedUTF16ToIdx: BenchmarkBase { var generator = LCRNG(seed: seed) let range = 0 ..< inputString.utf16.count inputOffsets = Array(range.shuffled(using: &generator).prefix(count)) - outputIndices = [] - outputIndices.reserveCapacity(inputOffsets.count) } override func tearDown() { @@ -449,11 +436,20 @@ class MutatedUTF16ToIdx: BenchmarkBase { } @inline(never) - override func run() { - outputIndices.removeAll(keepingCapacity: true) - for offset in inputOffsets { - 
outputIndices.append(inputString._toUTF16Index(offset)) - inputString.append(" ") + override func run(iterations: Int) { + var flag = true + for _ in 0 ..< iterations { + var string = inputString + for offset in inputOffsets { + blackHole(string._toUTF16Index(offset)) + if flag { + string.append(" ") + flag = false + } else { + string.removeLast() + flag = true + } + } } } } @@ -464,7 +460,6 @@ class MutatedUTF16ToIdx: BenchmarkBase { class MutatedIdxToUTF16: BenchmarkBase { let count: Int var inputIndices: [String.Index] = [] - var outputOffsets: [Int] = [] init(workload: Workload, count: Int) { self.count = count @@ -477,8 +472,6 @@ class MutatedIdxToUTF16: BenchmarkBase { super.setUp() var rng = LCRNG(seed: seed) inputIndices = Array(inputString.indices.shuffled(using: &rng).prefix(count)) - outputOffsets = [] - outputOffsets.reserveCapacity(inputIndices.count) } override func tearDown() { @@ -487,11 +480,20 @@ class MutatedIdxToUTF16: BenchmarkBase { } @inline(never) - override func run() { - outputOffsets.removeAll(keepingCapacity: true) - for index in inputIndices { - outputOffsets.append(inputString._toUTF16Offset(index)) - inputString.append(" ") + override func run(iterations: Int) { + var flag = true + for _ in 0 ..< iterations { + var string = inputString + for index in inputIndices { + blackHole(string._toUTF16Offset(index)) + if flag { + string.append(" ") + flag = false + } else { + string.removeLast() + flag = true + } + } } } } From 69ae33ddb8350e7c6ef6f27185cc55b21ccaf00a Mon Sep 17 00:00:00 2001 From: Karoy Lorentey Date: Wed, 28 Nov 2018 19:51:28 +0000 Subject: [PATCH 5/5] [benchmark] More cleanups --- benchmark/single-source/Breadcrumbs.swift | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/benchmark/single-source/Breadcrumbs.swift b/benchmark/single-source/Breadcrumbs.swift index 6741f9139e271..36c9122689e60 100644 --- a/benchmark/single-source/Breadcrumbs.swift +++ b/benchmark/single-source/Breadcrumbs.swift @@ 
-120,10 +120,10 @@ class BenchmarkBase { var info: BenchmarkInfo { return BenchmarkInfo( name: self.label, - runFunction: { self.run(iterations: $0) }, + runFunction: self.run(iterations:), tags: [.validation, .api, .String], - setUpFunction: { self.setUp() }, - tearDownFunction: { self.tearDown() }) + setUpFunction: self.setUp, + tearDownFunction: self.tearDown) } } @@ -444,18 +444,17 @@ class MutatedUTF16ToIdx: BenchmarkBase { blackHole(string._toUTF16Index(offset)) if flag { string.append(" ") - flag = false } else { string.removeLast() - flag = true } + flag.toggle() } } } } -/// This is like `UTF16ToIdx` but appends to the string after every index +/// This is like `IdxToUTF16` but appends to the string after every index /// conversion. In effect, this tests breadcrumb creation performance. class MutatedIdxToUTF16: BenchmarkBase { let count: Int