Skip to content

Commit

Permalink
optimize: don't allocate intermediate strings
Browse files Browse the repository at this point in the history
  • Loading branch information
shwestrick committed Jan 4, 2024
1 parent ad85b48 commit 7e0ae5a
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 7 deletions.
34 changes: 27 additions & 7 deletions main.sml
Original file line number Diff line number Diff line change
Expand Up @@ -92,16 +92,31 @@ struct
(* Order two station indices. Identical indices are EQUAL without looking at
 * the names; otherwise the result is String.compare applied to the two
 * station names returned by getStationName. *)
fun compare (i, j) =
if i = j then EQUAL else String.compare (getStationName i, getStationName j)

(* String-based equality on station indices: true when the indices are the
 * same, or when both are non-negative and getStationName yields equal strings.
 * NOTE(review): a second definition of `equal` follows directly below; this
 * one looks like the pre-change side of the diff -- confirm. *)
fun equal (i, j) =
i = j
orelse (i >= 0 andalso j >= 0 andalso getStationName i = getStationName j)
(* Equality on station indices that compares the elements of `contents`
 * directly instead of comparing station-name strings.
 *   - identical indices are equal;
 *   - otherwise, if either index is negative the result is false
 *     (presumably a negative index marks "no entry" -- confirm with callers);
 *   - otherwise the two token ranges must have the same length and hold the
 *     same element at every position. *)
fun equal (i1, i2) =
if i1 = i2 then
true
else if i1 < 0 orelse i2 < 0 then
false
else
let
(* Token 2*i is looked up for index i; presumably that is the station-name
 * token of entry i -- TODO confirm against the tokenizer. *)
val (start1, stop1) = getTokenRange (2 * i1)
val (start2, stop2) = getTokenRange (2 * i2)

(* Walk both ranges in lockstep. Reaching stop1 means every position matched;
 * stop1 itself is never read. Only stop1 is tested, which is enough because
 * the lengths are compared before this function is called. *)
fun check_from (j1, j2) =
j1 = stop1
orelse
((Seq.nth contents j1 = Seq.nth contents j2)
andalso check_from (j1 + 1, j2 + 1))
in
(* Length check first: andalso short-circuits, so check_from only runs on
 * ranges of equal length. *)
(stop1 - start1) = (stop2 - start2) andalso check_from (start1, start2)
end

fun hashStr str =
fun hashStr (numChars, getChar) =
let
(* just cap at 32 for long strings *)
val n = Int.min (32, String.size str)
val n = Int.min (32, numChars)
fun c i =
Word64.fromInt (Char.ord (String.sub (str, i)))
Word64.fromInt (Char.ord (getChar i))
fun loop h i =
if i >= n then h else loop (Word64.+ (Word64.* (h, 0w31), c i)) (i + 1)

Expand All @@ -111,7 +126,12 @@ struct
end

(* Hash of station index i: the Word64 produced by hashStr, converted to an
 * int with Word64.toIntX.
 * NOTE(review): two alternative bodies appear below. The single line calling
 * getStationName looks like the pre-change body; the let-form looks like its
 * replacement, which passes hashStr the token length (stop - start) and a
 * by-index reader over `contents` rather than a station-name string.
 * In the let-form the lambda parameter `i` shadows the outer `i`; inside the
 * lambda it is the offset within the token, not the station index. *)
fun hash i =
Word64.toIntX (hashStr (getStationName i))
let
val (start, stop) = getTokenRange (2 * i)
in
Word64.toIntX (hashStr (stop - start, fn i =>
Seq.nth contents (start + i)))
end
end


Expand Down
18 changes: 18 additions & 0 deletions timings/aware-240104-174900-after-string-opt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
→ taskset --cpu-list 1-4,73-76 ./main @mpl procs 8 -- /usr3/data/1brc/measurements.txt --verbose
loading /usr3/data/1brc/measurements.txt
load file: 2.7783s
tokenize: 31.7032s
number of entries: 1000000000
process entries: 43.6512s
compact: 0.0002s
sort: 0.0013s


→ ./main @mpl procs 144 set-affinity -- /usr3/data/1brc/measurements.txt --verbose
loading /usr3/data/1brc/measurements.txt
load file: 1.5441s
tokenize: 3.0547s
number of entries: 1000000000
process entries: 2.7558s
compact: 0.0009s
sort: 0.0040s

0 comments on commit 7e0ae5a

Please sign in to comment.