Skip to content

Commit

Permalink
Merge pull request #129 from lehins/improve-sorting
Browse files Browse the repository at this point in the history
Improve performance of sorting both sequential and parallel:
  • Loading branch information
lehins committed May 11, 2023
2 parents e314834 + 2244984 commit ef637b5
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 10 deletions.
7 changes: 5 additions & 2 deletions .github/workflows/haskell.yml
Original file line number Diff line number Diff line change
Expand Up @@ -139,16 +139,19 @@ jobs:
- { os: ubuntu-latest, ghc: "9.0.2" }
- { os: ubuntu-latest, ghc: "9.2.7" }
- { os: ubuntu-latest, ghc: "9.4.4" }
- { os: ubuntu-latest, ghc: "9.6.1" }
# - { os: windows-latest, ghc: "8.0.2" } # OOM on tests
# - { os: windows-latest, ghc: "8.2.2" } # OOM on tests
# - { os: windows-latest, ghc: "9.0.1" } # OOM on tests
- { os: windows-latest, ghc: "9.2.7" }
- { os: windows-latest, ghc: "9.4.4" } # Linker errors
- { os: windows-latest, ghc: "9.4.4" }
- { os: windows-latest, ghc: "9.6.1" }
- { os: macOS-latest, ghc: "8.0.2" }
- { os: macOS-latest, ghc: "8.2.2" }
- { os: macOS-latest, ghc: "9.0.2" }
- { os: macOS-latest, ghc: "9.2.7" }
- { os: macOS-latest, ghc: "9.4.4" }
- { os: macOS-latest, ghc: "9.6.1" }

env:
cache-version: v3 # bump up this version to invalidate currently stored cache
Expand Down Expand Up @@ -261,7 +264,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
ghc: [9.2.5]
ghc: [9.2.7]

env:
STACK_ARGS: '--system-ghc'
Expand Down
56 changes: 56 additions & 0 deletions massiv-bench/bench/Sort.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}

module Main where

import Control.Concurrent
import Criterion.Main
import Data.Bits
import Data.Int
import Data.Massiv.Array as A
import Data.Word
import System.Random

log2i :: Int -> Int
log2i i = 63 - countLeadingZeros (fromIntegral i :: Word64)

main :: IO ()
main = do
let stdGen = mkStdGen 2023
-- sizes = [512,4096,32768,262144,2097152]
sizes = Prelude.take (log2i 32) $ iterate (* 8) 512
numCaps <- getNumCapabilities
defaultMain
[ bgroup
"Sort"
[ mkGroups numCaps (compute (uniformArray stdGen Par (Sz sz)))
| sz <- sizes
]
]

mkGroups :: Int -> Array S Ix1 Int64 -> Benchmark
mkGroups numCaps !vRand =
bgroup
(show (unSz (size vRand)))
[ mkGroup numCaps "random" vRand
, mkGroup numCaps "sorted" (A.quicksort vRand)
, mkGroup numCaps "reversed sorted" (A.compute (A.reverse Dim1 (A.quicksort vRand)))
, mkGroup numCaps "replicated" (A.replicate Seq (A.size vRand) 31415)
]

mkGroup :: Int -> String -> Array S Ix1 Int64 -> Benchmark
mkGroup numCaps name !v =
bgroup
name
[ bench
( "massiv/Array "
++ show comp
++ if comp == Seq then [] else " (min " ++ w n ++ " " ++ show s ++ ")"
)
$ nf A.quicksort (setComp comp v)
| (comp, n) <- (Seq, 1) : ((\n -> (ParN n, n)) <$> Prelude.take (log2i numCaps) (iterate (* 2) 2))
]
where
w n = show (log2i (fromIntegral n) + 4)
s = log2i (unSz (size v)) - 10
11 changes: 11 additions & 0 deletions massiv-bench/massiv-bench.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,17 @@ library
default-language: Haskell2010
ghc-options: -Wall

benchmark sort
type: exitcode-stdio-1.0
hs-source-dirs: bench
main-is: Sort.hs
ghc-options: -Wall -threaded -O2 -rtsopts -with-rtsopts=-N
build-depends: base
, criterion
, massiv
, random
default-language: Haskell2010

benchmark iter
type: exitcode-stdio-1.0
hs-source-dirs: bench
Expand Down
5 changes: 5 additions & 0 deletions massiv/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 1.0.4

* Improve performance of sorting algorithm and its parallelization. Fix huge slow down on
CPUs with at least 16 cores.

# 1.0.3

* Deprecated `indexWith` in favor of `indexAssert`
Expand Down
3 changes: 2 additions & 1 deletion massiv/massiv.cabal
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: massiv
version: 1.0.3.0
version: 1.0.4.0
synopsis: Massiv (Массив) is an Array Library.
description: Multi-dimensional Arrays with fusion, stencils and parallel computation.
homepage: https://github.com/lehins/massiv
Expand All @@ -22,6 +22,7 @@ tested-with: GHC == 8.0.2
, GHC == 9.0.2
, GHC == 9.2.7
, GHC == 9.4.4
, GHC == 9.6.1

flag unsafe-checks
description: Enable all the bounds checks for unsafe functions at the cost of
Expand Down
32 changes: 25 additions & 7 deletions massiv/src/Data/Massiv/Array/Ops/Sort.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,13 @@ import Control.Monad (when)
import Control.Monad.IO.Unlift
import Control.Monad.Primitive
import Control.Scheduler
import Data.Bits (countLeadingZeros)
import Data.Massiv.Array.Delayed.Stream
import Data.Massiv.Array.Mutable
import Data.Massiv.Array.Ops.Transform
import Data.Massiv.Core.Common
import Data.Massiv.Vector (scatMaybes, sunfoldrN)
import Data.Word (Word64)
import System.IO.Unsafe

-- | Count number of occurrences of each element in the array. Results will be
Expand Down Expand Up @@ -192,9 +194,18 @@ quicksortInternalM_
-> Scheduler s ()
-> MVector s r e
-> m ()
quicksortInternalM_ fLT fEQ scheduler marr =
scheduleWork scheduler $ qsort (numWorkers scheduler) 0 (unSz (sizeOfMArray marr) - 1)
quicksortInternalM_ fLT fEQ scheduler marr
| numWorkers scheduler < 2 || depthPar <= 0 = qsortSeq 0 (k - 1)
| otherwise = qsortPar depthPar 0 (k - 1)
where
-- How deep into the search tree should we continue scheduling jobs. Constants below
-- were discovered imperically:
depthPar = min (logNumWorkers + 4) (logSize - 10)
k = unSz (sizeOfMArray marr)
-- We must use log becuase decinding into a tree creates an exponential number of jobs
logNumWorkers = 63 - countLeadingZeros (fromIntegral (numWorkers scheduler) :: Word64)
-- Using many cores on small vectors only makes things slower
logSize = 63 - countLeadingZeros (fromIntegral k :: Word64)
ltSwap i j = do
ei <- unsafeLinearRead marr i
ej <- unsafeLinearRead marr j
Expand All @@ -212,17 +223,24 @@ quicksortInternalM_ fLT fEQ scheduler marr =
_ <- ltSwap hi lo
ltSwap mid hi
{-# INLINE getPivot #-}
qsort !n !lo !hi =
qsortPar !n !lo !hi =
when (lo < hi) $ do
p <- getPivot lo hi
l <- unsafeUnstablePartitionRegionM marr (`fLT` p) lo (hi - 1)
h <- unsafeUnstablePartitionRegionM marr (`fEQ` p) l hi
if n > 0
then do
let !n' = n - 1
scheduleWork scheduler $ qsort n' lo (l - 1)
scheduleWork scheduler $ qsort n' h hi
scheduleWork scheduler $ qsortPar n' lo (l - 1)
scheduleWork scheduler $ qsortPar n' h hi
else do
qsort n lo (l - 1)
qsort n h hi
qsortSeq lo (l - 1)
qsortSeq h hi
qsortSeq !lo !hi =
when (lo < hi) $ do
p <- getPivot lo hi
l <- unsafeUnstablePartitionRegionM marr (`fLT` p) lo (hi - 1)
h <- unsafeUnstablePartitionRegionM marr (`fEQ` p) l hi
qsortSeq lo (l - 1)
qsortSeq h hi
{-# INLINE quicksortInternalM_ #-}

0 comments on commit ef637b5

Please sign in to comment.