From eaebe3ea9c09e8d395ae0d793fc8b87d5f1d288c Mon Sep 17 00:00:00 2001 From: Veronika Romashkina Date: Tue, 9 Mar 2021 15:32:27 +0000 Subject: [PATCH] [#346] Implement `ordNub` using `nubOrd` from `containers` for better efficiency (#352) * [#346] Implement `ordNub` using `nubOrd` from `containers` for better efficiency Resolves #346 * Add cpp on old GHC support --- CHANGELOG.md | 4 +++ README.md | 3 ++- benchmark/Main.hs | 35 ++++++++++++++++++++++++++ src/Relude/Nub.hs | 62 ++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 100 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d04b619b..24a0379a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ The changelog is available [on GitHub][2]. * Add `infinitely` as more strictly typed `forever`. * Remove `Eq` constraint on `universeNonEmpty` * Add `maybeAt`, `!!?` with its arguments flipped. +* [#346](https://github.com/kowainik/relude/issues/346): + Reimplement `ordNub` through `nubOrd` from `containers`. + + Add `intNub` and `intNubOn` functions. ## 0.7.0.0 — May 14, 2020 diff --git a/README.md b/README.md index 7c23441c..63bbbab1 100644 --- a/README.md +++ b/README.md @@ -494,7 +494,8 @@ Finally, let's move to part describing the new cool features we bring with * `uncons` splits a list at the first element. * `ordNub` and `sortNub` are _O(n log n)_ versions of `nub` (which is quadratic), - also, `hashNub` and `unstableNub` are almost _O(n)_ versions of `nub`. + also, `hashNub` and `unstableNub` are almost _O(n)_ versions of `nub`, + and `intNub` for fast `Int`s nub. * `whenM`, `unlessM`, `ifM`, `guardM` — monadic guard combinators, that work with any `Monad`, e.g. `whenM (doesFileExist "foo")`. 
* General fold functions: diff --git a/benchmark/Main.hs b/benchmark/Main.hs index c8a8f172..7ad0a74b 100644 --- a/benchmark/Main.hs +++ b/benchmark/Main.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE CPP #-} + module Main (main) where import Relude hiding (show) @@ -17,6 +19,12 @@ main = defaultMain , bgroupList listOfBig "big" , bgroupList (nStrings 'z') "small str" , bgroupList (nStrings 'c') "big str" + +#if __GLASGOW_HASKELL__ > 804 + , bgroupIntList listOfSmall "small ints" + , bgroupIntList listOfBig "big ints" +#endif + , bgroupFold ] @@ -64,6 +72,33 @@ bgroupList f name = bgroup name $ map ($ f) safeSort :: [a] -> [a] safeSort = map NonEmpty.head . NonEmpty.group . sort +#if __GLASGOW_HASKELL__ > 804 +bgroupIntList + :: (Int -> [Int]) + -> String + -> Benchmark +bgroupIntList f name = bgroup name $ map ($ f) + [ bgroupNub 100 + , bgroupNub 500 + , bgroupNub 1000 + , bgroupNub 5000 + , bgroupNub 500000 + , bgroupNub 1000000 + ] + where + bgroupNub :: Int -> (Int -> [Int]) -> Benchmark + bgroupNub n listOf = bgroup (show n) nubBenchs + where + listN :: [Int] + listN = listOf n + + nubBenchs :: [Benchmark] + nubBenchs = + [ bench "ordNub" $ nf ordNub listN + , bench "intNub" $ nf intNub listN + ] +#endif + listOfSmall :: Int -> [Int] listOfSmall n = let part = n `div` 100 in concat $ replicate part [1..100] diff --git a/src/Relude/Nub.hs b/src/Relude/Nub.hs index b1a3cb76..da7fdea0 100644 --- a/src/Relude/Nub.hs +++ b/src/Relude/Nub.hs @@ -1,9 +1,10 @@ +{-# LANGUAGE CPP #-} {-# LANGUAGE Safe #-} {- | Copyright: (c) 2016 Stephen Diehl (c) 2016-2018 Serokell - (c) 2018-2020 Kowainik + (c) 2018-2021 Kowainik SPDX-License-Identifier: MIT Maintainer: Kowainik Stability: Stable @@ -27,6 +28,10 @@ Functions to remove duplicates from a list. * 'hashNub' is the fastest with 'Data.Text.Text'. + * 'intNub' is faster when you work with lists of 'Int's. + + * 'intNubOn' is fast with the lists of type that can have fixed number representations. 
+ * 'sortNub' has better performance than 'ordNub' but should be used when sorting is also needed. * 'unstableNub' has better performance than 'hashNub' but doesn't save the original order. @@ -35,6 +40,12 @@ Functions to remove duplicates from a list. module Relude.Nub ( hashNub , ordNub + +#if __GLASGOW_HASKELL__ > 804 + , intNub + , intNubOn +#endif + , sortNub , unstableNub ) where @@ -43,18 +54,31 @@ import Data.Eq (Eq) import Data.Hashable (Hashable) import Data.HashSet as HashSet import Data.Ord (Ord) -import Prelude ((.)) +import Prelude (Int, (.)) import qualified Data.Set as Set +#if __GLASGOW_HASKELL__ > 804 +import qualified Data.Containers.ListUtils as Containers +#endif + +-- $setup +-- >>> import Prelude (fromEnum) -{- | Like 'Prelude.nub' but runs in \( O(n \log n) \) time and requires 'Ord'. +{- | Removes duplicate elements from a list, keeping only the first occurrence of +the element. + +Like 'Prelude.nub' but runs in \( O(n \log n) \) time and requires 'Ord'. >>> ordNub [3, 3, 3, 2, 2, -1, 1] [3,2,-1,1] -} ordNub :: forall a . (Ord a) => [a] -> [a] +#if __GLASGOW_HASKELL__ > 804 +ordNub = Containers.nubOrd +{-# INLINE ordNub #-} +#else ordNub = go Set.empty where go :: Set.Set a -> [a] -> [a] @@ -64,6 +88,7 @@ ordNub = go Set.empty then go s xs else x : go (Set.insert x s) xs {-# INLINEABLE ordNub #-} +#endif {- | Like 'Prelude.nub' but runs in \( O(n \log_{16} n) \) time and requires 'Hashable'. @@ -101,3 +126,34 @@ sortNub = Set.toList . Set.fromList unstableNub :: (Eq a, Hashable a) => [a] -> [a] unstableNub = HashSet.toList . HashSet.fromList {-# INLINE unstableNub #-} + + +#if __GLASGOW_HASKELL__ > 804 + +{- | Removes duplicate elements from a list, keeping only the first occurrence of +the element. + +Like 'Prelude.nub' but runs in \( O(n \min(n, W)) \) time, where \( W \) is the number of bits in an 'Int', and works only on lists of 'Int'. 
+ +>>> intNub [3, 3, 3, 2, 2, -1, 1] +[3,2,-1,1] + +@since x.x.x.x +-} +intNub :: [Int] -> [Int] +intNub = Containers.nubInt + +{-# INLINE intNub #-} + +{- | Similar to 'intNub' but works on lists of any types by performing "nubbing" through 'Int's. + +>>> intNubOn fromEnum "ababbbcdaffee" +"abcdfe" + +@since x.x.x.x +-} +intNubOn :: (a -> Int) -> [a] -> [a] +intNubOn = Containers.nubIntOn +{-# INLINE intNubOn #-} + +#endif