Skip to content

Commit

Permalink
[#346] Implement ordNub using nubOrd from containers for better…
Browse files Browse the repository at this point in the history
… efficiency (#352)

* [#346] Implement `ordNub` using `nubOrd` from `containers` for better efficiency

Resolves #346

* Add cpp on old GHC support
  • Loading branch information
vrom911 committed Mar 9, 2021
1 parent f7b9117 commit eaebe3e
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 4 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ The changelog is available [on GitHub][2].
* Add `infinitely` as more strictly typed `forever`.
* Remove `Eq` constraint on `universeNonEmpty`
* Add `maybeAt`, `!!?` with its arguments flipped.
* [#346](https://github.com/kowainik/relude/issues/346):
Reimplement `ordNub` through `nubOrd` from `containers`.

Add `intNub` and `intNubOn` functions.

## 0.7.0.0 — May 14, 2020

Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,8 @@ Finally, let's move to part describing the new cool features we bring with

* `uncons` splits a list at the first element.
* `ordNub` and `sortNub` are _O(n log n)_ versions of `nub` (which is quadratic),
also, `hashNub` and `unstableNub` are almost _O(n)_ versions of `nub`.
also, `hashNub` and `unstableNub` are almost _O(n)_ versions of `nub`,
and `intNub` is a fast version of `nub` specialised to `Int`s.
* `whenM`, `unlessM`, `ifM`, `guardM` — monadic guard combinators, that work
with any `Monad`, e.g. `whenM (doesFileExist "foo")`.
* General fold functions:
Expand Down
35 changes: 35 additions & 0 deletions benchmark/Main.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
{-# LANGUAGE CPP #-}

module Main (main) where

import Relude hiding (show)
Expand All @@ -17,6 +19,12 @@ main = defaultMain
, bgroupList listOfBig "big"
, bgroupList (nStrings 'z') "small str"
, bgroupList (nStrings 'c') "big str"

#if __GLASGOW_HASKELL__ > 804
, bgroupIntList listOfSmall "small ints"
, bgroupIntList listOfBig "big ints"
#endif

, bgroupFold
]

Expand Down Expand Up @@ -64,6 +72,33 @@ bgroupList f name = bgroup name $ map ($ f)
-- Sorts the list and then keeps one representative (the head) of every run of
-- equal adjacent elements, so the result is sorted and free of duplicates.
safeSort :: [a] -> [a]
safeSort = map NonEmpty.head . NonEmpty.group . sort

#if __GLASGOW_HASKELL__ > 804
-- | Builds a benchmark group that compares 'ordNub' with 'intNub' on 'Int'
-- lists of several sizes. The lists are produced by applying the given
-- generator to each benchmarked size.
bgroupIntList
    :: (Int -> [Int])  -- ^ list generator, applied to every benchmarked size
    -> String          -- ^ name of the resulting benchmark group
    -> Benchmark
bgroupIntList f name =
    bgroup name [ sizedGroup size f | size <- [100, 500, 1000, 5000, 500000, 1000000] ]
  where
    -- One sub-group per size, labelled with that size and holding one
    -- benchmark per nub implementation.
    sizedGroup :: Int -> (Int -> [Int]) -> Benchmark
    sizedGroup size mkList =
        let input :: [Int]
            input = mkList size
        in bgroup (show size)
            [ bench "ordNub" (nf ordNub input)
            , bench "intNub" (nf intNub input)
            ]
#endif

-- | Repeats the list @[1..100]@ @n `div` 100@ times, so the result contains
-- only 100 distinct values no matter how long it is.
listOfSmall :: Int -> [Int]
listOfSmall n = concat (replicate copies [1 .. 100])
  where
    -- Number of full @[1..100]@ chunks that fit into @n@ elements.
    copies :: Int
    copies = n `div` 100

Expand Down
62 changes: 59 additions & 3 deletions src/Relude/Nub.hs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE Safe #-}

{- |
Copyright: (c) 2016 Stephen Diehl
(c) 2016-2018 Serokell
(c) 2018-2020 Kowainik
(c) 2018-2021 Kowainik
SPDX-License-Identifier: MIT
Maintainer: Kowainik <xrom.xkov@gmail.com>
Stability: Stable
Expand All @@ -27,6 +28,10 @@ Functions to remove duplicates from a list.
* 'hashNub' is the fastest with 'Data.Text.Text'.
* 'intNub' is faster when you work with lists of 'Int's.
* 'intNubOn' is fast for lists of any type whose elements can be represented as 'Int's.
* 'sortNub' has better performance than 'ordNub' but should be used when sorting is also needed.
* 'unstableNub' has better performance than 'hashNub' but doesn't save the original order.
Expand All @@ -35,6 +40,12 @@ Functions to remove duplicates from a list.
module Relude.Nub
( hashNub
, ordNub

#if __GLASGOW_HASKELL__ > 804
, intNub
, intNubOn
#endif

, sortNub
, unstableNub
) where
Expand All @@ -43,18 +54,31 @@ import Data.Eq (Eq)
import Data.Hashable (Hashable)
import Data.HashSet as HashSet
import Data.Ord (Ord)
import Prelude ((.))
import Prelude (Int, (.))

import qualified Data.Set as Set
#if __GLASGOW_HASKELL__ > 804
import qualified Data.Containers.ListUtils as Containers
#endif


-- $setup
-- >>> import Prelude (fromEnum)

{- | Like 'Prelude.nub' but runs in \( O(n \log n) \) time and requires 'Ord'.
{- | Removes duplicate elements from a list, keeping only the first occurrence of
the element.
Like 'Prelude.nub' but runs in \( O(n \log n) \) time and requires 'Ord'.
>>> ordNub [3, 3, 3, 2, 2, -1, 1]
[3,2,-1,1]
-}
ordNub :: forall a . (Ord a) => [a] -> [a]
#if __GLASGOW_HASKELL__ > 804
ordNub = Containers.nubOrd
{-# INLINE ordNub #-}
#else
ordNub = go Set.empty
where
go :: Set.Set a -> [a] -> [a]
Expand All @@ -64,6 +88,7 @@ ordNub = go Set.empty
then go s xs
else x : go (Set.insert x s) xs
{-# INLINEABLE ordNub #-}
#endif

{- | Like 'Prelude.nub' but runs in \( O(n \log_{16} n) \) time and requires 'Hashable'.
Expand Down Expand Up @@ -101,3 +126,34 @@ sortNub = Set.toList . Set.fromList
unstableNub :: (Eq a, Hashable a) => [a] -> [a]
-- Round-trips the list through a 'HashSet': duplicates collapse in the set, and
-- the result comes back in whatever order 'HashSet.toList' yields.
unstableNub = HashSet.toList . HashSet.fromList
{-# INLINE unstableNub #-}


#if __GLASGOW_HASKELL__ > 804

{- | Removes duplicate elements from a list of 'Int's, keeping only the first
occurrence of each element.

Like 'Prelude.nub' but runs in \( O(n \min(n, W)) \) time, where \( W \) is the
number of bits in an 'Int', and works only on lists of 'Int's.

>>> intNub [3, 3, 3, 2, 2, -1, 1]
[3,2,-1,1]

@since x.x.x.x
-}
intNub :: [Int] -> [Int]
intNub = Containers.nubInt

{-# INLINE intNub #-}

{- | Similar to 'intNub' but works on lists of any type by performing "nubbing"
through 'Int's: two elements count as duplicates when the given function maps
them to the same 'Int'.

>>> intNubOn fromEnum "ababbbcdaffee"
"abcdfe"

@since x.x.x.x
-}
intNubOn :: (a -> Int) -> [a] -> [a]
intNubOn = Containers.nubIntOn
{-# INLINE intNubOn #-}

#endif

0 comments on commit eaebe3e

Please sign in to comment.