Skip to content

Commit

Permalink
add takeFirst param to distinctBy to control which duplicates to pres…
Browse files Browse the repository at this point in the history
…erve
  • Loading branch information
jozic committed Aug 15, 2014
1 parent b31f45f commit 3037c22
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 8 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,20 @@ Released for scala 2.10 and scala 2.11

### distinctBy

Preserving the first of the duplicates (the default):
``` scala
scala> val xs = List(1 -> "one", 1 -> "ten", 2 -> "two", 2 -> "twenty").
| distinctBy(_._1)
xs: List[(Int, String)] = List((1,one), (2,two))
```

Or choosing which duplicate to preserve with a `takeFirst` function:
``` scala
scala> val xs = List(1 -> "one", 1 -> "ten", 2 -> "two", 2 -> "twenty").
| distinctBy(_._1, takeFirst = _._2.length > _._2.length)
xs: List[(Int, String)] = List((1,ten), (2,twenty))
```

### foldLeftWhile/foldRightWhile
``` scala
scala> val xs = Iterable(List(1,2,3), List(4,5), List(6,7,8,9)).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ package object extensions {

/** Builds a new $coll from this $coll without any duplicate elements (as
* determined by `==` after applying transforming function `f`).
* The first of the duplicates is preserved.
*
* Example:
* {{{
Expand All @@ -22,17 +23,34 @@ package object extensions {
* @return A new $coll which contains the first occurrence of every element of this $coll.
* @since 0.1.0
*/
def distinctBy[B](f: A => B)(implicit cbf: CanBuildFrom[Repr, A, Repr]): Repr = {
val b = cbf(seqLike.repr)
val seen = mutable.HashSet[B]()
def distinctBy[B](f: A => B)(implicit cbf: CanBuildFrom[Repr, A, Repr]): Repr =
distinctBy(f, (_, _) => true)(cbf)

/** Builds a new $coll from this $coll without any duplicate elements (as
* determined by `==` after applying transforming function `f`).
* The function `takeFirst` decides which of two duplicates is preserved. It is called with
* the duplicate kept so far and the newly encountered one: if it returns `true`, the
* former is kept; otherwise the latter replaces it.
*
* Example:
* {{{
* scala> val xs = List(1 -> "one", 1 -> "ten", 2 -> "two", 2 -> "twenty").
* | distinctBy(_._1, takeFirst = _._2.length > _._2.length)
* xs: List[(Int, String)] = List((1,ten), (2,twenty))
* }}}
*
* @return A new $coll which contains the selected occurrence of every element of this $coll.
* @since 0.1.1
*/
def distinctBy[B](f: A => B, takeFirst: (A, A) => Boolean)(implicit cbf: CanBuildFrom[Repr, A, Repr]): Repr = {
val seen = mutable.LinkedHashMap.empty[B, A]
for (x <- seqLike) {
val fx = f(x)
if (!seen(fx)) {
b += x
seen += fx
seen.get(fx) match {
case Some(a) => seen += fx -> (if (takeFirst(a, x)) a else x)
case _ => seen += fx -> x
}
}
b.result()
(cbf(seqLike.repr) ++= seen.values).result()
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class SeqLikeExtensionTest extends org.scalatest.FlatSpec with Matchers {
val eugeneM = Person("Eugene", "Medvediev", 31)
val vasiliy = Person("Vasiliy", "Platonov", 2)

"distinctBy" should "keep only unique elements based on transforming function" in {
"distinctBy" should "keep only first unique element based on transforming function" in {
val people = Seq(eugeneP, xeniya, eugeneM, vasiliy)

people.distinctBy(_.fName) should be(Seq(eugeneP, xeniya, vasiliy))
Expand All @@ -37,4 +37,11 @@ class SeqLikeExtensionTest extends org.scalatest.FlatSpec with Matchers {
assertEvaluateOnlyOnce(Seq.empty[Person])
assertEvaluateOnlyOnce(Stream.empty[Person])
}

it should "respect selector if one is provided" in {
val people = Seq(eugeneM, xeniya, eugeneP, vasiliy)

people.distinctBy(_.fName) should be(Seq(eugeneM, xeniya, vasiliy))
people.distinctBy(_.fName, (p1, p2) => p1.lName == "Platonov") should be(Seq(eugeneP, xeniya, vasiliy))
}
}

0 comments on commit 3037c22

Please sign in to comment.