Skip to content

Commit

Permalink
Harmonize index combination rule names with KMC3 for familiarity (fir…
Browse files Browse the repository at this point in the history
…st -> left, subtract -> counters_subtract, etc)
  • Loading branch information
jtnystrom committed Nov 25, 2022
1 parent e8fea9b commit 34bb0eb
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 17 deletions.
33 changes: 19 additions & 14 deletions src/main/scala/com/jnpersson/discount/bucket/Reducer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -129,21 +129,22 @@ object Reducer {
/** Select the minimum value */
object Min extends Type
/** Select the first value */
object First extends Type
object Left extends Type
/** Select the second value */
object Second extends Type
object Right extends Type
/** Subtract k-mer counts A-B, preserving positive results */
object Subtract extends Type
object CountersSubtract extends Type
/** Preserve only those k-mers that were present in A but absent in B, keeping their counts from A (weaker version of counters_subtract) */
object KmerSubtract extends Type

def parseType(typ: String): Type = typ match {
case "sum" => Sum
case "max" => Max
case "min" => Min
case "first" => First
case "second" => Second
case "subtract" => Subtract
case "left" => Left
case "right" => Right
case "counters_subtract" => CountersSubtract
case "kmers_subtract" => KmerSubtract
}

def unionForK(k: Int, forwardOnly: Boolean, reduction: Type = Sum): Reducer =
Expand All @@ -154,10 +155,12 @@ object Reducer {
case Sum => SumReducer(k, forwardOnly, intersect)
case Max => MaxReducer(k, forwardOnly, intersect)
case Min => MinReducer(k, forwardOnly, intersect)
case First => FirstReducer(k, forwardOnly, intersect)
case Second => SecondReducer(k, forwardOnly, intersect)
case Subtract => SubtractReducer(k, forwardOnly, intersect)
case KmerSubtract => KmerSubtractReducer(k, forwardOnly, intersect)
case Left => LeftReducer(k, forwardOnly, intersect)
case Right => RightReducer(k, forwardOnly, intersect)
case CountersSubtract => CountersSubtractReducer(k, forwardOnly, intersect)
case KmerSubtract =>
assert(intersect == false)
KmerSubtractReducer(k, forwardOnly)
}
}
}
Expand All @@ -169,7 +172,7 @@ final case class SumReducer(k: Int, forwardOnly: Boolean, intersect: Boolean) ex
cappedLongToInt(count1.toLong + count2.toLong)
}

final case class SubtractReducer(k: Int, forwardOnly: Boolean, intersect: Boolean) extends CountReducer {
final case class CountersSubtractReducer(k: Int, forwardOnly: Boolean, intersect: Boolean) extends CountReducer {

//Negate tags (counts)
override def preprocessSecond(bucket: ReducibleBucket): ReducibleBucket =
Expand All @@ -189,7 +192,9 @@ final case class SubtractReducer(k: Int, forwardOnly: Boolean, intersect: Boolea
}
}

final case class KmerSubtractReducer(k: Int, forwardOnly: Boolean, intersect: Boolean) extends CountReducer {
final case class KmerSubtractReducer(k: Int, forwardOnly: Boolean) extends CountReducer {
//Intersection with this reducer would always remove everything and produce an empty set
def intersect = false

//Negate tags (counts)
override def preprocessSecond(bucket: ReducibleBucket): ReducibleBucket =
Expand Down Expand Up @@ -219,12 +224,12 @@ final case class MaxReducer(k: Int, forwardOnly: Boolean, intersect: Boolean) ex
if (count1 > count2) count1 else count2
}

final case class FirstReducer(k: Int, forwardOnly: Boolean, intersect: Boolean) extends CountReducer {
final case class LeftReducer(k: Int, forwardOnly: Boolean, intersect: Boolean) extends CountReducer {
override def reduceCounts(count1: Tag, count2: Tag): Tag =
count1
}

final case class SecondReducer(k: Int, forwardOnly: Boolean, intersect: Boolean) extends CountReducer {
final case class RightReducer(k: Int, forwardOnly: Boolean, intersect: Boolean) extends CountReducer {
override def reduceCounts(count1: Tag, count2: Tag): Tag =
count2
}
Expand Down
8 changes: 5 additions & 3 deletions src/main/scala/com/jnpersson/discount/spark/Discount.scala
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ class DiscountConf(args: Array[String])(implicit spark: SparkSession) extends Sp
banner("Intersect sequence files or an index with other indexes.")
val inputs = opt[List[String]](descr = "Locations of additional indexes to intersect with", required = true)
val output = opt[String](descr = "Location where the intersected index is written", required = true)
val rule = choice(Seq("max", "min", "first", "second", "sum"), default = Some("min"),
val rule = choice(Seq("max", "min", "left", "right", "sum"), default = Some("min"),
descr = "Intersection rule for k-mer counts (default min)").map(Reducer.parseType)

def run(): Unit = {
Expand All @@ -187,7 +187,7 @@ class DiscountConf(args: Array[String])(implicit spark: SparkSession) extends Sp
banner("Union sequence files or an index with other indexes.")
val inputs = opt[List[String]](descr = "Locations of additional indexes to union with", required = true)
val output = opt[String](descr = "Location where the result is written", required = true)
val rule = choice(Seq("max", "min", "first", "second", "sum"), default = Some("sum"),
val rule = choice(Seq("max", "min", "left", "right", "sum"), default = Some("sum"),
descr = "Union rule for k-mer counts (default sum)").map(Reducer.parseType)

def run(): Unit = {
Expand All @@ -204,13 +204,15 @@ class DiscountConf(args: Array[String])(implicit spark: SparkSession) extends Sp
banner("Subtract an index from another index or from sequence files.")
val input = opt[String](descr = "Location of index B in (A-B)", required = true)
val output = opt[String](descr = "Location where the result is written", required = true)
val rule = choice(Seq("subtract"), default = Some("subtract"),
val rule = choice(Seq("counters_subtract", "kmers_subtract"), default = Some("counters_subtract"),
descr = "Difference rule for k-mer counts (default subtract)").map(Reducer.parseType)

def run(): Unit = {
val index1 = inputIndex(Some(input()))
val unionIdx = readIndex(input())
index1.params.compatibilityCheck(unionIdx.params, true)
//Conceptually, the diff operation is a kind of union: k-mers can remain even if they did not occur in
//both indexes
index1.union(unionIdx, rule()).write(output())
Index.read(output()).showStats()
}
Expand Down

0 comments on commit 34bb0eb

Please sign in to comment.