Skip to content

Commit

Permalink
Merge ea5e1ef into e4adb1c
Browse files Browse the repository at this point in the history
  • Loading branch information
smootoo committed Oct 28, 2015
2 parents e4adb1c + ea5e1ef commit 6a5af81
Show file tree
Hide file tree
Showing 7 changed files with 174 additions and 26 deletions.
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# can't use 0.13.7 https://github.com/sbt/sbt/issues/1776
sbt.version=0.13.7-RC3
sbt.version=0.13.8

2 changes: 1 addition & 1 deletion src/main/scala/org/suecarter/tablediff/JTableDiff.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,6 @@ static public <T> ReportContent reportContentFromArray(T[][] content, int rowDep
*/
@SuppressWarnings("unchecked")
static public ReportContent produceReportDiff(ReportContent table1, ReportContent table2) {
return TableDiff$.MODULE$.produceReportDiff(table1, table2, TableDiff$.MODULE$.defaultMainValueComparison());
return TableDiff$.MODULE$.produceReportDiff(table1, table2, TableDiff$.MODULE$.defaultMainValueComparison(),TableDiff$.MODULE$.diffChunkSize());
}
}
16 changes: 10 additions & 6 deletions src/main/scala/org/suecarter/tablediff/StringTableDiff.scala
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ object StringTableDiff {
horizontalLine('-') + "\n" + columnHeaders.map(resizeRowCells).mkString("", "\n", "\n")
else "") +
horizontalLine('-') + "\n" +
(if (!rows.isEmpty) {
(if (rows.nonEmpty) {
rows.map(resizeRowCells).mkString("", "\n", "\n") +
horizontalLine('-') + "\n"
} else "")
Expand All @@ -95,24 +95,28 @@ object StringTableDiff {
sameLeft:String = "",
missingRight: String = "-]",
addedRight: String = "+}",
sameRight:String = "") = {
sameRight:String = "",
chunkSize: Option[Int] = None,
complexityThreshold: Int = 4) = {
value.fold(l => {
val leftString = l.left.map(x => if (isEmpty(x)) "" else valueRenderer(x)).getOrElse("")
val rightString = l.right.map(x => if (isEmpty(x)) "" else valueRenderer(x)).getOrElse("")

val diffs = zipLongestCommonSubsequence(leftString, rightString)
val diffs = chunkSize.map(c => zipLongestCommonSubsequence(leftString, rightString, c)).
getOrElse(zipLongestCommonSubsequence(leftString, rightString))
val onlyNumericDiffs = {
case class NumberState(digitYet: Boolean = false,
digitsAndLetters:Boolean = false,
decimalPointYet: Boolean = false,
onlyNumerics: Boolean = false,
numericMinusYet: Boolean = false)
diffs.span(!_.hasANone)._2.reverse.span(!_.hasANone)._2.foldLeft(NumberState())((state, diffLoc) => {
val diffElements = diffs.span(!_.hasANone)._2.reverse.span(!_.hasANone)._2
diffElements.foldLeft(NumberState())((state, diffLoc) => {
val s = diffLoc.value match {
case _ if state.digitsAndLetters => state // never be true once digitsAndLetters is true
case m if m == '-' && !state.digitYet => state.copy(numericMinusYet = true, digitYet = true, onlyNumerics = true)
case p if p == '.' && state.decimalPointYet => state.copy(onlyNumerics = false)
case p if p == '.' && state.digitYet => state.copy(decimalPointYet = true)
case p if p == '.' => state.copy(decimalPointYet = true)
case d if Character.isDigit(d) => state.copy(digitYet = true, onlyNumerics = true)
case _ => state.copy(digitsAndLetters = true, onlyNumerics = false)
}
Expand Down Expand Up @@ -147,7 +151,7 @@ object StringTableDiff {
}.toList :+
diffs.lastOption.map(tail => decorate(Some(tail), None).prependValue(tail.value)).getOrElse(DiffComplex())
// somewhat arbitrary, but if the inplace diff is longer than a simple diff, just return the simple one
if (onlyNumericDiffs || inPlaceDiff.map(_.complex).sum >= 4)
if (onlyNumericDiffs || inPlaceDiff.map(_.complex).sum >= complexityThreshold)
l.left.map(x => if (isEmpty(x)) "" else missingLeft + valueRenderer(x) + missingRight).getOrElse("") +
l.right.map(x => if (isEmpty(x)) "" else addedLeft + valueRenderer(x) + addedRight).getOrElse("")
else
Expand Down
71 changes: 55 additions & 16 deletions src/main/scala/org/suecarter/tablediff/TableDiff.scala
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ object TableDiff {
*/
def produceReportDiff[R, C, M](leftReport: ReportContent[R, C, M],
rightReport: ReportContent[R, C, M],
mainValueComparison: (Option[M], Option[M]) => Boolean = defaultMainValueComparison)
mainValueComparison: (Option[M], Option[M]) => Boolean = defaultMainValueComparison,
chunkSize: Int = diffChunkSize)
: ReportContent[ValueDiff[R], ValueDiff[C], ValueDiff[M]] = {
// get the value from the main data corresponding to this row and column indexes
def mainValue(rowIndex: Option[Int], colIndex: Option[Int], report: ReportContent[R, C, M]): Option[M] =
Expand All @@ -155,10 +156,15 @@ object TableDiff {
}
// this logic tries to collapse headers diff back to a single ValueDiff if the mainData matches
// (i.e. only the headerValue is a diff). Will only search "one" row down for match
def collapseHeaders[T](headerRows: ReportRow[DiffLocation[T]], leftMainData: ReportSection[M], rightMainData: ReportSection[M])
def collapseHeaders[T](headerRows: ReportRow[DiffLocation[T]],
leftMainData: ReportSection[M],
rightMainData: ReportSection[M])
: ReportRow[DiffLocation[ValueDiff[T]]] = {
@tailrec
def inner[S](accumulator: ReportRow[DiffLocation[ValueDiff[S]]], headerRows: ReportRow[DiffLocation[S]], leftMainData: ReportSection[M], rightMainData: ReportSection[M])
def inner[S](accumulator: ReportRow[DiffLocation[ValueDiff[S]]],
headerRows: ReportRow[DiffLocation[S]],
leftMainData: ReportSection[M],
rightMainData: ReportSection[M])
: ReportRow[DiffLocation[ValueDiff[S]]] = {
if (headerRows.isEmpty)
accumulator
Expand All @@ -167,14 +173,15 @@ object TableDiff {
case Seq(dlLeft@DiffLocation(leftValue, Some(leftI), None), tail@_*) => {
// find row with just a left value
val matchingRightValue: Option[(DiffLocation[TableDiff.ValueDiff[S]], ReportRow[DiffLocation[S]])] = tail.collectFirst {
// try to match to row with just a right value
case dlRight@DiffLocation(rightValue, None, Some(rightI)) => {
// try to match to row with just a right value
val leftI = dlLeft.iLeft.getOrElse(0)
val rightI = dlRight.iRight.getOrElse(0)
if ((leftMainData.size == 0 && rightMainData.size == 0) // either main data is empty
if (((leftMainData.isEmpty && rightMainData.isEmpty) // either main data is empty
|| ((leftMainData.size >= leftI + 1) && // or indexes point to same elements, 1 row away
(rightMainData.size >= rightI + 1) &&
leftMainData(leftI) == rightMainData(rightI)))
&& rightI == accumulator.flatMap(_.iRight).reduceOption(_ max _).getOrElse(-1)+1)
(DiffLocation(Left(EitherSide(Some(dlLeft.value), Some(dlRight.value))), Some(leftI), Some(rightI)),
tail.filterNot(_ == dlRight))
else
Expand Down Expand Up @@ -209,7 +216,8 @@ object TableDiff {
if (rightReport.fillForwardBlankHeaders)
fillSectionHeaders(pivotHeaders(rightSection))
else
pivotHeaders(rightSection))
pivotHeaders(rightSection),
chunkSize = chunkSize)
}
val resultCols = zipLCSColumnSection(leftReport.columnHeaders, rightReport.columnHeaders)
val resultRowColHeaders = zipLCSColumnSection(leftReport.rowColumnHeaders, rightReport.rowColumnHeaders)
Expand All @@ -220,9 +228,10 @@ object TableDiff {
}
val leftRowHeaders = flattenRowHeaderSection(leftReport)
val rightRowHeaders = flattenRowHeaderSection(rightReport)
val resultRows = zipLongestCommonSubsequence(
val resultRows = zipLongestCommonSubsequence(
if (leftReport.fillForwardBlankHeaders) fillSectionHeaders(leftRowHeaders) else leftRowHeaders,
if (rightReport.fillForwardBlankHeaders) fillSectionHeaders(rightRowHeaders) else rightRowHeaders)
if (rightReport.fillForwardBlankHeaders) fillSectionHeaders(rightRowHeaders) else rightRowHeaders,
chunkSize = chunkSize)

// process row and column headers in context of MainData section
val rows: ReportRow[DiffLocation[ValueDiff[ReportRow[R]]]] =
Expand Down Expand Up @@ -253,7 +262,8 @@ object TableDiff {
columnHeaders = pivotHeaders(fillSectionHeaders(pivotHeaders(report.columnHeaders))))

protected[tablediff] def zipLongestCommonSubsequence[T](fullLeftSeq: ReportRow[T],
fullRightSeq: ReportRow[T]): ReportRow[DiffLocation[T]] = {
fullRightSeq: ReportRow[T],
chunkSize: Int = diffChunkSize): ReportRow[DiffLocation[T]] = {
def zipLCSChunk(leftOffset: Int,
rightOffset: Int,
leftSeq: ReportRow[T],
Expand Down Expand Up @@ -311,23 +321,43 @@ object TableDiff {
val (rightChunk, rightRemain) = nextRightSeq.splitAt(chunkSize)
val chunkDiffs = zipLCSChunk(leftOffSet, rightOffSet, leftChunk, rightChunk)
val anyMatches = chunkDiffs.foldLeft(false)((matched, diffLoc) => matched || diffLoc.locationType == InBoth)
// See if the edges of 2 chunks can simply be stiched back together
def stichChunks(leftChunk: ReportRow[DiffLocation[T]], rightChunk: ReportRow[DiffLocation[T]]): ReportRow[DiffLocation[T]] = {
val stiched = for {
rightDirection <- rightChunk.headOption.filter(_.hasANone).map(_.locationType)
possibleStichRight = rightChunk.takeWhile(d => rightDirection == d.locationType)
stichSize <- if (possibleStichRight.nonEmpty) Some(possibleStichRight.size) else None
possibleStichLeft = leftChunk.takeRight(stichSize).takeWhile(_.locationType ==
(if (rightDirection == OnlyLeft) OnlyRight else OnlyLeft)
)
possibleStich = possibleStichLeft.zip(possibleStichRight)
if possibleStichLeft.size == stichSize && possibleStich.forall{case (l,r) => l.value == r.value}
} yield {
leftChunk.dropRight(stichSize) ++
possibleStich.map { case (l, r) => DiffLocation(l.value,
l.iLeft orElse r.iLeft,
l.iRight orElse r.iRight)
} ++ rightChunk.drop(stichSize)
}
stiched.getOrElse(leftChunk ++ rightChunk)
}
// If we haven't found any matches and the remains are empty on one side, then keep the small side to match on
// if not, move onto the next chunks
if (!anyMatches && leftRemain.isEmpty && rightRemain.nonEmpty)
checkHeads(leftOffSet,
rightOffSet + rightChunk.size,
acc ++ chunkDiffs.filter(_.locationType == OnlyRight),
stichChunks(acc, chunkDiffs.filter(_.locationType == OnlyRight)),
leftChunk,
rightRemain)
else if (!anyMatches && leftRemain.nonEmpty && rightRemain.isEmpty)
checkHeads(leftOffSet + leftChunk.size,
rightOffSet,
acc ++ chunkDiffs.filter(_.locationType == OnlyLeft),
stichChunks(acc, chunkDiffs.filter(_.locationType == OnlyLeft)),
leftRemain,
rightChunk)
else checkHeads(leftOffSet + leftChunk.size,
rightOffSet + rightChunk.size,
acc ++ chunkDiffs,
stichChunks(acc, chunkDiffs),
leftRemain,
rightRemain)

Expand All @@ -338,10 +368,19 @@ object TableDiff {
}

private val defaultChunkSize = 1000
protected lazy val chunkSize = try {
Properties.envOrElse("TABLEDIFFCHUNKSIZE", defaultChunkSize.toString).toInt
} catch {
case x: NumberFormatException => defaultChunkSize
protected[tablediff] lazy val diffChunkSize = readChunkEnvVar()
val chunkEnvVarName = "TABLEDIFFCHUNKSIZE"
protected[tablediff] def readChunkEnvVar(envOverride: Option[String] = None) = {
val chunkEnvVar = envOverride.getOrElse(Properties.envOrElse(chunkEnvVarName, defaultChunkSize.toString))
try {
chunkEnvVar.toInt
} catch {
case x: NumberFormatException => {
System.err.println(s"Unable to set diff chunk size $chunkEnvVarName=$chunkEnvVar ${x.getMessage}")
System.err.println(s"Using defaultChunkSize $defaultChunkSize")
defaultChunkSize
}
}
}

private case class Memoise[A, B](f: A => B) extends (A => B) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ class DiffRendererTests extends FunSuite {
("catde", "cat.de", "cat{+.+}de"),
("1246", "12x6", "12[-4-]{+x+}6"),
("124.6", "124..6", "124.{+.+}6"),
("124.6", "124...6", "124.{+..+}6"),
("-124.6", "124...6", "[---]124{+..+}.6"),
(None, "", ""),
(None, None, ""),
("", None, "")
Expand Down
99 changes: 99 additions & 0 deletions src/test/scala/org/suecarter/tablediff/TableDiffTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,37 @@ class TableDiffTests extends FunSuite {
}
}

test("Chunking sequence diffs") {
val cases: List[(String, String, String, Seq[String])] =
L(
("case 1", "a", "b", Seq("[-a-]{+b+}"))
, ("case 2", "a", "a", Seq("a"))
, ("case 3", "ab", "a", Seq("a[-b-]"))
, ("case 4", "abd", "acd", Seq("a[-b-]{+c+}d"))
, ("case 5", "ad", "df", Seq("[-a-]d{+f+}"))
, ("case 6", "abd", "def", Seq("[-a-]{+d+}[-b-]{+e+}[-d-]{+f+}", "[-ab-]{+de+}[-d-]{+f+}", "[-ab-]d{+ef+}"))
, ("case 7", "abdef", "def", Seq("[-a-]{+d+}[-b-]{+e+}[-de-]f", "[-ab-]def"))
, ("case 8", "abdef", "ababdef", Seq("ab[-d-]{+a+}[-e-]{+bde+}f", "ab[-de-]{+abde+}f", "ab{+ab+}def"))
, ("case 9", "abcdefgh", "adefgh", Seq("a[-b-]{+d+}[-c-]{+e+}[-d-]{+f+}[-e-]{+g+}[-fg-]h", "a[-bc-]defgh"))
, ("case 10", "abcdefgh", "abxxcdefgh", Seq("ab[-c-]{+x+}[-d-]{+x+}[-e-]{+c+}[-f-]{+d+}[-g-]{+efg+}h",
"ab[-cd-]{+xx+}[-ef-]{+cdef+}gh",
"ab{+xx+}cdefgh"))
)

cases.foreach {
case (description, left, right, expectedDiffs) =>
List(1, 2, 5, 100).zipWithIndex.foreach{case(chunkSize, i) => {
val stringRep = StringTableDiff.valueDiffRenderer(E(left, right),
chunkSize = Some(chunkSize),
complexityThreshold = Int.MaxValue)
val expectedDiff = if (i >= expectedDiffs.size) expectedDiffs.last else expectedDiffs(i)
//println(s"$description $chunkSize $stringRep")
assert(stringRep === expectedDiff, s"DiffStrings $description chunk size $chunkSize\n" +
s" $left $right\nGot $stringRep expected $expectedDiff")
}}
}
}

implicit def eitherTuple2(t: (Any, Any)): Left[EitherSide[Any], Nothing] = Left(EitherSide(Some(t._1), Some(t._2)))

test("diff reports") {
Expand Down Expand Up @@ -329,6 +360,74 @@ class TableDiffTests extends FunSuite {
}
}

test("chunking diff reports") {
val cases = L(
// Tuple3(left report, right report, List(difference reports for increasing chunk size))
(ReportContent(Array("No","0","1","2").map(Array(_)), 1,1),
ReportContent(Array("No","1","2").map(Array(_)), 1,1), List(
"""+-----+
||No |
|+-----+
||[-0-]|
||1 |
||2 |
|+-----+
|""".stripMargin
)),
(ReportContent(Array("No","0","1","2", "5").map(Array(_)), 1,1),
ReportContent(Array("No","1","2", "3", "4", "5").map(Array(_)), 1,1), List(
"""+-----+
||No |
|+-----+
||[-0-]|
||1 |
||2 |
||{+3+}|
||{+4+}|
||5 |
|+-----+
|""".stripMargin,
"""+-----+
||No |
|+-----+
||[-0-]|
||1 |
||{+2+}|
||{+3+}|
||{+4+}|
||[-2-]|
||5 |
|+-----+
|""".stripMargin,
"""+-----+
||No |
|+-----+
||[-0-]|
||1 |
||2 |
||{+3+}|
||{+4+}|
||5 |
|+-----+
|""".stripMargin
))
)
cases.foreach {
case (l, r, diffReports) =>
List(1,2,5,100).zipWithIndex.foreach {case(chunkSize, i) =>
val diffReport = produceReportDiff(l, r, chunkSize = chunkSize)
// println(StringTableDiff.diffReportToString(l))
// println(StringTableDiff.diffReportToString(r))
val expectedDiff = if (i >= diffReports.size) diffReports.last else diffReports(i)
val diffReportString = StringTableDiff.diffReportToString(diffReport)
// println(diffReportString)
assert(diffReportString === expectedDiff,
"Expected \n" + expectedDiff +
"Actual \n" + diffReportString)
}
}
}

test("Report Diff with custom comparison") {
val defaultDiff = onlyTheDiffs(produceReportDiff(leftReport, leftReportWithASmallNumericDiff))
assert(!defaultDiff.isEmpty, "Expected to have one diff")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.suecarter.tablediff

import org.scalatest.FunSuite
import scala.util.Properties
import scala.{List => L}
import ReportContent._
import TableDiffUtilTests.diffed
Expand Down Expand Up @@ -252,8 +253,11 @@ class TableDiffUtilTests extends FunSuite {
// println(diffReportToString(onlyDiffs))
assert(onlyDiffs === d)
}


}
test("read chunk env var") {
assert(TableDiff.readChunkEnvVar() == TableDiff.diffChunkSize)
println(s"testing invalid ${TableDiff.chunkEnvVarName}")
assert(TableDiff.readChunkEnvVar(Some("xxx")) == TableDiff.diffChunkSize)
}
}

Expand Down

0 comments on commit 6a5af81

Please sign in to comment.