In [1]:
%use dataframe, kandy

In [2]:
USE {
    dependencies {
        implementation("org.apache.commons:commons-math3:3.6.1")
    }
}

In [3]:
import java.nio.file.Path
import kotlin.io.path.*

/**
 * Extracts two integer fields written as `"<s1> = <int>, <s2> = <int>"` from this string.
 *
 * The labels are matched literally (regex metacharacters in them are escaped), and the
 * pattern may appear anywhere in the string; the first occurrence is used.
 *
 * @param s1 label of the first field.
 * @param s2 label of the second field.
 * @return the two parsed values, in the order (s1, s2).
 * @throws IllegalArgumentException if the string does not contain the expected pattern.
 * @throws NumberFormatException if a matched number does not fit into an [Int].
 */
fun String.parse(s1: String, s2: String): Pair<Int, Int> {
    // Escape the labels so that characters such as '.' or '(' cannot alter the pattern
    // or shift the capture-group indices.
    val pattern = Regex("""${Regex.escape(s1)} = (\d+), ${Regex.escape(s2)} = (\d+)""")
    val match = requireNotNull(pattern.find(this)) {
        "Expected \"$s1 = <int>, $s2 = <int>\" in: \"$this\""
    }
    val (first, second) = match.destructured
    return first.toInt() to second.toInt()
}

/**
 * Extracts two millisecond fields written as `"<s1> = <int> ms, <s2> = <int> ms"` from this string.
 *
 * The labels are matched literally (regex metacharacters in them are escaped), and the
 * pattern may appear anywhere in the string; the first occurrence is used.
 *
 * @param s1 label of the first field.
 * @param s2 label of the second field.
 * @return the two parsed values, in the order (s1, s2).
 * @throws IllegalArgumentException if the string does not contain the expected pattern.
 * @throws NumberFormatException if a matched number does not fit into a [Long].
 */
fun String.parseTime(s1: String, s2: String): Pair<Long, Long> {
    // Escape the labels so that characters such as '.' or '(' cannot alter the pattern
    // or shift the capture-group indices.
    val pattern = Regex("""${Regex.escape(s1)} = (\d+) ms, ${Regex.escape(s2)} = (\d+) ms""")
    val match = requireNotNull(pattern.find(this)) {
        "Expected \"$s1 = <int> ms, $s2 = <int> ms\" in: \"$this\""
    }
    val (first, second) = match.destructured
    return first.toLong() to second.toLong()
}

/**
 * Builds a data frame from a log file made of consecutive 8-line records.
 *
 * Within each record (0-based line index): line 1 starts with the benchmark name (the text
 * before the first space), line 2 holds `cnt`/`sat`, line 3 `unsat`/`unknown`, line 4
 * `reusedUnsat`, line 5 `solvingTime`/`checkingTime` and line 6 `updatingTime`/`totalTime`.
 * Lines 0 and 7 are not read, and the parsed `totalTime` is discarded. A trailing group of
 * fewer than 8 lines is ignored.
 *
 * Fails with an exception if a record line does not have the expected shape.
 *
 * @param path log file to read.
 * @return one row per record with the columns benchmark, cnt, sat, unsat, unknown,
 *   reusedUnsat, solvingTime, checkingTime and updatingTime.
 */
fun mkDataFrameFromLog(path: Path): DataFrame<*> {
    // One parsed 8-line record of the log.
    data class Record(
        val benchmark: String,
        val cnt: Int,
        val sat: Int,
        val unsat: Int,
        val unknown: Int,
        val reusedUnsat: Int,
        val solvingTime: Long,
        val checkingTime: Long,
        val updatingTime: Long,
    )

    val reusedUnsatPattern = Regex("reusedUnsat = (\\d+)")

    // Non-overlapping windows of exactly 8 lines; an incomplete tail is not emitted.
    val records = path.readLines()
        .windowed(size = 8, step = 8, partialWindows = false)
        .map { lines ->
            val name = lines[1].substringBefore(" ")
            val (count, satisfiable) = lines[2].parse("cnt", "sat")
            val (unsatisfiable, undecided) = lines[3].parse("unsat", "unknown")
            val reused = reusedUnsatPattern.find(lines[4])!!.groupValues[1].toInt()
            val (solving, checking) = lines[5].parseTime("solvingTime", "checkingTime")
            val (updating, _) = lines[6].parseTime("updatingTime", "totalTime")
            Record(name, count, satisfiable, unsatisfiable, undecided, reused, solving, checking, updating)
        }

    return dataFrameOf(
        "benchmark" to records.map { it.benchmark },
        "cnt" to records.map { it.cnt },
        "sat" to records.map { it.sat },
        "unsat" to records.map { it.unsat },
        "unknown" to records.map { it.unknown },
        "reusedUnsat" to records.map { it.reusedUnsat },
        "solvingTime" to records.map { it.solvingTime },
        "checkingTime" to records.map { it.checkingTime },
        "updatingTime" to records.map { it.updatingTime },
    )
}

In [10]:
/**
 * Returns this frame with an extra "config" column whose every row holds [name].
 */
fun DataFrame<*>.addConfigColumn(name: String): DataFrame<*> {
    // One copy of the label per existing row.
    val repeatedName = List(rowsCount()) { name }
    val configColumn = column(repeatedName).rename("config")
    return add(configColumn)
}

/**
 * Returns this frame with an extra "project" column derived from "benchmark":
 * the part of each benchmark name before its first '-'.
 */
fun DataFrame<*>.addProjectColumn(): DataFrame<*> {
    val benchmarks = this["benchmark"]
    val projects = benchmarks.map { (it as String).substringBefore('-') }
    return add(projects.rename("project"))
}

/**
 * Returns this frame with every column whose name ends in "Time" converted to Long.
 *
 * The affected columns are removed and their converted versions appended, so they end up
 * after all remaining columns.
 */
fun DataFrame<*>.timeToLong(): DataFrame<*> {
    val timeColumns = columns().filter { it.name.endsWith("Time") }.toTypedArray()
    val withoutTimeColumns = remove(*timeColumns)
    return withoutTimeColumns.addAll(timeColumns.map { it.convertToLong() })
}

// Directory whose entries each hold the results of one configuration.
val folder = "../output/cache"

// Load every entry of `folder`: prefer its smtData.csv, otherwise parse cache-a-lot.log.
// Each frame is tagged with the entry name as "config", gets a "project" column and is
// sorted by project before all frames are concatenated.
val data = Path(folder).listDirectoryEntries().map { configDir ->
    val csvFile = configDir / "smtData.csv"
    val measurements = when {
        csvFile.exists() -> DataFrame.readCSV(csvFile.toFile()).timeToLong()
        else -> mkDataFrameFromLog(configDir / "cache-a-lot.log")
    }
    measurements
        .addConfigColumn(configDir.name)
        .addProjectColumn()
        .sortBy("project")
}.concat()

data.columnNames()

[benchmark, cnt, sat, unsat, unknown, reusedUnsat, solvingTime, checkingTime, updatingTime, config, project]

In [11]:
// Number of distinct configurations present in the loaded data.
val configs = data.config.countDistinct()

// Benchmarks that occur under every configuration: count the distinct configs per
// benchmark and keep those whose count equals `configs`.
val fullBenchs = data
    .groupBy("benchmark")
    .aggregate { group -> group.config.countDistinct() into "configs" }
    .filter { row -> row["configs"] == configs }
    .benchmark
    .values
    .toList()

// Rows restricted to those fully covered benchmarks.
val fullData = data.filter { fullBenchs.contains(benchmark) }

"Excluded: ${data.benchmark.countDistinct() - fullBenchs.size}"

Excluded: 0

In [61]:
import org.jetbrains.kotlinx.kandy.ir.Plot

/**
 * Lays out one plot per configuration of `fullData` on a grid.
 *
 * The distinct configuration names are ordered by the key [sortBy] returns, narrowed by
 * [filter], and split into rows of [chunkSize] plots. The plot at row `i`, column `j` is
 * placed at (`width * j`, `height * i`).
 *
 * @param chunkSize number of plots per grid row.
 * @param width horizontal step between plots; also the plot width when [forceSize] is true.
 * @param height vertical step between rows; also the plot height when [forceSize] is true.
 * @param filter keeps only the configurations for which it returns true.
 * @param sortBy sort key for a configuration name.
 * @param forceSize when true, [width] and [height] are passed as the size of each plot;
 *   when false, only the position is passed.
 * @param block builds the plot for one configuration name.
 */
fun plotByConfig(
    chunkSize: Int,
    width: Int,
    height: Int,
    filter: (String) -> Boolean = { true },
    sortBy: (String) -> String = { it },
    forceSize: Boolean = true,
    block: (String) -> Plot
) = plotBunch {
    val selectedConfigs = fullData.config.distinct()
        .values
        .sortedBy(sortBy)
        .filter(filter)

    for ((rowIndex, rowConfigs) in selectedConfigs.chunked(chunkSize).withIndex()) {
        for ((columnIndex, conf) in rowConfigs.withIndex()) {
            val plot = block(conf)
            val left = width * columnIndex
            val top = height * rowIndex
            if (forceSize) {
                add(plot, left, top, width, height)
            } else {
                add(plot, left, top)
            }
        }
    }
}

In [83]:
// Selects configuration names that end in "tt" and do not contain "random".
val interestedFilter: (String) -> Boolean = { conf -> conf.endsWith("tt") && !conf.contains("random") }
// Sort key: the name reversed, so names are ordered by their suffix first.
val interestedSort: (String) -> String = { conf -> conf.reversed() }

In [137]:
// One stacked bar chart per selected configuration (two charts per row, 600x400 each),
// excluding "empty-tt": for every project, the summed `unsat` count is split into the
// part covered by `reusedUnsat` and the remainder.
plotByConfig(2, 600, 400, { it != "empty-tt" && interestedFilter(it) }, interestedSort) { conf ->
    fullData
        .filter { config == conf }
        .groupBy("project")
        .aggregate {
            val total = unsat.sum()
            val reused = reusedUnsat.sum()
            // Division by 1 does not change the value; both series are stored as Double.
            (total - reused).toDouble() / 1 into "unique unsat"
            reused.toDouble() / 1 into "reused unsat"
        }
        // Reshape to long form: "name" holds the series label, "value" its number.
        .gather("reused unsat", "unique unsat")
        .into("name", "value")
        .sortBy("project")
        // Grouping by series lets the bars of both series be stacked per project.
        .groupBy("name")
        .plot {
            bars {
                x(project)
                y("value")
                fillColor("name") {
                    scale = categorical(
                        "unique unsat" to Color.GREY,
                        "reused unsat" to Color.ORANGE,
                    )
                }
                position = Position.stack()
            }
            layout {
                title = conf
            }
        }
}

In [85]:
// One stacked bar chart per selected configuration (three per row, placed 350 apart
// horizontally and 400 apart vertically; sizes are set per plot below because
// forceSize is false). Each bar shows, per project, the solving, checking and updating
// time summed over the project's rows and divided by the summed `cnt`.
plotByConfig(3, 350, 400, interestedFilter, interestedSort, false) { conf ->
    fullData
        .filter { config == conf }
        .groupBy("project")
        .aggregate {
            (solvingTime.sum().toDouble() / cnt.sum()) into "solvingTime"
            (checkingTime.sum().toDouble() / cnt.sum()) into "checkingTime"
            (updatingTime.sum().toDouble() / cnt.sum()) into "updatingTime"
        }
        // Reshape to long form: "type" holds the time category, "value" the average.
        .gather(
            "solvingTime",
            "checkingTime",
            "updatingTime",
        )
        .into("type", "value")
        .sortBy("project")
        .groupBy("type")
        .plot {
            // Same fixed y range on every chart so the configurations are comparable.
            y {
                limits = 0..600
            }
            bars {
                x(project)
                y("value") {
                    axis.name = "avg. time (ms)"
                }
                fillColor("type") {
                    scale = categorical(
                        "solvingTime" to Color.ORANGE,
                        "checkingTime" to Color.GREY,
                        "updatingTime" to Color.BLUE,
                    )
                    // Only the "empty-tt" chart keeps its legend.
                    if (conf != "empty-tt") {
                        legend {
                            type = LegendType.None
                        }
                    }
                }
                position = Position.stack()
            }
            layout { 
                title = conf
                // "empty-tt" is drawn wider (500 vs 350); presumably to leave room for
                // the legend it alone shows - TODO confirm.
                if (conf == "empty-tt") {
                    size = 500 to 400
                } else {
                    size = 350 to 400
                }
            }
        }
}

In [89]:
// One box plot per selected configuration (three per row, 400x400 each): for every
// project, the distribution over rows of `solvingTime / cnt`.
plotByConfig(3, 400, 400, interestedFilter, interestedSort) { conf ->
    fullData
        .filter { config == conf }
        .sortBy("project")
        .plot {
            // Same fixed y range on every chart so the configurations are comparable.
            y {
                limits = 0..2300
            }
            boxplot(
                project,
                // Per-row average: each row's solvingTime divided by that row's cnt.
                column<Double>(
                    solvingTime.values.zip(cnt.values).map { it.first.toDouble() / it.second }
                ).named("avg. solving time (ms)")
            )
            layout { 
                title = conf
            }
        }
}

In [134]:
// Per (project, config): solving and checking time summed over the group and divided by
// the summed `cnt`, restricted to configurations ending in "tt" that do not contain
// "random", plus the sum of both averages as "totalTime".
val tts = fullData
    .groupBy("project", "config")
    .aggregate {
        val queryCount = cnt.sum()
        solvingTime.sum().toDouble() / queryCount into "solvingTime"
        checkingTime.sum().toDouble() / queryCount into "checkingTime"
    }
    .sortBy("project", "config")
    .filter { "random" !in config && config.endsWith("tt") }
    .run {
        // The aggregated columns hold Double values, hence the casts.
        val solving = solvingTime.cast<Double>().values
        val checking = checkingTime.cast<Double>().values
        add(column(solving.zip(checking) { s, c -> s + c }).named("totalTime"))
    }
tts

project,config,solvingTime,checkingTime,totalTime
FASTJSON,empty-tt,357.053109,0.0,357.053109
FASTJSON,fullopt-full-tt,355.248298,36.55833,391.806627
FASTJSON,simple-full-tt,350.871993,0.683613,351.555606
GUAVA,empty-tt,503.909491,0.0,503.909491
GUAVA,fullopt-full-tt,498.373248,46.090509,544.463757
GUAVA,simple-full-tt,515.325591,56.397677,571.723268
SEATA,empty-tt,68.098684,0.0,68.098684
SEATA,fullopt-full-tt,65.657895,4.703947,70.361842
SEATA,simple-full-tt,69.085526,0.0,69.085526
SPOON,empty-tt,419.85317,0.0,419.85317


In [135]:
// For every project, express each configuration's average solving time as a percentage
// reduction relative to that project's "empty-tt" row (positive means less time than
// "empty-tt"). Requires exactly one "empty-tt" row per project.
val diffTts = tts
    .groupBy("project")
    .updateGroups { projectRows ->
        val baseline = projectRows.filter { config == "empty-tt" }.single().solvingTime
        projectRows
            .groupBy("project", "config")
            .aggregate {
                100.0 * (baseline - solvingTime.single()) / baseline into "solvingTime (%)"
            }
    }
    .concat()

diffTts

project,config,solvingTime (%)
FASTJSON,empty-tt,0.0
FASTJSON,fullopt-full-tt,0.505474
FASTJSON,simple-full-tt,1.731148
GUAVA,empty-tt,0.0
GUAVA,fullopt-full-tt,1.098658
GUAVA,simple-full-tt,-2.265506
SEATA,empty-tt,0.0
SEATA,fullopt-full-tt,3.584195
SEATA,simple-full-tt,-1.449135
SPOON,empty-tt,0.0


In [136]:
// Bar chart of the "solvingTime (%)" values per project, one colour per configuration;
// the "empty-tt" rows are left out.
diffTts.filter { config != "empty-tt" }.groupBy("config").sortBy("config").plot { 
    bars { 
        x(project)
        y(`solvingTime (%)`)
        fillColor(config)
    }
}

In [147]:
/**
 * Prints how much three measured values dropped relative to their baselines.
 *
 * The output is four lines: [name], then `unsat: …%`, `success: …%` and `total: …%`.
 * Each percentage is `100 * (baseline - value) / baseline`, printed with two decimals
 * and always with '.' as the decimal separator.
 *
 * @param name label printed on the first line.
 * @param unsat measured value compared against [baseUnsat].
 * @param succ measured value compared against [baseSucc].
 * @param total measured value compared against [baseTotal].
 * @param baseUnsat baseline for [unsat]; the default is the value this function used to hard-code.
 * @param baseSucc baseline for [succ]; the default is the value this function used to hard-code.
 * @param baseTotal baseline for [total]; the default is the value this function used to hard-code.
 */
fun res(
    name: String,
    unsat: Double,
    succ: Double,
    total: Double,
    baseUnsat: Double = 305746.0,
    baseSucc: Double = 901134.0,
    baseTotal: Double = 2810097.0,
) {
    // Locale.ROOT keeps the decimal separator a '.' whatever the machine's default locale is.
    fun reduction(base: Double, value: Double): String =
        "%.2f".format(java.util.Locale.ROOT, 100.0 * (base - value) / base)

    println(name)
    println("unsat: ${reduction(baseUnsat, unsat)}%")
    println("success: ${reduction(baseSucc, succ)}%")
    println("total: ${reduction(baseTotal, total)}%")
}

In [148]:
// Figures for the "simple" configuration.
res(name = "simple", unsat = 146564.0, succ = 752142.0, total = 2689276.0)

simple
unsat: 52.06%
success: 16.53%
total: 4.30%


In [149]:
// Figures for the "fullopt" configuration.
res(name = "fullopt", unsat = 107510.0, succ = 722478.0, total = 2631517.0)

fullopt
unsat: 64.84%
success: 19.83%
total: 6.35%
