In [1]:
%use dataframe, kandy

In [2]:
USE {
    dependencies {
        implementation("org.apache.commons:commons-math3:3.6.1")
    }
}

In [3]:
import java.nio.file.Path
import kotlin.io.path.*

/**
 * Extracts two integer fields from a line shaped like `<s1> = <int>, <s2> = <int>`.
 *
 * The pattern is searched for anywhere in the receiver, so text before or after it is ignored.
 * [s1] and [s2] are interpolated into the regular expression verbatim (they are not escaped).
 *
 * @param s1 label of the first field
 * @param s2 label of the second field
 * @return the two parsed values, first field first
 * @throws IllegalArgumentException if the receiver does not contain the expected pattern
 * @throws NumberFormatException if a matched digit sequence does not fit into an [Int]
 */
fun String.parse(s1: String, s2: String): Pair<Int, Int> {
    val match = Regex("""$s1 = (\d+), $s2 = (\d+)""").find(this)
        ?: throw IllegalArgumentException("Expected '$s1 = <int>, $s2 = <int>' in line: '$this'")
    val groups = match.groupValues
    return groups[1].toInt() to groups[2].toInt()
}

/**
 * Extracts two millisecond fields from a line shaped like `<s1> = <long> ms, <s2> = <long> ms`.
 *
 * The pattern is searched for anywhere in the receiver, so text before or after it is ignored.
 * [s1] and [s2] are interpolated into the regular expression verbatim (they are not escaped).
 *
 * @param s1 label of the first field
 * @param s2 label of the second field
 * @return the two parsed values, first field first
 * @throws IllegalArgumentException if the receiver does not contain the expected pattern
 * @throws NumberFormatException if a matched digit sequence does not fit into a [Long]
 */
fun String.parseTime(s1: String, s2: String): Pair<Long, Long> {
    val match = Regex("""$s1 = (\d+) ms, $s2 = (\d+) ms""").find(this)
        ?: throw IllegalArgumentException("Expected '$s1 = <long> ms, $s2 = <long> ms' in line: '$this'")
    val groups = match.groupValues
    return groups[1].toLong() to groups[2].toLong()
}

/**
 * Builds a data frame from a plain-text log in which every benchmark occupies eight consecutive lines.
 *
 * Within one eight-line record (0-based):
 *  - line 1: benchmark name, taken up to the first space
 *  - line 2: `cnt = <int>, sat = <int>`
 *  - line 3: `unsat = <int>, unknown = <int>`
 *  - line 4: `reusedUnsat = <int>`
 *  - line 5: `solvingTime = <long> ms, checkingTime = <long> ms`
 *  - line 6: `updatingTime = <long> ms, totalTime = <long> ms` (`totalTime` must be present but is discarded)
 *  - lines 0 and 7 are not read
 *
 * A trailing record with fewer than eight lines is skipped.
 *
 * @param path log file to read
 * @return a frame with the columns `benchmark`, `cnt`, `sat`, `unsat`, `unknown`, `reusedUnsat`,
 *   `solvingTime`, `checkingTime` and `updatingTime`, one row per complete record
 * @throws IllegalArgumentException if the `reusedUnsat` line of a record is malformed; failures raised
 *   by the `parse` / `parseTime` helpers for the other lines propagate unchanged
 */
fun mkDataFrameFromLog(path: Path): DataFrame<*> {
    // Number of consecutive log lines that make up one benchmark record.
    val linesPerRecord = 8
    // Compiled once up front instead of once per record.
    val reusedUnsatPattern = Regex("reusedUnsat = (\\d+)")

    val benchmark = mutableListOf<String>()
    val cnt = mutableListOf<Int>()
    val sat = mutableListOf<Int>()
    val unsat = mutableListOf<Int>()
    val unknown = mutableListOf<Int>()
    val reusedUnsat = mutableListOf<Int>()
    val solvingTime = mutableListOf<Long>()
    val checkingTime = mutableListOf<Long>()
    val updatingTime = mutableListOf<Long>()

    path.readLines()
        .chunked(linesPerRecord)
        // Only the final chunk can be short; an incomplete record is ignored.
        .dropLastWhile { it.size != linesPerRecord }
        .forEach { record ->
            val benchmarkR = record[1].substringBefore(" ")
            val (cntR, satR) = record[2].parse("cnt", "sat")
            val (unsatR, unknownR) = record[3].parse("unsat", "unknown")
            val reusedMatch = reusedUnsatPattern.find(record[4])
                ?: throw IllegalArgumentException(
                    "Expected 'reusedUnsat = <int>' for benchmark '$benchmarkR' in line: '${record[4]}'"
                )
            val reusedUnsatR = reusedMatch.groupValues[1].toInt()
            val (solvingTimeR, checkingTimeR) = record[5].parseTime("solvingTime", "checkingTime")
            val (updatingTimeR, _) = record[6].parseTime("updatingTime", "totalTime")

            benchmark += benchmarkR
            cnt += cntR
            sat += satR
            unsat += unsatR
            unknown += unknownR
            reusedUnsat += reusedUnsatR
            solvingTime += solvingTimeR
            checkingTime += checkingTimeR
            updatingTime += updatingTimeR
        }

    return dataFrameOf(
        "benchmark" to benchmark,
        "cnt" to cnt,
        "sat" to sat,
        "unsat" to unsat,
        "unknown" to unknown,
        "reusedUnsat" to reusedUnsat,
        "solvingTime" to solvingTime,
        "checkingTime" to checkingTime,
        "updatingTime" to updatingTime,
    )
}

In [4]:
/**
 * Returns this frame with an extra column named `config` whose value is [name] in every row.
 *
 * @param name value written into each row of the new column
 */
fun DataFrame<*>.addConfigColumn(name: String): DataFrame<*> {
    val repeatedName = List(rowsCount()) { name }
    val configColumn = column(repeatedName).rename("config")
    return add(configColumn)
}

/**
 * Returns this frame with an extra column named `project`, holding the part of each
 * `benchmark` value that precedes the first `-`.
 *
 * The `benchmark` column must contain strings; any other value fails the cast.
 */
fun DataFrame<*>.addProjectColumn(): DataFrame<*> {
    val benchmarkColumn = this["benchmark"]
    val projectColumn = benchmarkColumn
        .map { value -> (value as String).substringBefore('-') }
        .rename("project")
    return add(projectColumn)
}

/**
 * Returns this frame with every column whose name ends in `Time` converted through `convertToLong`.
 *
 * The matching columns are removed first and their converted versions are then re-added with
 * `addAll`, so their position in the frame may differ from the original one.
 */
fun DataFrame<*>.timeToLong(): DataFrame<*> {
    val timeColumns = columns().filter { col -> col.name.endsWith("Time") }.toTypedArray()
    val withoutTimeColumns = remove(*timeColumns)
    val convertedColumns = timeColumns.map { col -> col.convertToLong() }
    return withoutTimeColumns.addAll(convertedColumns)
}

// Directory that holds one entry per cache configuration.
val folder = "../output/cache"

// One frame per entry of `folder`: read from smtData.csv when that file exists, otherwise
// parsed from cache-a-lot.log. Each frame is tagged with the entry name as its config,
// given a project column, sorted by project, and finally all frames are concatenated.
val data = Path(folder).listDirectoryEntries().map { configDir ->
    val csvFile = configDir / "smtData.csv"
    val measurements = if (csvFile.exists()) {
        DataFrame.readCSV(csvFile.toFile()).timeToLong()
    } else {
        mkDataFrameFromLog(configDir / "cache-a-lot.log")
    }
    measurements
        .addConfigColumn(configDir.name)
        .addProjectColumn()
        .sortBy("project")
}.concat()

data.columnNames()

[benchmark, cnt, sat, unsat, unknown, reusedUnsat, solvingTime, checkingTime, updatingTime, config, project]

In [5]:
// Number of distinct configurations present in the collected data.
val configs = data.config.countDistinct()

// Benchmarks that occur under every configuration.
val fullBenchs = data.groupBy("benchmark")
    .aggregate { it.config.countDistinct() into "configs" }
    .filter { it["configs"] == configs }
    .benchmark.values.toList()

// Rows restricted to those benchmarks. Membership is checked against a set so the filter does
// not rescan the whole list for every row; `fullBenchs` itself stays a List.
val fullData = fullBenchs.toSet().let { completeBenchmarks ->
    data.filter { benchmark in completeBenchmarks }
}

In [6]:
// Stacked bars showing, per config, which fraction of the summed `unsat` count is covered by
// `reusedUnsat` and which is not. Configs named "empty" or ending in "tt" are left out, and
// only the rows of a single project are drawn.
fullData
    .filter { config != "empty" && !config.endsWith("tt") }
    .groupBy("config", "project")
    .aggregate {
        val unsatTotal = unsat.sum()
        val unsatReused = reusedUnsat.sum()
        val unsatUnique = unsatTotal - unsatReused
        unsatUnique.toDouble() / unsatTotal into "unique unsat"
        unsatReused.toDouble() / unsatTotal into "reused unsat"
    }
    // Turn the two share columns into (name, value) rows so they can be stacked.
    .gather("reused unsat", "unique unsat")
    .into("name", "value")
    .sortBy("project", "config")
    .groupBy("name")
    .plot {
        val shownProject = "FASTJSON"
        // Row positions that belong to the project being drawn.
        val shownRows = project.indices.filter { row -> project[row] == shownProject }
        bars {
            x(config.get(shownRows))
            y(get("value").get(shownRows))
            fillColor(get("name").get(shownRows)) {
                scale = categorical(
                    "unique unsat" to Color.GREY,
                    "reused unsat" to Color.ORANGE,
                )
            }
            position = Position.stack()
        }
    }

In [7]:
// Stacked bars for the "full-random" config: summed solving, checking and updating time,
// each divided by the summed `cnt`, per project.
fullData
    .filter { config == "full-random" }
    .groupBy("project", "config")
    .aggregate {
        // Shared denominator for the three averages below.
        val totalCount = cnt.sum()
        solvingTime.sum().toDouble() / totalCount into "solvingTime"
        checkingTime.sum().toDouble() / totalCount into "checkingTime"
        updatingTime.sum().toDouble() / totalCount into "updatingTime"
    }
    // One row per (project, config, type) so the three timings can be stacked.
    .gather("solvingTime", "checkingTime", "updatingTime")
    .into("type", "value")
    .sortBy("project", "config")
    .groupBy("type")
    .plot {
        bars {
            x(project.values.zip(config.values))
            y("value") { axis.name = "avg. time (ms)" }
            fillColor("type") {
                scale = categorical(
                    "solvingTime" to Color.ORANGE,
                    "checkingTime" to Color.GREY,
                    "updatingTime" to Color.BLUE,
                )
            }
            position = Position.stack()
        }
    }

In [8]:
// Box plot of per-row solvingTime / cnt, one box per "project config" label.
// Configs whose name ends in "tt" are left out.
fullData
    .filter { !config.endsWith("tt") }
    .sortBy("project", "config")
    .plot {
        // Category label for every row, e.g. "<project> <config>".
        val groupLabels = project.values.zip(config.values).map { (p, c) -> "$p $c" }
        // Solving time of the row divided by its cnt.
        val avgSolvingTimes = solvingTime.values.zip(cnt.values)
            .map { (time, count) -> time.toDouble() / count }
        boxplot(
            column<String>(groupLabels).named("project config"),
            column<Double>(avgSolvingTimes).named("avg. solving time (ms)"),
        )
    }

In [9]:
// Table of summed solvingTime and checkingTime, each divided by the summed cnt, for every
// (project, config) pair. Configs ending in "tt" are dropped before grouping (as in the other
// cells) rather than after aggregating and sorting them; the resulting rows are the same.
fullData
    .filter { !config.endsWith("tt") }
    .groupBy("project", "config")
    .aggregate {
        solvingTime.sum().toDouble() / cnt.sum() into "solvingTime"
        checkingTime.sum().toDouble() / cnt.sum() into "checkingTime"
    }
    .sortBy("project", "config")

project,config,solvingTime,checkingTime
FASTJSON,empty,538040263,0
FASTJSON,full-random,470364010,37221874281
FASTJSON,simple-full,488373870,1632703
FASTJSON,simple-random,522843878,350041
GUAVA,empty,467236573,0
GUAVA,full-random,431443734,64581360614
GUAVA,simple-full,428585678,896419
GUAVA,simple-random,470991049,287724
SEATA,empty,111637168,0
SEATA,full-random,95876106,3205858407


In [19]:
// Scatter plot for FASTJSON: per-row checkingTime / cnt of "simple-full" (x) against
// "full-random" (y). Rows are sorted by benchmark before the two configs are paired by position.
fullData
    .filter { project == "FASTJSON" }
    .sortBy("benchmark")
    .plot {
        val fullRandomRows = config.indices.filter { row -> config[row] == "full-random" }
        val simpleFullRows = config.indices.filter { row -> config[row] == "simple-full" }
        points {
            // checkingTime divided by cnt for each of the given row positions.
            fun avgCheckingTime(rows: List<Int>): List<Double> =
                checkingTime.get(rows).values.zip(cnt.get(rows).values)
                    .map { (time, count) -> time.toDouble() / count }

            x(avgCheckingTime(simpleFullRows))
            y(avgCheckingTime(fullRandomRows))
        }
    }

In [32]:
import org.jetbrains.kotlinx.statistics.stats.mean

// For the "simple-full" config: total checkingTime over benchmarks listed in `fullBenchs`,
// divided by the total checkingTime over all remaining benchmarks.
val t = data.filter { config == "simple-full" }.let { simpleFull ->
    val listedTotal = simpleFull.filter { benchmark in fullBenchs }.checkingTime.sum()
    val remainingTotal = simpleFull.filter { benchmark !in fullBenchs }.checkingTime.sum()
    listedTotal.toDouble() / remainingTotal
}
t

0.019794794438673137