# Ablation study

## Setup

In [186]:
%useLatestDescriptors
%use dataframe, kandy

In [187]:
import java.nio.file.Path
import kotlin.io.path.*

/**
 * Collects the data frames of every instance stored below this directory.
 *
 * The directory tree is walked recursively. For each directory whose name starts with
 * `f` or `v`, every direct entry is passed to [transform], and all resulting frames are
 * concatenated into a single frame.
 *
 * @param transform reads one entry of a matching directory into a data frame
 * @return the concatenation of all frames produced by [transform], or an empty frame
 *   when no entry was found
 */
fun <T> Path.readAllInstances(transform: (Path) -> DataFrame<T>): DataFrame<T> {
    val frames = toFile()
        .walk()
        .filter { it.isDirectory && (it.name.startsWith("f") || it.name.startsWith("v")) }
        .flatMap { it.toPath().listDirectoryEntries().asSequence() }
        .map(transform)
        .toList()

    // A single bulk concat replaces the nested pairwise `reduce` calls: `reduce` throws on
    // an empty directory / empty match set and re-copies the accumulated frame each step.
    return if (frames.isEmpty()) emptyDataFrame() else frames.concat()
}


/**
 * Loads `output/metrics.json` located below the given path.
 *
 * @param json parent path under which `output/metrics.json` is resolved (not the JSON file itself)
 * @return the parsed file cast to `DataFrame<Int>`, or an empty data frame when the file does not exist
 */
fun readMetricsJson(json: Path): DataFrame<Int> {
    val metricsPath = json.resolve("output/metrics.json")
    if (!metricsPath.exists()) return emptyDataFrame()
    return DataFrame.readJson(metricsPath.pathString).cast<Int>()
}

/**
 * Groups the rows by `Severity` and `Issue`, both derived from the `InputDir` column.
 *
 * The `study/instances/` prefix is stripped from `InputDir` and the remainder is split at
 * the first `/`. The first part becomes `Severity`; the second part, without a leading
 * `issue_` and with every `_inlined` occurrence removed, becomes `Issue`.
 */
fun <T> DataFrame<T>.groupByInstanceName() = groupBy {
    val pathParts = column<String>("InputDir").map { inputDir ->
        inputDir.removePrefix("study/instances/").split("/", limit = 2)
    }
    val severity = pathParts.map { parts -> parts[0] } named "Severity"
    val issue = pathParts.map { parts ->
        parts[1].removePrefix("issue_").replace("_inlined", "")
    } named "Issue"

    severity and issue
}

/**
 * Aggregates the `Total` sizes per instance group (see [groupByInstanceName]).
 *
 * @param toolName name of the output column that receives the mean of `Total.AfterSize`
 * @return one row per group with `Before` (maximum of `Total.BeforeSize`) and [toolName]
 */
fun <T> DataFrame<T>.aggregateTotalAfterSize(toolName: String) =
    groupByInstanceName().aggregate {
        val total = get("Total")
        total.get("BeforeSize").cast<Int>().max() into "Before"
        total.get("AfterSize").cast<Int>().mean() into toolName
    }

/**
 * Orders the rows for presentation by applying four sorts in sequence.
 *
 * Applied in order: `Version` (string order, with `"final"` last), then `Issue` ascending,
 * then `Severity` descending, and finally rows whose `Severity` is `"extra"` are moved to
 * the end. Because the sorts are chained, the last one acts as the primary key; the earlier
 * orderings only survive as tie-breakers if each later sort is stable (assumed here —
 * confirm for the DataFrame version in use).
 *
 * Both custom comparators return 0 for rows that are equal under their key. Returning a
 * non-zero value for two `"final"` (or two `"extra"`) rows would break the comparator
 * contract and could reorder such rows or make the underlying sort fail.
 */
fun <T> DataFrame<T>.sortedByIssues() =
    sortWith { a, b ->
        val aIsFinal = a["Version"] == "final"
        val bIsFinal = b["Version"] == "final"
        when {
            aIsFinal && bIsFinal -> 0
            aIsFinal -> 1
            bIsFinal -> -1
            else -> a["Version"].toString().compareTo(b["Version"].toString())
        }
    }
        .sortBy { get("Issue") }
        .sortByDesc { get("Severity") }
        .sortWith { a, b ->
            // Boolean ordering is false < true, so "extra" rows sort after all others
            // and two "extra" (or two non-"extra") rows compare as equal.
            (a["Severity"] == "extra").compareTo(b["Severity"] == "extra")
        }

/**
 * Renders this data frame as a LaTeX `table` environment wrapping a centered `tabular`.
 *
 * Every column is centered and delimited by vertical rules; the first table line holds the
 * column names. `Double` cells are printed with two decimals (always with a decimal point),
 * `null` cells as `-`, and all other cells via `toString()`. Caption and label are the
 * placeholders `todo` and `tab:todo`.
 *
 * The output is assembled line by line rather than with `trimIndent()` on an interpolated
 * template: with two or more rows the interpolated row lines start at column 0, which made
 * `trimIndent()` strip nothing and left the result inconsistently indented. An empty data
 * frame now produces no row line instead of a lone `\\`.
 *
 * NOTE(review): header and cell text is inserted verbatim; LaTeX special characters
 * (`_`, `&`, `%`, `\`) are not escaped — confirm callers only pass LaTeX-safe text.
 *
 * @return the LaTeX source of the table
 */
fun DataFrame<*>.toLatexTable(): String {
    val header = columns().joinToString(" & ") { it.name() }
    val rowLines = rows().map { row ->
        row.values().joinToString(" & ") { cell ->
            when (cell) {
                // Fixed locale: the default locale may format with a decimal comma.
                is Double -> "%.2f".format(java.util.Locale.ROOT, cell)
                null -> "-"
                else -> cell.toString()
            }
        }
    }

    val lines = listOf(
        "\\begin{table}[h!]",
        "    \\centering",
        "    \\begin{tabular}{${"|c".repeat(columnsCount())}|}",
        "        \\hline",
        "        $header \\\\",
        "        \\hline",
    ) + rowLines.map { "        $it \\\\" } + listOf(
        "        \\hline",
        "    \\end{tabular}",
        "    \\caption{todo}",
        "    \\label{tab:todo}",
        "\\end{table}",
    )

    return lines.joinToString("\n")
}

In [188]:
// Load every metrics file found below `results_perses` into one data frame.
val seruPerses = Path("results_perses").readAllInstances(::readMetricsJson)
// Aggregate per instance; the aggregated size column is named "Seru+Perses".
val baseSeruSize = seruPerses.aggregateTotalAfterSize("Seru+Perses")
//baseSeruSize

In [189]:
/**
 * Summarises the `tokens` column per (`Severity`, `Issue`, `Version`) group.
 *
 * The grouping keys come from the `severity`, `issue` and `version` columns; the issue
 * value loses a leading `issue_` and a trailing `_inlined`. For each group the mean and
 * standard deviation of `tokens` are stored in `mean` and `std`. The result is ordered
 * with [sortedByIssues], and `Issue` and `Version` are then merged into a single `Issue`
 * column joined by `/`.
 */
fun <T> DataFrame<T>.aggregateCueTokensGT() = groupBy {
    val severity = get("severity") named "Severity"
    val issue = expr {
        get("issue").toString().removePrefix("issue_").removeSuffix("_inlined")
    } named "Issue"
    val version = get("version") named "Version"

    severity and issue and version
}
    .aggregate {
        val tokens = get("tokens").cast<Int>()
        tokens.mean() into "mean"
        tokens.std() into "std"
    }
    .sortedByIssues()
    .merge("Issue", "Version").by("/").into("Issue")

// Token counts read from the ground-truth CSV.
val cueTokensPersesGT = DataFrame.readCSV("groundtruth_perses/groundtruth_perses_tokens_cue_parser.csv")
// Keep only the per-group mean and expose it as the "Perses" column.
val persesMeanGTTokens = cueTokensPersesGT.aggregateCueTokensGT()
    .remove("std")
    .rename("mean").into("Perses")
//persesMeanGTTokens

In [202]:
// Display names of the ablated strategies.
// NOTE(review): presumably position i names the run stored in `ablation_results/strategy_<i>` — confirm.
val strategies = listOf(
    "Let",
    "Empty Declaration",
    "Package",
    "Redundant Nesting",
    "List",
    "Trivial If",
    "If",
    "Ellipsis",
    "Constant Propagation",
    "String Interpolation",
    "Loop Unrolling",
    "Unification",
    "Union",
    "Import",
)

// One metrics frame per strategy directory. The count is derived from `strategies`
// instead of a hard-coded 14 so the two cannot drift apart when a strategy is added or removed.
val strategyDFs = List(strategies.size) { index ->
    Path("ablation_results", "strategy_$index").readAllInstances(::readMetricsJson)
}
// Metrics of the separate `no_constant_propagation` run.
val noConstantPropagation = Path("ablation_results", "no_constant_propagation").readAllInstances(::readMetricsJson)

In [207]:
// Aggregate each strategy frame under its strategy name, then full-join all of them into one table.
val strategiesTotalSize = strategyDFs
    .mapIndexed { index, metrics -> metrics.aggregateTotalAfterSize(strategies[index]) }
    .reduce { joined, next -> joined.fullJoin(next) }

// Same aggregation for the run without constant propagation.
val noConstantPropagationTotalSize =
    noConstantPropagation.aggregateTotalAfterSize("Seru+Perses \\ Constant Propagation")

//strategiesTotalSize

# Difference to ground truth

In [224]:
// Join ground truth and all measured sizes, move "Before" to index 2, and replace every
// column after "Perses" with its difference to the row's "Perses" value.
val diffToBaseSize = persesMeanGTTokens
    .fullJoin(baseSeruSize)
    .fullJoin(noConstantPropagationTotalSize)
    .fullJoin(strategiesTotalSize)
    .move("Before").to(2)
    .update { allAfter("Perses") }
    .with { size -> (size as Double) - getValue<Double>("Perses") }

// The same differences expressed as a percentage of the row's "Before" value.
val diffToBaseSizeRelative = diffToBaseSize
    .update { allAfter("Perses") }
    .with { diff -> ((diff as Double) / getValue<Int>("Before")) * 100 }

//diffToBaseSize

In [228]:
/**
 * Plots, per heuristic column, the distribution of its differences to the ground truth.
 *
 * The `Perses` and `Before` columns are dropped, the columns after `Issue` are reordered by
 * their mean, and then gathered into the long-format columns `Heuristic` (former column
 * name) and `Diff` (cell value). The plot layers a line at y = 0, a box plot with hidden
 * outliers, and jittered points with a fixed colour per heuristic.
 *
 * @param yLabel label shown on the y axis
 */
fun <T> DataFrame<T>.plotDifferenceToGroundTruth(yLabel: String) =
    remove("Perses", "Before")
        .reorder { allAfter("Issue") }.by { it.cast<Double>().mean() }
        .gather { allAfter("Issue") }.into("Heuristic", "Diff")
        .plot {
            // Reference line at a difference of zero.
            line {
                x("Heuristic")
                y { constant(0) }
            }

            // Distribution per heuristic; outliers are hidden here because every value
            // is drawn by the points layer below.
            boxplot("Heuristic", "Diff") {
                boxes {
                    alpha = .1
                    width = .5
                }
                outliers {
                    show = false
                }
            }

            // Individual values, jittered, coloured by heuristic.
            points {
                x("Heuristic")
                y("Diff")
                color("Heuristic") {
                    legend {
                        type = LegendType.DiscreteLegend(nRow = 6)
                    }
                    // Fixed colour per heuristic name.
                    scale = categorical(
                        "Seru+Perses" to Color.hex("#1f77b4"), // Blue
                        "Seru+Perses \\ Constant Propagation" to Color.hex("#ff7f0e"), // Orange
                        "Constant Propagation" to Color.hex("#d62728"), // Red
                        "Empty Declaration" to Color.hex("#2ca02c"), // Green
                        "Let" to Color.hex("#9467bd"), // Purple
                        "Trivial If" to Color.hex("#8c564b"), // Brown
                        "Ellipsis" to Color.hex("#e377c2"), // Pink
                        "Redundant Nesting" to Color.hex("#7f7f7f"), // Gray
                        "List" to Color.hex("#bcbd22"), // Olive
                        "Package" to Color.hex("#17becf"), // Cyan
                        "Unification" to Color.hex("#4b0082"), // Indigo
                        "String Interpolation" to Color.hex("#6e5596"), // Dark Purple
                        "If" to Color.hex("#ff9896"), // Light Red
                        "Union" to Color.hex("#98df8a"), // Light Green
                        "Loop Unrolling" to Color.hex("#c5b0d5"), // Lavender
                        "Import" to Color.hex("#ffbb78")  // Peach
                    )
                }
                alpha = .7
                size = 2.5
                position = Position.jitter(.2)
            }

            layout {
                style {
                    // The x tick labels are blanked; the heuristics are identified by the colour legend.
                    xAxis {
                        text { blank = true }
                    }
                    yAxisLabel = yLabel
                    legend {
                        position = LegendPosition.Bottom
                        title {
                            blank = true
                        }
                    }
                }
                size = 1000 to 700
            }
        }

In [229]:
// Plot the absolute differences held in diffToBaseSize.
diffToBaseSize.plotDifferenceToGroundTruth("Difference to Perses [Token]")

In [230]:
// Plot the percentage differences held in diffToBaseSizeRelative.
diffToBaseSizeRelative.plotDifferenceToGroundTruth("Reduction rate difference to Perses [%]")

# Constant propagation