In [7]:
@file:DependsOn("com.github.haifengl:smile-core:2.6.0")
@file:DependsOn("com.github.haifengl:smile-kotlin:2.6.0")

In [8]:
import java.io.File


// Location of the labeled HRV dataset (a relative path).
val csvPath = "ml/hrv_data_labeled.csv"

// Read every line of the file into memory; the first line is used as the header row.
val lines = File(csvPath).useLines { it.toList() }
require(lines.isNotEmpty()) { "CSV is empty: $csvPath" }

// Column names, obtained by splitting the header on plain commas (no quote handling).
val header = lines[0].split(',')

/**
 * Looks up the zero-based position of the column called [name] in the CSV header.
 *
 * @param name exact header text of the column to find.
 * @return the index of the first header cell equal to [name].
 * @throws IllegalArgumentException if the header contains no such column.
 */
fun idx(name: String): Int {
    val position = header.indexOf(name)
    require(position >= 0) { "Column '$name' not found in header: $header" }
    return position
}

// Resolve the position of each required column once, failing fast if one is absent.
val idxHrMean = idx(name = "hr_mean")
val idxRmssd = idx(name = "hrv_rmssd")
val idxPnn50 = idx(name = "hrv_pnn50")
val idxLabel = idx(name = "label")


In [9]:
/**
 * One fully-populated sample: three numeric features plus an integer class label.
 *
 * @property hrMean first feature value of the sample.
 * @property hrvRmssd second feature value of the sample.
 * @property hrvPnn50 third feature value of the sample.
 * @property label integer class label of the sample.
 */
data class HrvRow(
    val hrMean: Double,
    val hrvRmssd: Double,
    val hrvPnn50: Double,
    val label: Int,
)

In [10]:

/**
 * Reports whether a raw CSV cell should be treated as a missing value.
 *
 * A cell is missing when, ignoring surrounding whitespace, it is empty or spells
 * "nan" in any letter case.
 *
 * Trimming matters: on the JVM `String.toDouble()` tolerates surrounding whitespace
 * and parses "NaN" successfully, so a padded cell such as " NaN" would otherwise
 * slip past this check and be converted to `Double.NaN`.
 *
 * @return `true` if the cell is blank or a (possibly padded) NaN marker.
 */
fun String.isMissing(): Boolean =
    trim().let { cell -> cell.isEmpty() || cell.equals("nan", ignoreCase = true) }

// Number of cells a data line must have so that every required column can be read.
// Uses the largest of the four indices rather than assuming `label` is the last column.
val requiredColumnCount = maxOf(maxOf(idxHrMean, idxRmssd), maxOf(idxPnn50, idxLabel)) + 1

/**
 * Parses one CSV data line into an [HrvRow].
 *
 * @param line a single non-header line, split on plain commas.
 * @return the parsed row, or `null` when the line is too short, any required cell
 *   is missing (see `isMissing`), or a cell cannot be parsed as a number.
 */
fun parseHrvRow(line: String): HrvRow? {
    val cols = line.split(',')
    if (cols.size < requiredColumnCount) return null

    val required = listOf(cols[idxHrMean], cols[idxRmssd], cols[idxPnn50], cols[idxLabel])
    if (required.any { it.isMissing() }) return null

    // The *OrNull parsers turn malformed numbers into a skipped row instead of an exception.
    val hrMean = required[0].toDoubleOrNull() ?: return null
    val hrvRmssd = required[1].toDoubleOrNull() ?: return null
    val hrvPnn50 = required[2].toDoubleOrNull() ?: return null
    val label = required[3].toIntOrNull() ?: return null

    return HrvRow(hrMean = hrMean, hrvRmssd = hrvRmssd, hrvPnn50 = hrvPnn50, label = label)
}

// All usable samples: header and blank lines are skipped, incomplete/unparseable lines dropped.
val rows: List<HrvRow> = lines
    .drop(1) // skip header
    .filter { it.isNotBlank() }
    .mapNotNull { parseHrvRow(it) }

require(rows.isNotEmpty()) { "No valid rows after dropna-style filtering." }

// Feature matrix X (one 3-element row per sample) and label vector y, in the same order as `rows`.
val X: Array<DoubleArray> = Array(rows.size) { i ->
    val sample = rows[i]
    doubleArrayOf(sample.hrMean, sample.hrvRmssd, sample.hrvPnn50)
}

val y: IntArray = IntArray(rows.size) { i -> rows[i].label }

println("Samples: ${X.size}, features: ${X[0].size}")

Samples: 125, features: 3


In [11]:
import smile.classification.logit
import smile.classification.LogisticRegression

// Train logistic regression on the full data set with lambda = 0.0, i.e. no regularization penalty.
// NOTE(review): this is NOT equivalent to scikit-learn's default — there C=1.0 with penalty="l2"
// still regularizes. The closest sklearn setting is penalty=None (or a very large C); confirm
// which behavior is intended before comparing coefficients across the two libraries.
val model: LogisticRegression = logit(X, y, lambda = 0.0)


In [13]:
// Narrow the fitted model to its two-class form so the weight vector can be read.
val binomial = model as LogisticRegression.Binomial

// The code below treats the last entry of the weight vector as the intercept
// and everything before it as the per-feature coefficients.
val w: DoubleArray = binomial.coefficients()
val bias = w.last()
val coefs = w.copyOf(w.size - 1)

// Display names, listed in the same order as the feature columns of X.
val featureNames = arrayOf("hr_mean", "hrv_rmssd", "hrv_pnn50")

println("Intercept (bias): $bias")
println("Coefficients:")
featureNames.zip(coefs.asList()).forEach { (name, coef) ->
    println("  $name: $coef")
}

Intercept (bias): -44.81547609618099
Coefficients:
  hr_mean: 0.5080593707164218
  hrv_rmssd: 0.008691739652445069
  hrv_pnn50: -0.03465265923286699


In [14]:
// Predict a class for every training sample, then measure the fraction that match the true labels.
val preds = IntArray(X.size) { i -> binomial.predict(X[i]) }
val correct = preds.withIndex().count { (i, predicted) -> predicted == y[i] }
val acc = correct.toDouble() / y.size

println("Training accuracy: $acc")

Training accuracy: 0.912
