In [1]:
%use kotlin-dl
%use krangl

In [2]:
import org.jetbrains.kotlinx.dl.api.core.history.EpochTrainingEvent
import org.jetbrains.kotlinx.dl.api.core.history.TrainingHistory

In [3]:
/**
 * Min-max scales every column of [df] independently: each value `v` becomes
 * `(v - min) / (max - min)`, where `min` and `max` are taken from that same column.
 *
 * A column whose `min` equals its `max` is returned as all zeros instead of being divided
 * by a zero range.
 *
 * @param df frame whose columns support krangl's numeric column arithmetic.
 * @return a new frame with the same column names as [df], in the same order.
 * @throws IllegalArgumentException if `min()` or `max()` returns `null` for a column.
 */
fun normalize(df: DataFrame): DataFrame {
    val normalizedCols = df.cols.map { col ->
        val min = requireNotNull(col.min()) { "Cannot normalize column '${col.name}': min() returned null" }
        val max = requireNotNull(col.max()) { "Cannot normalize column '${col.name}': max() returned null" }
        val shifted = col - min
        val range = max - min
        // A constant column has range 0; dividing would turn every row into NaN,
        // so keep the already-zero shifted values instead.
        if (range == 0.0) shifted else shifted / range
    }
    // Column arithmetic yields columns that no longer carry the original names, and
    // setNames returns a renamed copy rather than renaming in place, so return its result.
    return dataFrameOf(*normalizedCols.toTypedArray()).setNames(*df.names.toTypedArray())
}

In [4]:
/**
 * Splits [df] into a feature matrix and a label vector.
 *
 * Every column except [label] is treated as a feature and passed through `normalize`, so the
 * scaling statistics come from [df] itself. The features are returned row-major: `X[row][col]`.
 *
 * @param df frame holding the feature columns plus the [label] column.
 * @param label name of the target column; defaults to `"quality"`.
 * @return `Pair(X, y)` where `X` has one `FloatArray` per row of [df] and `y` has one label per row.
 * @throws IllegalArgumentException if any value in the [label] column is missing, because
 *   dropping it would leave `y` shorter than `X` and shift every later label onto the wrong row.
 */
fun getXy(
    df: DataFrame, 
    label: String = "quality"
): Pair<Array<FloatArray>, FloatArray> {
    val features = df.remove(label)
    val nRows = features.nrow
    val nFeatures = features.ncol

    // The float matrix is indexed [col][row] here; transpose it into one FloatArray per sample.
    val columnsArray = normalize(features).toFloatMatrix()
    val X = Array(nRows) { row ->
        FloatArray(nFeatures) { col -> columnsArray[col][row] }
    }

    // Keep labels positionally aligned with the rows of X: fail loudly on a missing label
    // instead of silently skipping it.
    val y = df.get(label).toDoubles()
        .mapIndexed { row, value ->
            requireNotNull(value) { "Missing '$label' value in row $row" }.toFloat()
        }
        .toFloatArray()

    return Pair(X, y)
}

In [5]:
// Read the training and validation splits from CSV files under data/ into krangl data frames.
val dfTrain = DataFrame.readCSV("data/winequality-white-train.csv")
val dfValid = DataFrame.readCSV("data/winequality-white-val.csv")

In [11]:
// Turn each frame into a row-major feature matrix and a label vector (label column: "quality").
// getXy min-max scales the features with the min/max of the frame it receives, so the training
// and validation features are scaled with different statistics.
// NOTE(review): consider scaling the validation split with the training min/max instead.
val (X, y) = getXy(dfTrain)
val (XValid, yValid) = getXy(dfValid)

In [12]:
// Sanity check: print the first training sample's scaled features (each followed by a space)
// and then its label, all on one line.
print(X[0].joinToString(separator = "") { "$it " })
print(y[0])

0.31 0.25945947 0.21084337 0.012269938 0.12166172 0.020905923 0.3573086 0.102756895 0.47272727 0.22352941 0.45 5.0

In [13]:
// Wrap the feature matrices and label vectors in OnHeapDataset instances, the form passed to
// model.fit and model.evaluate below.
val trainDataset = OnHeapDataset.create(X, y)
val validDataset = OnHeapDataset.create(XValid, yValid)

In [14]:
/**
 * Training callback that writes one line per finished epoch containing the epoch number,
 * the training loss and the validation loss reported by the training event.
 */
class PrintingCallback : Callback() {
    override fun onEpochEnd(epoch: Int, event: EpochTrainingEvent, logs: TrainingHistory) {
        val trainLoss = event.lossValue
        val validationLoss = event.valLossValue
        println("Epoch: $epoch - loss: $trainLoss - val loss: $validationLoss")
    }
}

In [21]:
// Fully connected regression network: an 11-wide input, seven Dense layers of 8 units each,
// and a single output unit with a linear activation.
// The hidden layers do not set an activation explicitly, so they use Dense's default.
// NOTE(review): confirm which activation that is for the KotlinDL version in use.
val model = Sequential.of(
    Input(11),
    Dense(8),
    Dense(8),
    Dense(8),
    Dense(8),
    Dense(8),
    Dense(8),
    Dense(8),
    Dense(1, activation = Activations.Linear)
)


In [22]:
// Configure training: Adam optimizer, mean absolute error as both the loss and the reported
// metric, and PrintingCallback to log the losses after every epoch.
// NOTE(review): 0.001f is presumably Adam's learning rate — confirm against the KotlinDL API.
model.compile(
    optimizer = Adam(0.001f),
    loss = Losses.MAE,
    metric = Metrics.MAE,
    callback = PrintingCallback(),
)

In [28]:
// Print the layer-by-layer summary (layer name, output shape, parameter count) of the compiled model.
model.summary().print()

Model type: Sequential
______________________________________________________________________________
Layer (type)                           Output Shape              Param #      
input_1(Input)                         [None, 11]                0            
______________________________________________________________________________
dense_2(Dense)                         [None, 8]                 96           
______________________________________________________________________________
dense_3(Dense)                         [None, 8]                 72           
______________________________________________________________________________
dense_4(Dense)                         [None, 8]                 72           
______________________________________________________________________________
dense_5(Dense)                         [None, 8]                 72           
______________________________________________________________________________
dense_6(Dense)               

In [29]:
// Train for 250 epochs on the training set in batches of 32, evaluating on the validation set
// (batches of 1024) as part of training; PrintingCallback logs both losses per epoch.
// The call is the last expression of the cell, so the notebook also displays the returned
// TrainingHistory object.
model.fit(
    trainingDataset = trainDataset,
    validationDataset = validDataset,
    epochs = 250,
    trainBatchSize = 32,
    validationBatchSize = 1024
)    

Epoch: 1 - loss: 5.172645568847656 - val loss: 0.8932986855506897
Epoch: 2 - loss: 0.7548993229866028 - val loss: 0.7362748384475708
Epoch: 3 - loss: 0.6922183632850647 - val loss: 0.6759459972381592
Epoch: 4 - loss: 0.6693269610404968 - val loss: 0.6483440399169922
Epoch: 5 - loss: 0.6548283696174622 - val loss: 0.6396056413650513
Epoch: 6 - loss: 0.6440584063529968 - val loss: 0.6369010210037231
Epoch: 7 - loss: 0.635884702205658 - val loss: 0.635438084602356
Epoch: 8 - loss: 0.6286631226539612 - val loss: 0.6335959434509277
Epoch: 9 - loss: 0.6233331561088562 - val loss: 0.6316449046134949
Epoch: 10 - loss: 0.618718683719635 - val loss: 0.6287419199943542
Epoch: 11 - loss: 0.6147053837776184 - val loss: 0.6243162155151367
Epoch: 12 - loss: 0.6105544567108154 - val loss: 0.6200976371765137
Epoch: 13 - loss: 0.6069340705871582 - val loss: 0.6162220239639282
Epoch: 14 - loss: 0.6035922169685364 - val loss: 0.6125243306159973
Epoch: 15 - loss: 0.6010518074035645 - val loss: 0.6085104942

org.jetbrains.kotlinx.dl.api.core.history.TrainingHistory@54e22bdd

In [31]:
// Evaluate the trained model on the validation set and print its loss (MAE, as set in compile).
val result = model.evaluate(validDataset)
println(result.lossValue)

0.5996628403663635


In [12]:
// Read the held-out test split; the bare `dfTest` expression makes the notebook display the frame.
val dfTest = DataFrame.readCSV("data/winequality-white-test.csv")
dfTest

fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
6.4,0.31,0.38,2.9,0.038,19.0,102.0,0.9912,3.17,0.35,11.0,7
6.6,0.27,0.41,1.3,0.052,16.0,142.0,0.9951,3.42,0.47,10.0,6
7.0,0.25,0.32,9.0,0.046,56.0,245.0,0.9955,3.25,0.5,10.4,6
6.6,0.38,0.15,4.6,0.044,25.0,78.0,0.9931,3.11,0.38,10.2,6
6.9,0.21,0.33,1.8,0.034,48.0,136.0,0.9899,3.25,0.41,12.6,7


In [13]:
// Build the test feature matrix and labels. getXy min-max scales the features with the test
// frame's own min/max, not the statistics used for the training data.
// NOTE(review): this can make the test inputs inconsistent with what the model was trained on.
val (XTest, yTest) = getXy(dfTest)

In [14]:
// Wrap the test features and labels in an OnHeapDataset so they can be passed to model.evaluate.
val testDataset = OnHeapDataset.create(XTest, yTest)

In [15]:
// Evaluate on the test set; as the cell's last expression, the returned EvaluationResult
// (loss value and MAE metric) is displayed by the notebook.
model.evaluate(testDataset)

EvaluationResult(lossValue=0.8474463820457458, metrics={MAE=0.8474463820457458})