In [23]:
import java.io.File
import com.lignting.neural.*
import org.jetbrains.kotlinx.multik.api.mk
import org.jetbrains.kotlinx.multik.api.ndarray

In [24]:
// Relative paths of the training and test CSV files read by this notebook.
val trainPath: String = "res/used_car_train_20200313.csv"
val testPath: String = "res/used_car_testA_20200313.csv"

In [25]:
/**
 * Reads a delimiter-separated data file and splits every row into features and a label.
 *
 * The first line is treated as a header and skipped. Every remaining non-blank line is split on
 * [delimiter]; each cell is trimmed and parsed with `toDoubleOrNull`, so empty or non-numeric
 * cells become `null`. The cell at [labelIndex] is taken out of the row and returned as the
 * label; the remaining cells, in their original order, are the features.
 *
 * Blank lines (for example a trailing empty line at the end of the file) are skipped instead of
 * failing the whole read.
 *
 * NOTE(review): [labelIndex] is applied to every file passed in. If a file has no label column,
 * the cell at that index is still removed from the features and returned as the "label" —
 * confirm the column layout of each file this is called with.
 *
 * @param path path of the file to read.
 * @param delimiter separator between cells; defaults to a single space.
 * @param labelIndex zero-based column index of the label; defaults to 15.
 * @return one `(features, label)` pair per non-blank data row.
 * @throws IndexOutOfBoundsException if a data row has no column at [labelIndex].
 */
fun readData(
    path: String,
    delimiter: String = " ",
    labelIndex: Int = 15
): List<Pair<List<Double?>, Double?>> =
    File(path).useLines { lines ->
        lines.drop(1) // skip the header row
            .filter { line -> line.isNotBlank() }
            .map { line ->
                val cells = line.split(delimiter).map { cell -> cell.trim().toDoubleOrNull() }
                if (labelIndex !in cells.indices) {
                    throw IndexOutOfBoundsException(
                        "Row has ${cells.size} column(s); label index $labelIndex is out of range: $line"
                    )
                }
                // Build the feature list without mutating the parsed row.
                val features = cells.filterIndexed { index, _ -> index != labelIndex }
                features to cells[labelIndex]
            }
            .toList() // materialize before useLines closes the underlying reader
    }

In [26]:
// Parse both CSV files into (features, label) rows; cells that failed to parse are null.
val trainData: List<Pair<List<Double?>, Double?>> = readData(trainPath)
val testData: List<Pair<List<Double?>, Double?>> = readData(testPath)

In [27]:
// Count missing (null) values in the training data: one counter per feature column,
// sized from the first row, plus a single counter for missing labels.
val xNullCounter = MutableList(trainData[0].first.size) { 0 }
var yNullCounter = 0
for ((features, label) in trainData) {
    features.forEachIndexed { column, value ->
        xNullCounter[column] += if (value == null) 1 else 0
    }
    if (label == null) {
        yNullCounter++
    }
}
// Last expression of the cell: shown as the cell's result.
xNullCounter to yNullCounter

([0, 0, 0, 1, 0, 4506, 8680, 5981, 0, 0, 24324, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 0)

In [28]:
/**
 * Replaces every missing value with `-1.0`.
 *
 * @param data rows of `(features, label)` where any feature or the label may be `null`.
 * @return rows in the same order with each `null` feature and each `null` label replaced by
 *   `-1.0`; non-null values are kept unchanged.
 */
fun fillna(data: List<Pair<List<Double?>, Double?>>): List<Pair<List<Double>, Double>> =
    data.map { (features, label) ->
        val filledFeatures = features.map { value -> value ?: -1.0 }
        val filledLabel = label ?: -1.0
        Pair(filledFeatures, filledLabel)
    }

In [29]:
// Null-free copies of both datasets (missing values replaced by fillna).
val proceedTrainData: List<Pair<List<Double>, Double>> = fillna(trainData)
val processedTestData: List<Pair<List<Double>, Double>> = fillna(testData)

In [30]:
// Convert the filled rows to multik arrays. Features are passed as a list of rows;
// each label is wrapped in a one-element list so labels are also passed as a list of rows.
val trainX = mk.ndarray(proceedTrainData.map { (features, _) -> features })
val trainY = mk.ndarray(proceedTrainData.map { (_, label) -> listOf(label) })
val testX = mk.ndarray(processedTestData.map { (features, _) -> features })
val testY = mk.ndarray(processedTestData.map { (_, label) -> listOf(label) })

In [31]:
// Network definition: five Dense layers with a Relu between consecutive Dense layers,
// trained with the Mae loss and a learning rate of 0.01.
// Dense(a, b) is presumably (input size, output size), giving 30 -> 20 -> 12 -> 8 -> 4 -> 1;
// TODO confirm against com.lignting.neural.
// NOTE(review): the recorded output of the training cell shows the loss increasing between
// iterations (about 7.7e9, then 1.8e13, then 3.6e13). This may indicate that the learning
// rate is too large for the unscaled input features — verify before relying on this setup.
val model = Model(
    Dense(30, 20),
    Relu(),
    Dense(20, 12),
    Relu(),
    Dense(12, 8),
    Relu(),
    Dense(8, 4),
    Relu(),
    Dense(4, 1),
    loss = Mae(),
    learningRate = 0.01
)

In [32]:
// Call model.fit on the training arrays four times (iteration index 0..3) and print
// the value returned by each call together with the iteration index.
repeat(4) {
    val loss = model.fit(trainX, trainY)
    println("train times = $it, loss = $loss")
}

[-1.0000000000026357]
[0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
train times = 0, loss = 7.741972117324097E9
[-1.0000000000026357]
[0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
train times = 1, loss = 1.7754154656832637E13
[-1.0000000000026357]
[0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
train times = 2, loss = 3.550056734154816E13
[-1.0000000000026357]
[0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.0, 0.0, 0.0, 0