In [71]:
%use krangl

In [72]:
// Build a small demo data frame: the column names come first, then the cell values row by row.
val df: DataFrame = dataFrameOf("first_name", "last_name", "age", "weight")(
    "Max", "Doe", 23, 55,
    "Franz", "Smith", 23, 88,
    "Horst", "Keanes", 12, 82
)
// A bare trailing expression is what the notebook renders as the cell output.
df

first_name,last_name,age,weight
Max,Doe,23,55
Franz,Smith,23,88
Horst,Keanes,12,82


In [73]:
// Append a column filled with `rowNumber` (rendered as 1, 2, 3 for the three rows).
// The result is only displayed, not assigned, so `df` itself keeps its four columns.
// NOTE(review): the name "salary_category" does not describe row numbers — confirm the intended name.
df.addColumn("salary_category") { rowNumber }

first_name,last_name,age,weight,salary_category
Max,Doe,23,55,1
Franz,Smith,23,88,2
Horst,Keanes,12,82,3


In [74]:
// Derive a new column by adding 3 to every value of "age".
df.addColumn("age_3y_later") { cols -> cols["age"] + 3 }

first_name,last_name,age,weight,age_3y_later
Max,Doe,23,55,26
Franz,Smith,23,88,26
Horst,Keanes,12,82,15


In [75]:
// Note: krangl dataframes are immutable so we need to (re)assign results to preserve changes.
val newDF = df.addColumn("full_name") { it["first_name"] + " " + it["last_name"] }
newDF

first_name,last_name,age,weight,full_name
Max,Doe,23,55,Max Doe
Franz,Smith,23,88,Franz Smith
Horst,Keanes,12,82,Horst Keanes


In [76]:
// Build an id per row from the last name, the literal "_id" and the row number (e.g. "Doe_id1").
df.addColumn("user_id") { cols -> cols["last_name"] + "_id" + rowNumber }

first_name,last_name,age,weight,user_id
Max,Doe,23,55,Doe_id1
Franz,Smith,23,88,Smith_id2
Horst,Keanes,12,82,Keanes_id3


In [77]:
// Take the first character of each first name; both lambda parameters are named
// so the inner one does not shadow the outer one.
df.addColumn("first_name_initial") { cols ->
    cols["first_name"].map<String> { name -> name.first() }
}

first_name,last_name,age,weight,first_name_initial
Max,Doe,23,55,M
Franz,Smith,23,88,F
Horst,Keanes,12,82,H


In [78]:
// Flag rows whose first name contains "anz".
// A missing (null) first name counts as "no match" rather than crashing the cell
// with a NullPointerException, which the previous `!!` assertion would have done.
df.addColumn("with_anz") { cols ->
    cols["first_name"].toStrings().map { name -> name?.contains("anz") ?: false }
}

first_name,last_name,age,weight,with_anz
Max,Doe,23,55,False
Franz,Smith,23,88,True
Horst,Keanes,12,82,False


In [79]:
// Add two columns in one call: "age_plus3" (age + 3) and "initials"
// (first-name initial and last-name initial joined via `concat`, rendered e.g. as "M D").
df.addColumns(
    "age_plus3" to { cols -> cols["age"] + 3 },
    "initials" to { cols ->
        val firstInitial = cols["first_name"].map<String> { name -> name.first() }
        val lastInitial = cols["last_name"].map<String> { name -> name.first() }
        firstInitial concat lastInitial
    }
)

first_name,last_name,age,weight,age_plus3,initials
Max,Doe,23,55,26,M D
Franz,Smith,23,88,26,F S
Horst,Keanes,12,82,15,H K


In [80]:
// Sort rows by "age" in ascending order (12 is rendered before 23).
df.sortedBy("age")

first_name,last_name,age,weight
Horst,Keanes,12,82
Max,Doe,23,55
Franz,Smith,23,88


In [81]:
// Sort by several columns at once: "age" first, then "weight".
// Presumably "weight" only breaks ties within equal ages — the rendered output is consistent with that.
df.sortedBy("age", "weight")

first_name,last_name,age,weight
Horst,Keanes,12,82
Max,Doe,23,55
Franz,Smith,23,88


In [82]:
// Sort rows by "age" in descending order (23 is rendered before 12).
df.sortedByDescending("age")

first_name,last_name,age,weight
Franz,Smith,23,88
Max,Doe,23,55
Horst,Keanes,12,82


In [83]:
// Sort using a computed key: the "weight" column converted to ints.
df.sortedBy { cols -> cols["weight"].toInts() }

first_name,last_name,age,weight
Max,Doe,23,55
Horst,Keanes,12,82
Franz,Smith,23,88


In [84]:
// Keep only the columns whose runtime type is IntCol (here: age and weight).
df.selectIf { column -> column is IntCol }

age,weight
23,55
23,88
12,82


In [85]:
// Keep a single column, selected by name.
df.select("last_name")

last_name
Doe
Smith
Keanes


In [86]:
// Drop the "weight" column by name; all other columns are kept.
df.remove("weight")

first_name,last_name,age
Max,Doe,23
Franz,Smith,23
Horst,Keanes,12


In [87]:
// Drop every column whose runtime type is IntCol, leaving the two name columns.
df.removeIf { column -> column is IntCol }

first_name,last_name
Max,Doe
Franz,Smith
Horst,Keanes


In [88]:
// Select columns with a name-based selector: here every column whose name ends with "name".
df.select({ endsWith("name") })

first_name,last_name
Max,Doe
Franz,Smith
Horst,Keanes


In [89]:
// Keep only the rows whose age equals 23.
df.filter { cols -> cols["age"].eq(23) }

first_name,last_name,age,weight
Max,Doe,23,55
Franz,Smith,23,88


In [90]:
// Keep only the rows whose weight is greater than 60.
df.filter { cols -> cols["weight"] gt 60 }

first_name,last_name,age,weight
Franz,Smith,23,88
Horst,Keanes,12,82


In [91]:
// Keep only the rows whose last name starts with "Do", using a per-value String predicate.
df.filter { cols ->
    cols["last_name"].isMatching<String> { startsWith("Do") }
}

first_name,last_name,age,weight
Max,Doe,23,55


In [92]:
// Row-wise filter: each row is inspected on its own and "age" is cast to Int before comparing.
df.filterByRow { row -> row["age"] as Int > 20 }

first_name,last_name,age,weight
Max,Doe,23,55
Franz,Smith,23,88


In [93]:
// Count rows per (age, last_name) combination; the count is rendered in a column named "n".
df.count("age", "last_name")

age,last_name,n
23,Doe,1
23,Smith,1
12,Keanes,1


In [94]:
import java.math.BigDecimal

// Mean age of everyone older than 20, converted to an exact Int.
// The single summary cell is read back as a string and parsed by BigDecimal;
// intValueExact() throws ArithmeticException if the mean has a fractional part.
df.filter { cols -> cols["age"] gt 20 }
    .summarize("mean_age" to { cols -> cols["age"].mean(removeNA = true) })
    .get("mean_age")
    .values()[0]
    .let { meanAge -> BigDecimal(meanAge.toString()).intValueExact() }

23

In [95]:
// Collapse the whole frame into one row holding the smallest and largest age
// (rendered as doubles: 12.0 and 23.0).
df.summarize(
    "min_age" to { cols -> cols["age"].min() },
    "max_age" to { cols -> cols["age"].max() }
)

min_age,max_age
12.0,23.0


In [96]:
// Same min/max summary written with the backtick-quoted `=` infix function
// to pair each output column name with its expression.
df.summarize(
    "min_age" `=` { it["age"].min() },
    "max_age" `=` { it["age"].max() }
)

min_age,max_age
12.0,23.0


In [97]:
// Group rows by age; further grouping attributes can be passed as additional varargs.
val groupedDf: DataFrame = df.groupBy("age")
// Collapse each age group into a single row: mean weight and number of rows in the group.
val sumDF = groupedDf.summarize(
    "mean_weight" to { group -> group["weight"].mean(removeNA = true) },
    "num_persons" to { nrow }
)
sumDF

age,mean_weight,num_persons
23,71.5,2
12,82.0,1


In [98]:
// Drop the grouping again; the rendered result has the same rows and columns as the original frame.
groupedDf.ungroup()

first_name,last_name,age,weight
Max,Doe,23,55
Franz,Smith,23,88
Horst,Keanes,12,82


In [99]:
// Typed record for one row of sumDF. The property names mirror sumDF's column names
// (age, mean_weight, num_persons) — presumably rowsAs matches them by name, so keep them in sync.
data class Person(val age: Int, val mean_weight: Double, val num_persons: Int)
// Convert every row of the summary frame into a Person instance.
val records = sumDF.rowsAs<Person>()
// Show the records themselves and the runtime class of the returned collection.
DISPLAY(records)
DISPLAY(records::class.java)

[Person(age=23, mean_weight=71.5, num_persons=2), Person(age=12, mean_weight=82.0, num_persons=1)]

class java.util.ArrayList

In [100]:
// Typed access: read a property of the first record instead of indexing a column by name.
records.first().mean_weight

71.5

In [101]:
// Convert the typed records back into a data frame; the rendered columns match the Person properties.
records.asDataFrame()

age,mean_weight,num_persons
23,71.5,2
12,82.0,1


In [102]:
// Build a data frame from the records with an explicit column mapping:
// only "age" and "weight" (taken from mean_weight) are emitted.
records.deparseRecords { person ->
    mapOf("age" to person.age, "weight" to person.mean_weight)
}

age,weight
23,71.5
12,82.0
