Skip to content
master
Go to file
Code

Latest commit

 

Git stats

Files

Permalink
Failed to load latest commit information.
Type
Name
Latest commit message
Commit time
R
 
 
 
 
man
 
 
src
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

README.md

mlr3oml

tic CRAN Status StackOverflow Mattermost

mlr3oml allows you to create mlr3 tasks directly from OpenML data sets. Furthermore, you can also obtain the data and the resampling for a given OpenML task. Caching can be enabled by setting the option "mlr3oml.cache". Uploading to OpenML is currently not supported; use the OpenML package for this.

Short Demo

library("mlr3")
library("mlr3oml")

# new parametrized task "oml"
tsk("oml", data_id = 31)
## INFO  [15:05:51.231] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/31}
## INFO  [15:05:51.378] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/features/31}
## INFO  [15:05:51.446] Downloading ARFF {url: https://www.openml.org/data/v1/download/31/credit-g.arff}

## <TaskClassif:credit-g> (1000 x 21)
## * Target: class
## * Properties: twoclass
## * Features (20):
##   - fct (13): checking_status, credit_history, employment,
##     foreign_worker, housing, job, other_parties, other_payment_plans,
##     own_telephone, personal_status, property_magnitude, purpose,
##     savings_status
##   - dbl (7): age, credit_amount, duration, existing_credits,
##     installment_commitment, num_dependents, residence_since
tsk("oml", task_id = 59)
## INFO  [15:05:51.552] Retrieving JSON {url: https://www.openml.org/api/v1/json/task/59}
## INFO  [15:05:51.589] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/61}
## INFO  [15:05:51.621] Downloading ARFF {url: https://www.openml.org/data/v1/download/61/iris.arff}

## <TaskClassif:Task 59: iris (Supervised Classification)> (150 x 5)
## * Target: class
## * Properties: multiclass
## * Features (4):
##   - dbl (4): petallength, petalwidth, sepallength, sepalwidth
# same for resampling
rsmp("oml", task_id = 59)
## INFO  [15:05:51.688] Retrieving JSON {url: https://www.openml.org/api/v1/json/task/59}
## INFO  [15:05:51.727] Downloading ARFF {url: https://www.openml.org/api_splits/get/59/Task_59_splits.arff}
## INFO  [15:05:51.784] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/61}
## INFO  [15:05:51.818] Downloading ARFF {url: https://www.openml.org/data/v1/download/61/iris.arff}

## <ResamplingCustom> with 10 iterations
## * Instantiated: TRUE
## * Parameters: list()
# R6 class for data sets
oml_data = OMLData$new(61)
oml_data$name
## INFO  [15:05:51.862] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/61}

## [1] "iris"
oml_data$nrow
## INFO  [15:05:51.904] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/qualities/61}

## [1] 150
oml_data$ncol
## INFO  [15:05:51.956] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/features/61}

## [1] 5
oml_data$data
## INFO  [15:05:52.002] Downloading ARFF {url: https://www.openml.org/data/v1/download/61/iris.arff}

##      sepallength sepalwidth petallength petalwidth          class
##   1:         5.1        3.5         1.4        0.2    Iris-setosa
##   2:         4.9        3.0         1.4        0.2    Iris-setosa
##   3:         4.7        3.2         1.3        0.2    Iris-setosa
##   4:         4.6        3.1         1.5        0.2    Iris-setosa
##   5:         5.0        3.6         1.4        0.2    Iris-setosa
##  ---
## 146:         6.7        3.0         5.2        2.3 Iris-virginica
## 147:         6.3        2.5         5.0        1.9 Iris-virginica
## 148:         6.5        3.0         5.2        2.0 Iris-virginica
## 149:         6.2        3.4         5.4        2.3 Iris-virginica
## 150:         5.9        3.0         5.1        1.8 Iris-virginica
# R6 class for tasks
oml_task = OMLTask$new(31)
oml_task$name
## INFO  [15:05:52.062] Retrieving JSON {url: https://www.openml.org/api/v1/json/task/31}

## [1] "Task 31: credit-g (Supervised Classification)"
oml_task$nrow
## INFO  [15:05:52.111] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/qualities/31}

## [1] 1000
oml_task$ncol
## INFO  [15:05:52.162] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/features/31}
## INFO  [15:05:52.205] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/31}

## [1] 21
oml_task$task
## INFO  [15:05:52.251] Downloading ARFF {url: https://www.openml.org/data/v1/download/31/credit-g.arff}

## <TaskClassif:Task 31: credit-g (Supervised Classification)> (1000 x 21)
## * Target: class
## * Properties: twoclass
## * Features (20):
##   - fct (13): checking_status, credit_history, employment,
##     foreign_worker, housing, job, other_parties, other_payment_plans,
##     own_telephone, personal_status, property_magnitude, purpose,
##     savings_status
##   - dbl (7): age, credit_amount, duration, existing_credits,
##     installment_commitment, num_dependents, residence_since
oml_task$resampling
## INFO  [15:05:52.338] Downloading ARFF {url: https://www.openml.org/api_splits/get/31/Task_31_splits.arff}

## <ResamplingCustom> with 10 iterations
## * Instantiated: TRUE
## * Parameters: list()
# list oml data sets with 5 features and 50 - 200 instances
tab = list_oml_data_sets(number_features = 5, number_instances = c(50, 200))
## INFO  [15:05:52.406] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/list/number_instances/50..200/number_features/5/limit/1000}
## INFO  [15:05:52.610] Retrieving JSON {url: https://www.openml.org/api/v1/json/data/list/number_instances/50..200/number_features/5/limit/1000/offset/1000}
head(tab[, .(data_id, name)])
##    data_id                    name
## 1:      61                    iris
## 2:     199                fruitfly
## 3:     214                baskball
## 4:     329              hayes-roth
## 5:     346                    aids
## 6:     551 analcatdata_michiganacc
# list first 10 oml tasks
tab = list_oml_tasks(limit = 10)
## INFO  [15:05:52.799] Retrieving JSON {url: https://www.openml.org/api/v1/json/task/list/limit/10}
tab[, .(task_id, data_id, name)]
##     task_id data_id            name
##  1:       2       2          anneal
##  2:       3       3        kr-vs-kp
##  3:       4       4           labor
##  4:       5       5      arrhythmia
##  5:       6       6          letter
##  6:       7       7       audiology
##  7:       8       8 liver-disorders
##  8:       9       9           autos
##  9:      10      10           lymph
## 10:      11      11   balance-scale
You can’t perform that action at this time.