#' @title Construct performance measure.
#'
#' @description
#' A measure object encapsulates a function to evaluate the performance of a
#' prediction. Information about already implemented measures can be obtained
#' here: [measures].
#'
#' A learner is trained on a training set d1 and produces a model m. This
#' model is then used to predict a second set d2 (which may be the training
#' set itself or a different one), resulting in the prediction. The
#' performance measure can then be defined using any of this information:
#' the original task, the fitted model, and the prediction.
#'
#' @param id (`character(1)`)\cr
#' Name of measure.
#' @param minimize (`logical(1)`)\cr
#' Should the measure be minimized?
#' Default is `TRUE`.
#' @param properties ([character])\cr
#' Set of measure properties. Some standard property names include:
#' - classif: Is the measure applicable to classification?
#' - classif.multi: Is the measure applicable to multi-class classification?
#' - multilabel: Is the measure applicable to multilabel classification?
#' - regr: Is the measure applicable to regression?
#' - surv: Is the measure applicable to survival analysis?
#' - cluster: Is the measure applicable to cluster analysis?
#' - costsens: Is the measure applicable to cost-sensitive learning?
#' - req.pred: Is the prediction object required for the calculation? Usually the case.
#' - req.truth: Is the truth column required for the calculation? Usually the case.
#' - req.task: Is the task object required for the calculation? Usually not the case.
#' - req.model: Is the fitted model required for the calculation? Usually not the case.
#' - req.feats: Are feature values required for the calculation? Usually not the case.
#' - req.prob: Are predicted probabilities required for the calculation? Usually not the case; AUC is an example where they are.
#'
#' Default is `character(0)`.
#' @param fun (`function(task, model, pred, feats, extra.args)`)\cr
#' Calculates the performance value. Usually you will only need the prediction
#' object `pred`.
#' - `task` ([Task])\cr
#' The task.
#' - `model` ([WrappedModel])\cr
#' The fitted model.
#' - `pred` ([Prediction])\cr
#' Prediction object.
#' - `feats` ([data.frame])\cr
#' The features.
#' - `extra.args` ([list])\cr
#' See below.
#' @param extra.args ([list])\cr
#' List of extra arguments which will always be passed to `fun`.
#' Can be changed after construction via [setMeasurePars()].
#' Default is empty list.
#' @param aggr ([Aggregation])\cr
#' Aggregation function used to aggregate the values measured on the
#' test / training sets into a single value.
#' Default is [test.mean].
#' @param best (`numeric(1)`)\cr
#' Best obtainable value for measure.
#' Default is -`Inf` or `Inf`, depending on `minimize`.
#' @param worst (`numeric(1)`)\cr
#' Worst obtainable value for measure.
#' Default is `Inf` or -`Inf`, depending on `minimize`.
#' @param name (`character(1)`)\cr
#' Name of the measure. Default is `id`.
#' @param note (`character(1)`)\cr
#' Description and additional notes for the measure. Default is \dQuote{}.
#' @template ret_measure
#' @export
#' @family performance
#' @aliases Measure
#' @examples
#' f = function(task, model, pred, feats, extra.args) {
#' sum((pred$data$response - pred$data$truth)^2)
#' }
#' makeMeasure(id = "my.sse", minimize = TRUE,
#' properties = c("regr", "response"), fun = f)
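#'
#' # A second sketch showing extra.args (hedged: the measure id and the
#' # parameter `p` are illustrative, not part of mlr):
#' g = function(task, model, pred, feats, extra.args) {
#'   mean(abs(pred$data$response - pred$data$truth)^extra.args$p)
#' }
#' makeMeasure(id = "my.power.err", minimize = TRUE,
#'   properties = c("regr", "response"), fun = g, extra.args = list(p = 2))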
makeMeasure = function(id, minimize, properties = character(0L),
fun, extra.args = list(), aggr = test.mean, best = NULL, worst = NULL, name = id, note = "") {
assertString(id)
assertFlag(minimize)
assertCharacter(properties, any.missing = FALSE)
assertFunction(fun)
assertList(extra.args)
assertString(note)
if (is.null(best)) {
best = ifelse(minimize, -Inf, Inf)
} else {
assertNumber(best)
}
if (is.null(worst)) {
worst = ifelse(minimize, Inf, -Inf)
} else {
assertNumber(worst)
}
m = makeS3Obj("Measure",
id = id,
minimize = minimize,
properties = properties,
fun = fun,
extra.args = extra.args,
best = best,
worst = worst,
name = name,
note = note
)
setAggregation(m, aggr)
}

#' @title Get default measure.
#'
#' @description
#' Get the default measure for a task type, task, task description or a learner.
#' Currently these are:
#' classif: mmce\cr
#' regr: mse\cr
#' cluster: db\cr
#' surv: cindex\cr
#' costsens: mcp\cr
#' multilabel: multilabel.hamloss\cr
#'
#' @param x (`character(1)` | [Task] | [TaskDesc] | [Learner])\cr
#' Task type (e.g. "classif"), a task, a task description, a learner object,
#' or a learner name (e.g. "classif.rpart").
#' @return ([Measure]).
#' @export
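#' @examples
#' # A small usage sketch; `iris.task` is an example task shipped with mlr:
#' getDefaultMeasure("classif")
#' getDefaultMeasure(iris.task)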
getDefaultMeasure = function(x) {
type = if (inherits(x, "TaskDesc")) {
x$type
} else if (inherits(x, "Task")) {
x$task.desc$type
} else if (inherits(x, "Learner")) {
x$type
} else if (x %in% listLearners()$class) {
stri_split_fixed(x, ".", simplify = TRUE)[1]
} else {
x
}
switch(type,
classif = mmce,
cluster = db,
regr = mse,
surv = cindex,
costsens = mcp,
multilabel = multilabel.hamloss
)
}

#' @export
print.Measure = function(x, ...) {
catf("Name: %s", x$name)
catf("Performance measure: %s", x$id)
catf("Properties: %s", collapse(x$properties))
catf("Minimize: %s", x$minimize)
catf("Best: %g; Worst: %g", x$best, x$worst)
catf("Aggregated by: %s", x$aggr$id)
catf("Arguments: %s", listToShortString(x$extra.args))
catf("Note: %s", x$note)
}