-
Notifications
You must be signed in to change notification settings - Fork 1
/
eval_oml.py
218 lines (188 loc) · 9.02 KB
/
eval_oml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
import matplotlib.pyplot as plt
from river.evaluate import iter_progressive_val_score
from spotPython.utils.progress import progress_bar
from numpy import mean
from numpy import zeros
from numpy import array
def eval_oml_iter_progressive(dataset, metric, models, step=100, weight_coeff=0.0, log_level=50):
    """Evaluate OML models on streaming data with progressive validation.

    Every model in `models` is run through river's `iter_progressive_val_score`
    on the same data. At each checkpoint the metric value, the elapsed time and
    the memory usage are recorded, each multiplied by a step-dependent weight.

    Args:
        dataset (list or iterable): The observations to evaluate on, in the form
            expected by `iter_progressive_val_score`. Anything that is not
            already a list is materialized with `list(dataset)`, so that its
            length is known and the same data can be replayed for every model.
        metric (river.metrics.base.MultiClassMetric or river.metrics.base.RegressionMetric):
            The metric to be used for evaluation. Its class name is the key
            under which the metric is looked up in every checkpoint.
        models (dict): A dictionary of OML models to be evaluated. The keys are the names
            of the models, and the values are the model objects.
        step (int): Passed to `iter_progressive_val_score` as the checkpoint
            interval. Defaults to 100.
        weight_coeff (float): Results are multiplied by (step/n_steps)**weight_coeff,
            where n_steps is the number of observations in `dataset`. For
            weight_coeff > 0, results from the beginning have a lower weight
            than results from the end. If weight_coeff == 0, every result is
            multiplied by 1 and has an equal weight. Defaults to 0.0.
        log_level (int): A progress bar is shown when log_level <= 20; with
            larger values nothing is printed. Defaults to 50.

    Returns:
        (dict): A dictionary containing the evaluation results. The keys are the names of the
            models, and the values are dictionaries with the following keys:
            - "step": A list of iteration numbers at which the model was evaluated.
            - "error": A list of the weighted errors for each iteration.
            - "r_time": A list of the weighted running times (seconds) for each iteration.
            - "memory": A list of the weighted memory usages (MB) for each iteration.
            - "metric_name": The name of the metric used for evaluation.

    Reference:
        https://riverml.xyz/0.15.0/recipes/on-hoeffding-trees/

    Examples:
        >>> from river import datasets, linear_model, metrics, preprocessing
        >>> from spotRiver.evaluation.eval_oml import eval_oml_iter_progressive
        >>> dataset = list(datasets.TrumpApproval())
        >>> model = preprocessing.StandardScaler() | linear_model.LinearRegression()
        >>> result = eval_oml_iter_progressive(dataset, metrics.MAE(), {"LinReg": model})
        >>> sorted(result["LinReg"])
        ['error', 'memory', 'metric_name', 'r_time', 'step']
    """
    metric_name = metric.__class__.__name__
    # Materialize the stream: its length is needed for the weights, and the
    # data has to be replayable for every model. Wrapping it as `[dataset]`
    # would turn the whole stream into one single "observation".
    if not isinstance(dataset, list):
        dataset = list(dataset)
    n_steps = len(dataset)
    show_progress = log_level <= 20
    result = {}
    # NOTE(review): the same `metric` object is handed to every model. If
    # iter_progressive_val_score updates it in place, later models start from
    # the state left by earlier ones - confirm, and pass a fresh copy per model
    # if that is not intended.
    for model_name, model in models.items():
        result_i = {"step": [], "error": [], "r_time": [], "memory": []}
        for checkpoint in iter_progressive_val_score(
            dataset, model, metric, measure_time=True, measure_memory=True, step=step
        ):
            fraction = checkpoint["Step"] / n_steps
            if show_progress:
                progress_bar(fraction, message="Eval iter_prog_val_score:")
            w = fraction**weight_coeff
            result_i["step"].append(checkpoint["Step"])
            result_i["error"].append(w * checkpoint[metric_name].get())
            # Convert timedelta object into seconds
            result_i["r_time"].append(w * checkpoint["Time"].total_seconds())
            # Scale the raw memory measurement by 2**-20 so it is reported in MB
            # (assumes the raw value is in bytes).
            result_i["memory"].append(w * checkpoint["Memory"] * 2**-20)
        result_i["metric_name"] = metric_name
        result[model_name] = result_i
    return result
def plot_oml_iter_progressive(result, log_x=False, log_y=False, figsize=None, filename=None):
    """Plot evaluation of OML models.

    Draws three stacked panels (error, running time, memory) with one line per
    model, all plotted against the recorded steps.

    Args:
        result (dict): A dictionary of evaluation results, as returned by eval_oml_iter_progressive.
        log_x (bool, optional): If True, the x-axis of every panel is set to log scale.
            Defaults to False.
        log_y (bool, optional): If True, the y-axis of every panel is set to log scale.
            Defaults to False.
        figsize (tuple, optional): The size of the figure. Defaults to None, in which case
            the default figure size `(10, 5)` is used.
        filename (str, optional): The name of the file to save the plot to. If None, the plot
            is not saved. Defaults to None.

    Returns:
        (matplotlib.figure.Figure): The figure object. It is closed in pyplot
            before being returned, so it is not shown implicitly.

    Reference:
        https://riverml.xyz/0.15.0/recipes/on-hoeffding-trees/

    Examples:
        >>> from spotRiver.evaluation.eval_oml import plot_oml_iter_progressive
        >>> result = {
        ...     "model1": {
        ...         "step": [1, 2, 3],
        ...         "error": [0.1, 0.2, 0.3],
        ...         "r_time": [0.1, 0.2, 0.3],
        ...         "memory": [0.1, 0.2, 0.3],
        ...         "metric_name": "MAE"
        ...     },
        ...     "model2": {
        ...         "step": [1, 2, 3],
        ...         "error": [0.2, 0.3, 0.4],
        ...         "r_time": [0.2, 0.3, 0.4],
        ...         "memory": [0.2, 0.3, 0.4],
        ...         "metric_name": "MAE"
        ...     }
        ... }
        >>> fig = plot_oml_iter_progressive(result, log_y=True)
    """
    if figsize is None:
        figsize = (10, 5)
    fig, ax = plt.subplots(figsize=figsize, nrows=3, dpi=300)
    last_entry = None
    for model_name, model in result.items():
        ax[0].plot(model["step"], model["error"], label=model_name)
        ax[1].plot(model["step"], model["r_time"], label=model_name)
        ax[2].plot(model["step"], model["memory"], label=model_name)
        last_entry = model
    # The error panel is labelled with the metric name of the last model; with
    # an empty result there is no name to use (and no loop variable to read).
    if last_entry is not None:
        ax[0].set_ylabel(last_entry["metric_name"])
    ax[1].set_ylabel("Time (seconds)")
    ax[2].set_ylabel("Memory (MB)")
    ax[2].set_xlabel("Instances")
    for axis in ax:
        axis.grid(True)
        if log_x:
            axis.set_xscale("log")
        if log_y:
            axis.set_yscale("log")
    # A legend is only drawn when there is at least one labelled line.
    if last_entry is not None:
        ax[0].legend(loc="upper center", bbox_to_anchor=(0.5, 1.25), ncol=3, fancybox=True, shadow=True)
    fig.tight_layout()
    if filename is not None:
        fig.savefig(filename, dpi=300)
    # Close this specific figure rather than whatever pyplot considers current.
    plt.close(fig)
    return fig
def fun_eval_oml_iter_progressive(result, metric=None, weights=None):
    """Wrapper function for eval_oml_iter_progressive, returning a single function value.

    For every model, the "error", "r_time" and "memory" series are each reduced
    to one number with `metric`, and the three numbers are combined as a
    weighted sum.

    Args:
        result (dict): A dictionary of evaluation results, as returned by eval_oml_iter_progressive.
        metric (function, optional): The function used to reduce each series to a single value.
            Defaults to None, in which case the mean function is used.
        weights (numpy.array, optional): An array of weights for error, r_time, and memory.
            If None, the weights are set to [1, 0, 0], which considers only the error.
            Defaults to None.

    Returns:
        (numpy.array): An array of function values, one for each model in the evaluation results,
            in the iteration order of `result`.

    Raises:
        ValueError: If the weights array is not of length 3.

    Reference:
        https://riverml.xyz/0.15.0/recipes/on-hoeffding-trees/

    Examples:
        >>> from spotRiver.evaluation.eval_oml import fun_eval_oml_iter_progressive
        >>> result = {
        ...     "model1": {
        ...         "step": [1, 2, 3],
        ...         "error": [0.1, 0.2, 0.3],
        ...         "r_time": [0.1, 0.2, 0.3],
        ...         "memory": [0.1, 0.2, 0.3],
        ...         "metric_name": "MAE"
        ...     },
        ...     "model2": {
        ...         "step": [1, 2, 3],
        ...         "error": [0.2, 0.3, 0.4],
        ...         "r_time": [0.2, 0.3, 0.4],
        ...         "memory": [0.2, 0.3, 0.4],
        ...         "metric_name": "MAE"
        ...     }
        ... }
        >>> fun_eval_oml_iter_progressive(result)
        array([0.2, 0.3])
    """
    if metric is None:
        metric = mean
    if weights is None:
        weights = array([1, 0, 0])
    if len(weights) != 3:
        raise ValueError("The weights array must be of length 3.")
    w_error, w_time, w_memory = weights
    y = zeros([len(result)])
    for i, scores in enumerate(result.values()):
        y[i] = (
            w_error * metric(scores["error"])
            + w_time * metric(scores["r_time"])
            + w_memory * metric(scores["memory"])
        )
    return y